Skip to content

Commit

Permalink
Workgraph fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
godlikepanos committed Sep 11, 2024
1 parent 6514b56 commit e28eb1c
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 22 deletions.
23 changes: 14 additions & 9 deletions AnKi/Gr/D3D/D3DCommandBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,14 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
ANKI_D3D_SELF(CommandBufferImpl);
self.commandCommon();

// Cleans up a barrier access mask in place: the NO_ACCESS bit is only kept when it is the sole content of the mask
auto sanitizeAccess = [](D3D12_BARRIER_ACCESS& access) {
	if(access == D3D12_BARRIER_ACCESS_NO_ACCESS)
	{
		// The mask is exactly NO_ACCESS, leave it untouched
		return;
	}

	if(access & D3D12_BARRIER_ACCESS_NO_ACCESS)
	{
		// NO_ACCESS is combined with other accesses, drop the NO_ACCESS bit and keep the rest
		access &= ~D3D12_BARRIER_ACCESS_NO_ACCESS;
	}
};

DynamicArray<D3D12_TEXTURE_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> texBarriers(self.m_fastPool);
DynamicArray<D3D12_BUFFER_BARRIER, MemoryPoolPtrWrapper<StackMemoryPool>> bufferBarriers(self.m_fastPool);

Expand All @@ -701,6 +709,9 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
const TextureImpl& impl = static_cast<const TextureImpl&>(barrier.m_textureView.getTexture());
D3D12_TEXTURE_BARRIER& d3dBarrier = *texBarriers.emplaceBack();
d3dBarrier = impl.computeBarrierInfo(barrier.m_previousUsage, barrier.m_nextUsage, barrier.m_textureView.getSubresource());

sanitizeAccess(d3dBarrier.AccessBefore);
sanitizeAccess(d3dBarrier.AccessAfter);
}

for(const BufferBarrierInfo& barrier : buffers)
Expand All @@ -712,15 +723,6 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
{
// Merge barriers

if(bufferBarriers.getBack().AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS)
{
bufferBarriers.getBack().AccessBefore = D3D12_BARRIER_ACCESS(0);
}
else if(bufferBarriers.getBack().AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS)
{
b.AccessBefore = D3D12_BARRIER_ACCESS(0);
}

bufferBarriers.getBack().AccessBefore |= b.AccessBefore;
bufferBarriers.getBack().AccessAfter |= b.AccessAfter;
bufferBarriers.getBack().SyncBefore |= b.SyncBefore;
Expand All @@ -732,6 +734,9 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
D3D12_BUFFER_BARRIER& d3dBarrier = *bufferBarriers.emplaceBack();
d3dBarrier = b;
}

sanitizeAccess(bufferBarriers.getBack().AccessBefore);
sanitizeAccess(bufferBarriers.getBack().AccessAfter);
}

ANKI_ASSERT(accelerationStructures.getSize() == 0 && "TODO");
Expand Down
2 changes: 1 addition & 1 deletion AnKi/Gr/D3D/D3DGrManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
if(res == S_OK)
{
infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true);
infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
// infoq->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
}
}
}
Expand Down
33 changes: 21 additions & 12 deletions Tests/Gr/GrWorkGraphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,20 @@ static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, Boo
{
ANKI_ASSERT(iterationCount >= iterationsPerCommandBuffer && (iterationCount % iterationsPerCommandBuffer) == 0);

U64 startUs = 0;
FencePtr fence;

F64 avgCpuTimePerIterationMs = 0.0;
DynamicArray<TimestampQueryPtr> timestamps;

const U32 commandBufferCount = iterationCount / iterationsPerCommandBuffer;
for(U32 icmdb = 0; icmdb < commandBufferCount; ++icmdb)
{
CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(CommandBufferInitInfo(CommandBufferFlag::kGeneralWork));

TimestampQueryPtr query1 = GrManager::getSingleton().newTimestampQuery();
cmdb->writeTimestamp(query1.get());
timestamps.emplaceBack(query1);

const U64 cpuTimeStart = HighRezTimer::getCurrentTimeUs();
for(U32 i = 0; i < iterationsPerCommandBuffer; ++i)
{
Expand All @@ -67,25 +71,31 @@ static void runBenchmark(U32 iterationCount, U32 iterationsPerCommandBuffer, Boo

// clearSwapchain(cmdb);

TimestampQueryPtr query2 = GrManager::getSingleton().newTimestampQuery();
cmdb->writeTimestamp(query2.get());
timestamps.emplaceBack(query2);

cmdb->endRecording();
const U64 cpuTimeEnd = HighRezTimer::getCurrentTimeUs();
avgCpuTimePerIterationMs += (Second(cpuTimeEnd - cpuTimeStart) * 0.001) / Second(iterationCount);

if(icmdb == 0)
{
startUs = HighRezTimer::getCurrentTimeUs();
}

GrManager::getSingleton().submit(cmdb.get(), {}, (icmdb == commandBufferCount - 1) ? &fence : nullptr);

// GrManager::getSingleton().swapBuffers();
}

const Bool done = fence->clientWait(kMaxSecond);
ANKI_TEST_EXPECT_EQ(done, true);
const U64 endUs = HighRezTimer::getCurrentTimeUs();

const F64 avgTimePerIterationMs = (Second(endUs - startUs) * 0.001) / Second(iterationCount);
F64 avgTimePerIterationMs = 0.0f;
for(U32 i = 0; i < timestamps.getSize(); i += 2)
{
Second a, b;
ANKI_TEST_EXPECT_EQ(timestamps[i]->getResult(a), TimestampQueryResult::kAvailable);
ANKI_TEST_EXPECT_EQ(timestamps[i + 1]->getResult(b), TimestampQueryResult::kAvailable);

avgTimePerIterationMs += (Second(b - a) * 1000.0) / Second(iterationCount);
}

if(bBenchmark)
{
Expand Down Expand Up @@ -251,8 +261,7 @@ StructuredBuffer<uint> g_positions : register(t1);
#define THREAD_COUNT 64u
// Operates per object
[Shader("node")] [NodeLaunch("broadcasting")] [NodeIsProgramEntry] [NodeMaxDispatchGrid(1, 1, 1)]
[NumThreads(THREAD_COUNT, 1, 1)]
[Shader("node")] [NodeLaunch("broadcasting")] [NodeIsProgramEntry] [NodeMaxDispatchGrid(1, 1, 1)] [NumThreads(THREAD_COUNT, 1, 1)]
void main(DispatchNodeInputRecord<FirstNodeRecord> inp, [MaxRecords(THREAD_COUNT)] NodeOutput<SecondNodeRecord> computeAabb,
uint svGroupIndex : SV_GroupIndex, uint svDispatchThreadId : SV_DispatchThreadId)
{
Expand Down Expand Up @@ -451,9 +460,9 @@ void main(uint svDispatchThreadId : SV_DispatchThreadId, uint svGroupIndex : SV_

// Execute
const U32 iterationsPerCmdb = (!bBenchmark) ? 1 : 100u;
const U32 iterationCount = (!bBenchmark) ? 1 : iterationsPerCmdb * 10;
const U32 iterationCount = (!bBenchmark) ? iterationsPerCmdb : iterationsPerCmdb * 1;
runBenchmark(iterationCount, iterationsPerCmdb, bBenchmark, [&](CommandBuffer& cmdb) {
BufferBarrierInfo barr = {BufferView(aabbsBuff.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
const BufferBarrierInfo barr = {BufferView(aabbsBuff.get()), BufferUsageBit::kUavCompute, BufferUsageBit::kUavCompute};
cmdb.setPipelineBarrier({}, {&barr, 1}, {});

if(bWorkgraphs)
Expand Down

0 comments on commit e28eb1c

Please sign in to comment.