Skip to content

Commit

Permalink
Add deferred render pass gpu timers and perf stats
Browse files Browse the repository at this point in the history
  • Loading branch information
Nelarius committed Apr 3, 2024
1 parent 2f779e1 commit e9b4860
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 22 deletions.
130 changes: 128 additions & 2 deletions src/pt/deferred_renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <algorithm>
#include <array>
#include <numeric>

namespace nlrs
{
Expand All @@ -17,6 +18,17 @@ const WGPUTextureFormat DEPTH_TEXTURE_FORMAT = WGPUTextureFormat_Depth24Plus;
const WGPUTextureFormat ALBEDO_TEXTURE_FORMAT = WGPUTextureFormat_BGRA8Unorm;
const WGPUTextureFormat NORMAL_TEXTURE_FORMAT = WGPUTextureFormat_RGBA16Float;

/// CPU-side layout of the resolved timestamp query results.
///
/// `sizeof(TimestampsLayout)` is used as the byte size of both the query resolve buffer and the
/// mappable timestamps buffer, and the mapped buffer contents are read through this struct.
/// The query index of each timestamp is derived from the member's position:
/// `offsetof(TimestampsLayout, member) / MEMBER_SIZE`. The member order therefore defines the
/// query order, and every member must be a 64-bit value.
struct TimestampsLayout
{
    std::uint64_t gbufferPassStart;
    std::uint64_t gbufferPassEnd;
    std::uint64_t lightingPassStart;
    std::uint64_t lightingPassEnd;

    /// Number of timestamp queries; must equal the number of members above.
    static constexpr std::uint32_t QUERY_COUNT = 4;
    /// Size in bytes of a single resolved timestamp.
    static constexpr std::size_t MEMBER_SIZE = sizeof(std::uint64_t);
};

// Catch a member being added or removed without QUERY_COUNT being updated: the query set is
// created with QUERY_COUNT entries while the buffers are sized with sizeof(TimestampsLayout).
static_assert(
    sizeof(TimestampsLayout) == TimestampsLayout::QUERY_COUNT * TimestampsLayout::MEMBER_SIZE,
    "TimestampsLayout must consist of exactly QUERY_COUNT tightly packed 64-bit timestamps");

WGPUTexture createGbufferTexture(
const WGPUDevice device,
const char* const label,
Expand Down Expand Up @@ -70,9 +82,22 @@ DeferredRenderer::DeferredRenderer(
mNormalTextureView(nullptr),
mGbufferBindGroupLayout(),
mGbufferBindGroup(),
mQuerySet(nullptr),
mQueryBuffer(
gpuContext.device,
"Deferred renderer query buffer",
GpuBufferUsage::QueryResolve | GpuBufferUsage::CopySrc,
sizeof(TimestampsLayout)),
mTimestampsBuffer(
gpuContext.device,
"Deferred renderer timestamp buffer",
GpuBufferUsage::CopyDst | GpuBufferUsage::MapRead,
sizeof(TimestampsLayout)),
mGbufferPass(gpuContext, rendererDesc),
mDebugPass(),
mLightingPass()
mLightingPass(),
mGbufferPassDurationsNs(),
mLightingPassDurationsNs()
{
{
const std::array<WGPUTextureFormat, 1> depthFormats{
Expand Down Expand Up @@ -149,12 +174,23 @@ DeferredRenderer::DeferredRenderer(
textureBindGroupEntry(1, mNormalTextureView),
textureBindGroupEntry(2, mDepthTextureView)}};

{
const WGPUQuerySetDescriptor querySetDesc{
.nextInChain = nullptr,
.label = "Deferred renderer query set",
.type = WGPUQueryType_Timestamp,
.count = TimestampsLayout::QUERY_COUNT};
mQuerySet = wgpuDeviceCreateQuerySet(gpuContext.device, &querySetDesc);
}

mDebugPass = DebugPass{gpuContext, mGbufferBindGroupLayout, rendererDesc.framebufferSize};
mLightingPass = LightingPass{gpuContext, mGbufferBindGroupLayout};
}

DeferredRenderer::~DeferredRenderer()
{
querySetSafeRelease(mQuerySet);
mQuerySet = nullptr;
textureViewSafeRelease(mNormalTextureView);
mNormalTextureView = nullptr;
textureSafeRelease(mNormalTexture);
Expand All @@ -171,7 +207,11 @@ DeferredRenderer::~DeferredRenderer()

void DeferredRenderer::render(const GpuContext& gpuContext, const RenderDescriptor& renderDesc)
{
wgpuDeviceTick(gpuContext.device);
// Dawn-specific (non-standard): tick the device so that pending async operations make progress,
// and keep ticking until the timestamps buffer is back in the unmapped state.
do
{
wgpuDeviceTick(gpuContext.device);
} while (wgpuBufferGetMapState(mTimestampsBuffer.ptr()) != WGPUBufferMapState_Unmapped);

const WGPUCommandEncoder encoder = [&gpuContext]() {
const WGPUCommandEncoderDescriptor cmdEncoderDesc{
Expand All @@ -181,14 +221,26 @@ void DeferredRenderer::render(const GpuContext& gpuContext, const RenderDescript
return wgpuDeviceCreateCommandEncoder(gpuContext.device, &cmdEncoderDesc);
}();

wgpuCommandEncoderWriteTimestamp(
encoder,
mQuerySet,
offsetof(TimestampsLayout, gbufferPassStart) / TimestampsLayout::MEMBER_SIZE);
mGbufferPass.render(
gpuContext,
renderDesc.viewProjectionMatrix,
encoder,
mDepthTextureView,
mAlbedoTextureView,
mNormalTextureView);
wgpuCommandEncoderWriteTimestamp(
encoder,
mQuerySet,
offsetof(TimestampsLayout, gbufferPassEnd) / TimestampsLayout::MEMBER_SIZE);

wgpuCommandEncoderWriteTimestamp(
encoder,
mQuerySet,
offsetof(TimestampsLayout, lightingPassStart) / TimestampsLayout::MEMBER_SIZE);
{
const glm::mat4 inverseViewProjectionMat = glm::inverse(renderDesc.viewProjectionMatrix);
const Extent2f framebufferSize = Extent2f(renderDesc.framebufferSize);
Expand All @@ -203,6 +255,15 @@ void DeferredRenderer::render(const GpuContext& gpuContext, const RenderDescript
renderDesc.sky,
renderDesc.exposure);
}
wgpuCommandEncoderWriteTimestamp(
encoder,
mQuerySet,
offsetof(TimestampsLayout, lightingPassEnd) / TimestampsLayout::MEMBER_SIZE);

wgpuCommandEncoderResolveQuerySet(
encoder, mQuerySet, 0, TimestampsLayout::QUERY_COUNT, mQueryBuffer.ptr(), 0);
wgpuCommandEncoderCopyBufferToBuffer(
encoder, mQueryBuffer.ptr(), 0, mTimestampsBuffer.ptr(), 0, sizeof(TimestampsLayout));

const WGPUCommandBuffer cmdBuffer = [encoder]() {
const WGPUCommandBufferDescriptor cmdBufferDesc{
Expand All @@ -212,6 +273,51 @@ void DeferredRenderer::render(const GpuContext& gpuContext, const RenderDescript
return wgpuCommandEncoderFinish(encoder, &cmdBufferDesc);
}();
wgpuQueueSubmit(gpuContext.queue, 1, &cmdBuffer);

wgpuBufferMapAsync(
mTimestampsBuffer.ptr(),
WGPUMapMode_Read,
0,
sizeof(TimestampsLayout),
[](const WGPUBufferMapAsyncStatus status, void* const userdata) -> void {
if (status == WGPUBufferMapAsyncStatus_Success)
{
NLRS_ASSERT(userdata != nullptr);
DeferredRenderer& renderer = *static_cast<DeferredRenderer*>(userdata);
GpuBuffer& timestampBuffer = renderer.mTimestampsBuffer;
const void* const bufferData = wgpuBufferGetConstMappedRange(
timestampBuffer.ptr(), 0, sizeof(TimestampsLayout));
NLRS_ASSERT(bufferData != nullptr);

const TimestampsLayout& timestamps =
*static_cast<const TimestampsLayout*>(bufferData);

auto& gbufferDurations = renderer.mGbufferPassDurationsNs;
const auto gbufferDuration =
timestamps.gbufferPassEnd - timestamps.gbufferPassStart;
gbufferDurations.push_back(gbufferDuration);
if (gbufferDurations.size() > 30)
{
gbufferDurations.pop_front();
}

auto& lightingDurations = renderer.mLightingPassDurationsNs;
const auto lightingDuration =
timestamps.lightingPassEnd - timestamps.lightingPassStart;
lightingDurations.push_back(lightingDuration);
if (lightingDurations.size() > 30)
{
lightingDurations.pop_front();
}

wgpuBufferUnmap(timestampBuffer.ptr());
}
else
{
std::fprintf(stderr, "Failed to map timestamps buffer\n");
}
},
this);
}

void DeferredRenderer::renderDebug(
Expand Down Expand Up @@ -1352,4 +1458,24 @@ void DeferredRenderer::LightingPass::render(
wgpuRenderPassEncoderDraw(renderPass, 6, 1, 0, 0);
wgpuRenderPassEncoderEnd(renderPass);
}

/// Returns the average durations of the gbuffer and lighting passes, in milliseconds.
///
/// The averages are computed over the durations currently stored in `mGbufferPassDurationsNs`
/// and `mLightingPassDurationsNs`. A default-initialized `PerfStats` is returned when no
/// durations have been recorded yet.
DeferredRenderer::PerfStats DeferredRenderer::getPerfStats() const
{
    NLRS_ASSERT(mGbufferPassDurationsNs.size() == mLightingPassDurationsNs.size());

    if (mGbufferPassDurationsNs.empty())
    {
        return {};
    }

    // Sum in the containers' own unsigned 64-bit element type instead of a signed `long long`
    // accumulator, so that no implicit signed/unsigned conversion happens on each step.
    const auto averageMs = [](const std::deque<std::uint64_t>& durationsNs) -> float {
        if (durationsNs.empty())
        {
            // Guard against division by zero should the two containers ever get out of sync.
            return 0.0f;
        }
        const std::uint64_t totalNs =
            std::accumulate(durationsNs.begin(), durationsNs.end(), std::uint64_t{0});
        // 1 ns == 0.000001 ms
        return 0.000001f * static_cast<float>(totalNs) / static_cast<float>(durationsNs.size());
    };

    return {averageMs(mGbufferPassDurationsNs), averageMs(mLightingPassDurationsNs)};
}
} // namespace nlrs
36 changes: 25 additions & 11 deletions src/pt/deferred_renderer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <cstddef>
#include <cstdint>
#include <deque>
#include <span>
#include <vector>

Expand Down Expand Up @@ -45,6 +46,12 @@ struct RenderDescriptor
class DeferredRenderer
{
public:
/// Averaged pass timings of the deferred renderer, as returned by `getPerfStats`.
/// Both values default to zero.
struct PerfStats
{
    /// Average duration of the gbuffer pass, in milliseconds.
    float averageGbufferPassDurationsMs{0.0f};
    /// Average duration of the lighting pass, in milliseconds.
    float averageLightingPassDurationsMs{0.0f};
};

DeferredRenderer(const GpuContext&, const DeferredRendererDescriptor&);
~DeferredRenderer();

Expand All @@ -58,6 +65,8 @@ class DeferredRenderer
void renderDebug(const GpuContext&, const glm::mat4&, const Extent2f&, WGPUTextureView);
void resize(const GpuContext&, const Extent2u&);

PerfStats getPerfStats() const;

private:
struct IndexBuffer
{
Expand Down Expand Up @@ -182,16 +191,21 @@ class DeferredRenderer
float exposure);
};

WGPUTexture mDepthTexture;
WGPUTextureView mDepthTextureView;
WGPUTexture mAlbedoTexture;
WGPUTextureView mAlbedoTextureView;
WGPUTexture mNormalTexture;
WGPUTextureView mNormalTextureView;
GpuBindGroupLayout mGbufferBindGroupLayout;
GpuBindGroup mGbufferBindGroup;
GbufferPass mGbufferPass;
DebugPass mDebugPass;
LightingPass mLightingPass;
WGPUTexture mDepthTexture;
WGPUTextureView mDepthTextureView;
WGPUTexture mAlbedoTexture;
WGPUTextureView mAlbedoTextureView;
WGPUTexture mNormalTexture;
WGPUTextureView mNormalTextureView;
GpuBindGroupLayout mGbufferBindGroupLayout;
GpuBindGroup mGbufferBindGroup;
WGPUQuerySet mQuerySet;
GpuBuffer mQueryBuffer;
GpuBuffer mTimestampsBuffer;
GbufferPass mGbufferPass;
DebugPass mDebugPass;
LightingPass mLightingPass;
std::deque<std::uint64_t> mGbufferPassDurationsNs;
std::deque<std::uint64_t> mLightingPassDurationsNs;
};
} // namespace nlrs
44 changes: 35 additions & 9 deletions src/pt/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ try

auto onNewFrame = [&gui]() -> void { gui.beginFrame(); };

auto onUpdate = [&appState, &renderer](GLFWwindow* windowPtr, float deltaTime) -> void {
auto onUpdate =
[&appState, &renderer, &deferredRenderer](GLFWwindow* windowPtr, float deltaTime) -> void {
{
// Skip input if ImGui captured input
if (!ImGui::GetIO().WantCaptureMouse)
Expand Down Expand Up @@ -320,14 +321,39 @@ try
ImGui::RadioButton("debug", &appState.ui.rendererType, RendererType_Debug);
ImGui::Separator();

ImGui::Text("Renderer stats");
ImGui::Text("Perf stats");
{
const float renderAverageMs = renderer.averageRenderpassDurationMs();
const float progressPercentage = renderer.renderProgressPercentage();
ImGui::Text(
"render pass: %.2f ms (%.1f FPS)", renderAverageMs, 1000.0f / renderAverageMs);
ImGui::Text("render progress: %.2f %%", progressPercentage);
switch (appState.ui.rendererType)
{
case RendererType_PathTracer:
{
const float renderAverageMs = renderer.averageRenderpassDurationMs();
const float progressPercentage = renderer.renderProgressPercentage();
ImGui::Text(
"render pass: %.2f ms (%.1f FPS)",
renderAverageMs,
1000.0f / renderAverageMs);
ImGui::Text("render progress: %.2f %%", progressPercentage);
break;
}
case RendererType_Deferred:
{
const auto perfStats = deferredRenderer.getPerfStats();
ImGui::Text(
"gbuffer pass: %.2f ms (%.1f FPS)",
perfStats.averageGbufferPassDurationsMs,
1000.0f / perfStats.averageGbufferPassDurationsMs);
ImGui::Text(
"lighting pass: %.2f ms (%.1f FPS)",
perfStats.averageLightingPassDurationsMs,
1000.0f / perfStats.averageLightingPassDurationsMs);
break;
}
default:
ImGui::Text("no perf stats available");
}
}

ImGui::Separator();

ImGui::Text("Parameters");
Expand Down Expand Up @@ -455,8 +481,8 @@ try

auto onResize = [&gpuContext, &deferredRenderer, &textureBlitter](
const nlrs::FramebufferSize newSize) -> void {
// TODO: this function is not really needed since I get the current framebuffer size on each
// render anyway.
// TODO: this function is not really needed since I get the current framebuffer size on
// each render anyway.
const auto sz = nlrs::Extent2u(newSize);
deferredRenderer.resize(gpuContext, sz);
textureBlitter.resize(gpuContext, sz);
Expand Down

0 comments on commit e9b4860

Please sign in to comment.