Skip to content

Commit

Permalink
Update gpurt from commit 8f5372fc
Browse files Browse the repository at this point in the history
[Continuations] Add helper function for instance inclusion mask
[Continuations] Do not modify ray flags in system data
[Continuations] Fix CallShader(null)
[Continuations] Fix continuation Debug path
[Continuations] Fix traversal address loading
Add larger embedded data allocation functions
Conditionally enable lgc.cps style continuation stack lowering
Deactivate primitives in the scratch memory that don't have a valid index for their geometry
Handle index and transform offsets in indirect AS build
Remove support for noCopySortedNodes disabled BVH builds
Shader building changes for offline compiler tools
Support ScratchGlobal in BatchedBuilder
Fix type ambiguity compilation error on some systems
  • Loading branch information
qiaojbao committed Mar 21, 2024
1 parent 8d50dc6 commit 2e6d528
Show file tree
Hide file tree
Showing 48 changed files with 3,769 additions and 442 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ endif()
# Set the client API, error checking done via static_asserts.
gpurt_add_compile_definitions(GPURT_CLIENT_API_${GPURT_CLIENT_API}=1)

#if GPURT_BUILD_CONTINUATION
# Build option, ON by default. When it is enabled, GPURT_BUILD_CONTINUATION=1 is passed to
# gpurt_add_compile_definitions() so the sources can test for it with the preprocessor.
option(GPURT_BUILD_CONTINUATION "GpuRt uses continuation traversal" ON)

if (GPURT_BUILD_CONTINUATION)
gpurt_add_compile_definitions(GPURT_BUILD_CONTINUATION=1)
endif()
#endif

# Disable run time type information
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
gpurt_add_compile_options(-fno-rtti)
Expand Down
63 changes: 48 additions & 15 deletions backends/pal/gpurtPalBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ uint32 PalBackend::GetMaxDescriptorTableSize(
return GetCmdBuffer(cmdBuffer)->GetLargeEmbeddedDataLimit() / bufferSrdSizeDw;
}

// =====================================================================================================================
// Returns the embedded data limit reported by the command buffer behind the given client handle.
uint32 PalBackend::GetEmbeddedDataLimit(
    ClientCmdBufferHandle cmdBuffer
    ) const
{
    const uint32 limitInDwords = GetCmdBuffer(cmdBuffer)->GetEmbeddedDataLimit();

    return limitInDwords;
}

// =====================================================================================================================
uint32* PalBackend::AllocateEmbeddedData(
ClientCmdBufferHandle cmdBuffer,
Expand All @@ -182,6 +188,46 @@ uint32* PalBackend::AllocateEmbeddedData(
return GetCmdBuffer(cmdBuffer)->CmdAllocateEmbeddedData(sizeInDwords, alignment, pGpuAddress);
}

// =====================================================================================================================
// Returns the large embedded data limit reported by the command buffer behind the given client handle.
uint32 PalBackend::GetLargeEmbeddedDataLimit(
    ClientCmdBufferHandle cmdBuffer
    ) const
{
    const uint32 limitInDwords = GetCmdBuffer(cmdBuffer)->GetLargeEmbeddedDataLimit();

    return limitInDwords;
}

// =====================================================================================================================
// Forwards a large embedded data allocation to the command buffer behind the given client handle.
// The command buffer's return value is passed back unchanged; pGpuAddress is handed through to it as-is.
uint32* PalBackend::AllocateLargeEmbeddedData(
    ClientCmdBufferHandle cmdBuffer,
    uint32                sizeInDwords,
    uint32                alignment,
    gpusize*              pGpuAddress
    ) const
{
    uint32* const pData =
        GetCmdBuffer(cmdBuffer)->CmdAllocateLargeEmbeddedData(sizeInDwords, alignment, pGpuAddress);

    return pData;
}

// =====================================================================================================================
// Serves a temporary memory request from the command buffer's embedded data.
//
// Requests that fit within the regular embedded data limit use AllocateEmbeddedData(); requests that only fit within
// the large embedded data limit use AllocateLargeEmbeddedData(). Both paths pass an alignment of 1. A request that
// exceeds both limits is not allocated: nullptr is returned and pGpuAddress is not passed to any allocator.
uint32* PalBackend::RequestTemporaryGpuMemory(
    ClientCmdBufferHandle cmdBuffer,
    uint32                sizeInDwords,
    gpusize*              pGpuAddress
    ) const
{
    const uint32 regularLimitInDwords = GetEmbeddedDataLimit(cmdBuffer);
    const uint32 largeLimitInDwords   = GetLargeEmbeddedDataLimit(cmdBuffer);

    // Prefer the regular embedded data path whenever the request fits.
    if (sizeInDwords <= regularLimitInDwords)
    {
        return AllocateEmbeddedData(cmdBuffer, sizeInDwords, 1, pGpuAddress);
    }

    // Otherwise fall back to the large embedded data path, if the request fits there.
    if (sizeInDwords <= largeLimitInDwords)
    {
        return AllocateLargeEmbeddedData(cmdBuffer, sizeInDwords, 1, pGpuAddress);
    }

    // The request exceeds both limits.
    return nullptr;
}

// =====================================================================================================================
void PalBackend::InsertBarrier(
ClientCmdBufferHandle cmdBuffer,
Expand Down Expand Up @@ -253,19 +299,6 @@ void PalBackend::InsertBarrier(
}
}

// =====================================================================================================================
uint32* PalBackend::AllocateDescriptorTable(
ClientCmdBufferHandle cmdBuffer,
uint32 count,
gpusize* pGpuAddress,
uint32* pSrdSizeOut
) const
{
const uint32 bufferSrdSizeDw = m_deviceProperties.gfxipProperties.srdSizes.bufferView / sizeof(uint32);
*pSrdSizeOut = bufferSrdSizeDw * sizeof(uint32);
return GetCmdBuffer(cmdBuffer)->CmdAllocateLargeEmbeddedData(count * bufferSrdSizeDw, bufferSrdSizeDw, pGpuAddress);
}

// =====================================================================================================================
void PalBackend::CreateBufferViewSrds(
uint32 count,
Expand Down Expand Up @@ -300,7 +333,7 @@ uint32 PalBackend::GetOptimalNumThreadGroups(
const auto* pProps = &m_deviceProperties.gfxipProperties.shaderCore;
const uint32 wavesPerGroup = Util::RoundUpQuotient(threadGroupSize, pProps->nativeWavefrontSize);

return (pProps->numAvailableCus * pProps->numSimdsPerCu) / wavesPerGroup;
return Util::RoundUpQuotient((pProps->numAvailableCus * pProps->numSimdsPerCu), wavesPerGroup);
}

// =====================================================================================================================
Expand All @@ -324,7 +357,7 @@ void PalBackend::UploadCpuMemory(
{
Pal::ICmdBuffer* pCmdBuffer = GetCmdBuffer(cmdBuffer);
const uint32 embeddedDataLimitDwords = pCmdBuffer->GetEmbeddedDataLimit();
const uint32 uploadSizeDwords = Util::Pow2Align(sizeInBytes, 4);
const uint32 uploadSizeDwords = Util::RoundUpQuotient(sizeInBytes, 4u);
PAL_ASSERT(uploadSizeDwords <= embeddedDataLimitDwords);

gpusize srcGpuVa = 0;
Expand Down
32 changes: 23 additions & 9 deletions backends/pal/gpurtPalBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,14 @@ class PalBackend : public IBackend

virtual uint32 GetMaxDescriptorTableSize(ClientCmdBufferHandle cmdBuffer) const override;

virtual uint32* AllocateEmbeddedData(
virtual uint32* RequestTemporaryGpuMemory(
ClientCmdBufferHandle cmdBuffer,
uint32 sizeInDwords,
uint32 alignment,
gpusize* pGpuAddress
) const override;

virtual void InsertBarrier(ClientCmdBufferHandle cmdBuffer, uint32 flags) const override;

virtual uint32* AllocateDescriptorTable(
ClientCmdBufferHandle cmdBuffer,
uint32 count,
gpusize* pGpuAddress,
uint32* pSrdSizeOut
) const override;

virtual void CreateBufferViewSrds(
uint32 count,
const BufferViewInfo& bufferViewInfo,
Expand Down Expand Up @@ -161,6 +153,28 @@ class PalBackend : public IBackend
{ Pal::ChannelSwizzle::X, Pal::ChannelSwizzle::Y, Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::Zero };

static Pal::BufferViewInfo ConvertBufferViewToPalBufferView(const BufferViewInfo& bufferViewInfo);

// Queries how many DWORDs of embedded data the command buffer can allocate in one call to AllocateEmbeddedData.
uint32 GetEmbeddedDataLimit(ClientCmdBufferHandle cmdBuffer) const;

// Allocates embedded data.
uint32* AllocateEmbeddedData(
ClientCmdBufferHandle cmdBuffer,
uint32 sizeInDwords,
uint32 alignment,
gpusize* pGpuAddress
) const;

// Queries how many DWORDs of embedded data the command buffer can allocate in one call to AllocateLargeEmbeddedData.
uint32 GetLargeEmbeddedDataLimit(ClientCmdBufferHandle cmdBuffer) const;

// Allocates large embedded data (the counterpart of AllocateEmbeddedData for requests sized against
// GetLargeEmbeddedDataLimit). The size is given in DWORDs; pGpuAddress is an output parameter.
// NOTE(review): the returned pointer is presumably the CPU-visible mapping of the allocation - confirm against PAL.
uint32* AllocateLargeEmbeddedData(
ClientCmdBufferHandle cmdBuffer,
uint32 sizeInDwords,
uint32 alignment,
gpusize* pGpuAddress
) const;
};

} // namespace GpuRt
88 changes: 68 additions & 20 deletions cmake/GpuRtGenerateShaders.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if (TARGET llpc_version)
# Propagate include directories and defines from llpc_version into the HLSL code
get_target_property(LLPC_VERSION_INCLUDE_DIRS llpc_version INTERFACE_INCLUDE_DIRECTORIES)
get_target_property(LLPC_VERSION_DEFS llpc_version INTERFACE_COMPILE_DEFINITIONS)
set(gpurtDefines "${gpurtDefines},${LLPC_VERSION_DEFS}")
list(APPEND gpurtDefines "${LLPC_VERSION_DEFS}")
set(gpurtIncludeDirectories "${LLPC_VERSION_INCLUDE_DIRS}")
set(gpurtSharedDependencies llpc_version)
endif()
Expand All @@ -54,16 +54,11 @@ set(gpurtStripWhitelist "${gpurtToolsDir}/strip_whitelist.txt")

# Outputs
set(gpurtOutputDir "${CMAKE_CURRENT_BINARY_DIR}/pipelines")
set(gpurtShaders
"${gpurtOutputDir}/g_internal_shaders.h"
"${gpurtOutputDir}/g_GpuRtLibrary.h"
)
set(gpurtBvhShaders "${gpurtOutputDir}/g_internal_shaders.h")
set(gpurtTraceShadersSpirv "${gpurtOutputDir}/g_GpuRtLibrary_spv.h")

set(gpurtDebugInfoFile "${CMAKE_CURRENT_BINARY_DIR}/g_gpurtDebugInfo.h")

# Make the outputs accessible in the source code.
target_include_directories(gpurt_internal PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

set(originalShaderSourceDir "${GPU_RAY_TRACING_SOURCE_DIR}/src/shaders/")
set(originalShaderSource ${GPURT_SHADER_SOURCE_FILES})
list(TRANSFORM originalShaderSource PREPEND "${originalShaderSourceDir}")
Expand Down Expand Up @@ -97,31 +92,31 @@ set(gpurtSharedDependencies
${gpurtCompileScript}
)

# Create custom command that outputs the generated shaders
# Create custom command that outputs the generated BVH shaders
# The generated shaders depend on all the above mentioned files
if(GPURT_CLIENT_API STREQUAL "VULKAN")
set(SPIRV_FLAG "--spirv")

if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux")
if (NOT CMAKE_HOST_SYSTEM_NAME MATCHES "Windows")
# Find other executable paths so we can check if they updated in the DEPENDS.
set(gpurtSpirvRemap "")
find_program(gpurtSpirvRemap spirv-remap REQUIRED)
set(gpurtDxcCompiler "")
find_program(gpurtDxcCompiler dxc REQUIRED)

if (EXISTS ${GPU_RAY_TRACING_SOURCE_DIR}/tools/lnx)
set(SPIRV_COMPILER_ARGUMENT "--spirvCompilerPath" "${GPU_RAY_TRACING_SOURCE_DIR}/tools/lnx")
set(SPIRV_REMAP_ARGUMENT "--spirvRemapPath" "${GPU_RAY_TRACING_SOURCE_DIR}/tools/lnx")
set(COMPILER_ARGUMENT "--compilerPath" "${GPU_RAY_TRACING_SOURCE_DIR}/tools/lnx")
set(SPIRV_REMAP_ARGUMENT "--spirvRemapPath" "${GPU_RAY_TRACING_SOURCE_DIR}/tools/lnx")
else()
# Adjust arguments to remove the unnecessary ones. These binaries should be found on the system PATH.
set(SPIRV_COMPILER_ARGUMENT "")
set(COMPILER_ARGUMENT "")
set(SPIRV_REMAP_ARGUMENT "")
endif()
endif()

add_custom_command(
OUTPUT
${gpurtShaders}
${gpurtBvhShaders}

DEPENDS
${gpurtSharedDependencies}
Expand All @@ -133,7 +128,8 @@ if(GPURT_CLIENT_API STREQUAL "VULKAN")
--vulkan
"${SPIRV_FLAG}"
--outputDir "${gpurtOutputDir}"
${SPIRV_COMPILER_ARGUMENT}
--skip-trace
${COMPILER_ARGUMENT}
${SPIRV_REMAP_ARGUMENT}
--defines "\"${gpurtDefines}\""
--includePaths "\"${gpurtIncludeDirectories}\""
Expand All @@ -145,9 +141,61 @@ else()
message(FATAL_ERROR "Unknown graphics API: ${GPURT_CLIENT_API}")
endif()

# Create the custom target
# Have it depend on the above custom commands' output to establish a dependency
add_custom_target(GpuRtGenerateShaders DEPENDS ${gpurtShaders})
# For trace shaders, remove the GPURT_CLIENT_API_* definition.
# The command below adds the appropriate GPURT_CLIENT_API_* definition for the
# trace shaders being built.
list(FILTER gpurtDefines EXCLUDE REGEX "GPURT_CLIENT_API_.*")

# Create custom command that outputs the generated trace shaders as SPIR-V.
# The generated shaders depend on all the above mentioned files.
add_custom_command(
OUTPUT
${gpurtTraceShadersSpirv}

DEPENDS
${gpurtSharedDependencies}
${gpurtStripWhitelist}
${gpurtDxcCompiler}
${gpurtSpirvRemap}

COMMAND Python3::Interpreter "${gpurtCompileScript}"
--vulkan
"${SPIRV_FLAG}"
--outputDir "${gpurtOutputDir}"
--skip-bvh
--spirv
${COMPILER_ARGUMENT}
${SPIRV_REMAP_ARGUMENT}
--defines "\"${gpurtDefines};GPURT_CLIENT_API_VULKAN=1\""
--includePaths "\"${gpurtIncludeDirectories}\""
--whiteListPath "${gpurtStripWhitelist}"
"${gpurtShadersSourceDir}"
--strict
)

# Create the custom targets for generating the header files.
# Have them depend on the above custom commands' output to establish a dependency.
add_custom_target(GpuRtGenerateBvhShaders DEPENDS ${gpurtBvhShaders})
set_target_properties(GpuRtGenerateBvhShaders PROPERTIES EXCLUDE_FROM_ALL TRUE)
add_custom_target(GpuRtGenerateTraceShadersSpirv DEPENDS ${gpurtTraceShadersSpirv})
set_target_properties(GpuRtGenerateTraceShadersSpirv PROPERTIES EXCLUDE_FROM_ALL TRUE)

# Create interface targets for using the generated header files. The interface targets encode
# the include directory, which is inherited by anything using the interface.
add_library(GpuRtBvhShaders INTERFACE)
add_dependencies(GpuRtBvhShaders GpuRtGenerateBvhShaders)
target_include_directories(GpuRtBvhShaders INTERFACE ${CMAKE_CURRENT_BINARY_DIR})

add_library(GpuRtTraceShadersSpirv INTERFACE)
add_dependencies(GpuRtTraceShadersSpirv GpuRtGenerateTraceShadersSpirv)
target_include_directories(GpuRtTraceShadersSpirv INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
target_compile_definitions(GpuRtTraceShadersSpirv INTERFACE HAVE_GPURT_TRACE_SHADERS_SPIRV=1)

# Make gpurt dependent on RT shader generation and include directory.
target_link_libraries(gpurt_internal PRIVATE GpuRtBvhShaders)
if(GPURT_CLIENT_API STREQUAL "VULKAN")
target_link_libraries(gpurt_internal PRIVATE GpuRtTraceShadersSpirv)
else()
message(FATAL_ERROR "Unknown graphics API: ${GPURT_CLIENT_API}")
endif()

# Make gpurt dependent on RT shader generation
add_dependencies(gpurt_internal GpuRtGenerateShaders)
16 changes: 14 additions & 2 deletions gpurt/gpurt.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,6 @@ enum class InternalRayTracingCsType : uint32
Rebraid,
GenerateMortonCodes,
BuildBVH,
BuildBVHSortLeaves,
BuildBVHTD,
BuildBVHTDTR,
BuildBVHPLOC,
Expand Down Expand Up @@ -744,7 +743,6 @@ struct DeviceSettings
uint32 topDownBuild : 1; // Top down build in TLAS
uint32 allowFp16BoxNodesInUpdatableBvh : 1; // Allow box node in updatable bvh.
uint32 fp16BoxNodesRequireCompaction : 1; // Compaction is set or not.
uint32 noCopySortedNodes : 1; // Disable CopyUnsortedScratchLeafNode()
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 43
uint32 enableSAHCost : 1; // Use more accurate SAH cost
#endif
Expand Down Expand Up @@ -898,6 +896,13 @@ struct EntryFunctionTable
const char* pFetchTrianglePositionFromNodePointer;
const char* pFetchTrianglePositionFromRayQuery;
} intrinsic;
#if GPURT_BUILD_CONTINUATION
// Continuation (cps) entries; only present when GPURT_BUILD_CONTINUATION is enabled.
// NOTE(review): presumably these hold entry function names, like the other string members of this table - confirm.
struct
{
const char* pContTraceRay;
const char* pContTraversal;
} cps;
#endif
};

// Input flags to enable/disable GPURT shader library features
Expand Down Expand Up @@ -1324,6 +1329,12 @@ typedef void (*FnClientFreeGpuMem)(
const DeviceInitInfo& initInfo,
ClientGpuMemHandle gpuMem);

// Client callback type for obtaining temporary GPU memory through a command buffer. The callback receives the
// requested size in bytes and reports the buffer's GPU virtual address and its mapped data through the two
// out-parameters; success or failure is reported via the returned Pal::Result.
// NOTE(review): the lifetime and alignment of the returned memory are not specified here - confirm with the client.
typedef Pal::Result(*FnClientGetTemporaryGpuMemory)(
ClientCmdBufferHandle cmdBuf, // Opaque handle to command buffer that will handle the allocation
uint64 sizeInBytes, // Buffer size in bytes
gpusize* pDestGpuVa, // (out) Buffer GPU VA
void** ppMappedData); // (out) Map data

// =====================================================================================================================
// Client callback function pointers
struct ClientCallbacks
Expand All @@ -1343,6 +1354,7 @@ struct ClientCallbacks
FnClientFlushCmdContext pfnFlushCmdContext;
FnClientAllocateGpuMemory pfnAllocateGpuMemory;
FnClientFreeGpuMem pfnFreeGpuMem;
FnClientGetTemporaryGpuMemory pfnClientGetTemporaryGpuMemory;
};

class IDevice;
Expand Down
13 changes: 3 additions & 10 deletions gpurt/gpurtBackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,13 @@ class IBackend
// Calculates the maximum number of geometries that will fit in an SRD table.
virtual uint32 GetMaxDescriptorTableSize(ClientCmdBufferHandle cmdBuffer) const = 0;

// Allocates embedded data.
virtual uint32* AllocateEmbeddedData(
// Requests temporary mapped GPU memory.
// May return a nullptr if the request is too large to be allocated.
virtual uint32* RequestTemporaryGpuMemory(
ClientCmdBufferHandle cmdBuffer,
uint32 sizeInDwords,
uint32 alignment,
gpusize* pGpuAddress) const = 0;

// Allocates embedded data for a descriptor table using hardware-specific SRD sizes.
virtual uint32* AllocateDescriptorTable(
ClientCmdBufferHandle cmdBuffer,
uint32 count,
gpusize* pGpuAddress,
uint32* pSrdSizeOut) const = 0;

// Performs a generic barrier that's used to synchronize internal ray tracing shaders
virtual void InsertBarrier(ClientCmdBufferHandle cmdBuffer, uint32 flags) const = 0;

Expand Down
Loading

0 comments on commit 2e6d528

Please sign in to comment.