Skip to content

Commit

Permalink
Update pal from commit 53117dfb
Browse files Browse the repository at this point in the history
Implement GPU work submission for Trace Session 'Preparation' state
Add Absolute and Relative Capture Modes to Trace Controllers
Fix task shader user data missing in Execute Indirect Shader
Fix issue with gang-submit and compute scratch ring
Validate AceRingSet only for ACE
Add supportInt4 to gfxipProperties
Implement cancel trace for UberTrace-based tools
Bind Instrumentation changes for RGP
Handle CancelTrace for TDR and RenderOp controllers
Update address-lib submodule
Fix "Unknown()" events appearing in RGP captures
Clean up virtual functions in RsrcProcMgr
[CodingStandards] Add allowance for const function value-type parameters
HSA ABI permit HiddenHostcallBuffer
MultiElfCacheLayer fixes
Set DrawIndex to Zero for Execute Indirect
Add support for Wayland explicit sync during presents
Parse dimension info from Mesa metadata
Clean up image clone copy path codes
Gpu Profiler SPM Draw Markers: Add updates of the RLCUSER0 and RLCUSER1 "counters" to indicate the TimedCallId (Draw, Dispatch, Copy, Barrier, etc) and CommandBufferID, respectively
Allow acquiring a unique id for StringTableTraceSource
Update SwWarDetection submodule
Optimize the HwlImageToImageMissingPixelCopy() path in CmdCopyImage()
Zero init embedded data in Execute Indirect
Drop using PWS late acquire point PrePs
Re-enable zwp dmabuf protocol
Remove non-primary GartCacheable alert
Evaluate and override GpuProfilerMode in PlatformSettings validation to ensure downstream choices for GpuProfilerMode are consistent
Archive pipeline loader: Avoid indiscriminately propagating reg/stack usage
Add IDevice::GetDefaultSamplePattern
GpuProfiler - Add flush to WaitForFences
Update DevDriver submodule
Pre-increment instead of post-increment for String table Id
Recompile rpm
Remove a pre-RDNA check
[codegen] Generate formats at build time.
Enable the support for int4 type on gfx11
[AddrMgr2] Fix failure with InitSubresourcesForImage on user specified dma_buf_modifier
Fix failure in VK.dgc.ext.compute* compute queue cases when running in batch
Fix an underflow in BoxesCoverWholeExtent
Fix gpuDebugQueue with ReBAR/SAM enabled
Fix YUV regression with format generation
Disable DevDriverOverlay in preparation state
munmap format table in zwp_linux_dmabuf_feedback_v1::done
Fix build error with the latest clang: missing header for sscanf calls
  • Loading branch information
qiaojbao committed Dec 4, 2024
1 parent 0a622c9 commit b6da370
Show file tree
Hide file tree
Showing 270 changed files with 21,286 additions and 38,428 deletions.
66 changes: 66 additions & 0 deletions cmake/PalCodegen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ function(pal_setup_generated_code)
INCLUDE_HEADERS core/hw/gfxip/gfxDevice.h)
endif()

pal_gen_formats()
endfunction()

function(nongen_source_groups DIR)
Expand Down Expand Up @@ -246,3 +247,68 @@ function(nongen_source_groups DIR)
endforeach()
endfunction()

# Sets up generation of PAL's merged format headers and attaches them to the `pal` target.
#
# The generator script ${PAL_GEN_DIR}/formats/main.py is invoked with ${CMAKE_CURRENT_BINARY_DIR} as its only
# argument and is expected to produce, beneath that directory:
#   - src/core/g_mergedFormatInfo.h
#   - src/core/hw/gfxip/gfx9/g_gfx9MergedDataFormats.h
#
# Generation happens in two places:
#   1. At configure time, but only if either header is missing, so the files exist before the first build.
#   2. At build time through a custom command that re-runs the script whenever one of its listed inputs changes.
#
# Takes no arguments. Relies on PAL_GEN_DIR, Python3_EXECUTABLE, PAL_BINARY_DIR and the `pal` target being
# defined by the caller.
function(pal_gen_formats)
    set(FORMAT_GEN_DIR ${PAL_GEN_DIR}/formats)
    set(FORMAT_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR})

    set(NEEDS_CONFIG_GEN_STEP FALSE)

    set(FORMAT_INDEPENDENT_HDR "${FORMAT_OUT_DIR}/src/core/g_mergedFormatInfo.h")

    # Quote the expansions so a path containing spaces or semicolons is still tested as a single path.
    if (NOT EXISTS "${FORMAT_INDEPENDENT_HDR}")
        set(NEEDS_CONFIG_GEN_STEP TRUE)
    endif()

    set(FORMAT_GFX9_HDR "${FORMAT_OUT_DIR}/src/core/hw/gfxip/gfx9/g_gfx9MergedDataFormats.h")
    if (NOT EXISTS "${FORMAT_GFX9_HDR}")
        set(NEEDS_CONFIG_GEN_STEP TRUE)
    endif()

    if (NEEDS_CONFIG_GEN_STEP)
        # Generate these during configuration so that they are guaranteed to exist.
        execute_process(
            COMMAND ${Python3_EXECUTABLE} ${FORMAT_GEN_DIR}/main.py
                    ${FORMAT_OUT_DIR}
            COMMAND_ECHO STDOUT
            WORKING_DIRECTORY ${FORMAT_GEN_DIR}
            RESULT_VARIABLE FORMAT_GEN_RESULT
        )

        # execute_process() does not stop configuration on failure by itself. Without this check a broken
        # generator run would go unnoticed here and only surface later as a confusing missing-header error.
        # The result is compared as a string because it holds an error description (not a number) when the
        # process could not be launched at all.
        if (NOT "${FORMAT_GEN_RESULT}" STREQUAL "0")
            message(FATAL_ERROR
                    "Format generation failed (${FORMAT_GEN_DIR}/main.py returned '${FORMAT_GEN_RESULT}').")
        endif()
    endif()

    # Build-time rule: regenerate both headers whenever the script, its data files or its templates change.
    add_custom_command(
        OUTPUT  ${FORMAT_INDEPENDENT_HDR}
                ${FORMAT_GFX9_HDR}
        COMMAND ${Python3_EXECUTABLE} ${FORMAT_GEN_DIR}/main.py
                ${FORMAT_OUT_DIR}
        COMMENT "Generating formats from ${FORMAT_GEN_DIR}/..."
        DEPENDS ${FORMAT_GEN_DIR}/main.py
                ${FORMAT_GEN_DIR}/data/pal.yaml
                ${FORMAT_GEN_DIR}/data/gfx10.yaml
                ${FORMAT_GEN_DIR}/data/gfx10_3.yaml
                ${FORMAT_GEN_DIR}/data/gfx11.yaml
                ${FORMAT_GEN_DIR}/shared/structs.py
                ${FORMAT_GEN_DIR}/shared/template_hwl.h.j2
                ${FORMAT_GEN_DIR}/shared/template_independent.h.j2
                ${FORMAT_GEN_DIR}/shared/utils.py
        WORKING_DIRECTORY ${FORMAT_GEN_DIR}
        # Keep argument escaping identical across generators and platforms.
        VERBATIM
    )

    # Named target that drives the custom command above; listing the headers as SOURCES makes them show up in IDEs.
    add_custom_target(pal_generate_formats
        DEPENDS ${FORMAT_INDEPENDENT_HDR}
                ${FORMAT_GFX9_HDR}
        SOURCES ${FORMAT_INDEPENDENT_HDR}
                ${FORMAT_GFX9_HDR}
    )
    target_include_directories(pal PRIVATE ${FORMAT_OUT_DIR}/src)
    # Ordering only: make sure the headers are generated before `pal` compiles.
    add_dependencies(pal pal_generate_formats)
    set_target_properties(pal_generate_formats
        PROPERTIES
            FOLDER "${CMAKE_FOLDER}/Generate/Formats"
    )

    # NOTE(review): source_group(TREE) requires every listed file to live under PAL_BINARY_DIR; this presumes
    # CMAKE_CURRENT_BINARY_DIR is PAL_BINARY_DIR or a subdirectory of it -- confirm against the caller.
    source_group(
        TREE ${PAL_BINARY_DIR}
        FILES
            ${FORMAT_INDEPENDENT_HDR}
            ${FORMAT_GFX9_HDR}
    )
endfunction()
9 changes: 8 additions & 1 deletion cmake/PalCompilerOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,15 @@ function(pal_compiler_options TARGET)

set(isGNU FALSE)
set(isClang FALSE)
set(isMSVC FALSE)

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC"
OR "${CMAKE_CXX_COMPILER_FRONTEND_VARIANT}" STREQUAL "MSVC")
# Either Microsoft's cl or LLVM's clang-cl. Note this check
# is done before the Clang check below because clang-cl uses
# "Clang" for COMPILER_ID too.
set(isMSVC TRUE)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(isGNU TRUE)
# Output with color if in terminal: https://github.com/ninja-build/ninja/wiki/FAQ
target_compile_options(${TARGET} PRIVATE -fdiagnostics-color=always)
Expand Down
4 changes: 4 additions & 0 deletions doc/process/palCodingStandards.md
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,10 @@ Const Usage

- Use the const specifier ***whenever possible***.

- The const specifier ***should*** be applied to value-type function
parameters when the function author wants the value to be guaranteed
constant for the life of the function.

- Floating point constants ***must*** be suffixed with an "f" to
prevent an implicit conversion from double to float. Constants of
other types ***should*** limit conversions using the "ul", "ull",
Expand Down
37 changes: 33 additions & 4 deletions inc/core/palCmdBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,17 @@ struct GpuVirtAddrAndStride
};
};

/// Flags to describe a dispatch.
///
/// The anonymous bitfield struct and u32All occupy the same 32 bits of this union, so individual flags can be
/// accessed by name while u32All views all of them at once.
union DispatchInfoFlags
{
struct
{
uint32 devDriverOverlay : 1; ///< Flag indicates this dispatch draws the DevDriver overlay.
uint32 reserved : 31; ///< Reserved for future use. Together with the flag above this fills all 32 bits.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};

/// Specifies the different stages at which a combiner can choose between different shading rates.
enum class VrsCombinerStage : uint32
{
Expand Down Expand Up @@ -1663,8 +1674,9 @@ typedef void (PAL_STDCALL *CmdDrawIndexedIndirectMultiFunc)(
///
/// @see ICmdBuffer::CmdDispatch().
typedef void (PAL_STDCALL *CmdDispatchFunc)(
ICmdBuffer* pCmdBuffer,
DispatchDims size);
ICmdBuffer* pCmdBuffer,
DispatchDims size,
DispatchInfoFlags infoFlags);

/// @internal Function pointer type definition for issuing indirect dispatches.
///
Expand Down Expand Up @@ -3137,17 +3149,34 @@ class ICmdBuffer : public IDestroyable
}
#endif

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 909
/// Dispatches a compute workload of the given dimensions using the command buffer's currently bound compute state.
///
/// The thread group size is defined in the compute shader.
///
/// Supports PAL ABI and HSA ABI pipelines.
///
/// @param [in] size Thread groups to dispatch. If any components are zero the dispatch will be discarded.
void CmdDispatch(
inline void CmdDispatch(
DispatchDims size)
{
m_funcTable.pfnCmdDispatch(this, size);
m_funcTable.pfnCmdDispatch(this, size, {});
}
#endif

/// Dispatches a compute workload of the given dimensions using the command buffer's currently bound compute state.
///
/// The thread group size is defined in the compute shader.
///
/// Supports PAL ABI and HSA ABI pipelines.
///
/// @param [in] size      Thread groups to dispatch. If any components are zero the dispatch will be discarded.
/// @param [in] infoFlags Additional information about the dispatch. See @ref DispatchInfoFlags. The flags are
///                       forwarded unmodified to the function table entry.
void CmdDispatch(
DispatchDims size,
DispatchInfoFlags infoFlags)
{
// Forward through this command buffer's function table rather than calling an implementation directly.
m_funcTable.pfnCmdDispatch(this, size, infoFlags);
}

/// Dispatches a compute workload using the command buffer's currently bound compute state. The dimensions of the
Expand Down
4 changes: 3 additions & 1 deletion inc/core/palDeveloperHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ enum class CallbackType : uint32
BindGpuMemory, ///< This callback is to inform of a new binding to GPU memory.
SubAllocGpuMemory, ///< This callback is to inform of suballocation from base GPU memory allocation.
SubFreeGpuMemory, ///< This callback is to inform that GPU memory suballocation has been freed.

#if PAL_DEVELOPER_BUILD
RpmBlt, ///< This callback is to describe the internal RPM blt calls.
#endif
Expand Down Expand Up @@ -497,6 +496,9 @@ struct DrawDispatchDispatchArgs
DispatchDims groupStart; ///< Thread/workgroup start offsets in X/Y/Z dimensions. Only valid for CmdDispatchOffset.
DispatchDims groupDims; ///< Thread/workgroup counts in X/Y/Z dimensions. Only valid for CmdDispatch[Offset].
DispatchDims logicalSize; ///< Thread/workgroup counts as seen by the shader. Only valid for CmdDispatchOffset.
/// Optional flags to help the client driver understand the dispatch.
/// For example, if the dispatch originated in PAL rather than the client driver.
DispatchInfoFlags infoFlags;
};

/// Information for DrawDispatch callbacks
Expand Down
28 changes: 22 additions & 6 deletions inc/core/palDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,10 +452,11 @@ union RsFeatureInfo
/// Global Boost settings.
struct
{
bool enabled; ///< Specifies whether Boost is enabled globally.
uint32 hotkey; ///< If nonzero, specifies the virtual key code assigned to Boost.
uint32 hotkeyInd;///< If nonzero, specifies the virtual key code assigned to Boost's indicator.
uint32 minRes; ///< Specifies the global Boost minimum resolution.
bool enabled; ///< Specifies whether Boost is enabled globally.
uint32 hotkey; ///< If nonzero, specifies the virtual key code assigned to Boost.
uint32 hotkeyInd; ///< If nonzero, specifies the virtual key code assigned to Boost's indicator.
uint32 minRes; ///< Specifies the global Boost minimum resolution.
bool adaptiveVrsEnabled; ///< Specifies whether BoostAdaptiveVrs is enabled globally.
} boost;

/// Global ProVsr settings.
Expand Down Expand Up @@ -1432,7 +1433,8 @@ struct DeviceProperties
#endif
uint64 supportBFloat16 : 1; ///< HW supports bf16 instructions.
uint64 supportFloat8 : 1; ///< HW supports float 8-bit instructions.
uint64 reserved : 63; ///< Reserved for future use.
uint64 supportInt4 : 1; ///< HW supports integer 4-bit instructions.
uint64 reserved : 62; ///< Reserved for future use.
};
uint64 u64All[2]; ///< Flags packed as an array of two 64-bit uints.
} flags; ///< Device IP property flags.
Expand Down Expand Up @@ -2993,7 +2995,7 @@ class IDevice
/// modifying settings, the client must call CommitSettingsAndInit() before finalizing the device.
///
/// @warning The returned value points to an internal PAL structure. Modifying data using this pointer after
/// calling FinalizeSettings() will result in undefined behavior.
/// calling CommitSettingsAndInit() will result in undefined behavior.
///
/// @returns Pointer to this devices public settings for examination and/or modification by the client.
virtual PalPublicSettings* GetPublicSettings() = 0;
Expand Down Expand Up @@ -3144,6 +3146,20 @@ class IDevice
virtual Result GetPerfExperimentProperties(
PerfExperimentProperties* pProperties) const = 0;

/// Fills out the default MSAA quad sample pattern for the given sample count.
///
/// @param [in] samples The number of valid samples in the sample pattern. Must be a power of two.
/// @param [out] pQuadSamplePattern Fill this with the default pattern.
///
/// @returns Success if @ref pQuadSamplePattern was filled with the default sample pattern.
/// Otherwise, one of the following errors may be returned:
/// + ErrorInvalidPointer if @ref pQuadSamplePattern is null.
/// + ErrorInvalidValue if @ref samples is not a supported power of two.
/// + ErrorUnavailable if this device lacks GfxIp support.
virtual Result GetDefaultSamplePattern(
uint32 samples,
MsaaQuadSamplePattern* pQuadSamplePattern) const = 0;

/// Adds a list of per-device memory object references that persist across command buffer submissions. It is the
/// responsibility of the client to make sure that all required memory references have been added before submitting
/// the command buffer that uses on them. References can be added at the device, queue or specified at submit time.
Expand Down
2 changes: 1 addition & 1 deletion inc/core/palLib.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#endif
///
/// @ingroup LibInit
#define PAL_INTERFACE_MAJOR_VERSION 907
#define PAL_INTERFACE_MAJOR_VERSION 909

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 831
/// Minor interface version. Note that the interface version is distinct from the PAL version itself, which is returned
Expand Down
Loading

0 comments on commit b6da370

Please sign in to comment.