Skip to content

Commit

Permalink
Merge pull request #179 from jrbyrnes/NewAMDLLVMENv
Browse files Browse the repository at this point in the history
Integration with ROCm 4.5.2
  • Loading branch information
jrbyrnes authored May 27, 2022
2 parents 5df3319 + 009097b commit 7c6ce6b
Show file tree
Hide file tree
Showing 47 changed files with 1,374 additions and 334 deletions.
21 changes: 7 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,28 @@ cmake_minimum_required(VERSION 3.4.3)

project(OptSched)

option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." ON)
option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." OFF)
option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." OFF)
option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." ON)

set(OPTSCHED_LIT_ARGS "-sv" CACHE STRING "Arguments to pass to lit")
set(OPTSCHED_EXTRA_LINK_LIBRARIES "" CACHE STRING "Extra link_libraries to pass to OptSched, ;-separated")
set(OPTSCHED_EXTRA_INCLUDE_DIRS "" CACHE STRING "Extra include_directories to pass to OptSched, ;-separated")
# To add OptSched debug defines, e.g.:
# '-DOPTSCHED_EXTRA_DEFINITIONS=-DIS_DEBUG_DEFS_AND_USES;-DIS_DEBUG_DEF_USE_COUNT'
set(OPTSCHED_EXTRA_DEFINITIONS "" CACHE STRING "Extra add_definitions to pass to OptSched, ;-separated")

if(TARGET LLVMCodeGen)
if(TARGET LLVMAMDGPUCodeGen OR TARGET LLVMCodeGen OR TARGET LLVMX86CodeGen)
set(llvm_subproject TRUE)
else()
set(llvm_subproject FALSE)
endif()

# Not supported
if(NOT llvm_subproject)
set(llvm_version 6.0)
if(OPTSCHED_ENABLE_AMDGPU)
set(llvm_version 9.0)
endif()

set(OPTSCHED_LLVM_VERSION ${llvm_version} CACHE STRING "The LLVM version to build OptSched with (independent build only)")

find_package(LLVM ${OPTSCHED_LLVM_VERSION} REQUIRED CONFIG)
Expand All @@ -36,8 +37,8 @@ endif()
if(OPTSCHED_ENABLE_AMDGPU)
if(NOT "AMDGPU" IN_LIST LLVM_ALL_TARGETS)
message(FATAL_ERROR "Trying to build the AMDGPU code, but AMDGPU is not supported by this build of LLVM")
elseif(LLVM_VERSION VERSION_LESS 7.0)
message(FATAL_ERROR "OptSched requries LLVM version >= 7.0 to build the AMDGPU scheduler.")
elseif(LLVM_VERSION VERSION_LESS 13.0)
message(FATAL_ERROR "OptSched requries LLVM version >= 13.0 to build the AMDGPU scheduler.")
endif()
endif()

Expand All @@ -62,10 +63,6 @@ include_directories(
add_definitions(${OPTSCHED_EXTRA_DEFINITIONS})
link_directories(${OPTSCHED_EXTRA_LINK_LIBRARIES})

if(LLVM_VERSION VERSION_LESS 7.0)
add_definitions(-DLLVM_DEBUG=DEBUG)
endif()

if(NOT llvm_subproject)
include(GetLocalLLVM)

Expand Down Expand Up @@ -98,9 +95,5 @@ if(OPTSCHED_INCLUDE_TESTS)
COMMAND
${LLVM_TOOLS_BINARY_DIR}/clang ${CMAKE_CURRENT_SOURCE_DIR}/example/helloworld.cpp
-O3
-fplugin=$<TARGET_FILE:OptSched>
-mllvm -misched=optsched
-mllvm -enable-misched
-mllvm -optsched-cfg=${CMAKE_CURRENT_SOURCE_DIR}/example/optsched-cfg
)
endif()
13 changes: 9 additions & 4 deletions cmake/superbuild/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
# - LLVM_PARALLEL_LINK_JOBS.
# - *_EXTRA_CMAKE_ARGS: Passes these CMake arguments on to the corresponding sub-build.
# - The flang builds can be configured to use a custom CMAKE_GENERATOR, separate from the superbuild's generator.


############################################
#
# As of 5/26/2022, the superbuild script is
# no longer guaranteed to work.
#
############################################

cmake_minimum_required(VERSION 3.7)

project(OptSched-SuperBuild)
Expand Down Expand Up @@ -99,8 +108,4 @@ add_test(NAME OptSched-CompileHelloWorld
COMMAND
${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/bin/clang ${ROOT_DIR}/example/helloworld.cpp
-O3
-fplugin=${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/lib/OptSched.so
-mllvm -misched=optsched
-mllvm -enable-misched
-mllvm -optsched-cfg=${ROOT_DIR}/example/optsched-cfg
)
126 changes: 126 additions & 0 deletions example/optsched-cfg/hotfuncs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,129 @@ module_big_step_utilities_em_calc_cq_ YES
mgau_eval YES
vector_gautbl_eval_logs3 YES
subvq_mgau_shortlist YES

# ======================================
# SPEC CPU2017 (fp rate only)
# ======================================

#503.bwaves_r Total 97.51% (95.88% selected)
mat_times_vec_ YES #68.22%
bi_cgstab_block_ YES #12.60%
shell_ YES #10.81%
jacobian_ YES # 4.25%
#flux_ YES # 1.63%

#507.cactuBSSN_r Total 90.09%
_ZL16ML_BSSN_RHS_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES #35.83%
_ZL19ML_BSSN_Advect_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES #30.82%
_ZL24ML_BSSN_constraints_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 8.90%
_ZL41ML_BSSN_convertToADMBaseDtLapseShift_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 8.45%
MoL_LinearCombination YES # 3.27%
_ZL29ML_BSSN_convertToADMBase_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 2.82%

#508.namd_r Total 99.34%
_Z22pairlist_from_pairlistddddPK8CompAtomPKtiPtdPd YES #18.81%
_ZN20ComputeNonbondedUtil26calc_pair_energy_fullelectEP9nonbonded YES #13.12%
_ZN20ComputeNonbondedUtil19calc_pair_fullelectEP9nonbonded YES # 9.52%
_ZN20ComputeNonbondedUtil16calc_pair_energyEP9nonbonded YES # 9.35%
_ZN20ComputeNonbondedUtil32calc_pair_energy_merge_fullelectEP9nonbonded YES # 9.11%
_ZN20ComputeNonbondedUtil25calc_pair_merge_fullelectEP9nonbonded YES # 7.00%
_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded YES # 6.98%
_ZN20ComputeNonbondedUtil26calc_self_energy_fullelectEP9nonbonded YES # 5.78%
_ZN20ComputeNonbondedUtil32calc_self_energy_merge_fullelectEP9nonbonded YES # 4.80%
_ZN20ComputeNonbondedUtil16calc_self_energyEP9nonbonded YES # 4.73%
_ZN20ComputeNonbondedUtil19calc_self_fullelectEP9nonbonded YES # 4.11%
_ZN20ComputeNonbondedUtil9calc_selfEP9nonbonded YES # 3.02%
_ZN20ComputeNonbondedUtil25calc_self_merge_fullelectEP9nonbonded YES # 3.01%

#510.parest_r Total 85.12% (83.38% selected)
_ZNK6dealii9SparseILUIdE5vmultIdEEvRNS_6VectorIT_EERKS5_ YES #29.73%
_ZNK6dealii12SparseMatrixIdE5vmultINS_6VectorIdEES4_EEvRT_RKT0_ YES #25.33%
_ZNK6dealii6VectorIdEmlIdEEdRKNS0_IT_EE YES #13.83%
_ZNK6dealii12SparseMatrixIdE17precondition_SSORIdEEvRNS_6VectorIT_EERKS5_dRKSt6vectorIjSaIjEE YES # 5.94%
_ZN6dealii11SolverGMRESINS_6VectorIdEEE5solveINS_12SparseMatrixIdEENS_9SparseILUIdEEEEvRKT_RS2_RKS2_RKT0_ YES # 3.79%
_ZN6dealii8FESystemILi3ELi3EE10initializeEv YES # 2.66%
_ZN12METomography5Slave5SlaveILi3EE12GlobalMatrix15assemble_matrixERKN6dealii18TriaActiveIteratorINS4_15DoFCellAccessorINS4_10DoFHandlerILi3ELi3EEEEEEERNS0_8internal13AssemblerDataILi3EEE YES # 2.10%
#_ZNK6dealii15SparsityPatternclEjj YES # 1.74%

#511.povray_r Total 82.24% (78.66% selected)
_ZN3povL23All_Plane_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #16.55%
_ZN3povL31All_CSG_Intersect_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #10.95%
_ZN3povL24All_Sphere_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #10.72%
_ZN3pov17Check_And_EnqueueEPNS_21Priority_Queue_StructEPNS_16BBox_Tree_StructEPNS_19Bounding_Box_StructEPNS_14Rayinfo_StructE YES # 8.58%
_ZN3povL12Inside_PlaneEPdPNS_13Object_StructE YES # 4.83%
_ZN3pov12Ray_In_BoundEPNS_10Ray_StructEPNS_13Object_StructE YES # 4.55%
_ZN3pov19Intersect_BBox_TreeEPNS_16BBox_Tree_StructEPNS_10Ray_StructEPNS_10istk_entryEPPNS_13Object_StructEb YES # 4.09%
_ZN3pov6DNoiseEPdS0_ YES # 4.07%
_ZN3povL25All_Quadric_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES # 3.78%
_ZN3povL14Inside_QuadricEPdPNS_13Object_StructE YES # 2.93%
_ZN3pov13Inside_ObjectEPdPNS_13Object_StructE YES # 2.90%
_ZN3pov20Intersect_Light_TreeEPNS_10Ray_StructEPNS_24Project_Tree_Node_StructEiiPNS_10istk_entryEPPNS_13Object_StructEPNS_19Light_Source_StructE YES # 2.46%
_ZN3pov5NoiseEPdPNS_14Pattern_StructE YES # 2.25%
#_ZN3pov18MInvTransDirectionEPdS0_PNS_16Transform_StructE YES # 1.83%
#_ZN3pov12IntersectionEPNS_10istk_entryEPNS_13Object_StructEPNS_10Ray_StructE YES # 1.75%

#519.lbm_r
LBM_performStreamCollideTRT YES #99.04%

#526.blender_r Total 91.73% (84.1% selected)
_ZL9intersectILi1024EEiP8VBVHTreeP5Isect YES #61.79%
RE_rayobject_intersect YES #14.34%
add_radiance YES # 3.95%
ray_ao YES # 2.50%
#zbuffer_sss YES # 1.72%
#traverse_octree YES # 1.72%
#zbuffer_solid YES # 1.57%
#zbuf_part_project YES # 1.52%
#ray_shadow YES # 1.42%
#RE_rayobject_raycast YES # 1.20%

#527.cam4_r Total 25.90% (excluding libraries) 47.35% (including libraries)
#__fsd_pow_fma3 YES # 8.05% #From libpgmath.so(runtime shared library)
aer_rad_props_aer_rad_props_sw_ YES # 5.66%
#__fsd_exp_fma3 YES # 5.38% #From libpgmath.so(runtime shared library)
radsw_radcswmx_ YES # 5.14%
radae_radabs_ YES # 3.42%
zm_conv_ientropy_ YES # 3.28%
#__fd_log_1_avx512 YES # 3.25% #From libpgmath.so(runtime shared library)
radsw_raddedmx_ YES # 3.16%
tracer_data_vert_interp_ YES # 3.10%
#__memset_avx2_unaligned_erms YES # 2.64% #From standard library
radae_trcab_ YES # 2.14%
#__memcmp_avx2_movbe YES # 2.13% #From standard library

#538.imagick_r Total 96.98%
MorphologyApply YES #45.04%
MeanShiftImage YES #21.48%
SetPixelCacheNexusPixels YES #16.88%
GetVirtualPixelsFromNexus YES # 9.65%
GetOneCacheViewVirtualPixel YES # 3.93%

#544.nab_r Total 89.04% (excluding libraries) 98.63% (including libraries)
mme34 YES #66.21%
nbond YES # 8.45%
searchkdtree YES # 7.24%
heapsort_pairs YES # 7.14%
#__ieee754_log_fma YES # 4.21%
#__ieee754_exp_fma YES # 3.05%
#exp@@GLIBC_2.29 YES # 2.33%

#549.fotonik3d_r Total 99.18%
upml_mod_upml_updatee_simple_ YES #26.47%
upml_mod_upml_updateh_ YES #24.04%
material_mod_mat_updatee_ YES #21.26%
update_mod_updateh_ YES #17.24%
power_mod_power_dft_ YES #10.17%

#554.roms_r Total 80.63% (excluding library) 84.62% (including library)
step2d_mod_step2d_tile_ YES #27.67%
pre_step3d_mod_pre_step3d_tile_ YES #10.74%
lmd_skpp_mod_lmd_skpp_tile_ YES # 7.19%
step3d_t_mod_step3d_t_tile_ YES # 6.54%
rhs3d_mod_rhs3d_tile_ YES # 6.14%
t3dmix_mod_t3dmix2_tile_ YES # 6.05%
step3d_uv_mod_step3d_uv_tile_ YES # 5.93%
#__fsd_exp_fma3 YES # 3.99% #From libpgmath.so(runtime shared library)
rho_eos_mod_rho_eos_tile_ YES # 3.73%
prsgrd_mod_prsgrd_tile_ YES # 3.62%
uv3dmix_mod_uv3dmix2_tile_ YES # 3.02%
Loading

0 comments on commit 7c6ce6b

Please sign in to comment.