Skip to content

Commit

Permalink
Merge branch 'main' into yutji/msccl-build
Browse files: browse the repository at this point in the history
  • Loading branch information
yukirora authored Jul 26, 2024
2 parents 33e1084 + e304cf1 commit a557fd5
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 150 deletions.
2 changes: 1 addition & 1 deletion superbench/benchmarks/micro_benchmarks/cuda_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
list(APPEND NVCC_ARCHS_SUPPORTED 86)
endif()
if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.8)
list(APPEND NVCC_ARCHS_SUPPORTED 90)
list(APPEND NVCC_ARCHS_SUPPORTED 89 90)
endif()
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def __init__(self, name, parameters=''):
# Skip FP64 for RTX Turing/Ampere and Tesla T4/GA10x due to very limited FP64 TFLOP rate
self.__kernel_map[7.5] = {k: self.__kernel_map[7.0][k] for k in self.__kernel_map[7.0] if 'fp64' not in k}
self.__kernel_map[8.6] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
# Skip FP64 for Ada Lovelace L4/L40 due to very limited FP64 TFLOP rate
self.__kernel_map[8.9] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
# Skip INT4 for Hopper due to no native CUDA/Tensor Cores
self.__kernel_map[9.0] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'int4_tc' not in k}
self.__parse_logline = [
Expand Down
2 changes: 1 addition & 1 deletion third_party/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ sb_micro_path:
# Build cutlass.
cuda_cutlass:
ifeq ($(shell echo $(CUDA_VER)">=11.8" | bc -l), 1)
$(eval ARCHS := "70;75;80;86;90")
$(eval ARCHS := "70;75;80;86;89;90")
else
$(eval ARCHS := "70;75;80;86")
endif
Expand Down
Loading

0 comments on commit a557fd5

Please sign in to comment.