diff --git a/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_gemm.cu b/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_gemm.cu index 659c2df33..002b06447 100644 --- a/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_gemm.cu +++ b/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_gemm.cu @@ -86,7 +86,7 @@ template cudaDataType_t get_datatype() { if (std::is_same::value) return CUDA_R_8F_E5M2; if (std::is_same::value) - return CUDA_R_8I; + return CUDA_R_8I; throw std::invalid_argument("Unknown type"); } @@ -166,7 +166,7 @@ int main(int argc, char **argv) { else if (args.in_type == "fp8e5m2") run(&args); else if (args.in_type == "int8") - run(&args); + run(&args); else throw std::invalid_argument("Unknown type " + args.in_type); diff --git a/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_utils.cc b/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_utils.cc index d047fcb01..6ec5a101e 100644 --- a/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_utils.cc +++ b/superbench/benchmarks/micro_benchmarks/cublaslt_gemm/cublaslt_utils.cc @@ -63,7 +63,7 @@ void cublasLtGemm::Setup(int m, int n, int k, int batch, int lda, int ldb, int l if (a_type == CUDA_R_64F || b_type == CUDA_R_64F) gemm_compute_type = CUBLAS_COMPUTE_64F; if (a_type == CUDA_R_8I) - gemm_compute_type = CUBLAS_COMPUTE_32I; + gemm_compute_type = CUBLAS_COMPUTE_32I; cublasLtMatmulDesc_t op_desc = nullptr; CUBLAS_CHECK(cublasLtMatmulDescCreate(&op_desc, gemm_compute_type, CUDA_R_32F)); diff --git a/tests/benchmarks/micro_benchmarks/test_cublaslt_function.py b/tests/benchmarks/micro_benchmarks/test_cublaslt_function.py index 1b1f740fa..a6fae8f0e 100644 --- a/tests/benchmarks/micro_benchmarks/test_cublaslt_function.py +++ b/tests/benchmarks/micro_benchmarks/test_cublaslt_function.py @@ -66,7 +66,7 @@ def test_cublaslt_gemm_command_generation(self): parameters='--batch 2:16:2 --shapes 2:4,4:8,8:32 32:128:4,128,128 --in_types fp16 fp32 fp64 int8', ) self.assertTrue(benchmark._preprocess()) - self.assertEqual(4 * (2 * 2 * 3 + 2) * 3, len(benchmark._commands)) + self.assertEqual(4 * (2 * 2 * 3 + 2) * len(benchmark._args.in_types), len(benchmark._commands)) def cmd(t, b, m, n, k): return f'{benchmark._CublasLtBenchmark__bin_path} -m {m} -n {n} -k {k} -b {b} -w 20 -i 50 -t {t}'