diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 122763b8e..af1a65ac4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -106,6 +106,11 @@ snitch-cluster-vsim: # Run additional, more extensive tests - cd sw/apps/blas/gemm/test - ./run.py runs.yaml --cfg $PWD/cfg/* --simulator vsim -j + - cd ../../../dnn/flashattention_2/test + # FP8 FA-2 tests are failing with precision mismatch + # due to operand ordering + - ./run.py runs.yaml --cfg $PWD/cfg/fp32* --simulator vsim -j + - ./run.py runs.yaml --cfg $PWD/cfg/fp16* --simulator vsim -j # Banshee snitch-cluster-banshee: diff --git a/sw/blas/gemm/src/gemm_fp16.h b/sw/blas/gemm/src/gemm_fp16.h index 4b1901cd8..26caa4e04 100644 --- a/sw/blas/gemm/src/gemm_fp16.h +++ b/sw/blas/gemm/src/gemm_fp16.h @@ -19,7 +19,7 @@ void gemm_fp16_naive(uint32_t M, uint32_t N, uint32_t K, void* A_p, for (uint32_t m = 0; m < M; m++) { for (uint32_t n = 0; n < N; n++) { __fp16 c; - if (beta != 0) { + if (beta != 0) { c = C[m * ldC + n] * beta; } else { c = 0.0; diff --git a/sw/blas/gemm/src/gemm_fp8.h b/sw/blas/gemm/src/gemm_fp8.h index d9ffe867e..2ec5e0510 100644 --- a/sw/blas/gemm/src/gemm_fp8.h +++ b/sw/blas/gemm/src/gemm_fp8.h @@ -18,21 +18,19 @@ void gemm_fp8_naive(uint32_t M, uint32_t N, uint32_t K, void* A_p, uint32_t ldA, for (uint32_t m = 0; m < M; m++) { for (uint32_t n = 0; n < N; n++) { char c; - if (BETA != 0){ + if (BETA != 0) { c = C[m * ldC + n]; // FIXME: get the correct beta value - asm volatile ( + asm volatile( // "fmv.b.x ft0, %[beta]\n" "fcvt.b.s ft0, %[beta]\n" "fmv.b.x ft1, %[c]\n" "fmul.b ft2, ft0, ft1\n" "fmv.x.b %[c], ft2\n" - : [c] "+r"(c) - : [beta] "f"(1.0f) - : "ft0", "ft1", "ft2" - ); - } - else{ + : [ c ] "+r"(c) + : [ beta ] "f"(1.0f) + : "ft0", "ft1", "ft2"); + } else { c = 0.0; } for (uint32_t k = 0; k < K; k++) {