From 7f1fdf3ab5349a003930a31ed8ca2a403ab4f7eb Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Thu, 21 Dec 2023 13:09:01 +0100 Subject: [PATCH] snRuntime: Add all-to-all reduction and use in exit function --- sw/snRuntime/api/sync_decls.h | 1 + sw/snRuntime/src/start.c | 1 + sw/snRuntime/src/sync.c | 1 + sw/snRuntime/src/sync.h | 6 ++++++ sw/tests/event_unit.c | 1 - sw/tests/fp16_comparison_scalar.c | 9 +++++---- sw/tests/fp16_comparison_vector.c | 9 +++++---- sw/tests/fp16_computation_scalar.c | 9 +++++---- sw/tests/fp16_computation_vector.c | 9 +++++---- sw/tests/fp16alt_comparison_scalar.c | 9 +++++---- sw/tests/fp16alt_comparison_vector.c | 9 +++++---- sw/tests/fp16alt_computation_scalar.c | 9 +++++---- sw/tests/fp16alt_computation_vector.c | 9 +++++---- sw/tests/fp32_comparison_scalar.c | 9 +++++---- sw/tests/fp32_comparison_vector.c | 9 +++++---- sw/tests/fp32_computation_scalar.c | 9 +++++---- sw/tests/fp32_computation_vector.c | 9 +++++---- sw/tests/fp32_conversions_scalar.c | 9 +++++---- sw/tests/fp64_conversions_scalar.c | 7 ++++--- sw/tests/fp8_comparison_scalar.c | 9 +++++---- sw/tests/fp8_comparison_vector.c | 9 +++++---- sw/tests/fp8_computation_scalar.c | 9 +++++---- sw/tests/fp8_computation_vector.c | 9 +++++---- sw/tests/fp8alt_comparison_scalar.c | 9 +++++---- sw/tests/fp8alt_comparison_vector.c | 9 +++++---- sw/tests/fp8alt_computation_scalar.c | 9 +++++---- sw/tests/fp8alt_computation_vector.c | 9 +++++---- sw/tests/team_global.c | 2 +- target/snitch_cluster/sw/run.yaml | 6 +++--- 29 files changed, 122 insertions(+), 92 deletions(-) diff --git a/sw/snRuntime/api/sync_decls.h b/sw/snRuntime/api/sync_decls.h index 2ece472ed3..9b8ea6bebf 100644 --- a/sw/snRuntime/api/sync_decls.h +++ b/sw/snRuntime/api/sync_decls.h @@ -9,6 +9,7 @@ typedef struct { extern volatile uint32_t _snrt_mutex; extern volatile snrt_barrier_t _snrt_barrier; +extern volatile uint32_t _reduction_result; inline volatile uint32_t *snrt_mutex(); diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index b79ffd8732..582e93b8e7 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -98,6 +98,7 @@ static inline void snrt_init_libs() { snrt_alloc_init(); } #ifdef SNRT_CRT0_EXIT static inline void snrt_exit_default(int exit_code) { + exit_code = snrt_global_all_to_all_reduction(exit_code); if (snrt_global_core_idx() == 0) *(snrt_exit_code_destination()) = (exit_code << 1) | 1; } diff --git a/sw/snRuntime/src/sync.c b/sw/snRuntime/src/sync.c index 58b079268b..5d7173a9fd 100644 --- a/sw/snRuntime/src/sync.c +++ b/sw/snRuntime/src/sync.c @@ -8,6 +8,7 @@ volatile uint32_t _snrt_mutex; volatile snrt_barrier_t _snrt_barrier; +volatile uint32_t _reduction_result; //================================================================================ // Functions diff --git a/sw/snRuntime/src/sync.h b/sw/snRuntime/src/sync.h index 07eea700fc..7557a57e97 100644 --- a/sw/snRuntime/src/sync.h +++ b/sw/snRuntime/src/sync.h @@ -81,6 +81,12 @@ inline void snrt_global_barrier() { snrt_cluster_hw_barrier(); } +inline uint32_t snrt_global_all_to_all_reduction(uint32_t value) { + __atomic_add_fetch(&_reduction_result, value, __ATOMIC_RELAXED); + snrt_global_barrier(); + return _reduction_result; +} + /** * @brief Generic barrier * diff --git a/sw/tests/event_unit.c b/sw/tests/event_unit.c index 4e66954145..18586b3b79 100644 --- a/sw/tests/event_unit.c +++ b/sw/tests/event_unit.c @@ -9,7 +9,6 @@ volatile static uint32_t sum = 0; static void task(void *arg, uint32_t argc) { uint32_t arg0 = ((uint32_t *)arg)[0]; __atomic_add_fetch(&sum, arg0, __ATOMIC_RELAXED); - printf("work arg[0] = %d argc = %d\n", arg0, argc); } uint32_t run_and_verify_task(uint32_t *arg, uint32_t n_workers) { diff --git a/sw/tests/fp16_comparison_scalar.c b/sw/tests/fp16_comparison_scalar.c index 925cf11d7e..3cd6f437a9 100644 --- a/sw/tests/fp16_comparison_scalar.c +++ b/sw/tests/fp16_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFF4248; // 3.14 uint32_t i8an = 0xFFFFC248; // -3.14 uint32_t i8b = 0xFFFF3E79; // 1.618 @@ -210,7 +210,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_comparison_vector.c b/sw/tests/fp16_comparison_vector.c index 2bb25993bb..565957f23e 100644 --- a/sw/tests/fp16_comparison_vector.c +++ b/sw/tests/fp16_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa16 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa16n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb16 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -287,7 +287,8 @@ int main() { "vfeq.h %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_computation_scalar.c b/sw/tests/fp16_computation_scalar.c index f5eeab74ea..2107e54745 100644 --- a/sw/tests/fp16_computation_scalar.c +++ b/sw/tests/fp16_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFF4248; // 3.14 uint32_t i_an = 0xFFFFC248; // -3.14 uint32_t i_b = 0xFFFF3E79; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.h %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_computation_vector.c b/sw/tests/fp16_computation_vector.c index 20ba1c54ce..ae1f6c29bd 100644 --- a/sw/tests/fp16_computation_vector.c +++ b/sw/tests/fp16_computation_vector.c @@ -4,9 +4,9 @@ #include "snrt.h" int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -563,7 +563,8 @@ int main() { "vfeq.h %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_comparison_scalar.c b/sw/tests/fp16alt_comparison_scalar.c index 9985f0c241..e3496cf649 100644 --- a/sw/tests/fp16alt_comparison_scalar.c +++ b/sw/tests/fp16alt_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFF4049; // 3.14 uint32_t i8an = 0xFFFFC049; // -3.14 uint32_t i8b = 0xFFFF3FCF; // 1.618 @@ -213,7 +213,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_comparison_vector.c b/sw/tests/fp16alt_comparison_vector.c index ad504cae77..a612382fb5 100644 --- a/sw/tests/fp16alt_comparison_vector.c +++ b/sw/tests/fp16alt_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa16 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa16n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb16 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -289,7 +289,8 @@ int main() { "vfeq.ah %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_computation_scalar.c b/sw/tests/fp16alt_computation_scalar.c index 323bfd9d76..49e6130fe3 100644 --- a/sw/tests/fp16alt_computation_scalar.c +++ b/sw/tests/fp16alt_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFF4049; // 3.14 uint32_t i_an = 0xFFFFC049; // -3.14 uint32_t i_b = 0xFFFF3FCF; // 1.618 @@ -319,7 +319,8 @@ int main() { "feq.ah %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_computation_vector.c b/sw/tests/fp16alt_computation_vector.c index 3e1740a28c..6ddfed0d6e 100644 --- a/sw/tests/fp16alt_computation_vector.c +++ b/sw/tests/fp16alt_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -566,7 +566,8 @@ int main() { "vfeq.ah %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_comparison_scalar.c b/sw/tests/fp32_comparison_scalar.c index d06cdf51a1..5560f3d33f 100644 --- a/sw/tests/fp32_comparison_scalar.c +++ b/sw/tests/fp32_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0x4048F5C3; // 3.14 uint32_t i8an = 0xC048F5C3; // -3.14 uint32_t i8b = 0x3FCF1AA0; // 1.618 @@ -210,7 +210,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_comparison_vector.c b/sw/tests/fp32_comparison_vector.c index d04c04a654..431f1b3ef5 100644 --- a/sw/tests/fp32_comparison_vector.c +++ b/sw/tests/fp32_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa32 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa32n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb32 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -283,7 +283,8 @@ int main() { "vfeq.s %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 3); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_computation_scalar.c b/sw/tests/fp32_computation_scalar.c index b0f3267a3d..231f8a2f8c 100644 --- a/sw/tests/fp32_computation_scalar.c +++ b/sw/tests/fp32_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0x4048F5C3; // 3.14 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.s %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_computation_vector.c b/sw/tests/fp32_computation_vector.c index b717ee2878..da699747ae 100644 --- a/sw/tests/fp32_computation_vector.c +++ b/sw/tests/fp32_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -531,7 +531,8 @@ int main() { "vfeq.s %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0x3); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_conversions_scalar.c b/sw/tests/fp32_conversions_scalar.c index 14c52f1f7a..ca5783095e 100644 --- a/sw/tests/fp32_conversions_scalar.c +++ b/sw/tests/fp32_conversions_scalar.c @@ -8,9 +8,9 @@ typedef float v2s __attribute__((vector_size(8))); int main() { - int errs = 48; - if (snrt_is_compute_core()) { + int errs = 48; + unsigned int res_cvt0 = 0; unsigned int res_cvt1 = 0; @@ -480,7 +480,8 @@ int main() { : "+r"(res_cvt0), "+r"(res_cvt1)); errs -= (res_cvt0 == 0x1); errs -= (res_cvt1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp64_conversions_scalar.c b/sw/tests/fp64_conversions_scalar.c index 49ec4c64da..44a95c82d5 100644 --- a/sw/tests/fp64_conversions_scalar.c +++ b/sw/tests/fp64_conversions_scalar.c @@ -8,9 +8,9 @@ typedef float v2s __attribute__((vector_size(8))); int main() { - int errs = 48; - if (snrt_is_compute_core()) { + int errs = 48; + unsigned int res_cvt0 = 0; unsigned int res_cvt1 = 0; @@ -495,7 +495,8 @@ int main() { "+f"(fvalue_negative)); errs -= (res_cvt0 == 0x1); errs -= (res_cvt1 == 0x1); - } + return errs; + } return 0; } diff --git a/sw/tests/fp8_comparison_scalar.c b/sw/tests/fp8_comparison_scalar.c index 60caca4005..bcabab8745 100644 --- a/sw/tests/fp8_comparison_scalar.c +++ b/sw/tests/fp8_comparison_scalar.c @@ -5,9 +5,9 @@ #include "snrt.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFFFF42; // 3.14 uint32_t i8an = 0xFFFFFFC2; // -3.14 uint32_t i8b = 0xFFFFFF3E; // 1.618 @@ -209,7 +209,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_comparison_vector.c b/sw/tests/fp8_comparison_vector.c index 156d9bb786..9dc77404de 100644 --- a/sw/tests/fp8_comparison_vector.c +++ b/sw/tests/fp8_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa8 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa8n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb8 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -295,7 +295,8 @@ int main() { "vfeq.b %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_computation_scalar.c b/sw/tests/fp8_computation_scalar.c index a82c4103af..de2b2afc07 100644 --- a/sw/tests/fp8_computation_scalar.c +++ b/sw/tests/fp8_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFFFF42; // 3.14 uint32_t i_an = 0xFFFFFFC2; // -3.14 uint32_t i_b = 0xFFFFFF3E; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.b %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_computation_vector.c b/sw/tests/fp8_computation_vector.c index 4a2f602d0d..d62edced3c 100644 --- a/sw/tests/fp8_computation_vector.c +++ b/sw/tests/fp8_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -631,7 +631,8 @@ int main() { "vfeq.b %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_comparison_scalar.c b/sw/tests/fp8alt_comparison_scalar.c index 6f367a7d96..d4c29b9921 100644 --- a/sw/tests/fp8alt_comparison_scalar.c +++ b/sw/tests/fp8alt_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFFFF45; // 3.14 uint32_t i8an = 0xFFFFFFC5; // -3.14 uint32_t i8b = 0xFFFFFF3D; // 1.618 @@ -212,7 +212,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_comparison_vector.c b/sw/tests/fp8alt_comparison_vector.c index 25e27bfb99..ee27dddfdf 100644 --- a/sw/tests/fp8alt_comparison_vector.c +++ b/sw/tests/fp8alt_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa8 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa8n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb8 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -297,7 +297,8 @@ int main() { "vfeq.ab %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_computation_scalar.c b/sw/tests/fp8alt_computation_scalar.c index 4b79aac72c..7bc93ae620 100644 --- a/sw/tests/fp8alt_computation_scalar.c +++ b/sw/tests/fp8alt_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFFFF45; // 3.14 uint32_t i_an = 0xFFFFFFC5; // -3.14 uint32_t i_b = 0xFFFFFF3D; // 1.618 @@ -319,7 +319,8 @@ int main() { "feq.ab %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_computation_vector.c b/sw/tests/fp8alt_computation_vector.c index 15da6ec048..1dea865869 100644 --- a/sw/tests/fp8alt_computation_vector.c +++ b/sw/tests/fp8alt_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -635,7 +635,8 @@ int main() { "vfeq.ab %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/team_global.c b/sw/tests/team_global.c index a03fc8de1e..cf9da6b89a 100644 --- a/sw/tests/team_global.c +++ b/sw/tests/team_global.c @@ -15,7 +15,7 @@ int main() { uint32_t errors = 0; errors += (snrt_global_core_idx() != i); errors += (snrt_global_core_num() != 9); - errors += (snrt_cluster_idx() != i / 1); + errors += (snrt_cluster_idx() != i / 9); errors += (snrt_cluster_num() != 1); errors += (snrt_cluster_core_idx() != i % 9); errors += (snrt_cluster_core_num() != 9); diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index ce241a8d46..1e9d25e897 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -50,8 +50,8 @@ runs: simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 - elf: tests/build/fp32_conversions_scalar.elf simulators: [vsim, vcs, verilator] # banshee fails with illegal instruction - - elf: tests/build/fp64_conversions_scalar.elf - simulators: [vsim, vcs, verilator] + # - elf: tests/build/fp64_conversions_scalar.elf + # simulators: [vsim, vcs, verilator] # - elf: tests/build/interrupt.elf - elf: tests/build/interrupt_local.elf - elf: tests/build/multi_cluster.elf @@ -68,7 +68,7 @@ runs: - elf: tests/build/varargs_2.elf - elf: tests/build/zero_mem.elf - elf: tests/build/non_null_exitcode.elf - retcode: 14 + retcode: 126 - elf: apps/blas/axpy/build/axpy.elf cmd: [../../../sw/blas/axpy/verify.py, "${sim_bin}", "${elf}"] - elf: apps/blas/gemm/build/gemm.elf