diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index a4760147d..38d296200 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -6,7 +6,7 @@ name: build-docker on: push: - branches: [main] + branches: [tracer/dma] workflow_dispatch: jobs: build-docker: @@ -27,6 +27,6 @@ jobs: context: . file: util/container/Dockerfile push: true - tags: ghcr.io/pulp-platform/snitch_cluster:${{ github.ref_name }} + tags: ghcr.io/pulp-platform/snitch_cluster:tracer-dma build-args: |- SNITCH_LLVM_VERSION=latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8f87b3f8..c2088aac5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,12 +15,26 @@ jobs: name: Build documentation runs-on: ubuntu-22.04 container: - image: ghcr.io/pulp-platform/snitch_cluster:main + image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma steps: - uses: actions/checkout@v2 - name: Build docs run: make docs + ##################### + # Python unit tests # + ##################### + + pytest: + name: Python unit tests + runs-on: ubuntu-22.04 + container: + image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma + steps: + - uses: actions/checkout@v2 + - name: Run pytest + run: pytest + ############################################## # Simulate SW on Snitch Cluster w/ Verilator # ############################################## @@ -29,7 +43,7 @@ jobs: name: Simulate SW on Snitch Cluster w/ Verilator runs-on: ubuntu-22.04 container: - image: ghcr.io/pulp-platform/snitch_cluster:main + image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma steps: - uses: actions/checkout@v2 with: @@ -54,7 +68,7 @@ jobs: name: Simulate SW on Snitch Cluster w/ Banshee runs-on: ubuntu-22.04 container: - image: ghcr.io/pulp-platform/snitch_cluster:main + image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma steps: - uses: actions/checkout@v2 with: diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 18cd5d4aa..784183381 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -40,6 +40,14 @@ docs: script: - make docs +##################### +# Python unit tests # +##################### + +pytest: + script: + - pytest + ################################# # Build Snitch cluster software # ################################# diff --git a/docs/rm/bench/join.md b/docs/rm/bench/join.md new file mode 100644 index 000000000..ee9aa8221 --- /dev/null +++ b/docs/rm/bench/join.md @@ -0,0 +1 @@ +::: join \ No newline at end of file diff --git a/docs/rm/bench/roi.md b/docs/rm/bench/roi.md new file mode 100644 index 000000000..239fedf30 --- /dev/null +++ b/docs/rm/bench/roi.md @@ -0,0 +1 @@ +::: roi \ No newline at end of file diff --git a/docs/rm/bench/visualize.md b/docs/rm/bench/visualize.md new file mode 100644 index 000000000..b2c2bed8b --- /dev/null +++ b/docs/rm/bench/visualize.md @@ -0,0 +1 @@ +::: visualize \ No newline at end of file diff --git a/docs/rm/trace/annotate.md b/docs/rm/trace/annotate.md new file mode 100644 index 000000000..b70b1a847 --- /dev/null +++ b/docs/rm/trace/annotate.md @@ -0,0 +1 @@ +::: annotate \ No newline at end of file diff --git a/docs/rm/trace/events.md b/docs/rm/trace/events.md new file mode 100644 index 000000000..5b9cca4ae --- /dev/null +++ b/docs/rm/trace/events.md @@ -0,0 +1 @@ +::: events \ No newline at end of file diff --git a/docs/rm/trace/gen_trace.md b/docs/rm/trace/gen_trace.md new file mode 100644 index 000000000..3ba7b50eb --- /dev/null +++ b/docs/rm/trace/gen_trace.md @@ -0,0 +1 @@ +::: gen_trace \ No newline at end 
of file diff --git a/hw/future/src/dma/axi_dma_backend.sv b/hw/future/src/dma/axi_dma_backend.sv index b8cfa81dc..09a27f0a9 100644 --- a/hw/future/src/dma/axi_dma_backend.sv +++ b/hw/future/src/dma/axi_dma_backend.sv @@ -288,7 +288,6 @@ module axi_dma_backend #( //-------------------------------------- //pragma translate_off `ifndef SYNTHESYS -`ifndef VERILATOR generate if (DmaTracing) begin : gen_dma_tracer string fn; @@ -595,7 +594,6 @@ module axi_dma_backend #( end end endgenerate -`endif `endif //pragma translate_on endmodule : axi_dma_backend diff --git a/mkdocs.yml b/mkdocs.yml index 70d213601..158e453b6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,7 +25,7 @@ plugins: - mkdocstrings: handlers: python: - paths: [util/sim] + paths: [util/sim, util/trace, util/bench] - macros: on_error_fail: true use_directory_urls: false @@ -57,6 +57,14 @@ nav: - sim_utils: rm/sim/sim_utils.md - rm/sim/Simulation.md - rm/sim/Simulator.md + - Trace Utilities: + - gen_trace.py: rm/trace/gen_trace.md + - annotate.py: rm/trace/annotate.md + - events.py: rm/trace/events.md + - Benchmarking Utilities: + - join.py: rm/bench/join.md + - roi.py: rm/bench/roi.md + - visualize.py: rm/bench/visualize.md - Snitch Runtime: - Pages: runtime/Pages/index.md - Files: runtime/Files/index.md diff --git a/python-requirements.txt b/python-requirements.txt index 6db0bf03f..2c28c2a25 100644 --- a/python-requirements.txt +++ b/python-requirements.txt @@ -2,24 +2,28 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 +# Keep sorted. bin2coe dataclasses editorconfig-checker==2.3.51 flake8 gitpython hjson +json5 jsonref jsonschema mako +matplotlib +pandas progressbar2 -tabulate -yamllint -pyyaml +psutil +pyelftools pytablewriter +pytest +pyyaml +tabulate termcolor -pandas -pyelftools -psutil +yamllint -r docs/requirements.txt -r sw/dnn/requirements.txt diff --git a/sw/snRuntime/api/sync_decls.h b/sw/snRuntime/api/sync_decls.h index 2ece472ed..9b8ea6beb 100644 --- a/sw/snRuntime/api/sync_decls.h +++ b/sw/snRuntime/api/sync_decls.h @@ -9,6 +9,7 @@ typedef struct { extern volatile uint32_t _snrt_mutex; extern volatile snrt_barrier_t _snrt_barrier; +extern volatile uint32_t _reduction_result; inline volatile uint32_t *snrt_mutex(); diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index 4e4cd2152..582e93b8e 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -97,10 +97,14 @@ static inline void snrt_init_libs() { snrt_alloc_init(); } #endif #ifdef SNRT_CRT0_EXIT -static inline void snrt_exit(int exit_code) { +static inline void snrt_exit_default(int exit_code) { + exit_code = snrt_global_all_to_all_reduction(exit_code); if (snrt_global_core_idx() == 0) *(snrt_exit_code_destination()) = (exit_code << 1) | 1; } +#ifndef SNRT_CRT0_ALTERNATE_EXIT +static inline void snrt_exit(int exit_code) { snrt_exit_default(exit_code); } +#endif #endif void snrt_main() { diff --git a/sw/snRuntime/src/sync.c b/sw/snRuntime/src/sync.c index 58b079268..5d7173a9f 100644 --- a/sw/snRuntime/src/sync.c +++ b/sw/snRuntime/src/sync.c @@ -8,6 +8,7 @@ volatile uint32_t _snrt_mutex; volatile snrt_barrier_t _snrt_barrier; +volatile uint32_t _reduction_result; //================================================================================ // Functions diff --git a/sw/snRuntime/src/sync.h b/sw/snRuntime/src/sync.h index 07eea700f..7557a57e9 100644 --- a/sw/snRuntime/src/sync.h +++ b/sw/snRuntime/src/sync.h @@ -81,6 +81,12 @@ inline void snrt_global_barrier() { 
snrt_cluster_hw_barrier(); } +inline uint32_t snrt_global_all_to_all_reduction(uint32_t value) { + __atomic_add_fetch(&_reduction_result, value, __ATOMIC_RELAXED); + snrt_global_barrier(); + return _reduction_result; +} + /** * @brief Generic barrier * diff --git a/sw/tests/event_unit.c b/sw/tests/event_unit.c index 4e6695414..18586b3b7 100644 --- a/sw/tests/event_unit.c +++ b/sw/tests/event_unit.c @@ -9,7 +9,6 @@ volatile static uint32_t sum = 0; static void task(void *arg, uint32_t argc) { uint32_t arg0 = ((uint32_t *)arg)[0]; __atomic_add_fetch(&sum, arg0, __ATOMIC_RELAXED); - printf("work arg[0] = %d argc = %d\n", arg0, argc); } uint32_t run_and_verify_task(uint32_t *arg, uint32_t n_workers) { diff --git a/sw/tests/fp16_comparison_scalar.c b/sw/tests/fp16_comparison_scalar.c index 925cf11d7..3cd6f437a 100644 --- a/sw/tests/fp16_comparison_scalar.c +++ b/sw/tests/fp16_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFF4248; // 3.14 uint32_t i8an = 0xFFFFC248; // -3.14 uint32_t i8b = 0xFFFF3E79; // 1.618 @@ -210,7 +210,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_comparison_vector.c b/sw/tests/fp16_comparison_vector.c index 2bb25993b..565957f23 100644 --- a/sw/tests/fp16_comparison_vector.c +++ b/sw/tests/fp16_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa16 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa16n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb16 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -287,7 +287,8 @@ int main() { "vfeq.h %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_computation_scalar.c b/sw/tests/fp16_computation_scalar.c index f5eeab74e..2107e5474 100644 --- a/sw/tests/fp16_computation_scalar.c +++ b/sw/tests/fp16_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFF4248; // 3.14 uint32_t i_an = 0xFFFFC248; // -3.14 uint32_t i_b = 0xFFFF3E79; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.h %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16_computation_vector.c b/sw/tests/fp16_computation_vector.c index 20ba1c54c..ae1f6c29b 100644 --- a/sw/tests/fp16_computation_vector.c +++ b/sw/tests/fp16_computation_vector.c @@ -4,9 +4,9 @@ #include "snrt.h" int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -563,7 +563,8 @@ int main() { "vfeq.h %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_comparison_scalar.c b/sw/tests/fp16alt_comparison_scalar.c index 9985f0c24..e3496cf64 100644 --- a/sw/tests/fp16alt_comparison_scalar.c +++ b/sw/tests/fp16alt_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFF4049; // 3.14 uint32_t i8an = 0xFFFFC049; // -3.14 uint32_t i8b = 0xFFFF3FCF; // 1.618 @@ -213,7 +213,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } 
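All of the floating-point test rewrites in this patch follow the same pattern, sketched schematically below (a minimal sketch, not one of the actual tests): `errs` now lives inside the compute-core branch, and non-compute cores return 0 explicitly. This matters because `snrt_exit_default` above now reduces the exit codes of all cores with `snrt_global_all_to_all_reduction`, so a DMA core falling through with the initial, nonzero `errs` value would corrupt the summed exit code.

```c
#include "snrt.h"

int main() {
    if (snrt_is_compute_core()) {
        int errs = 33;  // one decrement per expected-pass check
        // ... run the FP checks, decrementing errs for each correct result ...
        return errs;    // 0 if and only if all checks passed
    }
    // Non-compute (e.g. DMA) cores must not pollute the reduced exit code.
    return 0;
}
```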
diff --git a/sw/tests/fp16alt_comparison_vector.c b/sw/tests/fp16alt_comparison_vector.c index ad504cae7..a612382fb 100644 --- a/sw/tests/fp16alt_comparison_vector.c +++ b/sw/tests/fp16alt_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa16 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa16n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb16 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -289,7 +289,8 @@ int main() { "vfeq.ah %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_computation_scalar.c b/sw/tests/fp16alt_computation_scalar.c index 323bfd9d7..49e6130fe 100644 --- a/sw/tests/fp16alt_computation_scalar.c +++ b/sw/tests/fp16alt_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFF4049; // 3.14 uint32_t i_an = 0xFFFFC049; // -3.14 uint32_t i_b = 0xFFFF3FCF; // 1.618 @@ -319,7 +319,8 @@ int main() { "feq.ah %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp16alt_computation_vector.c b/sw/tests/fp16alt_computation_vector.c index 3e1740a28..6ddfed0d6 100644 --- a/sw/tests/fp16alt_computation_vector.c +++ b/sw/tests/fp16alt_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -566,7 +566,8 @@ int main() { "vfeq.ah %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xf); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_comparison_scalar.c b/sw/tests/fp32_comparison_scalar.c index d06cdf51a..5560f3d33 100644 --- a/sw/tests/fp32_comparison_scalar.c +++ b/sw/tests/fp32_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0x4048F5C3; // 3.14 uint32_t i8an = 0xC048F5C3; // -3.14 uint32_t i8b = 0x3FCF1AA0; // 1.618 @@ -210,7 +210,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_comparison_vector.c b/sw/tests/fp32_comparison_vector.c index d04c04a65..431f1b3ef 100644 --- a/sw/tests/fp32_comparison_vector.c +++ b/sw/tests/fp32_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa32 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa32n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb32 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -283,7 +283,8 @@ int main() { "vfeq.s %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 3); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_computation_scalar.c b/sw/tests/fp32_computation_scalar.c index b0f3267a3..231f8a2f8 100644 --- a/sw/tests/fp32_computation_scalar.c +++ b/sw/tests/fp32_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0x4048F5C3; // 3.14 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.s %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_computation_vector.c b/sw/tests/fp32_computation_vector.c index 
b717ee287..da699747a 100644 --- a/sw/tests/fp32_computation_vector.c +++ b/sw/tests/fp32_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -531,7 +531,8 @@ int main() { "vfeq.s %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0x3); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp32_conversions_scalar.c b/sw/tests/fp32_conversions_scalar.c index 14c52f1f7..ca5783095 100644 --- a/sw/tests/fp32_conversions_scalar.c +++ b/sw/tests/fp32_conversions_scalar.c @@ -8,9 +8,9 @@ typedef float v2s __attribute__((vector_size(8))); int main() { - int errs = 48; - if (snrt_is_compute_core()) { + int errs = 48; + unsigned int res_cvt0 = 0; unsigned int res_cvt1 = 0; @@ -480,7 +480,8 @@ int main() { : "+r"(res_cvt0), "+r"(res_cvt1)); errs -= (res_cvt0 == 0x1); errs -= (res_cvt1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp64_conversions_scalar.c b/sw/tests/fp64_conversions_scalar.c index 49ec4c64d..44a95c82d 100644 --- a/sw/tests/fp64_conversions_scalar.c +++ b/sw/tests/fp64_conversions_scalar.c @@ -8,9 +8,9 @@ typedef float v2s __attribute__((vector_size(8))); int main() { - int errs = 48; - if (snrt_is_compute_core()) { + int errs = 48; + unsigned int res_cvt0 = 0; unsigned int res_cvt1 = 0; @@ -495,7 +495,8 @@ int main() { "+f"(fvalue_negative)); errs -= (res_cvt0 == 0x1); errs -= (res_cvt1 == 0x1); - } + return errs; + } return 0; } diff --git a/sw/tests/fp8_comparison_scalar.c b/sw/tests/fp8_comparison_scalar.c index 60caca400..bcabab874 100644 --- a/sw/tests/fp8_comparison_scalar.c +++ b/sw/tests/fp8_comparison_scalar.c @@ -5,9 +5,9 @@ #include "snrt.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFFFF42; // 3.14 uint32_t i8an = 0xFFFFFFC2; // -3.14 uint32_t i8b = 0xFFFFFF3E; // 1.618 @@ -209,7 +209,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_comparison_vector.c b/sw/tests/fp8_comparison_vector.c index 156d9bb78..9dc77404d 100644 --- a/sw/tests/fp8_comparison_vector.c +++ b/sw/tests/fp8_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa8 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa8n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb8 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -295,7 +295,8 @@ int main() { "vfeq.b %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_computation_scalar.c b/sw/tests/fp8_computation_scalar.c index a82c4103a..de2b2afc0 100644 --- a/sw/tests/fp8_computation_scalar.c +++ b/sw/tests/fp8_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFFFF42; // 3.14 uint32_t i_an = 0xFFFFFFC2; // -3.14 uint32_t i_b = 0xFFFFFF3E; // 1.618 @@ -317,7 +317,8 @@ int main() { "feq.b %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8_computation_vector.c b/sw/tests/fp8_computation_vector.c index 4a2f602d0..d62edced3 100644 --- a/sw/tests/fp8_computation_vector.c +++ b/sw/tests/fp8_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if 
(snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -631,7 +631,8 @@ int main() { "vfeq.b %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_comparison_scalar.c b/sw/tests/fp8alt_comparison_scalar.c index 6f367a7d9..d4c29b992 100644 --- a/sw/tests/fp8alt_comparison_scalar.c +++ b/sw/tests/fp8alt_comparison_scalar.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 40; - if (snrt_is_compute_core()) { + int errs = 40; + uint32_t i8a = 0xFFFFFF45; // 3.14 uint32_t i8an = 0xFFFFFFC5; // -3.14 uint32_t i8b = 0xFFFFFF3D; // 1.618 @@ -212,7 +212,8 @@ int main() { errs -= (cmp0 == 0x1); errs -= (cmp1 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_comparison_vector.c b/sw/tests/fp8alt_comparison_vector.c index 25e27bfb9..ee27dddfd 100644 --- a/sw/tests/fp8alt_comparison_vector.c +++ b/sw/tests/fp8alt_comparison_vector.c @@ -6,9 +6,9 @@ #include "printf.h" int main() { - int errs = 64; - if (snrt_is_compute_core()) { + int errs = 64; + uint32_t fa8 = 0x4048F5C3; // 0x4248 3.14 uint32_t fa8n = 0xC048F5C3; // 0xC248 -3.14 uint32_t fb8 = 0x3FCF1AA0; // 0x3E79 1.618 @@ -297,7 +297,8 @@ int main() { "vfeq.ab %1, ft8, ft0\n" : "+r"(cmp0)); errs -= (cmp0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_computation_scalar.c b/sw/tests/fp8alt_computation_scalar.c index 4b79aac72..7bc93ae62 100644 --- a/sw/tests/fp8alt_computation_scalar.c +++ b/sw/tests/fp8alt_computation_scalar.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 33; - if (snrt_is_compute_core()) { + int errs = 33; + uint32_t i_a = 0xFFFFFF45; // 3.14 uint32_t i_an = 0xFFFFFFC5; // -3.14 uint32_t i_b = 0xFFFFFF3D; // 1.618 @@ -319,7 +319,8 @@ int main() { "feq.ab %0, ft3, ft0\n" : "+r"(res0)); errs -= (res0 == 0x1); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/fp8alt_computation_vector.c b/sw/tests/fp8alt_computation_vector.c index 15da6ec04..1dea86586 100644 --- a/sw/tests/fp8alt_computation_vector.c +++ b/sw/tests/fp8alt_computation_vector.c @@ -4,9 +4,9 @@ #include int main() { - int errs = 46; - if (snrt_is_compute_core()) { + int errs = 46; + uint32_t i_a = 0x4048F5C3; // 3.14 0 uint32_t i_an = 0xC048F5C3; // -3.14 uint32_t i_b = 0x3FCF1AA0; // 1.618 2 @@ -635,7 +635,8 @@ int main() { "vfeq.ab %0, ft7, ft0\n" : "+r"(res0)); errs -= (res0 == 0xff); - } - return errs; + return errs; + } + return 0; } diff --git a/sw/tests/team_global.c b/sw/tests/team_global.c index a03fc8de1..cf9da6b89 100644 --- a/sw/tests/team_global.c +++ b/sw/tests/team_global.c @@ -15,7 +15,7 @@ int main() { uint32_t errors = 0; errors += (snrt_global_core_idx() != i); errors += (snrt_global_core_num() != 9); - errors += (snrt_cluster_idx() != i / 1); + errors += (snrt_cluster_idx() != i / 9); errors += (snrt_cluster_num() != 1); errors += (snrt_cluster_core_idx() != i % 9); errors += (snrt_cluster_core_num() != 9); diff --git a/target/common/common.mk b/target/common/common.mk index 0cf03c463..143f9b9a8 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -31,9 +31,9 @@ VLIB ?= $(QUESTA_SEPP) vlib GENTRACE_PY ?= $(UTIL_DIR)/trace/gen_trace.py ANNOTATE_PY ?= $(UTIL_DIR)/trace/annotate.py EVENTS_PY ?= $(UTIL_DIR)/trace/events.py -PERF_CSV_PY ?= $(UTIL_DIR)/trace/perf_csv.py -LAYOUT_EVENTS_PY ?= $(UTIL_DIR)/trace/layout_events.py 
-EVENTVIS_PY ?= $(UTIL_DIR)/trace/eventvis.py +JOIN_PY ?= $(UTIL_DIR)/bench/join.py +ROI_PY ?= $(UTIL_DIR)/bench/roi.py +VISUALIZE_PY ?= $(UTIL_DIR)/bench/visualize.py VERILATOR_ROOT ?= $(dir $(shell $(VERILATOR_SEPP) which verilator)).. VLT_ROOT ?= ${VERILATOR_ROOT} @@ -77,6 +77,7 @@ VLT_FLAGS += -Wno-UNSIGNED VLT_FLAGS += -Wno-UNOPTFLAT VLT_FLAGS += -Wno-fatal VLT_FLAGS += --unroll-count 1024 +VLT_FLAGS += --timescale 1ns/1ps VLT_CFLAGS += -std=c++14 -pthread VLT_CFLAGS +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test @@ -232,26 +233,27 @@ endef DASM_TRACES = $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null)) TXT_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.txt/g')) -PERF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/trace_hart/hart/g' | sed 's/.dasm/_perf.json/g')) +PERF_DUMPS = $(shell (echo $(DASM_TRACES) | sed 's/trace_hart/hart/g' | sed 's/.dasm/_perf.json/g')) ANNOTATED_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.s/g')) DIFF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.diff/g')) -GENTRACE_OUTPUTS = $(TXT_TRACES) $(PERF_TRACES) +GENTRACE_OUTPUTS = $(TXT_TRACES) $(PERF_DUMPS) ANNOTATE_OUTPUTS = $(ANNOTATED_TRACES) -PERF_CSV = $(LOGS_DIR)/perf.csv -EVENT_CSV = $(LOGS_DIR)/event.csv -TRACE_CSV = $(LOGS_DIR)/trace.csv +PERF_DUMP = $(LOGS_DIR)/perf.json +ROI_DUMP = $(LOGS_DIR)/roi.json TRACE_JSON = $(LOGS_DIR)/trace.json -.PHONY: traces annotate perf-csv event-csv layout +.PHONY: traces annotate trace-view clean-traces clean-annotate traces: $(GENTRACE_OUTPUTS) annotate: $(ANNOTATE_OUTPUTS) -perf-csv: $(PERF_CSV) -event-csv: $(EVENT_CSV) -layout: $(TRACE_CSV) $(TRACE_JSON) +trace-view: $(TRACE_JSON) +clean-traces: + rm -f $(GENTRACE_OUTPUTS) +clean-annotate: + rm -f $(ANNOTATE_OUTPUTS) -$(LOGS_DIR)/trace_hart_%.txt $(LOGS_DIR)/hart_%_perf.json: $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) - $(DASM) < $< | $(PYTHON) $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json > $(LOGS_DIR)/trace_hart_$*.txt +$(addprefix $(LOGS_DIR)/,trace_hart_%.txt hart_%_perf.json): $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) + $(DASM) < $< | $(PYTHON) $(GENTRACE_PY) --permissive --dma-trace $(LOGS_DIR)/dma_trace_$*.log --dump-hart-perf $(LOGS_DIR)/hart_$*_perf.json --dump-dma-perf $(LOGS_DIR)/dma_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt # Generate source-code interleaved traces for all harts. 
Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script @@ -261,14 +263,11 @@ $(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} $(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} $(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d -$(PERF_CSV): $(PERF_TRACES) $(PERF_CSV_PY) - $(PYTHON) $(PERF_CSV_PY) -o $@ -i $(PERF_TRACES) +$(PERF_DUMP): $(PERF_DUMPS) $(JOIN_PY) + $(PYTHON) $(JOIN_PY) -i $(shell ls $(LOGS_DIR)/*_perf.json) -o $@ -$(EVENT_CSV): $(PERF_TRACES) $(PERF_CSV_PY) - $(PYTHON) $(PERF_CSV_PY) -o $@ -i $(PERF_TRACES) --filter tstart tend +$(ROI_DUMP): $(PERF_DUMP) $(ROI_SPEC) $(ROI_PY) + $(PYTHON) $(ROI_PY) $(PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@ -$(TRACE_CSV): $(EVENT_CSV) $(LAYOUT_FILE) $(LAYOUT_EVENTS_PY) - $(PYTHON) $(LAYOUT_EVENTS_PY) $(LAYOUT_EVENTS_FLAGS) $(EVENT_CSV) $(LAYOUT_FILE) -o $@ - -$(TRACE_JSON): $(TRACE_CSV) $(EVENTVIS_PY) - $(PYTHON) $(EVENTVIS_PY) -o $@ $(TRACE_CSV) +$(TRACE_JSON): $(ROI_DUMP) $(VISUALIZE_PY) + $(PYTHON) $(VISUALIZE_PY) $(ROI_DUMP) --traces $(TXT_TRACES) --elf $(BINARY) -o $@ diff --git a/target/snitch_cluster/cfg/default.hjson b/target/snitch_cluster/cfg/default.hjson index 7f28a1073..48c98d234 100644 --- a/target/snitch_cluster/cfg/default.hjson +++ b/target/snitch_cluster/cfg/default.hjson @@ -36,7 +36,7 @@ lat_noncomp: 1, lat_conv: 2, lat_sdotp: 3, - fpu_pipe_config: "BEFORE" + fpu_pipe_config: "BEFORE", narrow_xbar_latency: "CUT_ALL_PORTS", wide_xbar_latency: "CUT_ALL_PORTS", // Isolate the core. @@ -106,10 +106,10 @@ dma_core_template: { isa: "rv32imafd", // Xdiv_sqrt: true, - # isa: "rv32ema", - xdma: true - xssr: false - xfrep: false + // isa: "rv32ema", + xdma: true, + xssr: false, + xfrep: false, xf16: false, xf16alt: false, xf8: false, diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index ce241a8d4..1e9d25e89 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -50,8 +50,8 @@ runs: simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 - elf: tests/build/fp32_conversions_scalar.elf simulators: [vsim, vcs, verilator] # banshee fails with illegal instruction - - elf: tests/build/fp64_conversions_scalar.elf - simulators: [vsim, vcs, verilator] + # - elf: tests/build/fp64_conversions_scalar.elf + # simulators: [vsim, vcs, verilator] # - elf: tests/build/interrupt.elf - elf: tests/build/interrupt_local.elf - elf: tests/build/multi_cluster.elf @@ -68,7 +68,7 @@ runs: - elf: tests/build/varargs_2.elf - elf: tests/build/zero_mem.elf - elf: tests/build/non_null_exitcode.elf - retcode: 14 + retcode: 126 - elf: apps/blas/axpy/build/axpy.elf cmd: [../../../sw/blas/axpy/verify.py, "${sim_bin}", "${elf}"] - elf: apps/blas/gemm/build/gemm.elf diff --git a/util/bench/__init__.py b/util/bench/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/util/bench/join.py b/util/bench/join.py new file mode 100755 index 000000000..56c0defe0 --- /dev/null +++ b/util/bench/join.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +"""Combines performance metrics from all threads into one JSON file. 
+
+This script takes the performance metrics from multiple cores or DMA
+engines, in JSON format as dumped by the [`events.py`][events] or
+[`gen_trace.py`][gen_trace] scripts, and merges them into a single
+JSON file for global inspection and further processing.
+"""
+
+import sys
+import argparse
+import re
+import json
+
+
+FILENAME_REGEX = r'([a-z]+)_([0-9a-f]+)_perf.json'
+
+
+def main():
+    # Argument parsing
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-i',
+        '--inputs',
+        metavar='',
+        nargs='+',
+        help='Input performance metric dumps')
+    parser.add_argument(
+        '-o',
+        '--output',
+        metavar='',
+        nargs='?',
+        default='perf.json',
+        help='Output JSON file')
+    args = parser.parse_args()
+
+    # Populate a dictionary (one entry per thread, i.e. per hart
+    # or DMA engine) enumerating all of its performance metrics
+    data = {}
+    for filename in sorted(args.inputs):
+
+        # Get thread ID and type (DMA or hart) from filename
+        match = re.search(FILENAME_REGEX, filename)
+        typ = match.group(1)
+        idx = int(match.group(2), base=16)
+
+        # Populate dictionary of metrics for the current thread
+        with open(filename, 'r') as f:
+            data[f'{typ}_{idx}'] = json.load(f)
+
+    # Export data
+    with open(args.output, 'w') as f:
+        json.dump(data, f, indent=4)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/util/bench/roi.py b/util/bench/roi.py
new file mode 100755
index 000000000..4671bff95
--- /dev/null
+++ b/util/bench/roi.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# Copyright 2024 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande
+"""Filters and labels execution regions for visualization.
+
+This script takes a JSON file of performance metrics, as output by
+[`join.py`][join], and generates another JSON file in which the
+execution regions are filtered and labeled for visualization,
+according to an auxiliary region-of-interest (ROI) specification
+file (JSON format). The specification file can be a Mako template,
+parameterized e.g. by the number of clusters in the system. The
+output JSON can be passed to the [`visualize.py`][visualize] script
+for visualization.
+
+Check out `test_data/data.json` and `test_data/spec.json` for example
+input and specification files, respectively. The corresponding output
+is contained in `test_data/roi.json`.
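+
+For illustration, a minimal specification for a two-cluster system
+could look as follows (the thread names, region indices and labels
+are examples, mirroring `test_data/spec.json`):
+
+```
+[
+    {"thread": "hart_0", "roi": [{"idx": 1, "label": "compute"}]},
+% for i in range(0, num_clusters):
+    {"thread": "${f'dma_{9*(i+1)}'}", "roi": [{"idx": 0, "label": "dma_in"}]},
+% endfor
+]
+```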
+""" + +import argparse +import json +import json5 +from mako.template import Template +import sys + + +def format_roi(roi, label): + return { + "label": label, + "tstart": roi["tstart"], + "tend": roi["tend"], + "attrs": {key: value for key, value in roi.items() if key not in ["tstart", "tend"]} + } + + +def get_roi(data, thread, idx): + thread_type, thread_idx = thread.split('_') + thread_idx = int(thread_idx) + thread_data = data[thread] + if thread_type == "hart": + return thread_data[idx] + elif thread_type == "dma": + return thread_data["transfers"][idx] + else: + raise ValueError(f"Unsupported thread type {thread_type}") + + +def filter_and_label_rois(data, spec): + output = {} + # Iterate all threads in the rendered specification + for thread_spec in spec: + thread = thread_spec['thread'] + output_rois = [] + # Iterate all ROIs to keep for the current thread + for roi in thread_spec['roi']: + output_roi = format_roi(get_roi(data, thread, roi['idx']), roi['label']) + output_rois.append(output_roi) + # Add ROIs for current thread to output, if any + if output_rois: + output[thread] = output_rois + return output + + +def load_json_inputs(input_path, spec_path, **kwargs): + # Read input JSON + with open(input_path, 'r') as f: + data = json5.load(f) + # Read and render specification template JSON + with open(spec_path, 'r') as f: + spec_template = Template(f.read()) + rendered_spec = spec_template.render(**kwargs) + spec = json5.loads(rendered_spec) + return data, spec + + +def main(): + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument( + 'input', + help='Input JSON file') + parser.add_argument( + 'spec', + help='ROI specification file (JSON format)') + parser.add_argument( + '--cfg', + help='Hardware configuration file used to render the specification file') + parser.add_argument( + '-o', + '--output', + nargs='?', + default='roi.json', + help='Output JSON file') + args = parser.parse_args() + + # Load hardware configuration + with open(args.cfg, 'r') as f: + cfg = json5.load(f) + + # Read and render input files + data, spec = load_json_inputs(args.input, args.spec, cfg=cfg) + + # Process inputs and generate output JSON + output = filter_and_label_rois(data, spec) + + # Write output to file + with open(args.output, 'w') as f: + json.dump(output, f, indent=4) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/util/bench/tests/__init__.py b/util/bench/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/util/bench/tests/test_data/data.json b/util/bench/tests/test_data/data.json new file mode 100644 index 000000000..77ca26416 --- /dev/null +++ b/util/bench/tests/test_data/data.json @@ -0,0 +1,46 @@ +{ + "hart_0": [ + { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }, + { + "tstart": 6802.0, + "tend": 12647.0, + "fpss_fpu_occupancy": 0.013860369609856264, + "total_ipc": 0.20756331279945245 + } + ], + "dma_9": { + "aggregate_bw": 11.829313543599257, + "transfers": [ + { + "tstart": 3512, + "tend": 3526, + "bw": 1.1428571428571428 + }, + { + "tstart": 3564, + "tend": 3578, + "bw": 1.1428571428571428 + } + ] + }, + "dma_18": { + "aggregate_bw": 16.633245382585752, + "transfers": [ + { + "tstart": 3608, + "tend": 3622, + "bw": 1.1428571428571428 + }, + { + "tstart": 3660, + "tend": 3674, + "bw": 1.1428571428571428 + } + ] + } +} diff --git a/util/bench/tests/test_data/roi.json b/util/bench/tests/test_data/roi.json new file mode 100644 index 
000000000..a6efe3773 --- /dev/null +++ b/util/bench/tests/test_data/roi.json @@ -0,0 +1,33 @@ +{ + "hart_0": [ + { + "label": "compute", + "tstart": 6802.0, + "tend": 12647.0, + "attrs": { + "fpss_fpu_occupancy": 0.013860369609856264, + "total_ipc": 0.20756331279945245 + } + } + ], + "dma_9": [ + { + "label": "dma_in", + "tstart": 3512, + "tend": 3526, + "attrs": { + "bw": 1.1428571428571428 + } + } + ], + "dma_18": [ + { + "label": "dma_in", + "tstart": 3608, + "tend": 3622, + "attrs": { + "bw": 1.1428571428571428 + } + } + ] +} diff --git a/util/bench/tests/test_data/spec.json b/util/bench/tests/test_data/spec.json new file mode 100644 index 000000000..ae58303c0 --- /dev/null +++ b/util/bench/tests/test_data/spec.json @@ -0,0 +1,16 @@ +[ + { + "thread": "hart_0", + "roi": [ + {"idx": 1, "label": "compute"} + ] + }, +% for i in range(0, num_clusters): + { + "thread": "${f'dma_{9*(i+1)}'}", + "roi": [ + {"idx": 0, "label": "dma_in"} + ] + }, +% endfor +] diff --git a/util/bench/tests/test_roi.py b/util/bench/tests/test_roi.py new file mode 100644 index 000000000..ffb567816 --- /dev/null +++ b/util/bench/tests/test_roi.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande + +import json +from pathlib import Path +import pytest +from bench.roi import get_roi, format_roi, load_json_inputs, filter_and_label_rois + +TEST_DATA_DIR = Path(__file__).resolve().parent / 'test_data' +INPUT_JSON = TEST_DATA_DIR / 'data.json' +SPEC_JSON = TEST_DATA_DIR / 'spec.json' +OUTPUT_JSON = TEST_DATA_DIR / 'roi.json' + + +def test_format_roi(): + label = "compute" + roi = { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + } + formatted_roi = { + "label": "compute", + "tstart": 1759.0, + "tend": 6802.0, + "attrs": { + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }, + } + assert format_roi(roi, label) == formatted_roi + + +@pytest.mark.parametrize("thread, idx, roi", [ + ('hart_0', 0, { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }), + ('dma_9', 1, { + "tstart": 3564, + "tend": 3578, + "bw": 1.1428571428571428 + }) +]) +def test_get_roi(thread, idx, roi): + with open(INPUT_JSON, 'r') as f: + data = json.load(f) + assert get_roi(data, thread, idx) == roi + + +def test_filter_and_label_rois(): + data, spec = load_json_inputs(INPUT_JSON, SPEC_JSON, num_clusters=2) + with open(OUTPUT_JSON, 'r') as f: + output = json.load(f) + assert filter_and_label_rois(data, spec) == output diff --git a/util/bench/visualize.py b/util/bench/visualize.py new file mode 100755 index 000000000..087d8b86e --- /dev/null +++ b/util/bench/visualize.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +"""Translates a ROI JSON for visualization in Chrome. + +This script translates a JSON file, in the format produced by +[`roi.py`][roi], to a JSON file adhering to the syntax required by +Chrome's +[Trace-Viewer](https://github.com/catapult-project/catapult/tree/master/tracing). + +The output can be visualized in a Chrome browser: go to the +`about:tracing` URL and load the JSON file. 
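+
+For reference, every labeled region is translated to a TraceViewer
+"complete" event of the following shape (values are illustrative,
+derived from `tests/test_data/roi.json`):
+
+```
+{"name": "compute", "ph": "X", "ts": 6.802, "dur": 5.845,
+ "pid": 0, "tid": "hart_0", "args": {"total_ipc": 0.208}}
+```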
+
+This script can be compared to `trace/tracevis.py`, but instead of
+visualizing individual instructions, it visualizes entire execution
+regions as single blocks.
+"""
+
+import argparse
+import json
+from pathlib import Path
+import sys
+
+sys.path.append(str(Path(__file__).parent / '../trace'))
+import tracevis  # noqa: E402
+
+
+# Converts nanoseconds to microseconds
+def us(ns):
+    return ns / 1000
+
+
+def main():
+    # Argument parsing
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'input',
+        metavar='',
+        help='Input JSON file')
+    parser.add_argument(
+        '--traces',
+        metavar='',
+        nargs='*',
+        help='Simulation traces to process')
+    parser.add_argument(
+        '--elf',
+        nargs='?',
+        help='ELF from which the traces were generated')
+    parser.add_argument(
+        '-o',
+        '--output',
+        metavar='',
+        nargs='?',
+        default='trace.json',
+        help='Output JSON file')
+    args = parser.parse_args()
+
+    # TraceViewer events
+    events = []
+
+    # Add a dummy instant event to mark time 0.
+    # This prevents the events from being shifted from their
+    # actual start times in order to align the first event
+    # to time 0.
+    event = {'name': 'zero',
+             'ph': 'I',  # Instant event type
+             'ts': 0,
+             's': 'g'  # Global scope
+             }
+    events.append(event)
+
+    # Read JSON contents
+    with open(args.input) as f:
+        data = json.load(f)
+
+    # Iterate threads
+    for thread, regions in data.items():
+
+        # Iterate execution regions for current thread
+        for region in regions:
+
+            # Create TraceViewer event
+            ts = int(region['tstart'])
+            dur = int(region['tend']) - ts
+            event = {
+                'name': region['label'],
+                'ph': "X",  # Complete event type
+                'ts': us(ts),
+                'dur': us(dur),
+                'pid': 0,
+                'tid': thread,
+                'args': region['attrs']
+            }
+            events.append(event)
+
+    # Optionally extract also instruction-level events
+    # from the simulation traces
+    if args.traces and args.elf:
+        events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
+                                        addr2line='addr2line', use_time=True, pid=1,
+                                        cache=True, elf=args.elf, collapse_call_stack=True)
+
+    # Create TraceViewer JSON object
+    tvobj = {}
+    tvobj['traceEvents'] = events
+    tvobj['displayTimeUnit'] = "ns"
+
+    # Dump TraceViewer events to JSON file
+    with open(args.output, 'w') as f:
+        json.dump(tvobj, f, indent=4)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/util/container/Dockerfile b/util/container/Dockerfile
index d917a6790..8fb4de146 100644
--- a/util/container/Dockerfile
+++ b/util/container/Dockerfile
@@ -78,7 +78,7 @@ RUN cargo install --path /tmp/banshee
 FROM ubuntu:18.04 AS snitch_cluster
 ARG SNITCH_LLVM_VERSION=latest
 ARG VERIBLE_VERSION=0.0-776-g09e0b87
-ARG VERILATOR_VERSION=4.100
+ARG VERILATOR_VERSION=4.110
 LABEL version="0.1"
 LABEL description="Snitch container for hardware and software development."
diff --git a/util/trace/annotate.py b/util/trace/annotate.py
index 4d2c94862..683091b31 100755
--- a/util/trace/annotate.py
+++ b/util/trace/annotate.py
@@ -1,23 +1,30 @@
 #!/usr/bin/env python3
-
 # Copyright 2021 ETH Zurich and University of Bologna.
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0
-
-# This script parses the traces generated by Snitch and creates an annotated
-# trace that includes code sources
-# Example output:
-# ; snrt_hartid (team.c:14)
-# ;   in snrt_cluster_core_idx (team.c:47)
-# ;   in main (event_unit.c:21)
-# ;         asm("csrr %0, mhartid" : "=r"(hartid));
-#  80000048  x13=0000000a # csrr    a3, mhartid
-#
-# If the -d/--diff option is specified, it instead outputs a (fictitious) diff
-# file which allows to visualize the trace-source correlation side-by-side
-# instead of interleaved.
-# For neater visualization, feed the diff file into a diff visualization tool e.g.:
-# kompare -o <diff_file>
+"""Annotates an instruction trace with source-code information.
+
+This script parses a human-readable trace, as generated by CVA6 or
+Snitch's [`gen_trace.py`][gen_trace] script, and annotates every
+instruction in the trace with information about its originating
+source code.
+
+Example output:
+```
+    ; snrt_hartid (team.c:14)
+    ;   in snrt_cluster_core_idx (team.c:47)
+    ;   in main (event_unit.c:21)
+    ;         asm("csrr %0, mhartid" : "=r"(hartid));
+     80000048  x13=0000000a # csrr    a3, mhartid
+```
+
+By default, the source-code information is interleaved in the same
+file with the instruction trace. If you prefer a side-by-side view,
+use the -d/--diff option. In this case, the tool outputs a
+(fictitious) diff file which can be fed into a diff visualization
+tool for viewing in a GUI, e.g. `kompare -o <diff_file>`.
+"""

 import sys
 import os
diff --git a/util/trace/events.py b/util/trace/events.py
index a655be033..c5442ee2a 100755
--- a/util/trace/events.py
+++ b/util/trace/events.py
@@ -3,12 +3,13 @@
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0
 #
-# This script takes a CVA6 or Snitch trace and it exports the simulation time
-# of all mcycle CSR reads in a format compatible with the gen_trace.py
-# script's JSON output.
-#
 # Author: Luca Colagrande
+"""Export `mcycle` CSR read events from a Snitch or CVA6 trace.
+This script takes a CVA6 or Snitch trace and exports the
+simulation time of all `mcycle` CSR reads to a JSON file in a format
+compatible with [`gen_trace.py`][gen_trace]'s output.
+"""

 import sys
 import argparse
diff --git a/util/trace/eventvis.py b/util/trace/eventvis.py
deleted file mode 100755
index 4d0fdfdc7..000000000
--- a/util/trace/eventvis.py
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2020 ETH Zurich and University of Bologna.
-# Licensed under the Apache License, Version 2.0, see LICENSE for details.
-# SPDX-License-Identifier: Apache-2.0
-#
-# This script takes a CSV of events, compatible with the CSV format produced by
-# `perf_csv.py`, and creates a JSON file that can be visualized by
-# [Trace-Viewer](https://github.com/catapult-project/catapult/tree/master/tracing)
-# In Chrome, open `about:tracing` and load the JSON file to view it.
-#
-# Following is an example CSV containing two regions (as would be defined by the
-# presence of one mcycle CSR read in the traces):
-#
-#     , prepare data,      , send interrupt,
-#    0,        32906, 32911,          32911, 33662
-#
-# The first line is used to assign a name to each region.
-# Each of the following lines starts with the hartid, followed by the start and
-# end timestamps of each region.
-# While the alignment of the region names in the first line w.r.t.
the following -# lines does not matter, we suggest to align them with the columns containing the -# start times of the respective regions (as in the example above). -# -# This script can be compared to `tracevis.py`, but instead of visualizing individual -# instructions, it visualizes coarser grained regions as delimited by events -# in the traces. -# -# Author: Luca Colagrande - -import sys -import argparse -import csv -import json -import tracevis - - -def pairwise(iterable): - "s -> (s0, s1), (s2, s3), (s4, s5), ..." - a = iter(iterable) - return zip(a, a) - - -# Converts nanoseconds to microseconds -def us(ns): - return ns / 1000 - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - 'csv', - metavar='', - help='Input CSV file') - parser.add_argument( - '--traces', - metavar='', - nargs='*', - help='Simulation traces to process') - parser.add_argument( - '--elf', - nargs='?', - help='ELF from which the traces were generated') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='events.json', - help='Output JSON file') - args = parser.parse_args() - - # TraceViewer events - events = [] - - # Add a dummy instant event to mark time 0. - # This is to avoid that the events are shifted from - # their actual start times to align the first event - # at time 0. - event = {'name': 'zero', - 'ph': 'I', # Instant event type - 'ts': 0, - 's': 'g' # Global scope - } - events.append(event) - - # Read CSV to collect TraceViewer events - with open(args.csv) as f: - reader = csv.reader(f, delimiter=',') - - # Get region names - regions = [name for name in next(reader) if name] - - # Process lines - for row in reader: - - # First entry in row is the hart ID - tid = row[0] - - # Start and end times of each region follow - for i, (start, end) in enumerate(pairwise(row[1:])): - - # Filter regions this hart does not take part in - if start: - - # Create TraceViewer event - ts = int(start) - dur = int(end) - ts - event = {'name': regions[i], - 'ph': "X", # Complete event type - 'ts': us(ts), - 'dur': us(dur), - 'pid': 0, - 'tid': tid - } - events.append(event) - - # Optionally extract also instruction-level events - # from the simulation traces - if args.traces and args.elf: - events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', - addr2line='addr2line', use_time=True, pid=1, - cache=True, elf=args.elf, collapse_call_stack=True) - - # Create TraceViewer JSON object - tvobj = {} - tvobj['traceEvents'] = events - tvobj['displayTimeUnit'] = "ns" - - # Dump TraceViewer events to JSON file - with open(args.output, 'w') as f: - json.dump(tvobj, f, indent=4) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index fd91ffbf9..f1f94d80d 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -2,11 +2,34 @@ # Copyright 2020 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 -# This script takes a trace generated for a Snitch hart and transforms the -# additional decode stage info into meaningful annotation. It also counts -# and computes various performance metrics up to each mcycle CSR read. - -# Author: Paul Scheffler +# +# Authors: Paul Scheffler +# Luca Colagrande +"""Script to generate human-readable instruction traces for Snitch. 
+
+This script takes a trace generated by a Snitch hart
+(see `snitch_cc.sv`) and transforms the additional decode stage info
+into meaningful annotation.
+
+It also counts and computes various performance metrics for every
+execution region. An execution region is a sequence of instructions.
+Every `mcycle` CSR read instruction in your trace implicitly defines
+two execution regions, comprising respectively:
+
+- all instructions executed before the read, up to the previous read
+  or the first executed instruction
+- all instructions executed after the read, up to the next read or
+  the last executed instruction
+
+Performance metrics are appended at the end of the generated trace
+and can optionally be dumped to a separate JSON file.
+
+It also computes various performance metrics for every DMA transfer,
+provided that the Snitch core is equipped with a tightly-coupled DMA
+engine, and the DMA trace logged during simulation
+(see `axi_dma_backend.sv`) is fed to the tool. DMA performance
+metrics are dumped to a separate JSON file.
+"""

 # TODO: OPER_TYPES and FPU_OPER_TYPES could break: optimization might alter enum mapping
 # TODO: We annotate all FP16 LSU values as IEEE, not FP16ALT... can we do better?
@@ -16,8 +39,10 @@
 import math
 import argparse
 import json
+import ast
 from ctypes import c_int32, c_uint32
 from collections import deque, defaultdict
+from pathlib import Path

 EXTRA_WB_WARN = 'WARNING: {} transactions still in flight for {}.'
@@ -381,6 +406,109 @@ def flt_lit(num: int, fmt: int, width: int = 7) -> str:
     return flt_fmt(flt_decode(num, fmt), width)


+# -------------------- DMA --------------------
+
+
+# We always assume dma_trans contains at least one incomplete placeholder DMA transaction.
+# This incomplete transaction contains default settings. Only upon a DMCPY* instruction
+# is the size of the transaction known, completing the transaction. At that point, a new
+# incomplete transaction is created, inheriting the configuration settings from the previous
+# transaction, which may or may not be overridden before the next DMCPY*.
+def update_dma(insn, extras, dma_trans):
+    # Extract instruction mnemonic from full instruction decoding (includes operand registers)
+    MNEMONIC_REGEX = r'^([\w.]+)\s'
+    match = re.match(MNEMONIC_REGEX, insn)
+    if match:
+        mnemonic = match.group(1)
+        # Process DMA instruction
+        if mnemonic in ['dmsrc', 'dmdst', 'dmstr']:
+            pass
+        elif mnemonic == 'dmrep':
+            dma_trans[-1]['rep'] = extras['opa']
+        elif mnemonic in ['dmcpy', 'dmcpyi']:
+            # Create new placeholder transaction to inherit current DMA settings
+            dma_trans.append(dma_trans[-1].copy())
+            # Set size of the transaction
+            dma_trans[-2]['size'] = extras['opa']
+            # Override repetition count if the transaction is configured to be 1D
+            config = extras['rs2']
+            enable_2d = (config & 2) >> 1
+            if not enable_2d:
+                dma_trans[-2]['rep'] = 1
+
+
+def eval_dma_metrics(dma_trans, dma_trace):
+    dma_trace = Path(dma_trace)
+    if dma_trace.exists():
+        with open(dma_trace, 'r') as f:
+            # Initialize variables
+            compl_transfers = []
+            outst_transfers = []
+            req_transfer_idx = 0
+            req_bytes = 0
+            # Iterate lines in DMA trace
+            for line in f.readlines():
+                dma = ast.literal_eval(line)
+                if 'backend_burst_req_valid' in dma:
+                    # When the first burst in a transfer is granted, we record a new transfer in
+                    # the outstanding transfers queue, with the information obtained from the core
+                    # trace.
We record the number of bytes moved by each burst in a transfer, and + # compare the total to the number of bytes moved by the transfer, to count how + # many bursts belong to the current DMA transfer (a number which is difficult + # to pre-compute from the core trace as it depends on address alignments, etc.) + if dma['backend_burst_req_valid'] and dma['backend_burst_req_ready']: + if req_bytes == 0: + n_bytes = dma_trans[req_transfer_idx]['rep'] * \ + dma_trans[req_transfer_idx]['size'] + outst_transfers.append({'tstart': dma['time'], + 'exp_bursts': 0, + 'rec_bursts': 0, + 'bytes': n_bytes}) + req_bytes += dma['backend_burst_req_num_bytes'] + outst_transfers[-1]['exp_bursts'] += 1 + # We move on to the next transfer when the bytes requested by the previous + # bursts match the current transfer size. + if req_bytes == outst_transfers[-1]['bytes']: + req_bytes = 0 + req_transfer_idx += 1 + # Upon a burst completion, we increment the received bursts count. When this + # count matches the expected bursts count of the current transfer we record the + # end time of the transfer and promote the transfer from the outstanding to the + # completed transfers' queue. + if dma['transfer_completed']: + outst_transfers[0]['rec_bursts'] += 1 + if outst_transfers[0]['rec_bursts'] == outst_transfers[0]['exp_bursts']: + outst_transfers[0]['tend'] = dma['time'] + compl_transfer = outst_transfers.pop(0) + compl_transfer.pop('exp_bursts') + compl_transfer.pop('rec_bursts') + compl_transfers.append(compl_transfer) + # Calculate bandwidth of individual transfers + for transfer in compl_transfers: + transfer['cycles'] = transfer['tend'] - transfer['tstart'] + transfer['bw'] = transfer['bytes'] / transfer['cycles'] + # Calculate aggregate bandwidth: total number of bytes transferred while any transfer is + # active (accounts for overlaps between transfers). 
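+        # Worked example (illustrative numbers): transfers spanning cycles
+        # [100, 120] and [110, 140] overlap for 10 cycles, so only 40 of
+        # their 50 combined cycles count as active, and the aggregate
+        # bandwidth is (bytes_0 + bytes_1) / 40.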
+ prev_trans_end = 0 + active_cycles = 0 + n_bytes = 0 + for transfer in compl_transfers: + # Calculate active cycles, without double-counting overlaps + curr_trans_start, curr_trans_end = transfer['tstart'], transfer['tend'] + if curr_trans_start > prev_trans_end: + active_cycles += curr_trans_end - curr_trans_start + else: + active_cycles += curr_trans_end - prev_trans_end + prev_trans_end = curr_trans_end + # Calculate total number of bytes + n_bytes += transfer['bytes'] + dma_metrics = {} + if active_cycles != 0: + dma_metrics['aggregate_bw'] = n_bytes / active_cycles + dma_metrics['transfers'] = compl_transfers + return dma_metrics + + # -------------------- FPU Sequencer -------------------- @@ -638,7 +766,8 @@ def annotate_insn( annot_fseq_offl: bool = False, # Annotate whenever core offloads to CPU on own line force_hex_addr: bool = True, - permissive: bool = True + permissive: bool = True, + dma_trans: list = [] ) -> (str, tuple, bool ): # Return time info, whether trace line contains no info, and fseq_len match = re.search(TRACE_IN_REGEX, line.strip('\n')) @@ -667,6 +796,7 @@ def annotate_insn( insn, pc_str = ('', '') else: perf_metrics[-1]['snitch_issues'] += 1 + update_dma(insn, extras, dma_trans) # Annotate sequencer elif extras['source'] == TRACE_SRCES['sequencer']: if extras['cbuf_push']: @@ -803,6 +933,12 @@ def main(): ) parser.add_argument( '-o', + '--output', + required=True, + type=argparse.FileType('w'), + help='Path to the output file' + ) + parser.add_argument( '--offl', action='store_true', help='Annotate FPSS and sequencer offloads when they happen in core') @@ -821,55 +957,74 @@ def main(): '--permissive', action='store_true', help='Ignore some state-related issues when they occur') - parser.add_argument('-d', - '--dump-perf', - nargs='?', - metavar='file', - type=argparse.FileType('w'), - help='Dump performance metrics as json text.') + parser.add_argument( + '--dma-trace', + help='Path to a DMA trace file' + ) + parser.add_argument( + '--dump-hart-perf', + nargs='?', + type=argparse.FileType('w'), + help='Dump hart performance metrics as json text.' + ) + parser.add_argument( + '--dump-dma-perf', + help='Dump DMA performance metrics as json text.' 
+    )
     args = parser.parse_args()

     line_iter = iter(args.infile.readline, b'')
-    # Prepare stateful data structures
-    time_info = None
-    gpr_wb_info = defaultdict(deque)
-    fpr_wb_info = defaultdict(deque)
-    fseq_info = {
-        'curr_sec': 0,
-        'fpss_pcs': deque(),
-        'fseq_pcs': deque(),
-        'cfg_buf': deque(),
-        'curr_cfg': None
-    }
-    perf_metrics = [
-        defaultdict(int)
-    ]  # all values initially 0, also 'start' time of measurement 0
-    perf_metrics[0]['start'] = None
-    # Parse input line by line
-    for line in line_iter:
-        if line:
-            ann_insn, time_info, empty = annotate_insn(
-                line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False,
-                time_info, args.offl, not args.saddr, args.permissive)
-            if perf_metrics[0]['start'] is None:
-                perf_metrics[0]['tstart'] = time_info[0] / 1000
-                perf_metrics[0]['start'] = time_info[1]
-            if not empty:
-                print(ann_insn)
-        else:
-            break  # Nothing more in pipe, EOF
-    perf_metrics[-1]['tend'] = time_info[0] / 1000
-    perf_metrics[-1]['end'] = time_info[1]
-    # Compute metrics
-    eval_perf_metrics(perf_metrics)
-    # Emit metrics
-    print('\n## Performance metrics')
-    for idx in range(len(perf_metrics)):
-        print('\n' + fmt_perf_metrics(perf_metrics, idx, not args.allkeys))
-
-    if args.dump_perf:
-        with args.dump_perf as file:
+
+    with args.output as file:
+        # Prepare stateful data structures
+        time_info = None
+        gpr_wb_info = defaultdict(deque)
+        fpr_wb_info = defaultdict(deque)
+        fseq_info = {
+            'curr_sec': 0,
+            'fpss_pcs': deque(),
+            'fseq_pcs': deque(),
+            'cfg_buf': deque(),
+            'curr_cfg': None
+        }
+        dma_trans = [{'rep': 1}]
+        perf_metrics = [
+            defaultdict(int)
+        ]  # all values initially 0, also 'start' time of measurement 0
+        perf_metrics[0]['start'] = None
+        # Parse input line by line
+        for line in line_iter:
+            if line:
+                ann_insn, time_info, empty = annotate_insn(
+                    line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False,
+                    time_info, args.offl, not args.saddr, args.permissive, dma_trans)
+                if perf_metrics[0]['start'] is None:
+                    perf_metrics[0]['tstart'] = time_info[0] / 1000
+                    perf_metrics[0]['start'] = time_info[1]
+                if not empty:
+                    print(ann_insn, file=file)
+            else:
+                break  # Nothing more in pipe, EOF
+        perf_metrics[-1]['tend'] = time_info[0] / 1000
+        perf_metrics[-1]['end'] = time_info[1]
+        # Compute metrics
+        eval_perf_metrics(perf_metrics)
+        # Emit metrics
+        print('\n## Performance metrics', file=file)
+        for idx in range(len(perf_metrics)):
+            print('\n' + fmt_perf_metrics(perf_metrics, idx, not args.allkeys), file=file)
+    # Emit DMA metrics
+    dma_metrics = None  # stays None if no DMA trace is provided
+    if args.dma_trace:
+        dma_metrics = eval_dma_metrics(dma_trans, args.dma_trace)
+
+    # Dump hart performance metrics to JSON file
+    if args.dump_hart_perf:
+        with args.dump_hart_perf as file:
             file.write(json.dumps(perf_metrics, indent=4))
+    # Dump DMA performance metrics to JSON file
+    if args.dump_dma_perf and dma_metrics is not None:
+        with open(args.dump_dma_perf, 'w') as file:
+            file.write(json.dumps(dma_metrics, indent=4))

     # Check for any loose ends and warn before exiting
     seq_isns = len(fseq_info['fseq_pcs']) + len(fseq_info['cfg_buf'])
diff --git a/util/trace/layout_events.py b/util/trace/layout_events.py
deleted file mode 100755
index ea877c53c..000000000
--- a/util/trace/layout_events.py
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2020 ETH Zurich and University of Bologna.
-# Licensed under the Apache License, Version 2.0, see LICENSE for details.
-# SPDX-License-Identifier: Apache-2.0 -# -# This script takes a CSV of events, compatible with the CSV format produced by -# `perf_csv.py`, and creates another CSV of events, where the events are reordered based -# on a layout CSV file and labeled for viewing with the `eventvis.py` script. -# -# Following is an example CSV of events as output by `perf_csv.py`, -# which could be fed as input to this tool: -# -# , 0_tstart, 0_tend, 1_tstart, 1_tend, 2_tstart, 2_tend -# 0, 334, 10940, 10940, 10945, 10945, 10995 -# 1, 2654, 11061, 11061, 11172, 11172, 11189 -# 2, 2654, 11061, 11061, 11172, 11172, 11190 -# 3, 2654, 11061, 11061, 11172, 11172, 11191 -# -# This is an example layout CSV, which could be fed to the tool -# together with the previous CSV: -# -# , dma-in, compute, dma-out -# 0, 0, , -# "range(1,3)", , 1, -# 9, , , 2 -# -# To produce the following output: -# -# , dma_in, , compute, , dma_out, -# 0, 334, 10940, , , , -# 1, , , 11061, 11172, , -# 2, , , 11061, 11172, , -# 3, , , , , 11172, 11191 -# -# The output CSV can be fed directly to `eventvis.py`. -# -# Author: Luca Colagrande - -import sys -import argparse -import csv -import pandas as pd -from math import isnan - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - 'csv', - metavar='', - help='Input CSV file') - parser.add_argument( - 'layout', - metavar='', - help='Layout CSV file') - parser.add_argument( - '--num-clusters', - type=int, - default=1, - help='Number of clusters') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='trace.csv', - help='Output CSV file') - args = parser.parse_args() - - # Read input CSV - df = pd.read_csv(args.csv) - - # Output CSV data - data = [] - columns = [] - - # Open layout CSV - with open(args.layout) as layout_f: - layout_reader = csv.reader(layout_f, delimiter=',') - - # Get region labels from layout header - regions = [label for label in next(layout_reader) if label and not label.isspace()] - - # Generate output columns: appropriately spaced region labels - columns = ['hartid'] + [val for label in regions for val in [label, '']] - - # Iterate layout rows - for row in layout_reader: - - # First entry in row is a hart ID or a Python expression - # which generates a list of hart IDs - expr = row[0] - code = compile(expr, "", "eval") - tids = eval(code, {}, {'num_clusters': args.num_clusters}) - if type(tids) == int: - tids = [tids] - - # Iterate hart IDs - for tid in tids: - - # Start output row with hart ID - orow = [tid] - - # Iterate all other cells in layout row (indices of regions to take) - for cell in row[1:]: - - # If the cell is not empty, get start and end times - # of the region from the input CSV and append them to the - # output row. Otherwise, leave cells empty. 
- if cell and not cell.isspace(): - reg_idx = int(cell) - row_idx = tid - col_idx = 1 + reg_idx * 2 - assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' - assert (col_idx + 1) < df.shape[1],\ - f'Region index {reg_idx} out of bounds for hart {tid}' - assert not isnan(df.iat[row_idx, col_idx]),\ - (f'Region {reg_idx} looks empty for hart {tid},' - f'check whether it was simulated') - orow.append(int(df.iat[row_idx, col_idx])) - orow.append(int(df.iat[row_idx, col_idx + 1])) - else: - orow.append('') - orow.append('') - - data.append(orow) - - # Create output dataframe and write to CSV - df = pd.DataFrame(data, columns=columns) - df.set_index('hartid', inplace=True) - df.sort_index(axis='index', inplace=True) - df.index.name = None - df.to_csv(args.output) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/util/trace/perf_csv.py b/util/trace/perf_csv.py deleted file mode 100755 index f26e242e2..000000000 --- a/util/trace/perf_csv.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2020 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# This script takes the performance metrics from all cores, in JSON format -# as dumped by the `events.py` or `gen_trace.py` scripts, and merges them -# into a single CSV file for global inspection. -# -# Author: Luca Colagrande - - -import sys -import argparse -import re -import json -import pandas as pd - - -HARTID_REGEX = r'hart_([0-9a-f]+)_perf.json' - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - '-i', - '--inputs', - metavar='', - nargs='+', - help='Input performance metric dumps') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='perf.csv', - help='Output CSV file') - parser.add_argument( - '--filter', - nargs='*', - help='All and only performance metrics to include in the CSV') - args = parser.parse_args() - - dumps = sorted(args.inputs) - - # Populate a list (one entry per hart) of dictionaries - # enumerating all the performance metrics for each hart - data = [] - index = [] - for dump in dumps: - - # Get hart id from filename and append to index - hartid = int(re.search(HARTID_REGEX, dump).group(1), base=16) - index.append(hartid) - - # Populate dictionary of metrics for the current hart - hart_metrics = {} - with open(dump, 'r') as f: - hart_data = json.load(f) - - # Uniquefy names of performance metrics in each trace - # region by prepending the region index, and merge - # all region metrics in a single dictionary - for i, region in enumerate(hart_data): - - # If filter was provided on the command-line then filter out all - # perf metrics which were not listed - if args.filter: - region = {key: val for (key, val) in region.items() if key in args.filter} - - region_metrics = {f'{i}_{key}': val for (key, val) in region.items()} - hart_metrics.update(region_metrics) - - data.append(hart_metrics) - - # Export data - df = pd.DataFrame.from_records(data, index) - df.to_csv(args.output) - - -if __name__ == '__main__': - sys.exit(main())
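The new benchmarking utilities introduced above can also be driven directly from Python, without the Makefile plumbing. A minimal sketch follows (the file paths and the `num_clusters` value are placeholders; it assumes `util/bench` is on the `PYTHONPATH`, as in the repository's pytest setup):

```python
import json

from bench.roi import load_json_inputs, filter_and_label_rois

# Load the joined metrics (as produced by join.py) and render the
# Mako spec template with the desired parameters.
data, spec = load_json_inputs('logs/perf.json', 'spec.json', num_clusters=2)

# Filter and label the regions of interest.
rois = filter_and_label_rois(data, spec)

# Equivalent to what roi.py writes out, ready to be fed to visualize.py.
with open('logs/roi.json', 'w') as f:
    json.dump(rois, f, indent=4)
```

This mirrors what the new `trace-view` Make target does via `roi.py`, short of the final `visualize.py` step.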