From 55b09e9f89de421beff83452cd58e50d438d3ad7 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 12 Jun 2024 15:48:35 +0200 Subject: [PATCH] dot: Various changes --- sw/blas/.gitignore | 1 + sw/blas/axpy/.gitignore | 1 - sw/blas/blas.h | 2 +- sw/blas/{dotp => dot}/Makefile | 4 +- sw/blas/{dotp => dot}/data/params.json | 2 +- sw/blas/{dotp => dot}/scripts/datagen.py | 12 +- sw/blas/{dotp => dot}/scripts/verify.py | 15 +- sw/blas/dot/src/dot.h | 145 ++++++++++++++++++ sw/blas/dot/src/main.c | 27 ++++ sw/blas/dotp/src/dotp.h | 88 ----------- sw/blas/dotp/src/main.c | 110 ------------- sw/blas/gemm/.gitignore | 1 - target/snitch_cluster/sw.mk | 2 +- .../sw/apps/blas/{dotp => dot}/Makefile | 2 +- target/snitch_cluster/sw/run.yaml | 2 + util/sim/data_utils.py | 10 ++ 16 files changed, 204 insertions(+), 220 deletions(-) create mode 100644 sw/blas/.gitignore delete mode 100644 sw/blas/axpy/.gitignore rename sw/blas/{dotp => dot}/Makefile (91%) rename sw/blas/{dotp => dot}/data/params.json (70%) rename sw/blas/{dotp => dot}/scripts/datagen.py (80%) rename sw/blas/{dotp => dot}/scripts/verify.py (63%) create mode 100644 sw/blas/dot/src/dot.h create mode 100644 sw/blas/dot/src/main.c delete mode 100644 sw/blas/dotp/src/dotp.h delete mode 100644 sw/blas/dotp/src/main.c delete mode 100644 sw/blas/gemm/.gitignore rename target/snitch_cluster/sw/apps/blas/{dotp => dot}/Makefile (84%) diff --git a/sw/blas/.gitignore b/sw/blas/.gitignore new file mode 100644 index 0000000000..2ff975f292 --- /dev/null +++ b/sw/blas/.gitignore @@ -0,0 +1 @@ +**/data/data.h \ No newline at end of file diff --git a/sw/blas/axpy/.gitignore b/sw/blas/axpy/.gitignore deleted file mode 100644 index f5ac16baa2..0000000000 --- a/sw/blas/axpy/.gitignore +++ /dev/null @@ -1 +0,0 @@ -data/data.h diff --git a/sw/blas/blas.h b/sw/blas/blas.h index 9207bf6f74..33c29e1753 100644 --- a/sw/blas/blas.h +++ b/sw/blas/blas.h @@ -5,5 +5,5 @@ #pragma once #include "axpy/src/axpy.h" +#include "dot/src/dot.h" #include "gemm/src/gemm.h" -#include "dotp/src/dotp.h" diff --git a/sw/blas/dotp/Makefile b/sw/blas/dot/Makefile similarity index 91% rename from sw/blas/dotp/Makefile rename to sw/blas/dot/Makefile index 49ff75b883..cee16bb92f 100644 --- a/sw/blas/dotp/Makefile +++ b/sw/blas/dot/Makefile @@ -1,4 +1,4 @@ -# Copyright 2023 ETH Zurich and University of Bologna. +# Copyright 2024 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 @@ -10,7 +10,7 @@ SRC_DIR := $(realpath $(MK_DIR)/src) DATA_CFG ?= $(DATA_DIR)/params.json SECTION ?= -APP ?= dotp +APP ?= dot SRCS ?= $(realpath $(SRC_DIR)/main.c) INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR) diff --git a/sw/blas/dotp/data/params.json b/sw/blas/dot/data/params.json similarity index 70% rename from sw/blas/dotp/data/params.json rename to sw/blas/dot/data/params.json index 66dfcf770f..329a03b50a 100644 --- a/sw/blas/dotp/data/params.json +++ b/sw/blas/dot/data/params.json @@ -1,4 +1,4 @@ -// Copyright 2023 ETH Zurich and University of Bologna. +// Copyright 2024 ETH Zurich and University of Bologna. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 diff --git a/sw/blas/dotp/scripts/datagen.py b/sw/blas/dot/scripts/datagen.py similarity index 80% rename from sw/blas/dotp/scripts/datagen.py rename to sw/blas/dot/scripts/datagen.py index 94a5e1be1c..d6c2fa81fe 100755 --- a/sw/blas/dotp/scripts/datagen.py +++ b/sw/blas/dot/scripts/datagen.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2023 ETH Zurich and University of Bologna. +# Copyright 2024 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 @@ -9,10 +9,10 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/")) from data_utils import format_scalar_definition, format_array_definition, \ - format_array_declaration, format_ifdef_wrapper, DataGen # noqa: E402 + format_scalar_declaration, format_ifdef_wrapper, DataGen # noqa: E402 -class AxpyDataGen(DataGen): +class DotDataGen(DataGen): MIN = -1000 MAX = +1000 @@ -38,8 +38,8 @@ def emit_header(self, **kwargs): section=kwargs['section'])] header += [format_array_definition('double', 'y', y, alignment=self.BURST_ALIGNMENT, section=kwargs['section'])] - header += [format_array_declaration('double', 'z', [n], alignment=self.BURST_ALIGNMENT, - section=kwargs['section'])] + header += [format_scalar_declaration('double', 'result', alignment=self.BURST_ALIGNMENT, + section=kwargs['section'])] result_def = format_scalar_definition('double', 'g', g) header += [format_ifdef_wrapper('BIST', result_def)] header = '\n\n'.join(header) @@ -48,4 +48,4 @@ def emit_header(self, **kwargs): if __name__ == '__main__': - sys.exit(AxpyDataGen().main()) + sys.exit(DotDataGen().main()) diff --git a/sw/blas/dotp/scripts/verify.py b/sw/blas/dot/scripts/verify.py similarity index 63% rename from sw/blas/dotp/scripts/verify.py rename to sw/blas/dot/scripts/verify.py index 5ea42423e0..9d61ff4667 100755 --- a/sw/blas/dotp/scripts/verify.py +++ b/sw/blas/dot/scripts/verify.py @@ -1,32 +1,31 @@ #!/usr/bin/env python3 -# Copyright 2023 ETH Zurich and University of Bologna. +# Copyright 2024 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 import sys from pathlib import Path -from datagen import AxpyDataGen +from datagen import DotDataGen sys.path.append(str(Path(__file__).parent / '../../../../util/sim/')) from verif_utils import Verifier # noqa: E402 -class AxpyVerifier(Verifier): +class DotVerifier(Verifier): - OUTPUT_UIDS = ['z'] + OUTPUT_UIDS = ['result'] def get_actual_results(self): - return self.get_output_from_symbol('z', 'double') + return self.get_output_from_symbol('result', 'double') def get_expected_results(self): - a = self.get_input_from_symbol('a', 'double') x = self.get_input_from_symbol('x', 'double') y = self.get_input_from_symbol('y', 'double') - return AxpyDataGen().golden_model(a, x, y) + return DotDataGen().golden_model(x, y) def check_results(self, *args): return super().check_results(*args, rtol=1e-10) if __name__ == "__main__": - sys.exit(AxpyVerifier().main()) + sys.exit(DotVerifier().main()) diff --git a/sw/blas/dot/src/dot.h b/sw/blas/dot/src/dot.h new file mode 100644 index 0000000000..a8a81561cc --- /dev/null +++ b/sw/blas/dot/src/dot.h @@ -0,0 +1,145 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "snrt.h" + +inline void dot_seq(uint32_t n, double *x, double *y, double *output) { + // Start of SSR region. + register volatile double ft0 asm("ft0"); + register volatile double ft1 asm("ft1"); + asm volatile("" : "=f"(ft0), "=f"(ft1)); + + snrt_ssr_loop_1d(SNRT_SSR_DM0, n, sizeof(double)); + snrt_ssr_loop_1d(SNRT_SSR_DM1, n, sizeof(double)); + + snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, x); + snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, y); + + register volatile double res_ssr asm("fs0") = 0; + + snrt_ssr_enable(); + + const register uint32_t Nm1 asm("t0") = n - 1; + asm volatile( + "frep.o %[n_frep], 1, 0, 0 \n" + "fmadd.d %0, ft0, ft1, %0" + : "=f"(res_ssr) /* output operands */ + : "f"(ft0), "f"(ft1), "0"(res_ssr), + [ n_frep ] "r"(Nm1) /* input operands */ + :); + + // End of SSR region. + snrt_fpu_fence(); + snrt_ssr_disable(); + asm volatile("" : : "f"(ft0), "f"(ft1)); + output[0] = res_ssr; +} + +inline void dot_seq_4_acc(uint32_t n, double *x, double *y, double *output) { + // Start of SSR region. + register volatile double ft0 asm("ft0"); + register volatile double ft1 asm("ft1"); + asm volatile("" : "=f"(ft0), "=f"(ft1)); + + snrt_ssr_loop_1d(SNRT_SSR_DM0, n, sizeof(double)); + snrt_ssr_loop_1d(SNRT_SSR_DM1, n, sizeof(double)); + + snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, x); + snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, y); + + register volatile double res_ssr_0 asm("fs0") = 0; + register volatile double res_ssr_1 asm("fs1") = 0; + register volatile double res_ssr_2 asm("fs2") = 0; + register volatile double res_ssr_3 asm("fs3") = 0; + + snrt_ssr_enable(); + + const register uint32_t Nm1 asm("t0") = (n >> 2) - 1; + asm volatile( + "frep.o %[n_frep], 4, 0, 0 \n" + "fmadd.d %0, ft0, ft1, %0 \n" + "fmadd.d %1, ft0, ft1, %1 \n" + "fmadd.d %2, ft0, ft1, %2 \n" + "fmadd.d %3, ft0, ft1, %3" + : "=f"(res_ssr_0), "=f"(res_ssr_1), "=f"(res_ssr_2), + "=f"(res_ssr_3) /* output operands */ + : "f"(ft0), "f"(ft1), "0"(res_ssr_0), "1"(res_ssr_1), "2"(res_ssr_2), + "3"(res_ssr_3), [ n_frep ] "r"(Nm1) /* input operands */ + :); + + // End of SSR region. + snrt_fpu_fence(); + snrt_ssr_disable(); + + asm volatile( + "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_1] \n" + "fadd.d %[res_ssr_2], %[res_ssr_2], %[res_ssr_3] \n" + "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_2]" + : [ res_ssr_0 ] "=f"(res_ssr_0), + [ res_ssr_2 ] "=f"(res_ssr_2) /* output operands */ + : [ res_ssr_1 ] "f"(res_ssr_1), + [ res_ssr_3 ] "f"(res_ssr_3) /* input operands */ + :); + + asm volatile("" : : "f"(ft0), "f"(ft1)); + output[0] = res_ssr_0; +} + +static inline void dot(uint32_t n, double *x, double *y, double *result) { + double *local_x, *local_y, *partial_sums; + + uint32_t start_cycle, end_cycle; + + // Allocate space in TCDM + local_x = (double *)snrt_l1_next(); + local_y = local_x + n; + partial_sums = local_y + n; + + // Copy data in TCDM + if (snrt_is_dm_core()) { + size_t size = n * sizeof(double); + snrt_dma_start_1d(local_x, x, size); + snrt_dma_start_1d(local_y, y, size); + snrt_dma_wait_all(); + } + + // Calculate size and pointers for each core + int core_idx = snrt_cluster_core_idx(); + int frac_core = n / snrt_cluster_compute_core_num(); + int offset_core = core_idx * frac_core; + local_x += offset_core; + local_y += offset_core; + + snrt_cluster_hw_barrier(); + + start_cycle = snrt_mcycle(); + + // Compute partial sums + if (snrt_is_compute_core()) { + dot_seq_4_acc(frac_core, local_x, local_y, &partial_sums[core_idx]); + } + + snrt_cluster_hw_barrier(); + + // Reduce partial sums on core 0 +#ifndef _DOTP_EXCLUDE_FINAL_SYNC_ + if (snrt_cluster_core_idx() == 0) { + for (uint32_t i = 1; i < snrt_cluster_compute_core_num(); i++) { + partial_sums[0] += partial_sums[i]; + } + snrt_fpu_fence(); + } +#endif + + end_cycle = snrt_mcycle(); + + snrt_cluster_hw_barrier(); + + // Copy data out of TCDM + if (snrt_is_dm_core()) { + *result = partial_sums[0]; + } + + snrt_cluster_hw_barrier(); +} diff --git a/sw/blas/dot/src/main.c b/sw/blas/dot/src/main.c new file mode 100644 index 0000000000..44620c210e --- /dev/null +++ b/sw/blas/dot/src/main.c @@ -0,0 +1,27 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "snrt.h" + +#include "data.h" +#include "dot.h" + +int main() { + dot(n, x, y, &result); + +// TODO: currently only works for single cluster otherwise need to +// synchronize all cores here +#ifdef BIST + uint32_t nerr = 1; + + // Check computation is correct + if (snrt_global_core_idx() == 0) { + if (result == g) nerr--; + return nerr; + } + +#endif + + return 0; +} diff --git a/sw/blas/dotp/src/dotp.h b/sw/blas/dotp/src/dotp.h deleted file mode 100644 index f2052e7397..0000000000 --- a/sw/blas/dotp/src/dotp.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2024 ETH Zurich and University of Bologna. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 - -#include "snrt.h" - -inline void dotp_seq (uint32_t N, double *input_A, double *input_B, double *output) { - // Start of SSR region. - register volatile double ft0 asm("ft0"); - register volatile double ft1 asm("ft1"); - asm volatile("" - : "=f"(ft0), "=f"(ft1)); - - snrt_ssr_loop_1d(SNRT_SSR_DM0, N, sizeof(double)); - snrt_ssr_loop_1d(SNRT_SSR_DM1, N, sizeof(double)); - - snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, input_A); - snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, input_B); - - register volatile double res_ssr asm("fs0") = 0; - - snrt_ssr_enable(); - - const register uint32_t Nm1 asm("t0") = N - 1; - asm volatile( - "frep.o %[n_frep], 1, 0, 0 \n" - "fmadd.d %0, ft0, ft1, %0" - : "=f"(res_ssr) /* output operands */ - : "f"(ft0), "f"(ft1), "0"(res_ssr), [n_frep]"r"(Nm1) /* input operands */ - :); - - // End of SSR region. - snrt_fpu_fence(); - snrt_ssr_disable(); - asm volatile("" - : - : "f"(ft0), "f"(ft1)); - output[0] = res_ssr; -} - -inline void dotp_seq_4_acc (uint32_t N, double *input_A, double *input_B, double *output) { - // Start of SSR region. - register volatile double ft0 asm("ft0"); - register volatile double ft1 asm("ft1"); - asm volatile("" - : "=f"(ft0), "=f"(ft1)); - - snrt_ssr_loop_1d(SNRT_SSR_DM0, N, sizeof(double)); - snrt_ssr_loop_1d(SNRT_SSR_DM1, N, sizeof(double)); - - snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, input_A); - snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, input_B); - - register volatile double res_ssr_0 asm("fs0") = 0; - register volatile double res_ssr_1 asm("fs1") = 0; - register volatile double res_ssr_2 asm("fs2") = 0; - register volatile double res_ssr_3 asm("fs3") = 0; - - snrt_ssr_enable(); - - const register uint32_t Nm1 asm("t0") = (N >> 2) - 1; - asm volatile( - "frep.o %[n_frep], 4, 0, 0 \n" - "fmadd.d %0, ft0, ft1, %0 \n" - "fmadd.d %1, ft0, ft1, %1 \n" - "fmadd.d %2, ft0, ft1, %2 \n" - "fmadd.d %3, ft0, ft1, %3" - : "=f"(res_ssr_0), "=f"(res_ssr_1), "=f"(res_ssr_2), "=f"(res_ssr_3) /* output operands */ - : "f"(ft0), "f"(ft1), "0"(res_ssr_0), "1"(res_ssr_1), "2"(res_ssr_2), "3"(res_ssr_3), [n_frep]"r"(Nm1) /* input operands */ - :); - - // End of SSR region. - snrt_fpu_fence(); - snrt_ssr_disable(); - - asm volatile( - "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_1] \n" - "fadd.d %[res_ssr_2], %[res_ssr_2], %[res_ssr_3] \n" - "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_2]" - : [res_ssr_0]"=f"(res_ssr_0), [res_ssr_2]"=f"(res_ssr_2) /* output operands */ - : [res_ssr_1]"f"(res_ssr_1), [res_ssr_3]"f"(res_ssr_3) /* input operands */ - :); - - asm volatile("" - : - : "f"(ft0), "f"(ft1)); - output[0] = res_ssr_0; -} diff --git a/sw/blas/dotp/src/main.c b/sw/blas/dotp/src/main.c deleted file mode 100644 index 794bc08b83..0000000000 --- a/sw/blas/dotp/src/main.c +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2023 ETH Zurich and University of Bologna. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 - -#include "snrt.h" - -#include "printf.h" - -#define XSSR -#include "dotp.h" -#include "data.h" - -int main() { - double *local_x, *local_y, *local_z; - double *remote_x, *remote_y, *remote_z; - - volatile double sum; - - uint32_t start_cycle, end_cycle; - - // Calculate size and pointers for each cluster - uint32_t frac = n / snrt_cluster_num(); - uint32_t offset = frac * snrt_cluster_idx(); - remote_x = x + offset; - remote_y = y + offset; - remote_z = z + snrt_cluster_idx(); - - // Allocate space in TCDM - local_x = (double *)snrt_l1_next(); - local_y = local_x + frac; - local_z = local_y + frac; - - // Copy data in TCDM - if (snrt_is_dm_core()) { - size_t size = frac * sizeof(double); - snrt_dma_start_1d(local_x, remote_x, size); - snrt_dma_start_1d(local_y, remote_y, size); - snrt_dma_wait_all(); - } - - // Calculate TCDM size and pointers for each core - int core_idx = snrt_cluster_core_idx(); - int frac_core = n / snrt_cluster_compute_core_num(); - int offset_core = core_idx * frac_core; - local_x += offset_core; - local_y += offset_core; - local_z += core_idx; - - snrt_cluster_hw_barrier(); - - // Compute - if (!snrt_is_dm_core()) { - start_cycle = snrt_mcycle(); - dotp_seq_4_acc(frac_core, local_x, local_y, local_z); - snrt_cluster_hw_barrier(); - -#ifndef _DOTP_EXCLUDE_FINAL_SYNC_ - if (!snrt_cluster_core_idx()) { - sum = 0; - for (uint32_t i = 0; i < snrt_cluster_compute_core_num(); ++i) { - sum += local_z[i]; - } - } - snrt_fpu_fence(); -#endif - - end_cycle = snrt_mcycle(); - } else { - // DMA should also sync with the computational cores - snrt_cluster_hw_barrier(); - } - - snrt_cluster_hw_barrier(); - - if (!snrt_cluster_core_idx()) { - unsigned int runtime = end_cycle - start_cycle; - double performance = (double) (2 * n - 1) / runtime; - double util = 100 * (performance / (2 * snrt_cluster_compute_core_num())); - - printf("Core %d execution time: %u cycles\nPerformance: %f DP-FLOP/Cycle\nUtilization: %f%%\n", - snrt_cluster_core_idx(), runtime, performance, util); - } - - snrt_cluster_hw_barrier(); - - // Copy data out of TCDM - if (snrt_is_dm_core()) { - size_t size = frac_core * sizeof(double); - snrt_dma_start_1d(remote_z, local_z, size); - snrt_dma_wait_all(); - } - - snrt_cluster_hw_barrier(); - -// TODO: currently only works for single cluster otherwise need to -// synchronize all cores here -#ifdef BIST - uint32_t nerr = 1; - - // Check computation is correct - if (snrt_global_core_idx() == 0) { - if (sum == g) nerr--; - printf("%f %f\n", sum, g); - } - - return nerr; -#endif - - return 0; -} diff --git a/sw/blas/gemm/.gitignore b/sw/blas/gemm/.gitignore deleted file mode 100644 index f5ac16baa2..0000000000 --- a/sw/blas/gemm/.gitignore +++ /dev/null @@ -1 +0,0 @@ -data/data.h diff --git a/target/snitch_cluster/sw.mk b/target/snitch_cluster/sw.mk index 329b606a57..28ab6668d8 100644 --- a/target/snitch_cluster/sw.mk +++ b/target/snitch_cluster/sw.mk @@ -42,7 +42,7 @@ APPS = sw/apps/lto APPS += sw/apps/nop APPS += sw/apps/blas/axpy APPS += sw/apps/blas/gemm -APPS += sw/apps/blas/dotp +APPS += sw/apps/blas/dot APPS += sw/apps/dnn/batchnorm APPS += sw/apps/dnn/conv2d APPS += sw/apps/dnn/fusedconv diff --git a/target/snitch_cluster/sw/apps/blas/dotp/Makefile b/target/snitch_cluster/sw/apps/blas/dot/Makefile similarity index 84% rename from target/snitch_cluster/sw/apps/blas/dotp/Makefile rename to target/snitch_cluster/sw/apps/blas/dot/Makefile index 63f748994d..41e58a5340 100644 --- a/target/snitch_cluster/sw/apps/blas/dotp/Makefile +++ b/target/snitch_cluster/sw/apps/blas/dot/Makefile @@ -4,7 +4,7 @@ # # Matteo Perotti -include ../../../../../../sw/blas/dotp/Makefile +include ../../../../../../sw/blas/dot/Makefile include ../../common.mk $(DEP): $(DATA_H) diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index e5f07c731f..3842e3e706 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -78,6 +78,8 @@ runs: cmd: [../../../sw/blas/axpy/scripts/verify.py, "${sim_bin}", "${elf}"] - elf: apps/blas/gemm/build/gemm.elf cmd: [../../../sw/blas/gemm/scripts/verify.py, "${sim_bin}", "${elf}"] + - elf: apps/blas/dot/build/dot.elf + cmd: [../../../sw/blas/dot/scripts/verify.py, "${sim_bin}", "${elf}"] - elf: apps/dnn/batchnorm/build/batchnorm.elf - elf: apps/dnn/maxpool/build/maxpool.elf # - elf: apps/dnn/conv2d/build/conv2d.elf # Fails with wrong results diff --git a/util/sim/data_utils.py b/util/sim/data_utils.py index 9763d416f5..2ed621b34e 100644 --- a/util/sim/data_utils.py +++ b/util/sim/data_utils.py @@ -144,6 +144,16 @@ def format_scalar_definition(dtype, uid, scalar): return s +def format_scalar_declaration(dtype, uid, alignment=None, section=None): + attributes = _variable_attributes(alignment, section) + s = f'{_alias_dtype(dtype)} {uid}' + if attributes: + s += f' {attributes};' + else: + s += ';' + return s + + def format_array_initializer(dtype, array): s = '{\n' array = flatten(array)