From aa278ce4da9082aff727960b57a927ce37fe2249 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Mon, 19 Feb 2024 23:53:15 +0100 Subject: [PATCH] Move towards LLVM 15 toolchain (#61) * conv2d: Remove register array declarations * traces: Use `addr2line` from software build toolchain --- sw/dnn/conv2d/src/conv2d.h | 28 ++++++++++++++-------------- target/common/common.mk | 3 ++- util/trace/annotate.py | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sw/dnn/conv2d/src/conv2d.h b/sw/dnn/conv2d/src/conv2d.h index 7037936e6..8b0640aba 100644 --- a/sw/dnn/conv2d/src/conv2d.h +++ b/sw/dnn/conv2d/src/conv2d.h @@ -341,7 +341,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y, volatile register v2s current_kappa = ((v2s *)kappa)[co]; volatile register v2s zero = (v2s)0.0; - volatile register v2s tmp[n_unroll]; + volatile v2s tmp[n_unroll]; snrt_ssr_enable(); @@ -422,7 +422,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y, volatile register v2s current_kappa = ((v2s *)kappa)[co]; volatile register v2s zero = (v2s)0.0; - volatile register v2s tmp[n_unroll]; + volatile v2s tmp[n_unroll]; snrt_ssr_enable(); @@ -653,7 +653,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { // TODO: check if initialization needs to be unrolled by hand - volatile register double sum[max_unroll]; + volatile double sum[max_unroll]; if (k->flag_y_accumulate_start) { for (uint32_t i = 0; i < max_unroll; i++) { sum[i] = 0.0; @@ -718,7 +718,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register double sum[max_unroll]; + volatile double sum[max_unroll]; if (k->flag_y_accumulate_start) { for (uint32_t i = 0; i < cleanup_unroll; i++) { sum[i] = 0.0; @@ -1009,8 +1009,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) { for (h0 = 0; h0 < k->dim_out_y / max_unroll; h0++) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; - volatile register float reduce_reg[max_unroll]; + volatile v2s sum[max_unroll]; + volatile float reduce_reg[max_unroll]; // pointer to output buffer location where intermediate values // are read from and stored float *_pOutBuffer = @@ -1100,8 +1100,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; - volatile register float reduce_reg[max_unroll]; + volatile v2s sum[max_unroll]; + volatile float reduce_reg[max_unroll]; // pointer to output buffer location where intermediate values // are read from and stored @@ -1442,7 +1442,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; + volatile v2s sum[max_unroll]; // pointer to output buffer location where intermediate values // are read from and stored v2s *_pOutBuffer = @@ -1511,7 +1511,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; + volatile v2s sum[max_unroll]; // pointer to output buffer location where intermediate values // are read from and stored @@ -1788,8 +1788,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; - volatile register float reduce_reg[max_unroll]; + volatile v2s sum[max_unroll]; + volatile float reduce_reg[max_unroll]; // pointer to output buffer location where intermediate // values are read from and stored float *_pOutBuffer = &k->pOutBuffer[h * output_h_stride + @@ -1884,8 +1884,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) { // Output width dimension `k->dim_out_x` for (uint32_t w = 0; w < k->dim_out_x; w++) { - volatile register v2s sum[max_unroll]; - volatile register float reduce_reg[max_unroll]; + volatile v2s sum[max_unroll]; + volatile float reduce_reg[max_unroll]; // pointer to output buffer location where intermediate // values are read from and stored diff --git a/target/common/common.mk b/target/common/common.mk index 9e95795d3..6b7c45be0 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -26,6 +26,7 @@ VSIM ?= $(QUESTA_SEPP) vsim VOPT ?= $(QUESTA_SEPP) vopt VLOG ?= $(QUESTA_SEPP) vlog VLIB ?= $(QUESTA_SEPP) vlib +ADDR2LINE ?= $(LLVM_BINROOT)/llvm-addr2line # Internal executables GENTRACE_PY ?= $(UTIL_DIR)/trace/gen_trace.py @@ -80,7 +81,7 @@ VLT_FLAGS += --unroll-count 1024 VLT_CFLAGS += -std=c++14 -pthread VLT_CFLAGS +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test -ANNOTATE_FLAGS ?= -q --keep-time +ANNOTATE_FLAGS ?= -q --keep-time --addr2line=$(ADDR2LINE) LAYOUT_EVENTS_FLAGS ?= --cfg=$(CFG) # We need a recent LLVM installation (>11) to compile Verilator. diff --git a/util/trace/annotate.py b/util/trace/annotate.py index 512556190..e14853bdb 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -164,7 +164,7 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): # Open ELF file for addr2line processing -elf = a2l.Elf(elf_file) +elf = a2l.Elf(elf_file, addr2line) # core functionality with open(trace, 'r') as f: