From aa278ce4da9082aff727960b57a927ce37fe2249 Mon Sep 17 00:00:00 2001
From: Luca Colagrande <luca.colagrande3@gmail.com>
Date: Mon, 19 Feb 2024 23:53:15 +0100
Subject: [PATCH] Move towards LLVM 15 toolchain (#61)

* conv2d: Remove `register` storage class from array declarations

* traces: Use `addr2line` from software build toolchain
---
 sw/dnn/conv2d/src/conv2d.h | 28 ++++++++++++++--------------
 target/common/common.mk    |  3 ++-
 util/trace/annotate.py     |  2 +-
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/sw/dnn/conv2d/src/conv2d.h b/sw/dnn/conv2d/src/conv2d.h
index 7037936e6..8b0640aba 100644
--- a/sw/dnn/conv2d/src/conv2d.h
+++ b/sw/dnn/conv2d/src/conv2d.h
@@ -341,7 +341,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y,
             volatile register v2s current_kappa = ((v2s *)kappa)[co];
             volatile register v2s zero = (v2s)0.0;
 
-            volatile register v2s tmp[n_unroll];
+            volatile v2s tmp[n_unroll];
 
             snrt_ssr_enable();
 
@@ -422,7 +422,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y,
             volatile register v2s current_kappa = ((v2s *)kappa)[co];
             volatile register v2s zero = (v2s)0.0;
 
-            volatile register v2s tmp[n_unroll];
+            volatile v2s tmp[n_unroll];
 
             snrt_ssr_enable();
 
@@ -653,7 +653,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) {
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
                 // TODO: check if initialization needs to be unrolled by hand
-                volatile register double sum[max_unroll];
+                volatile double sum[max_unroll];
                 if (k->flag_y_accumulate_start) {
                     for (uint32_t i = 0; i < max_unroll; i++) {
                         sum[i] = 0.0;
@@ -718,7 +718,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register double sum[max_unroll];
+                volatile double sum[max_unroll];
                 if (k->flag_y_accumulate_start) {
                     for (uint32_t i = 0; i < cleanup_unroll; i++) {
                         sum[i] = 0.0;
@@ -1009,8 +1009,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) {
         for (h0 = 0; h0 < k->dim_out_y / max_unroll; h0++) {
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
-                volatile register float reduce_reg[max_unroll];
+                volatile v2s sum[max_unroll];
+                volatile float reduce_reg[max_unroll];
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
                 float *_pOutBuffer =
@@ -1100,8 +1100,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
-                volatile register float reduce_reg[max_unroll];
+                volatile v2s sum[max_unroll];
+                volatile float reduce_reg[max_unroll];
 
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
@@ -1442,7 +1442,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
+                volatile v2s sum[max_unroll];
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
                 v2s *_pOutBuffer =
@@ -1511,7 +1511,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
+                volatile v2s sum[max_unroll];
 
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
@@ -1788,8 +1788,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) {
 
                 // Output width dimension `k->dim_out_x`
                 for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                    volatile register v2s sum[max_unroll];
-                    volatile register float reduce_reg[max_unroll];
+                    volatile v2s sum[max_unroll];
+                    volatile float reduce_reg[max_unroll];
                     // pointer to output buffer location where intermediate
                     // values are read from and stored
                     float *_pOutBuffer = &k->pOutBuffer[h * output_h_stride +
@@ -1884,8 +1884,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) {
 
                 // Output width dimension `k->dim_out_x`
                 for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                    volatile register v2s sum[max_unroll];
-                    volatile register float reduce_reg[max_unroll];
+                    volatile v2s sum[max_unroll];
+                    volatile float reduce_reg[max_unroll];
 
                     // pointer to output buffer location where intermediate
                     // values are read from and stored
diff --git a/target/common/common.mk b/target/common/common.mk
index 9e95795d3..6b7c45be0 100644
--- a/target/common/common.mk
+++ b/target/common/common.mk
@@ -26,6 +26,7 @@ VSIM         ?= $(QUESTA_SEPP) vsim
 VOPT         ?= $(QUESTA_SEPP) vopt
 VLOG         ?= $(QUESTA_SEPP) vlog
 VLIB         ?= $(QUESTA_SEPP) vlib
+ADDR2LINE    ?= $(LLVM_BINROOT)/llvm-addr2line
 
 # Internal executables
 GENTRACE_PY      ?= $(UTIL_DIR)/trace/gen_trace.py
@@ -80,7 +81,7 @@ VLT_FLAGS    += --unroll-count 1024
 VLT_CFLAGS   += -std=c++14 -pthread
 VLT_CFLAGS   +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test
 
-ANNOTATE_FLAGS      ?= -q --keep-time
+ANNOTATE_FLAGS      ?= -q --keep-time --addr2line=$(ADDR2LINE)
 LAYOUT_EVENTS_FLAGS ?= --cfg=$(CFG)
 
 # We need a recent LLVM installation (>11) to compile Verilator.
diff --git a/util/trace/annotate.py b/util/trace/annotate.py
index 512556190..e14853bdb 100755
--- a/util/trace/annotate.py
+++ b/util/trace/annotate.py
@@ -164,7 +164,7 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
 
 
 # Open ELF file for addr2line processing
-elf = a2l.Elf(elf_file)
+elf = a2l.Elf(elf_file, addr2line)
 
 # core functionality
 with open(trace, 'r') as f: