pulp-platform · colluca · Feb 19, 2024 · Oct 24, 2023 · Oct 25, 2023
@@ -341,7 +341,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y,
             volatile register v2s current_kappa = ((v2s *)kappa)[co];
             volatile register v2s zero = (v2s)0.0;
 
-            volatile register v2s tmp[n_unroll];
+            volatile v2s tmp[n_unroll];
 
             snrt_ssr_enable();
 
@@ -422,7 +422,7 @@ void bn_relu(const float *pBuffer, const uint16_t dim_x, const uint16_t dim_y,
             volatile register v2s current_kappa = ((v2s *)kappa)[co];
             volatile register v2s zero = (v2s)0.0;
 
-            volatile register v2s tmp[n_unroll];
+            volatile v2s tmp[n_unroll];
 
             snrt_ssr_enable();
 
@@ -653,7 +653,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) {
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
                 // TODO: check if initialization needs to be unrolled by hand
-                volatile register double sum[max_unroll];
+                volatile double sum[max_unroll];
                 if (k->flag_y_accumulate_start) {
                     for (uint32_t i = 0; i < max_unroll; i++) {
                         sum[i] = 0.0;
@@ -718,7 +718,7 @@ static inline void conv2d_fp64(kernel_fp64 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register double sum[max_unroll];
+                volatile double sum[max_unroll];
                 if (k->flag_y_accumulate_start) {
                     for (uint32_t i = 0; i < cleanup_unroll; i++) {
                         sum[i] = 0.0;
@@ -1009,8 +1009,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) {
         for (h0 = 0; h0 < k->dim_out_y / max_unroll; h0++) {
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
-                volatile register float reduce_reg[max_unroll];
+                volatile v2s sum[max_unroll];
+                volatile float reduce_reg[max_unroll];
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
                 float *_pOutBuffer =
@@ -1100,8 +1100,8 @@ static inline void conv2d_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
-                volatile register float reduce_reg[max_unroll];
+                volatile v2s sum[max_unroll];
+                volatile float reduce_reg[max_unroll];
 
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
@@ -1442,7 +1442,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
+                volatile v2s sum[max_unroll];
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
                 v2s *_pOutBuffer =
@@ -1511,7 +1511,7 @@ static inline void conv2d_dw_fp32(kernel_fp32 *k) {
 
             // Output width dimension `k->dim_out_x`
             for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                volatile register v2s sum[max_unroll];
+                volatile v2s sum[max_unroll];
 
                 // pointer to output buffer location where intermediate values
                 // are read from and stored
@@ -1788,8 +1788,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) {
 
                 // Output width dimension `k->dim_out_x`
                 for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                    volatile register v2s sum[max_unroll];
-                    volatile register float reduce_reg[max_unroll];
+                    volatile v2s sum[max_unroll];
+                    volatile float reduce_reg[max_unroll];
                     // pointer to output buffer location where intermediate
                     // values are read from and stored
                     float *_pOutBuffer = &k->pOutBuffer[h * output_h_stride +
@@ -1884,8 +1884,8 @@ static inline void conv2d_chw_fp32(kernel_fp32 *k) {
 
                 // Output width dimension `k->dim_out_x`
                 for (uint32_t w = 0; w < k->dim_out_x; w++) {
-                    volatile register v2s sum[max_unroll];
-                    volatile register float reduce_reg[max_unroll];
+                    volatile v2s sum[max_unroll];
+                    volatile float reduce_reg[max_unroll];
 
                     // pointer to output buffer location where intermediate
                     // values are read from and stored

@@ -26,6 +26,7 @@ VSIM         ?= $(QUESTA_SEPP) vsim
 VOPT         ?= $(QUESTA_SEPP) vopt
 VLOG         ?= $(QUESTA_SEPP) vlog
 VLIB         ?= $(QUESTA_SEPP) vlib
+ADDR2LINE    ?= $(LLVM_BINROOT)/llvm-addr2line
 
 # Internal executables
 GENTRACE_PY      ?= $(UTIL_DIR)/trace/gen_trace.py
@@ -80,7 +81,7 @@ VLT_FLAGS    += --unroll-count 1024
 VLT_CFLAGS   += -std=c++14 -pthread
 VLT_CFLAGS   +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test
 
-ANNOTATE_FLAGS      ?= -q --keep-time
+ANNOTATE_FLAGS      ?= -q --keep-time --addr2line=$(ADDR2LINE)
 LAYOUT_EVENTS_FLAGS ?= --cfg=$(CFG)
 
 # We need a recent LLVM installation (>11) to compile Verilator.

@@ -164,7 +164,7 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
 
 
 # Open ELF file for addr2line processing
-elf = a2l.Elf(elf_file)
+elf = a2l.Elf(elf_file, addr2line)
 
 # core functionality
 with open(trace, 'r') as f: