From 8727bfa10ad63d14262c0605ba245842ee789768 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Mon, 11 Mar 2024 19:35:28 +0100 Subject: [PATCH 01/11] `util/sim/Simulation.py`: Take time from last matching line in log --- util/sim/Simulation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/util/sim/Simulation.py b/util/sim/Simulation.py index 58e1b200e..e2465f095 100644 --- a/util/sim/Simulation.py +++ b/util/sim/Simulation.py @@ -226,7 +226,9 @@ def get_simulation_time(self): # Extract the simulation time from the simulation log if self.log is not None: with open(self.log, 'r') as f: - for line in f.readlines(): + # Read lines from the bottom of the file, since warning and error messages may + # also print a time to the log. + for line in reversed(f.readlines()): regex = r'Time: (\d+) ([a-z]+)\s+' match = re.search(regex, line) if match: From 865bdb0f520602f0bf5ddb4e445ad4d3d65d602a Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Mon, 11 Mar 2024 17:39:03 +0100 Subject: [PATCH 02/11] `util/sim/sim_utils.py`: Add test summary table title --- util/sim/sim_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/util/sim/sim_utils.py b/util/sim/sim_utils.py index 75ae28c5c..9844fa363 100755 --- a/util/sim/sim_utils.py +++ b/util/sim/sim_utils.py @@ -190,12 +190,9 @@ def print_summary(sims, early_exit=False, dry_run=False): Args: sims: A list of simulations from the simulation suite. """ - # Header - header = '==== Test summary ====' - print(header) - # Table table = PrettyTable() + table.title = 'Test summary' table.field_names = [ 'test', 'launched', From dee0ae6a97acd57f3ee67bc3dcd9c0d7adf0e3b8 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 17 Jan 2024 15:03:35 +0100 Subject: [PATCH 03/11] `util/sim/Simulation.py`: Fix bug in dry-run mode --- util/sim/Simulation.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/util/sim/Simulation.py b/util/sim/Simulation.py index e2465f095..5dd98ee35 100644 --- a/util/sim/Simulation.py +++ b/util/sim/Simulation.py @@ -200,17 +200,20 @@ def get_retcode(self): return super().get_retcode() elif self.log is not None: # Extract the application's return code from the simulation log - with open(self.log, 'r') as f: - for line in f.readlines(): - regex_success = r'\[SUCCESS\] Program finished successfully' - match_success = re.search(regex_success, line) - if match_success: - return 0 - else: - regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)' - match = re.search(regex_fail, line) - if match: - return int(match.group(1)) + if not self.dry_run: + with open(self.log, 'r') as f: + for line in f.readlines(): + regex_success = r'\[SUCCESS\] Program finished successfully' + match_success = re.search(regex_success, line) + if match_success: + return 0 + else: + regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)' + match = re.search(regex_fail, line) + if match: + return int(match.group(1)) + else: + return 0 def successful(self): # Check that simulation return code matches expected value (in super class) From 7a499aa5cfb68280c3673757d6ad5707a4c019dc Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Tue, 9 Apr 2024 17:23:39 +0200 Subject: [PATCH 04/11] util/trace: Add column field to annotated traces --- target/common/common.mk | 2 +- util/bench/visualize.py | 8 ++- util/trace/a2l.py | 125 +++++++++++++++++++++++++++------------- util/trace/annotate.py | 7 ++- 4 files changed, 99 insertions(+), 43 deletions(-) diff --git 
a/target/common/common.mk b/target/common/common.mk index 4c2b2c95b..21dd184aa 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -243,4 +243,4 @@ $(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY) $(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@ $(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY) - $(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) -o $@ + $(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) --addr2line $(ADDR2LINE) -o $@ diff --git a/util/bench/visualize.py b/util/bench/visualize.py index 087d8b86e..bb0bb13d8 100755 --- a/util/bench/visualize.py +++ b/util/bench/visualize.py @@ -56,6 +56,12 @@ def main(): nargs='?', default='trace.json', help='Output JSON file') + parser.add_argument( + '--addr2line', + metavar='', + nargs='?', + default='addr2line', + help='`addr2line` binary to use for parsing') args = parser.parse_args() # TraceViewer events @@ -100,7 +106,7 @@ def main(): # from the simulation traces if args.traces and args.elf: events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', - addr2line='addr2line', use_time=True, pid=1, + addr2line=args.addr2line, use_time=True, pid=1, cache=True, elf=args.elf, collapse_call_stack=True) # Create TraceViewer JSON object diff --git a/util/trace/a2l.py b/util/trace/a2l.py index ad345a011..6993043dc 100644 --- a/util/trace/a2l.py +++ b/util/trace/a2l.py @@ -12,6 +12,7 @@ from pathlib import Path from functools import lru_cache from operator import itemgetter +import re def unzip(ls): @@ -24,18 +25,13 @@ def format_function_name(name): return name -def format_line(num): - if num == '?': - return -1 - return int(num) - - class Addr2LineOutput: indent_unit = ' ' - def __init__(self, raw): + def __init__(self, raw, toolchain='llvm'): self.raw = raw + self.toolchain = toolchain # Returns the function stack of the current line. 
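
The lookup that Addr2LineOutput wraps can be sketched on its own. This is only a sketch, assuming a GNU riscv64-unknown-elf-addr2line on the PATH and a hypothetical ELF path: with -f -i the tool prints alternating function-name and file:line lines, innermost inlined frame first, which is the layout the gcc branch below slices apart.

import subprocess

def inline_stack(elf, addr, a2l='riscv64-unknown-elf-addr2line'):
    # -f prints function names, -i expands the whole chain of inlining callers
    out = subprocess.run([a2l, '-e', elf, '-f', '-i', f'{addr:x}'],
                         capture_output=True, text=True, check=True).stdout
    lines = out.splitlines()
    # GNU addr2line alternates "function" and "file:line" lines
    frames = []
    for func, loc in zip(lines[::2], lines[1::2]):
        path, _, line = loc.rpartition(':')
        frames.append({'func': func, 'file': path, 'line': line})
    return frames

# Hypothetical usage:
#   for frame in inline_stack('build/app.elf', 0x80000abc):
#       print(frame['func'], frame['file'], frame['line'])
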
# If there was no function inlining, then the function stack @@ -46,50 +42,89 @@ def __init__(self, raw): def function_stack(self): output = self.raw.split('\n') - functions = output[::2] - filepaths, lines = unzip([o.split(':') for o in output[1::2]]) - + if self.toolchain == 'llvm': + functions = output[::6] + if functions[0] == '??': + # If function is unknown, there will be no "Function start filename" + # and "Function start line" fields + filepaths = output[1:2] + lines = output[2:3] + cols = output[3:4] + else: + filepaths = output[1::6] + lines = output[4::6] + cols = output[5::6] + filepaths = [re.search(r"Filename: (.+)$", filepath).group(1) for filepath in filepaths] + lines = [re.search(r"Line: (\d+)", line).group(1) for line in lines] + cols = [re.search(r"Column: (\d+)", col).group(1) for col in cols] + cols = [int(col) if col != '0' else None for col in cols] + else: + functions = output[::2] + filepaths, lines = unzip([o.split(':') for o in output[1::2]]) + cols = [None] * len(lines) functions = map(format_function_name, functions) - lines = map(format_line, lines) + lines = [int(line) if line != '0' else None for line in lines] - stack = zip(functions, filepaths, lines) - stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack] - return stack + stack = zip(functions, filepaths, lines, cols) + stack = [{'func': s[0], 'file': s[1], 'line': s[2], 'col': s[3]} for s in stack] + # Do not create stack if compiler was unable to associate a line number to the address + return stack if stack[0]['line'] is not None else None def function_stack_string(self, short=True): - stack = reversed(self.function_stack()) s = '' indent = '' - for i, level in enumerate(stack): - func, file, line = level.values() - if short: - file = Path(file).name - indent = self.indent_unit * i - s += f'{indent}{func} ({file}:{line})\n' + stack = self.function_stack() + if stack is not None: + stack = reversed(self.function_stack()) + for i, level in enumerate(stack): + func, file, line, col = level.values() + if short: + file = Path(file).name + indent = self.indent_unit * i + s += f'{indent}{func} ({file}:{line}' + if col is not None: + s += f':{col}' + s += ')\n' + # Remove final newline + s = s[:-1] return s def line(self): - file, line = itemgetter('file', 'line')(self.function_stack()[0]) - - # Open source file - src = [] - try: - with open(file, 'r') as f: - src = [x.strip() for x in f.readlines()] - except OSError: + if self.function_stack(): + file, line = itemgetter('file', 'line')(self.function_stack()[0]) + + # Open source file src = [] + try: + with open(file, 'r') as f: + src = [x for x in f.readlines()] + except OSError: + src = [] - # Extract line - if src and line >= 0: - return src[line-1] - else: - return '' + # Extract line + if src and line is not None: + return src[line-1] def __str__(self): - s = self.function_stack_string() - if self.line(): - indent = self.indent_unit * len(s.strip().split('\n')) - s += f'{indent}{self.line()}' + s = '' + stack = self.function_stack() + if stack: + col = stack[0]['col'] + s = self.function_stack_string() + line = self.line() + if line is not None: + # Calculate indentation of original source line + stripped_line = line.lstrip() + orig_indent = len(line) - len(stripped_line) + stripped_line = stripped_line.rstrip() + # Calculate indentation to prepend to source line from the function stack string + indent = self.indent_unit * len(s.strip().split('\n')) + # Append source line to function stack string + s += f'\n{indent}{stripped_line}' + # 
Append a column indicator line + if col is not None: + new_col = col - orig_indent + len(indent) + s += '\n' + ' ' * (new_col - 1) + '^' return s @@ -99,6 +134,16 @@ def __init__(self, elf, a2l_binary='addr2line'): self.elf = Path(elf) self.a2l = a2l_binary + # We must distinguish between LLVM and GCC toolchains as the latter + # does not support the `--verbose` flag + if 'riscv64-unknown-elf-addr2line' in a2l_binary: + self.toolchain = 'gcc' + elif 'llvm-addr2line' in a2l_binary: + self.toolchain = 'llvm' + else: + raise ValueError('addr2line binary expected to be either riscv64-unknown-elf-addr2line' + ' or llvm-addr2line') + assert self.elf.exists(), f'File not found {self.elf}' @lru_cache(maxsize=1024) @@ -106,4 +151,6 @@ def addr2line(self, addr): if isinstance(addr, str): addr = int(addr, 16) cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}' - return Addr2LineOutput(os.popen(cmd).read()) + if self.toolchain == 'llvm': + cmd += ' --verbose' + return Addr2LineOutput(os.popen(cmd).read(), toolchain=self.toolchain) diff --git a/util/trace/annotate.py b/util/trace/annotate.py index 243784a07..5db04943a 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -212,8 +212,11 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): if trace_start_col < 0: trace_start_col = line.find(addr) # Get addr2line information and format it as an assembly comment - a2l_output = elf.addr2line(addr) - annot = '\n'.join([f'#; {line}' for line in str(a2l_output).split('\n')]) + a2l_output = str(elf.addr2line(addr)) + if a2l_output: + annot = '\n'.join([f'#; {line}' for line in a2l_output.split('\n')]) + else: + annot = '' except (ValueError, IndexError): a2l_output = None annot = '' From 4de5e3270c47d01d2be6e51721f0ffb4a9c6ebd5 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 10 Apr 2024 13:11:38 +0200 Subject: [PATCH 05/11] `util/trace/annotate.py`: Add annotations to lines without timestamp Previously, lines without timestamps, e.g. FPU instructions executed in the same cycle as an integer instruction, were being parsed incorrectly, so they would not be annotated. Now we use a regex to handle these lines properly. --- util/trace/annotate.py | 74 ++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/util/trace/annotate.py b/util/trace/annotate.py index 5db04943a..f6763d57b 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -87,6 +87,13 @@ '--quiet', action='store_true', help='Quiet output') +parser.add_argument( + '-f', + '--format', + default='snitch', + choices=['cva6', 'snitch'], + help='Trace format' +) args = parser.parse_args() @@ -97,6 +104,7 @@ addr2line = args.addr2line quiet = args.quiet keep_time = args.keep_time +trace_fmt = args.format if not quiet: print('elf:', elf_file, file=sys.stderr) @@ -170,6 +178,22 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): of.write(f'{hunk_header}{hunk_trace}{hunk_source}') +def parse_line(line, fmt='snitch'): + # Regex capturing the timestamp and program counter (if present) + if fmt == 'snitch': + pattern = r'^\s*(\d+)?\s*.*?M (0x[0-9a-fA-F]+)?' + elif fmt == 'cva6': + pattern = r'^\s*(\d+ns)?\s*.*?M ([0-9a-fA-F]+)?' 
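
The caret line appended in __str__ above is plain index arithmetic on the re-indented source line. A minimal sketch with made-up inputs (mark_column is not a function in this repository):

def mark_column(src_line, col, indent=''):
    # Replace the original indentation with the indentation of the stack string
    stripped = src_line.lstrip()
    orig_indent = len(src_line) - len(stripped)
    out = indent + stripped.rstrip()
    # col is 1-based; shift it by the change in leading whitespace
    new_col = col - orig_indent + len(indent)
    return out + '\n' + ' ' * (new_col - 1) + '^'

# Prints the re-indented line with a '^' under column 20 of the original
# line, i.e. under the 'b' in "return a + b;"
print(mark_column('        return a + b;', col=20, indent='    '))
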
+ else: + raise ValueError('Invalid trace format string') + match = re.search(pattern, line) + if match: + timestamp, program_counter = match.group(1), match.group(2) + else: + timestamp, program_counter = None, None + return timestamp, program_counter + + # Open ELF file for addr2line processing elf = a2l.Elf(elf_file, addr2line) @@ -200,42 +224,36 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): last_prog = 0 for lino, line in enumerate(trace_lines): - # Split trace line in columns - cols = re.split(r" +", line.strip()) - # Get simulation time from first column - time = cols[0] - # RTL traces might not contain a PC on each line - try: - # Get address from PC column - addr = cols[3] - # Find index of first character in PC - if trace_start_col < 0: - trace_start_col = line.find(addr) - # Get addr2line information and format it as an assembly comment - a2l_output = str(elf.addr2line(addr)) + # Parse trace line + time, pc = parse_line(line, fmt=trace_fmt) + + # In the first line we record the start column of the program counter + # so we know where to cut the line, even when the PC is missing + if trace_start_col < 0: + assert pc is not None, "Program counter is missing in first trace line" + trace_start_col = line.find(pc) + + # Filter trace line for printing + if keep_time: + time = time if time is not None else "" + filtered_line = f'{time:>12} {line[trace_start_col:]}' + else: + filtered_line = f'{line[trace_start_col:]}' + + # Get addr2line information and format it as an assembly comment + annot = '' + a2l_output = None + if pc is not None: + a2l_output = str(elf.addr2line(pc)) if a2l_output: annot = '\n'.join([f'#; {line}' for line in a2l_output.split('\n')]) - else: - annot = '' - except (ValueError, IndexError): - a2l_output = None - annot = '' - if keep_time: - filtered_line = f'{time:>12} {line[trace_start_col:]}' - else: - filtered_line = f'{line[trace_start_col:]}' + else: if diff: hunk_trace += f'-{filtered_line}' else: of.write(f' {filtered_line}') continue - # Filter trace line for printing - if keep_time: - filtered_line = f'{time:>12} {line[trace_start_col:]}' - else: - filtered_line = f'{line[trace_start_col:]}' - # Print diff if diff: # Compare current and previous call stacks From b03b66ee707e2cd177b9da4b5af4c740af020531 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 17 Apr 2024 19:40:24 +0200 Subject: [PATCH 06/11] `util/trace/annotate.py`: Preserve performance metrics at end of file --- util/trace/annotate.py | 144 ++++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 67 deletions(-) diff --git a/util/trace/annotate.py b/util/trace/annotate.py index f6763d57b..7d0601076 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -222,81 +222,91 @@ def parse_line(line, fmt='snitch'): tot_lines = len(trace_lines) last_prog = 0 + end_of_trace_reached = False for lino, line in enumerate(trace_lines): - # Parse trace line - time, pc = parse_line(line, fmt=trace_fmt) + # Check if we reached the end of the trace, marked by the first empty line, which + # is the followed by the performance metrics. 
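
The Snitch capture pattern used by parse_line() can be checked in isolation; the sample lines below are illustrative only, not copied from a real trace, and both groups are optional so a line without a timestamp still parses.

import re

SNITCH_PATTERN = r'^\s*(\d+)?\s*.*?M (0x[0-9a-fA-F]+)?'

for line in ['   1000 126 M 0x80010a1c  fadd.d ft0, ft1, ft2',
             '            M 0x80010a20  fmul.d ft3, ft0, ft0']:
    match = re.search(SNITCH_PATTERN, line)
    # group(1) is the timestamp (None on the second line), group(2) the PC
    print(match.group(1), match.group(2))
# 1000 0x80010a1c
# None 0x80010a20
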
+ if line == '\n': + end_of_trace_reached = True - # In the first line we record the start column of the program counter - # so we know where to cut the line, even when the PC is missing - if trace_start_col < 0: - assert pc is not None, "Program counter is missing in first trace line" - trace_start_col = line.find(pc) - - # Filter trace line for printing - if keep_time: - time = time if time is not None else "" - filtered_line = f'{time:>12} {line[trace_start_col:]}' - else: - filtered_line = f'{line[trace_start_col:]}' - - # Get addr2line information and format it as an assembly comment - annot = '' - a2l_output = None - if pc is not None: - a2l_output = str(elf.addr2line(pc)) - if a2l_output: - annot = '\n'.join([f'#; {line}' for line in a2l_output.split('\n')]) + # Lines after the end of the trace are simply redirected as is to the annotated trace + if end_of_trace_reached: + of.write(f'{line}') else: + # Parse regular trace line + time, pc = parse_line(line, fmt=trace_fmt) + + # In the first line we record the start column of the program counter + # so we know where to cut the line, even when the PC is missing + if trace_start_col < 0: + assert pc is not None, "Program counter is missing in first trace line" + trace_start_col = line.find(pc) + + # Filter trace line for printing + if keep_time: + time = time if time is not None else "" + filtered_line = f'{time:>12} {line[trace_start_col:]}' + else: + filtered_line = f'{line[trace_start_col:]}' + + # Get addr2line information and format it as an assembly comment + annot = '' + a2l_output = None + if pc is not None: + a2l_output = str(elf.addr2line(pc)) + if a2l_output: + annot = '\n'.join([f'#; {line}' for line in a2l_output.split('\n')]) + else: + if diff: + hunk_trace += f'-{filtered_line}' + else: + of.write(f' {filtered_line}') + continue + + # Print diff if diff: + # Compare current and previous call stacks + if a2l_output: + funs, files, lines = zip(*[level.values() for level in a2l_output.function_stack()]) + else: + funs = files = lines = [] + next_call_stack = assemble_call_stack(funs, files, lines) + matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack) + matching_src_line = matching_source_line(next_call_stack, call_stack) + + # If this instruction does not map to the same evaluation of the source line + # of the last instruction, we finalize and dump the previous hunk + if hunk_trace and not matching_src_line: + dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source) + # Initialize next hunk + hunk_tstart += len(hunk_trace.splitlines()) + hunk_sstart += len(hunk_source.splitlines()) + hunk_trace = '' + hunk_source = '' + + # Update state for next iteration + call_stack = next_call_stack + + # Assemble source part of hunk + src_line = a2l_output.line() + if len(funs) and src_line: + for i, call in enumerate(call_stack): + if i >= matching_cstack_levels: + hunk_source += f'+{format_call(i, call)}' + if not matching_src_line: + indentation = ' ' * (len(call_stack) - 1) + hunk_source += f'+{indentation}{lines[0]}: {src_line}\n' + + # Assemble trace part of hunk hunk_trace += f'-{filtered_line}' - else: - of.write(f' {filtered_line}') - continue - # Print diff - if diff: - # Compare current and previous call stacks - if a2l_output: - funs, files, lines = zip(*[level.values() for level in a2l_output.function_stack()]) + # Default: print trace interleaved with source annotations else: - funs = files = lines = [] - next_call_stack = assemble_call_stack(funs, files, lines) - matching_cstack_levels = 
matching_call_stack_levels(next_call_stack, call_stack) - matching_src_line = matching_source_line(next_call_stack, call_stack) - - # If this instruction does not map to the same evaluation of the source line - # of the last instruction, we finalize and dump the previous hunk - if hunk_trace and not matching_src_line: - dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source) - # Initialize next hunk - hunk_tstart += len(hunk_trace.splitlines()) - hunk_sstart += len(hunk_source.splitlines()) - hunk_trace = '' - hunk_source = '' - - # Update state for next iteration - call_stack = next_call_stack - - # Assemble source part of hunk - src_line = a2l_output.line() - if len(funs) and src_line: - for i, call in enumerate(call_stack): - if i >= matching_cstack_levels: - hunk_source += f'+{format_call(i, call)}' - if not matching_src_line: - indentation = ' ' * (len(call_stack) - 1) - hunk_source += f'+{indentation}{lines[0]}: {src_line}\n' - - # Assemble trace part of hunk - hunk_trace += f'-{filtered_line}' - - # Default: print trace interleaved with source annotations - else: - if len(annot) and annot != last: - of.write(annot+'\n') - of.write(f' {filtered_line}') - last = annot + if len(annot) and annot != last: + of.write(annot+'\n') + of.write(f' {filtered_line}') + last = annot # very simple progress if not quiet: From b72d00abdbed147b66785c9c36e2f410b005783b Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Sun, 5 May 2024 16:52:57 +0200 Subject: [PATCH 07/11] `util/trace/a2l.py`: Parse addr2line output using regex Correctly handles some corner cases where addr2line did not display all information for a certain function. --- util/trace/a2l.py | 69 +++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/util/trace/a2l.py b/util/trace/a2l.py index 6993043dc..91cde3361 100644 --- a/util/trace/a2l.py +++ b/util/trace/a2l.py @@ -19,12 +19,6 @@ def unzip(ls): return zip(*ls) -def format_function_name(name): - if name == '??': - return 'unknown function' - return name - - class Addr2LineOutput: indent_unit = ' ' @@ -40,33 +34,44 @@ def __init__(self, raw, toolchain='llvm'): # belonged to after inlining the previous, up to (and including) # the last function which was not inlined. def function_stack(self): - output = self.raw.split('\n') - if self.toolchain == 'llvm': - functions = output[::6] - if functions[0] == '??': - # If function is unknown, there will be no "Function start filename" - # and "Function start line" fields - filepaths = output[1:2] - lines = output[2:3] - cols = output[3:4] - else: - filepaths = output[1::6] - lines = output[4::6] - cols = output[5::6] - filepaths = [re.search(r"Filename: (.+)$", filepath).group(1) for filepath in filepaths] - lines = [re.search(r"Line: (\d+)", line).group(1) for line in lines] - cols = [re.search(r"Column: (\d+)", col).group(1) for col in cols] - cols = [int(col) if col != '0' else None for col in cols] + # Define a regex pattern to capture relevant data. The function start filename + # and start line are optional, so they are enclosed in ()?. + pattern = re.compile( + r'^(?P.+?)\s*' + r'Filename:\s*(?P[^\n]+)\s*' + r'(Function start filename:\s*(?P[^\n]+)\s*' + r'Function start line:\s*(?P\d+)\s*)?' 
+ r'Line:\s*(?P\d+)\s*' + r'Column:\s*(?P\d+)', + re.MULTILINE) else: - functions = output[::2] - filepaths, lines = unzip([o.split(':') for o in output[1::2]]) - cols = [None] * len(lines) - functions = map(format_function_name, functions) - lines = [int(line) if line != '0' else None for line in lines] - - stack = zip(functions, filepaths, lines, cols) - stack = [{'func': s[0], 'file': s[1], 'line': s[2], 'col': s[3]} for s in stack] + # Define a regex pattern to match function names, file paths and line numbers + pattern = re.compile( + r"^(?P.+)\n(?P.+):(?P\d+)(?: \(discriminator \d+\))?$", + re.MULTILINE) + + # Find all matches and organize them into a list of dictionaries + stack = [match.groupdict() for match in pattern.finditer(self.raw)] + + # Format stack entries + def format_stack_entry(entry): + func, line, col = [entry.get(key, None) for key in ['func', 'line', 'col']] + # Rename unknown functions + if func == '??': + entry['func'] = 'unknown function' + # Add column key if missing and convert 0 line and cols to None + for key, val in zip(['line', 'col'], [line, col]): + if val is not None: + val = int(val) + if val == 0: + entry[key] = None + else: + entry[key] = val + else: + entry[key] = val + return entry + stack = list(map(format_stack_entry, stack)) # Do not create stack if compiler was unable to associate a line number to the address return stack if stack[0]['line'] is not None else None @@ -77,7 +82,7 @@ def function_stack_string(self, short=True): if stack is not None: stack = reversed(self.function_stack()) for i, level in enumerate(stack): - func, file, line, col = level.values() + func, file, line, col = [level.get(key) for key in ["func", "file", "line", "col"]] if short: file = Path(file).name indent = self.indent_unit * i From 1c7b06e700abb49c0cb9765a90e058b33361e7b0 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Mon, 6 May 2024 16:10:57 +0200 Subject: [PATCH 08/11] treewide: Extend `visualize.py` as required to include CVA6 trace --- target/common/common.mk | 4 +++- util/bench/visualize.py | 32 ++++++++++++++------------------ util/trace/tracevis.py | 18 +++++++++--------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/target/common/common.mk b/target/common/common.mk index 21dd184aa..70afd80c2 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -211,6 +211,8 @@ JOINT_PERF_DUMP = $(LOGS_DIR)/perf.json ROI_DUMP = $(LOGS_DIR)/roi.json VISUAL_TRACE = $(LOGS_DIR)/trace.json +VISUALIZE_PY_FLAGS += --tracevis "$(BINARY) $(SNITCH_TXT_TRACES) --addr2line $(ADDR2LINE) -f snitch" + .PHONY: traces annotate visual-trace clean-traces clean-annotate clean-perf clean-visual-trace traces: $(TXT_TRACES) annotate: $(ANNOTATED_TRACES) @@ -243,4 +245,4 @@ $(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY) $(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@ $(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY) - $(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) --addr2line $(ADDR2LINE) -o $@ + $(VISUALIZE_PY) $(ROI_DUMP) $(VISUALIZE_PY_FLAGS) -o $@ diff --git a/util/bench/visualize.py b/util/bench/visualize.py index bb0bb13d8..232a29348 100755 --- a/util/bench/visualize.py +++ b/util/bench/visualize.py @@ -41,14 +41,10 @@ def main(): metavar='', help='Input JSON file') parser.add_argument( - '--traces', - metavar='', - nargs='*', - help='Simulation traces to process') - parser.add_argument( - '--elf', - nargs='?', - help='ELF from which the traces were generated') + '--tracevis', + action='append', + default=[], + 
help='Argument string to pass down to tracevis, to generate additional events.') parser.add_argument( '-o', '--output', @@ -56,12 +52,6 @@ def main(): nargs='?', default='trace.json', help='Output JSON file') - parser.add_argument( - '--addr2line', - metavar='', - nargs='?', - default='addr2line', - help='`addr2line` binary to use for parsing') args = parser.parse_args() # TraceViewer events @@ -104,10 +94,16 @@ def main(): # Optionally extract also instruction-level events # from the simulation traces - if args.traces and args.elf: - events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', - addr2line=args.addr2line, use_time=True, pid=1, - cache=True, elf=args.elf, collapse_call_stack=True) + for tvargs in args.tracevis: + # Break tracevis argument string into a list of arguments + tvargs = tvargs.split() + # Add default arguments, and parse all tracevis arguments + tvargs.append('--time') + tvargs.append('--collapse-call-stack') + tvargs = vars(tracevis.parse_args(tvargs)) + # Add more arguments, and get tracevis events + tvargs['pid'] = 1 + events += tracevis.parse_traces(**tvargs) # Create TraceViewer JSON object tvobj = {} diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index 599c82bd6..46ce80661 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -115,7 +115,9 @@ def flush(lah, buf, **kwargs): event['dur'] = 1 if fmt == 'banshee' else duration # The thread ID is used to group events in a single TraceViewer row if not collapse_call_stack: - event['tid'] = a2l_info.function_stack[0]['func'] + stack = a2l_info.function_stack() + if stack is not None: + event['tid'] = stack[0]['func'] if fmt == 'banshee': # Banshee stores all traces in a single file event['tid'] = priv @@ -272,13 +274,11 @@ def parse_traces(traces, **kwargs): # Iterate traces events = [] - for i, filename in enumerate(traces): + for filename in traces: - # Extract hartid from filename or use current index - # TODO doesn't work with hex numbers - # parsed_nums = re.findall(r'\d+', filename) - # hartid = int(parsed_nums[-1]) if len(parsed_nums) else i - hartid = i + # Extract hartid from filename + pattern = r'trace_hart_([0-9a-fA-F]+)\.txt$' + hartid = int(re.search(pattern, filename).group(1), 16) # Extract TraceViewer events from trace trace_events = parse_trace(filename, **kwargs) @@ -319,7 +319,7 @@ def main(**kwargs): # Parse command-line args -def parse_args(): +def parse_args(args=None): # Argument parsing parser = argparse.ArgumentParser('tracevis', allow_abbrev=True) parser.add_argument( @@ -379,7 +379,7 @@ def parse_args(): type=int, default=-1, help='Last line to parse (inclusive)') - return parser.parse_args() + return parser.parse_args(args) if __name__ == '__main__': From a7b3a415785421f884a7cd6bd32534148d295175 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Thu, 18 Apr 2024 20:45:15 +0200 Subject: [PATCH 09/11] `util/trace/tracevis.py`: Parallelize processing multiple traces --- util/trace/tracevis.py | 62 ++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index 46ce80661..7c21f0ba2 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -19,14 +19,7 @@ import json import argparse from a2l import Elf - -has_progressbar = True -try: - import progressbar -except ImportError as e: - # Do not use progressbar - print(f'{e} --> No progress bar will be shown.', file=sys.stderr) - has_progressbar = False +from concurrent.futures import 
ThreadPoolExecutor # line format: @@ -215,12 +208,16 @@ def offload_lookahead(lines, **kwargs): # Parses a trace file and returns a list of TraceViewer events. # Each event is formatted as a dictionary. -def parse_trace(filename, **kwargs): +def parse_trace(filename, kwargs): start = kwargs['start'] end = kwargs['end'] fmt = kwargs['fmt'] + # Extract hartid from filename + pattern = r'trace_hart_([0-9a-fA-F]+)\.txt$' + hartid = int(re.search(pattern, filename).group(1), 16) + # Open trace print(f'parsing trace {filename}', file=sys.stderr) lah = {} @@ -238,17 +235,9 @@ def parse_trace(filename, **kwargs): if fmt == 'snitch': lah = offload_lookahead(all_lines, **kwargs) - # Use a progress bar iterator if the package is installed - if has_progressbar: - iterations = progressbar.progressbar( - enumerate(all_lines), - max_value=len(all_lines)) - else: - iterations = enumerate(all_lines) - # Iterate lines events = [] - for lino, line in iterations: + for lino, line in enumerate(all_lines): # Parse line parsed_line = parse_line(line, **kwargs) if parsed_line: @@ -263,6 +252,15 @@ def parse_trace(filename, **kwargs): events += flush(lah, buf, **kwargs) print(f' parsed {lines-fails} of {lines} lines', file=sys.stderr) + + # Assign a per-trace unique TID or PID to all events + for event in events: + if kwargs['collapse_call_stack']: + event['pid'] = kwargs['pid'] + event['tid'] = hartid + else: + event['pid'] = kwargs['pid']+':hartid'+str(hartid) + return events @@ -272,28 +270,16 @@ def parse_traces(traces, **kwargs): elf_path = kwargs['elf'] kwargs['elf'] = Elf(elf_path, a2l_binary=kwargs['addr2line']) - # Iterate traces - events = [] - for filename in traces: - - # Extract hartid from filename - pattern = r'trace_hart_([0-9a-fA-F]+)\.txt$' - hartid = int(re.search(pattern, filename).group(1), 16) - - # Extract TraceViewer events from trace - trace_events = parse_trace(filename, **kwargs) + # The unique PID to add to the events + if 'pid' not in kwargs: + kwargs['pid'] = elf_path - # Assign a per-trace unique TID or PID to all events - pid = elf_path if 'pid' not in kwargs else kwargs['pid'] - for event in trace_events: - if kwargs['collapse_call_stack']: - event['pid'] = pid - event['tid'] = hartid - else: - event['pid'] = pid+':hartid'+str(hartid) + # Parse traces in parallel + with ThreadPoolExecutor() as executor: + results = executor.map(parse_trace, traces, [kwargs]*len(traces)) - # Add to events from previous traces - events += trace_events + # Flatten list of event lists from every trace into a single list + events = [event for result in results for event in result] return events From d8dbafbb10f27218341cd155d63b549900b305a9 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Sun, 12 May 2024 17:05:16 +0200 Subject: [PATCH 10/11] `util/trace/gen_trace.py`: Print line number on exception --- util/trace/gen_trace.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index 62b9737d9..db094ad7e 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -41,6 +41,8 @@ from ctypes import c_int32, c_uint32 from collections import deque, defaultdict from pathlib import Path +import traceback +from itertools import tee, islice, chain EXTRA_WB_WARN = 'WARNING: {} transactions still in flight for {}.' 
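
The fan-out used by parse_traces() above reduces to a small pattern. This is a sketch with a placeholder worker (parse_one and parse_all are hypothetical names, not the repository's functions):

from concurrent.futures import ThreadPoolExecutor

def parse_one(filename, opts):
    # Placeholder worker; in tracevis.py this role is played by parse_trace(filename, kwargs)
    with open(filename) as f:
        return [{'file': filename, 'lino': i} for i, _ in enumerate(f)]

def parse_all(filenames, opts):
    with ThreadPoolExecutor() as executor:
        # map() preserves input order; the shared opts dict is repeated once per
        # file, mirroring executor.map(parse_trace, traces, [kwargs] * len(traces))
        results = executor.map(parse_one, filenames, [opts] * len(filenames))
        # Flatten the per-file event lists into a single list
        return [event for result in results for event in result]

# Hypothetical usage:
#   events = parse_all(['logs/trace_hart_00000.txt', 'logs/trace_hart_00001.txt'], {})
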
@@ -1038,6 +1040,15 @@ def fmt_perf_metrics(perf_metrics: list, idx: int, omit_keys: bool = True): return '\n'.join(ret) +# -------------------- Utils -------------------- + + +def current_and_next(iterable): + currs, nexts = tee(iterable, 2) + nexts = chain(islice(nexts, 1, None), [None]) + return zip(currs, nexts) + + # -------------------- Main -------------------- @@ -1115,16 +1126,26 @@ def main(): ] # all values initially 0, also 'start' time of measurement 0 perf_metrics[0]['start'] = None # Parse input line by line - for line in line_iter: + for lineno, (line, nextl) in enumerate(current_and_next(line_iter)): if line: - ann_insn, time_info, empty = annotate_insn( - line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False, - time_info, args.offl, not args.saddr, args.permissive, dma_trans) - if perf_metrics[0]['start'] is None: - perf_metrics[0]['tstart'] = time_info[0] / 1000 - perf_metrics[0]['start'] = time_info[1] - if not empty: - print(ann_insn, file=file) + try: + ann_insn, time_info, empty = annotate_insn( + line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False, + time_info, args.offl, not args.saddr, args.permissive, dma_trans) + if perf_metrics[0]['start'] is None: + perf_metrics[0]['tstart'] = time_info[0] / 1000 + perf_metrics[0]['start'] = time_info[1] + if not empty: + print(ann_insn, file=file) + except Exception: + message = 'Exception occured while processing ' + if not nextl: + message += 'last line. Did the simulation terminate?' + else: + message += 'line {lineno}.' + print(traceback.format_exc(), file=sys.stderr) + print(message, file=sys.stderr) + return 1 else: break # Nothing more in pipe, EOF perf_metrics[-1]['tend'] = time_info[0] / 1000 From b9b6c42d5e29a516b74c2df268d59567060ae43e Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Sun, 11 Aug 2024 15:53:53 +0200 Subject: [PATCH 11/11] Correct linting --- util/trace/a2l.py | 2 +- util/trace/annotate.py | 3 ++- util/trace/tracevis.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/util/trace/a2l.py b/util/trace/a2l.py index 91cde3361..4a849e80a 100644 --- a/util/trace/a2l.py +++ b/util/trace/a2l.py @@ -53,7 +53,7 @@ def function_stack(self): # Find all matches and organize them into a list of dictionaries stack = [match.groupdict() for match in pattern.finditer(self.raw)] - + # Format stack entries def format_stack_entry(entry): func, line, col = [entry.get(key, None) for key in ['func', 'line', 'col']] diff --git a/util/trace/annotate.py b/util/trace/annotate.py index 7d0601076..dac037832 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -268,7 +268,8 @@ def parse_line(line, fmt='snitch'): if diff: # Compare current and previous call stacks if a2l_output: - funs, files, lines = zip(*[level.values() for level in a2l_output.function_stack()]) + funs, files, lines = zip( + *[level.values() for level in a2l_output.function_stack()]) else: funs = files = lines = [] next_call_stack = assemble_call_stack(funs, files, lines) diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index 7c21f0ba2..e31f0bff8 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -252,7 +252,7 @@ def parse_trace(filename, kwargs): events += flush(lah, buf, **kwargs) print(f' parsed {lines-fails} of {lines} lines', file=sys.stderr) - + # Assign a per-trace unique TID or PID to all events for event in events: if kwargs['collapse_call_stack']:
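
The current_and_next() helper added to gen_trace.py pairs each line with its successor so the final line can be recognized by nextl being None; a self-contained check of that behaviour:

from itertools import tee, islice, chain

def current_and_next(iterable):
    # Two views of the same iterator: one as-is, one shifted left by one and
    # padded with None, so the last element is paired with None
    currs, nexts = tee(iterable, 2)
    nexts = chain(islice(nexts, 1, None), [None])
    return zip(currs, nexts)

lines = ['addi a0, a0, 1', 'mul a1, a0, a0', 'ret']
for lineno, (line, nextl) in enumerate(current_and_next(lines)):
    print(lineno, repr(line), repr(nextl))
# 0 'addi a0, a0, 1' 'mul a1, a0, a0'
# 1 'mul a1, a0, a0' 'ret'
# 2 'ret' None
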