util: Extend simulation and trace utilities (#178)

* `util/sim/Simulation.py`: Take time from last matching line in log
* `util/sim/sim_utils.py`: Add test summary table title
* `util/sim/Simulation.py`: Fix bug in dry-run mode
* util/trace: Add column field to annotated traces
* `util/trace/annotate.py`: Add annotations to lines without timestamp

  Previously, lines without timestamps, e.g. FPU instructions executed in the same cycle as an integer instruction, were being parsed incorrectly, so they would not be annotated. Now we use a regex to handle these lines properly.
* `util/trace/annotate.py`: Preserve performance metrics at end of file
* `util/trace/a2l.py`: Parse addr2line output using regex

  Correctly handles some corner cases where addr2line did not display all information for a certain function.
* treewide: Extend `visualize.py` as required to include CVA6 trace
* `util/trace/tracevis.py`: Parallelize processing multiple traces
* `util/trace/gen_trace.py`: Print line number on exception
pulp-platform · Aug 12, 2024 · 5928ffa · 5928ffa
1 parent 0fc8be0
commit 5928ffa
Show file tree

Hide file tree

Showing 8 changed files with 294 additions and 197 deletions.
diff --git a/target/common/common.mk b/target/common/common.mk
@@ -211,6 +211,8 @@ JOINT_PERF_DUMP   = $(LOGS_DIR)/perf.json
 ROI_DUMP          = $(LOGS_DIR)/roi.json
 VISUAL_TRACE      = $(LOGS_DIR)/trace.json
 
+VISUALIZE_PY_FLAGS += --tracevis "$(BINARY) $(SNITCH_TXT_TRACES) --addr2line $(ADDR2LINE) -f snitch"
+
 .PHONY: traces annotate visual-trace clean-traces clean-annotate clean-perf clean-visual-trace
 traces: $(TXT_TRACES)
 annotate: $(ANNOTATED_TRACES)
@@ -243,4 +245,4 @@ $(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY)
 	$(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@
 
 $(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY)
-	$(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) -o $@
+	$(VISUALIZE_PY) $(ROI_DUMP) $(VISUALIZE_PY_FLAGS) -o $@
diff --git a/util/bench/visualize.py b/util/bench/visualize.py
@@ -41,14 +41,10 @@ def main():
         metavar='<input>',
         help='Input JSON file')
     parser.add_argument(
-        '--traces',
-        metavar='<trace>',
-        nargs='*',
-        help='Simulation traces to process')
-    parser.add_argument(
-        '--elf',
-        nargs='?',
-        help='ELF from which the traces were generated')
+        '--tracevis',
+        action='append',
+        default=[],
+        help='Argument string to pass down to tracevis, to generate additional events.')
     parser.add_argument(
         '-o',
         '--output',
@@ -98,10 +94,16 @@ def main():
 
     # Optionally extract also instruction-level events
     # from the simulation traces
-    if args.traces and args.elf:
-        events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
-                                        addr2line='addr2line', use_time=True, pid=1,
-                                        cache=True, elf=args.elf, collapse_call_stack=True)
+    for tvargs in args.tracevis:
+        # Break tracevis argument string into a list of arguments
+        tvargs = tvargs.split()
+        # Add default arguments, and parse all tracevis arguments
+        tvargs.append('--time')
+        tvargs.append('--collapse-call-stack')
+        tvargs = vars(tracevis.parse_args(tvargs))
+        # Add more arguments, and get tracevis events
+        tvargs['pid'] = 1
+        events += tracevis.parse_traces(**tvargs)
 
     # Create TraceViewer JSON object
     tvobj = {}

diff --git a/util/sim/Simulation.py b/util/sim/Simulation.py
@@ -200,17 +200,20 @@ def get_retcode(self):
             return super().get_retcode()
         elif self.log is not None:
             # Extract the application's return code from the simulation log
-            with open(self.log, 'r') as f:
-                for line in f.readlines():
-                    regex_success = r'\[SUCCESS\] Program finished successfully'
-                    match_success = re.search(regex_success, line)
-                    if match_success:
-                        return 0
-                    else:
-                        regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
-                        match = re.search(regex_fail, line)
-                        if match:
-                            return int(match.group(1))
+            if not self.dry_run:
+                with open(self.log, 'r') as f:
+                    for line in f.readlines():
+                        regex_success = r'\[SUCCESS\] Program finished successfully'
+                        match_success = re.search(regex_success, line)
+                        if match_success:
+                            return 0
+                        else:
+                            regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
+                            match = re.search(regex_fail, line)
+                            if match:
+                                return int(match.group(1))
+            else:
+                return 0
 
     def successful(self):
         # Check that simulation return code matches expected value (in super class)
@@ -226,7 +229,9 @@ def get_simulation_time(self):
         # Extract the simulation time from the simulation log
         if self.log is not None:
             with open(self.log, 'r') as f:
-                for line in f.readlines():
+                # Read lines from the bottom of the file, since warning and error messages may
+                # also print a time to the log.
+                for line in reversed(f.readlines()):
                     regex = r'Time: (\d+) ([a-z]+)\s+'
                     match = re.search(regex, line)
                     if match:

diff --git a/util/sim/sim_utils.py b/util/sim/sim_utils.py
@@ -190,12 +190,9 @@ def print_summary(sims, early_exit=False, dry_run=False):
     Args:
         sims: A list of simulations from the simulation suite.
     """
-    # Header
-    header = '==== Test summary ===='
-    print(header)
-
     # Table
     table = PrettyTable()
+    table.title = 'Test summary'
     table.field_names = [
         'test',
         'launched',

diff --git a/util/trace/a2l.py b/util/trace/a2l.py
@@ -12,30 +12,20 @@
 from pathlib import Path
 from functools import lru_cache
 from operator import itemgetter
+import re
 
 
 def unzip(ls):
     return zip(*ls)
 
 
-def format_function_name(name):
-    if name == '??':
-        return 'unknown function'
-    return name
-
-
-def format_line(num):
-    if num == '?':
-        return -1
-    return int(num)
-
-
 class Addr2LineOutput:
 
     indent_unit = '  '
 
-    def __init__(self, raw):
+    def __init__(self, raw, toolchain='llvm'):
         self.raw = raw
+        self.toolchain = toolchain
 
     # Returns the function stack of the current line.
     # If there was no function inlining, then the function stack
@@ -44,52 +34,102 @@ def __init__(self, raw):
     # belonged to after inlining the previous, up to (and including)
     # the last function which was not inlined.
     def function_stack(self):
-        output = self.raw.split('\n')
-
-        functions = output[::2]
-        filepaths, lines = unzip([o.split(':') for o in output[1::2]])
-
-        functions = map(format_function_name, functions)
-        lines = map(format_line, lines)
-
-        stack = zip(functions, filepaths, lines)
-        stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack]
-        return stack
+        if self.toolchain == 'llvm':
+            # Define a regex pattern to capture relevant data. The function start filename
+            # and start line are optional, so they are enclosed in ()?.
+            pattern = re.compile(
+                r'^(?P<func>.+?)\s*'
+                r'Filename:\s*(?P<file>[^\n]+)\s*'
+                r'(Function start filename:\s*(?P<func_start_filename>[^\n]+)\s*'
+                r'Function start line:\s*(?P<func_start_line>\d+)\s*)?'
+                r'Line:\s*(?P<line>\d+)\s*'
+                r'Column:\s*(?P<col>\d+)',
+                re.MULTILINE)
+        else:
+            # Define a regex pattern to match function names, file paths and line numbers
+            pattern = re.compile(
+                r"^(?P<func>.+)\n(?P<file>.+):(?P<line>\d+)(?: \(discriminator \d+\))?$",
+                re.MULTILINE)
+
+        # Find all matches and organize them into a list of dictionaries
+        stack = [match.groupdict() for match in pattern.finditer(self.raw)]
+
+        # Format stack entries
+        def format_stack_entry(entry):
+            func, line, col = [entry.get(key, None) for key in ['func', 'line', 'col']]
+            # Rename unknown functions
+            if func == '??':
+                entry['func'] = 'unknown function'
+            # Add column key if missing and convert 0 line and cols to None
+            for key, val in zip(['line', 'col'], [line, col]):
+                if val is not None:
+                    val = int(val)
+                    if val == 0:
+                        entry[key] = None
+                    else:
+                        entry[key] = val
+                else:
+                    entry[key] = val
+            return entry
+        stack = list(map(format_stack_entry, stack))
+        # Do not create stack if compiler was unable to associate a line number to the address
+        return stack if stack[0]['line'] is not None else None
 
     def function_stack_string(self, short=True):
-        stack = reversed(self.function_stack())
         s = ''
         indent = ''
-        for i, level in enumerate(stack):
-            func, file, line = level.values()
-            if short:
-                file = Path(file).name
-            indent = self.indent_unit * i
-            s += f'{indent}{func} ({file}:{line})\n'
+        stack = self.function_stack()
+        if stack is not None:
+            stack = reversed(self.function_stack())
+            for i, level in enumerate(stack):
+                func, file, line, col = [level.get(key) for key in ["func", "file", "line", "col"]]
+                if short:
+                    file = Path(file).name
+                indent = self.indent_unit * i
+                s += f'{indent}{func} ({file}:{line}'
+                if col is not None:
+                    s += f':{col}'
+                s += ')\n'
+            # Remove final newline
+            s = s[:-1]
         return s
 
     def line(self):
-        file, line = itemgetter('file', 'line')(self.function_stack()[0])
-
-        # Open source file
-        src = []
-        try:
-            with open(file, 'r') as f:
-                src = [x.strip() for x in f.readlines()]
-        except OSError:
+        if self.function_stack():
+            file, line = itemgetter('file', 'line')(self.function_stack()[0])
+
+            # Open source file
             src = []
+            try:
+                with open(file, 'r') as f:
+                    src = [x for x in f.readlines()]
+            except OSError:
+                src = []
 
-        # Extract line
-        if src and line >= 0:
-            return src[line-1]
-        else:
-            return ''
+            # Extract line
+            if src and line is not None:
+                return src[line-1]
 
     def __str__(self):
-        s = self.function_stack_string()
-        if self.line():
-            indent = self.indent_unit * len(s.strip().split('\n'))
-            s += f'{indent}{self.line()}'
+        s = ''
+        stack = self.function_stack()
+        if stack:
+            col = stack[0]['col']
+            s = self.function_stack_string()
+            line = self.line()
+            if line is not None:
+                # Calculate indentation of original source line
+                stripped_line = line.lstrip()
+                orig_indent = len(line) - len(stripped_line)
+                stripped_line = stripped_line.rstrip()
+                # Calculate indentation to prepend to source line from the function stack string
+                indent = self.indent_unit * len(s.strip().split('\n'))
+                # Append source line to function stack string
+                s += f'\n{indent}{stripped_line}'
+                # Append a column indicator line
+                if col is not None:
+                    new_col = col - orig_indent + len(indent)
+                    s += '\n' + ' ' * (new_col - 1) + '^'
         return s
 
 
@@ -99,11 +139,23 @@ def __init__(self, elf, a2l_binary='addr2line'):
         self.elf = Path(elf)
         self.a2l = a2l_binary
 
+        # We must distinguish between LLVM and GCC toolchains as the latter
+        # does not support the `--verbose` flag
+        if 'riscv64-unknown-elf-addr2line' in a2l_binary:
+            self.toolchain = 'gcc'
+        elif 'llvm-addr2line' in a2l_binary:
+            self.toolchain = 'llvm'
+        else:
+            raise ValueError('addr2line binary expected to be either riscv64-unknown-elf-addr2line'
+                             ' or llvm-addr2line')
+
         assert self.elf.exists(), f'File not found {self.elf}'
 
     @lru_cache(maxsize=1024)
     def addr2line(self, addr):
         if isinstance(addr, str):
             addr = int(addr, 16)
         cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}'
-        return Addr2LineOutput(os.popen(cmd).read())
+        if self.toolchain == 'llvm':
+            cmd += ' --verbose'
+        return Addr2LineOutput(os.popen(cmd).read(), toolchain=self.toolchain)