diff --git a/util/trace/a2l.py b/util/trace/a2l.py
new file mode 100644
index 000000000..c62633739
--- /dev/null
+++ b/util/trace/a2l.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande
+#
+# Utilities for common tasks involving addr2line.
+
+import os
+from pathlib import Path
+from functools import lru_cache
+from operator import itemgetter
+
+
+def unzip(ls):
+    return zip(*ls)
+
+
+def format_function_name(name):
+    if name == '??':
+        return 'unknown function'
+    return name
+
+
+def format_line(num):
+    if num == '?':
+        return -1
+    return int(num)
+
+
+class Addr2LineOutput:
+
+    indent_unit = '  '
+
+    def __init__(self, raw):
+        self.raw = raw
+
+    # Returns the function stack of the current line.
+    # If there was no function inlining, the stack includes only the
+    # function the line belongs to. If there was inlining, it includes
+    # all functions the line belonged to after inlining the previous,
+    # up to (and including) the last function which was not inlined.
+    def function_stack(self):
+        output = self.raw.split('\n')
+
+        functions = output[::2]
+        filepaths, lines = unzip([o.split(':') for o in output[1::2]])
+
+        functions = map(format_function_name, functions)
+        lines = map(format_line, lines)
+
+        stack = zip(functions, filepaths, lines)
+        stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack]
+        return stack
+
+    def function_stack_string(self, short=True):
+        stack = reversed(self.function_stack())
+        s = ''
+        for i, level in enumerate(stack):
+            func, file, line = level.values()
+            if short:
+                file = Path(file).name
+            indent = self.indent_unit * i
+            s += f'{indent}{func} ({file}:{line})\n'
+        return s
+
+    def line(self):
+        file, line = itemgetter('file', 'line')(self.function_stack()[0])
+
+        # Open source file
+        src = []
+        try:
+            with open(file, 'r') as f:
+                src = [x.strip() for x in f.readlines()]
+        except OSError:
+            src = []
+
+        # Extract line, guarding against invalid line numbers
+        if src and 0 < line <= len(src):
+            return src[line - 1]
+        else:
+            return ''
+
+    def __str__(self):
+        s = self.function_stack_string()
+        line = self.line()
+        if line:
+            indent = self.indent_unit * len(s.strip().split('\n'))
+            s += f'{indent}{line}'
+        return s
+
+
+class Elf:
+
+    def __init__(self, elf, a2l_binary='addr2line'):
+        self.elf = Path(elf)
+        self.a2l = a2l_binary
+
+        assert self.elf.exists(), f'File not found: {self.elf}'
+
+    @lru_cache(maxsize=1024)
+    def addr2line(self, addr):
+        if isinstance(addr, str):
+            addr = int(addr, 16)
+        cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}'
+        return Addr2LineOutput(os.popen(cmd).read())
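+
+
+# Example usage (hypothetical ELF path and address; `addr2line` must be
+# on the PATH):
+#
+#   elf = Elf('build/app.elf')
+#   print(elf.addr2line('80000024'))
+#
+# For a PC inside a function inlined into main(), this could print:
+#
+#   main (main.c:42)
+#     vec_add (math.h:7)
+#       c[i] = a[i] + b[i];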
diff --git a/util/trace/annotate.py b/util/trace/annotate.py
index a88664544..512556190 100755
--- a/util/trace/annotate.py
+++ b/util/trace/annotate.py
@@ -22,9 +22,8 @@
 import sys
 import os
 import re
-from functools import lru_cache
 import argparse
-from termcolor import colored
+import a2l
 
 # Argument parsing
 parser = argparse.ArgumentParser('annotate', allow_abbrev=True)
@@ -84,7 +83,7 @@
 args = parser.parse_args()
 
-elf = args.elf
+elf_file = args.elf
 trace = args.trace
 output = args.output
 diff = args.diff
@@ -93,7 +92,7 @@
 keep_time = args.keep_time
 
 if not quiet:
-    print('elf:', elf, file=sys.stderr)
+    print('elf:', elf_file, file=sys.stderr)
     print('trace:', trace, file=sys.stderr)
     print('output:', output, file=sys.stderr)
     print('diff:', diff, file=sys.stderr)
@@ -110,34 +109,9 @@
 trace_start_col = -1
 
-
-@lru_cache(maxsize=1024)
-def adr2line(addr):
-    cmd = f'{addr2line} -e {elf} -f -i {addr:x}'
-    return os.popen(cmd).read().split('\n')
-
-
-# helper functions to parse addr2line output
-def a2l_file_path(a2l_file_str):
-    return a2l_file_str.split(':')[0]
-
-
-def a2l_file_name(a2l_file_str):
-    return a2l_file_str.split('/')[-1].split(':')[0]
-
-
-def a2l_file_line(a2l_file_str):
-    return int(a2l_file_str.split(':')[-1].split(' ')[0])
-
-
-def format_a2l_funcname(a2l_func_name):
-    if a2l_func_name == '??':
-        return 'unknown function'
-    return a2l_func_name
-
-
 # helper functions to assemble diff output
 def format_call(level, call):
-    funcname = format_a2l_funcname(call[0])
+    funcname = a2l.format_function_name(call[0])
     if level == 0:
         return f'{funcname} ({call[1]})\n'
     else:
@@ -189,6 +163,9 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
         of.write(f'{hunk_header}{hunk_trace}{hunk_source}')
 
 
+# Open ELF file for addr2line processing
+elf = a2l.Elf(elf_file)
+
 # core functionality
 with open(trace, 'r') as f:
 
@@ -223,12 +200,16 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
         # RTL traces might not contain a PC on each line
         try:
             # Get address from PC column
-            addr_str = cols[3]
-            addr = int(addr_str, base=16)
+            addr = cols[3]
             # Find index of first character in PC
             if trace_start_col < 0:
-                trace_start_col = line.find(addr_str)
+                trace_start_col = line.find(addr)
+            # Get addr2line information and format it as an assembly comment
+            a2l_output = elf.addr2line(addr)
+            annot = '\n'.join([f'#; {a2l_line}' for a2l_line in str(a2l_output).split('\n')])
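+            # For a PC inside an inlined function, the resulting
+            # annotation could look like (hypothetical example):
+            #   #; main (main.c:42)
+            #   #;   vec_add (math.h:7)
+            #   #;     c[i] = a[i] + b[i];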
         except (ValueError, IndexError):
+            a2l_output = None
+            annot = ''
         if keep_time:
             filtered_line = f'{time:>12} {line[trace_start_col:]}'
         else:
@@ -245,41 +226,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
             else:
                 filtered_line = f'{line[trace_start_col:]}'
 
-        addr_hex = f'{addr:x}'
-        ret = adr2line(addr)
-
-        funs = ret[::2]
-        file_paths = [a2l_file_path(x) for x in ret[1::2]]
-        file_names = [a2l_file_name(x) for x in ret[1::2]]
-        file_lines = [a2l_file_line(x) for x in ret[1::2]]
-        # Assemble annotation string
-        if len(funs):
-            annot = f'#; {funs[0]} ({file_names[0]}:{file_lines[0]})'
-            for fun, file_name, file_line in zip(funs[1:], file_names[1:], file_lines[1:]):
-                annot = f'{annot}\n#; in {fun} ({file_name}:{file_line})'
-
-        # Get source of last file and print the line
-        src_fname = file_paths[0]
-        if src_fname not in src_files.keys():
-            try:
-                # Issue warning if source was modified after trace
-                src_timestamp = os.path.getmtime(src_fname)
-                if src_timestamp >= trace_timestamp:
-                    print(colored('Warning:', 'yellow'),
-                          f'{src_fname} has been edited since the trace was generated')
-
-                with open(src_fname, 'r') as src_f:
-                    src_files[src_fname] = [x.strip() for x in src_f.readlines()]
-            except OSError:
-                src_files[src_fname] = None
-        if src_files[src_fname] is not None:
-            src_line = src_files[src_fname][file_lines[0]-1]
-            annot = f'{annot}\n#; {src_line}'
-
         # Print diff
         if diff:
             # Compare current and previous call stacks
-            next_call_stack = assemble_call_stack(funs, file_paths, file_lines)
+            if a2l_output:
+                funs, files, lines = zip(*[level.values()
+                                           for level in a2l_output.function_stack()])
+            else:
+                funs = files = lines = []
+            next_call_stack = assemble_call_stack(funs, files, lines)
             matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack)
             matching_src_line = matching_source_line(next_call_stack, call_stack)
@@ -297,13 +251,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
             call_stack = next_call_stack
 
             # Assemble source part of hunk
-            if len(funs) and src_files[src_fname]:
+            src_line = a2l_output.line() if a2l_output else ''
+            if len(funs) and src_line:
                 for i, call in enumerate(call_stack):
                     if i >= matching_cstack_levels:
                         hunk_source += f'+{format_call(i, call)}'
                 if not matching_src_line:
                     indentation = ' ' * (len(call_stack) - 1)
-                    hunk_source += f'+{indentation}{file_lines[0]}: {src_line}\n'
+                    hunk_source += f'+{indentation}{lines[0]}: {src_line}\n'
 
             # Assemble trace part of hunk
             hunk_trace += f'-{filtered_line}'
@@ -329,4 +284,3 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
 
 if not quiet:
     print(' done')
-    print(adr2line.cache_info())
diff --git a/util/trace/eventvis.py b/util/trace/eventvis.py
index 2d81ef8fb..4d0fdfdc7 100755
--- a/util/trace/eventvis.py
+++ b/util/trace/eventvis.py
@@ -31,6 +31,7 @@
 import argparse
 import csv
 import json
+import tracevis
 
 
 def pairwise(iterable):
@@ -51,6 +52,15 @@ def main():
         'csv',
         metavar='<csv>',
         help='Input CSV file')
+    parser.add_argument(
+        '--traces',
+        metavar='<traces>',
+        nargs='*',
+        help='Simulation traces to process')
+    parser.add_argument(
+        '--elf',
+        nargs='?',
+        help='ELF from which the traces were generated')
     parser.add_argument(
         '-o',
         '--output',
@@ -60,8 +70,21 @@ def main():
         help='Output JSON file')
     args = parser.parse_args()
 
-    # Read CSV to collect TraceViewer events
+    # TraceViewer events
     events = []
+
+    # Add a dummy instant event to mark time 0. This prevents the
+    # events from being shifted from their actual start times to
+    # align the first event at time 0.
+    event = {'name': 'zero',
+             'ph': 'I',  # Instant event type
+             'ts': 0,
+             's': 'g'  # Global scope
+             }
+    events.append(event)
+
+    # Read CSV to collect TraceViewer events
     with open(args.csv) as f:
         reader = csv.reader(f, delimiter=',')
 
@@ -92,6 +115,13 @@ def main():
                 }
                 events.append(event)
 
+    # Optionally, also extract instruction-level events
+    # from the simulation traces
+    if args.traces and args.elf:
+        events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
+                                        addr2line='addr2line', use_time=True, pid=1,
+                                        cache=True, elf=args.elf, collapse_call_stack=True)
+
     # Create TraceViewer JSON object
     tvobj = {}
     tvobj['traceEvents'] = events
diff --git a/util/trace/layout_events.py b/util/trace/layout_events.py
index a17fa504d..ea877c53c 100755
--- a/util/trace/layout_events.py
+++ b/util/trace/layout_events.py
@@ -40,6 +40,7 @@
 import argparse
 import csv
 import pandas as pd
+from math import isnan
 
 
 def main():
@@ -53,6 +54,11 @@ def main():
         'layout',
         metavar='<layout>',
         help='Layout CSV file')
+    parser.add_argument(
+        '--num-clusters',
+        type=int,
+        default=1,
+        help='Number of clusters')
     parser.add_argument(
         '-o',
         '--output',
@@ -65,58 +71,67 @@ def main():
 
     # Read input CSV
     df = pd.read_csv(args.csv)
 
-    # Open output CSV for writing
-    with open(args.output, mode='w') as out_f:
-        writer = csv.writer(out_f, delimiter=',', quotechar='"')
-
-        # Open layout CSV
-        with open(args.layout) as layout_f:
-            layout_reader = csv.reader(layout_f, delimiter=',')
-
-            # Get region labels from layout header
-            regions = [label for label in next(layout_reader) if label and not label.isspace()]
-
-            # Generate output header: appropriately spaced region labels
-            header = [''] + [val for label in regions for val in [label, '']]
-            writer.writerow(header)
-
-            # Iterate layout rows
-            for row in layout_reader:
-
-                # First entry in row is a hart ID or a Python expression
-                # which generates a list of hart IDs
-                expr = row[0]
-                code = compile(expr, "<string>", "eval")
-                tids = eval(code)
-                if isinstance(tids, int):
-                    tids = [tids]
-
-                # Iterate hart IDs
-                for tid in tids:
-
-                    # Start output row with hart ID
-                    orow = [tid]
-
-                    # Iterate all other cells in layout row (indices of regions to take)
-                    for cell in row[1:]:
-
-                        # If the cell is not empty, get start and end times
-                        # of the region from the input CSV and append them to the
-                        # output row. Otherwise, leave cells empty.
-                        if cell and not cell.isspace():
-                            reg_idx = int(cell)
-                            row_idx = tid
-                            col_idx = 1 + reg_idx * 2
-                            assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds'
-                            assert (col_idx + 1) < df.shape[1], \
-                                f'Region index {reg_idx} out of bounds'
-                            orow.append(int(df.iat[row_idx, col_idx]))
-                            orow.append(int(df.iat[row_idx, col_idx + 1]))
-                        else:
-                            orow.append('')
-                            orow.append('')
-
-                    writer.writerow(orow)
+    # Output CSV data
+    data = []
+    columns = []
+
+    # Open layout CSV
+    with open(args.layout) as layout_f:
+        layout_reader = csv.reader(layout_f, delimiter=',')
+
+        # Get region labels from layout header
+        regions = [label for label in next(layout_reader) if label and not label.isspace()]
+
+        # Generate output columns: appropriately spaced region labels
+        columns = ['hartid'] + [val for label in regions for val in [label, '']]
+
+        # Iterate layout rows
+        for row in layout_reader:
+
+            # First entry in row is a hart ID or a Python expression
+            # which generates a list of hart IDs
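+            # (e.g. "4" or "range(0, num_clusters)"; hypothetical examples)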
"eval") - tids = eval(code) - if isinstance(tids, int): - tids = [tids] - - # Iterate hart IDs - for tid in tids: - - # Start output row with hart ID - orow = [tid] - - # Iterate all other cells in layout row (indices of regions to take) - for cell in row[1:]: - - # If the cell is not empty, get start and end times - # of the region from the input CSV and append them to the - # output row. Otherwise, leave cells empty. - if cell and not cell.isspace(): - reg_idx = int(cell) - row_idx = tid - col_idx = 1 + reg_idx * 2 - assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' - assert (col_idx + 1) < df.shape[1], \ - f'Region index {reg_idx} out of bounds' - orow.append(int(df.iat[row_idx, col_idx])) - orow.append(int(df.iat[row_idx, col_idx + 1])) - else: - orow.append('') - orow.append('') - - writer.writerow(orow) + # Output CSV data + data = [] + columns = [] + + # Open layout CSV + with open(args.layout) as layout_f: + layout_reader = csv.reader(layout_f, delimiter=',') + + # Get region labels from layout header + regions = [label for label in next(layout_reader) if label and not label.isspace()] + + # Generate output columns: appropriately spaced region labels + columns = ['hartid'] + [val for label in regions for val in [label, '']] + + # Iterate layout rows + for row in layout_reader: + + # First entry in row is a hart ID or a Python expression + # which generates a list of hart IDs + expr = row[0] + code = compile(expr, "", "eval") + tids = eval(code, {}, {'num_clusters': args.num_clusters}) + if type(tids) == int: + tids = [tids] + + # Iterate hart IDs + for tid in tids: + + # Start output row with hart ID + orow = [tid] + + # Iterate all other cells in layout row (indices of regions to take) + for cell in row[1:]: + + # If the cell is not empty, get start and end times + # of the region from the input CSV and append them to the + # output row. Otherwise, leave cells empty. + if cell and not cell.isspace(): + reg_idx = int(cell) + row_idx = tid + col_idx = 1 + reg_idx * 2 + assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' + assert (col_idx + 1) < df.shape[1],\ + f'Region index {reg_idx} out of bounds for hart {tid}' + assert not isnan(df.iat[row_idx, col_idx]),\ + (f'Region {reg_idx} looks empty for hart {tid},' + f'check whether it was simulated') + orow.append(int(df.iat[row_idx, col_idx])) + orow.append(int(df.iat[row_idx, col_idx + 1])) + else: + orow.append('') + orow.append('') + + data.append(orow) + + # Create output dataframe and write to CSV + df = pd.DataFrame(data, columns=columns) + df.set_index('hartid', inplace=True) + df.sort_index(axis='index', inplace=True) + df.index.name = None + df.to_csv(args.output) if __name__ == '__main__': diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index ecc344253..599c82bd6 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -12,12 +12,13 @@ # This script is inspired by https://github.com/SalvatoreDiGirolamo/tracevis # Author: Noah Huetter # Samuel Riedel +# Luca Colagrande import re -import os import sys -from functools import lru_cache +import json import argparse +from a2l import Elf has_progressbar = True try: @@ -31,13 +32,16 @@ # line format: # Snitch RTL simulation: # 101000 82 M 0x00001000 csrr a0, mhartid #; comment -# time cycle priv_lvl pc insn +# CVA6 RTL simulation: +# 101ns 82 M 0000000000001000 0 301022f3 csrr t0, misa ... 
 
 # regex matches a line of instruction retired by the accelerator
 # 0 -> time
 # 1 -> cycle
 # 2 -> privilege level
 # 3 -> comment
 ACC_LINE_REGEX = r' *(\d+) +(\d+) +([3M1S0U]?) *#; (.*)'
 
-buf = []
 
+# Formats the outputs of `parse_line()` into TraceViewer events,
+# each represented as a dictionary. It operates on multiple such
+# outputs at once, collected in the buffer `buf`.
+def flush(lah, buf, **kwargs):
+    elf = kwargs['elf']
+    fmt = kwargs['fmt']
+    use_time = kwargs['use_time']
+    collapse_call_stack = kwargs['collapse_call_stack']
 
-@lru_cache(maxsize=1024)
-def addr2line_cache(addr):
-    cmd = f'{addr2line} -e {elf} -f -a -i {addr:x}'
-    return os.popen(cmd).read().split('\n')
-
-
-def flush(buf, hartid):
-    global output_file
-    # get function names
-    pcs = [x[3] for x in buf]
-    a2ls = []
-
-    if cache:
-        for addr in pcs:
-            a2ls += addr2line_cache(int(addr, base=16))[:-1]
-    else:
-        a2ls = os.popen(
-            f'{addr2line} -e {elf} -f -a -i {" ".join(pcs)}').read().split('\n')[:-1]
-
+    # Iterate buffer entries
+    events = []
     for i in range(len(buf)-1):
+
         (time, cyc, priv, pc, instr, args, cmt) = buf.pop(0)
 
         if use_time:
@@ -91,158 +87,86 @@ def flush(buf, hartid):
             # Have lookahead time to this instruction?
             next_time = lah[time] if time in lah else next_time
+        duration = next_time - time
 
-        # print(f'time "{time}", cyc "{cyc}", priv "{priv}", pc "{pc}"'
-        #       f', instr "{instr}", args "{args}"', file=sys.stderr)
-
-        [pc, func, file] = a2ls.pop(0), a2ls.pop(0), a2ls.pop(0)
-
-        # check for more output of a2l
-        inlined = ''
-        while not a2ls[0].startswith('0x'):
-            inlined += '(inlined by) ' + a2ls.pop(0)
-        # print(f'pc "{pc}", func "{func}", file "{file}"')
+        # Get information on current instruction from addr2line
+        a2l_info = elf.addr2line(pc)
 
-        # assemble values for json
+        # Assemble TraceViewer event
         # Doc: https://docs.google.com/document/d/
-        # 1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+        #      1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+        event = {}
         # The name of the event, as displayed in Trace Viewer
-        name = instr
+        event['name'] = instr
+        # The event type, 'X' indicates a "complete event"
+        event['ph'] = 'X'
         # The event categories. This is a comma separated list of categories for the event.
        # The categories can be used to hide events in the Trace Viewer UI.
-        cat = 'instr'
-        # The tracing clock timestamp of the event.
-        # The timestamps are provided at microsecond granularity.
-        ts = time
-        # There is an extra parameter dur to specify the tracing clock duration
-        # of complete events in microseconds.
-        duration = next_time - time
-
-        if banshee:
+        event['cat'] = 'instr'
+        # The tracing clock timestamp of the event. The timestamps are provided at microsecond
+        # granularity.
+        if use_time:
+            time = time / 1000 if fmt == 'cva6' else time / 1000000
+        event['ts'] = time
+        # There is an extra parameter dur to specify the tracing clock duration of complete
+        # events in microseconds. In Banshee, each instruction takes one cycle
+        if use_time:
+            duration = duration / 1000 if fmt == 'cva6' else duration / 1000000
+        event['dur'] = 1 if fmt == 'banshee' else duration
+        # The thread ID is used to group events in a single TraceViewer row
+        if not collapse_call_stack:
+            event['tid'] = a2l_info.function_stack()[0]['func']
+        if fmt == 'banshee':
             # Banshee stores all traces in a single file
-            hartid = priv
-            # In Banshee, each instruction takes one cycle
-            duration = 1
-
-        pid = elf+':hartid'+str(hartid)
-        funcname = func
-
-        # args
-        arg_pc = pc
-        arg_instr = instr
-        arg_args = args
-        arg_cycles = cyc
-        arg_coords = file
-        arg_inlined = inlined
-
-        output_file.write((
-            f'{{"name": "{name}", "cat": "{cat}", "ph": "X", '
-            f'"ts": {ts}, "dur": {duration}, "pid": "{pid}", '
-            f'"tid": "{funcname}", "args": {{"pc": "{arg_pc}", '
-            f'"instr": "{arg_instr} {arg_args}", "time": "{arg_cycles}", '
-            f'"Origin": "{arg_coords}", "inline": "{arg_inlined}"'
-            f'}}}},\n'))
-
-
-def parse_line(line, hartid):
-    global last_time, last_cyc
+            event['tid'] = priv
+        # Additional event args
+        event['args'] = {}
+        event['args']['pc'] = pc
+        event['args']['instr'] = f'{instr} {args}'
+        if cmt:
+            event['args']['comment'] = cmt
+        event['args']['cycle'] = cyc
+        event['args']['stack'] = a2l_info.function_stack_string(short=True)
+        event['args']['line'] = a2l_info.line()
+
+        events.append(event)
+    return events
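+
+
+# A resulting event could look like (hypothetical values):
+#   {'name': 'csrr', 'ph': 'X', 'cat': 'instr', 'ts': 101000, 'dur': 3,
+#    'args': {'pc': '0x00001000', 'instr': 'csrr a0, mhartid', ...}}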
+
+
+# Parses a trace line and returns a tuple of values extracted from it
+def parse_line(line, **kwargs):
+    fmt = kwargs['fmt']
+
+    # Compile regex
+    re_line = re.compile(REGEX[fmt])
+
     match = re_line.match(line)
     if match:
-        (time, cyc, priv, pc, instr, args, cmt) = tuple(
-            [match.group(i+1).strip() for i in range(re_line.groups)])
-        buf.append((time, cyc, priv, pc, instr, args, cmt))
-        last_time, last_cyc = time, cyc
-
-        if len(buf) > 10:
-            flush(buf, hartid)
-        return 0
-
-
-# Argument parsing
-parser = argparse.ArgumentParser('tracevis', allow_abbrev=True)
-parser.add_argument(
-    'elf',
-    metavar='<elf>',
-    help='The binary executed to generate the traces',
-
-
-)
-parser.add_argument(
-    'traces',
-    metavar='<traces>',
-    nargs='+',
-    help='Snitch traces to visualize')
-parser.add_argument(
-    '-o',
-    '--output',
-    metavar='<json>',
-    nargs='?',
-    default='chrome.json',
-    help='Output JSON file')
-parser.add_argument(
-    '--addr2line',
-    metavar='<addr2line>',
-    nargs='?',
-    default='addr2line',
-    help='`addr2line` binary to use for parsing')
-parser.add_argument(
-    '-t',
-    '--time',
-    action='store_true',
-    help='Use the traces time instead of cycles')
-parser.add_argument(
-    '-b',
-    '--banshee',
-    action='store_true',
-    help='Parse Banshee traces')
-parser.add_argument(
-    '--no-cache',
-    action='store_true',
-    help='Disable addr2line caching (slow but might give better traces in some cases)')
-parser.add_argument(
-    '-s',
-    '--start',
-    metavar='<line>',
-    nargs='?',
-    type=int,
-    default=0,
-    help='First line to parse')
-parser.add_argument(
-    '-e',
-    '--end',
-    metavar='<line>',
-    nargs='?',
-    type=int,
-    default=-1,
-    help='Last line to parse')
-
-args = parser.parse_args()
-
-elf = args.elf
-traces = args.traces
-output = args.output
-use_time = args.time
-banshee = args.banshee
-addr2line = args.addr2line
-cache = not args.no_cache
-
-print('elf:', elf, file=sys.stderr)
-print('traces:', traces, file=sys.stderr)
-print('output:', output, file=sys.stderr)
-print('addr2line:', addr2line, file=sys.stderr)
-print('cache:', cache, file=sys.stderr)
-
-# Compile regex
-if banshee:
-    re_line = re.compile(BANSHEE_REGEX)
-else:
-    re_line = re.compile(RTL_REGEX)
-
-re_acc_line = re.compile(ACC_LINE_REGEX)
-
-
-def offload_lookahead(lines):
+        # TODO extend CVA6 regex to extract instruction args
+        if fmt == 'cva6':
+            (time, cyc, priv, pc, instr) = tuple(
+                [match.group(i+1).strip() for i in range(re_line.groups)])
+            args = cmt = ''
+        else:
+            (time, cyc, priv, pc, instr, args, cmt) = tuple(
+                [match.group(i+1).strip() for i in range(re_line.groups)])
+        return (time, cyc, priv, pc, instr, args, cmt)
+
+    return None
+
+
+# Parses a trace file and returns a dictionary mapping the timestamp
+# at which each instruction is issued to the timestamp at which it
+# writes back.
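+# For instance, a (hypothetical) entry {101000: 104000} means the
+# instruction issued at time 101000 writes back at time 104000.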
+def offload_lookahead(lines, **kwargs):
+    fmt = kwargs['fmt']
+    use_time = kwargs['use_time']
+
+    # Compile regex
+    re_line = re.compile(REGEX[fmt])
+    re_acc_line = re.compile(ACC_LINE_REGEX)
+
     # dict mapping time stamp of retired instruction to time stamp of
     # accelerator complete
     lah = {}
@@ -287,40 +211,177 @@ def offload_lookahead(lines):
     return lah
 
-lah = {}
-
-with open(output, 'w') as output_file:
-    # JSON header
-    output_file.write('{"traceEvents": [\n')
-
-    for filename in traces:
-        hartid = 0
-        parsed_nums = re.findall(r'\d+', filename)
-        hartid = int(parsed_nums[-1]) if len(parsed_nums) else hartid+1
-        fails = lines = 0
-        last_time = last_cyc = 0
-
-        print(
-            f'parsing hartid {hartid} with trace {filename}', file=sys.stderr)
-        tot_lines = len(open(filename).readlines())
-        with open(filename) as f:
-            all_lines = f.readlines()[args.start:args.end]
-            # offload lookahead
-            if not banshee:
-                lah = offload_lookahead(all_lines)
-            if has_progressbar:
-                for lino, line in progressbar.progressbar(
-                        enumerate(all_lines),
-                        max_value=tot_lines):
-                    fails += parse_line(line, hartid)
-                    lines += 1
+# Parses a trace file and returns a list of TraceViewer events.
+# Each event is formatted as a dictionary.
+def parse_trace(filename, **kwargs):
+
+    start = kwargs['start']
+    end = kwargs['end']
+    fmt = kwargs['fmt']
+
+    # Open trace
+    print(f'parsing trace {filename}', file=sys.stderr)
+    lah = {}
+    buf = []
+    fails = lines = 0
+    with open(filename) as f:
+
+        # Read lines
+        all_lines = f.readlines()
+        if end < 0:
+            end = len(all_lines) + end + 1
+        all_lines = all_lines[start:end]
+
+        # offload lookahead
+        if fmt == 'snitch':
+            lah = offload_lookahead(all_lines, **kwargs)
+
+        # Use a progress bar iterator if the package is installed
+        if has_progressbar:
+            iterations = progressbar.progressbar(
+                enumerate(all_lines),
+                max_value=len(all_lines))
+        else:
+            iterations = enumerate(all_lines)
+
+        # Iterate lines
+        events = []
+        for lino, line in iterations:
+
+            # Parse line
+            parsed_line = parse_line(line, **kwargs)
+            if parsed_line:
+                buf.append(parsed_line)
+            else:
+                fails += 1
+            lines += 1
+
+            # Flush buffer when it contains enough lines
+            if len(buf) > 10:
+                events += flush(lah, buf, **kwargs)
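+        # flush() keeps the last buffered entry as lookahead for the
+        # following instruction, so a final flush drains the remainder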
+        events += flush(lah, buf, **kwargs)
+
+    print(f'  parsed {lines-fails} of {lines} lines', file=sys.stderr)
+    return events
+
+
+def parse_traces(traces, **kwargs):
+
+    # Open ELF file
+    elf_path = kwargs['elf']
+    kwargs['elf'] = Elf(elf_path, a2l_binary=kwargs['addr2line'])
+
+    # Iterate traces
+    events = []
+    for i, filename in enumerate(traces):
+
+        # Extract hartid from filename or use current index
+        # TODO doesn't work with hex numbers
+        # parsed_nums = re.findall(r'\d+', filename)
+        # hartid = int(parsed_nums[-1]) if len(parsed_nums) else i
+        hartid = i
+
+        # Extract TraceViewer events from trace
+        trace_events = parse_trace(filename, **kwargs)
+
+        # Assign a per-trace unique TID or PID to all events
+        pid = elf_path if 'pid' not in kwargs else kwargs['pid']
+        for event in trace_events:
+            if kwargs['collapse_call_stack']:
+                event['pid'] = pid
+                event['tid'] = hartid
             else:
-                for lino, line in enumerate(
-                        all_lines):
-                    fails += parse_line(line, hartid)
-                    lines += 1
-        flush(buf, hartid)
-        print(f'  parsed {lines-fails} of {lines} lines', file=sys.stderr)
-
-    # JSON footer
-    output_file.write(r'{}]}''\n')
+                event['pid'] = pid + ':hartid' + str(hartid)
+
+        # Add to events from previous traces
+        events += trace_events
+
+    return events
+
+
+def main(**kwargs):
+    elf = kwargs['elf']
+    traces = kwargs['traces']
+    output = kwargs['output']
+    addr2line = kwargs['addr2line']
+
+    print('elf:', elf, file=sys.stderr)
+    print('traces:', traces, file=sys.stderr)
+    print('output:', output, file=sys.stderr)
+    print('addr2line:', addr2line, file=sys.stderr)
+
+    # Parse traces and create TraceViewer JSON object
+    events = parse_traces(**kwargs)
+    tvobj = {'traceEvents': events, 'displayTimeUnit': 'ns'}
+
+    # Dump JSON object to file
+    with open(output, 'w') as output_file:
+        json.dump(tvobj, output_file, indent=4)
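+
+
+# Example invocation (hypothetical paths):
+#
+#   ./tracevis.py build/app.elf logs/trace_hart_0.txt -f snitch -o chrome.json
+#
+# The resulting JSON can be loaded in chrome://tracing or
+# https://ui.perfetto.dev for visualization.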
+
+
+# Parse command-line args
+def parse_args():
+    # Argument parsing
+    parser = argparse.ArgumentParser('tracevis', allow_abbrev=True)
+    parser.add_argument(
+        'elf',
+        metavar='<elf>',
+        help='The binary executed to generate the traces')
+    parser.add_argument(
+        'traces',
+        metavar='<traces>',
+        nargs='+',
+        help='Traces to visualize')
+    parser.add_argument(
+        '-o',
+        '--output',
+        metavar='<json>',
+        nargs='?',
+        default='chrome.json',
+        help='Output JSON file')
+    parser.add_argument(
+        '--addr2line',
+        metavar='<addr2line>',
+        nargs='?',
+        default='addr2line',
+        help='`addr2line` binary to use for parsing')
+    parser.add_argument(
+        '-t',
+        '--time',
+        dest='use_time',
+        action='store_true',
+        help="Use the trace's time instead of cycles")
+    parser.add_argument(
+        '-f',
+        '--format',
+        dest='fmt',
+        type=str,
+        default='snitch',
+        choices=FORMATS,
+        help='Trace format')
+    parser.add_argument(
+        '--collapse-call-stack',
+        action='store_true',
+        help='Visualize all instructions of a core in a single TraceViewer thread')
+    parser.add_argument(
+        '-s',
+        '--start',
+        metavar='<line>',
+        nargs='?',
+        type=int,
+        default=0,
+        help='First line to parse')
+    parser.add_argument(
+        '-e',
+        '--end',
+        metavar='<line>',
+        nargs='?',
+        type=int,
+        default=-1,
+        help='Last line to parse (inclusive)')
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = vars(parse_args())
+    main(**args)