diff --git a/util/trace/a2l.py b/util/trace/a2l.py
new file mode 100644
index 000000000..c62633739
--- /dev/null
+++ b/util/trace/a2l.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+# Copyright 2021 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande
+#
+# Utilities for common tasks involving addr2line.
+
+import os
+from pathlib import Path
+from functools import lru_cache
+from operator import itemgetter
+
+
+def unzip(ls):
+    return zip(*ls)
+
+
+def format_function_name(name):
+    if name == '??':
+        return 'unknown function'
+    return name
+
+
+def format_line(num):
+    if num == '?':
+        return -1
+    return int(num)
+
+
+class Addr2LineOutput:
+
+    indent_unit = '  '
+
+    def __init__(self, raw):
+        self.raw = raw
+
+    # Returns the function stack of the current line.
+    # If there was no function inlining, the stack includes only the
+    # function the line belongs to. If there was inlining, it includes
+    # all functions the line belonged to after inlining the previous,
+    # up to (and including) the last function which was not inlined.
+    def function_stack(self):
+        output = self.raw.split('\n')
+
+        functions = output[::2]
+        filepaths, lines = unzip([o.split(':') for o in output[1::2]])
+
+        functions = map(format_function_name, functions)
+        lines = map(format_line, lines)
+
+        stack = zip(functions, filepaths, lines)
+        stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack]
+        return stack
+
+    def function_stack_string(self, short=True):
+        stack = reversed(self.function_stack())
+        s = ''
+        for i, level in enumerate(stack):
+            func, file, line = level.values()
+            if short:
+                file = Path(file).name
+            indent = self.indent_unit * i
+            s += f'{indent}{func} ({file}:{line})\n'
+        return s
+
+    def line(self):
+        file, line = itemgetter('file', 'line')(self.function_stack()[0])
+
+        # Open source file
+        src = []
+        try:
+            with open(file, 'r') as f:
+                src = [x.strip() for x in f.readlines()]
+        except OSError:
+            src = []
+
+        # Extract line, guarding against invalid line numbers
+        if src and 0 < line <= len(src):
+            return src[line - 1]
+        else:
+            return ''
+
+    def __str__(self):
+        s = self.function_stack_string()
+        line = self.line()
+        if line:
+            indent = self.indent_unit * len(s.strip().split('\n'))
+            s += f'{indent}{line}'
+        return s
+
+
+class Elf:
+
+    def __init__(self, elf, a2l_binary='addr2line'):
+        self.elf = Path(elf)
+        self.a2l = a2l_binary
+
+        assert self.elf.exists(), f'File not found: {self.elf}'
+
+    @lru_cache(maxsize=1024)
+    def addr2line(self, addr):
+        if isinstance(addr, str):
+            addr = int(addr, 16)
+        cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}'
+        return Addr2LineOutput(os.popen(cmd).read())
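+
+
+# Example usage (hypothetical ELF path and address; `addr2line` must be
+# on the PATH):
+#
+#   elf = Elf('build/app.elf')
+#   print(elf.addr2line('80000024'))
+#
+# For a PC inside a function inlined into main(), this could print:
+#
+#   main (main.c:42)
+#     vec_add (math.h:7)
+#       c[i] = a[i] + b[i];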
diff --git a/util/trace/annotate.py b/util/trace/annotate.py
index a88664544..512556190 100755
--- a/util/trace/annotate.py
+++ b/util/trace/annotate.py
@@ -22,9 +22,8 @@
 import sys
 import os
 import re
-from functools import lru_cache
 import argparse
-from termcolor import colored
+import a2l
 
 # Argument parsing
 parser = argparse.ArgumentParser('annotate', allow_abbrev=True)
@@ -84,7 +83,7 @@
 args = parser.parse_args()
 
-elf = args.elf
+elf_file = args.elf
 trace = args.trace
 output = args.output
 diff = args.diff
@@ -93,7 +92,7 @@
 keep_time = args.keep_time
 
 if not quiet:
-    print('elf:', elf, file=sys.stderr)
+    print('elf:', elf_file, file=sys.stderr)
     print('trace:', trace, file=sys.stderr)
     print('output:', output, file=sys.stderr)
     print('diff:', diff, file=sys.stderr)
@@ -110,34 +109,9 @@
 trace_start_col = -1
 
-
-@lru_cache(maxsize=1024)
-def adr2line(addr):
-    cmd = f'{addr2line} -e {elf} -f -i {addr:x}'
-    return os.popen(cmd).read().split('\n')
-
-
-# helper functions to parse addr2line output
-def a2l_file_path(a2l_file_str):
-    return a2l_file_str.split(':')[0]
-
-
-def a2l_file_name(a2l_file_str):
-    return a2l_file_str.split('/')[-1].split(':')[0]
-
-
-def a2l_file_line(a2l_file_str):
-    return int(a2l_file_str.split(':')[-1].split(' ')[0])
-
-
-def format_a2l_funcname(a2l_func_name):
-    if a2l_func_name == '??':
-        return 'unknown function'
-    return a2l_func_name
-
-
 # helper functions to assemble diff output
 def format_call(level, call):
-    funcname = format_a2l_funcname(call[0])
+    funcname = a2l.format_function_name(call[0])
     if level == 0:
         return f'{funcname} ({call[1]})\n'
     else:
@@ -189,6 +163,9 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
         of.write(f'{hunk_header}{hunk_trace}{hunk_source}')
 
 
+# Open ELF file for addr2line processing
+elf = a2l.Elf(elf_file)
+
 # core functionality
 with open(trace, 'r') as f:
 
@@ -223,12 +200,16 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
         # RTL traces might not contain a PC on each line
         try:
             # Get address from PC column
-            addr_str = cols[3]
-            addr = int(addr_str, base=16)
+            addr = cols[3]
             # Find index of first character in PC
             if trace_start_col < 0:
-                trace_start_col = line.find(addr_str)
+                trace_start_col = line.find(addr)
+            # Get addr2line information and format it as an assembly comment
+            a2l_output = elf.addr2line(addr)
+            annot = '\n'.join([f'#; {a2l_line}' for a2l_line in str(a2l_output).split('\n')])
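+            # For a PC inside an inlined function, the resulting
+            # annotation could look like (hypothetical example):
+            #   #; main (main.c:42)
+            #   #;   vec_add (math.h:7)
+            #   #;     c[i] = a[i] + b[i];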
         except (ValueError, IndexError):
+            a2l_output = None
+            annot = ''
         if keep_time:
             filtered_line = f'{time:>12} {line[trace_start_col:]}'
         else:
@@ -245,41 +226,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
             else:
                 filtered_line = f'{line[trace_start_col:]}'
 
-        addr_hex = f'{addr:x}'
-        ret = adr2line(addr)
-
-        funs = ret[::2]
-        file_paths = [a2l_file_path(x) for x in ret[1::2]]
-        file_names = [a2l_file_name(x) for x in ret[1::2]]
-        file_lines = [a2l_file_line(x) for x in ret[1::2]]
-        # Assemble annotation string
-        if len(funs):
-            annot = f'#; {funs[0]} ({file_names[0]}:{file_lines[0]})'
-            for fun, file_name, file_line in zip(funs[1:], file_names[1:], file_lines[1:]):
-                annot = f'{annot}\n#; in {fun} ({file_name}:{file_line})'
-
-        # Get source of last file and print the line
-        src_fname = file_paths[0]
-        if src_fname not in src_files.keys():
-            try:
-                # Issue warning if source was modified after trace
-                src_timestamp = os.path.getmtime(src_fname)
-                if src_timestamp >= trace_timestamp:
-                    print(colored('Warning:', 'yellow'),
-                          f'{src_fname} has been edited since the trace was generated')
-
-                with open(src_fname, 'r') as src_f:
-                    src_files[src_fname] = [x.strip() for x in src_f.readlines()]
-            except OSError:
-                src_files[src_fname] = None
-        if src_files[src_fname] is not None:
-            src_line = src_files[src_fname][file_lines[0]-1]
-            annot = f'{annot}\n#; {src_line}'
-
         # Print diff
         if diff:
             # Compare current and previous call stacks
-            next_call_stack = assemble_call_stack(funs, file_paths, file_lines)
+            if a2l_output:
+                funs, files, lines = zip(*[level.values()
+                                           for level in a2l_output.function_stack()])
+            else:
+                funs = files = lines = []
+            next_call_stack = assemble_call_stack(funs, files, lines)
             matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack)
             matching_src_line = matching_source_line(next_call_stack, call_stack)
@@ -297,13 +251,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
             call_stack = next_call_stack
 
             # Assemble source part of hunk
-            if len(funs) and src_files[src_fname]:
+            src_line = a2l_output.line() if a2l_output else ''
+            if len(funs) and src_line:
                 for i, call in enumerate(call_stack):
                     if i >= matching_cstack_levels:
                         hunk_source += f'+{format_call(i, call)}'
                 if not matching_src_line:
                     indentation = ' ' * (len(call_stack) - 1)
-                    hunk_source += f'+{indentation}{file_lines[0]}: {src_line}\n'
+                    hunk_source += f'+{indentation}{lines[0]}: {src_line}\n'
 
             # Assemble trace part of hunk
             hunk_trace += f'-{filtered_line}'
@@ -329,4 +284,3 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
 
 if not quiet:
     print(' done')
-    print(adr2line.cache_info())
diff --git a/util/trace/eventvis.py b/util/trace/eventvis.py
index 2d81ef8fb..4d0fdfdc7 100755
--- a/util/trace/eventvis.py
+++ b/util/trace/eventvis.py
@@ -31,6 +31,7 @@
 import argparse
 import csv
 import json
+import tracevis
 
 
 def pairwise(iterable):
@@ -51,6 +52,15 @@ def main():
         'csv',
         metavar='<csv>',
         help='Input CSV file')
+    parser.add_argument(
+        '--traces',
+        metavar='<traces>',
+        nargs='*',
+        help='Simulation traces to process')
+    parser.add_argument(
+        '--elf',
+        nargs='?',
+        help='ELF from which the traces were generated')
     parser.add_argument(
         '-o',
         '--output',
@@ -60,8 +70,21 @@ def main():
         help='Output JSON file')
     args = parser.parse_args()
 
-    # Read CSV to collect TraceViewer events
+    # TraceViewer events
     events = []
+
+    # Add a dummy instant event to mark time 0. This prevents the
+    # events from being shifted from their actual start times to
+    # align the first event at time 0.
+    event = {'name': 'zero',
+             'ph': 'I',  # Instant event type
+             'ts': 0,
+             's': 'g'  # Global scope
+             }
+    events.append(event)
+
+    # Read CSV to collect TraceViewer events
     with open(args.csv) as f:
         reader = csv.reader(f, delimiter=',')
 
@@ -92,6 +115,13 @@ def main():
                 }
                 events.append(event)
 
+    # Optionally, also extract instruction-level events
+    # from the simulation traces
+    if args.traces and args.elf:
+        events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
+                                        addr2line='addr2line', use_time=True, pid=1,
+                                        cache=True, elf=args.elf, collapse_call_stack=True)
+
     # Create TraceViewer JSON object
     tvobj = {}
     tvobj['traceEvents'] = events
diff --git a/util/trace/layout_events.py b/util/trace/layout_events.py
index a17fa504d..ea877c53c 100755
--- a/util/trace/layout_events.py
+++ b/util/trace/layout_events.py
@@ -40,6 +40,7 @@
 import argparse
 import csv
 import pandas as pd
+from math import isnan
 
 
 def main():
@@ -53,6 +54,11 @@ def main():
         'layout',
         metavar='<layout>',
         help='Layout CSV file')
+    parser.add_argument(
+        '--num-clusters',
+        type=int,
+        default=1,
+        help='Number of clusters')
     parser.add_argument(
         '-o',
         '--output',
@@ -65,58 +71,67 @@ def main():
 
     # Read input CSV
     df = pd.read_csv(args.csv)
 
-    # Open output CSV for writing
-    with open(args.output, mode='w') as out_f:
-        writer = csv.writer(out_f, delimiter=',', quotechar='"')
-
-        # Open layout CSV
-        with open(args.layout) as layout_f:
-            layout_reader = csv.reader(layout_f, delimiter=',')
-
-            # Get region labels from layout header
-            regions = [label for label in next(layout_reader) if label and not label.isspace()]
-
-            # Generate output header: appropriately spaced region labels
-            header = [''] + [val for label in regions for val in [label, '']]
-            writer.writerow(header)
-
-            # Iterate layout rows
-            for row in layout_reader:
-
-                # First entry in row is a hart ID or a Python expression
-                # which generates a list of hart IDs
-                expr = row[0]
-                code = compile(expr, "<string>", "eval")
-                tids = eval(code)
-                if isinstance(tids, int):
-                    tids = [tids]
-
-                # Iterate hart IDs
-                for tid in tids:
-
-                    # Start output row with hart ID
-                    orow = [tid]
-
-                    # Iterate all other cells in layout row (indices of regions to take)
-                    for cell in row[1:]:
-
-                        # If the cell is not empty, get start and end times
-                        # of the region from the input CSV and append them to the
-                        # output row. Otherwise, leave cells empty.
-                        if cell and not cell.isspace():
-                            reg_idx = int(cell)
-                            row_idx = tid
-                            col_idx = 1 + reg_idx * 2
-                            assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds'
-                            assert (col_idx + 1) < df.shape[1], \
-                                f'Region index {reg_idx} out of bounds'
-                            orow.append(int(df.iat[row_idx, col_idx]))
-                            orow.append(int(df.iat[row_idx, col_idx + 1]))
-                        else:
-                            orow.append('')
-                            orow.append('')
-
-                    writer.writerow(orow)
+    # Output CSV data
+    data = []
+    columns = []
+
+    # Open layout CSV
+    with open(args.layout) as layout_f:
+        layout_reader = csv.reader(layout_f, delimiter=',')
+
+        # Get region labels from layout header
+        regions = [label for label in next(layout_reader) if label and not label.isspace()]
+
+        # Generate output columns: appropriately spaced region labels
+        columns = ['hartid'] + [val for label in regions for val in [label, '']]
+
+        # Iterate layout rows
+        for row in layout_reader:
+
+            # First entry in row is a hart ID or a Python expression
+            # which generates a list of hart IDs
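+            # (e.g. "4" or "range(0, num_clusters)"; hypothetical examples)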
"eval") - tids = eval(code) - if isinstance(tids, int): - tids = [tids] - - # Iterate hart IDs - for tid in tids: - - # Start output row with hart ID - orow = [tid] - - # Iterate all other cells in layout row (indices of regions to take) - for cell in row[1:]: - - # If the cell is not empty, get start and end times - # of the region from the input CSV and append them to the - # output row. Otherwise, leave cells empty. - if cell and not cell.isspace(): - reg_idx = int(cell) - row_idx = tid - col_idx = 1 + reg_idx * 2 - assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' - assert (col_idx + 1) < df.shape[1], \ - f'Region index {reg_idx} out of bounds' - orow.append(int(df.iat[row_idx, col_idx])) - orow.append(int(df.iat[row_idx, col_idx + 1])) - else: - orow.append('') - orow.append('') - - writer.writerow(orow) + # Output CSV data + data = [] + columns = [] + + # Open layout CSV + with open(args.layout) as layout_f: + layout_reader = csv.reader(layout_f, delimiter=',') + + # Get region labels from layout header + regions = [label for label in next(layout_reader) if label and not label.isspace()] + + # Generate output columns: appropriately spaced region labels + columns = ['hartid'] + [val for label in regions for val in [label, '']] + + # Iterate layout rows + for row in layout_reader: + + # First entry in row is a hart ID or a Python expression + # which generates a list of hart IDs + expr = row[0] + code = compile(expr, "", "eval") + tids = eval(code, {}, {'num_clusters': args.num_clusters}) + if type(tids) == int: + tids = [tids] + + # Iterate hart IDs + for tid in tids: + + # Start output row with hart ID + orow = [tid] + + # Iterate all other cells in layout row (indices of regions to take) + for cell in row[1:]: + + # If the cell is not empty, get start and end times + # of the region from the input CSV and append them to the + # output row. Otherwise, leave cells empty. + if cell and not cell.isspace(): + reg_idx = int(cell) + row_idx = tid + col_idx = 1 + reg_idx * 2 + assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' + assert (col_idx + 1) < df.shape[1],\ + f'Region index {reg_idx} out of bounds for hart {tid}' + assert not isnan(df.iat[row_idx, col_idx]),\ + (f'Region {reg_idx} looks empty for hart {tid},' + f'check whether it was simulated') + orow.append(int(df.iat[row_idx, col_idx])) + orow.append(int(df.iat[row_idx, col_idx + 1])) + else: + orow.append('') + orow.append('') + + data.append(orow) + + # Create output dataframe and write to CSV + df = pd.DataFrame(data, columns=columns) + df.set_index('hartid', inplace=True) + df.sort_index(axis='index', inplace=True) + df.index.name = None + df.to_csv(args.output) if __name__ == '__main__': diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index ecc344253..599c82bd6 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -12,12 +12,13 @@ # This script is inspired by https://github.com/SalvatoreDiGirolamo/tracevis # Author: Noah Huetter # Samuel Riedel +# Luca Colagrande import re -import os import sys -from functools import lru_cache +import json import argparse +from a2l import Elf has_progressbar = True try: @@ -31,13 +32,16 @@ # line format: # Snitch RTL simulation: # 101000 82 M 0x00001000 csrr a0, mhartid #; comment -# time cycle priv_lvl pc insn +# CVA6 RTL simulation: +# 101ns 82 M 0000000000001000 0 301022f3 csrr t0, misa ... 
 
 # regex matches a line of instruction retired by the accelerator
 # 0 -> time
 # 1 -> cycle
 # 2 -> privilege level
 # 3 -> comment
 ACC_LINE_REGEX = r' *(\d+) +(\d+) +([3M1S0U]?) *#; (.*)'
 
-buf = []
 
+# Formats the outputs of `parse_line()` into TraceViewer events,
+# each represented as a dictionary. It operates on multiple such
+# outputs at once, collected in the buffer `buf`.
+def flush(lah, buf, **kwargs):
+    elf = kwargs['elf']
+    fmt = kwargs['fmt']
+    use_time = kwargs['use_time']
+    collapse_call_stack = kwargs['collapse_call_stack']
 
-@lru_cache(maxsize=1024)
-def addr2line_cache(addr):
-    cmd = f'{addr2line} -e {elf} -f -a -i {addr:x}'
-    return os.popen(cmd).read().split('\n')
-
-
-def flush(buf, hartid):
-    global output_file
-    # get function names
-    pcs = [x[3] for x in buf]
-    a2ls = []
-
-    if cache:
-        for addr in pcs:
-            a2ls += addr2line_cache(int(addr, base=16))[:-1]
-    else:
-        a2ls = os.popen(
-            f'{addr2line} -e {elf} -f -a -i {" ".join(pcs)}').read().split('\n')[:-1]
-
+    # Iterate buffer entries
+    events = []
     for i in range(len(buf)-1):
+
         (time, cyc, priv, pc, instr, args, cmt) = buf.pop(0)
 
         if use_time:
@@ -91,158 +87,86 @@ def flush(buf, hartid):
             # Have lookahead time to this instruction?
             next_time = lah[time] if time in lah else next_time
+        duration = next_time - time
 
-        # print(f'time "{time}", cyc "{cyc}", priv "{priv}", pc "{pc}"'
-        #       f', instr "{instr}", args "{args}"', file=sys.stderr)
-
-        [pc, func, file] = a2ls.pop(0), a2ls.pop(0), a2ls.pop(0)
-
-        # check for more output of a2l
-        inlined = ''
-        while not a2ls[0].startswith('0x'):
-            inlined += '(inlined by) ' + a2ls.pop(0)
-        # print(f'pc "{pc}", func "{func}", file "{file}"')
+        # Get information on current instruction from addr2line
+        a2l_info = elf.addr2line(pc)
 
-        # assemble values for json
+        # Assemble TraceViewer event
         # Doc: https://docs.google.com/document/d/
-        # 1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+        #      1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+        event = {}
         # The name of the event, as displayed in Trace Viewer
-        name = instr
+        event['name'] = instr
+        # The event type, 'X' indicates a "complete event"
+        event['ph'] = 'X'
         # The event categories. This is a comma separated list of categories for the event.
        # The categories can be used to hide events in the Trace Viewer UI.
-        cat = 'instr'
-        # The tracing clock timestamp of the event.
-        # The timestamps are provided at microsecond granularity.
-        ts = time
-        # There is an extra parameter dur to specify the tracing clock duration
-        # of complete events in microseconds.
-        duration = next_time - time
-
-        if banshee:
+        event['cat'] = 'instr'
+        # The tracing clock timestamp of the event. The timestamps are provided at microsecond
+        # granularity.
+        if use_time:
+            time = time / 1000 if fmt == 'cva6' else time / 1000000
+        event['ts'] = time
+        # There is an extra parameter dur to specify the tracing clock duration of complete
+        # events in microseconds. In Banshee, each instruction takes one cycle
+        if use_time:
+            duration = duration / 1000 if fmt == 'cva6' else duration / 1000000
+        event['dur'] = 1 if fmt == 'banshee' else duration
+        # The thread ID is used to group events in a single TraceViewer row
+        if not collapse_call_stack:
+            event['tid'] = a2l_info.function_stack()[0]['func']
+        if fmt == 'banshee':
             # Banshee stores all traces in a single file
-            hartid = priv
-            # In Banshee, each instruction takes one cycle
-            duration = 1
-
-        pid = elf+':hartid'+str(hartid)
-        funcname = func
-
-        # args
-        arg_pc = pc
-        arg_instr = instr
-        arg_args = args
-        arg_cycles = cyc
-        arg_coords = file
-        arg_inlined = inlined
-
-        output_file.write((
-            f'{{"name": "{name}", "cat": "{cat}", "ph": "X", '
-            f'"ts": {ts}, "dur": {duration}, "pid": "{pid}", '
-            f'"tid": "{funcname}", "args": {{"pc": "{arg_pc}", '
-            f'"instr": "{arg_instr} {arg_args}", "time": "{arg_cycles}", '
-            f'"Origin": "{arg_coords}", "inline": "{arg_inlined}"'
-            f'}}}},\n'))
-
-
-def parse_line(line, hartid):
-    global last_time, last_cyc
+            event['tid'] = priv
+        # Additional event args
+        event['args'] = {}
+        event['args']['pc'] = pc
+        event['args']['instr'] = f'{instr} {args}'
+        if cmt:
+            event['args']['comment'] = cmt
+        event['args']['cycle'] = cyc
+        event['args']['stack'] = a2l_info.function_stack_string(short=True)
+        event['args']['line'] = a2l_info.line()
+
+        events.append(event)
+    return events
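+
+
+# A resulting event could look like (hypothetical values):
+#   {'name': 'csrr', 'ph': 'X', 'cat': 'instr', 'ts': 101000, 'dur': 3,
+#    'args': {'pc': '0x00001000', 'instr': 'csrr a0, mhartid', ...}}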
+
+
+# Parses a trace line and returns a tuple of values extracted from it
+def parse_line(line, **kwargs):
+    fmt = kwargs['fmt']
+
+    # Compile regex
+    re_line = re.compile(REGEX[fmt])
+
     match = re_line.match(line)
     if match:
-        (time, cyc, priv, pc, instr, args, cmt) = tuple(
-            [match.group(i+1).strip() for i in range(re_line.groups)])
-        buf.append((time, cyc, priv, pc, instr, args, cmt))
-        last_time, last_cyc = time, cyc
-
-        if len(buf) > 10:
-            flush(buf, hartid)
-        return 0
-
-
-# Argument parsing
-parser = argparse.ArgumentParser('tracevis', allow_abbrev=True)
-parser.add_argument(
-    'elf',
-    metavar='<elf>',
-    help='The binary executed to generate the traces',
-
-
-)
-parser.add_argument(
-    'traces',
-    metavar='<traces>',
-    nargs='+',
-    help='Snitch traces to visualize')
-parser.add_argument(
-    '-o',
-    '--output',
-    metavar='<json>',
-    nargs='?',
-    default='chrome.json',
-    help='Output JSON file')
-parser.add_argument(
-    '--addr2line',
-    metavar='<addr2line>',
-    nargs='?',
-    default='addr2line',
-    help='`addr2line` binary to use for parsing')
-parser.add_argument(
-    '-t',
-    '--time',
-    action='store_true',
-    help='Use the traces time instead of cycles')
-parser.add_argument(
-    '-b',
-    '--banshee',
-    action='store_true',
-    help='Parse Banshee traces')
-parser.add_argument(
-    '--no-cache',
-    action='store_true',
-    help='Disable addr2line caching (slow but might give better traces in some cases)')
-parser.add_argument(
-    '-s',
-    '--start',
-    metavar='<line>',
-    nargs='?',
-    type=int,
-    default=0,
-    help='First line to parse')
-parser.add_argument(
-    '-e',
-    '--end',
-    metavar='<line>',
-    nargs='?',
-    type=int,
-    default=-1,
-    help='Last line to parse')
-
-args = parser.parse_args()
-
-elf = args.elf
-traces = args.traces
-output = args.output
-use_time = args.time
-banshee = args.banshee
-addr2line = args.addr2line
-cache = not args.no_cache
-
-print('elf:', elf, file=sys.stderr)
-print('traces:', traces, file=sys.stderr)
-print('output:', output, file=sys.stderr)
-print('addr2line:', addr2line, file=sys.stderr)
-print('cache:', cache, file=sys.stderr)
-
-# Compile regex
-if banshee:
-    re_line = re.compile(BANSHEE_REGEX)
-else:
-    re_line = re.compile(RTL_REGEX)
-
-re_acc_line = re.compile(ACC_LINE_REGEX)
-
-
-def offload_lookahead(lines):
+        # TODO extend CVA6 regex to extract instruction args
+        if fmt == 'cva6':
+            (time, cyc, priv, pc, instr) = tuple(
+                [match.group(i+1).strip() for i in range(re_line.groups)])
+            args = cmt = ''
+        else:
+            (time, cyc, priv, pc, instr, args, cmt) = tuple(
+                [match.group(i+1).strip() for i in range(re_line.groups)])
+        return (time, cyc, priv, pc, instr, args, cmt)
+
+    return None
+
+
+# Parses a trace file and returns a dictionary mapping the timestamp
+# at which each instruction is issued to the timestamp at which it
+# writes back.
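+# For instance, a (hypothetical) entry {101000: 104000} means the
+# instruction issued at time 101000 writes back at time 104000.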
+def offload_lookahead(lines, **kwargs):
+    fmt = kwargs['fmt']
+    use_time = kwargs['use_time']
+
+    # Compile regex
+    re_line = re.compile(REGEX[fmt])
+    re_acc_line = re.compile(ACC_LINE_REGEX)
+
     # dict mapping time stamp of retired instruction to time stamp of
     # accelerator complete
     lah = {}
@@ -287,40 +211,177 @@ def offload_lookahead(lines):
     return lah
 
-lah = {}
-
-with open(output, 'w') as output_file:
-    # JSON header
-    output_file.write('{"traceEvents": [\n')
-
-    for filename in traces:
-        hartid = 0
-        parsed_nums = re.findall(r'\d+', filename)
-        hartid = int(parsed_nums[-1]) if len(parsed_nums) else hartid+1
-        fails = lines = 0
-        last_time = last_cyc = 0
-
-        print(
-            f'parsing hartid {hartid} with trace {filename}', file=sys.stderr)
-        tot_lines = len(open(filename).readlines())
-        with open(filename) as f:
-            all_lines = f.readlines()[args.start:args.end]
-            # offload lookahead
-            if not banshee:
-                lah = offload_lookahead(all_lines)
-            if has_progressbar:
-                for lino, line in progressbar.progressbar(
-                        enumerate(all_lines),
-                        max_value=tot_lines):
-                    fails += parse_line(line, hartid)
-                    lines += 1
+# Parses a trace file and returns a list of TraceViewer events.
+# Each event is formatted as a dictionary.
+def parse_trace(filename, **kwargs):
+
+    start = kwargs['start']
+    end = kwargs['end']
+    fmt = kwargs['fmt']
+
+    # Open trace
+    print(f'parsing trace {filename}', file=sys.stderr)
+    lah = {}
+    buf = []
+    fails = lines = 0
+    with open(filename) as f:
+
+        # Read lines
+        all_lines = f.readlines()
+        if end < 0:
+            end = len(all_lines) + end + 1
+        all_lines = all_lines[start:end]
+
+        # offload lookahead
+        if fmt == 'snitch':
+            lah = offload_lookahead(all_lines, **kwargs)
+
+        # Use a progress bar iterator if the package is installed
+        if has_progressbar:
+            iterations = progressbar.progressbar(
+                enumerate(all_lines),
+                max_value=len(all_lines))
+        else:
+            iterations = enumerate(all_lines)
+
+        # Iterate lines
+        events = []
+        for lino, line in iterations:
+
+            # Parse line
+            parsed_line = parse_line(line, **kwargs)
+            if parsed_line:
+                buf.append(parsed_line)
+            else:
+                fails += 1
+            lines += 1
+
+            # Flush buffer when it contains enough lines
+            if len(buf) > 10:
+                events += flush(lah, buf, **kwargs)
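+        # flush() keeps the last buffered entry as lookahead for the
+        # following instruction, so a final flush drains the remainder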
+        events += flush(lah, buf, **kwargs)
+
+    print(f'  parsed {lines-fails} of {lines} lines', file=sys.stderr)
+    return events
+
+
+def parse_traces(traces, **kwargs):
+
+    # Open ELF file
+    elf_path = kwargs['elf']
+    kwargs['elf'] = Elf(elf_path, a2l_binary=kwargs['addr2line'])
+
+    # Iterate traces
+    events = []
+    for i, filename in enumerate(traces):
+
+        # Extract hartid from filename or use current index
+        # TODO doesn't work with hex numbers
+        # parsed_nums = re.findall(r'\d+', filename)
+        # hartid = int(parsed_nums[-1]) if len(parsed_nums) else i
+        hartid = i
+
+        # Extract TraceViewer events from trace
+        trace_events = parse_trace(filename, **kwargs)
+
+        # Assign a per-trace unique TID or PID to all events
+        pid = elf_path if 'pid' not in kwargs else kwargs['pid']
+        for event in trace_events:
+            if kwargs['collapse_call_stack']:
+                event['pid'] = pid
+                event['tid'] = hartid
             else:
-                for lino, line in enumerate(
-                        all_lines):
-                    fails += parse_line(line, hartid)
-                    lines += 1
-        flush(buf, hartid)
-        print(f'  parsed {lines-fails} of {lines} lines', file=sys.stderr)
-
-    # JSON footer
-    output_file.write(r'{}]}''\n')
+                event['pid'] = pid + ':hartid' + str(hartid)
+
+        # Add to events from previous traces
+        events += trace_events
+
+    return events
+
+
+def main(**kwargs):
+    elf = kwargs['elf']
+    traces = kwargs['traces']
+    output = kwargs['output']
+    addr2line = kwargs['addr2line']
+
+    print('elf:', elf, file=sys.stderr)
+    print('traces:', traces, file=sys.stderr)
+    print('output:', output, file=sys.stderr)
+    print('addr2line:', addr2line, file=sys.stderr)
+
+    # Parse traces and create TraceViewer JSON object
+    events = parse_traces(**kwargs)
+    tvobj = {'traceEvents': events, 'displayTimeUnit': 'ns'}
+
+    # Dump JSON object to file
+    with open(output, 'w') as output_file:
+        json.dump(tvobj, output_file, indent=4)
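+
+
+# Example invocation (hypothetical paths):
+#
+#   ./tracevis.py build/app.elf logs/trace_hart_0.txt -f snitch -o chrome.json
+#
+# The resulting JSON can be loaded in chrome://tracing or
+# https://ui.perfetto.dev for visualization.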
+
+
+# Parse command-line args
+def parse_args():
+    # Argument parsing
+    parser = argparse.ArgumentParser('tracevis', allow_abbrev=True)
+    parser.add_argument(
+        'elf',
+        metavar='<elf>',
+        help='The binary executed to generate the traces')
+    parser.add_argument(
+        'traces',
+        metavar='<traces>',
+        nargs='+',
+        help='Traces to visualize')
+    parser.add_argument(
+        '-o',
+        '--output',
+        metavar='<json>',
+        nargs='?',
+        default='chrome.json',
+        help='Output JSON file')
+    parser.add_argument(
+        '--addr2line',
+        metavar='<addr2line>',
+        nargs='?',
+        default='addr2line',
+        help='`addr2line` binary to use for parsing')
+    parser.add_argument(
+        '-t',
+        '--time',
+        dest='use_time',
+        action='store_true',
+        help="Use the trace's time instead of cycles")
+    parser.add_argument(
+        '-f',
+        '--format',
+        dest='fmt',
+        type=str,
+        default='snitch',
+        choices=FORMATS,
+        help='Trace format')
+    parser.add_argument(
+        '--collapse-call-stack',
+        action='store_true',
+        help='Visualize all instructions of a core in a single TraceViewer thread')
+    parser.add_argument(
+        '-s',
+        '--start',
+        metavar='<line>',
+        nargs='?',
+        type=int,
+        default=0,
+        help='First line to parse')
+    parser.add_argument(
+        '-e',
+        '--end',
+        metavar='<line>',
+        nargs='?',
+        type=int,
+        default=-1,
+        help='Last line to parse (inclusive)')
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = vars(parse_args())
+    main(**args)