diff --git a/target/common/common.mk b/target/common/common.mk
index 4c2b2c95b..70afd80c2 100644
--- a/target/common/common.mk
+++ b/target/common/common.mk
@@ -211,6 +211,8 @@ JOINT_PERF_DUMP = $(LOGS_DIR)/perf.json
 ROI_DUMP = $(LOGS_DIR)/roi.json
 VISUAL_TRACE = $(LOGS_DIR)/trace.json
 
+VISUALIZE_PY_FLAGS += --tracevis "$(BINARY) $(SNITCH_TXT_TRACES) --addr2line $(ADDR2LINE) -f snitch"
+
 .PHONY: traces annotate visual-trace clean-traces clean-annotate clean-perf clean-visual-trace
 traces: $(TXT_TRACES)
 annotate: $(ANNOTATED_TRACES)
@@ -243,4 +245,4 @@ $(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY)
 	$(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@
 
 $(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY)
-	$(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) -o $@
+	$(VISUALIZE_PY) $(ROI_DUMP) $(VISUALIZE_PY_FLAGS) -o $@
diff --git a/util/bench/visualize.py b/util/bench/visualize.py
index 087d8b86e..232a29348 100755
--- a/util/bench/visualize.py
+++ b/util/bench/visualize.py
@@ -41,14 +41,10 @@ def main():
         metavar='',
         help='Input JSON file')
     parser.add_argument(
-        '--traces',
-        metavar='',
-        nargs='*',
-        help='Simulation traces to process')
-    parser.add_argument(
-        '--elf',
-        nargs='?',
-        help='ELF from which the traces were generated')
+        '--tracevis',
+        action='append',
+        default=[],
+        help='Argument string to pass down to tracevis, to generate additional events.')
     parser.add_argument(
         '-o',
         '--output',
@@ -98,10 +94,16 @@ def main():
 
     # Optionally extract also instruction-level events
     # from the simulation traces
-    if args.traces and args.elf:
-        events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
-                                        addr2line='addr2line', use_time=True, pid=1,
-                                        cache=True, elf=args.elf, collapse_call_stack=True)
+    for tvargs in args.tracevis:
+        # Break tracevis argument string into a list of arguments
+        tvargs = tvargs.split()
+        # Add default arguments, and parse all tracevis arguments
+        tvargs.append('--time')
+        tvargs.append('--collapse-call-stack')
+        tvargs = vars(tracevis.parse_args(tvargs))
+        # Add more arguments, and get tracevis events
+        tvargs['pid'] = 1
+        events += tracevis.parse_traces(**tvargs)
 
     # Create TraceViewer JSON object
     tvobj = {}
diff --git a/util/sim/Simulation.py b/util/sim/Simulation.py
index 58e1b200e..5dd98ee35 100644
--- a/util/sim/Simulation.py
+++ b/util/sim/Simulation.py
@@ -200,17 +200,20 @@ def get_retcode(self):
             return super().get_retcode()
         elif self.log is not None:
             # Extract the application's return code from the simulation log
-            with open(self.log, 'r') as f:
-                for line in f.readlines():
-                    regex_success = r'\[SUCCESS\] Program finished successfully'
-                    match_success = re.search(regex_success, line)
-                    if match_success:
-                        return 0
-                    else:
-                        regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
-                        match = re.search(regex_fail, line)
-                        if match:
-                            return int(match.group(1))
+            if not self.dry_run:
+                with open(self.log, 'r') as f:
+                    for line in f.readlines():
+                        regex_success = r'\[SUCCESS\] Program finished successfully'
+                        match_success = re.search(regex_success, line)
+                        if match_success:
+                            return 0
+                        else:
+                            regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
+                            match = re.search(regex_fail, line)
+                            if match:
+                                return int(match.group(1))
+            else:
+                return 0
 
     def successful(self):
         # Check that simulation return code matches expected value (in super class)
@@ -226,7 +229,9 @@ def get_simulation_time(self):
         # Extract the simulation time from the simulation log
         if self.log is not None:
             with open(self.log, 'r') as f:
-                for line in f.readlines():
+                # Read lines from the bottom of the file, since warning and error messages may
+                # also print a time to the log.
+                for line in reversed(f.readlines()):
                     regex = r'Time: (\d+) ([a-z]+)\s+'
                     match = re.search(regex, line)
                     if match:
diff --git a/util/sim/sim_utils.py b/util/sim/sim_utils.py
index 75ae28c5c..9844fa363 100755
--- a/util/sim/sim_utils.py
+++ b/util/sim/sim_utils.py
@@ -190,12 +190,9 @@ def print_summary(sims, early_exit=False, dry_run=False):
     Args:
         sims: A list of simulations from the simulation suite.
     """
-    # Header
-    header = '==== Test summary ===='
-    print(header)
-
     # Table
     table = PrettyTable()
+    table.title = 'Test summary'
     table.field_names = [
         'test',
         'launched',
diff --git a/util/trace/a2l.py b/util/trace/a2l.py
index ad345a011..4a849e80a 100644
--- a/util/trace/a2l.py
+++ b/util/trace/a2l.py
@@ -12,30 +12,20 @@
 from pathlib import Path
 from functools import lru_cache
 from operator import itemgetter
+import re
 
 
 def unzip(ls):
     return zip(*ls)
 
 
-def format_function_name(name):
-    if name == '??':
-        return 'unknown function'
-    return name
-
-
-def format_line(num):
-    if num == '?':
-        return -1
-    return int(num)
-
-
 class Addr2LineOutput:
 
     indent_unit = ' '
 
-    def __init__(self, raw):
+    def __init__(self, raw, toolchain='llvm'):
         self.raw = raw
+        self.toolchain = toolchain
 
     # Returns the function stack of the current line.
     # If there was no function inlining, then the function stack
@@ -44,52 +34,102 @@ def __init__(self, raw):
     # belonged to after inlining the previous, up to (and including)
     # the last function which was not inlined.
     def function_stack(self):
-        output = self.raw.split('\n')
-
-        functions = output[::2]
-        filepaths, lines = unzip([o.split(':') for o in output[1::2]])
-
-        functions = map(format_function_name, functions)
-        lines = map(format_line, lines)
-
-        stack = zip(functions, filepaths, lines)
-        stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack]
-        return stack
+        if self.toolchain == 'llvm':
+            # Define a regex pattern to capture relevant data. The function start filename
+            # and start line are optional, so they are enclosed in ()?.
+            pattern = re.compile(
+                r'^(?P<func>.+?)\s*'
+                r'Filename:\s*(?P<file>[^\n]+)\s*'
+                r'(Function start filename:\s*(?P<fstart_file>[^\n]+)\s*'
+                r'Function start line:\s*(?P<fstart_line>\d+)\s*)?'
+                r'Line:\s*(?P<line>\d+)\s*'
+                r'Column:\s*(?P<col>\d+)',
+                re.MULTILINE)
+        else:
+            # Define a regex pattern to match function names, file paths and line numbers
+            pattern = re.compile(
+                r"^(?P<func>.+)\n(?P<file>.+):(?P<line>\d+)(?: \(discriminator \d+\))?$",
+                re.MULTILINE)
+
+        # Find all matches and organize them into a list of dictionaries
+        stack = [match.groupdict() for match in pattern.finditer(self.raw)]
+
+        # Format stack entries
+        def format_stack_entry(entry):
+            func, line, col = [entry.get(key, None) for key in ['func', 'line', 'col']]
+            # Rename unknown functions
+            if func == '??':
+                entry['func'] = 'unknown function'
+            # Add column key if missing and convert 0 line and cols to None
+            for key, val in zip(['line', 'col'], [line, col]):
+                if val is not None:
+                    val = int(val)
+                    if val == 0:
+                        entry[key] = None
+                    else:
+                        entry[key] = val
+                else:
+                    entry[key] = val
+            return entry
+        stack = list(map(format_stack_entry, stack))
+        # Do not create stack if compiler was unable to associate a line number to the address
+        return stack if stack[0]['line'] is not None else None
 
     def function_stack_string(self, short=True):
-        stack = reversed(self.function_stack())
         s = ''
         indent = ''
-        for i, level in enumerate(stack):
-            func, file, line = level.values()
-            if short:
-                file = Path(file).name
-            indent = self.indent_unit * i
-            s += f'{indent}{func} ({file}:{line})\n'
+        stack = self.function_stack()
+        if stack is not None:
+            stack = reversed(self.function_stack())
+            for i, level in enumerate(stack):
+                func, file, line, col = [level.get(key) for key in ["func", "file", "line", "col"]]
+                if short:
+                    file = Path(file).name
+                indent = self.indent_unit * i
+                s += f'{indent}{func} ({file}:{line}'
+                if col is not None:
+                    s += f':{col}'
+                s += ')\n'
+            # Remove final newline
+            s = s[:-1]
         return s
 
     def line(self):
-        file, line = itemgetter('file', 'line')(self.function_stack()[0])
-
-        # Open source file
-        src = []
-        try:
-            with open(file, 'r') as f:
-                src = [x.strip() for x in f.readlines()]
-        except OSError:
+        if self.function_stack():
+            file, line = itemgetter('file', 'line')(self.function_stack()[0])
+
+            # Open source file
             src = []
+            try:
+                with open(file, 'r') as f:
+                    src = [x for x in f.readlines()]
+            except OSError:
+                src = []
 
-        # Extract line
-        if src and line >= 0:
-            return src[line-1]
-        else:
-            return ''
+            # Extract line
+            if src and line is not None:
+                return src[line-1]
 
     def __str__(self):
-        s = self.function_stack_string()
-        if self.line():
-            indent = self.indent_unit * len(s.strip().split('\n'))
-            s += f'{indent}{self.line()}'
+        s = ''
+        stack = self.function_stack()
+        if stack:
+            col = stack[0]['col']
+            s = self.function_stack_string()
+            line = self.line()
+            if line is not None:
+                # Calculate indentation of original source line
+                stripped_line = line.lstrip()
+                orig_indent = len(line) - len(stripped_line)
+                stripped_line = stripped_line.rstrip()
+                # Calculate indentation to prepend to source line from the function stack string
+                indent = self.indent_unit * len(s.strip().split('\n'))
+                # Append source line to function stack string
+                s += f'\n{indent}{stripped_line}'
+                # Append a column indicator line
+                if col is not None:
+                    new_col = col - orig_indent + len(indent)
+                    s += '\n' + ' ' * (new_col - 1) + '^'
         return s
 
 
@@ -99,6 +139,16 @@ def __init__(self, elf, a2l_binary='addr2line'):
         self.elf = Path(elf)
         self.a2l = a2l_binary
 
+        # We must distinguish between LLVM and GCC toolchains as the latter
+        # does not support the `--verbose` flag
+        if 'riscv64-unknown-elf-addr2line' in a2l_binary:
+            self.toolchain = 'gcc'
+        elif 'llvm-addr2line' in a2l_binary:
+            self.toolchain = 'llvm'
+        else:
+            raise ValueError('addr2line binary expected to be either riscv64-unknown-elf-addr2line'
+                             ' or llvm-addr2line')
+
         assert self.elf.exists(), f'File not found {self.elf}'
 
     @lru_cache(maxsize=1024)
@@ -106,4 +156,6 @@ def addr2line(self, addr):
         if isinstance(addr, str):
             addr = int(addr, 16)
         cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}'
-        return Addr2LineOutput(os.popen(cmd).read())
+        if self.toolchain == 'llvm':
+            cmd += ' --verbose'
+        return Addr2LineOutput(os.popen(cmd).read(), toolchain=self.toolchain)
diff --git a/util/trace/annotate.py b/util/trace/annotate.py
index 243784a07..dac037832 100755
--- a/util/trace/annotate.py
+++ b/util/trace/annotate.py
@@ -87,6 +87,13 @@
     '--quiet',
     action='store_true',
     help='Quiet output')
+parser.add_argument(
+    '-f',
+    '--format',
+    default='snitch',
+    choices=['cva6', 'snitch'],
+    help='Trace format'
+)
 
 args = parser.parse_args()
 
@@ -97,6 +104,7 @@
 addr2line = args.addr2line
 quiet = args.quiet
 keep_time = args.keep_time
+trace_fmt = args.format
 
 if not quiet:
     print('elf:', elf_file, file=sys.stderr)
@@ -170,6 +178,22 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
     of.write(f'{hunk_header}{hunk_trace}{hunk_source}')
 
 
+def parse_line(line, fmt='snitch'):
+    # Regex capturing the timestamp and program counter (if present)
+    if fmt == 'snitch':
+        pattern = r'^\s*(\d+)?\s*.*?M (0x[0-9a-fA-F]+)?'
+    elif fmt == 'cva6':
+        pattern = r'^\s*(\d+ns)?\s*.*?M ([0-9a-fA-F]+)?'
+    else:
+        raise ValueError('Invalid trace format string')
+    match = re.search(pattern, line)
+    if match:
+        timestamp, program_counter = match.group(1), match.group(2)
+    else:
+        timestamp, program_counter = None, None
+    return timestamp, program_counter
+
+
 # Open ELF file for addr2line processing
 elf = a2l.Elf(elf_file, addr2line)
 
@@ -198,84 +222,92 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source):
 
 tot_lines = len(trace_lines)
 last_prog = 0
+end_of_trace_reached = False
 for lino, line in enumerate(trace_lines):
 
-    # Split trace line in columns
-    cols = re.split(r" +", line.strip())
-    # Get simulation time from first column
-    time = cols[0]
-    # RTL traces might not contain a PC on each line
-    try:
-        # Get address from PC column
-        addr = cols[3]
-        # Find index of first character in PC
+    # Check if we reached the end of the trace, marked by the first empty line, which
+    # is then followed by the performance metrics.
+    if line == '\n':
+        end_of_trace_reached = True
+
+    # Lines after the end of the trace are simply redirected as is to the annotated trace
+    if end_of_trace_reached:
+        of.write(f'{line}')
+    else:
+        # Parse regular trace line
+        time, pc = parse_line(line, fmt=trace_fmt)
+
+        # In the first line we record the start column of the program counter
+        # so we know where to cut the line, even when the PC is missing
         if trace_start_col < 0:
-            trace_start_col = line.find(addr)
-        # Get addr2line information and format it as an assembly comment
-        a2l_output = elf.addr2line(addr)
-        annot = '\n'.join([f'#; {line}' for line in str(a2l_output).split('\n')])
-    except (ValueError, IndexError):
-        a2l_output = None
-        annot = ''
+            assert pc is not None, "Program counter is missing in first trace line"
+            trace_start_col = line.find(pc)
+
+        # Filter trace line for printing
         if keep_time:
+            time = time if time is not None else ""
             filtered_line = f'{time:>12} {line[trace_start_col:]}'
         else:
             filtered_line = f'{line[trace_start_col:]}'
-        if diff:
-            hunk_trace += f'-{filtered_line}'
+
+        # Get addr2line information and format it as an assembly comment
+        annot = ''
+        a2l_output = None
+        if pc is not None:
+            a2l_output = str(elf.addr2line(pc))
+            if a2l_output:
+                annot = '\n'.join([f'#; {line}' for line in a2l_output.split('\n')])
         else:
-            of.write(f' {filtered_line}')
-        continue
+            if diff:
+                hunk_trace += f'-{filtered_line}'
+            else:
+                of.write(f' {filtered_line}')
+            continue
 
-    # Filter trace line for printing
-    if keep_time:
-        filtered_line = f'{time:>12} {line[trace_start_col:]}'
-    else:
-        filtered_line = f'{line[trace_start_col:]}'
+        # Print diff
+        if diff:
+            # Compare current and previous call stacks
+            if a2l_output:
+                funs, files, lines = zip(
+                    *[level.values() for level in a2l_output.function_stack()])
+            else:
+                funs = files = lines = []
+            next_call_stack = assemble_call_stack(funs, files, lines)
+            matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack)
+            matching_src_line = matching_source_line(next_call_stack, call_stack)
+
+            # If this instruction does not map to the same evaluation of the source line
+            # of the last instruction, we finalize and dump the previous hunk
+            if hunk_trace and not matching_src_line:
+                dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source)
+                # Initialize next hunk
+                hunk_tstart += len(hunk_trace.splitlines())
+                hunk_sstart += len(hunk_source.splitlines())
+                hunk_trace = ''
+                hunk_source = ''
+
+            # Update state for next iteration
+            call_stack = next_call_stack
+
+            # Assemble source part of hunk
+            src_line = a2l_output.line()
+            if len(funs) and src_line:
+                for i, call in enumerate(call_stack):
+                    if i >= matching_cstack_levels:
+                        hunk_source += f'+{format_call(i, call)}'
+                if not matching_src_line:
+                    indentation = ' ' * (len(call_stack) - 1)
+                    hunk_source += f'+{indentation}{lines[0]}: {src_line}\n'
+
+            # Assemble trace part of hunk
+            hunk_trace += f'-{filtered_line}'
 
-    # Print diff
-    if diff:
-        # Compare current and previous call stacks
-        if a2l_output:
-            funs, files, lines = zip(*[level.values() for level in a2l_output.function_stack()])
+        # Default: print trace interleaved with source annotations
         else:
-            funs = files = lines = []
-        next_call_stack = assemble_call_stack(funs, files, lines)
-        matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack)
-        matching_src_line = matching_source_line(next_call_stack, call_stack)
-
-        # If this instruction does not map to the same evaluation of the source line
-        # of the last instruction, we finalize and dump the previous hunk
-        if hunk_trace and not matching_src_line:
-            dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source)
-            # Initialize next hunk
-            hunk_tstart += len(hunk_trace.splitlines())
-            hunk_sstart += len(hunk_source.splitlines())
-            hunk_trace = ''
-            hunk_source = ''
-
-        # Update state for next iteration
-        call_stack = next_call_stack
-
-        # Assemble source part of hunk
-        src_line = a2l_output.line()
-        if len(funs) and src_line:
-            for i, call in enumerate(call_stack):
-                if i >= matching_cstack_levels:
-                    hunk_source += f'+{format_call(i, call)}'
-            if not matching_src_line:
-                indentation = ' ' * (len(call_stack) - 1)
-                hunk_source += f'+{indentation}{lines[0]}: {src_line}\n'
-
-        # Assemble trace part of hunk
-        hunk_trace += f'-{filtered_line}'
-
-    # Default: print trace interleaved with source annotations
-    else:
-        if len(annot) and annot != last:
-            of.write(annot+'\n')
-        of.write(f' {filtered_line}')
-        last = annot
+            if len(annot) and annot != last:
+                of.write(annot+'\n')
+            of.write(f' {filtered_line}')
+            last = annot
 
     # very simple progress
     if not quiet:
diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py
index 62b9737d9..db094ad7e 100755
--- a/util/trace/gen_trace.py
+++ b/util/trace/gen_trace.py
@@ -41,6 +41,8 @@
 from ctypes import c_int32, c_uint32
 from collections import deque, defaultdict
 from pathlib import Path
+import traceback
+from itertools import tee, islice, chain
 
 EXTRA_WB_WARN = 'WARNING: {} transactions still in flight for {}.'
 
@@ -1038,6 +1040,15 @@ def fmt_perf_metrics(perf_metrics: list, idx: int, omit_keys: bool = True):
     return '\n'.join(ret)
 
 
+# -------------------- Utils --------------------
+
+
+def current_and_next(iterable):
+    currs, nexts = tee(iterable, 2)
+    nexts = chain(islice(nexts, 1, None), [None])
+    return zip(currs, nexts)
+
+
 # -------------------- Main --------------------
 
 
@@ -1115,16 +1126,26 @@
     ]  # all values initially 0, also 'start' time of measurement 0
     perf_metrics[0]['start'] = None
     # Parse input line by line
-    for line in line_iter:
+    for lineno, (line, nextl) in enumerate(current_and_next(line_iter)):
         if line:
-            ann_insn, time_info, empty = annotate_insn(
-                line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False,
-                time_info, args.offl, not args.saddr, args.permissive, dma_trans)
-            if perf_metrics[0]['start'] is None:
-                perf_metrics[0]['tstart'] = time_info[0] / 1000
-                perf_metrics[0]['start'] = time_info[1]
-            if not empty:
-                print(ann_insn, file=file)
+            try:
+                ann_insn, time_info, empty = annotate_insn(
+                    line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False,
+                    time_info, args.offl, not args.saddr, args.permissive, dma_trans)
+                if perf_metrics[0]['start'] is None:
+                    perf_metrics[0]['tstart'] = time_info[0] / 1000
+                    perf_metrics[0]['start'] = time_info[1]
+                if not empty:
+                    print(ann_insn, file=file)
+            except Exception:
+                message = 'Exception occurred while processing '
+                if not nextl:
+                    message += 'last line. Did the simulation terminate?'
+                else:
+                    message += f'line {lineno}.'
+                print(traceback.format_exc(), file=sys.stderr)
+                print(message, file=sys.stderr)
+                return 1
         else:
             break  # Nothing more in pipe, EOF
     perf_metrics[-1]['tend'] = time_info[0] / 1000
diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py
index 599c82bd6..e31f0bff8 100755
--- a/util/trace/tracevis.py
+++ b/util/trace/tracevis.py
@@ -19,14 +19,7 @@
 import json
 import argparse
 from a2l import Elf
-
-has_progressbar = True
-try:
-    import progressbar
-except ImportError as e:
-    # Do not use progressbar
-    print(f'{e} --> No progress bar will be shown.', file=sys.stderr)
-    has_progressbar = False
+from concurrent.futures import ThreadPoolExecutor
 
 
 # line format:
@@ -115,7 +108,9 @@ def flush(lah, buf, **kwargs):
     event['dur'] = 1 if fmt == 'banshee' else duration
     # The thread ID is used to group events in a single TraceViewer row
     if not collapse_call_stack:
-        event['tid'] = a2l_info.function_stack[0]['func']
+        stack = a2l_info.function_stack()
+        if stack is not None:
+            event['tid'] = stack[0]['func']
     if fmt == 'banshee':
         # Banshee stores all traces in a single file
         event['tid'] = priv
@@ -213,12 +208,16 @@ def offload_lookahead(lines, **kwargs):
 
 # Parses a trace file and returns a list of TraceViewer events.
 # Each event is formatted as a dictionary.
-def parse_trace(filename, **kwargs):
+def parse_trace(filename, kwargs):
 
     start = kwargs['start']
     end = kwargs['end']
     fmt = kwargs['fmt']
 
+    # Extract hartid from filename
+    pattern = r'trace_hart_([0-9a-fA-F]+)\.txt$'
+    hartid = int(re.search(pattern, filename).group(1), 16)
+
     # Open trace
     print(f'parsing trace {filename}', file=sys.stderr)
     lah = {}
@@ -236,17 +235,9 @@ def parse_trace(filename, **kwargs):
     if fmt == 'snitch':
         lah = offload_lookahead(all_lines, **kwargs)
 
-    # Use a progress bar iterator if the package is installed
-    if has_progressbar:
-        iterations = progressbar.progressbar(
-            enumerate(all_lines),
-            max_value=len(all_lines))
-    else:
-        iterations = enumerate(all_lines)
-
     # Iterate lines
     events = []
-    for lino, line in iterations:
+    for lino, line in enumerate(all_lines):
         # Parse line
         parsed_line = parse_line(line, **kwargs)
         if parsed_line:
@@ -261,6 +252,15 @@ def parse_trace(filename, **kwargs):
         events += flush(lah, buf, **kwargs)
 
     print(f'  parsed {lines-fails} of {lines} lines', file=sys.stderr)
+
+    # Assign a per-trace unique TID or PID to all events
+    for event in events:
+        if kwargs['collapse_call_stack']:
+            event['pid'] = kwargs['pid']
+            event['tid'] = hartid
+        else:
+            event['pid'] = kwargs['pid']+':hartid'+str(hartid)
+
     return events
 
 
@@ -270,30 +270,16 @@ def parse_traces(traces, **kwargs):
     elf_path = kwargs['elf']
     kwargs['elf'] = Elf(elf_path, a2l_binary=kwargs['addr2line'])
 
-    # Iterate traces
-    events = []
-    for i, filename in enumerate(traces):
-
-        # Extract hartid from filename or use current index
-        # TODO doesn't work with hex numbers
-        # parsed_nums = re.findall(r'\d+', filename)
-        # hartid = int(parsed_nums[-1]) if len(parsed_nums) else i
-        hartid = i
-
-        # Extract TraceViewer events from trace
-        trace_events = parse_trace(filename, **kwargs)
+    # The unique PID to add to the events
+    if 'pid' not in kwargs:
+        kwargs['pid'] = elf_path
 
-        # Assign a per-trace unique TID or PID to all events
-        pid = elf_path if 'pid' not in kwargs else kwargs['pid']
-        for event in trace_events:
-            if kwargs['collapse_call_stack']:
-                event['pid'] = pid
-                event['tid'] = hartid
-            else:
-                event['pid'] = pid+':hartid'+str(hartid)
+    # Parse traces in parallel
+    with ThreadPoolExecutor() as executor:
+        results = executor.map(parse_trace, traces, [kwargs]*len(traces))
 
-        # Add to events from previous traces
-        events += trace_events
+    # Flatten list of event lists from every trace into a single list
+    events = [event for result in results for event in result]
 
     return events
 
@@ -319,7 +305,7 @@ def main(**kwargs):
 
 
 # Parse command-line args
-def parse_args():
+def parse_args(args=None):
     # Argument parsing
     parser = argparse.ArgumentParser('tracevis', allow_abbrev=True)
     parser.add_argument(
@@ -379,7 +365,7 @@ def parse_args():
         type=int,
         default=-1,
         help='Last line to parse (inclusive)')
-    return parser.parse_args()
+    return parser.parse_args(args)
 
 
 if __name__ == '__main__':