From 2b80dbb1b74e542a4b8a46d80e55eeac76e5a9d9 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 18 Aug 2023 10:47:51 +0200 Subject: [PATCH 01/13] snRuntime: Correct undefined symbol errors --- sw/snRuntime/src/start.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index 77d5a0326..8a692e921 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -2,6 +2,14 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +static inline void snrt_crt0_cluster_hw_barrier() { + uint32_t register r; + uint32_t hw_barrier = + SNRT_CLUSTER_HW_BARRIER_ADDR + snrt_cluster_idx() * SNRT_CLUSTER_OFFSET; + asm volatile("lw %0, 0(%1)" : "=r"(r) : "r"(hw_barrier) : "memory"); +} + +#ifdef SNRT_INIT_CLS static inline uint32_t snrt_cls_base_addr() { extern volatile uint32_t __cdata_start, __cdata_end; extern volatile uint32_t __cbss_start, __cbss_end; @@ -12,14 +20,9 @@ static inline uint32_t snrt_cls_base_addr() { SNRT_TCDM_SIZE; return l1_end_addr - cdata_size - cbss_size; } +#endif -static inline void snrt_crt0_cluster_hw_barrier() { - uint32_t register r; - uint32_t hw_barrier = - SNRT_CLUSTER_HW_BARRIER_ADDR + snrt_cluster_idx() * SNRT_CLUSTER_OFFSET; - asm volatile("lw %0, 0(%1)" : "=r"(r) : "r"(hw_barrier) : "memory"); -} - +#ifdef SNRT_INIT_TLS static inline void snrt_init_tls() { extern volatile uint32_t __tdata_start, __tdata_end; extern volatile uint32_t __tbss_start, __tbss_end; @@ -41,7 +44,9 @@ static inline void snrt_init_tls() { tls_ptr++; } } +#endif +#ifdef SNRT_INIT_BSS static inline void snrt_init_bss() { extern volatile uint32_t __bss_start, __bss_end; @@ -54,7 +59,9 @@ static inline void snrt_init_bss() { } } } +#endif +#ifdef SNRT_INIT_CLS static inline void snrt_init_cls() { extern volatile uint32_t __cdata_start, __cdata_end; extern volatile uint32_t __cbss_start, __cbss_end; @@ 
-81,13 +88,18 @@ static inline void snrt_init_cls() { } } } +#endif +#ifdef SNRT_INIT_LIBS static inline void snrt_init_libs() { snrt_alloc_init(); } +#endif +#ifdef SNRT_CRT0_EXIT static inline void snrt_exit(int exit_code) { if (snrt_global_core_idx() == 0) *(snrt_exit_code_destination()) = (exit_code << 1) | 1; } +#endif void snrt_main() { int exit_code = 0; From d25cd9e2f25768163b499505f38512bae064014a Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Tue, 30 May 2023 12:21:25 +0200 Subject: [PATCH 02/13] util: Streamline addr2line functionality and extend eventvis.py --- util/trace/a2l.py | 109 +++++++++ util/trace/annotate.py | 90 ++----- util/trace/eventvis.py | 32 ++- util/trace/layout_events.py | 119 +++++---- util/trace/tracevis.py | 471 ++++++++++++++++++++---------------- 5 files changed, 495 insertions(+), 326 deletions(-) create mode 100644 util/trace/a2l.py diff --git a/util/trace/a2l.py b/util/trace/a2l.py new file mode 100644 index 000000000..c62633739 --- /dev/null +++ b/util/trace/a2l.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +# Copyright 2021 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +# +# Utilities for common tasks involving addr2line + +import os +from pathlib import Path +from functools import lru_cache +from operator import itemgetter + + +def unzip(ls): + return zip(*ls) + + +def format_function_name(name): + if name == '??': + return 'unknown function' + return name + + +def format_line(num): + if num == '?': + return -1 + return int(num) + + +class Addr2LineOutput: + + indent_unit = ' ' + + def __init__(self, raw): + self.raw = raw + + # Returns the function stack of the current line. + # If there was no function inlining, then the function stack + # includes only the function the line belongs to. 
+ # If there was inlining, it includes all functions the line + # belonged to after inlining the previous, up to (and including) + # the last function which was not inlined. + def function_stack(self): + output = self.raw.split('\n') + + functions = output[::2] + filepaths, lines = unzip([o.split(':') for o in output[1::2]]) + + functions = map(format_function_name, functions) + lines = map(format_line, lines) + + stack = zip(functions, filepaths, lines) + stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack] + return stack + + def function_stack_string(self, short=True): + stack = reversed(self.function_stack()) + s = '' + indent = '' + for i, level in enumerate(stack): + func, file, line = level.values() + if short: + file = Path(file).name + indent = self.indent_unit * i + s += f'{indent}{func} ({file}:{line})\n' + return s + + def line(self): + file, line = itemgetter('file', 'line')(self.function_stack()[0]) + + # Open source file + src = [] + try: + with open(file, 'r') as f: + src = [x.strip() for x in f.readlines()] + except OSError: + src = [] + + # Extract line + if src and line >= 0: + return src[line-1] + else: + return '' + + def __str__(self): + s = self.function_stack_string() + if self.line(): + indent = self.indent_unit * len(s.strip().split('\n')) + s += f'{indent}{self.line()}' + return s + + +class Elf: + + def __init__(self, elf, a2l_binary='addr2line'): + self.elf = Path(elf) + self.a2l = a2l_binary + + assert self.elf.exists(), f'File not found {self.elf}' + + @lru_cache(maxsize=1024) + def addr2line(self, addr): + if type(addr) == str: + addr = int(addr, 16) + cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}' + return Addr2LineOutput(os.popen(cmd).read()) diff --git a/util/trace/annotate.py b/util/trace/annotate.py index a88664544..512556190 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -22,9 +22,8 @@ import sys import os import re -from functools import lru_cache import argparse -from termcolor import 
colored +import a2l # Argument parsing parser = argparse.ArgumentParser('annotate', allow_abbrev=True) @@ -84,7 +83,7 @@ args = parser.parse_args() -elf = args.elf +elf_file = args.elf trace = args.trace output = args.output diff = args.diff @@ -93,7 +92,7 @@ keep_time = args.keep_time if not quiet: - print('elf:', elf, file=sys.stderr) + print('elf:', elf_file, file=sys.stderr) print('trace:', trace, file=sys.stderr) print('output:', output, file=sys.stderr) print('diff:', diff, file=sys.stderr) @@ -110,34 +109,9 @@ trace_start_col = -1 -@lru_cache(maxsize=1024) -def adr2line(addr): - cmd = f'{addr2line} -e {elf} -f -i {addr:x}' - return os.popen(cmd).read().split('\n') - - -# helper functions to parse addr2line output -def a2l_file_path(a2l_file_str): - return a2l_file_str.split(':')[0] - - -def a2l_file_name(a2l_file_str): - return a2l_file_str.split('/')[-1].split(':')[0] - - -def a2l_file_line(a2l_file_str): - return int(a2l_file_str.split(':')[-1].split(' ')[0]) - - -def format_a2l_funcname(a2l_func_name): - if a2l_func_name == '??': - return 'unknown function' - return a2l_func_name - - # helper functions to assemble diff output def format_call(level, call): - funcname = format_a2l_funcname(call[0]) + funcname = a2l.format_function_name(call[0]) if level == 0: return f'{funcname} ({call[1]})\n' else: @@ -189,6 +163,9 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): of.write(f'{hunk_header}{hunk_trace}{hunk_source}') +# Open ELF file for addr2line processing +elf = a2l.Elf(elf_file) + # core functionality with open(trace, 'r') as f: @@ -223,12 +200,16 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): # RTL traces might not contain a PC on each line try: # Get address from PC column - addr_str = cols[3] - addr = int(addr_str, base=16) + addr = cols[3] # Find index of first character in PC if trace_start_col < 0: - trace_start_col = line.find(addr_str) + trace_start_col = line.find(addr) + # Get addr2line information and 
format it as an assembly comment + a2l_output = elf.addr2line(addr) + annot = '\n'.join([f'#; {line}' for line in str(a2l_output).split('\n')]) except (ValueError, IndexError): + a2l_output = None + annot = '' if keep_time: filtered_line = f'{time:>12} {line[trace_start_col:]}' else: @@ -245,41 +226,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): else: filtered_line = f'{line[trace_start_col:]}' - addr_hex = f'{addr:x}' - ret = adr2line(addr) - - funs = ret[::2] - file_paths = [a2l_file_path(x) for x in ret[1::2]] - file_names = [a2l_file_name(x) for x in ret[1::2]] - file_lines = [a2l_file_line(x) for x in ret[1::2]] - # Assemble annotation string - if len(funs): - annot = f'#; {funs[0]} ({file_names[0]}:{file_lines[0]})' - for fun, file_name, file_line in zip(funs[1:], file_names[1:], file_lines[1:]): - annot = f'{annot}\n#; in {fun} ({file_name}:{file_line})' - - # Get source of last file and print the line - src_fname = file_paths[0] - if src_fname not in src_files.keys(): - try: - # Issue warning if source was modified after trace - src_timestamp = os.path.getmtime(src_fname) - if src_timestamp >= trace_timestamp: - print(colored('Warning:', 'yellow'), - f'{src_fname} has been edited since the trace was generated') - - with open(src_fname, 'r') as src_f: - src_files[src_fname] = [x.strip() for x in src_f.readlines()] - except OSError: - src_files[src_fname] = None - if src_files[src_fname] is not None: - src_line = src_files[src_fname][file_lines[0]-1] - annot = f'{annot}\n#; {src_line}' - # Print diff if diff: # Compare current and previous call stacks - next_call_stack = assemble_call_stack(funs, file_paths, file_lines) + if a2l_output: + funs, files, lines = zip(*[level.values() for level in a2l_output.function_stack()]) + else: + funs = files = lines = [] + next_call_stack = assemble_call_stack(funs, files, lines) matching_cstack_levels = matching_call_stack_levels(next_call_stack, call_stack) matching_src_line = 
matching_source_line(next_call_stack, call_stack) @@ -297,13 +251,14 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): call_stack = next_call_stack # Assemble source part of hunk - if len(funs) and src_files[src_fname]: + src_line = a2l_output.line() + if len(funs) and src_line: for i, call in enumerate(call_stack): if i >= matching_cstack_levels: hunk_source += f'+{format_call(i, call)}' if not matching_src_line: indentation = ' ' * (len(call_stack) - 1) - hunk_source += f'+{indentation}{file_lines[0]}: {src_line}\n' + hunk_source += f'+{indentation}{lines[0]}: {src_line}\n' # Assemble trace part of hunk hunk_trace += f'-{filtered_line}' @@ -329,4 +284,3 @@ def dump_hunk(hunk_tstart, hunk_sstart, hunk_trace, hunk_source): if not quiet: print(' done') - print(adr2line.cache_info()) diff --git a/util/trace/eventvis.py b/util/trace/eventvis.py index 2d81ef8fb..4d0fdfdc7 100755 --- a/util/trace/eventvis.py +++ b/util/trace/eventvis.py @@ -31,6 +31,7 @@ import argparse import csv import json +import tracevis def pairwise(iterable): @@ -51,6 +52,15 @@ def main(): 'csv', metavar='', help='Input CSV file') + parser.add_argument( + '--traces', + metavar='', + nargs='*', + help='Simulation traces to process') + parser.add_argument( + '--elf', + nargs='?', + help='ELF from which the traces were generated') parser.add_argument( '-o', '--output', @@ -60,8 +70,21 @@ def main(): help='Output JSON file') args = parser.parse_args() - # Read CSV to collect TraceViewer events + # TraceViewer events events = [] + + # Add a dummy instant event to mark time 0. + # This is to avoid that the events are shifted from + # their actual start times to align the first event + # at time 0. 
+ event = {'name': 'zero', + 'ph': 'I', # Instant event type + 'ts': 0, + 's': 'g' # Global scope + } + events.append(event) + + # Read CSV to collect TraceViewer events with open(args.csv) as f: reader = csv.reader(f, delimiter=',') @@ -92,6 +115,13 @@ def main(): } events.append(event) + # Optionally extract also instruction-level events + # from the simulation traces + if args.traces and args.elf: + events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', + addr2line='addr2line', use_time=True, pid=1, + cache=True, elf=args.elf, collapse_call_stack=True) + # Create TraceViewer JSON object tvobj = {} tvobj['traceEvents'] = events diff --git a/util/trace/layout_events.py b/util/trace/layout_events.py index a17fa504d..ea877c53c 100755 --- a/util/trace/layout_events.py +++ b/util/trace/layout_events.py @@ -40,6 +40,7 @@ import argparse import csv import pandas as pd +from math import isnan def main(): @@ -53,6 +54,11 @@ def main(): 'layout', metavar='', help='Layout CSV file') + parser.add_argument( + '--num-clusters', + type=int, + default=1, + help='Number of clusters') parser.add_argument( '-o', '--output', @@ -65,58 +71,67 @@ def main(): # Read input CSV df = pd.read_csv(args.csv) - # Open output CSV for writing - with open(args.output, mode='w') as out_f: - writer = csv.writer(out_f, delimiter=',', quotechar='"') - - # Open layout CSV - with open(args.layout) as layout_f: - layout_reader = csv.reader(layout_f, delimiter=',') - - # Get region labels from layout header - regions = [label for label in next(layout_reader) if label and not label.isspace()] - - # Generate output header: appropriately spaced region labels - header = [''] + [val for label in regions for val in [label, '']] - writer.writerow(header) - - # Iterate layout rows - for row in layout_reader: - - # First entry in row is a hart ID or a Python expression - # which generates a list of hart IDs - expr = row[0] - code = compile(expr, "", "eval") - tids = eval(code) - if 
isinstance(tids, int): - tids = [tids] - - # Iterate hart IDs - for tid in tids: - - # Start output row with hart ID - orow = [tid] - - # Iterate all other cells in layout row (indices of regions to take) - for cell in row[1:]: - - # If the cell is not empty, get start and end times - # of the region from the input CSV and append them to the - # output row. Otherwise, leave cells empty. - if cell and not cell.isspace(): - reg_idx = int(cell) - row_idx = tid - col_idx = 1 + reg_idx * 2 - assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' - assert (col_idx + 1) < df.shape[1], \ - f'Region index {reg_idx} out of bounds' - orow.append(int(df.iat[row_idx, col_idx])) - orow.append(int(df.iat[row_idx, col_idx + 1])) - else: - orow.append('') - orow.append('') - - writer.writerow(orow) + # Output CSV data + data = [] + columns = [] + + # Open layout CSV + with open(args.layout) as layout_f: + layout_reader = csv.reader(layout_f, delimiter=',') + + # Get region labels from layout header + regions = [label for label in next(layout_reader) if label and not label.isspace()] + + # Generate output columns: appropriately spaced region labels + columns = ['hartid'] + [val for label in regions for val in [label, '']] + + # Iterate layout rows + for row in layout_reader: + + # First entry in row is a hart ID or a Python expression + # which generates a list of hart IDs + expr = row[0] + code = compile(expr, "", "eval") + tids = eval(code, {}, {'num_clusters': args.num_clusters}) + if type(tids) == int: + tids = [tids] + + # Iterate hart IDs + for tid in tids: + + # Start output row with hart ID + orow = [tid] + + # Iterate all other cells in layout row (indices of regions to take) + for cell in row[1:]: + + # If the cell is not empty, get start and end times + # of the region from the input CSV and append them to the + # output row. Otherwise, leave cells empty. 
+ if cell and not cell.isspace(): + reg_idx = int(cell) + row_idx = tid + col_idx = 1 + reg_idx * 2 + assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' + assert (col_idx + 1) < df.shape[1],\ + f'Region index {reg_idx} out of bounds for hart {tid}' + assert not isnan(df.iat[row_idx, col_idx]),\ + (f'Region {reg_idx} looks empty for hart {tid},' + f'check whether it was simulated') + orow.append(int(df.iat[row_idx, col_idx])) + orow.append(int(df.iat[row_idx, col_idx + 1])) + else: + orow.append('') + orow.append('') + + data.append(orow) + + # Create output dataframe and write to CSV + df = pd.DataFrame(data, columns=columns) + df.set_index('hartid', inplace=True) + df.sort_index(axis='index', inplace=True) + df.index.name = None + df.to_csv(args.output) if __name__ == '__main__': diff --git a/util/trace/tracevis.py b/util/trace/tracevis.py index ecc344253..599c82bd6 100755 --- a/util/trace/tracevis.py +++ b/util/trace/tracevis.py @@ -12,12 +12,13 @@ # This script is inspired by https://github.com/SalvatoreDiGirolamo/tracevis # Author: Noah Huetter # Samuel Riedel +# Luca Colagrande import re -import os import sys -from functools import lru_cache +import json import argparse +from a2l import Elf has_progressbar = True try: @@ -31,13 +32,16 @@ # line format: # Snitch RTL simulation: # 101000 82 M 0x00001000 csrr a0, mhartid #; comment -# time cycle priv_lvl pc insn +# CVA6 RTL simulation: +# 101ns 82 M 0000000000001000 0 301022f3 csrr t0, misa ... 
+# time cycle priv_lvl pc branch machine_insn insn # MemPool RTL simulation: # 101000 82 0x00001000 csrr a0, mhartid #; comment # time cycle pc insn # Banshee traces: # 00000432 00000206 0005 800101e0 x15:00000064 x15=00000065 # addi a5, a5, 1 # cycle instret hard_id pc register insn +FORMATS = ['cva6', 'snitch', 'banshee'] # regex matches to groups # 0 -> time @@ -45,10 +49,11 @@ # 2 -> privilege level (RTL) / hartid (banshee) # 3 -> pc (hex with 0x prefix) # 4 -> instruction -# 5 -> args (RTL) / empty (banshee) -# 6 -> comment (RTL) / instruction arguments (banshee) -RTL_REGEX = r' *(\d+) +(\d+) +([3M1S0U]?) *(0x[0-9a-f]+) ([.\w]+) +(.+)#; (.*)' -BANSHEE_REGEX = r' *(\d+) (\d+) (\d+) ([0-9a-f]+) *.+ +.+# ([\w\.]*)( +)(.*)' +# 5 -> args (RTL) / empty (cva6, banshee) +# 6 -> comment (RTL) / instruction arguments (banshee) / empty (cva6) +REGEX = {'snitch': r' *(\d+) +(\d+) +([3M1S0U]?) *(0x[0-9a-f]+) ([.\w]+) +(.+)#; (.*)', + 'cva6': r' *(\d+)ns +(\d+) +([3M1S0U]?) *([0-9a-f]+) +[01]+ +[0-9a-f]+ +([.\w]+)', + 'banshee': r' *(\d+) (\d+) (\d+) ([0-9a-f]+) *.+ +.+# ([\w\.]*)( +)(.*)'} # regex matches a line of instruction retired by the accelerator # 0 -> time @@ -57,29 +62,20 @@ # 3 -> comment ACC_LINE_REGEX = r' *(\d+) +(\d+) +([3M1S0U]?) *#; (.*)' -buf = [] +# Parses the output of the `parse_line()` function into a TraceViewer +# event, formatted as a dictionary. It operates on multiple of these +# outputs, collected in a buffer `buf`. 
+def flush(lah, buf, **kwargs): + elf = kwargs['elf'] + fmt = kwargs['fmt'] + use_time = kwargs['use_time'] + collapse_call_stack = kwargs['collapse_call_stack'] -@lru_cache(maxsize=1024) -def addr2line_cache(addr): - cmd = f'{addr2line} -e {elf} -f -a -i {addr:x}' - return os.popen(cmd).read().split('\n') - - -def flush(buf, hartid): - global output_file - # get function names - pcs = [x[3] for x in buf] - a2ls = [] - - if cache: - for addr in pcs: - a2ls += addr2line_cache(int(addr, base=16))[:-1] - else: - a2ls = os.popen( - f'{addr2line} -e {elf} -f -a -i {" ".join(pcs)}').read().split('\n')[:-1] - + # Iterate buffer entries + events = [] for i in range(len(buf)-1): + (time, cyc, priv, pc, instr, args, cmt) = buf.pop(0) if use_time: @@ -91,158 +87,86 @@ def flush(buf, hartid): # Have lookahead time to this instruction? next_time = lah[time] if time in lah else next_time + duration = next_time - time - # print(f'time "{time}", cyc "{cyc}", priv "{priv}", pc "{pc}"' - # f', instr "{instr}", args "{args}"', file=sys.stderr) - - [pc, func, file] = a2ls.pop(0), a2ls.pop(0), a2ls.pop(0) - - # check for more output of a2l - inlined = '' - while not a2ls[0].startswith('0x'): - inlined += '(inlined by) ' + a2ls.pop(0) - # print(f'pc "{pc}", func "{func}", file "{file}"') + # Get information on current instruction from addr2line + a2l_info = elf.addr2line(pc) - # assemble values for json + # Assemble TraceViewer event # Doc: https://docs.google.com/document/d/ - # 1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview + # 1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview + event = {} # The name of the event, as displayed in Trace Viewer - name = instr + event['name'] = instr + # The event type, 'X' indicates a "complete event" + event['ph'] = 'X' # The event categories. This is a comma separated list of categories for the event. # The categories can be used to hide events in the Trace Viewer UI. - cat = 'instr' - # The tracing clock timestamp of the event. 
- # The timestamps are provided at microsecond granularity. - ts = time - # There is an extra parameter dur to specify the tracing clock duration - # of complete events in microseconds. - duration = next_time - time - - if banshee: + event['cat'] = 'instr' + # The tracing clock timestamp of the event. The timestamps are provided at microsecond + # granularity. + if use_time: + time = time / 1000 if fmt == 'cva6' else time / 1000000 + event['ts'] = time + # There is an extra parameter dur to specify the tracing clock duration of complete + # events in microseconds. In Banshee, each instruction takes one cycle + if use_time: + duration = duration / 1000 if fmt == 'cva6' else duration / 1000000 + event['dur'] = 1 if fmt == 'banshee' else duration + # The thread ID is used to group events in a single TraceViewer row + if not collapse_call_stack: + event['tid'] = a2l_info.function_stack[0]['func'] + if fmt == 'banshee': # Banshee stores all traces in a single file - hartid = priv - # In Banshee, each instruction takes one cycle - duration = 1 - - pid = elf+':hartid'+str(hartid) - funcname = func - - # args - arg_pc = pc - arg_instr = instr - arg_args = args - arg_cycles = cyc - arg_coords = file - arg_inlined = inlined - - output_file.write(( - f'{{"name": "{name}", "cat": "{cat}", "ph": "X", ' - f'"ts": {ts}, "dur": {duration}, "pid": "{pid}", ' - f'"tid": "{funcname}", "args": {{"pc": "{arg_pc}", ' - f'"instr": "{arg_instr} {arg_args}", "time": "{arg_cycles}", ' - f'"Origin": "{arg_coords}", "inline": "{arg_inlined}"' - f'}}}},\n')) - - -def parse_line(line, hartid): - global last_time, last_cyc + event['tid'] = priv + # Additional event args + event['args'] = {} + event['args']['pc'] = pc + event['args']['instr'] = f'{instr} {args}' + if cmt: + event['args']['comment'] = cmt + event['args']['cycle'] = cyc + event['args']['stack'] = a2l_info.function_stack_string(short=True) + event['args']['line'] = a2l_info.line() + + events.append(event) + return events + + +# 
Parses a trace line and returns an array of values extracted from the line +def parse_line(line, **kwargs): + fmt = kwargs['fmt'] + + # Compile regex + re_line = re.compile(REGEX[fmt]) + # print(line) match = re_line.match(line) if match: - (time, cyc, priv, pc, instr, args, cmt) = tuple( - [match.group(i+1).strip() for i in range(re_line.groups)]) - buf.append((time, cyc, priv, pc, instr, args, cmt)) - last_time, last_cyc = time, cyc - - if len(buf) > 10: - flush(buf, hartid) - return 0 - - -# Argument parsing -parser = argparse.ArgumentParser('tracevis', allow_abbrev=True) -parser.add_argument( - 'elf', - metavar='', - help='The binary executed to generate the traces', - - -) -parser.add_argument( - 'traces', - metavar='', - nargs='+', - help='Snitch traces to visualize') -parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='chrome.json', - help='Output JSON file') -parser.add_argument( - '--addr2line', - metavar='', - nargs='?', - default='addr2line', - help='`addr2line` binary to use for parsing') -parser.add_argument( - '-t', - '--time', - action='store_true', - help='Use the traces time instead of cycles') -parser.add_argument( - '-b', - '--banshee', - action='store_true', - help='Parse Banshee traces') -parser.add_argument( - '--no-cache', - action='store_true', - help='Disable addr2line caching (slow but might give better traces in some cases)') -parser.add_argument( - '-s', - '--start', - metavar='', - nargs='?', - type=int, - default=0, - help='First line to parse') -parser.add_argument( - '-e', - '--end', - metavar='', - nargs='?', - type=int, - default=-1, - help='Last line to parse') - -args = parser.parse_args() - -elf = args.elf -traces = args.traces -output = args.output -use_time = args.time -banshee = args.banshee -addr2line = args.addr2line -cache = not args.no_cache - -print('elf:', elf, file=sys.stderr) -print('traces:', traces, file=sys.stderr) -print('output:', output, file=sys.stderr) -print('addr2line:', 
addr2line, file=sys.stderr) -print('cache:', cache, file=sys.stderr) - -# Compile regex -if banshee: - re_line = re.compile(BANSHEE_REGEX) -else: - re_line = re.compile(RTL_REGEX) - -re_acc_line = re.compile(ACC_LINE_REGEX) - - -def offload_lookahead(lines): + # TODO extend CVA6 regex to extract instruction args + if fmt == 'cva6': + (time, cyc, priv, pc, instr) = tuple( + [match.group(i+1).strip() for i in range(re_line.groups)]) + args = cmt = '' + else: + (time, cyc, priv, pc, instr, args, cmt) = tuple( + [match.group(i+1).strip() for i in range(re_line.groups)]) + return (time, cyc, priv, pc, instr, args, cmt) + + return None + + +# Parses a trace file and returns a dictionary mapping the time stamp +# when every instruction is issued, to the time stamp when the instruction +# writes back. +def offload_lookahead(lines, **kwargs): + fmt = kwargs['fmt'] + use_time = kwargs['use_time'] + + # Compile regex + re_line = re.compile(REGEX[fmt]) + re_acc_line = re.compile(ACC_LINE_REGEX) + # dict mapping time stamp of retired instruction to time stamp of # accelerator complete lah = {} @@ -287,40 +211,177 @@ def offload_lookahead(lines): return lah -lah = {} - -with open(output, 'w') as output_file: - # JSON header - output_file.write('{"traceEvents": [\n') - - for filename in traces: - hartid = 0 - parsed_nums = re.findall(r'\d+', filename) - hartid = int(parsed_nums[-1]) if len(parsed_nums) else hartid+1 - fails = lines = 0 - last_time = last_cyc = 0 - - print( - f'parsing hartid {hartid} with trace {filename}', file=sys.stderr) - tot_lines = len(open(filename).readlines()) - with open(filename) as f: - all_lines = f.readlines()[args.start:args.end] - # offload lookahead - if not banshee: - lah = offload_lookahead(all_lines) - if has_progressbar: - for lino, line in progressbar.progressbar( - enumerate(all_lines), - max_value=tot_lines): - fails += parse_line(line, hartid) - lines += 1 +# Parses a trace file and returns a list of TraceViewer events. 
+# Each event is formatted as a dictionary. +def parse_trace(filename, **kwargs): + + start = kwargs['start'] + end = kwargs['end'] + fmt = kwargs['fmt'] + + # Open trace + print(f'parsing trace {filename}', file=sys.stderr) + lah = {} + buf = [] + fails = lines = 0 + with open(filename) as f: + + # Read lines + all_lines = f.readlines() + if end < 0: + end = len(all_lines) + end + 1 + all_lines = all_lines[start:end] + + # offload lookahead + if fmt == 'snitch': + lah = offload_lookahead(all_lines, **kwargs) + + # Use a progress bar iterator if the package is installed + if has_progressbar: + iterations = progressbar.progressbar( + enumerate(all_lines), + max_value=len(all_lines)) + else: + iterations = enumerate(all_lines) + + # Iterate lines + events = [] + for lino, line in iterations: + # Parse line + parsed_line = parse_line(line, **kwargs) + if parsed_line: + buf.append(parsed_line) + else: + fails += 1 + lines += 1 + + # Flush buffer when it contains enough lines + if len(buf) > 10: + events += flush(lah, buf, **kwargs) + events += flush(lah, buf, **kwargs) + + print(f' parsed {lines-fails} of {lines} lines', file=sys.stderr) + return events + + +def parse_traces(traces, **kwargs): + + # Open ELF file + elf_path = kwargs['elf'] + kwargs['elf'] = Elf(elf_path, a2l_binary=kwargs['addr2line']) + + # Iterate traces + events = [] + for i, filename in enumerate(traces): + + # Extract hartid from filename or use current index + # TODO doesn't work with hex numbers + # parsed_nums = re.findall(r'\d+', filename) + # hartid = int(parsed_nums[-1]) if len(parsed_nums) else i + hartid = i + + # Extract TraceViewer events from trace + trace_events = parse_trace(filename, **kwargs) + + # Assign a per-trace unique TID or PID to all events + pid = elf_path if 'pid' not in kwargs else kwargs['pid'] + for event in trace_events: + if kwargs['collapse_call_stack']: + event['pid'] = pid + event['tid'] = hartid else: - for lino, line in enumerate( - all_lines): - fails += 
parse_line(line, hartid) - lines += 1 - flush(buf, hartid) - print(f' parsed {lines-fails} of {lines} lines', file=sys.stderr) - - # JSON footer - output_file.write(r'{}]}''\n') + event['pid'] = pid+':hartid'+str(hartid) + + # Add to events from previous traces + events += trace_events + + return events + + +def main(**kwargs): + elf = kwargs['elf'] + traces = kwargs['traces'] + output = kwargs['output'] + addr2line = kwargs['addr2line'] + + print('elf:', elf, file=sys.stderr) + print('traces:', traces, file=sys.stderr) + print('output:', output, file=sys.stderr) + print('addr2line:', addr2line, file=sys.stderr) + + # Parse traces and create TraceViewer JSON object + events = parse_traces(**kwargs) + tvobj = {'traceEvents': events, 'displayTimeUnit': 'ns'} + + # Dump JSON object to file + with open(output, 'w') as output_file: + json.dump(tvobj, output_file, indent=4) + + +# Parse command-line args +def parse_args(): + # Argument parsing + parser = argparse.ArgumentParser('tracevis', allow_abbrev=True) + parser.add_argument( + 'elf', + metavar='', + help='The binary executed to generate the traces', + ) + parser.add_argument( + 'traces', + metavar='', + nargs='+', + help='Traces to visualize') + parser.add_argument( + '-o', + '--output', + metavar='', + nargs='?', + default='chrome.json', + help='Output JSON file') + parser.add_argument( + '--addr2line', + metavar='', + nargs='?', + default='addr2line', + help='`addr2line` binary to use for parsing') + parser.add_argument( + '-t', + '--time', + dest='use_time', + action='store_true', + help='Use the traces time instead of cycles') + parser.add_argument( + '-f', + '--format', + dest='fmt', + type=str, + default='snitch', + choices=FORMATS, + help='Trace format') + parser.add_argument( + '--collapse-call-stack', + action='store_true', + help='Visualize all instructions of a core in a single TraceViewer thread') + parser.add_argument( + '-s', + '--start', + metavar='', + nargs='?', + type=int, + default=0, + 
help='First line to parse') + parser.add_argument( + '-e', + '--end', + metavar='', + nargs='?', + type=int, + default=-1, + help='Last line to parse (inclusive)') + return parser.parse_args() + + +if __name__ == '__main__': + args = vars(parse_args()) + main(**args) From 675bff7d666f83b5c8a03b34211baa5e7133455f Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Sat, 19 Aug 2023 16:01:20 +0200 Subject: [PATCH 03/13] docs: Make agnostic of repo to reuse snippets in occamy --- docs/publications.md | 4 ++++ docs/ug/getting_started.md | 17 ++++++++++++----- mkdocs.yml | 1 + 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docs/publications.md b/docs/publications.md index dd0ebd23f..6f14daa64 100644 --- a/docs/publications.md +++ b/docs/publications.md @@ -2,6 +2,8 @@ If you use the Snitch cluster or its extensions in your work, you can cite us: + +
Snitch: A tiny Pseudo Dual-Issue Processor for Area and Energy Efficient Execution of Floating-Point Intensive Workloads

@@ -95,3 +97,5 @@ If you use the Snitch cluster or its extensions in your work, you can cite us: ```

+ + diff --git a/docs/ug/getting_started.md b/docs/ug/getting_started.md index de2487ae5..82f60ff70 100644 --- a/docs/ug/getting_started.md +++ b/docs/ug/getting_started.md @@ -1,10 +1,12 @@ + + # Getting Started ## Installation Clone the repository: ```shell -git clone https://github.com/pulp-platform/snitch_cluster.git --recurse-submodules +git clone https://github.com/pulp-platform/{{ repo }}.git --recurse-submodules ``` If you had already cloned the repository without the `--recurse-submodules` flag, clone its submodules: @@ -18,15 +20,17 @@ This repository requires several tools to be installed on your machine. Some of Note that installing all tools, in appropriate versions, may be non-trivial. For this purpose, we provide a Docker container with all free tools installed. -The [following section](#docker-container) provides instructions to install the Docker container. +The [following section](https://pulp-platform.github.io/{{ repo }}/ug/getting_started.html#docker-container) provides instructions to install the Docker container. -Users with access to ETH Zurich IIS machines can find all tools already installed on these machines. To complete the setup, skip to the [IIS environment setup](#iis-environment-setup) section. +Users with access to ETH Zurich IIS machines can find all tools already installed on these machines. To complete the setup, skip to the [IIS environment setup](https://pulp-platform.github.io/{{ repo }}/ug/getting_started.html#iis-environment-setup) section. -If you do choose to setup a custom development environment on your own machine, we strongly recommend you take example from our [Docker file](https://github.com/pulp-platform/snitch_cluster/blob/{{ branch }}/util/container/README.md). +If you do choose to setup a custom development environment on your own machine, we strongly recommend you take example from our [Docker file](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/util/container/README.md). 
## Docker container -The following instructions are extracted from the Docker container [README.md](https://github.com/pulp-platform/snitch_cluster/blob/{{ branch }}/util/container/README.md). For additional information on the Docker container refer to that document. + + +The following instructions are extracted from the Docker container [README.md](https://github.com/pulp-platform/{{ repo }}/blob/{{ branch }}/util/container/README.md). For additional information on the Docker container refer to that document. ### Installation @@ -38,6 +42,8 @@ The following instructions are extracted from the Docker container [README.md](h heading-offset=1 %} + + ## IIS environment setup To make sure the right versions of each tool are picked up, set the following environment variables, e.g. in a bash shell: @@ -74,3 +80,4 @@ Install the required packages in the currently active virtual environment: ```shell pip install -r python-requirements.txt ``` + diff --git a/mkdocs.yml b/mkdocs.yml index b817649fd..3f9595b0a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -26,6 +26,7 @@ plugins: on_error_fail: true use_directory_urls: false extra: + repo: snitch_cluster branch: main nav: - Home: index.md From b5d7fbd9adf4f030383b0d0acf57ccc71e4495d3 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 25 Aug 2023 11:28:25 +0200 Subject: [PATCH 04/13] snRuntime: Various improvements from offload study --- sw/snRuntime/api/cluster_interrupt_decls.h | 2 + sw/snRuntime/src/cluster_interrupts.h | 27 +++++++++++- sw/snRuntime/src/riscv.h | 2 + sw/snRuntime/src/start.c | 44 +++++++------------ sw/snRuntime/src/team.h | 5 +++ target/common/common.mk | 18 +++++--- target/snitch_cluster/Makefile | 8 ++-- .../sw/runtime/common/snitch_cluster_memory.c | 2 + 8 files changed, 69 insertions(+), 39 deletions(-) diff --git a/sw/snRuntime/api/cluster_interrupt_decls.h b/sw/snRuntime/api/cluster_interrupt_decls.h index aa18db210..00a6f0a43 100644 --- a/sw/snRuntime/api/cluster_interrupt_decls.h +++ 
b/sw/snRuntime/api/cluster_interrupt_decls.h @@ -6,6 +6,8 @@ inline void snrt_int_cluster_set(uint32_t mask); inline void snrt_int_cluster_clr(uint32_t mask); +inline void snrt_int_clr_mcip_unsafe(); + inline void snrt_int_clr_mcip(); inline void snrt_int_set_mcip(); diff --git a/sw/snRuntime/src/cluster_interrupts.h b/sw/snRuntime/src/cluster_interrupts.h index a3b15e8e0..ee2a36f87 100644 --- a/sw/snRuntime/src/cluster_interrupts.h +++ b/sw/snRuntime/src/cluster_interrupts.h @@ -2,6 +2,8 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +#include "../../deps/riscv-opcodes/encoding.h" + /** * @brief Write mask to the cluster-local interrupt set register * @param mask set bit at X sets the interrupt of hart X @@ -18,10 +20,33 @@ inline void snrt_int_cluster_clr(uint32_t mask) { *(snrt_cluster_clint_clr_ptr()) = mask; } -inline void snrt_int_clr_mcip() { +/** + * @brief Clear MCIP interrupt + * @detail The interrupt is cleared asynchronously, i.e. it may not be cleared + * yet when the function returns. Use `snrt_int_clr_mcip()` or + * `snrt_int_wait_mcip_clr` if you need to block until the interrupt is + * cleared. 
+ */ +inline void snrt_int_clr_mcip_unsafe() { snrt_int_cluster_clr(1 << snrt_cluster_core_idx()); } +/** + * @brief Wait for MCIP interrupt to be cleared + */ +inline void snrt_int_wait_mcip_clr() { + while (read_csr(mip) & MIP_MCIP) + ; +} + +/** + * @brief Clear MCIP interrupt and wait for the write to have effect + */ +inline void snrt_int_clr_mcip() { + snrt_int_clr_mcip_unsafe(); + snrt_int_wait_mcip_clr(); +} + inline void snrt_int_set_mcip() { snrt_int_cluster_set(1 << snrt_cluster_core_idx()); } diff --git a/sw/snRuntime/src/riscv.h b/sw/snRuntime/src/riscv.h index 47542daa3..faaf888b8 100644 --- a/sw/snRuntime/src/riscv.h +++ b/sw/snRuntime/src/riscv.h @@ -10,6 +10,8 @@ */ static inline void snrt_wfi() { asm volatile("wfi"); } +static inline void snrt_nop() { asm volatile("nop" : : :); } + static inline uint32_t snrt_mcycle() { uint32_t register r; asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory"); diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index 8a692e921..3fb338f4a 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -2,13 +2,6 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. 
// SPDX-License-Identifier: Apache-2.0 -static inline void snrt_crt0_cluster_hw_barrier() { - uint32_t register r; - uint32_t hw_barrier = - SNRT_CLUSTER_HW_BARRIER_ADDR + snrt_cluster_idx() * SNRT_CLUSTER_OFFSET; - asm volatile("lw %0, 0(%1)" : "=r"(r) : "r"(hw_barrier) : "memory"); -} - #ifdef SNRT_INIT_CLS static inline uint32_t snrt_cls_base_addr() { extern volatile uint32_t __cdata_start, __cdata_end; @@ -52,11 +45,9 @@ static inline void snrt_init_bss() { // Only one core needs to perform the initialization if (snrt_cluster_idx() == 0 && snrt_is_dm_core()) { - volatile uint32_t* p; - - for (p = (uint32_t*)(&__bss_start); p < (uint32_t*)(&__bss_end); p++) { - *p = 0; - } + size_t size = (size_t)(&__bss_end) - (size_t)(&__bss_start); + snrt_dma_start_1d_wideptr((uint64_t)(&__bss_start), + (uint64_t)(snrt_zero_memory_ptr()), size); } } #endif @@ -70,22 +61,17 @@ static inline void snrt_init_cls() { // Only one core per cluster has to do this if (snrt_is_dm_core()) { - volatile uint32_t* p; - volatile uint32_t* cls_ptr = (volatile uint32_t*)snrt_cls_base_addr(); + void* ptr = (void*)snrt_cls_base_addr(); + size_t size; // Copy cdata section to base of the TCDM - for (p = (uint32_t*)(&__cdata_start); p < (uint32_t*)(&__cdata_end); - p++) { - *cls_ptr = *p; - cls_ptr++; - } + size = (size_t)(&__cdata_end) - (size_t)(&__cdata_start); + if (size > 0) snrt_dma_start_1d(ptr, (void*)(&__cdata_start), size); // Clear cbss section - for (p = (uint32_t*)(&__cbss_start); p < (uint32_t*)(&__cbss_end); - p++) { - *cls_ptr = 0; - cls_ptr++; - } + ptr = (void*)((uint32_t)ptr + size); + size = (size_t)(&__cbss_end) - (size_t)(&__cbss_start); + snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr()), size); } } #endif @@ -105,7 +91,6 @@ void snrt_main() { int exit_code = 0; #ifdef SNRT_CRT0_CALLBACK0 - snrt_crt0_callback0(); #endif @@ -129,6 +114,11 @@ void snrt_main() { snrt_init_cls(); #endif +#if defined(SNRT_INIT_BSS) || defined(SNRT_INIT_CLS) + // Single DMA wait call for 
both snrt_init_bss() and snrt_init_cls() + if (snrt_is_dm_core()) snrt_dma_wait_all(); +#endif + #ifdef SNRT_CRT0_CALLBACK3 snrt_crt0_callback3(); #endif @@ -142,7 +132,7 @@ void snrt_main() { #endif #ifdef SNRT_CRT0_PRE_BARRIER - snrt_crt0_cluster_hw_barrier(); + snrt_cluster_hw_barrier(); #endif #ifdef SNRT_CRT0_CALLBACK5 @@ -159,7 +149,7 @@ void snrt_main() { #endif #ifdef SNRT_CRT0_POST_BARRIER - snrt_crt0_cluster_hw_barrier(); + snrt_cluster_hw_barrier(); #endif #ifdef SNRT_CRT0_CALLBACK7 diff --git a/sw/snRuntime/src/team.h b/sw/snRuntime/src/team.h index 54a3b0aa2..918037e64 100644 --- a/sw/snRuntime/src/team.h +++ b/sw/snRuntime/src/team.h @@ -28,6 +28,11 @@ inline uint32_t __attribute__((const)) snrt_global_core_idx() { return snrt_hartid() - snrt_global_core_base_hartid(); } +inline uint32_t __attribute__((const)) snrt_global_compute_core_idx() { + return snrt_cluster_idx() * snrt_cluster_compute_core_num() + + snrt_cluster_core_idx(); +} + inline uint32_t __attribute__((const)) snrt_cluster_idx() { return snrt_global_core_idx() / snrt_cluster_core_num(); } diff --git a/target/common/common.mk b/target/common/common.mk index 3535fb156..9c469f5a6 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -6,15 +6,19 @@ LOGS_DIR ?= logs TB_DIR ?= $(SNITCH_ROOT)/target/common/test UTIL_DIR ?= $(SNITCH_ROOT)/util -# Support for local override +# External executables BENDER ?= bender DASM ?= spike-dasm VLT ?= verilator VERIBLE_FMT ?= verible-verilog-format +CLANG_FORMAT ?= clang-format + +# Internal executables BIN2JTAG ?= $(UTIL_DIR)/bin2jtag.py -ANNOTATE ?= $(UTIL_DIR)/trace/annotate.py GENTRACE ?= $(UTIL_DIR)/trace/gen_trace.py -CLANG_FORMAT ?= clang-format +ANNOTATE_PY ?= $(UTIL_DIR)/trace/annotate.py +EVENTS_PY ?= $(UTIL_DIR)/trace/events.py +PERF_CSV_PY ?= $(UTIL_DIR)/trace/perf_csv.py VERILATOR_ROOT ?= $(dir $(shell which $(VLT)))/../share/verilator VLT_ROOT ?= ${VERILATOR_ROOT} @@ -194,10 +198,10 @@ traces: $(shell (ls 
$(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\. # make annotate # Generate source-code interleaved traces for all harts. Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script -$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE} - $(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE} - $(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d +$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} + $(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< +$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} + $(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d BINARY ?= $(shell cat $(LOGS_DIR)/.rtlbinary) annotate: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.s/') || echo "") \ $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.diff/') || echo "") diff --git a/target/snitch_cluster/Makefile b/target/snitch_cluster/Makefile index 9b346ba1a..f464697d1 100644 --- a/target/snitch_cluster/Makefile +++ b/target/snitch_cluster/Makefile @@ -266,12 +266,12 @@ bin/snitch_cluster.vcs: ${VCS_SOURCES} ${TB_SRCS} $(TB_CC_SOURCES) $(TB_ASM_SOUR ########## $(LOGS_DIR)/perf.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \ - $(ROOT)/util/trace/perf_csv.py - $(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json + $(PERF_CSV_PY) + $(PYTHON) $(PERF_CSV_PY) -o $@ -i $(LOGS_DIR)/hart_*_perf.json $(LOGS_DIR)/event.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \ - $(ROOT)/util/trace/perf_csv.py - $(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json --filter tstart tend + $(PERF_CSV_PY) + $(PYTHON) $(PERF_CSV_PY) -o $@ -i 
$(LOGS_DIR)/hart_*_perf.json --filter tstart tend ######## # Util # diff --git a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c index f76f16508..48c08faa3 100644 --- a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c +++ b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c @@ -11,3 +11,5 @@ extern volatile uint32_t* snrt_cluster_clint_set_ptr(); extern volatile uint32_t* snrt_cluster_clint_clr_ptr(); extern uint32_t snrt_cluster_hw_barrier_addr(); + +extern volatile uint32_t* snrt_zero_memory_ptr(); From 84fc3d6f5df43096e46883b6a2b127873326be92 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Mon, 28 Aug 2023 11:00:19 +0200 Subject: [PATCH 05/13] util/sim: Extend simulation utilities for Occamy --- .github/workflows/ci.yml | 6 +- .gitlab-ci.yml | 12 +- sw/blas/axpy/verify.py | 6 +- sw/blas/gemm/Makefile | 10 +- .../{interrupt-local.c => interrupt_local.c} | 0 target/snitch_cluster/sw/apps/run.py | 21 --- target/snitch_cluster/sw/apps/run.yaml | 17 -- target/snitch_cluster/sw/run.yaml | 85 ++++++++++ target/snitch_cluster/sw/tests/Makefile | 2 +- target/snitch_cluster/sw/tests/run.py | 21 --- target/snitch_cluster/sw/tests/run.yaml | 71 --------- util/sim/elf.py | 7 +- util/sim/list_apps.py | 13 +- util/sim/{sim_utils.py => simulate.py} | 145 ++++++++++-------- util/sim/verification.py | 9 +- 15 files changed, 209 insertions(+), 216 deletions(-) rename sw/tests/{interrupt-local.c => interrupt_local.c} (100%) delete mode 100755 target/snitch_cluster/sw/apps/run.py delete mode 100644 target/snitch_cluster/sw/apps/run.yaml create mode 100644 target/snitch_cluster/sw/run.yaml delete mode 100755 target/snitch_cluster/sw/tests/run.py delete mode 100644 target/snitch_cluster/sw/tests/run.yaml rename util/sim/{sim_utils.py => simulate.py} (53%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71fdc8eaa..84faeac81 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,8 +43,7 @@ jobs: - name: Run Tests working-directory: target/snitch_cluster run: |- - ./sw/tests/run.py sw/tests/run.yaml --simulator verilator - ./sw/apps/run.py sw/apps/run.yaml --simulator verilator + ../../util/sim/simulate.py sw/run.yaml --simulator verilator ############################################ # Build SW on Snitch Cluster w/ Banshee # @@ -67,5 +66,4 @@ jobs: SNITCH_LOG: info working-directory: target/snitch_cluster run: |- - ./sw/tests/run.py sw/tests/run.yaml --simulator banshee - ./sw/apps/run.py sw/apps/run.yaml --simulator banshee + ../../util/sim/simulate.py sw/run.yaml --simulator banshee diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 811c82856..18adcf22e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -93,8 +93,7 @@ snitch-cluster-vlt: script: - cd target/snitch_cluster - $VERILATOR make bin/snitch_cluster.vlt - - $VERILATOR ./sw/tests/run.py sw/tests/run.yaml --simulator verilator - - $VERILATOR ./sw/apps/run.py sw/apps/run.yaml --simulator verilator + - $VERILATOR ../../util/sim/simulate.py sw/run.yaml --simulator verilator # yamllint enable rule:line-length # VCS @@ -103,8 +102,7 @@ snitch-cluster-vcs: script: - cd target/snitch_cluster - $VCS make bin/snitch_cluster.vcs - - $VCS ./sw/tests/run.py sw/tests/run.yaml --simulator vcs - - $VCS ./sw/apps/run.py sw/apps/run.yaml --simulator vcs + - $VCS ../../util/sim/simulate.py sw/run.yaml --simulator vcs # Questa snitch-cluster-vsim: @@ -112,8 +110,7 @@ snitch-cluster-vsim: script: - cd target/snitch_cluster - $QUESTA make bin/snitch_cluster.vsim - - $QUESTA ./sw/tests/run.py sw/tests/run.yaml --simulator vsim - - $QUESTA ./sw/apps/run.py sw/apps/run.yaml --simulator vsim + - $QUESTA ../../util/sim/simulate.py sw/run.yaml --simulator vsim # Banshee snitch-cluster-banshee: @@ -129,5 +126,4 @@ snitch-cluster-banshee: - cd banshee - cargo install --debug --path . 
- cd ../target/snitch_cluster - - ./sw/tests/run.py sw/tests/run.yaml --simulator banshee - - ./sw/apps/run.py sw/apps/run.yaml --simulator banshee + - ../../util/sim/simulate.py sw/run.yaml --simulator banshee diff --git a/sw/blas/axpy/verify.py b/sw/blas/axpy/verify.py index 5838c68df..02cb15975 100755 --- a/sw/blas/axpy/verify.py +++ b/sw/blas/axpy/verify.py @@ -24,12 +24,16 @@ def main(): args = verification.parse_args() raw_results = verification.simulate(sim_bin=args.sim_bin, snitch_bin=args.snitch_bin, + symbols_bin=args.symbols_bin, log=args.log, output_uids=['z']) z_actual = np.array(bytes_to_doubles(raw_results['z'])) # Extract input operands from ELF file - elf = Elf(args.snitch_bin) + if args.symbols_bin: + elf = Elf(args.symbols_bin) + else: + elf = Elf(args.snitch_bin) a = np.array(bytes_to_doubles(elf.get_symbol_contents('a'))) x = np.array(bytes_to_doubles(elf.get_symbol_contents('x'))) y = np.array(bytes_to_doubles(elf.get_symbol_contents('y'))) diff --git a/sw/blas/gemm/Makefile b/sw/blas/gemm/Makefile index c390c3667..604556ed1 100644 --- a/sw/blas/gemm/Makefile +++ b/sw/blas/gemm/Makefile @@ -9,18 +9,20 @@ MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) DATA_DIR := $(realpath $(MK_DIR)/data) SRC_DIR := $(realpath $(MK_DIR)/src) -DATA_CFG ?= $(DATA_DIR)/params.hjson - APP ?= gemm SRCS ?= $(realpath $(SRC_DIR)/main.c) INCDIRS ?= $(DATA_DIR) $(SRC_DIR) -$(DATA_DIR)/data.h: $(DATA_DIR)/datagen.py $(DATA_CFG) +DATA_CFG ?= $(DATA_DIR)/params.hjson +DATAGEN_PY = $(DATA_DIR)/datagen.py +DATA_H = $(DATA_DIR)/data.h + +$(DATA_H): $(DATAGEN_PY) $(DATA_CFG) $< -c $(DATA_CFG) > $@ .PHONY: clean-data clean clean-data: - rm -f $(DATA_DIR)/data.h + rm -f $(DATA_H) clean: clean-data diff --git a/sw/tests/interrupt-local.c b/sw/tests/interrupt_local.c similarity index 100% rename from sw/tests/interrupt-local.c rename to sw/tests/interrupt_local.c diff --git a/target/snitch_cluster/sw/apps/run.py b/target/snitch_cluster/sw/apps/run.py deleted file mode 
100755 index 86b9422eb..000000000 --- a/target/snitch_cluster/sw/apps/run.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# Luca Colagrande - -import sys -from pathlib import Path - -sys.path.append(str(Path(__file__).parent / '../../../../util/sim')) -import sim_utils # noqa: E402,E261 - - -def main(): - sim_utils.main(lambda test: Path(__file__).parent / f'{test}/build/{Path(test).name}.elf') - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/target/snitch_cluster/sw/apps/run.yaml b/target/snitch_cluster/sw/apps/run.yaml deleted file mode 100644 index 93bd32d8b..000000000 --- a/target/snitch_cluster/sw/apps/run.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 - -runs: - - app: blas/axpy - cmd: ../../sw/blas/axpy/verify.py {sim_bin} {elf} - - app: blas/gemm - - app: dnn/batchnorm - - app: dnn/linear - - app: dnn/maxpool - - app: dnn/gemm -# dnn/gelu # seems like it stalls -# dnn/conv2d # fails with exit code 32 -# dnn/fusedconv # fails newly -# dnn/layernorm # throws illegal instruction on FDIV in simulation -# dnn/softmax # throws illegal instruction on FDIV in simulation diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml new file mode 100644 index 000000000..4c7ff7b1e --- /dev/null +++ b/target/snitch_cluster/sw/run.yaml @@ -0,0 +1,85 @@ +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 + +runs: + - elf: tests/build/atomics.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x4 + - elf: tests/build/barrier.elf + - elf: tests/build/data_mover.elf + - elf: tests/build/dma_simple.elf + - elf: tests/build/event_unit.elf + - elf: tests/build/fence_i.elf + - elf: tests/build/fp8_comparison_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with segfault + - elf: tests/build/fp8_comparison_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with segfault + - elf: tests/build/fp8_computation_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with JIT issue + - elf: tests/build/fp8_computation_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x6 + - elf: tests/build/fp8alt_comparison_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with segfault + - elf: tests/build/fp8alt_comparison_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 + - elf: tests/build/fp8alt_computation_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with JIT issue + - elf: tests/build/fp8alt_computation_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x12 + - elf: tests/build/fp16_comparison_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 + - elf: tests/build/fp16_comparison_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 + - elf: tests/build/fp16_computation_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with JIT issue + - elf: tests/build/fp16_computation_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x6 + - elf: tests/build/fp16alt_comparison_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 + - elf: tests/build/fp16alt_comparison_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 
0x10 + - elf: tests/build/fp16alt_computation_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with JIT issue + - elf: tests/build/fp16alt_computation_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x16 + - elf: tests/build/fp32_comparison_scalar.elf + - elf: tests/build/fp32_comparison_vector.elf + - elf: tests/build/fp32_computation_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 + - elf: tests/build/fp32_computation_vector.elf + simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 + - elf: tests/build/fp32_conversions_scalar.elf + simulators: [vsim, vcs, verilator] # banshee fails with illegal instruction + - elf: tests/build/fp64_conversions_scalar.elf + simulators: [vsim, vcs, verilator] + # - elf: tests/build/interrupt.elf + - elf: tests/build/interrupt_local.elf + - elf: tests/build/multi_cluster.elf + - elf: tests/build/openmp_parallel.elf + - elf: tests/build/openmp_for_static_schedule.elf + - elf: tests/build/openmp_double_buffering.elf + - elf: tests/build/perf_cnt.elf + - elf: tests/build/printf_simple.elf + - elf: tests/build/printf_fmtint.elf + - elf: tests/build/simple.elf + - elf: tests/build/team_global.elf + - elf: tests/build/tls.elf + - elf: tests/build/varargs_1.elf + - elf: tests/build/varargs_2.elf + - elf: tests/build/zero_mem.elf + - elf: tests/build/non_null_exitcode.elf + exit_code: 14 + - elf: apps/blas/axpy/build/axpy.elf + cmd: ../../sw/blas/axpy/verify.py {sim_bin} {elf} + - elf: apps/blas/gemm/build/gemm.elf + - elf: apps/dnn/batchnorm/build/batchnorm.elf + - elf: apps/dnn/linear/build/linear.elf + - elf: apps/dnn/maxpool/build/maxpool.elf + - elf: apps/dnn/gemm/build/gemm.elf + # - elf: apps/dnn/gelu/build/gelu.elf # seems like it stalls + # - elf: apps/dnn/conv2d/build/conv2d.elf # fails with exit code 32 + # - elf: apps/dnn/fusedconv/build/fusedconv.elf # fails newly + # - elf: apps/dnn/layernorm/build/layernorm.elf + # throws 
illegal instruction on FDIV in simulation + # - elf: apps/dnn/softmax/build/softmax.elf + # throws illegal instruction on FDIV in simulation diff --git a/target/snitch_cluster/sw/tests/Makefile b/target/snitch_cluster/sw/tests/Makefile index c3b204b55..57b26d9a0 100644 --- a/target/snitch_cluster/sw/tests/Makefile +++ b/target/snitch_cluster/sw/tests/Makefile @@ -44,7 +44,7 @@ RISCV_LDFLAGS += -lsnRuntime # Outputs # ########### -APPS = $(shell $(MK_DIR)/../../../../util/sim/list_apps.py run.yaml) +APPS = $(shell $(MK_DIR)/../../../../util/sim/list_apps.py --in-dir tests/ ../run.yaml) ELFS = $(abspath $(addprefix $(BUILDDIR)/,$(addsuffix .elf,$(APPS)))) DEPS = $(abspath $(addprefix $(BUILDDIR)/,$(addsuffix .d,$(APPS)))) DUMPS = $(abspath $(addprefix $(BUILDDIR)/,$(addsuffix .dump,$(APPS)))) diff --git a/target/snitch_cluster/sw/tests/run.py b/target/snitch_cluster/sw/tests/run.py deleted file mode 100755 index 3fe6a6e51..000000000 --- a/target/snitch_cluster/sw/tests/run.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# Luca Colagrande - -import sys -from pathlib import Path - -sys.path.append(str(Path(__file__).parent / '../../../../util/sim')) -import sim_utils # noqa: E402,E261 - - -def main(): - sim_utils.main(lambda test: Path(__file__).parent / f'build/{test}.elf') - - -if __name__ == '__main__': - main() diff --git a/target/snitch_cluster/sw/tests/run.yaml b/target/snitch_cluster/sw/tests/run.yaml deleted file mode 100644 index 2f86d1a70..000000000 --- a/target/snitch_cluster/sw/tests/run.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2023 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
-# SPDX-License-Identifier: Apache-2.0 - -runs: - - app: atomics - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x4 - - app: barrier - - app: data_mover - - app: dma_simple - - app: event_unit - - app: fence_i - - app: fp8_comparison_scalar - simulators: [vsim, vcs, verilator] # banshee fails with segfault - - app: fp8_comparison_vector - simulators: [vsim, vcs, verilator] # banshee fails with segfault - - app: fp8_computation_scalar - simulators: [vsim, vcs, verilator] # banshee fails with JIT issue - - app: fp8_computation_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x6 - - app: fp8alt_comparison_scalar - simulators: [vsim, vcs, verilator] # banshee fails with segfault - - app: fp8alt_comparison_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 - - app: fp8alt_computation_scalar - simulators: [vsim, vcs, verilator] # banshee fails with JIT issue - - app: fp8alt_computation_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x12 - - app: fp16_comparison_scalar - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 - - app: fp16_comparison_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 - - app: fp16_computation_scalar - simulators: [vsim, vcs, verilator] # banshee fails with JIT issue - - app: fp16_computation_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x6 - - app: fp16alt_comparison_scalar - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 - - app: fp16alt_comparison_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x10 - - app: fp16alt_computation_scalar - simulators: [vsim, vcs, verilator] # banshee fails with JIT issue - - app: fp16alt_computation_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x16 - - app: fp32_comparison_scalar - - app: fp32_comparison_vector - - app: fp32_computation_scalar - 
simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 - - app: fp32_computation_vector - simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x2 - - app: fp32_conversions_scalar - simulators: [vsim, vcs, verilator] # banshee fails with illegal instruction - - app: fp64_conversions_scalar - simulators: [vsim, vcs, verilator] - # - app: interrupt - - app: interrupt-local - - app: multi_cluster - - app: openmp_parallel - - app: openmp_for_static_schedule - - app: openmp_double_buffering - - app: perf_cnt - - app: printf_simple - - app: printf_fmtint - - app: simple - - app: team_global - - app: tls - - app: varargs_1 - - app: varargs_2 - - app: zero_mem - - app: non_null_exitcode - exit_code: 14 diff --git a/util/sim/elf.py b/util/sim/elf.py index db1721160..a46a6764d 100644 --- a/util/sim/elf.py +++ b/util/sim/elf.py @@ -1,7 +1,6 @@ -#!/usr/bin/env python3 -# Copyright 2020 ETH Zurich and University of Bologna. -# Solderpad Hardware License, Version 0.51, see LICENSE for details. -# SPDX-License-Identifier: SHL-0.51 +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 # # Luca Colagrande # diff --git a/util/sim/list_apps.py b/util/sim/list_apps.py index 608f8e335..baefdf7eb 100755 --- a/util/sim/list_apps.py +++ b/util/sim/list_apps.py @@ -7,22 +7,29 @@ import argparse import yaml +from pathlib import Path def main(): # Argument parsing parser = argparse.ArgumentParser() + parser.add_argument( + '--in-dir', + type=Path, + help='Only apps below this directory (at any level) will be listed') parser.add_argument( 'input', - help='The YAML file containing run information', - ) + help='The YAML file containing run information') args = parser.parse_args() with open(args.input, 'r') as file: tests = yaml.safe_load(file)['runs'] for test in tests: - print(test['app']) + elf = Path(test['elf']) + match_parts = args.in_dir.parts + if elf.parts[:len(match_parts)] == match_parts: + print(elf.stem) if __name__ == '__main__': diff --git a/util/sim/sim_utils.py b/util/sim/simulate.py similarity index 53% rename from util/sim/sim_utils.py rename to util/sim/simulate.py index bdd01615a..a1466dc16 100755 --- a/util/sim/sim_utils.py +++ b/util/sim/simulate.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Copyright 2023 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. 
# SPDX-License-Identifier: Apache-2.0 @@ -18,7 +19,7 @@ BANSHEE_CFG = 'src/banshee.yaml' # Tool settings -SIMULATORS = ['vsim', 'banshee', 'verilator', 'vcs'] +SIMULATORS = ['vsim', 'banshee', 'verilator', 'vcs', 'other'] DEFAULT_SIMULATOR = SIMULATORS[0] SIMULATOR_BINS = { 'vsim': 'bin/snitch_cluster.vsim', @@ -48,6 +49,11 @@ def parse_args(): default=DEFAULT_SIMULATOR, choices=SIMULATORS, help='Choose a simulator to run the test with') + parser.add_argument( + '--sim-bin', + action='store', + nargs='?', + help='Override default path to simulator binary') parser.add_argument( '--dry-run', action='store_true', @@ -75,73 +81,95 @@ def check_exit_code(test, exit_code): return exit_code -def run_test(test, format_elf_path, simulator, dry_run=False): - # Get test parameters - app = test['app'] +def run_simulation(cmd, simulator, test): + # Defaults + result = 1 + + # Spawn simulation subprocess + p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True) + + # Poll simulation subprocess and log its output + while p.poll() is None: + line = p.stdout.readline() + print(line, end='', flush=True) + + # When simulating with vsim or vcs, we need to parse the simulation + # log to catch the application's return code + if simulator in ['vsim', 'vcs']: + # Capture success + regex_success = r'\[SUCCESS\] Program finished successfully' + match_success = re.search(regex_success, line) + if match_success: + result = 0 + else: + regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)' + match = re.search(regex_fail, line) + if match: + exit_code = match.group(1) + result = check_exit_code(test, exit_code) + + # Check if the subprocess terminated correctly + exit_code = p.poll() + # In Banshee and Verilator the exit code of the Snitch binary is returned + # through the exit code of the simulation command + if simulator in ['banshee', 'verilator']: + result = check_exit_code(test, exit_code) + # For custom commands the return code is that of the command 
+ elif simulator == 'other': + result = exit_code + # For standard simulation commands the simulated Snitch binary exit + # code is overriden only if the simulator failed + else: + if exit_code != 0: + result = exit_code + + return result + + +def run_test(test, args): + # Extract args + simulator = args.simulator + sim_bin = args.sim_bin if args.sim_bin else SIMULATOR_BINS[simulator] + dry_run = args.dry_run + testlist = args.testlist + + # Check if simulator is supported for this test if 'simulators' in test: if simulator not in test['simulators']: return 0 # Construct path to executable - elf = format_elf_path(app) + elf = Path(test['elf']) + if testlist: + elf = Path(testlist).absolute().parent / elf cprint(f'Run test {colored(elf, "cyan")}', attrs=["bold"]) # Construct simulation command (override only supported for RTL) if 'cmd' in test and simulator != 'banshee': cmd = test['cmd'] + cmd = cmd.format(sim_bin=sim_bin, elf=elf, simulator=simulator) + simulator = 'other' else: cmd = SIMULATOR_CMDS[simulator] - cmd = cmd.format(sim_bin=SIMULATOR_BINS[simulator], elf=elf) + cmd = cmd.format(sim_bin=sim_bin, elf=elf) print(f'$ {cmd}', flush=True) - # Run test + # Run simulation result = 0 if not dry_run: - result = 1 - - # When simulating with vsim or vcs, we need to parse the simulation - # log to catch the application's return code - if simulator in ['vsim', 'vcs']: - p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, - text=True) - - while p.poll() is None: - line = p.stdout.readline() - print(line, end='', flush=True) - - # Capture success - regex_success = r'\[SUCCESS\] Program finished successfully' - match_success = re.search(regex_success, line) - if match_success: - result = 0 - else: - regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)' - match = re.search(regex_fail, line) - if match: - exit_code = match.group(1) - result = check_exit_code(test, exit_code) - - # Check if the subprocess terminated correctly - if p.poll() != 0: - result 
= p.poll() - - else: - p = subprocess.Popen(cmd, shell=True) - p.wait() - exit_code = p.returncode - result = check_exit_code(test, exit_code) + result = run_simulation(cmd, simulator, test) - # Report failure or success - if result != 0: - cprint(f'{app} test failed', 'red', attrs=['bold'], flush=True) - else: - cprint(f'{app} test passed', 'green', attrs=['bold'], flush=True) + # Report failure or success + if result != 0: + cprint(f'{elf} test failed', 'red', attrs=['bold'], flush=True) + else: + cprint(f'{elf} test passed', 'green', attrs=['bold'], flush=True) return result def print_failed_test(test): - print(f'{colored(test["app"], "cyan")} test {colored("failed", "red")}') + print(f'{colored(test["elf"], "cyan")} test {colored("failed", "red")}') def print_test_summary(failed_tests, dry_run=False): @@ -157,28 +185,25 @@ def print_test_summary(failed_tests, dry_run=False): return 0 -def run_tests(testlist, format_elf_path, simulator, dry_run=False, early_exit=False): +def run_tests(args): # Iterate tests - tests = get_tests(testlist) + tests = get_tests(args.testlist) failed_tests = [] for test in tests: # Run test - result = run_test(test, format_elf_path, simulator, dry_run) + result = run_test(test, args) if result != 0: failed_tests.append(test) # End program if requested on first test failure - if early_exit: + if args.early_exit: break + return print_test_summary(failed_tests, args.dry_run) - return print_test_summary(failed_tests, dry_run) - -# format_elf_path: function which constructs the path to an ELF binary -# from a test name as listed in the test list file -def main(format_elf_path): +def main(): args = parse_args() - sys.exit(run_tests(args.testlist, - format_elf_path, - args.simulator, - args.dry_run, - args.early_exit)) + sys.exit(run_tests(args)) + + +if __name__ == '__main__': + main() diff --git a/util/sim/verification.py b/util/sim/verification.py index ed3f0f670..9878ef62a 100644 --- a/util/sim/verification.py +++ 
b/util/sim/verification.py @@ -24,13 +24,18 @@ def parse_args(): parser.add_argument( 'snitch_bin', help='The Snitch binary to be executed by the simulated Snitch hardware') + parser.add_argument( + '--symbols-bin', + help='An optional binary containing the I/O symbols. By default,' + 'these are searched for in snitch_bin. This argument serves as an' + 'alternative.') parser.add_argument( '--log', help='Redirect simulation output to this log file') return parser.parse_args() -def simulate(sim_bin, snitch_bin, log, output_uids): +def simulate(sim_bin, snitch_bin, log, output_uids, symbols_bin=None): # Open ELF file for processing elf = Elf(snitch_bin) @@ -43,6 +48,8 @@ def simulate(sim_bin, snitch_bin, log, output_uids): sim.poll(tohost, 1, 0) # Read out results from memory + if symbols_bin: + elf = Elf(symbols_bin) raw_outputs = {} for uid in output_uids: address = elf.get_symbol_address(uid) From 7d4d2316203eac32a88fc5cf9ffee5099dff1c21 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 25 Aug 2023 13:20:44 +0200 Subject: [PATCH 06/13] sw: Update AXPY and GEMM for (trivial) multi-cluster --- sw/blas/axpy/src/main.c | 31 +++-- sw/blas/gemm/data/datagen.py | 80 +++++------- sw/blas/gemm/data/params.hjson | 4 +- sw/blas/gemm/src/gemm.h | 61 ++++++++++ sw/blas/gemm/src/main.c | 194 ++++++++++++------------------ sw/blas/gemm/verify.py | 61 ++++++++++ sw/dnn/src/dnn.h | 5 + target/snitch_cluster/sw/run.yaml | 1 + util/sim/data_utils.py | 27 ++++- 9 files changed, 285 insertions(+), 179 deletions(-) create mode 100755 sw/blas/gemm/verify.py diff --git a/sw/blas/axpy/src/main.c b/sw/blas/axpy/src/main.c index 7fe9d3f83..1b379c811 100644 --- a/sw/blas/axpy/src/main.c +++ b/sw/blas/axpy/src/main.c @@ -10,17 +10,26 @@ int main() { double *local_x, *local_y, *local_z; + double *remote_x, *remote_y, *remote_z; + + // Calculate size and pointers for each cluster + uint32_t frac = l / snrt_cluster_num(); + uint32_t offset = frac * snrt_cluster_idx(); + remote_x = x + 
offset; + remote_y = y + offset; + remote_z = z + offset; // Allocate space in TCDM local_x = (double *)snrt_l1_next(); - local_y = local_x + l; - local_z = local_y + l; + local_y = local_x + frac; + local_z = local_y + frac; // Copy data in TCDM if (snrt_is_dm_core()) { - size_t size = l * sizeof(double); - snrt_dma_start_1d(local_x, x, size); - snrt_dma_start_1d(local_y, y, size); + size_t size = frac * sizeof(double); + snrt_dma_start_1d(local_x, remote_x, size); + snrt_dma_start_1d(local_y, remote_y, size); + snrt_dma_wait_all(); } snrt_cluster_hw_barrier(); @@ -28,7 +37,7 @@ int main() { // Compute if (!snrt_is_dm_core()) { uint32_t start_cycle = snrt_mcycle(); - axpy(l, a, local_x, local_y, local_z); + axpy(frac, a, local_x, local_y, local_z); uint32_t end_cycle = snrt_mcycle(); } @@ -36,10 +45,15 @@ int main() { // Copy data out of TCDM if (snrt_is_dm_core()) { - size_t size = l * sizeof(double); - snrt_dma_start_1d(z, local_z, size); + size_t size = frac * sizeof(double); + snrt_dma_start_1d(remote_z, local_z, size); + snrt_dma_wait_all(); } + snrt_cluster_hw_barrier(); + +// TODO: currently only works for single cluster otherwise need to +// synchronize all cores here #ifdef BIST uint32_t nerr = l; @@ -47,6 +61,7 @@ int main() { if (snrt_global_core_idx() == 0) { for (int i = 0; i < l; i++) { if (local_z[i] == g[i]) nerr--; + printf("%d %d\n", local_z[i], g[i]); } } diff --git a/sw/blas/gemm/data/datagen.py b/sw/blas/gemm/data/datagen.py index c6faa092f..b33eb4afc 100755 --- a/sw/blas/gemm/data/datagen.py +++ b/sw/blas/gemm/data/datagen.py @@ -10,6 +10,13 @@ import argparse import pathlib import hjson +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/")) +from data_utils import emit_license, format_scalar_definition, \ + format_vector_definition, format_ifdef_wrapper # noqa: E402 + np.random.seed(42) @@ -33,41 +40,11 @@ } -def format_vector_definition(id, vector, typ): - s = f'{typ} {id}[{len(vector)}] 
= ' + '{\n' - for i, el in enumerate(vector): - if typ != 'char': - s += f'\t{el},' - else: - if type(el) == float: - print(el) - s += f'0x{el:02x},' - if i % 8 == 7: - s += '\n' - s += '};' - return s - - -def format_vector_declaration(id, vector, typ): - s = f'{typ} {id}[{len(vector)}];' - return s - - -def format_scalar_definition(id, scalar, typ): - s = f'{typ} {id} = {scalar};' - return s - - -def emit_header_file(**kwargs): - - emit_str = "// Copyright 2023 ETH Zurich and University of Bologna.\n" + \ - "// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + \ - "// SPDX-License-Identifier: Apache-2.0\n\n" - emit_str += emit_gemm_data(**kwargs) - return emit_str +def golden_model(a, b, alpha, c): + return np.matmul(a, b) + alpha * c -def emit_gemm_data(**kwargs): +def emit_header(**kwargs): # Generate random input matrices dtype = NUMPY_TYPES[str(kwargs['prec'])] @@ -104,30 +81,31 @@ def emit_gemm_data(**kwargs): a = np.random.rand(kwargs['M'], kwargs['K']).astype(dtype) b = np.random.rand(kwargs['K'], kwargs['N']).astype(dtype) c = np.random.rand(kwargs['M'], kwargs['N']).astype(dtype) - result = np.matmul(a, b) + kwargs['alpha'] * c + result = golden_model(a, b, kwargs['alpha'], c) # Store matrices in transposed form if requested a = a.T if kwargs['ta'] else a b = b.T if kwargs['tb'] else b - data_str = [] - data_str += [format_scalar_definition('M', kwargs['M'], 'uint32_t')] - data_str += [format_scalar_definition('N', kwargs['N'], 'uint32_t')] - data_str += [format_scalar_definition('K', kwargs['K'], 'uint32_t')] - data_str += [format_scalar_definition('TA', int(kwargs['ta']), 'uint32_t')] - data_str += [format_scalar_definition('TB', int(kwargs['tb']), 'uint32_t')] - data_str += [format_scalar_definition('ALPHA', kwargs['alpha'], 'uint32_t')] - data_str += [format_scalar_definition('dtype_size', kwargs['prec']//8, 'uint32_t')] - data_str += [format_scalar_definition('expand', kwargs['expand'], 'uint32_t')] - data_str += 
[format_vector_definition('a', a.flatten(), C_TYPES[str(kwargs['prec'])])] - data_str += [format_vector_definition('b', b.flatten(), C_TYPES[str(kwargs['prec'])])] - data_str += [format_vector_definition('c', c.flatten(), C_TYPES[str(kwargs['prec'])])] + data_str = [emit_license()] + data_str += [format_scalar_definition('uint32_t', 'M', kwargs['M'])] + data_str += [format_scalar_definition('uint32_t', 'N', kwargs['N'])] + data_str += [format_scalar_definition('uint32_t', 'K', kwargs['K'])] + data_str += [format_scalar_definition('uint32_t', 'TA', int(kwargs['ta']))] + data_str += [format_scalar_definition('uint32_t', 'TB', int(kwargs['tb']))] + data_str += [format_scalar_definition('uint32_t', 'ALPHA', kwargs['alpha'])] + data_str += [format_scalar_definition('uint32_t', 'dtype_size', kwargs['prec']//8)] + data_str += [format_scalar_definition('uint32_t', 'expand', kwargs['expand'])] + data_str += [format_vector_definition(C_TYPES[str(kwargs['prec'])], 'a', a.flatten())] + data_str += [format_vector_definition(C_TYPES[str(kwargs['prec'])], 'b', b.flatten())] + data_str += [format_vector_definition(C_TYPES[str(kwargs['prec'])], 'c', c.flatten())] if kwargs['prec'] == 8: - data_str += [format_vector_definition('result', result.flatten(), C_TYPES['64'])] + result_def = format_vector_definition(C_TYPES['64'], 'result', result.flatten()) else: - data_str += [format_vector_definition('result', - result.flatten(), - C_TYPES[str(kwargs['prec'])])] + result_def = format_vector_definition(C_TYPES[str(kwargs['prec'])], + 'result', + result.flatten()) + data_str += [format_ifdef_wrapper('BIST', result_def)] data_str = '\n\n'.join(data_str) return data_str @@ -149,7 +127,7 @@ def main(): param = hjson.loads(f.read()) # Emit header file - print(emit_header_file(**param)) + print(emit_header(**param)) if __name__ == '__main__': diff --git a/sw/blas/gemm/data/params.hjson b/sw/blas/gemm/data/params.hjson index e079a52e6..23a4100cf 100644 --- a/sw/blas/gemm/data/params.hjson +++ 
b/sw/blas/gemm/data/params.hjson @@ -5,12 +5,12 @@ // Parameters for a GEMM { - M: 16, + M: 192, N: 16, K: 16, alpha: 0, ta: false, tb: true, // must be true for SIMD - prec: 32, + prec: 64, expand: 0 } diff --git a/sw/blas/gemm/src/gemm.h b/sw/blas/gemm/src/gemm.h index da967e698..86ec17ede 100644 --- a/sw/blas/gemm/src/gemm.h +++ b/sw/blas/gemm/src/gemm.h @@ -4,14 +4,22 @@ // // Author: Tim Fischer // Luca Bertaccini +// Luca Colagrande #include #include "snrt.h" +// Guard to avoid conflict with DNN header file +// TODO: move this definition to Snitch math library to solve problem +#ifndef PRECISION_T +#define PRECISION_T +typedef enum { FP64 = 8, FP32 = 4, FP16 = 2, FP8 = 1 } precision_t; + typedef float v2f32 __attribute__((vector_size(8))); typedef __fp16 v4f16 __attribute__((vector_size(8))); typedef char v8f8 __attribute__((vector_size(8))); +#endif void gemm_fp64_baseline(uint32_t M, uint32_t N, uint32_t K, double* A, uint32_t ldA, uint32_t ta, double* B, uint32_t ldB, @@ -874,3 +882,56 @@ void gemm_fp8_ex_opt(uint32_t M, uint32_t N, uint32_t K, char* A, uint32_t ldA, snrt_ssr_disable(); } + +// BLAS compliant GEMM kernel, with some additional arguments at the beginning +// to specify Snitch implementation details. 
Matrix sizes and pointers are for +// the whole cluster computation +// TODO: alpha (and beta) should be of floating-point type (same precision as +// operands) +void gemm(precision_t prec, uint32_t expand, uint32_t setup_ssr, + uint32_t transa, uint32_t transb, uint32_t m, uint32_t n, uint32_t k, + uint32_t alpha, void* a, uint32_t lda, void* b, uint32_t ldb, + double beta, void* c, uint32_t ldc) { + const uint32_t compute_num = snrt_cluster_compute_core_num(); + const uint32_t compute_id = snrt_cluster_core_idx(); + + // Compute cores work not on contiguous blocks but on strided rows + uint32_t lda_strided = compute_num * lda; + uint32_t ldc_strided = compute_num * ldc; + + // Compute cores access A and C at offsets of one row from each other + uint32_t offsetA = compute_id * lda; + uint32_t offsetC = compute_id * ldc; + + // Compute fraction of C rows every core computes + uint32_t frac_m = m / compute_num; + + switch (prec) { + case FP64: + gemm_fp64_opt(frac_m, n, k, (double*)a + offsetA, lda_strided, + transa, (double*)b, ldb, transb, (double*)c + offsetC, + ldc_strided, &alpha, setup_ssr); + break; + case FP32: + gemm_fp32_opt(frac_m, n, k, (float*)a + offsetA, lda_strided, + (float*)b, ldb, (float*)c + offsetC, ldc_strided, + &alpha, setup_ssr); + break; + case FP16: + if (expand) { + gemm_fp16_ex_opt( + frac_m, n, k, (__fp16*)a + offsetA, lda_strided, (__fp16*)b, + ldb, (__fp16*)c + offsetC, ldc_strided, &alpha, setup_ssr); + } else { + gemm_fp16_opt(frac_m, n, k, (__fp16*)a + offsetA, lda_strided, + (__fp16*)b, ldb, (__fp16*)c + offsetC, + ldc_strided, &alpha, setup_ssr); + } + break; + case FP8: + gemm_fp8_ex_opt(frac_m, n, k, (char*)a + offsetA, lda, (char*)b, + ldb, (char*)c + offsetC, ldc_strided, &alpha, + setup_ssr); + break; + } +} diff --git a/sw/blas/gemm/src/main.c b/sw/blas/gemm/src/main.c index bde009c95..3da55c2ab 100644 --- a/sw/blas/gemm/src/main.c +++ b/sw/blas/gemm/src/main.c @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // // 
Author: Tim Fischer +// Luca Colagrande #include #include @@ -11,40 +12,33 @@ #include "gemm.h" #include "snrt.h" -// Padding of innermost dimension of a Matrix -// Useful for preventing banking conflicts between cores -// that are accessing different rows of the matrix -#define MAT_ROW_PADDING 0 - -// Padding in between matrices A, B for preventing -// banking conflicts in the beginning -#define MAT_PADDING 0 - -#define CHECK_RESULT - -typedef enum { FP64 = 8, FP32 = 4, FP16 = 2, FP8 = 1 } precision_t; - -void *l1_a, *l1_b, *l1_c; - int main() { - const uint32_t compute_num = snrt_cluster_compute_core_num(); - const uint32_t compute_id = snrt_cluster_core_idx(); - - uint32_t a_size = (M * (K + MAT_ROW_PADDING) + MAT_PADDING) * dtype_size; - uint32_t b_size = (K + MAT_ROW_PADDING) * N * dtype_size; - uint32_t c_size = M * N * dtype_size; - + void *local_a, *local_b, *local_c; + void *remote_a, *remote_b, *remote_c; + + // Calculate size and pointers for each cluster + uint32_t frac_m = M / snrt_cluster_num(); + uint32_t frac_a = frac_m * K; + uint32_t frac_c = frac_m * N; + uint32_t size_frac_a = frac_a * dtype_size; + uint32_t size_b = K * N * dtype_size; + uint32_t size_frac_c = frac_c * dtype_size; + uint32_t offset_a = frac_a * snrt_cluster_idx(); + uint32_t offset_c = frac_c * snrt_cluster_idx(); + remote_a = a + offset_a; + remote_b = b; + remote_c = c + offset_c; + + // Allocate space in TCDM + local_a = (void *)snrt_l1_next(); + local_b = local_a + size_frac_a; + local_c = local_b + size_b; + + // Copy data in TCDM if (snrt_is_dm_core()) { - l1_a = snrt_l1alloc(a_size); - l1_b = snrt_l1alloc(b_size); - l1_c = snrt_l1alloc(c_size); - snrt_dma_start_2d(l1_a, a, dtype_size * K, - dtype_size * (K + MAT_ROW_PADDING), dtype_size * K, - M); - snrt_dma_start_2d(l1_b, b, dtype_size * K, - dtype_size * (K + MAT_ROW_PADDING), dtype_size * K, - N); - snrt_dma_start_1d(l1_c, c, dtype_size * M * N); + snrt_dma_start_1d(local_a, remote_a, size_frac_a); + 
snrt_dma_start_1d(local_b, remote_b, size_b); + snrt_dma_start_1d(local_c, remote_c, size_frac_c); snrt_dma_wait_all(); } @@ -52,104 +46,70 @@ int main() { // Compute if (!snrt_is_dm_core()) { - const uint32_t setup_SSR = 1; + const uint32_t setup_ssr = 1; uint32_t start_cycle = snrt_mcycle(); - if (!TA && !TB) { - volatile uint32_t A_offset = - compute_id * (K + MAT_ROW_PADDING) * dtype_size; - volatile uint32_t C_offset = compute_id * N * dtype_size; - volatile uint32_t ldA = compute_num * (K + MAT_ROW_PADDING); - volatile uint32_t ldB = N + MAT_ROW_PADDING; - volatile uint32_t ldC = N * compute_num; - - gemm_fp64_opt(M / compute_num, N, K, &l1_a[A_offset], ldA, TA, l1_b, - ldB, TB, &l1_c[C_offset], ldC, &ALPHA, setup_SSR); - } else if (!TA && TB) { - volatile uint32_t A_offset = - compute_id * (K + MAT_ROW_PADDING) * dtype_size; - volatile uint32_t C_offset = compute_id * N * dtype_size; - volatile uint32_t ldA = compute_num * (K + MAT_ROW_PADDING); - volatile uint32_t ldB = K + MAT_ROW_PADDING; - volatile uint32_t ldC = N * compute_num; - - switch (dtype_size) { - case FP64: - gemm_fp64_opt(M / compute_num, N, K, &l1_a[A_offset], ldA, - TA, l1_b, ldB, TB, &l1_c[C_offset], ldC, - &ALPHA, setup_SSR); - break; - case FP32: - gemm_fp32_opt(M / compute_num, N, K, &l1_a[A_offset], ldA, - l1_b, ldB, &l1_c[C_offset], ldC, &ALPHA, - setup_SSR); - break; - case FP16: - if (expand) { - gemm_fp16_ex_opt(M / compute_num, N, K, &l1_a[A_offset], - ldA, l1_b, ldB, &l1_c[C_offset], ldC, - &ALPHA, setup_SSR); - } else { - gemm_fp16_opt(M / compute_num, N, K, &l1_a[A_offset], - ldA, l1_b, ldB, &l1_c[C_offset], ldC, - &ALPHA, setup_SSR); - } - break; - case FP8: - gemm_fp8_ex_opt(M / compute_num, N, K, &l1_a[A_offset], ldA, - l1_b, ldB, &l1_c[C_offset], ldC, &ALPHA, - setup_SSR); - break; - } - } else if (TA) { - printf("transpose TA not supported\n"); + volatile uint32_t lda = K; + volatile uint32_t ldb = N; + volatile uint32_t ldc = N; + + // Transpose of A unsopported + if (TA) 
return -1; + if (TB) { + // Transpose of B supported only in FP64 + if (dtype_size != FP64) return -1; + ldb = K; } + + gemm(dtype_size, expand, setup_ssr, TA, TB, frac_m, N, K, ALPHA, + local_a, lda, local_b, ldb, 1, local_c, ldc); + uint32_t end_cycle = snrt_mcycle(); } snrt_cluster_hw_barrier(); -#ifdef CHECK_RESULT - - uint32_t errors = 0; - if (compute_id == 0) { - switch (dtype_size) { - case FP64: - for (uint32_t m = 0; m < M; m++) { - for (uint32_t n = 0; n < N; n++) { - uint32_t idx = m * N + n; - if (fabs(result[idx] - ((double *)l1_c)[idx]) > 0.001) - errors++; - } - } - break; - case FP32: - for (uint32_t m = 0; m < M; m++) { - for (uint32_t n = 0; n < N; n++) { - uint32_t idx = m * N + n; - if (fabs(result[idx] - ((float *)l1_c)[idx]) > 0.001) - errors++; - } - } - break; - case FP16: - for (uint32_t m = 0; m < M; m++) { - for (uint32_t n = 0; n < N; n++) { - uint32_t idx = m * N + n; - if (fabs(result[idx] - ((__fp16 *)l1_c)[idx]) > 0.001) - errors++; - } + // Copy data out of TCDM + if (snrt_is_dm_core()) { + snrt_dma_start_1d(remote_c, local_c, size_frac_c); + snrt_dma_wait_all(); + } + +// TODO: currently only works for single cluster otherwise need to +// synchronize all cores here +#ifdef BIST + uint32_t errors = M * N; + + if (snrt_cluster_core_idx() == 0) { + for (uint32_t m = 0; m < M; m++) { + for (uint32_t n = 0; n < N; n++) { + uint32_t idx = m * N + n; + switch (dtype_size) { + case FP64: + if (fabs(result[idx] - ((double *)local_c)[idx]) > + 0.001) + errors--; + break; + case FP32: + if (fabs(result[idx] - ((float *)local_c)[idx]) > 0.001) + errors--; + break; + case FP16: + if (fabs(result[idx] - ((__fp16 *)local_c)[idx]) > + 0.001) + errors--; + break; + case FP8: + printf("No golden model yet for fp8!\n"); + return -1; + break; } - break; - case FP8: - printf("No golden model yet for fp8!\n"); - return -1; - break; + } } printf("%d/%d Errors\n", errors, M * N); } - return errors; + return errors; #endif return 0; diff --git 
a/sw/blas/gemm/verify.py b/sw/blas/gemm/verify.py new file mode 100755 index 000000000..3bae7f801 --- /dev/null +++ b/sw/blas/gemm/verify.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Luca Colagrande + +import sys +from pathlib import Path +import numpy as np +from data.datagen import golden_model + +sys.path.append(str(Path(__file__).parent / '../../../util/sim/')) +import verification # noqa: E402 +from elf import Elf # noqa: E402 +from data_utils import bytes_to_doubles, bytes_to_uint32s # noqa: E402 + + +ERR_THRESHOLD = 0.001 + + +def main(): + # Run simulation and get outputs + args = verification.parse_args() + raw_results = verification.simulate(sim_bin=args.sim_bin, + snitch_bin=args.snitch_bin, + symbols_bin=args.symbols_bin, + log=args.log, + output_uids=['c']) + c_actual = np.array(bytes_to_doubles(raw_results['c'])) + + # Extract input operands from ELF file + if args.symbols_bin: + elf = Elf(args.symbols_bin) + else: + elf = Elf(args.snitch_bin) + a = np.array(bytes_to_doubles(elf.get_symbol_contents('a'))) + b = np.array(bytes_to_doubles(elf.get_symbol_contents('b'))) + c = np.array(bytes_to_doubles(elf.get_symbol_contents('c'))) + alpha = bytes_to_uint32s(elf.get_symbol_contents('ALPHA'))[0] + m = bytes_to_uint32s(elf.get_symbol_contents('M'))[0] + n = bytes_to_uint32s(elf.get_symbol_contents('N'))[0] + k = bytes_to_uint32s(elf.get_symbol_contents('K'))[0] + tb = bytes_to_uint32s(elf.get_symbol_contents('TB'))[0] + a = np.reshape(a, (m, k)) + b = np.reshape(b, (k, n)) + if tb: + b = b.transpose() + c = np.reshape(c, (m, n)) + + # Verify results + c_golden = golden_model(a, b, alpha, c).flatten() + + absolute_err = np.absolute(c_golden - c_actual) + fail = np.any(absolute_err > ERR_THRESHOLD) + + return int(fail) + + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/sw/dnn/src/dnn.h b/sw/dnn/src/dnn.h index b345ce874..537f488cd 100644 --- a/sw/dnn/src/dnn.h +++ b/sw/dnn/src/dnn.h @@ -6,11 +6,16 @@ #include +// Guard to avoid conflict with BLAS header file +// TODO: move this definition to Snitch math library to solve problem +#ifndef PRECISION_T +#define PRECISION_T typedef enum { FP64 = 8, FP32 = 4, FP16 = 2, FP8 = 1 } precision_t; typedef float v2f32 __attribute__((vector_size(8))); typedef __fp16 v4f16 __attribute__((vector_size(8))); typedef char v8f8 __attribute__((vector_size(8))); +#endif typedef union { double f64; diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index 4c7ff7b1e..8e80cf35c 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -72,6 +72,7 @@ runs: - elf: apps/blas/axpy/build/axpy.elf cmd: ../../sw/blas/axpy/verify.py {sim_bin} {elf} - elf: apps/blas/gemm/build/gemm.elf + cmd: ../../sw/blas/gemm/verify.py {sim_bin} {elf} - elf: apps/dnn/batchnorm/build/batchnorm.elf - elf: apps/dnn/linear/build/linear.elf - elf: apps/dnn/maxpool/build/maxpool.elf diff --git a/util/sim/data_utils.py b/util/sim/data_utils.py index cea0721f4..664e2624b 100644 --- a/util/sim/data_utils.py +++ b/util/sim/data_utils.py @@ -5,6 +5,14 @@ # Author: Luca Colagrande import struct +from datetime import datetime + + +def emit_license(): + s = (f"// Copyright {datetime.now().year} ETH Zurich and University of Bologna." 
+ f"// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n" + f"// SPDX-License-Identifier: Apache-2.0\n\n") + return s def variable_attributes(alignment=None, section=None): @@ -20,7 +28,11 @@ def format_vector_definition(type, uid, vector, alignment=None, section=None): attributes = variable_attributes(alignment, section) s = f'{type} {uid}[{len(vector)}] {attributes} = ' + '{\n' for el in vector: - s += f'\t{el},\n' + if type != 'char': + el_str = f'{el}' + else: + el_str = f'0x{el:02x}' + s += f'\t{el_str},\n' s += '};' return s @@ -55,3 +67,16 @@ def bytes_to_doubles(byte_array): double = struct.unpack(' Date: Tue, 12 Sep 2023 20:53:51 +0200 Subject: [PATCH 07/13] sw: Make compatible with non-12 LLVM versions (#46) Co-authored-by: Luca Colagrande --- sw/blas/gemm/src/gemm.h | 16 ++++++++-------- target/snitch_cluster/sw/toolchain.mk | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sw/blas/gemm/src/gemm.h b/sw/blas/gemm/src/gemm.h index 86ec17ede..ab0f17285 100644 --- a/sw/blas/gemm/src/gemm.h +++ b/sw/blas/gemm/src/gemm.h @@ -121,7 +121,7 @@ void gemm_fp64_opt(uint32_t M, uint32_t N, uint32_t K, double* A, uint32_t ldA, for (uint32_t m = 0; m < M; m++) { uint32_t n = 0; for (uint32_t n0 = 0; n0 < N / unroll; n0++) { - register double c[unroll]; + double c[unroll]; // Load intermediate result if (*ALPHA) { @@ -234,7 +234,7 @@ void gemm_fp32_opt(const uint32_t M, const uint32_t N, const uint32_t K, for (uint32_t n0 = 0; n0 < N / unroll; n0++) { float* _C = &C[m * ldC + n / 2]; const register float zero = 0.0; - register v2f32 c[unroll], reduce_reg[unroll]; + v2f32 c[unroll], reduce_reg[unroll]; asm volatile( "lw t0, 0(%[ALPHA]) \n" @@ -384,8 +384,8 @@ void gemm_fp16_opt(uint32_t M, uint32_t N, uint32_t K, __fp16* A, uint32_t ldA, for (uint32_t n0 = 0; n0 < N / unroll; n0++) { __fp16* _C = &C[m * ldC + n]; const register float zero = 0.0; - register v4f16 c[unroll]; - register v2f32 reduce_reg[unroll]; + v4f16 
c[unroll]; + v2f32 reduce_reg[unroll]; uint32_t alpha; asm volatile( @@ -568,8 +568,8 @@ void gemm_fp16_ex_opt(uint32_t M, uint32_t N, uint32_t K, __fp16* A, for (uint32_t n0 = 0; n0 < N / unroll; n0++) { __fp16* _C = &C[m * ldC + n]; const register float zero = 0.0; - register v4f16 c[unroll]; - register v2f32 reduce_reg[unroll]; + v4f16 c[unroll]; + v2f32 reduce_reg[unroll]; uint32_t alpha; asm volatile( @@ -735,8 +735,8 @@ void gemm_fp8_ex_opt(uint32_t M, uint32_t N, uint32_t K, char* A, uint32_t ldA, for (uint32_t n0 = 0; n0 < N / unroll; n0++) { char* _C = &C[m * ldC + n]; const register float zero = 0.0; - register v8f8 c[unroll]; - register v4f16 reduce_reg[unroll]; + v8f8 c[unroll]; + v4f16 reduce_reg[unroll]; uint32_t alpha; asm volatile( diff --git a/target/snitch_cluster/sw/toolchain.mk b/target/snitch_cluster/sw/toolchain.mk index e8fb7b46b..4fa0fc5af 100644 --- a/target/snitch_cluster/sw/toolchain.mk +++ b/target/snitch_cluster/sw/toolchain.mk @@ -17,6 +17,7 @@ DEBUG ?= OFF # ON to turn on debugging symbols # Compiler toolchain LLVM_BINROOT ?= $(dir $(shell which riscv32-unknown-elf-clang)) +LLVM_VER ?= $(shell $(LLVM_BINROOT)/llvm-config --version | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+') RISCV_CC ?= $(LLVM_BINROOT)/clang RISCV_LD ?= $(LLVM_BINROOT)/ld.lld RISCV_AR ?= $(LLVM_BINROOT)/llvm-ar @@ -48,7 +49,7 @@ RISCV_LDFLAGS += -fuse-ld=$(RISCV_LD) RISCV_LDFLAGS += -nostartfiles RISCV_LDFLAGS += -nostdlib RISCV_LDFLAGS += -lc -RISCV_LDFLAGS += -L$(LLVM_BINROOT)/../lib/clang/12.0.1/lib/ +RISCV_LDFLAGS += -L$(LLVM_BINROOT)/../lib/clang/$(LLVM_VER)/lib/ RISCV_LDFLAGS += -lclang_rt.builtins-riscv32 # Archiver flags From 5c470c12a1f451509bea8344a50f1abd872e3be9 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 15 Sep 2023 10:26:06 +0200 Subject: [PATCH 08/13] axpy: Allow storing data to custom sections --- sw/blas/axpy/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sw/blas/axpy/Makefile b/sw/blas/axpy/Makefile index 
40c3af1f6..bed4edaa8 100644 --- a/sw/blas/axpy/Makefile +++ b/sw/blas/axpy/Makefile @@ -9,7 +9,8 @@ MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) DATA_DIR := $(realpath $(MK_DIR)/data) SRC_DIR := $(realpath $(MK_DIR)/src) -LENGTH ?= 24 +LENGTH ?= 24 +SECTION ?= APP ?= axpy SRCS ?= $(SRC_DIR)/main.c @@ -19,7 +20,7 @@ DATAGEN_PY = $(DATA_DIR)/datagen.py DATA_H = $(DATA_DIR)/data.h $(DATA_H): $(DATAGEN_PY) - $< $(LENGTH) > $@ + $< $(LENGTH) --section="$(SECTION)" > $@ .PHONY: clean-data clean From 4b9f7f1e0bcaddd9706c051a08d1934d7769445d Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 15 Sep 2023 10:27:39 +0200 Subject: [PATCH 09/13] util/perf_csv.py: Allow >10 hexadecimal-indexed traces --- util/trace/perf_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/trace/perf_csv.py b/util/trace/perf_csv.py index d2286aae5..450758c70 100755 --- a/util/trace/perf_csv.py +++ b/util/trace/perf_csv.py @@ -51,7 +51,7 @@ def main(): for dump in dumps: # Get hart id from filename and append to index - hartid = int(re.search(HARTID_REGEX, dump).group(1)) + hartid = int(re.search(HARTID_REGEX, dump).group(1), base=16) index.append(hartid) # Populate dictionary of metrics for the current hart From e57335773da2899bae86410760b6e8b0c1a0de51 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 15 Sep 2023 17:27:20 +0200 Subject: [PATCH 10/13] util/sim: Log errors caught by IPC verification framework --- sw/blas/axpy/verify.py | 3 +++ util/sim/verification.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/sw/blas/axpy/verify.py b/sw/blas/axpy/verify.py index 02cb15975..80b195ff9 100755 --- a/sw/blas/axpy/verify.py +++ b/sw/blas/axpy/verify.py @@ -42,6 +42,9 @@ def main(): z_golden = golden_model(a, x, y) relative_err = np.absolute((z_golden - z_actual) / z_golden) fail = np.any(relative_err > ERR_THRESHOLD) + if (fail): + verification.dump_results_to_csv([z_golden, z_actual, relative_err], + Path.cwd() / 
'axpy_results.csv') return int(fail) diff --git a/util/sim/verification.py b/util/sim/verification.py index 9878ef62a..04594a51c 100644 --- a/util/sim/verification.py +++ b/util/sim/verification.py @@ -7,6 +7,8 @@ import sys import argparse +import numpy as np +import csv from elf import Elf from pathlib import Path @@ -60,3 +62,19 @@ def simulate(sim_bin, snitch_bin, log, output_uids, symbols_bin=None): sim.finish(wait_for_sim=True) return raw_outputs + + +# Takes a set of Numpy arrays (of the same shape), flattens them, zips them +# and dumps them to a CSV file. Arrays may for example be: golden results, actual +# results, absolute errors and relative errors. +def dump_results_to_csv(results, path): + # Flatten and zip arrays + flattened = [arr.flatten() for arr in results] + zipped = np.column_stack(flattened) + # Write row-by-row to CSV file + with open(path, 'w') as csv_file: + csv_writer = csv.writer(csv_file) + for row in zipped: + csv_writer.writerow(row) + # Print path where results were written + print(f"Wrote results to {path}") From 4581ed9d3f5cbf2d298a43c8c60c844fdf4dfb4e Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Tue, 5 Sep 2023 11:28:39 +0200 Subject: [PATCH 11/13] test: Fix bandwidth loss in `tb_memory_axi` IP --- target/common/test/tb_memory_axi.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/target/common/test/tb_memory_axi.sv b/target/common/test/tb_memory_axi.sv index cddbc4239..2c8e28a6e 100644 --- a/target/common/test/tb_memory_axi.sv +++ b/target/common/test/tb_memory_axi.sv @@ -95,6 +95,7 @@ module tb_memory_axi #( .ID_WIDTH ( AxiIdWidth ), .USER_WIDTH ( AxiUserWidth ), .DECOUPLE_W ( 1 ), + .FULL_BW ( 1 ), .AXI_MAX_WRITE_TXNS ( 32'd128 ), .AXI_MAX_READ_TXNS ( 32'd128 ) ) i_axi_to_reg ( From 1273cc22baa86bea5c48c6e7f8b8316430d3284a Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 20 Sep 2023 16:47:50 +0200 Subject: [PATCH 12/13] simulate.py: Run tests in parallel --- .github/workflows/ci.yml | 6 ++- .gitlab-ci.yml | 9 
++-- target/common/common.mk | 2 +- util/sim/simulate.py | 111 ++++++++++++++++++++++++++++++--------- 4 files changed, 97 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84faeac81..f2c3e692a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,7 +43,8 @@ jobs: - name: Run Tests working-directory: target/snitch_cluster run: |- - ../../util/sim/simulate.py sw/run.yaml --simulator verilator + ../../util/sim/simulate.py sw/run.yaml --simulator verilator -j \ + --verbose ############################################ # Build SW on Snitch Cluster w/ Banshee # @@ -66,4 +67,5 @@ jobs: SNITCH_LOG: info working-directory: target/snitch_cluster run: |- - ../../util/sim/simulate.py sw/run.yaml --simulator banshee + ../../util/sim/simulate.py sw/run.yaml --simulator banshee -j \ + --verbose diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 18adcf22e..610c271ea 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -93,7 +93,7 @@ snitch-cluster-vlt: script: - cd target/snitch_cluster - $VERILATOR make bin/snitch_cluster.vlt - - $VERILATOR ../../util/sim/simulate.py sw/run.yaml --simulator verilator + - $VERILATOR ../../util/sim/simulate.py sw/run.yaml --simulator verilator -j --verbose # yamllint enable rule:line-length # VCS @@ -102,7 +102,7 @@ snitch-cluster-vcs: script: - cd target/snitch_cluster - $VCS make bin/snitch_cluster.vcs - - $VCS ../../util/sim/simulate.py sw/run.yaml --simulator vcs + - $VCS ../../util/sim/simulate.py sw/run.yaml --simulator vcs -j --verbose # Questa snitch-cluster-vsim: @@ -110,7 +110,8 @@ snitch-cluster-vsim: script: - cd target/snitch_cluster - $QUESTA make bin/snitch_cluster.vsim - - $QUESTA ../../util/sim/simulate.py sw/run.yaml --simulator vsim + - $QUESTA ../../util/sim/simulate.py sw/run.yaml --simulator vsim -j + --verbose # Banshee snitch-cluster-banshee: @@ -126,4 +127,4 @@ snitch-cluster-banshee: - cd banshee - cargo install --debug --path . 
- cd ../target/snitch_cluster - - ../../util/sim/simulate.py sw/run.yaml --simulator banshee + - ../../util/sim/simulate.py sw/run.yaml --simulator banshee -j --verbose diff --git a/target/common/common.mk b/target/common/common.mk index 9c469f5a6..6b9c679d0 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -154,7 +154,7 @@ define QUESTASIM @echo 'binary=$$(realpath --relative-to=${MKFILE_DIR} $$1)' >> $@ @echo 'cd ${MKFILE_DIR}' >> $@ @echo 'echo $$binary > $(LOGS_DIR)/.rtlbinary' >> $@ - @echo '${VSIM} +permissive ${VSIM_FLAGS} -work ${MKFILE_DIR}/${VSIM_BUILDDIR} -c \ + @echo '${VSIM} +permissive ${VSIM_FLAGS} $$3 -work ${MKFILE_DIR}/${VSIM_BUILDDIR} -c \ -ldflags "-Wl,-rpath,${FESVR}/lib -L${FESVR}/lib -lfesvr -lutil" \ $1 +permissive-off ++$$binary ++$$2' >> $@ @chmod +x $@ diff --git a/util/sim/simulate.py b/util/sim/simulate.py index a1466dc16..db00292af 100755 --- a/util/sim/simulate.py +++ b/util/sim/simulate.py @@ -8,11 +8,14 @@ # TODO colluca: timeout feature import argparse +import multiprocessing from pathlib import Path import subprocess from termcolor import colored, cprint +import os import re import sys +import time import yaml @@ -28,7 +31,7 @@ 'vcs': 'bin/snitch_cluster.vcs' } SIMULATOR_CMDS = { - 'vsim': '{sim_bin} {elf}', + 'vsim': '{sim_bin} {elf} "" -batch', 'banshee': ('{{sim_bin}} --no-opt-llvm --no-opt-jit --configuration {cfg}' ' --trace {{elf}} > /dev/null').format(cfg=BANSHEE_CFG), 'verilator': '{sim_bin} {elf}', @@ -62,6 +65,22 @@ def parse_args(): '--early-exit', action='store_true', help='Exit as soon as any test fails') + parser.add_argument( + '-j', + action='store', + dest='n_procs', + nargs='?', + type=int, + default=1, + const=os.cpu_count(), + help=('Maximum number of tests to run in parallel. ' + 'One if the option is not present. 
Equal to the number of CPU cores ' + 'if the option is present but not followed by an argument.')) + parser.add_argument( + '--verbose', + action='store_true', + help=('Option to print simulation logs when multiple tests are run in parallel. ' + 'Logs are always printed when n_procs == 1')) args = parser.parse_args() return args @@ -81,17 +100,25 @@ def check_exit_code(test, exit_code): return exit_code -def run_simulation(cmd, simulator, test): +def multiple_processes(args): + return args.n_procs != 1 + + +def run_simulation(cmd, simulator, test, quiet=False): # Defaults result = 1 + log = '' # Spawn simulation subprocess - p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True) + p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + universal_newlines=True) # Poll simulation subprocess and log its output while p.poll() is None: line = p.stdout.readline() - print(line, end='', flush=True) + log += line + if not quiet: + print(line, end='', flush=True) # When simulating with vsim or vcs, we need to parse the simulation # log to catch the application's return code @@ -123,7 +150,7 @@ def run_simulation(cmd, simulator, test): if exit_code != 0: result = exit_code - return result + return result, log def run_test(test, args): @@ -132,11 +159,12 @@ def run_test(test, args): sim_bin = args.sim_bin if args.sim_bin else SIMULATOR_BINS[simulator] dry_run = args.dry_run testlist = args.testlist + quiet = multiple_processes(args) # Check if simulator is supported for this test if 'simulators' in test: if simulator not in test['simulators']: - return 0 + return (0, '') # Construct path to executable elf = Path(test['elf']) @@ -152,12 +180,14 @@ def run_test(test, args): else: cmd = SIMULATOR_CMDS[simulator] cmd = cmd.format(sim_bin=sim_bin, elf=elf) - print(f'$ {cmd}', flush=True) + if not quiet: + print(f'$ {cmd}', flush=True) # Run simulation result = 0 + log = '' if not dry_run: - result =
run_simulation(cmd, simulator, test) + result, log = run_simulation(cmd, simulator, test, quiet) # Report failure or success if result != 0: @@ -165,39 +195,72 @@ def run_test(test, args): else: cprint(f'{elf} test passed', 'green', attrs=['bold'], flush=True) - return result + return (result, log) def print_failed_test(test): print(f'{colored(test["elf"], "cyan")} test {colored("failed", "red")}') -def print_test_summary(failed_tests, dry_run=False): - if not dry_run: - print('\n==== Test summary ====') +def print_test_summary(failed_tests, args): + if not args.dry_run: + header = f'\n==== Test summary {"(early exit)" if args.early_exit else ""} ====' + cprint(header, attrs=['bold']) if failed_tests: for failed_test in failed_tests: print_failed_test(failed_test) - return 1 else: print(f'{colored("All tests passed!", "green")}') - return 0 - return 0 def run_tests(args): - # Iterate tests + + # Get tests from testlist tests = get_tests(args.testlist) + + # Create a process Pool + with multiprocessing.Pool(args.n_procs) as pool: + + # Create a shared object which parent and child processes can access + # concurrently to terminate the pool early as soon as one process fails + exit_early = multiprocessing.Value('B') + exit_early.value = 0 + + # Define callback for early exit + def completion_callback(return_value): + result = return_value[0] + log = return_value[1] + if args.early_exit and result != 0: + exit_early.value = 1 + # Printing the log all at once here, rather than line-by-line + # in run_simulation, ensures that the logs of different processes + # are not interleaved in stdout. + # However, as we prefer line-by-line printing when a single process + # is used, we have to make sure we don't print twice. 
+ if args.verbose and multiple_processes(args): + print(log) + + # Queue tests to process pool + results = [] + for test in tests: + result = pool.apply_async(run_test, args=(test, args), callback=completion_callback) + results.append(result) + + # Wait for all tests to complete + running = range(len(tests)) + while len(running) != 0 and not exit_early.value: + time.sleep(1) + running = [i for i in running if not results[i].ready()] + + # Query test results failed_tests = [] - for test in tests: - # Run test - result = run_test(test, args) - if result != 0: + for test, result in zip(tests, results): + if result.ready() and result.get()[0] != 0: failed_tests.append(test) - # End program if requested on first test failure - if args.early_exit: - break - return print_test_summary(failed_tests, args.dry_run) + + print_test_summary(failed_tests, args) + + return len(failed_tests) def main(): From cd78ab6b9e1ea047fa44dc1eded86136d3777d09 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Thu, 21 Sep 2023 16:23:42 +0200 Subject: [PATCH 13/13] util/sim: Minor improvements --- util/sim/simulate.py | 10 ++++------ util/sim/verification.py | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/util/sim/simulate.py b/util/sim/simulate.py index db00292af..4e36cc1e1 100755 --- a/util/sim/simulate.py +++ b/util/sim/simulate.py @@ -213,10 +213,7 @@ def print_test_summary(failed_tests, args): print(f'{colored("All tests passed!", "green")}') -def run_tests(args): - - # Get tests from testlist - tests = get_tests(args.testlist) +def run_tests(tests, args): # Create a process Pool with multiprocessing.Pool(args.n_procs) as pool: @@ -265,8 +262,9 @@ def completion_callback(return_value): def main(): args = parse_args() - sys.exit(run_tests(args)) + tests = get_tests(args.testlist) + return run_tests(tests, args) if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/util/sim/verification.py b/util/sim/verification.py index 04594a51c..9dd3428e4 100644 --- 
a/util/sim/verification.py +++ b/util/sim/verification.py @@ -28,8 +28,8 @@ def parse_args(): help='The Snitch binary to be executed by the simulated Snitch hardware') parser.add_argument( '--symbols-bin', - help='An optional binary containing the I/O symbols. By default,' - 'these are searched for in snitch_bin. This argument serves as an' + help='An optional binary containing the I/O symbols. By default, ' + 'these are searched for in snitch_bin. This argument serves as an ' 'alternative.') parser.add_argument( '--log',