Skip to content

Commit

Permalink
util: Extend simulation and trace utilities (#178)
Browse files Browse the repository at this point in the history
* `util/sim/Simulation.py`: Take time from last matching line in log

* `util/sim/sim_utils.py`: Add test summary table title

* `util/sim/Simulation.py`: Fix bug in dry-run mode

* util/trace: Add column field to annotated traces

* `util/trace/annotate.py`: Add annotations to lines without timestamp

Previously, lines without timestamps, e.g. FPU instructions executed
in the same cycle as an integer instruction, were being parsed incorrectly,
so they would not be annotated. Now we use a regex to handle these lines
properly.

* `util/trace/annotate.py`: Preserve performance metrics at end of file

* `util/trace/a2l.py`: Parse addr2line output using regex

Correctly handles some corner cases where addr2line did not display
all information for a certain function.

* treewide: Extend `visualize.py` as required to include CVA6 trace

* `util/trace/tracevis.py`: Parallelize processing multiple traces

* `util/trace/gen_trace.py`: Print line number on exception
  • Loading branch information
colluca authored Aug 12, 2024
1 parent 0fc8be0 commit 5928ffa
Show file tree
Hide file tree
Showing 8 changed files with 294 additions and 197 deletions.
4 changes: 3 additions & 1 deletion target/common/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ JOINT_PERF_DUMP = $(LOGS_DIR)/perf.json
ROI_DUMP = $(LOGS_DIR)/roi.json
VISUAL_TRACE = $(LOGS_DIR)/trace.json

VISUALIZE_PY_FLAGS += --tracevis "$(BINARY) $(SNITCH_TXT_TRACES) --addr2line $(ADDR2LINE) -f snitch"

.PHONY: traces annotate visual-trace clean-traces clean-annotate clean-perf clean-visual-trace
traces: $(TXT_TRACES)
annotate: $(ANNOTATED_TRACES)
Expand Down Expand Up @@ -243,4 +245,4 @@ $(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY)
$(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@

$(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY)
$(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) -o $@
$(VISUALIZE_PY) $(ROI_DUMP) $(VISUALIZE_PY_FLAGS) -o $@
26 changes: 14 additions & 12 deletions util/bench/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,10 @@ def main():
metavar='<input>',
help='Input JSON file')
parser.add_argument(
'--traces',
metavar='<trace>',
nargs='*',
help='Simulation traces to process')
parser.add_argument(
'--elf',
nargs='?',
help='ELF from which the traces were generated')
'--tracevis',
action='append',
default=[],
help='Argument string to pass down to tracevis, to generate additional events.')
parser.add_argument(
'-o',
'--output',
Expand Down Expand Up @@ -98,10 +94,16 @@ def main():

# Optionally extract also instruction-level events
# from the simulation traces
if args.traces and args.elf:
events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch',
addr2line='addr2line', use_time=True, pid=1,
cache=True, elf=args.elf, collapse_call_stack=True)
for tvargs in args.tracevis:
# Break tracevis argument string into a list of arguments
tvargs = tvargs.split()
# Add default arguments, and parse all tracevis arguments
tvargs.append('--time')
tvargs.append('--collapse-call-stack')
tvargs = vars(tracevis.parse_args(tvargs))
# Add more arguments, and get tracevis events
tvargs['pid'] = 1
events += tracevis.parse_traces(**tvargs)

# Create TraceViewer JSON object
tvobj = {}
Expand Down
29 changes: 17 additions & 12 deletions util/sim/Simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,17 +200,20 @@ def get_retcode(self):
return super().get_retcode()
elif self.log is not None:
# Extract the application's return code from the simulation log
with open(self.log, 'r') as f:
for line in f.readlines():
regex_success = r'\[SUCCESS\] Program finished successfully'
match_success = re.search(regex_success, line)
if match_success:
return 0
else:
regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
match = re.search(regex_fail, line)
if match:
return int(match.group(1))
if not self.dry_run:
with open(self.log, 'r') as f:
for line in f.readlines():
regex_success = r'\[SUCCESS\] Program finished successfully'
match_success = re.search(regex_success, line)
if match_success:
return 0
else:
regex_fail = r'\[FAILURE\] Finished with exit code\s+(\d+)'
match = re.search(regex_fail, line)
if match:
return int(match.group(1))
else:
return 0

def successful(self):
# Check that simulation return code matches expected value (in super class)
Expand All @@ -226,7 +229,9 @@ def get_simulation_time(self):
# Extract the simulation time from the simulation log
if self.log is not None:
with open(self.log, 'r') as f:
for line in f.readlines():
# Read lines from the bottom of the file, since warning and error messages may
# also print a time to the log.
for line in reversed(f.readlines()):
regex = r'Time: (\d+) ([a-z]+)\s+'
match = re.search(regex, line)
if match:
Expand Down
5 changes: 1 addition & 4 deletions util/sim/sim_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,9 @@ def print_summary(sims, early_exit=False, dry_run=False):
Args:
sims: A list of simulations from the simulation suite.
"""
# Header
header = '==== Test summary ===='
print(header)

# Table
table = PrettyTable()
table.title = 'Test summary'
table.field_names = [
'test',
'launched',
Expand Down
150 changes: 101 additions & 49 deletions util/trace/a2l.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,20 @@
from pathlib import Path
from functools import lru_cache
from operator import itemgetter
import re


def unzip(ls):
return zip(*ls)


def format_function_name(name):
if name == '??':
return 'unknown function'
return name


def format_line(num):
if num == '?':
return -1
return int(num)


class Addr2LineOutput:

indent_unit = ' '

def __init__(self, raw):
def __init__(self, raw, toolchain='llvm'):
self.raw = raw
self.toolchain = toolchain

# Returns the function stack of the current line.
# If there was no function inlining, then the function stack
Expand All @@ -44,52 +34,102 @@ def __init__(self, raw):
# belonged to after inlining the previous, up to (and including)
# the last function which was not inlined.
def function_stack(self):
output = self.raw.split('\n')

functions = output[::2]
filepaths, lines = unzip([o.split(':') for o in output[1::2]])

functions = map(format_function_name, functions)
lines = map(format_line, lines)

stack = zip(functions, filepaths, lines)
stack = [{'func': s[0], 'file': s[1], 'line': s[2]} for s in stack]
return stack
if self.toolchain == 'llvm':
# Define a regex pattern to capture relevant data. The function start filename
# and start line are optional, so they are enclosed in ()?.
pattern = re.compile(
r'^(?P<func>.+?)\s*'
r'Filename:\s*(?P<file>[^\n]+)\s*'
r'(Function start filename:\s*(?P<func_start_filename>[^\n]+)\s*'
r'Function start line:\s*(?P<func_start_line>\d+)\s*)?'
r'Line:\s*(?P<line>\d+)\s*'
r'Column:\s*(?P<col>\d+)',
re.MULTILINE)
else:
# Define a regex pattern to match function names, file paths and line numbers
pattern = re.compile(
r"^(?P<func>.+)\n(?P<file>.+):(?P<line>\d+)(?: \(discriminator \d+\))?$",
re.MULTILINE)

# Find all matches and organize them into a list of dictionaries
stack = [match.groupdict() for match in pattern.finditer(self.raw)]

# Format stack entries
def format_stack_entry(entry):
func, line, col = [entry.get(key, None) for key in ['func', 'line', 'col']]
# Rename unknown functions
if func == '??':
entry['func'] = 'unknown function'
# Normalize line and column: add the key as None if missing, convert values to int, and map 0 to None
for key, val in zip(['line', 'col'], [line, col]):
if val is not None:
val = int(val)
if val == 0:
entry[key] = None
else:
entry[key] = val
else:
entry[key] = val
return entry
stack = list(map(format_stack_entry, stack))
# Do not create a stack if the compiler was unable to associate a line number with the address
return stack if stack[0]['line'] is not None else None

def function_stack_string(self, short=True):
stack = reversed(self.function_stack())
s = ''
indent = ''
for i, level in enumerate(stack):
func, file, line = level.values()
if short:
file = Path(file).name
indent = self.indent_unit * i
s += f'{indent}{func} ({file}:{line})\n'
stack = self.function_stack()
if stack is not None:
stack = reversed(self.function_stack())
for i, level in enumerate(stack):
func, file, line, col = [level.get(key) for key in ["func", "file", "line", "col"]]
if short:
file = Path(file).name
indent = self.indent_unit * i
s += f'{indent}{func} ({file}:{line}'
if col is not None:
s += f':{col}'
s += ')\n'
# Remove final newline
s = s[:-1]
return s

def line(self):
file, line = itemgetter('file', 'line')(self.function_stack()[0])

# Open source file
src = []
try:
with open(file, 'r') as f:
src = [x.strip() for x in f.readlines()]
except OSError:
if self.function_stack():
file, line = itemgetter('file', 'line')(self.function_stack()[0])

# Open source file
src = []
try:
with open(file, 'r') as f:
src = [x for x in f.readlines()]
except OSError:
src = []

# Extract line
if src and line >= 0:
return src[line-1]
else:
return ''
# Extract line
if src and line is not None:
return src[line-1]

def __str__(self):
s = self.function_stack_string()
if self.line():
indent = self.indent_unit * len(s.strip().split('\n'))
s += f'{indent}{self.line()}'
s = ''
stack = self.function_stack()
if stack:
col = stack[0]['col']
s = self.function_stack_string()
line = self.line()
if line is not None:
# Calculate indentation of original source line
stripped_line = line.lstrip()
orig_indent = len(line) - len(stripped_line)
stripped_line = stripped_line.rstrip()
# Calculate indentation to prepend to source line from the function stack string
indent = self.indent_unit * len(s.strip().split('\n'))
# Append source line to function stack string
s += f'\n{indent}{stripped_line}'
# Append a column indicator line
if col is not None:
new_col = col - orig_indent + len(indent)
s += '\n' + ' ' * (new_col - 1) + '^'
return s


Expand All @@ -99,11 +139,23 @@ def __init__(self, elf, a2l_binary='addr2line'):
self.elf = Path(elf)
self.a2l = a2l_binary

# We must distinguish between LLVM and GCC toolchains as the latter
# does not support the `--verbose` flag
if 'riscv64-unknown-elf-addr2line' in a2l_binary:
self.toolchain = 'gcc'
elif 'llvm-addr2line' in a2l_binary:
self.toolchain = 'llvm'
else:
raise ValueError('addr2line binary expected to be either riscv64-unknown-elf-addr2line'
' or llvm-addr2line')

assert self.elf.exists(), f'File not found {self.elf}'

@lru_cache(maxsize=1024)
def addr2line(self, addr):
if isinstance(addr, str):
addr = int(addr, 16)
cmd = f'{self.a2l} -e {self.elf} -f -i {addr:x}'
return Addr2LineOutput(os.popen(cmd).read())
if self.toolchain == 'llvm':
cmd += ' --verbose'
return Addr2LineOutput(os.popen(cmd).read(), toolchain=self.toolchain)
Loading

0 comments on commit 5928ffa

Please sign in to comment.