From de323667c2b96cc3d0cf3dac0ecb628fedc95adb Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 17 Jan 2024 15:03:21 +0100 Subject: [PATCH 1/8] target: Fix bug in tracer targets --- target/common/common.mk | 59 ++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/target/common/common.mk b/target/common/common.mk index 6460962aa..9efad2e95 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -199,25 +199,31 @@ endef # Traces # ########## -DASM_TRACES = $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null)) -TXT_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.txt/g')) -PERF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/trace_hart/hart/g' | sed 's/.dasm/_perf.json/g')) -ANNOTATED_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.s/g')) -DIFF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.diff/g')) - -GENTRACE_OUTPUTS = $(TXT_TRACES) $(PERF_TRACES) -ANNOTATE_OUTPUTS = $(ANNOTATED_TRACES) -PERF_CSV = $(LOGS_DIR)/perf.csv -EVENT_CSV = $(LOGS_DIR)/event.csv -TRACE_CSV = $(LOGS_DIR)/trace.csv -TRACE_JSON = $(LOGS_DIR)/trace.json - -.PHONY: traces annotate perf-csv event-csv layout -traces: $(GENTRACE_OUTPUTS) -annotate: $(ANNOTATE_OUTPUTS) -perf-csv: $(PERF_CSV) -event-csv: $(EVENT_CSV) -layout: $(TRACE_CSV) $(TRACE_JSON) +SNITCH_DASM_TRACES = $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null)) +SNITCH_TXT_TRACES = $(shell (echo $(SNITCH_DASM_TRACES) | sed 's/\.dasm/\.txt/g')) +SNITCH_ANNOTATED_TRACES = $(shell (echo $(SNITCH_DASM_TRACES) | sed 's/\.dasm/\.s/g')) +SNITCH_PERF_DUMPS = $(shell (echo $(SNITCH_DASM_TRACES) | sed 's/trace_hart/hart/g' | sed 's/.dasm/_perf.json/g')) + +TXT_TRACES += $(SNITCH_TXT_TRACES) +ANNOTATED_TRACES += $(SNITCH_ANNOTATED_TRACES) +PERF_DUMPS += $(SNITCH_PERF_DUMPS) +JOINT_PERF_DUMP = $(LOGS_DIR)/perf.json +ROI_DUMP = $(LOGS_DIR)/roi.json +VISUAL_TRACE = $(LOGS_DIR)/trace.json + +.PHONY: traces annotate visual-trace clean-traces clean-annotate clean-perf clean-visual-trace +traces: $(TXT_TRACES) +annotate: $(ANNOTATED_TRACES) +perf: $(JOINT_PERF_DUMP) +visual-trace: $(VISUAL_TRACE) +clean-traces: + rm -f $(TXT_TRACES) +clean-annotate: + rm -f $(ANNOTATED_TRACES) +clean-perf: + rm -f $(PERF_DUMPS) $(JOINT_PERF_DUMP) +clean-visual-trace: + rm -f $(VISUAL_TRACE) $(LOGS_DIR)/trace_hart_%.txt $(LOGS_DIR)/hart_%_perf.json: $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) $(DASM) < $< | $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json > $(LOGS_DIR)/trace_hart_$*.txt @@ -230,14 +236,11 @@ $(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} $(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d -$(PERF_CSV): $(PERF_TRACES) $(PERF_CSV_PY) - $(PERF_CSV_PY) -o $@ -i $(PERF_TRACES) +$(JOINT_PERF_DUMP): $(PERF_DUMPS) $(JOIN_PY) + $(JOIN_PY) -i $(shell ls $(LOGS_DIR)/*_perf.json) -o $@ -$(EVENT_CSV): $(PERF_TRACES) $(PERF_CSV_PY) - $(PERF_CSV_PY) -o $@ -i $(PERF_TRACES) --filter tstart tend +$(ROI_DUMP): $(JOINT_PERF_DUMP) $(ROI_SPEC) $(ROI_PY) + $(ROI_PY) $(JOINT_PERF_DUMP) $(ROI_SPEC) --cfg $(CFG) -o $@ -$(TRACE_CSV): $(EVENT_CSV) $(LAYOUT_FILE) $(LAYOUT_EVENTS_PY) - $(LAYOUT_EVENTS_PY) $(LAYOUT_EVENTS_FLAGS) $(EVENT_CSV) $(LAYOUT_FILE) -o $@ - -$(TRACE_JSON): $(TRACE_CSV) $(EVENTVIS_PY) - $(EVENTVIS_PY) -o $@ $(TRACE_CSV) +$(VISUAL_TRACE): $(ROI_DUMP) $(VISUALIZE_PY) + $(VISUALIZE_PY) $(ROI_DUMP) --traces $(SNITCH_TXT_TRACES) --elf $(BINARY) -o $@ From 
d48a60c66bd718c985af1a7e5a76ab5aff5d8943 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 3 Apr 2024 13:42:46 +0200 Subject: [PATCH 2/8] trace: Refactor and prepare for DMA trace generation * bench: Refactor benchmarking utils to use JSON * docs: Add trace and benchmarking utilities --- .github/workflows/ci.yml | 14 +++ .gitlab-ci.yml | 8 ++ docs/rm/bench/join.md | 1 + docs/rm/bench/roi.md | 1 + docs/rm/bench/visualize.md | 1 + docs/rm/trace/annotate.md | 1 + docs/rm/trace/events.md | 1 + docs/rm/trace/gen_trace.md | 4 + mkdocs.yml | 7 ++ python-requirements.txt | 17 ++-- target/common/common.mk | 10 +- util/bench/__init__.py | 0 util/bench/join.py | 62 ++++++++++++ util/bench/roi.py | 123 +++++++++++++++++++++++ util/bench/tests/__init__.py | 0 util/bench/tests/test_data/data.json | 46 +++++++++ util/bench/tests/test_data/roi.json | 33 ++++++ util/bench/tests/test_data/spec.json | 16 +++ util/bench/tests/test_roi.py | 62 ++++++++++++ util/bench/visualize.py | 117 +++++++++++++++++++++ util/trace/annotate.py | 39 ++++--- util/trace/events.py | 9 +- util/trace/eventvis.py | 136 ------------------------- util/trace/gen_trace.py | 80 ++++++++------- util/trace/layout_events.py | 145 --------------------------- util/trace/perf_csv.py | 83 --------------- 26 files changed, 584 insertions(+), 432 deletions(-) create mode 100644 docs/rm/bench/join.md create mode 100644 docs/rm/bench/roi.md create mode 100644 docs/rm/bench/visualize.md create mode 100644 docs/rm/trace/annotate.md create mode 100644 docs/rm/trace/events.md create mode 100644 util/bench/__init__.py create mode 100755 util/bench/join.py create mode 100755 util/bench/roi.py create mode 100644 util/bench/tests/__init__.py create mode 100644 util/bench/tests/test_data/data.json create mode 100644 util/bench/tests/test_data/roi.json create mode 100644 util/bench/tests/test_data/spec.json create mode 100644 util/bench/tests/test_roi.py create mode 100755 util/bench/visualize.py delete mode 100755 util/trace/eventvis.py delete mode 100755 util/trace/layout_events.py delete mode 100755 util/trace/perf_csv.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d73348e8..4613be05d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,20 @@ jobs: - name: Build docs run: make docs + ##################### + # Python unit tests # + ##################### + + pytest: + name: Python unit tests + runs-on: ubuntu-22.04 + container: + image: ghcr.io/pulp-platform/snitch_cluster:main + steps: + - uses: actions/checkout@v2 + - name: Run pytest + run: pytest + ############################################## # Simulate SW on Snitch Cluster w/ Verilator # ############################################## diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6e8f45cb1..656558761 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,6 +24,14 @@ docs: script: - make docs +##################### +# Python unit tests # +##################### + +pytest: + script: + - pytest + ################################# # Build Snitch cluster software # ################################# diff --git a/docs/rm/bench/join.md b/docs/rm/bench/join.md new file mode 100644 index 000000000..ee9aa8221 --- /dev/null +++ b/docs/rm/bench/join.md @@ -0,0 +1 @@ +::: join \ No newline at end of file diff --git a/docs/rm/bench/roi.md b/docs/rm/bench/roi.md new file mode 100644 index 000000000..239fedf30 --- /dev/null +++ b/docs/rm/bench/roi.md @@ -0,0 +1 @@ +::: roi \ No newline at end of file diff --git a/docs/rm/bench/visualize.md 
b/docs/rm/bench/visualize.md new file mode 100644 index 000000000..b2c2bed8b --- /dev/null +++ b/docs/rm/bench/visualize.md @@ -0,0 +1 @@ +::: visualize \ No newline at end of file diff --git a/docs/rm/trace/annotate.md b/docs/rm/trace/annotate.md new file mode 100644 index 000000000..b70b1a847 --- /dev/null +++ b/docs/rm/trace/annotate.md @@ -0,0 +1 @@ +::: annotate \ No newline at end of file diff --git a/docs/rm/trace/events.md b/docs/rm/trace/events.md new file mode 100644 index 000000000..5b9cca4ae --- /dev/null +++ b/docs/rm/trace/events.md @@ -0,0 +1 @@ +::: events \ No newline at end of file diff --git a/docs/rm/trace/gen_trace.md b/docs/rm/trace/gen_trace.md index c15c297be..8eef24668 100644 --- a/docs/rm/trace/gen_trace.md +++ b/docs/rm/trace/gen_trace.md @@ -1 +1,5 @@ +<<<<<<< HEAD ::: gen_trace +======= +::: gen_trace +>>>>>>> c744477... trace: Refactor and prepare for DMA trace generation diff --git a/mkdocs.yml b/mkdocs.yml index 61e4494a9..2ccad29a7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,6 +28,7 @@ plugins: paths: - util/sim - util/trace + - util/bench - target/snitch_cluster/util - macros: on_error_fail: true @@ -67,6 +68,12 @@ nav: - rm/sim/Elf.md - Trace Utilities: - gen_trace.py: rm/trace/gen_trace.md + - annotate.py: rm/trace/annotate.md + - events.py: rm/trace/events.md + - Benchmarking Utilities: + - join.py: rm/bench/join.md + - roi.py: rm/bench/roi.md + - visualize.py: rm/bench/visualize.md - Snitch Target Utilities: - run.py: rm/snitch_target_utils/run.md - build.py: rm/snitch_target_utils/build.md diff --git a/python-requirements.txt b/python-requirements.txt index 583de9499..72c35460f 100644 --- a/python-requirements.txt +++ b/python-requirements.txt @@ -2,6 +2,7 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 +# Keep sorted. 
bin2coe dataclasses editorconfig-checker==2.3.51 @@ -13,18 +14,20 @@ json5 jsonref jsonschema mako +matplotlib mkdocs-material -progressbar2 -tabulate -yamllint -pyyaml -pytablewriter -termcolor pandas prettytable -pyelftools +progressbar2 psutil +pyelftools pyflexfloat +pytablewriter +pytest +pyyaml +tabulate +termcolor +yamllint -r docs/requirements.txt -r sw/dnn/requirements.txt diff --git a/target/common/common.mk b/target/common/common.mk index 9efad2e95..18f40ac45 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -36,9 +36,9 @@ ADDR2LINE ?= $(LLVM_BINROOT)/llvm-addr2line GENTRACE_PY ?= $(UTIL_DIR)/trace/gen_trace.py ANNOTATE_PY ?= $(UTIL_DIR)/trace/annotate.py EVENTS_PY ?= $(UTIL_DIR)/trace/events.py -PERF_CSV_PY ?= $(UTIL_DIR)/trace/perf_csv.py -LAYOUT_EVENTS_PY ?= $(UTIL_DIR)/trace/layout_events.py -EVENTVIS_PY ?= $(UTIL_DIR)/trace/eventvis.py +JOIN_PY ?= $(UTIL_DIR)/bench/join.py +ROI_PY ?= $(UTIL_DIR)/bench/roi.py +VISUALIZE_PY ?= $(UTIL_DIR)/bench/visualize.py # For some reason `$(VERILATOR_SEPP) which verilator` returns a # a two-liner with the OS on the first line, hence the tail -n1 @@ -225,8 +225,8 @@ clean-perf: clean-visual-trace: rm -f $(VISUAL_TRACE) -$(LOGS_DIR)/trace_hart_%.txt $(LOGS_DIR)/hart_%_perf.json: $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) - $(DASM) < $< | $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json > $(LOGS_DIR)/trace_hart_$*.txt +$(addprefix $(LOGS_DIR)/,trace_hart_%.txt hart_%_perf.json): $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) + $(DASM) < $< | $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt # Generate source-code interleaved traces for all harts. Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script diff --git a/util/bench/__init__.py b/util/bench/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/util/bench/join.py b/util/bench/join.py new file mode 100755 index 000000000..56c0defe0 --- /dev/null +++ b/util/bench/join.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +"""Combines performance metrics from all threads into one JSON file. + +This script takes the performance metrics from multiple cores or DMA +engines, in JSON format as dumped by the [`events.py`][events] or +[`gen_trace.py`][gen_trace] scripts, and merges them into a single +JSON file for global inspection and further processing. 
+""" + +import sys +import argparse +import re +import json + + +FILENAME_REGEX = r'([a-z]+)_([0-9a-f]+)_perf.json' + + +def main(): + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument( + '-i', + '--inputs', + metavar='', + nargs='+', + help='Input performance metric dumps') + parser.add_argument( + '-o', + '--output', + metavar='', + nargs='?', + default='perf.json', + help='Output JSON file') + args = parser.parse_args() + + # Populate a list (one entry per hart) of dictionaries + # enumerating all the performance metrics for each hart + data = {} + for filename in sorted(args.inputs): + + # Get thread ID and type (DMA or hart) from filename + match = re.search(FILENAME_REGEX, filename) + typ = match.group(1) + idx = int(match.group(2), base=16) + + # Populate dictionary of metrics for the current hart + with open(filename, 'r') as f: + data[f'{typ}_{idx}'] = json.load(f) + + # Export data + with open(args.output, 'w') as f: + json.dump(data, f, indent=4) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/util/bench/roi.py b/util/bench/roi.py new file mode 100755 index 000000000..7e4b18723 --- /dev/null +++ b/util/bench/roi.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +"""Filters and labels execution regions for visualization. + +This script takes a JSON file of performance metrics, as output by +[`join.py`][join], and generates another JSON, where the execution +regions are filtered and labeled for visualization, according to an +auxiliary region-of-interest (ROI) specification file (JSON format). +The specification file can be a Mako template to parameterize +certain parameters, such as the number of clusters in the system. +The output JSON can be passed to the [`visualize.py`][visualize] +script for visualization. + +Check out `test_data/data.json` and `test_data/spec.json` for an +example input and specification file which can be fed as input to the +tool respectively. The corresponding output is contained in +`test_data/roi.json`. 
+""" + +import argparse +import json +import json5 +from mako.template import Template +import sys + + +def format_roi(roi, label): + return { + "label": label, + "tstart": roi["tstart"], + "tend": roi["tend"], + "attrs": {key: value for key, value in roi.items() if key not in ["tstart", "tend"]} + } + + +def get_roi(data, thread, idx): + thread_type, thread_idx = thread.split('_') + thread_idx = int(thread_idx) + try: + thread_data = data[thread] + except KeyError: + raise KeyError(f"Nonexistent thread {thread}") + if thread_type in ["hart", "dma"]: + try: + if thread_type == "hart": + return thread_data[idx] + elif thread_type == "dma": + return thread_data["transfers"][idx] + except IndexError: + raise IndexError(f"Thread {thread} does not contain region {idx}") + else: + raise ValueError(f"Unsupported thread type {thread_type}") + + +def filter_and_label_rois(data, spec): + output = {} + # Iterate all threads in the rendered specification + for thread_spec in spec: + thread = thread_spec['thread'] + output_rois = [] + # Iterate all ROIs to keep for the current thread + for roi in thread_spec['roi']: + output_roi = format_roi(get_roi(data, thread, roi['idx']), roi['label']) + output_rois.append(output_roi) + # Add ROIs for current thread to output, if any + if output_rois: + output[thread] = output_rois + return output + + +def load_json_inputs(input_path, spec_path, **kwargs): + # Read input JSON + with open(input_path, 'r') as f: + data = json5.load(f) + # Read and render specification template JSON + with open(spec_path, 'r') as f: + spec_template = Template(f.read()) + rendered_spec = spec_template.render(**kwargs) + spec = json5.loads(rendered_spec) + return data, spec + + +def main(): + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument( + 'input', + help='Input JSON file') + parser.add_argument( + 'spec', + help='ROI specification file (JSON format)') + parser.add_argument( + '--cfg', + help='Hardware configuration file used to render the specification file') + parser.add_argument( + '-o', + '--output', + nargs='?', + default='roi.json', + help='Output JSON file') + args = parser.parse_args() + + # Load hardware configuration + with open(args.cfg, 'r') as f: + cfg = json5.load(f) + + # Read and render input files + data, spec = load_json_inputs(args.input, args.spec, cfg=cfg) + + # Process inputs and generate output JSON + output = filter_and_label_rois(data, spec) + + # Write output to file + with open(args.output, 'w') as f: + json.dump(output, f, indent=4) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/util/bench/tests/__init__.py b/util/bench/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/util/bench/tests/test_data/data.json b/util/bench/tests/test_data/data.json new file mode 100644 index 000000000..77ca26416 --- /dev/null +++ b/util/bench/tests/test_data/data.json @@ -0,0 +1,46 @@ +{ + "hart_0": [ + { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }, + { + "tstart": 6802.0, + "tend": 12647.0, + "fpss_fpu_occupancy": 0.013860369609856264, + "total_ipc": 0.20756331279945245 + } + ], + "dma_9": { + "aggregate_bw": 11.829313543599257, + "transfers": [ + { + "tstart": 3512, + "tend": 3526, + "bw": 1.1428571428571428 + }, + { + "tstart": 3564, + "tend": 3578, + "bw": 1.1428571428571428 + } + ] + }, + "dma_18": { + "aggregate_bw": 16.633245382585752, + "transfers": [ + { + "tstart": 3608, + "tend": 3622, + "bw": 1.1428571428571428 + 
}, + { + "tstart": 3660, + "tend": 3674, + "bw": 1.1428571428571428 + } + ] + } +} diff --git a/util/bench/tests/test_data/roi.json b/util/bench/tests/test_data/roi.json new file mode 100644 index 000000000..a6efe3773 --- /dev/null +++ b/util/bench/tests/test_data/roi.json @@ -0,0 +1,33 @@ +{ + "hart_0": [ + { + "label": "compute", + "tstart": 6802.0, + "tend": 12647.0, + "attrs": { + "fpss_fpu_occupancy": 0.013860369609856264, + "total_ipc": 0.20756331279945245 + } + } + ], + "dma_9": [ + { + "label": "dma_in", + "tstart": 3512, + "tend": 3526, + "attrs": { + "bw": 1.1428571428571428 + } + } + ], + "dma_18": [ + { + "label": "dma_in", + "tstart": 3608, + "tend": 3622, + "attrs": { + "bw": 1.1428571428571428 + } + } + ] +} diff --git a/util/bench/tests/test_data/spec.json b/util/bench/tests/test_data/spec.json new file mode 100644 index 000000000..ae58303c0 --- /dev/null +++ b/util/bench/tests/test_data/spec.json @@ -0,0 +1,16 @@ +[ + { + "thread": "hart_0", + "roi": [ + {"idx": 1, "label": "compute"} + ] + }, +% for i in range(0, num_clusters): + { + "thread": "${f'dma_{9*(i+1)}'}", + "roi": [ + {"idx": 0, "label": "dma_in"} + ] + }, +% endfor +] diff --git a/util/bench/tests/test_roi.py b/util/bench/tests/test_roi.py new file mode 100644 index 000000000..ffb567816 --- /dev/null +++ b/util/bench/tests/test_roi.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande + +import json +from pathlib import Path +import pytest +from bench.roi import get_roi, format_roi, load_json_inputs, filter_and_label_rois + +TEST_DATA_DIR = Path(__file__).resolve().parent / 'test_data' +INPUT_JSON = TEST_DATA_DIR / 'data.json' +SPEC_JSON = TEST_DATA_DIR / 'spec.json' +OUTPUT_JSON = TEST_DATA_DIR / 'roi.json' + + +def test_format_roi(): + label = "compute" + roi = { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + } + formatted_roi = { + "label": "compute", + "tstart": 1759.0, + "tend": 6802.0, + "attrs": { + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }, + } + assert format_roi(roi, label) == formatted_roi + + +@pytest.mark.parametrize("thread, idx, roi", [ + ('hart_0', 0, { + "tstart": 1759.0, + "tend": 6802.0, + "fpss_fpu_occupancy": 0.006345429307951616, + "total_ipc": 0.04501288915328178 + }), + ('dma_9', 1, { + "tstart": 3564, + "tend": 3578, + "bw": 1.1428571428571428 + }) +]) +def test_get_roi(thread, idx, roi): + with open(INPUT_JSON, 'r') as f: + data = json.load(f) + assert get_roi(data, thread, idx) == roi + + +def test_filter_and_label_rois(): + data, spec = load_json_inputs(INPUT_JSON, SPEC_JSON, num_clusters=2) + with open(OUTPUT_JSON, 'r') as f: + output = json.load(f) + assert filter_and_label_rois(data, spec) == output diff --git a/util/bench/visualize.py b/util/bench/visualize.py new file mode 100755 index 000000000..087d8b86e --- /dev/null +++ b/util/bench/visualize.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Author: Luca Colagrande +"""Translates a ROI JSON for visualization in Chrome. 
+ +This script translates a JSON file, in the format produced by +[`roi.py`][roi], to a JSON file adhering to the syntax required by +Chrome's +[Trace-Viewer](https://github.com/catapult-project/catapult/tree/master/tracing). + +The output can be visualized in a Chrome browser: go to the +`about:tracing` URL and load the JSON file. + +This script can be compared to `trace/tracevis.py`, but instead of +visualizing individual instructions, it represents entire execution +regions as a whole. +""" + +import argparse +import json +from pathlib import Path +import sys + +sys.path.append(str(Path(__file__).parent / '../trace')) +import tracevis # noqa: E402 + + +# Converts nanoseconds to microseconds +def us(ns): + return ns / 1000 + + +def main(): + # Argument parsing + parser = argparse.ArgumentParser() + parser.add_argument( + 'input', + metavar='', + help='Input JSON file') + parser.add_argument( + '--traces', + metavar='', + nargs='*', + help='Simulation traces to process') + parser.add_argument( + '--elf', + nargs='?', + help='ELF from which the traces were generated') + parser.add_argument( + '-o', + '--output', + metavar='', + nargs='?', + default='trace.json', + help='Output JSON file') + args = parser.parse_args() + + # TraceViewer events + events = [] + + # Add a dummy instant event to mark time 0. + # This is to avoid that the events are shifted from + # their actual start times, as done to align the first event + # to time 0. + event = {'name': 'zero', + 'ph': 'I', # Instant event type + 'ts': 0, + 's': 'g' # Global scope + } + events.append(event) + + # Read JSON contents + with open(args.input) as f: + data = json.load(f) + + # Iterate threads + for thread, regions in data.items(): + + # Iterate execution regions for current thread + for region in regions: + + # Create TraceViewer event + ts = int(region['tstart']) + dur = int(region['tend']) - ts + event = { + 'name': region['label'], + 'ph': "X", # Complete event type + 'ts': us(ts), + 'dur': us(dur), + 'pid': 0, + 'tid': thread, + 'args': region['attrs'] + } + events.append(event) + + # Optionally extract also instruction-level events + # from the simulation traces + if args.traces and args.elf: + events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', + addr2line='addr2line', use_time=True, pid=1, + cache=True, elf=args.elf, collapse_call_stack=True) + + # Create TraceViewer JSON object + tvobj = {} + tvobj['traceEvents'] = events + tvobj['displayTimeUnit'] = "ns" + + # Dump TraceViewer events to JSON file + with open(args.output, 'w') as f: + json.dump(tvobj, f, indent=4) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/util/trace/annotate.py b/util/trace/annotate.py index e14853bdb..243784a07 100755 --- a/util/trace/annotate.py +++ b/util/trace/annotate.py @@ -1,23 +1,30 @@ #!/usr/bin/env python3 - # Copyright 2021 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 - -# This script parses the traces generated by Snitch and creates an annotated -# trace that includes code sources -# Example output: -# ; snrt_hartid (team.c:14) -# ; in snrt_cluster_core_idx (team.c:47) -# ; in main (event_unit.c:21) -# ; asm("csrr %0, mhartid" : "=r"(hartid)); -# 80000048 x13=0000000a # csrr a3, mhartid -# -# If the -d/--diff option is specified, it instead outputs a (fictitious) diff -# file which allows to visualize the trace-source correlation side-by-side -# instead of interleaved. 
-# For neater visualization, feed the diff file into a diff visualization tool e.g.: -# kompare -o +"""Annotates an instruction trace with source-code information. + +This script parses a human-readable trace, as generated by CVA6 or +Snitch's [`gen_trace.py`][gen_trace] script, and annotates every +instruction in the trace with information on its originating +source-code. + +Example output: +``` + ; snrt_hartid (team.c:14) + ; in snrt_cluster_core_idx (team.c:47) + ; in main (event_unit.c:21) + ; asm("csrr %0, mhartid" : "=r"(hartid)); + 80000048 x13=0000000a # csrr a3, mhartid +``` + +By default, the source-code information is interleaved in the same +file with the instruction trace. If you prefer to have a +side-by-side view, the -d/--diff option can be used. In this case, +the tool outputs a (fictitious) diff file which can be fed into a +diff visualization tool for side-by-side visualization in a GUI, +e.g. `kompare -o `. +""" import sys import os diff --git a/util/trace/events.py b/util/trace/events.py index a655be033..c5442ee2a 100755 --- a/util/trace/events.py +++ b/util/trace/events.py @@ -3,12 +3,13 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 # -# This script takes a CVA6 or Snitch trace and it exports the simulation time -# of all mcycle CSR reads in a format compatible with the gen_trace.py -# script's JSON output. -# # Author: Luca Colagrande +"""Export `mcycle` CSR read events from a Snitch or CVA6 trace. +This script takes a CVA6 or Snitch trace and it exports the +simulation time of all `mcycle` CSR reads to a JSON file in a format +compatible with [`gen_trace.py`][gen_trace]'s output. +""" import sys import argparse diff --git a/util/trace/eventvis.py b/util/trace/eventvis.py deleted file mode 100755 index 4d0fdfdc7..000000000 --- a/util/trace/eventvis.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2020 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# This script takes a CSV of events, compatible with the CSV format produced by -# `perf_csv.py`, and creates a JSON file that can be visualized by -# [Trace-Viewer](https://github.com/catapult-project/catapult/tree/master/tracing) -# In Chrome, open `about:tracing` and load the JSON file to view it. -# -# Following is an example CSV containing two regions (as would be defined by the -# presence of one mcycle CSR read in the traces): -# -# , prepare data, , send interrupt, -# 0, 32906, 32911, 32911, 33662 -# -# The first line is used to assign a name to each region. -# Each of the following lines starts with the hartid, followed by the start and -# end timestamps of each region. -# While the alignment of the region names in the first line w.r.t. the following -# lines does not matter, we suggest to align them with the columns containing the -# start times of the respective regions (as in the example above). -# -# This script can be compared to `tracevis.py`, but instead of visualizing individual -# instructions, it visualizes coarser grained regions as delimited by events -# in the traces. -# -# Author: Luca Colagrande - -import sys -import argparse -import csv -import json -import tracevis - - -def pairwise(iterable): - "s -> (s0, s1), (s2, s3), (s4, s5), ..." 
- a = iter(iterable) - return zip(a, a) - - -# Converts nanoseconds to microseconds -def us(ns): - return ns / 1000 - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - 'csv', - metavar='', - help='Input CSV file') - parser.add_argument( - '--traces', - metavar='', - nargs='*', - help='Simulation traces to process') - parser.add_argument( - '--elf', - nargs='?', - help='ELF from which the traces were generated') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='events.json', - help='Output JSON file') - args = parser.parse_args() - - # TraceViewer events - events = [] - - # Add a dummy instant event to mark time 0. - # This is to avoid that the events are shifted from - # their actual start times to align the first event - # at time 0. - event = {'name': 'zero', - 'ph': 'I', # Instant event type - 'ts': 0, - 's': 'g' # Global scope - } - events.append(event) - - # Read CSV to collect TraceViewer events - with open(args.csv) as f: - reader = csv.reader(f, delimiter=',') - - # Get region names - regions = [name for name in next(reader) if name] - - # Process lines - for row in reader: - - # First entry in row is the hart ID - tid = row[0] - - # Start and end times of each region follow - for i, (start, end) in enumerate(pairwise(row[1:])): - - # Filter regions this hart does not take part in - if start: - - # Create TraceViewer event - ts = int(start) - dur = int(end) - ts - event = {'name': regions[i], - 'ph': "X", # Complete event type - 'ts': us(ts), - 'dur': us(dur), - 'pid': 0, - 'tid': tid - } - events.append(event) - - # Optionally extract also instruction-level events - # from the simulation traces - if args.traces and args.elf: - events += tracevis.parse_traces(args.traces, start=0, end=-1, fmt='snitch', - addr2line='addr2line', use_time=True, pid=1, - cache=True, elf=args.elf, collapse_call_stack=True) - - # Create TraceViewer JSON object - tvobj = {} - tvobj['traceEvents'] = events - tvobj['displayTimeUnit'] = "ns" - - # Dump TraceViewer events to JSON file - with open(args.output, 'w') as f: - json.dump(tvobj, f, indent=4) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index fb3351ed2..145272382 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -924,6 +924,12 @@ def main(): ) parser.add_argument( '-o', + '--output', + required=True, + type=argparse.FileType('w'), + help='Path to the output file' + ) + parser.add_argument( '--offl', action='store_true', help='Annotate FPSS and sequencer offloads when they happen in core') @@ -951,42 +957,44 @@ def main(): args = parser.parse_args() line_iter = iter(args.infile.readline, b'') - # Prepare stateful data structures - time_info = None - gpr_wb_info = defaultdict(deque) - fpr_wb_info = defaultdict(deque) - fseq_info = { - 'curr_sec': 0, - 'fpss_pcs': deque(), - 'fseq_pcs': deque(), - 'cfg_buf': deque(), - 'curr_cfg': None - } - perf_metrics = [ - defaultdict(int) - ] # all values initially 0, also 'start' time of measurement 0 - perf_metrics[0]['start'] = None - # Parse input line by line - for line in line_iter: - if line: - ann_insn, time_info, empty = annotate_insn( - line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False, - time_info, args.offl, not args.saddr, args.permissive) - if perf_metrics[0]['start'] is None: - perf_metrics[0]['tstart'] = time_info[0] / 1000 - perf_metrics[0]['start'] = time_info[1] - if not empty: - print(ann_insn) - else: - break # 
Nothing more in pipe, EOF - perf_metrics[-1]['tend'] = time_info[0] / 1000 - perf_metrics[-1]['end'] = time_info[1] - # Compute metrics - eval_perf_metrics(perf_metrics) - # Emit metrics - print('\n## Performance metrics') - for idx in range(len(perf_metrics)): - print('\n' + fmt_perf_metrics(perf_metrics, idx, not args.allkeys)) + + with args.output as file: + # Prepare stateful data structures + time_info = None + gpr_wb_info = defaultdict(deque) + fpr_wb_info = defaultdict(deque) + fseq_info = { + 'curr_sec': 0, + 'fpss_pcs': deque(), + 'fseq_pcs': deque(), + 'cfg_buf': deque(), + 'curr_cfg': None + } + perf_metrics = [ + defaultdict(int) + ] # all values initially 0, also 'start' time of measurement 0 + perf_metrics[0]['start'] = None + # Parse input line by line + for line in line_iter: + if line: + ann_insn, time_info, empty = annotate_insn( + line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False, + time_info, args.offl, not args.saddr, args.permissive) + if perf_metrics[0]['start'] is None: + perf_metrics[0]['tstart'] = time_info[0] / 1000 + perf_metrics[0]['start'] = time_info[1] + if not empty: + print(ann_insn, file=file) + else: + break # Nothing more in pipe, EOF + perf_metrics[-1]['tend'] = time_info[0] / 1000 + perf_metrics[-1]['end'] = time_info[1] + # Compute metrics + eval_perf_metrics(perf_metrics) + # Emit metrics + print('\n## Performance metrics', file=file) + for idx in range(len(perf_metrics)): + print('\n' + fmt_perf_metrics(perf_metrics, idx, not args.allkeys), file=file) if args.dump_perf: with args.dump_perf as file: diff --git a/util/trace/layout_events.py b/util/trace/layout_events.py deleted file mode 100755 index 0d0e91435..000000000 --- a/util/trace/layout_events.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2020 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# This script takes a CSV of events, compatible with the CSV format produced by -# `perf_csv.py`, and creates another CSV of events, where the events are reordered based -# on a layout CSV file and labeled for viewing with the `eventvis.py` script. -# -# Following is an example CSV of events as output by `perf_csv.py`, -# which could be fed as input to this tool: -# -# , 0_tstart, 0_tend, 1_tstart, 1_tend, 2_tstart, 2_tend -# 0, 334, 10940, 10940, 10945, 10945, 10995 -# 1, 2654, 11061, 11061, 11172, 11172, 11189 -# 2, 2654, 11061, 11061, 11172, 11172, 11190 -# 3, 2654, 11061, 11061, 11172, 11172, 11191 -# -# This is an example layout CSV, which could be fed to the tool -# together with the previous CSV: -# -# , dma-in, compute, dma-out -# 0, 0, , -# "range(1,3)", , 1, -# 9, , , 2 -# -# To produce the following output: -# -# , dma_in, , compute, , dma_out, -# 0, 334, 10940, , , , -# 1, , , 11061, 11172, , -# 2, , , 11061, 11172, , -# 3, , , , , 11172, 11191 -# -# The output CSV can be fed directly to `eventvis.py`. 
-# -# Author: Luca Colagrande - -import sys -import argparse -import csv -import pandas as pd -from math import isnan -import hjson - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - 'csv', - metavar='', - help='Input CSV file') - parser.add_argument( - 'layout', - metavar='', - help='Layout CSV file') - parser.add_argument( - '--cfg', - type=str, - help='System configuration .hjson file') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='trace.csv', - help='Output CSV file') - args = parser.parse_args() - - # Read input CSV - df = pd.read_csv(args.csv) - - # Read system configuration .hjson file - cfg = None - with open(args.cfg) as cfg_file: - cfg = hjson.load(cfg_file) - - # Output CSV data - data = [] - columns = [] - - # Open layout CSV - with open(args.layout) as layout_f: - layout_reader = csv.reader(layout_f, delimiter=',') - - # Get region labels from layout header - regions = [label for label in next(layout_reader) if label and not label.isspace()] - - # Generate output columns: appropriately spaced region labels - columns = ['hartid'] + [val for label in regions for val in [label, '']] - - # Iterate layout rows - for row in layout_reader: - - # First entry in row is a hart ID or a Python expression - # which generates a list of hart IDs - expr = row[0] - code = compile(expr, "", "eval") - # Symbols must be added to globals to be used in list comprehensions - # see https://bugs.python.org/issue36300 - tids = eval(code, {'cfg': cfg}, {'cfg': cfg}) - if type(tids) == int: - tids = [tids] - - # Iterate hart IDs - for tid in tids: - - # Start output row with hart ID - orow = [tid] - - # Iterate all other cells in layout row (indices of regions to take) - for cell in row[1:]: - - # If the cell is not empty, get start and end times - # of the region from the input CSV and append them to the - # output row. Otherwise, leave cells empty. - if cell and not cell.isspace(): - reg_idx = int(cell) - row_idx = tid - col_idx = 1 + reg_idx * 2 - assert row_idx < df.shape[0], f'Hart ID {row_idx} out of bounds' - assert (col_idx + 1) < df.shape[1],\ - f'Region index {reg_idx} out of bounds for hart {tid}' - assert not isnan(df.iat[row_idx, col_idx]),\ - (f'Region {reg_idx} looks empty for hart {tid},' - f'check whether it was simulated') - orow.append(int(df.iat[row_idx, col_idx])) - orow.append(int(df.iat[row_idx, col_idx + 1])) - else: - orow.append('') - orow.append('') - - data.append(orow) - - # Create output dataframe and write to CSV - df = pd.DataFrame(data, columns=columns) - df.set_index('hartid', inplace=True) - df.sort_index(axis='index', inplace=True) - df.index.name = None - df.to_csv(args.output) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/util/trace/perf_csv.py b/util/trace/perf_csv.py deleted file mode 100755 index f26e242e2..000000000 --- a/util/trace/perf_csv.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2020 ETH Zurich and University of Bologna. -# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 -# -# This script takes the performance metrics from all cores, in JSON format -# as dumped by the `events.py` or `gen_trace.py` scripts, and merges them -# into a single CSV file for global inspection. 
-# -# Author: Luca Colagrande - - -import sys -import argparse -import re -import json -import pandas as pd - - -HARTID_REGEX = r'hart_([0-9a-f]+)_perf.json' - - -def main(): - # Argument parsing - parser = argparse.ArgumentParser() - parser.add_argument( - '-i', - '--inputs', - metavar='', - nargs='+', - help='Input performance metric dumps') - parser.add_argument( - '-o', - '--output', - metavar='', - nargs='?', - default='perf.csv', - help='Output CSV file') - parser.add_argument( - '--filter', - nargs='*', - help='All and only performance metrics to include in the CSV') - args = parser.parse_args() - - dumps = sorted(args.inputs) - - # Populate a list (one entry per hart) of dictionaries - # enumerating all the performance metrics for each hart - data = [] - index = [] - for dump in dumps: - - # Get hart id from filename and append to index - hartid = int(re.search(HARTID_REGEX, dump).group(1), base=16) - index.append(hartid) - - # Populate dictionary of metrics for the current hart - hart_metrics = {} - with open(dump, 'r') as f: - hart_data = json.load(f) - - # Uniquefy names of performance metrics in each trace - # region by prepending the region index, and merge - # all region metrics in a single dictionary - for i, region in enumerate(hart_data): - - # If filter was provided on the command-line then filter out all - # perf metrics which were not listed - if args.filter: - region = {key: val for (key, val) in region.items() if key in args.filter} - - region_metrics = {f'{i}_{key}': val for (key, val) in region.items()} - hart_metrics.update(region_metrics) - - data.append(hart_metrics) - - # Export data - df = pd.DataFrame.from_records(data, index) - df.to_csv(args.output) - - -if __name__ == '__main__': - sys.exit(main()) From 1039d2056147e9ca4d7320e95d45adecca8afca8 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 3 Apr 2024 13:52:18 +0200 Subject: [PATCH 3/8] trace: Add functionality to parse DMA trace --- target/common/common.mk | 4 +- util/trace/gen_trace.py | 177 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 161 insertions(+), 20 deletions(-) diff --git a/target/common/common.mk b/target/common/common.mk index 18f40ac45..e2c19eeb3 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -225,8 +225,8 @@ clean-perf: clean-visual-trace: rm -f $(VISUAL_TRACE) -$(addprefix $(LOGS_DIR)/,trace_hart_%.txt hart_%_perf.json): $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) - $(DASM) < $< | $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt +$(addprefix $(LOGS_DIR)/,trace_hart_%.txt hart_%_perf.json dma_%_perf.json): $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) + $(DASM) < $< | $(GENTRACE_PY) --permissive --dma-trace $(SIM_DIR)/dma_trace_$*.log --dump-hart-perf $(LOGS_DIR)/hart_$*_perf.json --dump-dma-perf $(LOGS_DIR)/dma_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt # Generate source-code interleaved traces for all harts. Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index 145272382..ad97359ef 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -3,13 +3,31 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 # -# Author: Paul Scheffler -# Luca Colagrande -"""Human-readable Snitch trace generator script. 
- -This script takes a trace generated for a Snitch hart and transforms the -additional decode stage info into meaningful annotation. It also counts -and computes various performance metrics up to each mcycle CSR read. +# Authors: Paul Scheffler +# Luca Colagrande +"""Script to generate human-readable instruction traces for Snitch. + +This script takes a trace generated by a Snitch hart +(see `snitch_cc.sv`) and transforms the additional decode stage info +into meaningful annotation. + +It also counts and computes various performance metrics for every +execution region. An execution region is a sequence of instructions. +Every `mcycle` CSR read instruction in your trace implicitly defines +two execution regions, comprising respectively: + +- all instructions executed before the read, up to the previous read +or the first executed instruction +- all instructions executed after the read, up to the next read or +the last executed instruction + +Performance metrics are appended at the end of the generated trace +and can optionally be dumped to a separate JSON file. + +It also computes various performance metrics for every DMA transfer, +provided that the Snitch core is equipped with a tightly-coupled DMA +engine, and the DMA trace logged during simulation is fed to the tool. +DMA performance metrics are dumped to a separate JSON file. """ # TODO: OPER_TYPES and FPU_OPER_TYPES could break: optimization might alter enum mapping @@ -19,9 +37,10 @@ import re import argparse import json +import ast from ctypes import c_int32, c_uint32 from collections import deque, defaultdict -import pathlib +from pathlib import Path EXTRA_WB_WARN = 'WARNING: {} transactions still in flight for {}.' @@ -319,7 +338,7 @@ def load_opcodes(): global _cached_opcodes opcode_file_name = 'opcodes-flt-occamy_CUSTOM.csv' - opcode_file_path = pathlib.Path(__file__).parent.absolute() / opcode_file_name + opcode_file_path = Path(__file__).parent.absolute() / opcode_file_name _cached_opcodes = {} with open(opcode_file_path, 'r') as f: @@ -497,6 +516,109 @@ def flt_lit(num: int, fmt: int, width: int = 6, vlen: int = 1) -> str: return floats[0] +# -------------------- DMA -------------------- + + +# We always assume dma_trans contains at least one incomplete placeholder DMA transaction. +# This incomplete transaction contains default settings. Only upon a DMCPY* instruction +# is the size of the transaction known, completing the transaction. At that point, a new +# incomplete transaction is created, inheriting the configuration settings from the previous +# transaction, which may or may not be overriden before the next DMCPY*. 
+def update_dma(insn, extras, dma_trans): + # Extract instruction mnemonic from full instruction decoding (includes operand registers) + MNEMONIC_REGEX = r'^([\w.]+)\s' + match = re.match(MNEMONIC_REGEX, insn) + if match: + mnemonic = match.group(1) + # Process DMA instruction + if mnemonic in ['dmsrc', 'dmdst', 'dmstr']: + pass + elif mnemonic == 'dmrep': + dma_trans[-1]['rep'] = extras['opa'] + elif mnemonic in ['dmcpy', 'dmcpyi']: + # Create new placeholder transaction to inherit current DMA settings + dma_trans.append(dma_trans[-1].copy()) + # Set size of the transaction + dma_trans[-2]['size'] = extras['opa'] + # Override repetition count if the transaction is configured to be 1D + config = extras['rs2'] + enable_2d = (config & 2) >> 1 + if not enable_2d: + dma_trans[-2]['rep'] = 1 + + +def eval_dma_metrics(dma_trans, dma_trace): + dma_trace = Path(dma_trace) + if dma_trace.exists(): + with open(dma_trace, 'r') as f: + # Initialize variables + compl_transfers = [] + outst_transfers = [] + req_transfer_idx = 0 + req_bytes = 0 + # Iterate lines in DMA trace + for line in f.readlines(): + dma = ast.literal_eval(line) + if 'backend_burst_req_valid' in dma: + # When the first burst in a transfer is granted, we record a new transfer in + # the outstanding transfers queue, with the information obtained from the core + # trace. We record the number of bytes moved by each burst in a transfer, and + # compare the total to the number of bytes moved by the transfer, to count how + # many bursts belong to the current DMA transfer (a number which is difficult + # to pre-compute from the core trace as it depends on address alignments, etc.) + if dma['backend_burst_req_valid'] and dma['backend_burst_req_ready']: + if req_bytes == 0: + n_bytes = dma_trans[req_transfer_idx]['rep'] * \ + dma_trans[req_transfer_idx]['size'] + outst_transfers.append({'tstart': dma['time'], + 'exp_bursts': 0, + 'rec_bursts': 0, + 'bytes': n_bytes}) + req_bytes += dma['backend_burst_req_num_bytes'] + outst_transfers[-1]['exp_bursts'] += 1 + # We move on to the next transfer when the bytes requested by the previous + # bursts match the current transfer size. + if req_bytes == outst_transfers[-1]['bytes']: + req_bytes = 0 + req_transfer_idx += 1 + # Upon a burst completion, we increment the received bursts count. When this + # count matches the expected bursts count of the current transfer we record the + # end time of the transfer and promote the transfer from the outstanding to the + # completed transfers' queue. + if dma['transfer_completed']: + outst_transfers[0]['rec_bursts'] += 1 + if outst_transfers[0]['rec_bursts'] == outst_transfers[0]['exp_bursts']: + outst_transfers[0]['tend'] = dma['time'] + compl_transfer = outst_transfers.pop(0) + compl_transfer.pop('exp_bursts') + compl_transfer.pop('rec_bursts') + compl_transfers.append(compl_transfer) + # Calculate bandwidth of individual transfers + for transfer in compl_transfers: + transfer['cycles'] = transfer['tend'] - transfer['tstart'] + transfer['bw'] = transfer['bytes'] / transfer['cycles'] + # Calculate aggregate bandwidth: total number of bytes transferred while any transfer is + # active (accounts for overlaps between transfers). 
+ prev_trans_end = 0 + active_cycles = 0 + n_bytes = 0 + for transfer in compl_transfers: + # Calculate active cycles, without double-counting overlaps + curr_trans_start, curr_trans_end = transfer['tstart'], transfer['tend'] + if curr_trans_start > prev_trans_end: + active_cycles += curr_trans_end - curr_trans_start + else: + active_cycles += curr_trans_end - prev_trans_end + prev_trans_end = curr_trans_end + # Calculate total number of bytes + n_bytes += transfer['bytes'] + dma_metrics = {} + if active_cycles != 0: + dma_metrics['aggregate_bw'] = n_bytes / active_cycles + dma_metrics['transfers'] = compl_transfers + return dma_metrics + + # -------------------- FPU Sequencer -------------------- @@ -759,7 +881,8 @@ def annotate_insn( annot_fseq_offl: bool = False, # Annotate whenever core offloads to CPU on own line force_hex_addr: bool = True, - permissive: bool = True + permissive: bool = True, + dma_trans: list = [] ) -> (str, tuple, bool ): # Return time info, whether trace line contains no info, and fseq_len match = re.search(TRACE_IN_REGEX, line.strip('\n')) @@ -788,6 +911,7 @@ def annotate_insn( insn, pc_str = ('', '') else: perf_metrics[-1]['snitch_issues'] += 1 + update_dma(insn, extras, dma_trans) # Annotate sequencer elif extras['source'] == TRACE_SRCES['sequencer']: if extras['cbuf_push']: @@ -948,12 +1072,20 @@ def main(): '--permissive', action='store_true', help='Ignore some state-related issues when they occur') - parser.add_argument('-d', - '--dump-perf', - nargs='?', - metavar='file', - type=argparse.FileType('w'), - help='Dump performance metrics as json text.') + parser.add_argument( + '--dma-trace', + help='Path to a DMA trace file' + ) + parser.add_argument( + '--dump-hart-perf', + nargs='?', + type=argparse.FileType('w'), + help='Dump hart performance metrics as json text.' + ) + parser.add_argument( + '--dump-dma-perf', + help='Dump DMA performance metrics as json text.' 
+ ) args = parser.parse_args() line_iter = iter(args.infile.readline, b'') @@ -970,6 +1102,7 @@ def main(): 'cfg_buf': deque(), 'curr_cfg': None } + dma_trans = [{'rep': 1}] perf_metrics = [ defaultdict(int) ] # all values initially 0, also 'start' time of measurement 0 @@ -995,10 +1128,18 @@ def main(): print('\n## Performance metrics', file=file) for idx in range(len(perf_metrics)): print('\n' + fmt_perf_metrics(perf_metrics, idx, not args.allkeys), file=file) + # Emit DMA metrics + if args.dma_trace: + dma_metrics = eval_dma_metrics(dma_trans, args.dma_trace) - if args.dump_perf: - with args.dump_perf as file: + # Dump hart performance metrics to JSON file + if args.dump_hart_perf: + with args.dump_hart_perf as file: file.write(json.dumps(perf_metrics, indent=4)) + # Dump DMA performance metrics to JSON file + if args.dump_dma_perf and dma_metrics is not None: + with open(args.dump_dma_perf, 'w') as file: + file.write(json.dumps(dma_metrics, indent=4)) # Check for any loose ends and warn before exiting seq_isns = len(fseq_info['fseq_pcs']) + len(fseq_info['cfg_buf']) From f4894ec6f9523972ae5037fed51476a88c4fc6f4 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 12 Jul 2024 20:54:09 +0200 Subject: [PATCH 4/8] hw: Enable DMA tracing again after iDMA introduction --- hw/snitch_cluster/src/snitch_cc.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/snitch_cluster/src/snitch_cc.sv b/hw/snitch_cluster/src/snitch_cc.sv index 1bb836e58..76a35113d 100644 --- a/hw/snitch_cluster/src/snitch_cc.sv +++ b/hw/snitch_cluster/src/snitch_cc.sv @@ -387,6 +387,7 @@ module snitch_cc #( .NumAxInFlight (DMANumAxInFlight), .DMAReqFifoDepth (DMAReqFifoDepth), .NumChannels (DMANumChannels), + .DMATracing (1), .axi_ar_chan_t (axi_ar_chan_t), .axi_aw_chan_t (axi_aw_chan_t), .axi_req_t (axi_req_t), From ddcdd43af92f3b1ac98e2a0e9bfa2ab55b46d7ab Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 17 Jul 2024 11:49:22 +0200 Subject: [PATCH 5/8] trace: Adapt DMA trace parsing after iDMA introduction --- target/common/common.mk | 2 +- util/trace/gen_trace.py | 79 ++++++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/target/common/common.mk b/target/common/common.mk index e2c19eeb3..da1e0423e 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -226,7 +226,7 @@ clean-visual-trace: rm -f $(VISUAL_TRACE) $(addprefix $(LOGS_DIR)/,trace_hart_%.txt hart_%_perf.json dma_%_perf.json): $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY) - $(DASM) < $< | $(GENTRACE_PY) --permissive --dma-trace $(SIM_DIR)/dma_trace_$*.log --dump-hart-perf $(LOGS_DIR)/hart_$*_perf.json --dump-dma-perf $(LOGS_DIR)/dma_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt + $(DASM) < $< | $(GENTRACE_PY) --permissive --dma-trace $(SIM_DIR)/dma_trace_$*_00000.log --dump-hart-perf $(LOGS_DIR)/hart_$*_perf.json --dump-dma-perf $(LOGS_DIR)/dma_$*_perf.json -o $(LOGS_DIR)/trace_hart_$*.txt # Generate source-code interleaved traces for all harts. 
Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index ad97359ef..62b9737d9 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -554,49 +554,56 @@ def eval_dma_metrics(dma_trans, dma_trace): # Initialize variables compl_transfers = [] outst_transfers = [] - req_transfer_idx = 0 + transfer_idx = 0 + exp_bytes = 0 req_bytes = 0 + bursts_in_transfer = 0 + rec_bursts = 0 # Iterate lines in DMA trace for line in f.readlines(): dma = ast.literal_eval(line) - if 'backend_burst_req_valid' in dma: - # When the first burst in a transfer is granted, we record a new transfer in - # the outstanding transfers queue, with the information obtained from the core - # trace. We record the number of bytes moved by each burst in a transfer, and - # compare the total to the number of bytes moved by the transfer, to count how - # many bursts belong to the current DMA transfer (a number which is difficult - # to pre-compute from the core trace as it depends on address alignments, etc.) - if dma['backend_burst_req_valid'] and dma['backend_burst_req_ready']: - if req_bytes == 0: - n_bytes = dma_trans[req_transfer_idx]['rep'] * \ - dma_trans[req_transfer_idx]['size'] - outst_transfers.append({'tstart': dma['time'], - 'exp_bursts': 0, - 'rec_bursts': 0, - 'bytes': n_bytes}) - req_bytes += dma['backend_burst_req_num_bytes'] - outst_transfers[-1]['exp_bursts'] += 1 - # We move on to the next transfer when the bytes requested by the previous - # bursts match the current transfer size. - if req_bytes == outst_transfers[-1]['bytes']: + time = dma['meta']['time'] + # When the first burst in a transfer is granted, we record a new transfer in + # the outstanding transfers queue, with the information obtained from the core + # trace. We record the number of bytes moved by each burst in a transfer, and + # compare the total to the number of bytes moved by the transfer, to count how + # many bursts belong to the current DMA transfer (a number which is difficult + # to pre-compute from the core trace as it depends on address alignments, etc.) + if dma['backend']['req_valid'] and dma['backend']['req_ready']: + if req_bytes == 0: + exp_bytes = dma_trans[transfer_idx]['rep'] * \ + dma_trans[transfer_idx]['size'] + outst_transfers.append({'tstart': time, + 'bytes': exp_bytes}) + req_bytes += dma['backend']['req_length'] + bursts_in_transfer += 1 + # When the aggregate size of the issued bursts matches the size of the current + # transfer, we record the number of bursts in the transfer. This info is later + # needed to count responses and determine the end time of the transfer. + if req_bytes == exp_bytes: + outst_transfers[-1]['bursts'] = bursts_in_transfer + # Reset the state for the next transfer. req_bytes = 0 - req_transfer_idx += 1 - # Upon a burst completion, we increment the received bursts count. When this - # count matches the expected bursts count of the current transfer we record the - # end time of the transfer and promote the transfer from the outstanding to the - # completed transfers' queue. 
- if dma['transfer_completed']: - outst_transfers[0]['rec_bursts'] += 1 - if outst_transfers[0]['rec_bursts'] == outst_transfers[0]['exp_bursts']: - outst_transfers[0]['tend'] = dma['time'] - compl_transfer = outst_transfers.pop(0) - compl_transfer.pop('exp_bursts') - compl_transfer.pop('rec_bursts') - compl_transfers.append(compl_transfer) + bursts_in_transfer = 0 + transfer_idx += 1 + # Upon a burst completion, we increment the received bursts count. + if dma['backend']['rsp_valid'] and dma['backend']['rsp_ready']: + rec_bursts += 1 + # When the received bursts count matches the expected bursts for the current + # transfer we record the end time of the transfer and promote the transfer + # from the outstanding to the completed transfers' queue. The first response + # may arrive before the last request is issued. To allow for this condition + # we default to -1. + if rec_bursts == outst_transfers[0].get('bursts', -1): + outst_transfers[0]['tend'] = time + compl_transfers.append(outst_transfers.pop(0)) + # Reset the state for the next transfer. + rec_bursts = 0 # Calculate bandwidth of individual transfers for transfer in compl_transfers: transfer['cycles'] = transfer['tend'] - transfer['tstart'] - transfer['bw'] = transfer['bytes'] / transfer['cycles'] + if transfer['bytes'] > 0: + transfer['bw'] = transfer['bytes'] / transfer['cycles'] # Calculate aggregate bandwidth: total number of bytes transferred while any transfer is # active (accounts for overlaps between transfers). prev_trans_end = 0 @@ -1112,7 +1119,7 @@ def main(): if line: ann_insn, time_info, empty = annotate_insn( line, gpr_wb_info, fpr_wb_info, fseq_info, perf_metrics, False, - time_info, args.offl, not args.saddr, args.permissive) + time_info, args.offl, not args.saddr, args.permissive, dma_trans) if perf_metrics[0]['start'] is None: perf_metrics[0]['tstart'] = time_info[0] / 1000 perf_metrics[0]['start'] = time_info[1] From 10628d7c9aef6a1a53593d9efd2e71577f5033cb Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Tue, 23 Jul 2024 23:40:02 +0200 Subject: [PATCH 6/8] target: Align Verilator timescale with Questa --- target/common/common.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/target/common/common.mk b/target/common/common.mk index da1e0423e..aac1e5d59 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -79,6 +79,7 @@ VLT_SOURCES = $(shell ${BENDER} script flist ${VLT_BENDER} | ${SED_SRCS}) VLT_BUILDDIR := $(abspath work-vlt) VLT_FESVR = $(VLT_BUILDDIR)/riscv-isa-sim VLT_FLAGS += --timing +VLT_FLAGS += --timescale 1ns/1ps VLT_FLAGS += -Wno-BLKANDNBLK VLT_FLAGS += -Wno-LITENDIAN VLT_FLAGS += -Wno-CASEINCOMPLETE From 0a12023950e97c27c19250b5ddc89f438ba9a6d4 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 17 Jul 2024 14:17:40 +0200 Subject: [PATCH 7/8] treewide: Update iDMA to include tracer --- Bender.lock | 6 +++--- Bender.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Bender.lock b/Bender.lock index f7b6b0080..e6b40378a 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,7 +7,7 @@ packages: dependencies: - common_cells axi: - revision: 41859549968db0147062b6c3cdbe4af00080cc09 + revision: 4e54ac6766b160217a83a74d5a23af9bbf59e6ee version: null source: Git: https://github.com/pulp-platform/axi @@ -71,8 +71,8 @@ packages: dependencies: - common_cells idma: - revision: c12caf59bb482fe44b27361f6924ad346b2d22fe - version: 0.6.3 + revision: 0a0c113434aee743923d9e85631de009b4f00847 + version: null source: Git: https://github.com/pulp-platform/iDMA dependencies: diff 
--git a/Bender.yml b/Bender.yml index 660175daa..350f78c5a 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,7 +27,7 @@ dependencies: tech_cells_generic: { git: https://github.com/pulp-platform/tech_cells_generic, version: 0.2.11 } riscv-dbg: { git: https://github.com/pulp-platform/riscv-dbg, version: 0.8.0 } cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, version: 0.1.0 } - idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.3 } + idma: { git: https://github.com/pulp-platform/iDMA, rev: snitch-tracing } export_include_dirs: - hw/reqrsp_interface/include From eafe83e15b37f3605d41ca8f7f5d5e214d4d33b1 Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Fri, 9 Aug 2024 12:57:42 +0200 Subject: [PATCH 8/8] target: Generate iDMA dependencies --- .github/workflows/ci.yml | 1 + .github/workflows/lint.yml | 1 + .gitlab-ci.yml | 9 +++++---- Makefile | 2 +- iis-setup.sh | 4 +--- target/common/common.mk | 5 ++--- target/snitch_cluster/Makefile | 10 ++++++++++ 7 files changed, 21 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4613be05d..6f5efe205 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,6 +56,7 @@ jobs: - name: Build Hardware working-directory: target/snitch_cluster run: | + pip install -r $(bender path idma)/requirements.txt make CFG_OVERRIDE=cfg/github-ci.hjson VLT_JOBS=1 bin/snitch_cluster.vlt - name: Run Tests working-directory: target/snitch_cluster diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 023c20fd8..cbe6372b7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -54,6 +54,7 @@ jobs: - name: Generate RTL sources working-directory: target/snitch_cluster run: | + pip install -r $(bender path idma)/requirements.txt make rtl # For some reason, the checkout is done by a different user, # than that running `git diff` (root, possibly due to Docker). diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 656558761..adf33c858 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,7 @@ variables: before_script: source iis-setup.sh + pip install $($BENDER path idma)/requirements.txt ############## # Build docs # @@ -158,7 +159,7 @@ snitch-cluster-mchan-vsim: # Non-free # ############ -nonfree: - script: - - make nonfree - - make elab +# nonfree: +# script: +# - make nonfree +# - make elab diff --git a/Makefile b/Makefile index dcd0d6ef1..2d89360e3 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ ROOT = $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) ############ NONFREE_REMOTE ?= git@iis-git.ee.ethz.ch:pulp-restricted/snitch-cluster-nonfree.git -NONFREE_COMMIT ?= e30961e20a23a76442da27d2ba07c9fe20f3b575 +NONFREE_COMMIT ?= 214380a8c6af4cdd3d53a5b74cfaf462da877be2 NONFREE_DIR = $(ROOT)/nonfree all: nonfree diff --git a/iis-setup.sh b/iis-setup.sh index d56886c72..cf376b1a1 100755 --- a/iis-setup.sh +++ b/iis-setup.sh @@ -20,11 +20,9 @@ source .venv/bin/activate # occurring when the /tmp folder is filled by other processes. 
mkdir tmp TMPDIR=tmp pip install -r python-requirements.txt +TMPDIR=tmp pip install -r $($BENDER path idma)/requirements.txt rm -rf tmp -# Bender initialization -$BENDER vendor init - # Install spike-dasm mkdir tools/ cd tools/ diff --git a/target/common/common.mk b/target/common/common.mk index aac1e5d59..4c2b2c95b 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -47,9 +47,8 @@ VLT_ROOT ?= ${VERILATOR_ROOT} VLT_JOBS ?= $(shell nproc) VLT_NUM_THREADS ?= 1 -MATCH_END := '/+incdir+/ s/$$/\/*\/*/' -MATCH_BGN := 's/+incdir+//g' -SED_SRCS := sed -e ${MATCH_END} -e ${MATCH_BGN} +MATCH_REMOVE := 's/+incdir+\/[^ ]*//g' +SED_SRCS := sed -e ${MATCH_REMOVE} COMMON_BENDER_FLAGS += -t rtl diff --git a/target/snitch_cluster/Makefile b/target/snitch_cluster/Makefile index 60c49d8dc..552e169b5 100644 --- a/target/snitch_cluster/Makefile +++ b/target/snitch_cluster/Makefile @@ -151,9 +151,19 @@ include $(ROOT)/target/snitch_cluster/sw.mk # RTL # ####### +# Include iDMA Makefile to generate prerequisite iDMA sources +include $(shell $(BENDER) path idma)/idma.mk + GENERATED_RTL_SOURCES = $(PERIPH_DIR)/snitch_cluster_peripheral_reg_top.sv GENERATED_RTL_SOURCES += $(PERIPH_DIR)/snitch_cluster_peripheral_reg_pkg.sv GENERATED_RTL_SOURCES += $(GENERATED_DIR)/snitch_cluster_wrapper.sv +GENERATED_RTL_SOURCES += $(IDMA_ROOT)/target/rtl/idma_inst64_top.sv +GENERATED_RTL_SOURCES += $(IDMA_ROOT)/target/rtl/include/idma/tracer.svh + +# Add dependency on DMA header files +VSIM_SOURCES += $(IDMA_ROOT)/target/rtl/include/idma/tracer.svh +VLT_SOURCES += $(IDMA_ROOT)/target/rtl/include/idma/tracer.svh +VCS_SOURCES += $(IDMA_ROOT)/target/rtl/include/idma/tracer.svh .PHONY: rtl clean-rtl
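
For reference, a minimal sketch of how the reworked trace and benchmarking flow introduced by this series is meant to be driven from target/snitch_cluster, using the Make targets defined in target/common/common.mk above. It assumes a simulation has already been run, so that logs/trace_hart_*.dasm and the iDMA trace logs exist; the ROI_SPEC and CFG variables are not defined anywhere in this series and are treated as placeholders here.

    # Per-hart text traces plus hart/DMA performance dumps
    # (runs gen_trace.py on every logs/trace_hart_*.dasm file).
    make traces

    # Source-code-interleaved traces (annotate.py).
    make annotate

    # Merge all hart_*_perf.json and dma_*_perf.json dumps into logs/perf.json (join.py).
    make perf

    # Filter and label regions of interest and emit a Chrome Trace-Viewer JSON
    # (roi.py followed by visualize.py); ROI_SPEC is assumed to point to a
    # specification such as util/bench/tests/test_data/spec.json.
    make visual-trace ROI_SPEC=<roi_spec.json>
    # Open about:tracing in Chrome and load logs/trace.json to inspect the result.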
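
The same steps can also be reproduced by invoking the utilities directly, mirroring the recipes in common.mk. The concrete paths below (hart 00000, the DMA trace log name, the spec/config files and the ELF) are illustrative placeholders only, and spike-dasm is assumed to be what $(DASM) resolves to.

    # Human-readable trace plus hart/DMA performance dumps for one hart:
    spike-dasm < logs/trace_hart_00000.dasm | \
        ../../util/trace/gen_trace.py --permissive \
            --dma-trace dma_trace_00000_00000.log \
            --dump-hart-perf logs/hart_00000_perf.json \
            --dump-dma-perf logs/dma_00000_perf.json \
            -o logs/trace_hart_00000.txt

    # Join all per-thread performance dumps into a single JSON:
    ../../util/bench/join.py -i logs/*_perf.json -o logs/perf.json

    # Filter and label regions of interest according to a (possibly templated) spec,
    # rendered with the hardware configuration file:
    ../../util/bench/roi.py logs/perf.json <spec.json> --cfg <cfg.json> -o logs/roi.json

    # Emit the Trace-Viewer JSON, optionally embedding instruction-level events:
    ../../util/bench/visualize.py logs/roi.json --traces logs/trace_hart_*.txt \
        --elf <binary.elf> -o logs/trace.json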