diff --git a/sw/snRuntime/api/cluster_interrupt_decls.h b/sw/snRuntime/api/cluster_interrupt_decls.h index aa18db210..00a6f0a43 100644 --- a/sw/snRuntime/api/cluster_interrupt_decls.h +++ b/sw/snRuntime/api/cluster_interrupt_decls.h @@ -6,6 +6,8 @@ inline void snrt_int_cluster_set(uint32_t mask); inline void snrt_int_cluster_clr(uint32_t mask); +inline void snrt_int_clr_mcip_unsafe(); + inline void snrt_int_clr_mcip(); inline void snrt_int_set_mcip(); diff --git a/sw/snRuntime/src/cluster_interrupts.h b/sw/snRuntime/src/cluster_interrupts.h index a3b15e8e0..ee2a36f87 100644 --- a/sw/snRuntime/src/cluster_interrupts.h +++ b/sw/snRuntime/src/cluster_interrupts.h @@ -2,6 +2,8 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +#include "../../deps/riscv-opcodes/encoding.h" + /** * @brief Write mask to the cluster-local interrupt set register * @param mask set bit at X sets the interrupt of hart X @@ -18,10 +20,33 @@ inline void snrt_int_cluster_clr(uint32_t mask) { *(snrt_cluster_clint_clr_ptr()) = mask; } -inline void snrt_int_clr_mcip() { +/** + * @brief Clear MCIP interrupt + * @details The interrupt is cleared asynchronously, i.e. it may not be cleared + * yet when the function returns. Use `snrt_int_clr_mcip()` or + * `snrt_int_wait_mcip_clr()` if you need to block until the interrupt is + * cleared. 
+ */ +inline void snrt_int_clr_mcip_unsafe() { snrt_int_cluster_clr(1 << snrt_cluster_core_idx()); } +/** + * @brief Wait for MCIP interrupt to be cleared + */ +inline void snrt_int_wait_mcip_clr() { + while (read_csr(mip) & MIP_MCIP) + ; +} + +/** + * @brief Clear MCIP interrupt and wait for the write to have effect + */ +inline void snrt_int_clr_mcip() { + snrt_int_clr_mcip_unsafe(); + snrt_int_wait_mcip_clr(); +} + inline void snrt_int_set_mcip() { snrt_int_cluster_set(1 << snrt_cluster_core_idx()); } diff --git a/sw/snRuntime/src/riscv.h b/sw/snRuntime/src/riscv.h index 47542daa3..faaf888b8 100644 --- a/sw/snRuntime/src/riscv.h +++ b/sw/snRuntime/src/riscv.h @@ -10,6 +10,8 @@ */ static inline void snrt_wfi() { asm volatile("wfi"); } +static inline void snrt_nop() { asm volatile("nop" : : :); } + static inline uint32_t snrt_mcycle() { uint32_t register r; asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory"); diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index 8a692e921..3fb338f4a 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -2,13 +2,6 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. 
// SPDX-License-Identifier: Apache-2.0 -static inline void snrt_crt0_cluster_hw_barrier() { - uint32_t register r; - uint32_t hw_barrier = - SNRT_CLUSTER_HW_BARRIER_ADDR + snrt_cluster_idx() * SNRT_CLUSTER_OFFSET; - asm volatile("lw %0, 0(%1)" : "=r"(r) : "r"(hw_barrier) : "memory"); -} - #ifdef SNRT_INIT_CLS static inline uint32_t snrt_cls_base_addr() { extern volatile uint32_t __cdata_start, __cdata_end; @@ -52,11 +45,9 @@ static inline void snrt_init_bss() { // Only one core needs to perform the initialization if (snrt_cluster_idx() == 0 && snrt_is_dm_core()) { - volatile uint32_t* p; - - for (p = (uint32_t*)(&__bss_start); p < (uint32_t*)(&__bss_end); p++) { - *p = 0; - } + size_t size = (size_t)(&__bss_end) - (size_t)(&__bss_start); + snrt_dma_start_1d_wideptr((uint64_t)(&__bss_start), + (uint64_t)(snrt_zero_memory_ptr()), size); } } #endif @@ -70,22 +61,17 @@ static inline void snrt_init_cls() { // Only one core per cluster has to do this if (snrt_is_dm_core()) { - volatile uint32_t* p; - volatile uint32_t* cls_ptr = (volatile uint32_t*)snrt_cls_base_addr(); + void* ptr = (void*)snrt_cls_base_addr(); + size_t size; // Copy cdata section to base of the TCDM - for (p = (uint32_t*)(&__cdata_start); p < (uint32_t*)(&__cdata_end); - p++) { - *cls_ptr = *p; - cls_ptr++; - } + size = (size_t)(&__cdata_end) - (size_t)(&__cdata_start); + if (size > 0) snrt_dma_start_1d(ptr, (void*)(&__cdata_start), size); // Clear cbss section - for (p = (uint32_t*)(&__cbss_start); p < (uint32_t*)(&__cbss_end); - p++) { - *cls_ptr = 0; - cls_ptr++; - } + ptr = (void*)((uint32_t)ptr + size); + size = (size_t)(&__cbss_end) - (size_t)(&__cbss_start); + snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr()), size); } } #endif @@ -105,7 +91,6 @@ void snrt_main() { int exit_code = 0; #ifdef SNRT_CRT0_CALLBACK0 - snrt_crt0_callback0(); #endif @@ -129,6 +114,11 @@ void snrt_main() { snrt_init_cls(); #endif +#if defined(SNRT_INIT_BSS) || defined(SNRT_INIT_CLS) + // Single DMA wait call for 
both snrt_init_bss() and snrt_init_cls() + if (snrt_is_dm_core()) snrt_dma_wait_all(); +#endif + #ifdef SNRT_CRT0_CALLBACK3 snrt_crt0_callback3(); #endif @@ -142,7 +132,7 @@ void snrt_main() { #endif #ifdef SNRT_CRT0_PRE_BARRIER - snrt_crt0_cluster_hw_barrier(); + snrt_cluster_hw_barrier(); #endif #ifdef SNRT_CRT0_CALLBACK5 @@ -159,7 +149,7 @@ void snrt_main() { #endif #ifdef SNRT_CRT0_POST_BARRIER - snrt_crt0_cluster_hw_barrier(); + snrt_cluster_hw_barrier(); #endif #ifdef SNRT_CRT0_CALLBACK7 diff --git a/sw/snRuntime/src/team.h b/sw/snRuntime/src/team.h index 54a3b0aa2..918037e64 100644 --- a/sw/snRuntime/src/team.h +++ b/sw/snRuntime/src/team.h @@ -28,6 +28,11 @@ inline uint32_t __attribute__((const)) snrt_global_core_idx() { return snrt_hartid() - snrt_global_core_base_hartid(); } +inline uint32_t __attribute__((const)) snrt_global_compute_core_idx() { + return snrt_cluster_idx() * snrt_cluster_compute_core_num() + + snrt_cluster_core_idx(); +} + inline uint32_t __attribute__((const)) snrt_cluster_idx() { return snrt_global_core_idx() / snrt_cluster_core_num(); } diff --git a/target/common/common.mk b/target/common/common.mk index 3535fb156..9c469f5a6 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -6,15 +6,19 @@ LOGS_DIR ?= logs TB_DIR ?= $(SNITCH_ROOT)/target/common/test UTIL_DIR ?= $(SNITCH_ROOT)/util -# Support for local override +# External executables BENDER ?= bender DASM ?= spike-dasm VLT ?= verilator VERIBLE_FMT ?= verible-verilog-format +CLANG_FORMAT ?= clang-format + +# Internal executables BIN2JTAG ?= $(UTIL_DIR)/bin2jtag.py -ANNOTATE ?= $(UTIL_DIR)/trace/annotate.py GENTRACE ?= $(UTIL_DIR)/trace/gen_trace.py -CLANG_FORMAT ?= clang-format +ANNOTATE_PY ?= $(UTIL_DIR)/trace/annotate.py +EVENTS_PY ?= $(UTIL_DIR)/trace/events.py +PERF_CSV_PY ?= $(UTIL_DIR)/trace/perf_csv.py VERILATOR_ROOT ?= $(dir $(shell which $(VLT)))/../share/verilator VLT_ROOT ?= ${VERILATOR_ROOT} @@ -194,10 +198,10 @@ traces: $(shell (ls 
$(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\. # make annotate # Generate source-code interleaved traces for all harts. Reads the binary from # the logs/.rtlbinary file that is written at start of simulation in the vsim script -$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE} - $(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE} - $(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d +$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} + $(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< +$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY} + $(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d BINARY ?= $(shell cat $(LOGS_DIR)/.rtlbinary) annotate: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.s/') || echo "") \ $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.diff/') || echo "") diff --git a/target/snitch_cluster/Makefile b/target/snitch_cluster/Makefile index 9b346ba1a..f464697d1 100644 --- a/target/snitch_cluster/Makefile +++ b/target/snitch_cluster/Makefile @@ -266,12 +266,12 @@ bin/snitch_cluster.vcs: ${VCS_SOURCES} ${TB_SRCS} $(TB_CC_SOURCES) $(TB_ASM_SOUR ########## $(LOGS_DIR)/perf.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \ - $(ROOT)/util/trace/perf_csv.py - $(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json + $(PERF_CSV_PY) + $(PYTHON) $(PERF_CSV_PY) -o $@ -i $(LOGS_DIR)/hart_*_perf.json $(LOGS_DIR)/event.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \ - $(ROOT)/util/trace/perf_csv.py - $(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json --filter tstart tend + $(PERF_CSV_PY) + $(PYTHON) $(PERF_CSV_PY) -o $@ -i 
$(LOGS_DIR)/hart_*_perf.json --filter tstart tend ######## # Util # diff --git a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c index f76f16508..48c08faa3 100644 --- a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c +++ b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.c @@ -11,3 +11,5 @@ extern volatile uint32_t* snrt_cluster_clint_set_ptr(); extern volatile uint32_t* snrt_cluster_clint_clr_ptr(); extern uint32_t snrt_cluster_hw_barrier_addr(); + +extern volatile uint32_t* snrt_zero_memory_ptr();