Skip to content

Commit

Permalink
snRuntime: Various improvements from offload study
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Sep 11, 2023
1 parent 8c5cf9e commit 5bd34b1
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 39 deletions.
2 changes: 2 additions & 0 deletions sw/snRuntime/api/cluster_interrupt_decls.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ inline void snrt_int_cluster_set(uint32_t mask);

inline void snrt_int_cluster_clr(uint32_t mask);

inline void snrt_int_clr_mcip_unsafe();

inline void snrt_int_clr_mcip();

inline void snrt_int_set_mcip();
27 changes: 26 additions & 1 deletion sw/snRuntime/src/cluster_interrupts.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include "../../deps/riscv-opcodes/encoding.h"

/**
* @brief Write mask to the cluster-local interrupt set register
* @param mask set bit at X sets the interrupt of hart X
Expand All @@ -18,10 +20,33 @@ inline void snrt_int_cluster_clr(uint32_t mask) {
*(snrt_cluster_clint_clr_ptr()) = mask;
}

inline void snrt_int_clr_mcip() {
/**
 * @brief Clear MCIP interrupt
 * @details Writes a one-hot mask, with the bit at position
 *          `snrt_cluster_core_idx()` set, to the cluster-local interrupt
 *          clear register via `snrt_int_cluster_clr()`.
 *          The interrupt is cleared asynchronously, i.e. it may not be
 *          cleared yet when the function returns. Use `snrt_int_clr_mcip()`
 *          or `snrt_int_wait_mcip_clr()` if you need to block until the
 *          interrupt is cleared.
 */
inline void snrt_int_clr_mcip_unsafe() {
    // Build the mask in an unsigned 32-bit type: shifting the signed literal
    // `1` into bit 31 would be undefined behaviour.
    snrt_int_cluster_clr((uint32_t)1 << snrt_cluster_core_idx());
}

/**
 * @brief Block until the MCIP bit reads as zero in the `mip` CSR
 * @details Busy-waits: `mip` is re-read on every iteration and the function
 *          returns as soon as `MIP_MCIP` is no longer set.
 */
inline void snrt_int_wait_mcip_clr() {
    for (;;) {
        // Poll first, so an already-cleared interrupt returns immediately
        if (!(read_csr(mip) & MIP_MCIP)) {
            break;
        }
    }
}

/**
 * @brief Clear the MCIP interrupt and block until it is observed cleared
 * @details Combines `snrt_int_clr_mcip_unsafe()` (issues the clear) with
 *          `snrt_int_wait_mcip_clr()` (waits for the clear to take effect).
 */
inline void snrt_int_clr_mcip() {
    // Step 1: request the clear; this alone does not guarantee completion
    snrt_int_clr_mcip_unsafe();

    // Step 2: wait until the interrupt is actually cleared
    snrt_int_wait_mcip_clr();
}

/**
 * @brief Set the MCIP interrupt
 * @details Writes a one-hot mask, with the bit at position
 *          `snrt_cluster_core_idx()` set, to the cluster-local interrupt set
 *          register via `snrt_int_cluster_set()`.
 */
inline void snrt_int_set_mcip() {
    // Build the mask in an unsigned 32-bit type: shifting the signed literal
    // `1` into bit 31 would be undefined behaviour.
    snrt_int_cluster_set((uint32_t)1 << snrt_cluster_core_idx());
}
2 changes: 2 additions & 0 deletions sw/snRuntime/src/riscv.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
*/
static inline void snrt_wfi() {
    // Emit a single `wfi` instruction
    asm volatile("wfi");
}

/**
 * @brief Emit a single `nop` instruction
 * @details `volatile` keeps the compiler from removing the statement.
 */
static inline void snrt_nop() {
    asm volatile("nop"
                 : /* no outputs */
                 : /* no inputs */
                 : /* no clobbers */);
}

static inline uint32_t snrt_mcycle() {
uint32_t register r;
asm volatile("csrr %0, mcycle" : "=r"(r) : : "memory");
Expand Down
44 changes: 17 additions & 27 deletions sw/snRuntime/src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,6 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

/**
 * @brief Issue a word load from this cluster's hardware barrier address
 * @details The address is `SNRT_CLUSTER_HW_BARRIER_ADDR` plus
 *          `snrt_cluster_idx() * SNRT_CLUSTER_OFFSET`. The loaded value is
 *          discarded.
 *          NOTE(review): presumably the load only returns once all cores of
 *          the cluster have arrived at the barrier -- confirm against the
 *          hardware documentation.
 */
static inline void snrt_crt0_cluster_hw_barrier() {
    uint32_t barrier_addr = SNRT_CLUSTER_HW_BARRIER_ADDR;
    barrier_addr += snrt_cluster_idx() * SNRT_CLUSTER_OFFSET;

    // Destination of the load; never read afterwards
    register uint32_t discarded;
    asm volatile("lw %0, 0(%1)" : "=r"(discarded) : "r"(barrier_addr) : "memory");
}

#ifdef SNRT_INIT_CLS
static inline uint32_t snrt_cls_base_addr() {
extern volatile uint32_t __cdata_start, __cdata_end;
Expand Down Expand Up @@ -52,11 +45,9 @@ static inline void snrt_init_bss() {

// Only one core needs to perform the initialization
if (snrt_cluster_idx() == 0 && snrt_is_dm_core()) {
volatile uint32_t* p;

for (p = (uint32_t*)(&__bss_start); p < (uint32_t*)(&__bss_end); p++) {
*p = 0;
}
size_t size = (size_t)(&__bss_end) - (size_t)(&__bss_start);
snrt_dma_start_1d_wideptr((uint64_t)(&__bss_start),
(uint64_t)(snrt_zero_memory_ptr()), size);
}
}
#endif
Expand All @@ -70,22 +61,17 @@ static inline void snrt_init_cls() {

// Only one core per cluster has to do this
if (snrt_is_dm_core()) {
volatile uint32_t* p;
volatile uint32_t* cls_ptr = (volatile uint32_t*)snrt_cls_base_addr();
void* ptr = (void*)snrt_cls_base_addr();
size_t size;

// Copy cdata section to base of the TCDM
for (p = (uint32_t*)(&__cdata_start); p < (uint32_t*)(&__cdata_end);
p++) {
*cls_ptr = *p;
cls_ptr++;
}
size = (size_t)(&__cdata_end) - (size_t)(&__cdata_start);
if (size > 0) snrt_dma_start_1d(ptr, (void*)(&__cdata_start), size);

// Clear cbss section
for (p = (uint32_t*)(&__cbss_start); p < (uint32_t*)(&__cbss_end);
p++) {
*cls_ptr = 0;
cls_ptr++;
}
ptr = (void*)((uint32_t)ptr + size);
size = (size_t)(&__cbss_end) - (size_t)(&__cbss_start);
snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr()), size);
}
}
#endif
Expand All @@ -105,7 +91,6 @@ void snrt_main() {
int exit_code = 0;

#ifdef SNRT_CRT0_CALLBACK0

snrt_crt0_callback0();
#endif

Expand All @@ -129,6 +114,11 @@ void snrt_main() {
snrt_init_cls();
#endif

#if defined(SNRT_INIT_BSS) || defined(SNRT_INIT_CLS)
// Single DMA wait call for both snrt_init_bss() and snrt_init_cls()
if (snrt_is_dm_core()) snrt_dma_wait_all();
#endif

#ifdef SNRT_CRT0_CALLBACK3
snrt_crt0_callback3();
#endif
Expand All @@ -142,7 +132,7 @@ void snrt_main() {
#endif

#ifdef SNRT_CRT0_PRE_BARRIER
snrt_crt0_cluster_hw_barrier();
snrt_cluster_hw_barrier();
#endif

#ifdef SNRT_CRT0_CALLBACK5
Expand All @@ -159,7 +149,7 @@ void snrt_main() {
#endif

#ifdef SNRT_CRT0_POST_BARRIER
snrt_crt0_cluster_hw_barrier();
snrt_cluster_hw_barrier();
#endif

#ifdef SNRT_CRT0_CALLBACK7
Expand Down
5 changes: 5 additions & 0 deletions sw/snRuntime/src/team.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ inline uint32_t __attribute__((const)) snrt_global_core_idx() {
return snrt_hartid() - snrt_global_core_base_hartid();
}

/**
 * @brief Index of the calling core, counting compute cores across clusters
 * @return `snrt_cluster_idx() * snrt_cluster_compute_core_num() +
 *         snrt_cluster_core_idx()`
 */
inline uint32_t __attribute__((const)) snrt_global_compute_core_idx() {
    const uint32_t cluster = snrt_cluster_idx();
    // Compute cores in all lower-indexed clusters, then the local core index
    return cluster * snrt_cluster_compute_core_num() + snrt_cluster_core_idx();
}

/**
 * @brief Index of the cluster the calling core belongs to
 * @return `snrt_global_core_idx()` divided (integer division) by
 *         `snrt_cluster_core_num()`
 */
inline uint32_t __attribute__((const)) snrt_cluster_idx() {
    const uint32_t global_idx = snrt_global_core_idx();
    return global_idx / snrt_cluster_core_num();
}
Expand Down
18 changes: 11 additions & 7 deletions target/common/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@ LOGS_DIR ?= logs
TB_DIR ?= $(SNITCH_ROOT)/target/common/test
UTIL_DIR ?= $(SNITCH_ROOT)/util

# Support for local override
# External executables
BENDER ?= bender
DASM ?= spike-dasm
VLT ?= verilator
VERIBLE_FMT ?= verible-verilog-format
CLANG_FORMAT ?= clang-format

# Internal executables
BIN2JTAG ?= $(UTIL_DIR)/bin2jtag.py
ANNOTATE ?= $(UTIL_DIR)/trace/annotate.py
GENTRACE ?= $(UTIL_DIR)/trace/gen_trace.py
CLANG_FORMAT ?= clang-format
ANNOTATE_PY ?= $(UTIL_DIR)/trace/annotate.py
EVENTS_PY ?= $(UTIL_DIR)/trace/events.py
PERF_CSV_PY ?= $(UTIL_DIR)/trace/perf_csv.py

VERILATOR_ROOT ?= $(dir $(shell which $(VLT)))/../share/verilator
VLT_ROOT ?= ${VERILATOR_ROOT}
Expand Down Expand Up @@ -194,10 +198,10 @@ traces: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.
# make annotate
# Generate source-code interleaved traces for all harts. Reads the binary from
# the logs/.rtlbinary file that is written at start of simulation in the vsim script
$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE}
$(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $<
$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE}
$(PYTHON) ${ANNOTATE} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d
$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY}
$(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $<
$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY}
$(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d
BINARY ?= $(shell cat $(LOGS_DIR)/.rtlbinary)
annotate: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.s/') || echo "") \
$(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/\.dasm/\.diff/') || echo "")
Expand Down
8 changes: 4 additions & 4 deletions target/snitch_cluster/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,12 @@ bin/snitch_cluster.vcs: ${VCS_SOURCES} ${TB_SRCS} $(TB_CC_SOURCES) $(TB_ASM_SOUR
##########

$(LOGS_DIR)/perf.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \
$(ROOT)/util/trace/perf_csv.py
$(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json
$(PERF_CSV_PY)
$(PYTHON) $(PERF_CSV_PY) -o $@ -i $(LOGS_DIR)/hart_*_perf.json

$(LOGS_DIR)/event.csv: $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null | sed 's/trace_hart/hart/' | sed 's/.dasm/_perf.json/')) \
$(ROOT)/util/trace/perf_csv.py
$(PYTHON) $(ROOT)/util/trace/perf_csv.py -o $@ -i $(LOGS_DIR)/hart_*_perf.json --filter tstart tend
$(PERF_CSV_PY)
$(PYTHON) $(PERF_CSV_PY) -o $@ -i $(LOGS_DIR)/hart_*_perf.json --filter tstart tend

########
# Util #
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ extern volatile uint32_t* snrt_cluster_clint_set_ptr();
extern volatile uint32_t* snrt_cluster_clint_clr_ptr();

extern uint32_t snrt_cluster_hw_barrier_addr();

extern volatile uint32_t* snrt_zero_memory_ptr();

0 comments on commit 5bd34b1

Please sign in to comment.