Skip to content

Commit

Permalink
runtime: Calculate addresses based on cluster id
Browse files Browse the repository at this point in the history
  • Loading branch information
fischeti committed Feb 16, 2024
1 parent 59a6415 commit 257494c
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 26 deletions.
2 changes: 1 addition & 1 deletion sw/blas/gemm/src/gemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ int gemm(precision_t prec, uint32_t expand, uint32_t setup_ssr,
frac_m, frac_n, n, prec);
} else if (k_tile == 0) {
snrt_dma_start_1d(local_c_partial,
(void*)snrt_zero_memory_ptr(),
(void*)snrt_zero_memory_ptr(snrt_cluster_idx()),
frac_m * frac_n * prec);
}
}
Expand Down
2 changes: 1 addition & 1 deletion sw/dnn/batchnorm/src/batchnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ static inline void batchnorm_layer(const batchnorm_layer_t *l) {
uint32_t weights_size = l->CI;
uint32_t ofmap_size = 2 * l->IW * l->TILE_CI;

double *ptr = (double *)snrt_l1_start_addr();
double *ptr = (double *)snrt_l1_start_addr(cluster_id);
double *ifmap = ptr;
ptr += ifmap_size;
double *gamma = ptr;
Expand Down
5 changes: 3 additions & 2 deletions sw/snRuntime/src/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,14 @@ inline void *snrt_l3alloc(size_t size) {
inline void snrt_alloc_init() {
// Only one core per cluster has to initialize the L1 allocator
if (snrt_is_dm_core()) {
const uint32_t cluster_idx = snrt_cluster_idx();
// Initialize L1 allocator
// Note: at the moment the allocator assumes all of the TCDM is
// available for allocation. However, the CLS, TLS and stack already
// occupy a possibly significant portion.
snrt_l1_allocator()->base =
ALIGN_UP(snrt_l1_start_addr(), MIN_CHUNK_SIZE);
snrt_l1_allocator()->size = snrt_l1_end_addr() - snrt_l1_start_addr();
ALIGN_UP(snrt_l1_start_addr(cluster_idx), MIN_CHUNK_SIZE);
snrt_l1_allocator()->size = snrt_l1_end_addr(cluster_idx) - snrt_l1_start_addr(cluster_idx);
snrt_l1_allocator()->next = snrt_l1_allocator()->base;
// Initialize L3 allocator
extern uint32_t _edram;
Expand Down
4 changes: 2 additions & 2 deletions sw/snRuntime/src/cluster_interrupts.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
* @param mask set bit at X sets the interrupt of hart X
*/
inline void snrt_int_cluster_set(uint32_t mask) {
    // Single write through the cluster-indexed register pointer; the index
    // passed is whatever snrt_cluster_idx() returns for the caller. The
    // former zero-argument call no longer matches the accessor's signature.
    *(snrt_cluster_clint_set_ptr(snrt_cluster_idx())) = mask;
}

/**
 * @brief Write mask to the cluster-local interrupt clear register
 * @param mask set bit at X clears the interrupt of hart X
 */
inline void snrt_int_cluster_clr(uint32_t mask) {
    // Single write through the cluster-indexed register pointer; the index
    // passed is whatever snrt_cluster_idx() returns for the caller. The
    // former zero-argument call no longer matches the accessor's signature.
    *(snrt_cluster_clint_clr_ptr(snrt_cluster_idx())) = mask;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion sw/snRuntime/src/perf_cnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ typedef struct {
} perf_regs_t;

/**
 * @brief Get a pointer to the performance counter register block.
 * @return The address reported by snrt_cluster_perf_counters_addr() for the
 *         index returned by snrt_cluster_idx(), cast to perf_regs_t*.
 */
inline perf_regs_t* snrt_perf_counters() {
    // Only the cluster-indexed lookup is kept: the earlier zero-argument
    // return made this statement unreachable and no longer matches the
    // accessor's one-argument signature.
    return (perf_regs_t*)snrt_cluster_perf_counters_addr(snrt_cluster_idx());
}

// Enable a specific perf_counter
Expand Down
6 changes: 3 additions & 3 deletions sw/snRuntime/src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static inline void snrt_init_tls() {
size = (size_t)(&__tbss_end) - (size_t)(&__tbss_start);
for (int i = 0; i < snrt_cluster_core_num(); i++) {
snrt_dma_start_1d((void*)(tls_ptr + i * tls_offset),
(void*)(snrt_zero_memory_ptr()), size);
(void*)(snrt_zero_memory_ptr(snrt_cluster_idx())), size);
}
}

Expand All @@ -63,7 +63,7 @@ static inline void snrt_init_bss() {
if (snrt_cluster_idx() == 0 && snrt_is_dm_core()) {
size_t size = (size_t)(&__bss_end) - (size_t)(&__bss_start);
snrt_dma_start_1d_wideptr((uint64_t)(&__bss_start),
(uint64_t)(snrt_zero_memory_ptr()), size);
(uint64_t)(snrt_zero_memory_ptr(snrt_cluster_idx())), size);
}
}
#endif
Expand All @@ -87,7 +87,7 @@ static inline void snrt_init_cls() {
// Clear cbss section
ptr = (void*)((uint32_t)ptr + size);
size = (size_t)(&__cbss_end) - (size_t)(&__cbss_start);
snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr()), size);
snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr(snrt_cluster_idx())), size);
}
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion sw/tests/alias.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "snrt.h"

/**
 * @brief Rebase an address from the L1 region onto the alias region.
 * @param global_addr Address to translate.
 * @return global_addr minus the L1 start address obtained for
 *         snrt_cluster_idx(), plus ALIAS_TCDM_BASE_ADDR.
 */
uint32_t cluster_global_to_local_address(uint32_t global_addr) {
    // The L1 start address is looked up per cluster; the earlier
    // zero-argument return shadowed this one and no longer matches the
    // accessor's one-argument signature.
    uint32_t l1_start = snrt_l1_start_addr(snrt_cluster_idx());
    return global_addr - l1_start + ALIAS_TCDM_BASE_ADDR;
}

const uint32_t n_inputs = 16;
Expand Down
2 changes: 1 addition & 1 deletion sw/tests/zero_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ int main() {
uint32_t n_inputs = 4;

// Get memory locations
uint32_t *zero_mem = (uint32_t *)snrt_zero_memory_ptr();
uint32_t *zero_mem = (uint32_t *)snrt_zero_memory_ptr(snrt_cluster_idx());
uint32_t *buffer_tcdm = snrt_l1_next();
uint32_t *buffer_golden = (snrt_l1_next() + 128);

Expand Down
33 changes: 19 additions & 14 deletions target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,35 @@
// snRuntime interface functions
//===============================================================

/**
 * @brief Address offset applied to a cluster's base addresses.
 * @param cluster_idx Index of the cluster.
 * @return cluster_idx multiplied by SNRT_CLUSTER_OFFSET.
 */
inline uint32_t cluster_base_offset(uint32_t cluster_idx) {
    return cluster_idx * SNRT_CLUSTER_OFFSET;
}

/**
 * @brief Start address of the L1 memory of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_TCDM_BASE_ADDR plus the cluster's base offset.
 */
inline uint32_t snrt_l1_start_addr(uint32_t cluster_idx) {
    return CLUSTER_TCDM_BASE_ADDR + cluster_base_offset(cluster_idx);
}

/**
 * @brief End address of the L1 memory of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_PERIPH_BASE_ADDR plus the cluster's base offset.
 */
inline uint32_t snrt_l1_end_addr(uint32_t cluster_idx) {
    // NOTE(review): the peripheral base is used as the L1 end, which
    // presumably relies on the peripherals directly following the TCDM in
    // the address map -- confirm against the memory map.
    return CLUSTER_PERIPH_BASE_ADDR + cluster_base_offset(cluster_idx);
}

/**
 * @brief Pointer to the interrupt set register of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_CLINT_SET_ADDR plus the cluster's base offset, as a
 *         volatile register pointer.
 */
inline volatile uint32_t* snrt_cluster_clint_set_ptr(uint32_t cluster_idx) {
    return (volatile uint32_t*)(CLUSTER_CLINT_SET_ADDR +
                                cluster_base_offset(cluster_idx));
}

/**
 * @brief Pointer to the interrupt clear register of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_CLINT_CLR_ADDR plus the cluster's base offset, as a
 *         volatile register pointer.
 */
inline volatile uint32_t* snrt_cluster_clint_clr_ptr(uint32_t cluster_idx) {
    return (volatile uint32_t*)(CLUSTER_CLINT_CLR_ADDR +
                                cluster_base_offset(cluster_idx));
}

/**
 * @brief Address of the hardware barrier register of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_HW_BARRIER_ADDR plus the cluster's base offset.
 */
inline uint32_t snrt_cluster_hw_barrier_addr(uint32_t cluster_idx) {
    return CLUSTER_HW_BARRIER_ADDR + cluster_base_offset(cluster_idx);
}

/**
 * @brief Address of the performance counters of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_PERF_COUNTER_ADDR plus the cluster's base offset.
 */
inline uint32_t snrt_cluster_perf_counters_addr(uint32_t cluster_idx) {
    return CLUSTER_PERF_COUNTER_ADDR + cluster_base_offset(cluster_idx);
}

/**
 * @brief Pointer to the zero memory region of the given cluster.
 * @param cluster_idx Index of the cluster.
 * @return CLUSTER_ZERO_MEM_START_ADDR plus the cluster's base offset, as a
 *         volatile pointer.
 */
inline volatile uint32_t* snrt_zero_memory_ptr(uint32_t cluster_idx) {
    return (volatile uint32_t*)(CLUSTER_ZERO_MEM_START_ADDR +
                                cluster_base_offset(cluster_idx));
}

0 comments on commit 257494c

Please sign in to comment.