diff --git a/sw/blas/gemm/src/gemm.h b/sw/blas/gemm/src/gemm.h
index bab6c339cc..db2870eb91 100644
--- a/sw/blas/gemm/src/gemm.h
+++ b/sw/blas/gemm/src/gemm.h
@@ -1361,7 +1361,8 @@ int gemm(precision_t prec, uint32_t expand, uint32_t setup_ssr,
                                                   frac_m, frac_n, n, prec);
                         } else if (k_tile == 0) {
                             snrt_dma_start_1d(local_c_partial,
-                                              (void*)snrt_zero_memory_ptr(),
+                                              (void*)snrt_zero_memory_ptr(
+                                                  snrt_cluster_idx()),
                                               frac_m * frac_n * prec);
                         }
                     }
diff --git a/sw/dnn/batchnorm/src/batchnorm.h b/sw/dnn/batchnorm/src/batchnorm.h
index 4c8b5adc10..a31429bbb6 100644
--- a/sw/dnn/batchnorm/src/batchnorm.h
+++ b/sw/dnn/batchnorm/src/batchnorm.h
@@ -78,7 +78,7 @@ static inline void batchnorm_layer(const batchnorm_layer_t *l) {
     uint32_t weights_size = l->CI;
     uint32_t ofmap_size = 2 * l->IW * l->TILE_CI;
 
-    double *ptr = (double *)snrt_l1_start_addr();
+    double *ptr = (double *)snrt_l1_start_addr(cluster_id);
     double *ifmap = ptr;
     ptr += ifmap_size;
     double *gamma = ptr;
diff --git a/sw/snRuntime/src/alloc.h b/sw/snRuntime/src/alloc.h
index 653137da14..aee76815d3 100644
--- a/sw/snRuntime/src/alloc.h
+++ b/sw/snRuntime/src/alloc.h
@@ -75,13 +75,15 @@ inline void *snrt_l3alloc(size_t size) {
 inline void snrt_alloc_init() {
     // Only one core per cluster has to initialize the L1 allocator
     if (snrt_is_dm_core()) {
+        const uint32_t cluster_idx = snrt_cluster_idx();
         // Initialize L1 allocator
         // Note: at the moment the allocator assumes all of the TCDM is
         // available for allocation. However, the CLS, TLS and stack already
         // occupy a possibly significant portion.
         snrt_l1_allocator()->base =
-            ALIGN_UP(snrt_l1_start_addr(), MIN_CHUNK_SIZE);
-        snrt_l1_allocator()->size = snrt_l1_end_addr() - snrt_l1_start_addr();
+            ALIGN_UP(snrt_l1_start_addr(cluster_idx), MIN_CHUNK_SIZE);
+        snrt_l1_allocator()->size =
+            snrt_l1_end_addr(cluster_idx) - snrt_l1_start_addr(cluster_idx);
         snrt_l1_allocator()->next = snrt_l1_allocator()->base;
         // Initialize L3 allocator
         extern uint32_t _edram;
diff --git a/sw/snRuntime/src/cluster_interrupts.h b/sw/snRuntime/src/cluster_interrupts.h
index ee2a36f87b..1c908d21d0 100644
--- a/sw/snRuntime/src/cluster_interrupts.h
+++ b/sw/snRuntime/src/cluster_interrupts.h
@@ -9,7 +9,7 @@
  * @param mask set bit at X sets the interrupt of hart X
  */
 inline void snrt_int_cluster_set(uint32_t mask) {
-    *(snrt_cluster_clint_set_ptr()) = mask;
+    *(snrt_cluster_clint_set_ptr(snrt_cluster_idx())) = mask;
 }
 
 /**
@@ -17,7 +17,7 @@ inline void snrt_int_cluster_set(uint32_t mask) {
  * @param mask set bit at X clears the interrupt of hart X
  */
 inline void snrt_int_cluster_clr(uint32_t mask) {
-    *(snrt_cluster_clint_clr_ptr()) = mask;
+    *(snrt_cluster_clint_clr_ptr(snrt_cluster_idx())) = mask;
 }
 
 /**
diff --git a/sw/snRuntime/src/perf_cnt.h b/sw/snRuntime/src/perf_cnt.h
index 11157c08bf..c282656ffc 100644
--- a/sw/snRuntime/src/perf_cnt.h
+++ b/sw/snRuntime/src/perf_cnt.h
@@ -70,7 +70,7 @@ typedef struct {
 } perf_regs_t;
 
 inline perf_regs_t* snrt_perf_counters() {
-    return (perf_regs_t*)snrt_cluster_perf_counters_addr();
+    return (perf_regs_t*)snrt_cluster_perf_counters_addr(snrt_cluster_idx());
 }
 
 // Enable a specific perf_counter
diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c
index 582e93b8e7..37bb7fc76c 100644
--- a/sw/snRuntime/src/start.c
+++ b/sw/snRuntime/src/start.c
@@ -47,7 +47,8 @@ static inline void snrt_init_tls() {
         size = (size_t)(&__tbss_end) - (size_t)(&__tbss_start);
         for (int i = 0; i < snrt_cluster_core_num(); i++) {
             snrt_dma_start_1d((void*)(tls_ptr + i * tls_offset),
-                              (void*)(snrt_zero_memory_ptr()), size);
+                              (void*)(snrt_zero_memory_ptr(snrt_cluster_idx())),
+                              size);
         }
     }
 
@@ -63,7 +64,9 @@ static inline void snrt_init_bss() {
     if (snrt_cluster_idx() == 0 && snrt_is_dm_core()) {
         size_t size = (size_t)(&__bss_end) - (size_t)(&__bss_start);
         snrt_dma_start_1d_wideptr((uint64_t)(&__bss_start),
-                                  (uint64_t)(snrt_zero_memory_ptr()), size);
+                                  (uint64_t)(snrt_zero_memory_ptr(
+                                      snrt_cluster_idx())),
+                                  size);
     }
 }
 #endif
@@ -87,7 +90,8 @@ static inline void snrt_init_cls() {
         // Clear cbss section
         ptr = (void*)((uint32_t)ptr + size);
         size = (size_t)(&__cbss_end) - (size_t)(&__cbss_start);
-        snrt_dma_start_1d(ptr, (void*)(snrt_zero_memory_ptr()), size);
+        snrt_dma_start_1d(
+            ptr, (void*)(snrt_zero_memory_ptr(snrt_cluster_idx())), size);
     }
 }
 #endif
diff --git a/sw/tests/alias.c b/sw/tests/alias.c
index d27c96ce35..54b33e17a5 100644
--- a/sw/tests/alias.c
+++ b/sw/tests/alias.c
@@ -5,7 +5,8 @@
 #include "snrt.h"
 
 uint32_t cluster_global_to_local_address(uint32_t global_addr) {
-    return global_addr - snrt_l1_start_addr() + ALIAS_TCDM_BASE_ADDR;
+    return global_addr - snrt_l1_start_addr(snrt_cluster_idx()) +
+           ALIAS_TCDM_BASE_ADDR;
 }
 
 const uint32_t n_inputs = 16;
diff --git a/sw/tests/zero_mem.c b/sw/tests/zero_mem.c
index 53a333712e..3ac4b47627 100644
--- a/sw/tests/zero_mem.c
+++ b/sw/tests/zero_mem.c
@@ -10,7 +10,7 @@ int main() {
     uint32_t n_inputs = 4;
 
     // Get memory locations
-    uint32_t *zero_mem = (uint32_t *)snrt_zero_memory_ptr();
+    uint32_t *zero_mem = (uint32_t *)snrt_zero_memory_ptr(snrt_cluster_idx());
     uint32_t *buffer_tcdm = snrt_l1_next();
     uint32_t *buffer_golden = (snrt_l1_next() + 128);
 
diff --git a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.h b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.h
index cb30390cc8..243ca6f4d1 100644
--- a/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.h
+++ b/target/snitch_cluster/sw/runtime/common/snitch_cluster_memory.h
@@ -30,30 +30,41 @@
 // snRuntime interface functions
 //===============================================================
 
-inline uint32_t __attribute__((const)) snrt_l1_start_addr() {
-    return CLUSTER_TCDM_START_ADDR;
+// Returns cluster_idx * SNRT_CLUSTER_OFFSET. Every accessor below adds this
+// value to a CLUSTER_* address constant to form the address for cluster_idx.
+inline uint32_t cluster_base_offset(uint32_t cluster_idx) {
+    return cluster_idx * SNRT_CLUSTER_OFFSET;
+}
+
+inline uint32_t snrt_l1_start_addr(uint32_t cluster_idx) {
+    return CLUSTER_TCDM_BASE_ADDR + cluster_base_offset(cluster_idx);
 }
 
-inline uint32_t __attribute__((const)) snrt_l1_end_addr() {
-    return CLUSTER_TCDM_END_ADDR;
+// NOTE(review): the L1 end is now derived from CLUSTER_PERIPH_BASE_ADDR, which
+// presumes the peripheral region begins where the TCDM ends -- confirm.
+inline uint32_t snrt_l1_end_addr(uint32_t cluster_idx) {
+    return CLUSTER_PERIPH_BASE_ADDR + cluster_base_offset(cluster_idx);
 }
 
-inline volatile uint32_t* __attribute__((const)) snrt_cluster_clint_set_ptr() {
-    return (uint32_t*)CLUSTER_CLINT_SET_ADDR;
+inline volatile uint32_t* snrt_cluster_clint_set_ptr(uint32_t cluster_idx) {
+    return (uint32_t*)(CLUSTER_CLINT_SET_ADDR +
+                       cluster_base_offset(cluster_idx));
 }
 
-inline volatile uint32_t* __attribute__((const)) snrt_cluster_clint_clr_ptr() {
-    return (uint32_t*)CLUSTER_CLINT_CLR_ADDR;
+inline volatile uint32_t* snrt_cluster_clint_clr_ptr(uint32_t cluster_idx) {
+    return (uint32_t*)(CLUSTER_CLINT_CLR_ADDR +
+                       cluster_base_offset(cluster_idx));
 }
 
-inline uint32_t __attribute__((const)) snrt_cluster_hw_barrier_addr() {
-    return CLUSTER_HW_BARRIER_ADDR;
+inline uint32_t snrt_cluster_hw_barrier_addr(uint32_t cluster_idx) {
+    return CLUSTER_HW_BARRIER_ADDR + cluster_base_offset(cluster_idx);
 }
 
-inline uint32_t __attribute__((const)) snrt_cluster_perf_counters_addr() {
-    return CLUSTER_PERF_COUNTER_ADDR;
+inline uint32_t snrt_cluster_perf_counters_addr(uint32_t cluster_idx) {
+    return CLUSTER_PERF_COUNTER_ADDR + cluster_base_offset(cluster_idx);
 }
 
-inline volatile uint32_t* __attribute__((const)) snrt_zero_memory_ptr() {
-    return (uint32_t*)CLUSTER_ZERO_MEM_START_ADDR;
+inline volatile uint32_t* snrt_zero_memory_ptr(uint32_t cluster_idx) {
+    return (uint32_t*)(CLUSTER_ZERO_MEM_START_ADDR +
+                       cluster_base_offset(cluster_idx));
 }