From f7599ca0c67d54bb60dd7e07a0e44698c5613de2 Mon Sep 17 00:00:00 2001 From: IveanEx Date: Sat, 7 Dec 2024 19:59:21 +0100 Subject: [PATCH] Update Software + AXI Xbar Rule --- .../sim/sw/device/runtime/src/occamy_device.h | 9 ++- .../sim/sw/device/runtime/src/occamy_start.c | 5 +- target/sim/sw/host/apps/offload/src/offload.c | 10 +-- target/sim/sw/shared/runtime/chip_id.h | 4 ++ .../sw/shared/runtime/heterogeneous_runtime.h | 63 ++++++++++++++++++- target/sim_chip/apps/Makefile | 4 +- util/occamygen/occamy.py | 10 +-- 7 files changed, 87 insertions(+), 18 deletions(-) diff --git a/target/sim/sw/device/runtime/src/occamy_device.h b/target/sim/sw/device/runtime/src/occamy_device.h index dcc1fbde1..b769e8f48 100644 --- a/target/sim/sw/device/runtime/src/occamy_device.h +++ b/target/sim/sw/device/runtime/src/occamy_device.h @@ -55,14 +55,17 @@ inline void return_to_cva6(sync_t sync) { if (cnt == snrt_cluster_num()) { #endif *((volatile uint32_t*)barrier_ptr) = 0; - // Interrupt the local host to signal the exit code (snitch by default only has the access to local domain) - set_host_sw_interrupt(0); + // Interrupt the local host to signal the exit code (snitch by + // default only has the access to local domain) + comm_buffer_t* comm_buffer = get_communication_buffer(); + set_host_sw_interrupt(comm_buffer->chip_id); } } } // Otherwise assume cores are already synchronized and only // one core calls this function else { - set_host_sw_interrupt(0); + comm_buffer_t* comm_buffer = get_communication_buffer(); + set_host_sw_interrupt(comm_buffer->chip_id); } } diff --git a/target/sim/sw/device/runtime/src/occamy_start.c b/target/sim/sw/device/runtime/src/occamy_start.c index 00fca72ba..e8c33381f 100644 --- a/target/sim/sw/device/runtime/src/occamy_start.c +++ b/target/sim/sw/device/runtime/src/occamy_start.c @@ -26,7 +26,10 @@ static inline void snrt_exit_default(int exit_code); static inline void snrt_exit(int exit_code) { snrt_exit_default(exit_code); // Interrupt the local host to signal the exit code (snitch by default only has the access to local domain) - if (snrt_global_core_idx() == 0) set_host_sw_interrupt(0); + if (snrt_global_core_idx() == 0) { + comm_buffer_t* comm_buffer = get_communication_buffer(); + set_host_sw_interrupt(comm_buffer->chip_id); + } } #include "start.c" diff --git a/target/sim/sw/host/apps/offload/src/offload.c b/target/sim/sw/host/apps/offload/src/offload.c index 5dd4b24d6..e2f7af1be 100644 --- a/target/sim/sw/host/apps/offload/src/offload.c +++ b/target/sim/sw/host/apps/offload/src/offload.c @@ -55,19 +55,19 @@ int main() { reset_and_ungate_quadrants_all(target_chip_id); // print_str(current_chip_address_prefix, "[Occamy] Snitch ungated. \r\n"); deisolate_all(target_chip_id); - // print_str(current_chip_address_prefix, "[Occamy] Snitch deisolated. \r\n"); - // Enable interrupts to receive notice of job termination + // print_str(current_chip_address_prefix, "[Occamy] Snitch deisolated. + // \r\n"); Enable interrupts to receive notice of job termination enable_sw_interrupts(); // Program Snitch entry point and communication buffer - (*comm_buffer_ptr).lock = 0; - (*comm_buffer_ptr).chip_id = current_chip_id; + comm_buffer_ptr->lock = 0; + comm_buffer_ptr->chip_id = current_chip_id; program_snitches(target_chip_id, comm_buffer_ptr); // print_str(current_chip_address_prefix, // "[Occamy] Snitch Jump Address Programmed. \r\n"); // Compiler fence to ensure Snitch entry point is // programmed before Snitches are woken up - asm volatile("" ::: "memory"); + asm volatile("fence.i" ::: "memory"); print_str(current_chip_address_prefix, "[Occamy] Calling snitch cluster to execute the task \r\n"); diff --git a/target/sim/sw/shared/runtime/chip_id.h b/target/sim/sw/shared/runtime/chip_id.h index 04928e84f..52d6ab67c 100644 --- a/target/sim/sw/shared/runtime/chip_id.h +++ b/target/sim/sw/shared/runtime/chip_id.h @@ -37,6 +37,10 @@ inline uint8_t *get_chip_baseaddress(uint8_t chip_id) { #endif } +inline uint32_t get_chip_baseaddress_h(uint8_t chip_id) { + return (uint32_t)(chip_id << 8); +} + inline uint32_t get_current_chip_baseaddress_h() { uint32_t chip_id = get_current_chip_id(); return (uint32_t)(chip_id << 8); diff --git a/target/sim/sw/shared/runtime/heterogeneous_runtime.h b/target/sim/sw/shared/runtime/heterogeneous_runtime.h index a33ab31a7..96b3397da 100644 --- a/target/sim/sw/shared/runtime/heterogeneous_runtime.h +++ b/target/sim/sw/shared/runtime/heterogeneous_runtime.h @@ -22,26 +22,85 @@ typedef struct { /**************/ inline static void set_host_sw_interrupt(uint8_t chip_id) { +#if __riscv_xlen == 64 uint32_t* msip_ptr = (uint32_t*)(((uintptr_t)clint_msip_ptr(0)) | ((uintptr_t)get_chip_baseaddress(chip_id))); *msip_ptr = 1; +#elif __riscv_xlen == 32 + uint32_t* msip_ptr = clint_msip_ptr(0); + uint32_t target_addrh = get_chip_baseaddress_h(chip_id); + uint32_t current_addrh = get_current_chip_baseaddress_h(); + + register uint32_t reg_target_addrh asm("t0") = target_addrh; + register uint32_t reg_return_value asm("t1") = 1; + register uint32_t reg_msip_ptr asm("t2") = (uint32_t)msip_ptr; + register uint32_t reg_current_addrh asm("t3") = current_addrh; + + asm volatile( + "csrw 0xbc0, t0;" + "sw t1, 0(t2);" + "csrw 0xbc0, t3;" + : + : "r"(reg_target_addrh), "r"(reg_return_value), "r"(reg_msip_ptr), + "r"(reg_current_addrh) + : "memory"); +#endif } inline void clear_host_sw_interrupt_unsafe(uint8_t chip_id) { +#if __riscv_xlen == 64 uint32_t* msip_ptr = (uint32_t*)(((uintptr_t)clint_msip_ptr(0)) | ((uintptr_t)get_chip_baseaddress(chip_id))); - *msip_ptr = 0; +#elif __riscv_xlen == 32 + uint32_t* msip_ptr = clint_msip_ptr(0); + uint32_t target_addrh = get_chip_baseaddress_h(chip_id); + uint32_t current_addrh = get_current_chip_baseaddress_h(); + + register uint32_t reg_target_addrh asm("t0") = target_addrh; + register uint32_t reg_return_value asm("t1") = 0; + register uint32_t reg_msip_ptr asm("t2") = (uint32_t)msip_ptr; + register uint32_t reg_current_addrh asm("t3") = current_addrh; + + asm volatile( + "csrw 0xbc0, t0;" + "sw t1, 0(t2);" + "csrw 0xbc0, t3;" + : + : "r"(reg_target_addrh), "r"(reg_return_value), "r"(reg_msip_ptr), + "r"(reg_current_addrh) + : "memory"); +#endif } inline void wait_host_sw_interrupt_clear(uint8_t chip_id) { +#if __riscv_xlen == 64 uint32_t* msip_ptr = (uint32_t*)(((uintptr_t)clint_msip_ptr(0)) | ((uintptr_t)get_chip_baseaddress(chip_id))); - while (*msip_ptr); +#elif __riscv_xlen == 32 + uint32_t* msip_ptr = clint_msip_ptr(0); + uint32_t target_addrh = get_chip_baseaddress_h(chip_id); + uint32_t current_addrh = get_current_chip_baseaddress_h(); + + register uint32_t reg_target_addrh asm("t0") = target_addrh; + register uint32_t reg_value asm("t1"); + register uint32_t reg_msip_ptr asm("t2") = (uint32_t)msip_ptr; + register uint32_t reg_current_addrh asm("t3") = current_addrh; + + do { + asm volatile( + "csrw 0xbc0, t0;" + "lw t1, 0(t2);" + "csrw 0xbc0, t3;" + : "=r"(reg_value) + : "r"(reg_target_addrh), "r"(reg_msip_ptr), "r"(reg_current_addrh) + : "memory"); + } while (reg_value); +#endif } static inline void clear_host_sw_interrupt(uint8_t chip_id) { diff --git a/target/sim_chip/apps/Makefile b/target/sim_chip/apps/Makefile index 5a7bb53d5..72a2846a2 100644 --- a/target/sim_chip/apps/Makefile +++ b/target/sim_chip/apps/Makefile @@ -13,11 +13,11 @@ RISCV_READELF = $(CVA6_GCC_ROOT)/riscv64-unknown-elf-readelf ELFS = $(shell find $(TARGET)/sim/sw/host/apps -type f -name "*.elf") # Generate BIN filenames from the ELF filenames BINS = $(ELFS:.elf=.bin) -$(info BINS: $(BINS)) -.PHONY: apps clean +.PHONY: apps hex clean apps: $(BINS) + python3 bin2hex.py # Use full paths for the dependencies %.bin: %.elf diff --git a/util/occamygen/occamy.py b/util/occamygen/occamy.py index 8dc50948b..5be51ffaa 100644 --- a/util/occamygen/occamy.py +++ b/util/occamygen/occamy.py @@ -315,10 +315,10 @@ def am_connect_soc_wide_xbar_quad(am, am_soc_narrow_xbar, am_wide_xbar_quadrant_ "quadrant_{}_cluster_{}_tcdm".format(i, j), clusters_tcdm_size[j+1], *bases_cluster - ).attach_to( - am_wide_xbar_quadrant_s1[i] ).attach_to( am_narrow_xbar_quadrant_s1[i] + ).attach_to( + am_wide_xbar_quadrant_s1[i] ) ) @@ -330,11 +330,11 @@ def am_connect_soc_wide_xbar_quad(am, am_soc_narrow_xbar, am_wide_xbar_quadrant_ "quadrant_{}_cluster_{}_periph".format(i, j), clusters_periph_size[j+1], *bases_cluster - ).attach_to( - am_wide_xbar_quadrant_s1[i] ).attach_to( am_narrow_xbar_quadrant_s1[i] - ) + ) # .attach_to( + # am_wide_xbar_quadrant_s1[i] + # ) ) bases_cluster = list()