Skip to content

Commit

Permalink
Update Offload Function + Host Runtime (#98)
Browse files Browse the repository at this point in the history
* Update Offload Function + Host Runtime

* Fix MultiChip Bug

* Update Software + AXI Xbar Rule

* New Offload_Multichip Function

* Split the offload and offload_multichip program to pass ci tests
  • Loading branch information
IveanEx authored Dec 12, 2024
1 parent 7c5e0f9 commit baf6611
Show file tree
Hide file tree
Showing 22 changed files with 1,254 additions and 894 deletions.
25 changes: 16 additions & 9 deletions target/rtl/bootrom/src/bootrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ void delay_cycles(uint64_t cycle) {

// Boot modes.
enum boot_mode_t {
HALT,
TARGET_CHIPID,
UART,
COPY_TO_REMOTE,
Expand Down Expand Up @@ -67,16 +68,17 @@ void bootrom() {
print_u8(address_prefix, target_chip_id);
print_str(address_prefix,
"\r\n\t Enter the number to select the mode: ");
print_str(address_prefix, "\r\n\t 1. Change the target remote Chip ID");
print_str(address_prefix, "\r\n\t 2. Load from UART to 0x");
print_str(address_prefix, "\r\n\t 1. Halt the CVA6 Core");
print_str(address_prefix, "\r\n\t 2. Change the target remote Chip ID");
print_str(address_prefix, "\r\n\t 3. Load from UART to 0x");
print_u48(address_prefix, remote_chip_mem_start_address);
print_str(address_prefix,
"\r\n\t 3. Copy memory from local chip to remote chip");
"\r\n\t 4. Copy memory from local chip to remote chip");
print_str(address_prefix,
"\r\n\t 4. Copy memory from remote chip to local chip");
print_str(address_prefix, "\r\n\t 5. Print memory from 0x");
"\r\n\t 5. Copy memory from remote chip to local chip");
print_str(address_prefix, "\r\n\t 6. Print memory from 0x");
print_u48(address_prefix, remote_chip_mem_start_address);
print_str(address_prefix, "\r\n\t 6. Continue to Boot from 0x");
print_str(address_prefix, "\r\n\t 7. Continue to Boot from 0x");
print_u48(address_prefix, local_chip_mem_start_address);
print_str(address_prefix, "\r\n");

Expand All @@ -85,19 +87,24 @@ void bootrom() {
char* cur = 0;

switch (boot_mode) {
case HALT:
print_str(address_prefix, "\r\n\t CVA6 Core is Halted. ");
getchar(address_prefix);
__asm__ volatile("wfi");
break;
case TARGET_CHIPID:
print_str(address_prefix,
"\r\n\t Enter the target remote Chip ID: ");
scan_uart(address_prefix, in_buf);
cur = in_buf;
target_chip_id = 0;
while (*cur != '\0') {
if (*cur >= '0' || *cur <= '9') {
if (*cur >= '0' && *cur <= '9') {
target_chip_id = (target_chip_id << 4) + *cur - '0';
} else if (*cur >= 'A' || *cur <= 'F') {
} else if (*cur >= 'A' && *cur <= 'F') {
target_chip_id =
(target_chip_id << 4) + *cur - 'A' + 10;
} else if (*cur >= 'a' || *cur <= 'f') {
} else if (*cur >= 'a' && *cur <= 'f') {
target_chip_id =
(target_chip_id << 4) + *cur - 'a' + 10;
} else {
Expand Down
1 change: 0 additions & 1 deletion target/sim/sw/device/apps/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ INCDIRS += $(SNRT_DIR)/../math/include
BASE_LD = $(abspath $(SNRT_DIR)/base.ld)
MEMORY_LD = $(abspath $(APPSDIR)/memory.ld)
ORIGIN_LD = $(abspath $(BUILDDIR)/origin.ld)
BASE_LD = $(abspath $(SNRT_DIR)/base.ld)
SNRT_LIB_DIR = $(abspath $(RUNTIME_DIR)/build/)
SNRT_LIB_NAME = snRuntime
SNRT_LIB = $(realpath $(SNRT_LIB_DIR)/lib$(SNRT_LIB_NAME).a)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int main() {
if (snrt_is_dm_core()) {
tcdm0_start_addr = (uint64_t)snrt_cluster_base_addrl();
tcdm0_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("The C0 TCDM ADDR is %p%p \n",
printf("The C0 TCDM ADDR is %p%p \r\n",
(uint8_t*)(tcdm0_start_addr >> 32),
(uint8_t*)tcdm0_start_addr);
}
Expand All @@ -37,7 +37,7 @@ int main() {
if (snrt_is_dm_core()) {
tcdm1_start_addr = (uint64_t)snrt_cluster_base_addrl();
tcdm1_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("The C1 TCDM ADDR is %p%p \n",
printf("The C1 TCDM ADDR is %p%p \r\n",
(uint8_t*)(tcdm1_start_addr >> 32),
(uint8_t*)tcdm1_start_addr);
}
Expand All @@ -48,7 +48,7 @@ int main() {
if (snrt_is_dm_core()) {
test_data_start_addr = (uint64_t)test_data;
test_data_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("[C0] Start to load data from %p%p \n",
printf("[C0] Start to load data from %p%p \r\n",
(uint8_t*)(test_data_start_addr >> 32),
(uint8_t*)test_data_start_addr);
snrt_dma_start_1d_wideptr(tcdm0_start_addr, test_data_start_addr,
Expand All @@ -62,7 +62,7 @@ int main() {
// Then C1 fetches data from C0
if (snrt_cluster_idx() == 1) {
if (snrt_is_dm_core()) {
printf("[C1] Start to load data from %p%p \n",
printf("[C1] Start to load data from %p%p \r\n",
(uint8_t*)(tcdm0_start_addr >> 32),
(uint8_t*)tcdm0_start_addr);
snrt_dma_start_1d_wideptr(tcdm1_start_addr, tcdm0_start_addr,
Expand All @@ -76,12 +76,12 @@ int main() {
// Start to check
if (snrt_cluster_idx() == 0) {
if (snrt_cluster_core_idx() == 0) {
printf("C0 Checking the results\n");
printf("C0 Checking the results\r\n");
for (int i = 0; i < length_data; i++) {
if (((int8_t*)tcdm0_start_addr)[i] != test_data[i]) {
err++;
printf("C0 data is incorrect!\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\n", i,
printf("C0 data is incorrect!\r\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\r\n", i,
((int8_t*)tcdm0_start_addr)[i], i, test_data[i]);
return -1;
}
Expand All @@ -91,12 +91,12 @@ int main() {
snrt_global_barrier();
if (snrt_cluster_idx() == 1) {
if (snrt_cluster_core_idx() == 0) {
printf("C1 Checking the results\n");
printf("C1 Checking the results\r\n");
for (int i = 0; i < length_data; i++) {
if (((int8_t*)tcdm1_start_addr)[i] != test_data[i]) {
err++;
printf("C1 data is incorrect!\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\n", i,
printf("C1 data is incorrect!\r\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\r\n", i,
((int8_t*)tcdm1_start_addr)[i], i, test_data[i]);
return -1;
}
Expand All @@ -107,7 +107,7 @@ int main() {
snrt_global_barrier();
if (snrt_cluster_idx() == 0) {
if (snrt_is_dm_core()) {
printf("Checking all done! No error!\n");
printf("Checking all done! No error!\r\n");
}
}

Expand Down
9 changes: 6 additions & 3 deletions target/sim/sw/device/runtime/src/occamy_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,17 @@ inline void return_to_cva6(sync_t sync) {
if (cnt == snrt_cluster_num()) {
#endif
*((volatile uint32_t*)barrier_ptr) = 0;
// Interrupt the local host to signal the exit code (snitch by default only has the access to local domain)
set_host_sw_interrupt(0);
// Interrupt the local host to signal the exit code (by default, a
// Snitch core only has access to its local domain)
comm_buffer_t* comm_buffer = get_communication_buffer();
set_host_sw_interrupt(comm_buffer->chip_id);
}
}
}
// Otherwise assume cores are already synchronized and only
// one core calls this function
else {
set_host_sw_interrupt(0);
comm_buffer_t* comm_buffer = get_communication_buffer();
set_host_sw_interrupt(comm_buffer->chip_id);
}
}
5 changes: 4 additions & 1 deletion target/sim/sw/device/runtime/src/occamy_start.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ static inline void snrt_exit_default(int exit_code);
static inline void snrt_exit(int exit_code) {
snrt_exit_default(exit_code);
// Interrupt the local host to signal the exit code (by default, a Snitch core only has access to its local domain)
if (snrt_global_core_idx() == 0) set_host_sw_interrupt(0);
if (snrt_global_core_idx() == 0) {
comm_buffer_t* comm_buffer = get_communication_buffer();
set_host_sw_interrupt(comm_buffer->chip_id);
}
}

#include "start.c"
6 changes: 6 additions & 0 deletions target/sim/sw/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@

# Add user applications to APPS variable
APPS = hello_world
ifneq ($(findstring chiplet,$(CFG_OVERRIDE)),)
# If a chiplet cfg is used, offload_multichip is compiled with support for executing applications on cores of different chips
APPS += offload_multichip
else
# Otherwise, the simple offload app is compiled, which is mainly used for CI.
APPS += offload
endif

TARGET ?= all

Expand Down
3 changes: 2 additions & 1 deletion target/sim/sw/host/apps/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ finalize-build: $(FINAL_OUTPUTS)
.PHONY: clean
clean:
rm -rf $(BUILDDIR)
rm -f $(OFFSET_LD)
rm -f $(ORIGIN_LD)

$(BUILDDIR):
mkdir -p $@
Expand All @@ -106,6 +106,7 @@ $(DEP): $(SRCS) | $(BUILDDIR)

# Partially linked object
$(PARTIAL_ELF): $(DEP) $(LD_SRCS) | $(BUILDDIR)
rm -f $(ORIGIN_LD)
$(RISCV_CC) $(RISCV_CFLAGS) $(RISCV_LDFLAGS) $(SRCS) -o $@

$(PARTIAL_DUMP): $(PARTIAL_ELF) | $(BUILDDIR)
Expand Down
3 changes: 1 addition & 2 deletions target/sim/sw/host/apps/hello_world/src/hello_world.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
// SPDX-License-Identifier: Apache-2.0

#include <stdio.h>
#include "chip_id.h"
#include "host.c"
#include "host.h"

// Frequency at which the UART peripheral is clocked
#define PERIPH_FREQ 50000000
Expand Down
3 changes: 2 additions & 1 deletion target/sim/sw/host/apps/offload/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ finalize-build: $(FINAL_OUTPUTS)
.PHONY: clean
clean:
rm -rf $(BUILDDIR)
rm -f $(OFFSET_LD)
rm -f $(ORIGIN_LD)

$(BUILDDIR):
mkdir -p $@
Expand All @@ -124,6 +124,7 @@ $(PARTIAL_DUMP): $(PARTIAL_ELF) | $(BUILDDIR)
$(RISCV_OBJDUMP) -D $< > $@

# Device object relocation address
.PHONY: $(DEVICE_DIR)/apps/%/build/origin.ld
$(DEVICE_DIR)/apps/%/build/origin.ld: $(PARTIAL_ELF) | $(DEVICE_DIR)/apps/%/build
@RELOC_ADDR=$$($(RISCV_OBJDUMP) -t $< | grep snitch_main | cut -c9-16); \
echo "Writing device object relocation address 0x$$RELOC_ADDR to $@"; \
Expand Down
64 changes: 41 additions & 23 deletions target/sim/sw/host/apps/offload/src/offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,60 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include "host.c"
#include "host.h"

// Global Variables for communication buffer
volatile comm_buffer_t* comm_buffer_ptr = (comm_buffer_t*)0;

int main() {
// Reset and ungate all quadrants, deisolate
uintptr_t address_prefix = (uintptr_t)get_current_chip_baseaddress();
uint32_t chip_id = get_current_chip_id();

init_uart(address_prefix, 50000000, 1000000);
print_str(address_prefix, "[Occamy] The Offload main function \r\n");
print_str(address_prefix, "[Occamy] Current Chip ID is: ");
print_u8(address_prefix, chip_id);
print_str(address_prefix, "\r\n");
reset_and_ungate_quadrants_all(chip_id);
print_str(address_prefix, "[Occamy] Snitch ungated. \r\n");
deisolate_all(chip_id);
print_str(address_prefix, "[Occamy] Snitch deisolated. \r\n");
// Enable interrupts to receive notice of job termination
uintptr_t current_chip_address_prefix =
(uintptr_t)get_current_chip_baseaddress();
uint32_t current_chip_id = get_current_chip_id();

init_uart(current_chip_address_prefix, 50000000, 1000000);
print_str(current_chip_address_prefix,
"[Occamy] The Offload main function \r\n");
print_str(current_chip_address_prefix, "[Occamy] Current Chip ID is: ");
print_u8(current_chip_address_prefix, current_chip_id);
print_str(current_chip_address_prefix, "\r\n");

comm_buffer_ptr = (comm_buffer_t*)(((uint64_t)&__narrow_spm_start) |
current_chip_address_prefix);

// print_str(current_chip_address_prefix,
// "[Occamy] Snitch Communication Buffer is: ");
// print_u48(current_chip_address_prefix, (uint64_t)comm_buffer_ptr);
// print_str(current_chip_address_prefix, "\r\n");
reset_and_ungate_quadrants_all(current_chip_id);
// print_str(current_chip_address_prefix, "[Occamy] Snitch ungated. \r\n");
deisolate_all(current_chip_id);
// print_str(current_chip_address_prefix,
//           "[Occamy] Snitch deisolated. \r\n");
// Enable interrupts to receive notice of job termination
enable_sw_interrupts();
// Program Snitch entry point and communication buffer
program_snitches(chip_id);
print_str(address_prefix, "[Occamy] Snitch Jump Address Programmed. \r\n");
comm_buffer_ptr->lock = 0;
comm_buffer_ptr->chip_id = current_chip_id;
program_snitches(current_chip_id, comm_buffer_ptr);
// print_str(current_chip_address_prefix,
// "[Occamy] Snitch Jump Address Programmed. \r\n");

// Compiler fence to ensure Snitch entry point is
// programmed before Snitches are woken up
asm volatile("" ::: "memory");
asm volatile("fence.i" ::: "memory");

print_str(address_prefix, "[Occamy] Calling snitch cluster to execute the task \r\n");
print_str(current_chip_address_prefix,
"[Occamy] Calling snitch cluster to execute the task \r\n");

// Start Snitches
wakeup_snitches_cl(chip_id);
wakeup_snitches_cl(current_chip_id);

int ret = wait_snitches_done(chip_id);
int ret = wait_snitches_done(current_chip_id);

print_str(address_prefix, "[Occamy] Snitch cluster done with exit code ");
print_u32(address_prefix, ret);
print_str(address_prefix, "\r\n");
print_str(current_chip_address_prefix,
"[Occamy] Snitch cluster done with exit code ");
print_u32(current_chip_address_prefix, ret);
print_str(current_chip_address_prefix, "\r\n");

// Wait for job done and return Snitch exit code
return ret;
Expand Down
Loading

0 comments on commit baf6611

Please sign in to comment.