Skip to content

Commit

Permalink
Chip-position agnostic software (#50)
Browse files Browse the repository at this point in the history
* Update C Runtime

* Update C Runtime

* Update C Runtime

* Update C Runtime

* Update C Runtime

* Remove param in ro cache tpl

* Update Testbench

* Update gitignore

* Update Testbench

* copybin bug fix
  • Loading branch information
IveanEx authored Sep 29, 2024
1 parent a6b7132 commit bbf0ada
Show file tree
Hide file tree
Showing 27 changed files with 663 additions and 298 deletions.
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ sources:
- target/rtl/src/occamy_xilinx.sv

# target/sim
- target: any(simulation,simulation_vlt)
- target: simulation_occamy
files:
- target/rtl/test/uartdpi/uartdpi.sv
- target/rtl/test/testharness.sv
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ occamy_system_vsim: # In ESAT Server
$(MAKE) -C ./target/sim bin/occamy_top.vsim

hemaia_system_vsim_preparation: # In SNAX Docker
$(MAKE) -C ./target/sim_chip work-vsim/compile.vsim.tcl
$(MAKE) -C ./target/sim_chip work-vsim/compile.vsim.tcl CFG_OVERRIDE=$(CFG)

hemaia_system_vsim: # In ESAT Server
$(MAKE) -C ./target/sim_chip bin/occamy_chip.vsim
1 change: 1 addition & 0 deletions hw/occamy/occamy_soc.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ module ${name}_soc
) i_axi_dma_backend_sys_idma (
.clk_i,
.rst_ni,
.chip_id_i,
.dma_id_i ( 'd0 ),
.axi_dma_req_o ( ${in_sys_idma_mst.req_name()} ),
.axi_dma_res_i ( ${in_sys_idma_mst.rsp_name()} ),
Expand Down
14 changes: 8 additions & 6 deletions hw/vendor/openhwgroup_cva6/common/local/util/instr_tracer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

module instr_tracer (
instr_tracer_if tracer_if,
input logic[riscv::XLEN-1:0] hart_id_i
input logic[riscv::XLEN-1:0] hart_id_i,
input ariane_pkg::chip_id_t chip_id_i
);

// keep the decoded instructions in a queue
Expand All @@ -44,11 +45,12 @@ module instr_tracer (

// static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();

function void create_file(logic [63:0] hart_id);
function void create_file(logic [7:0] chip_id, logic [63:0] hart_id);
string fn, fn_commit_log;
$sformat(fn, "trace_hart_%0.0f.log", hart_id);
$sformat(fn_commit_log, "trace_hart_%0.0f_commit.log", hart_id);
$display("[TRACER] Output filename is: %s", fn);
$sformat(fn, "logs/trace_chip_%01x%01x_hart_%05x.log", chip_id[7:4], chip_id[3:0], hart_id);
$sformat(fn_commit_log, "logs/trace_chip_%01x%01x_hart_%05x_commit.log", chip_id[7:4],
chip_id[3:0], hart_id);
$display("[Tracer] Logging Hart %d to %s", hart_id, fn);

f = $fopen(fn,"w");
if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) commit_log = $fopen(fn_commit_log, "w");
Expand Down Expand Up @@ -214,7 +216,7 @@ module instr_tracer (

initial begin
#15ns;
create_file(hart_id_i);
create_file(chip_id_i, hart_id_i);
trace();
end

Expand Down
3 changes: 2 additions & 1 deletion hw/vendor/openhwgroup_cva6/core/cva6.sv
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,8 @@ input axi_rsp_t axi_resp_i

instr_tracer instr_tracer_i (
.tracer_if(tracer_if),
.hart_id_i
.hart_id_i,
.chip_id_i
);

// mock tracer for Verilator, to be used with spike-dasm
Expand Down
7 changes: 6 additions & 1 deletion target/rtl/cfg/occamy_cfg/hemaia.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
hemaia_multichip: {
chip_id_width: 8,
single_chip: true,
single_chip_id: 0
single_chip_id: 0,
testbench_cfg: {
// Emulate a four-chip configuration
upper_left_coordinate: [0, 0],
lower_right_coordinate: [2, 2]
}
}
addr_width: 48,
data_width: 64,
Expand Down
29 changes: 16 additions & 13 deletions target/sim/sim.mk
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,20 @@ MATCH_END := '/+incdir+/ s/$$/\/*\/*/'
MATCH_BGN := 's/+incdir+//g'
SED_SRCS := sed -e ${MATCH_END} -e ${MATCH_BGN}

VSIM_BENDER += -t test -t rtl -t simulation -t vsim
VSIM_BENDER += -t test -t rtl -t simulation_occamy -t vsim
VSIM_SOURCES = $(shell ${BENDER} script flist ${VSIM_BENDER} | ${SED_SRCS})
VSIM_BUILDDIR ?= work-vsim
VOPT_FLAGS = +acc

# VCS_BUILDDIR should be the same as the `DEFAULT : ./work-vcs`
# in target/snitch_cluster/synopsys_sim.setup
VCS_BENDER += -t test -t rtl -t simulation -t vcs
VCS_BENDER += -t test -t rtl -t simulation_occamy -t vcs
VCS_SOURCES = $(shell ${BENDER} script flist ${VCS_BENDER} | ${SED_SRCS})
VCS_BUILDDIR := work-vcs

# For synthesis with DC compiler
SYN_FLIST ?= syn_flist.tcl
SYN_BENDER += -t test -t synthesis -t simulation
SYN_BENDER += -t test -t synthesis -t simulation_occamy
ifeq ($(MEM_TYPE), exclude_tcsram)
VSIM_BENDER += -t tech_cells_generic_exclude_tc_sram
SYN_BENDER += -t tech_cells_generic_exclude_tc_sram
Expand All @@ -70,7 +70,7 @@ SYN_BUILDDIR := work-syn
FESVR ?= ${MKFILE_DIR}work
FESVR_VERSION ?= 98d2c29e431f3b14feefbda48c5f70c2f451acf2

VLT_BENDER += -t rtl -t simulation_vlt
VLT_BENDER += -t rtl -t simulation_occamy
VLT_SOURCES = $(shell ${BENDER} script flist ${VLT_BENDER} | ${SED_SRCS})
VLT_BUILDDIR := work-vlt
VLT_FESVR = $(VLT_BUILDDIR)/riscv-isa-sim
Expand Down Expand Up @@ -245,9 +245,9 @@ endef
# Traces #
##########

DASM_TRACES = $(shell (ls $(LOGS_DIR)/trace_hart_*.dasm 2>/dev/null))
DASM_TRACES = $(shell (ls $(LOGS_DIR)/trace_chip_??_hart_*.dasm 2>/dev/null))
TXT_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.txt/g'))
PERF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/trace_hart/hart/g' | sed 's/.dasm/_perf.json/g'))
PERF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/.dasm/_perf.json/g'))
ANNOTATED_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.s/g'))
DIFF_TRACES = $(shell (echo $(DASM_TRACES) | sed 's/\.dasm/\.diff/g'))

Expand All @@ -265,16 +265,19 @@ perf-csv: $(PERF_CSV)
event-csv: $(EVENT_CSV)
layout: $(TRACE_CSV) $(TRACE_JSON)

$(LOGS_DIR)/trace_hart_%.txt $(LOGS_DIR)/hart_%_perf.json: $(LOGS_DIR)/trace_hart_%.dasm $(GENTRACE_PY)
$(DASM) < $< | $(PYTHON) $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/hart_$*_perf.json > $(LOGS_DIR)/trace_hart_$*.txt

$(LOGS_DIR)/%.txt $(LOGS_DIR)/%_perf.json: $(LOGS_DIR)/%.dasm $(GENTRACE_PY)
@CHIP=$(word 3,$(subst _, ,$*)) && \
HART=$(word 5,$(subst _, ,$*)) && \
echo "Processing Chip $$CHIP Hart $$HART" && \
$(DASM) < $< | $(PYTHON) $(GENTRACE_PY) --permissive -d $(LOGS_DIR)/chip_$$CHIP\_hart_$$HART\_perf.json > $(LOGS_DIR)/trace_chip_$$CHIP\_hart_$$HART.txt
# Generate source-code interleaved traces for all harts. Reads the binary from
# the logs/.rtlbinary file that is written at start of simulation in the vsim script
BINARY ?= $(shell cat $(LOGS_DIR)/.rtlbinary)
$(LOGS_DIR)/trace_hart_%.s: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY}
$(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $<
$(LOGS_DIR)/trace_hart_%.diff: $(LOGS_DIR)/trace_hart_%.txt ${ANNOTATE_PY}
$(PYTHON) ${ANNOTATE_PY} ${ANNOTATE_FLAGS} -o $@ $(BINARY) $< -d

$(LOGS_DIR)/%.s: $(LOGS_DIR)/%.txt $(ANNOTATE_PY)
$(PYTHON) $(ANNOTATE_PY) $(ANNOTATE_FLAGS) -o $@ $(BINARY) $<
$(LOGS_DIR)/%.diff: $(LOGS_DIR)/%.txt $(ANNOTATE_PY)
$(PYTHON) $(ANNOTATE_PY) $(ANNOTATE_FLAGS) -o $@ $(BINARY) $< -d

$(PERF_CSV): $(PERF_TRACES) $(PERF_CSV_PY)
$(PYTHON) $(PERF_CSV_PY) -o $@ -i $(PERF_TRACES)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,47 @@
#include "data.h"

#include "snrt.h"
int8_t* tcdm0_start_addr;
int8_t* tcdm1_start_addr;

uint64_t tcdm0_start_addr;
uint64_t tcdm1_start_addr;
uint64_t test_data_start_addr;

int main() {
int err = 0;
// First set the addr of cluster 0
// tcdm0_start_addr = (int8_t*)0x10000000;
// tcdm1_start_addr = (int8_t*)0x10100000;
if (snrt_cluster_idx() == 0) {
if (snrt_is_dm_core()) {
tcdm0_start_addr = (int8_t*)snrt_cluster_base_addrl();
printf("The C0 TCDM ADDR is %p \n", tcdm0_start_addr);
tcdm0_start_addr = (uint64_t)snrt_cluster_base_addrl();
tcdm0_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("The C0 TCDM ADDR is %p%p \n",
(uint8_t*)(tcdm0_start_addr >> 32),
(uint8_t*)tcdm0_start_addr);
}
}
snrt_global_barrier();

if (snrt_cluster_idx() == 1) {
if (snrt_is_dm_core()) {
tcdm1_start_addr = (int8_t*)snrt_cluster_base_addrl();
printf("The C1 TCDM ADDR is %p \n", tcdm1_start_addr);
tcdm1_start_addr = (uint64_t)snrt_cluster_base_addrl();
tcdm1_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("The C1 TCDM ADDR is %p%p \n",
(uint8_t*)(tcdm1_start_addr >> 32),
(uint8_t*)tcdm1_start_addr);
}
}
snrt_global_barrier();
// C0 Load the data from l3 -> l1
if (snrt_cluster_idx() == 0) {
if (snrt_is_dm_core()) {
printf("[C0] Start to load data from %p\n", test_data);
snrt_dma_start_1d(tcdm0_start_addr, test_data, length_data);
test_data_start_addr = (uint64_t)test_data;
test_data_start_addr += (uint64_t)snrt_cluster_base_addrh() << 32;
printf("[C0] Start to load data from %p%p \n",
(uint8_t*)(test_data_start_addr >> 32),
(uint8_t*)test_data_start_addr);
snrt_dma_start_1d_wideptr(tcdm0_start_addr, test_data_start_addr,
length_data);
snrt_dma_wait_all();
}
}
Expand All @@ -48,8 +62,11 @@ int main() {
// Then C1 fetches data from C0
if (snrt_cluster_idx() == 1) {
if (snrt_is_dm_core()) {
printf("[C1] Load data from C0 TCDM %p\n", tcdm0_start_addr);
snrt_dma_start_1d(tcdm1_start_addr, tcdm0_start_addr, length_data);
printf("[C1] Start to load data from %p%p \n",
(uint8_t*)(tcdm0_start_addr >> 32),
(uint8_t*)tcdm0_start_addr);
snrt_dma_start_1d_wideptr(tcdm1_start_addr, tcdm0_start_addr,
length_data);
snrt_dma_wait_all();
}
}
Expand All @@ -61,11 +78,11 @@ int main() {
if (snrt_cluster_core_idx() == 0) {
printf("C0 Checking the results\n");
for (int i = 0; i < length_data; i++) {
if (tcdm0_start_addr[i] != test_data[i]) {
if (((int8_t*)tcdm0_start_addr)[i] != test_data[i]) {
err++;
printf("C0 data is incorrect!\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\n", i,
tcdm0_start_addr[i], i, test_data[i]);
((int8_t*)tcdm0_start_addr)[i], i, test_data[i]);
return -1;
}
}
Expand All @@ -76,11 +93,11 @@ int main() {
if (snrt_cluster_core_idx() == 0) {
printf("C1 Checking the results\n");
for (int i = 0; i < length_data; i++) {
if (tcdm1_start_addr[i] != test_data[i]) {
if (((int8_t*)tcdm1_start_addr)[i] != test_data[i]) {
err++;
printf("C1 data is incorrect!\n");
printf("tcdm0[%d]=%d, test_data[%d]=%d\n", i,
tcdm1_start_addr[i], i, test_data[i]);
((int8_t*)tcdm1_start_addr)[i], i, test_data[i]);
return -1;
}
}
Expand All @@ -95,4 +112,4 @@ int main() {
}

return 0;
}
}
5 changes: 3 additions & 2 deletions target/sim/sw/device/runtime/src/occamy_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,14 @@ inline void return_to_cva6(sync_t sync) {
if (cnt == snrt_cluster_num()) {
#endif
*((volatile uint32_t*)barrier_ptr) = 0;
set_host_sw_interrupt();
// Interrupt the local host to signal the exit code (by default, Snitch only has access to the local domain)
set_host_sw_interrupt(0);
}
}
}
// Otherwise assume cores are already synchronized and only
// one core calls this function
else {
set_host_sw_interrupt();
set_host_sw_interrupt(0);
}
}
3 changes: 2 additions & 1 deletion target/sim/sw/device/runtime/src/occamy_start.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ static inline void snrt_exit_default(int exit_code);

static inline void snrt_exit(int exit_code) {
snrt_exit_default(exit_code);
if (snrt_global_core_idx() == 0) set_host_sw_interrupt();
// Interrupt the local host to signal the exit code (by default, Snitch only has access to the local domain)
if (snrt_global_core_idx() == 0) set_host_sw_interrupt(0);
}

#include "start.c"
6 changes: 2 additions & 4 deletions target/sim/sw/device/runtime/src/putchar_chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#include "uart.h"

void _putchar(char character) {
while (is_transmit_empty() == 0) {
};

write_reg_u8(UART_THR, character);
// Print to UART of local chip
print_char((uintptr_t)0, character);
}
11 changes: 7 additions & 4 deletions target/sim/sw/host/apps/hello_world/src/hello_world.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@
// SPDX-License-Identifier: Apache-2.0

#include <stdio.h>
#include "chip_id.h"
#include "host.c"

// Frequency at which the UART peripheral is clocked
#define PERIPH_FREQ 50000000

int main() {
init_uart(PERIPH_FREQ, 1000000);
uintptr_t address_prefix = (uintptr_t)get_current_chip_baseaddress();

init_uart(address_prefix, PERIPH_FREQ, 1000000);
asm volatile("fence" : : : "memory");
print_uart("Hello world from Occamy in VCU128! \r\n");
print_str(address_prefix, "Hello world from Occamy in VCU128! \r\n");
char uart_rx_buffer[512];
char uart_tx_buffer[512];

while (1) {
scan_uart(uart_rx_buffer);
scan_str(address_prefix, uart_rx_buffer);
sprintf(uart_tx_buffer, "[Occamy] What you said is: %s",
uart_rx_buffer);
print_uart(uart_tx_buffer);
print_str(address_prefix, uart_tx_buffer);
// Artificial delay to ensure last symbol has been transmitted
// (just waiting for the UART TSR register to be empty is not
// sufficient)
Expand Down
33 changes: 20 additions & 13 deletions target/sim/sw/host/apps/offload/src/offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,38 @@

int main() {
// Reset and ungate all quadrants, deisolate
init_uart(50000000, 1000000);
print_uart("[Occamy] The Offload main function \r\n");
reset_and_ungate_quadrants();
deisolate_all();

uintptr_t address_prefix = (uintptr_t)get_current_chip_baseaddress();
uint32_t chip_id = get_current_chip_id();

init_uart(address_prefix, 50000000, 1000000);
print_str(address_prefix, "[Occamy] The Offload main function \r\n");
print_str(address_prefix, "[Occamy] Current Chip ID is: ");
print_u8(address_prefix, chip_id);
print_str(address_prefix, "\r\n");
reset_and_ungate_quadrants(chip_id);
print_str(address_prefix, "[Occamy] Snitch ungated. \r\n");
deisolate_all(chip_id);
print_str(address_prefix, "[Occamy] Snitch deisolated. \r\n");
// Enable interrupts to receive notice of job termination
enable_sw_interrupts();

// Program Snitch entry point and communication buffer
program_snitches();
program_snitches(chip_id);
print_str(address_prefix, "[Occamy] Snitch Jump Address Programmed. \r\n");

// Compiler fence to ensure Snitch entry point is
// programmed before Snitches are woken up
asm volatile("" ::: "memory");

print_uart("[Occamy] Calling snitch cluster to execute the task \r\n");
print_str(address_prefix, "[Occamy] Calling snitch cluster to execute the task \r\n");

// Start Snitches
wakeup_snitches_cl();
wakeup_snitches_cl(chip_id);

int ret = wait_snitches_done();
int ret = wait_snitches_done(chip_id);

print_uart("[Occamy] Snitch cluster done with exit code ");
print_uart_int(ret);
print_uart("\r\n");
print_str(address_prefix, "[Occamy] Snitch cluster done with exit code ");
print_u32(address_prefix, ret);
print_str(address_prefix, "\r\n");

// Wait for job done and return Snitch exit code
return ret;
Expand Down
Loading

0 comments on commit bbf0ada

Please sign in to comment.