From 8908048d406b24bc1ee98c37e3561e4627fa4967 Mon Sep 17 00:00:00 2001 From: Yunhao Deng Date: Tue, 3 Dec 2024 10:26:22 +0100 Subject: [PATCH] Add Chiplet AXI Interconnect (#92) * Initial Push - Separate Chiplet Generation * Update OccamyGen to parse the type to testharness * Modified Testharness for Chiplet Interconnect * Update Bootrom * Small Updates --- Bender.yml | 3 +- Makefile | 1 + hw/occamy/occamy_chip.sv.tpl | 17 ++ hw/occamy/occamy_soc.sv.tpl | 33 ++- hw/occamy/occamy_top.sv.tpl | 19 +- target/rtl/bootrom/src/bootrom.c | 126 +++++++++-- target/rtl/bootrom/src/chip_id.h | 43 ++++ target/rtl/bootrom/src/sys_dma.h | 117 ++++++++++ target/rtl/cfg/hemaia.hjson | 8 +- ...aia_tapeout.hjson => hemaia_chiplet.hjson} | 22 +- target/rtl/cfg/hemaia_minimal.hjson | 203 ----------------- target/rtl/cfg/hemaia_two_clusters.hjson | 205 ------------------ target/sim_chip/Makefile | 4 +- .../sim_chip/testharness/testharness.sv.tpl | 105 ++++++++- util/occamygen/occamy.py | 19 +- util/occamygen/occamygen.py | 50 ++++- 16 files changed, 513 insertions(+), 462 deletions(-) create mode 100644 target/rtl/bootrom/src/chip_id.h create mode 100644 target/rtl/bootrom/src/sys_dma.h rename target/rtl/cfg/{hemaia_tapeout.hjson => hemaia_chiplet.hjson} (87%) delete mode 100755 target/rtl/cfg/hemaia_minimal.hjson delete mode 100755 target/rtl/cfg/hemaia_two_clusters.hjson diff --git a/Bender.yml b/Bender.yml index 7454ded8..1884e2c4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -118,7 +118,8 @@ sources: files: - target/rtl/test/uartdpi/uartdpi.sv - target/sim_chip/testharness/testharness.sv - - target/sim/test/bootrom.sv + # - target/sim/test/bootrom.sv + - target/rtl/bootrom/bootrom.sv - target/rtl/src/occamy_chip.sv diff --git a/Makefile b/Makefile index 4160ddb5..a7e5b6a1 100644 --- a/Makefile +++ b/Makefile @@ -131,3 +131,4 @@ hemaia_system_vcs_preparation: # In SNAX Docker hemaia_system_vcs: # In ESAT Server $(MAKE) -C ./target/sim_chip bin/occamy_chip.vcs +# How to start the
execution of the simulation: cd ./target/sim_chip/bin; ./occamy_chip.vcs -gui -R -fgp=num_threads:8 diff --git a/hw/occamy/occamy_chip.sv.tpl b/hw/occamy/occamy_chip.sv.tpl index 8d95d215..3c1e8e77 100644 --- a/hw/occamy/occamy_chip.sv.tpl +++ b/hw/occamy/occamy_chip.sv.tpl @@ -19,6 +19,15 @@ import ${name}_pkg::*; input logic test_mode_i, input chip_id_t chip_id_i, input logic [1:0] boot_mode_i, +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + // HeMAiA D2D AXI Interface + // Chiplet Request to Router + output ${soc2router_bus.req_type()} soc2router_req_o, + input ${soc2router_bus.rsp_type()} soc2router_rsp_i, + // Router Request to Chiplet + input ${router2soc_bus.req_type()} router2soc_req_i, + output ${router2soc_bus.rsp_type()} router2soc_rsp_o, +% endif // `uart` Interface output logic uart_tx_o, input logic uart_rx_i, @@ -160,6 +169,14 @@ import ${name}_pkg::*; .rtc_i (rtc_i), .test_mode_i (test_mode_i), .chip_id_i (chip_id_i), +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + // Chiplet Request to Router + .soc2router_req_o, + .soc2router_rsp_i, + // Router Request to Chiplet + .router2soc_req_i, + .router2soc_rsp_o, +% endif .boot_mode_i (boot_mode_i), .uart_tx_o (uart_tx_o), .uart_cts_ni (uart_cts_ni), diff --git a/hw/occamy/occamy_soc.sv.tpl b/hw/occamy/occamy_soc.sv.tpl index 19af2421..201c41bd 100644 --- a/hw/occamy/occamy_soc.sv.tpl +++ b/hw/occamy/occamy_soc.sv.tpl @@ -8,6 +8,8 @@ // AUTOMATICALLY GENERATED by genoccamy.py; edit the script instead.
<% + cuts_soc_to_router = occamy_cfg["cuts"]["soc_to_router"] + cuts_router_to_soc = occamy_cfg["cuts"]["router_to_soc"] cuts_narrow_to_quad = occamy_cfg["cuts"]["narrow_to_quad"] cuts_quad_to_narrow = occamy_cfg["cuts"]["quad_to_narrow"] cuts_wide_to_quad = occamy_cfg["cuts"]["wide_to_quad"] @@ -59,6 +61,16 @@ module ${name}_soc output ${soc_wide_xbar.out_spm_wide.req_type()} spm_axi_wide_req_o, input ${soc_wide_xbar.out_spm_wide.rsp_type()} spm_axi_wide_rsp_i, +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + // HeMAiA Multi-Chip AXI Interface + // Chiplet Request to Router + output ${soc2router_bus.req_type()} soc2router_req_o, + input ${soc2router_bus.rsp_type()} soc2router_rsp_i, + // Router Request to Chiplet + input ${router2soc_bus.req_type()} router2soc_req_i, + output ${router2soc_bus.rsp_type()} router2soc_rsp_o, +% endif + // SoC control register IO output logic [1:0] spm_narrow_rerror_o, output logic [1:0] spm_wide_rerror_o, @@ -80,6 +92,25 @@ module ${name}_soc ${module} +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + /////////////////////////////////////// + // Connections between SoC to Router // + /////////////////////////////////////// + <% + soc2router = soc_wide_xbar.out_hemaia_multichip \ + .change_iw(context, soc2router_bus.iw, "soc2router_iwc") \ + .cut(context, cuts_soc_to_router, name="soc2router_cut") + router2soc = router2soc_bus.copy(name="router2soc") \ + .declare(context) + router2soc.cut(context, cuts_router_to_soc, name="router2soc_cut") \ + .change_iw(context, soc_wide_xbar.in_hemaia_multichip.iw, name="router2soc_iwc", to=soc_wide_xbar.in_hemaia_multichip) + %> \ + assign soc2router_req_o = ${soc2router.req_name()}; + assign ${soc2router.rsp_name()} = soc2router_rsp_i; + assign ${router2soc.req_name()} = router2soc_req_i; + assign router2soc_rsp_o = ${router2soc.rsp_name()}; +% endif + /////////////////////////////////// // Connections between crossbars // /////////////////////////////////// @@ -233,7
+264,7 @@ module ${name}_soc // SPM WIDE // ////////////// <% wide_spm_mst = soc_wide_xbar.out_spm_wide \ - .cut(context, cuts_wide_conv_to_spm_wide) + .cut(context, cuts_wide_conv_to_spm_wide, name="wide_spm_cut") %>\ assign spm_axi_wide_req_o = ${wide_spm_mst.req_name()}; diff --git a/hw/occamy/occamy_top.sv.tpl b/hw/occamy/occamy_top.sv.tpl index 13c63dfc..7ad83048 100644 --- a/hw/occamy/occamy_top.sv.tpl +++ b/hw/occamy/occamy_top.sv.tpl @@ -72,6 +72,16 @@ module ${name}_top output ${soc_wide_xbar.out_spm_wide.req_type()} spm_axi_wide_req_o, input ${soc_wide_xbar.out_spm_wide.rsp_type()} spm_axi_wide_rsp_i, +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + // HeMAiA Multi-Chip AXI Interface + // Chiplet Request to Router + output ${soc2router_bus.req_type()} soc2router_req_o, + input ${soc2router_bus.rsp_type()} soc2router_rsp_i, + // Router Request to Chiplet + input ${router2soc_bus.req_type()} router2soc_req_i, + output ${router2soc_bus.rsp_type()} router2soc_rsp_o, +% endif + /// Chip specific control registers output ${soc_axi_lite_narrow_periph_xbar.out_chip_ctrl.req_type()} chip_ctrl_req_o, input ${soc_axi_lite_narrow_periph_xbar.out_chip_ctrl.rsp_type()} chip_ctrl_rsp_i, @@ -83,7 +93,6 @@ module ${name}_top ); <% - cuts_clint_cfg = occamy_cfg["cuts"]["periph_axi_lite_narrow_clint_cfg"] cuts_soc_ctrl_cfg = occamy_cfg["cuts"]["periph_axi_lite_narrow_soc_ctrl_cfg"] cuts_chip_ctrl_cfg = occamy_cfg["cuts"]["periph_axi_lite_narrow_chip_ctrl_cfg"] @@ -153,6 +162,14 @@ module ${name}_top .periph_axi_lite_narrow_rsp_i ( periph_regbus_soc2per_rsp ), .spm_axi_wide_req_o, .spm_axi_wide_rsp_i, +% if occamy_cfg['hemaia_multichip']['single_chip'] is False: + // Chiplet Request to Router + .soc2router_req_o, + .soc2router_rsp_i, + // Router Request to Chiplet + .router2soc_req_i, + .router2soc_rsp_o, +% endif .spm_narrow_rerror_o (spm_narrow_rerror), .spm_wide_rerror_o (spm_wide_rerror), .mtip_i ( mtip ), diff --git a/target/rtl/bootrom/src/bootrom.c
b/target/rtl/bootrom/src/bootrom.c index 54556174..390c60c5 100644 --- a/target/rtl/bootrom/src/bootrom.c +++ b/target/rtl/bootrom/src/bootrom.c @@ -12,6 +12,8 @@ // For values need to share between functions, use uninitialized global variable // + initialization function +#include "chip_id.h" +#include "sys_dma.h" #include "uart.h" #include "xmodem.h" @@ -30,52 +32,133 @@ void delay_cycles(uint64_t cycle) { } // Boot modes. -enum boot_mode_t { JTAG, UART, PRINTMEM, NORMAL }; +enum boot_mode_t { + TARGET_CHIPID, + UART, + COPY_TO_REMOTE, + COPY_FROM_REMOTE, + PRINTMEM, + NORMAL +}; void bootrom() { - enum boot_mode_t boot_mode = JTAG; - uint64_t start_address; + enum boot_mode_t boot_mode = NORMAL; + uint64_t local_chip_mem_start_address; + uint64_t remote_chip_mem_start_address; uint64_t memory_length; - uint32_t chip_id; - asm volatile("csrr %0, 0xf15" : "=r"(chip_id)); + uint32_t chip_id = get_current_chip_id(); + uint32_t target_chip_id = chip_id; uintptr_t address_prefix = ((uintptr_t)chip_id) << 40; char in_buf[8]; init_uart(address_prefix, 50000000, 1000000); while (1) { - start_address = 0x80000000L | ((uint64_t)address_prefix); + local_chip_mem_start_address = 0x80000000L | ((uint64_t)address_prefix); + remote_chip_mem_start_address = + 0x80000000L | ((uint64_t)target_chip_id << 40); print_str(address_prefix, "\033[2J"); print_str(address_prefix, "\r\n\t\t Welcome to HeMAiA Bootrom"); print_str(address_prefix, "\r\n"); print_str(address_prefix, "\r\n\t Chip ID: 0x"); print_u8(address_prefix, chip_id); + print_str(address_prefix, "\r\n\t Target Remote Chip ID: 0x"); + print_u8(address_prefix, target_chip_id); print_str(address_prefix, "\r\n\t Enter the number to select the mode: "); - print_str(address_prefix, "\r\n\t 1. Load from JTAG"); + print_str(address_prefix, "\r\n\t 1. Change the target remote Chip ID"); print_str(address_prefix, "\r\n\t 2. Load from UART to 0x"); - print_u48(address_prefix, start_address); - print_str(address_prefix, "\r\n\t 3. 
Print memory from 0x"); - print_u48(address_prefix, start_address); - print_str(address_prefix, "\r\n\t 4. Continue to Boot from 0x"); - print_u48(address_prefix, start_address); + print_u48(address_prefix, remote_chip_mem_start_address); + print_str(address_prefix, + "\r\n\t 3. Copy memory from local chip to remote chip"); + print_str(address_prefix, + "\r\n\t 4. Copy memory from remote chip to local chip"); + print_str(address_prefix, "\r\n\t 5. Print memory from 0x"); + print_u48(address_prefix, remote_chip_mem_start_address); + print_str(address_prefix, "\r\n\t 6. Continue to Boot from 0x"); + print_u48(address_prefix, local_chip_mem_start_address); print_str(address_prefix, "\r\n"); boot_mode = getchar(address_prefix) - '0' - 1; + char* cur = 0; + switch (boot_mode) { - case JTAG: + case TARGET_CHIPID: print_str(address_prefix, - "\r\n\t Handover to debugger... \r\n\r\n"); - __asm__ volatile( - "csrr a0, mhartid;" - "ebreak;"); + "\r\n\t Enter the target remote Chip ID: "); + scan_uart(address_prefix, in_buf); + cur = in_buf; + target_chip_id = 0; + while (*cur != '\0') { /* one hex nibble per char */ + if (*cur >= '0' && *cur <= '9') { + target_chip_id = (target_chip_id << 4) + *cur - '0'; + } else if (*cur >= 'A' && *cur <= 'F') { + target_chip_id = + (target_chip_id << 4) + *cur - 'A' + 10; + } else if (*cur >= 'a' && *cur <= 'f') { + target_chip_id = + (target_chip_id << 4) + *cur - 'a' + 10; + } else { + print_str(address_prefix, "\r\n\t Invalid input.
"); + getchar(address_prefix); + break; + } + cur++; + } + break; + + case COPY_TO_REMOTE: + print_str(address_prefix, + "\r\n\t Enter the size of the memory in byte: "); + scan_uart(address_prefix, in_buf); + cur = in_buf; + memory_length = 0; + while (*cur != '\0') { + memory_length = memory_length * 10 + *cur - '0'; + cur++; + } + print_str(address_prefix, "\r\n\t Copying memory from 0x"); + print_u48(address_prefix, local_chip_mem_start_address); + print_str(address_prefix, " to 0x"); + print_u48(address_prefix, remote_chip_mem_start_address); + print_str(address_prefix, " with size "); + print_u48(address_prefix, memory_length); + print_str(address_prefix, " bytes..."); + sys_dma_blk_memcpy(remote_chip_mem_start_address, + local_chip_mem_start_address, memory_length); + print_str(address_prefix, "\r\n\t Copy finished. "); + getchar(address_prefix); + break; + + case COPY_FROM_REMOTE: + print_str(address_prefix, + "\r\n\t Enter the size of the memory in byte: "); + scan_uart(address_prefix, in_buf); + cur = in_buf; + memory_length = 0; + while (*cur != '\0') { + memory_length = memory_length * 10 + *cur - '0'; + cur++; + } + print_str(address_prefix, "\r\n\t Copying memory from 0x"); + print_u48(address_prefix, remote_chip_mem_start_address); + print_str(address_prefix, " to 0x"); + print_u48(address_prefix, local_chip_mem_start_address); + print_str(address_prefix, " with size "); + print_u48(address_prefix, memory_length); + print_str(address_prefix, " bytes..."); + sys_dma_blk_memcpy(local_chip_mem_start_address, + remote_chip_mem_start_address, + memory_length); + print_str(address_prefix, "\r\n\t Copy finished. "); + getchar(address_prefix); break; case UART: delay_cycles(50000000); // Delay for 1s - uart_xmodem(address_prefix, start_address); + uart_xmodem(address_prefix, remote_chip_mem_start_address); print_str(address_prefix, "\r\n\t Load finished. 
\r\n\r\n"); break; @@ -90,9 +173,10 @@ void bootrom() { cur++; } print_str(address_prefix, "\r\n\t The memory from 0x"); - print_u48(address_prefix, start_address); + print_u48(address_prefix, remote_chip_mem_start_address); print_str(address_prefix, " is:"); - print_mem_hex(address_prefix, (char*)start_address, + print_mem_hex(address_prefix, + (char*)remote_chip_mem_start_address, memory_length); print_str(address_prefix, "\r\n\r\n\t Print finished. "); getchar(address_prefix); @@ -101,7 +185,7 @@ void bootrom() { case NORMAL: print_str(address_prefix, "\033[2J"); print_str(address_prefix, "\r\n\t Booting at 0x"); - print_u48(address_prefix, start_address); + print_u48(address_prefix, local_chip_mem_start_address); print_str(address_prefix, "...\r\n\r\n\r\n"); return; break; diff --git a/target/rtl/bootrom/src/chip_id.h b/target/rtl/bootrom/src/chip_id.h new file mode 100644 index 00000000..8d6f00a7 --- /dev/null +++ b/target/rtl/bootrom/src/chip_id.h @@ -0,0 +1,43 @@ +#pragma once +#include <stdint.h> + +static inline uint8_t get_current_chip_id() { + uint32_t chip_id; +# if __riscv_xlen == 64 + // 64-bit system (CVA6), get chip_id from 0xf15 + asm volatile("csrr %0, 0xf15" : "=r"(chip_id)); +# else + // 32-bit system, get chip_id from 0xbc2 (base_addrh) + // and shift it to the right by 8 bits + asm volatile ("csrr %0, 0xbc2" : "=r"(chip_id)); + chip_id = chip_id >> 8; +# endif + return (uint8_t)chip_id; +} + +static inline uint8_t *get_current_chip_baseaddress() { +#if __riscv_xlen == 64 + // 64-bit system (CVA6), get chip_id from 0xf15 + uint32_t chip_id; + asm volatile("csrr %0, 0xf15" : "=r"(chip_id)); + return (uint8_t *)((uintptr_t)chip_id << 40); +#else + // 32-bit system, return 0 (not supported) + return (uint8_t *)0; +#endif +} + +static inline uint8_t *get_chip_baseaddress(uint8_t chip_id) { +#if __riscv_xlen == 64 + // 64-bit system, perform the shift and return the base address + return (uint8_t *)((uintptr_t)chip_id << 40); +#else + // 32-bit system, return 0 (not supported) +
return (uint8_t *)0; +#endif +} + +static inline uint32_t get_current_chip_baseaddress_h() { + uint32_t chip_id = get_current_chip_id(); + return (uint32_t)(chip_id << 8); +} diff --git a/target/rtl/bootrom/src/sys_dma.h b/target/rtl/bootrom/src/sys_dma.h new file mode 100644 index 00000000..3132ac5e --- /dev/null +++ b/target/rtl/bootrom/src/sys_dma.h @@ -0,0 +1,117 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Generated register defines for idma_reg64_frontend + +#ifndef _IDMA_REG64_FRONTEND_REG_DEFS_ +#define _IDMA_REG64_FRONTEND_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define IDMA_REG64_FRONTEND_PARAM_REG_WIDTH 64 + +// Source Address +#define IDMA_REG64_FRONTEND_SRC_ADDR_REG_OFFSET 0x0 + +// Destination Address +#define IDMA_REG64_FRONTEND_DST_ADDR_REG_OFFSET 0x8 + +// Number of bytes +#define IDMA_REG64_FRONTEND_NUM_BYTES_REG_OFFSET 0x10 + +// Configuration Register for DMA settings +#define IDMA_REG64_FRONTEND_CONF_REG_OFFSET 0x18 +#define IDMA_REG64_FRONTEND_CONF_DECOUPLE_BIT 0 +#define IDMA_REG64_FRONTEND_CONF_DEBURST_BIT 1 +#define IDMA_REG64_FRONTEND_CONF_SERIALIZE_BIT 2 + +// DMA Status +#define IDMA_REG64_FRONTEND_STATUS_REG_OFFSET 0x20 +#define IDMA_REG64_FRONTEND_STATUS_BUSY_BIT 0 + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG64_FRONTEND_NEXT_ID_REG_OFFSET 0x28 + +// Get ID of finished transactions.
+#define IDMA_REG64_FRONTEND_DONE_REG_OFFSET 0x30 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _IDMA_REG64_FRONTEND_REG_DEFS_ +// End generated register defines for idma_reg64_frontend + +#include <stdint.h> +#include "chip_id.h" + +#define SYS_IDMA_CFG_BASE_ADDR 0x11000000 + +#define IDMA_SRC_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_SRC_ADDR_REG_OFFSET) +#define IDMA_DST_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_DST_ADDR_REG_OFFSET) +#define IDMA_NUMBYTES_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_NUM_BYTES_REG_OFFSET) +#define IDMA_CONF_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_CONF_REG_OFFSET) +#define IDMA_STATUS_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_STATUS_REG_OFFSET) +#define IDMA_NEXTID_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_NEXT_ID_REG_OFFSET) +#define IDMA_DONE_ADDR \ + (SYS_IDMA_CFG_BASE_ADDR + IDMA_REG64_FRONTEND_DONE_REG_OFFSET) + +#define IDMA_CONF_DECOUPLE 0 +#define IDMA_CONF_DEBURST 0 +#define IDMA_CONF_SERIALIZE 0 + +static inline volatile uint64_t *sys_dma_src_ptr(void) { + return (volatile uint64_t *)(IDMA_SRC_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline volatile uint64_t *sys_dma_dst_ptr(void) { + return (volatile uint64_t *)(IDMA_DST_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline volatile uint64_t *sys_dma_num_bytes_ptr(void) { + return (volatile uint64_t *)(IDMA_NUMBYTES_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline volatile uint64_t *sys_dma_conf_ptr(void) { + return (volatile uint64_t *)(IDMA_CONF_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline volatile uint64_t *sys_dma_status_ptr(void) { + return (volatile uint64_t *)(IDMA_STATUS_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline volatile uint64_t *sys_dma_nextid_ptr(void) { + return (volatile uint64_t *)(IDMA_NEXTID_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} +static inline
volatile uint64_t *sys_dma_done_ptr(void) { + return (volatile uint64_t *)(IDMA_DONE_ADDR | + (uintptr_t)get_current_chip_baseaddress()); +} + +static inline uint64_t sys_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size) { + *(sys_dma_src_ptr()) = (uint64_t)src; + *(sys_dma_dst_ptr()) = (uint64_t)dst; + *(sys_dma_num_bytes_ptr()) = size; + *(sys_dma_conf_ptr()) = + (IDMA_CONF_DECOUPLE << IDMA_REG64_FRONTEND_CONF_DECOUPLE_BIT) | + (IDMA_CONF_DEBURST << IDMA_REG64_FRONTEND_CONF_DEBURST_BIT) | + (IDMA_CONF_SERIALIZE << IDMA_REG64_FRONTEND_CONF_SERIALIZE_BIT); + return *(sys_dma_nextid_ptr()); +} + +static inline void sys_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size) { + volatile uint64_t tf_id = sys_dma_memcpy(dst, src, size); + + while (*(sys_dma_done_ptr()) != tf_id) { + asm volatile("nop"); + } +} diff --git a/target/rtl/cfg/hemaia.hjson b/target/rtl/cfg/hemaia.hjson index 5f46d90b..7d3e2926 100755 --- a/target/rtl/cfg/hemaia.hjson +++ b/target/rtl/cfg/hemaia.hjson @@ -6,7 +6,9 @@ // Multi-chip configuration hemaia_multichip: { chip_id_width: 8, - single_chip: false, + router_to_soc_iw: 8, + soc_to_router_iw: 4, + single_chip: true, single_chip_id: 0, testbench_cfg: { // Emulate a four-chips configuration @@ -34,6 +36,8 @@ fall_through: false, }, cuts: { + soc_to_router: 1, + router_to_soc: 1, narrow_to_quad: 3, quad_to_narrow: 3, wide_to_quad: 3, @@ -116,7 +120,7 @@ }, clusters:[ "snax_KUL_cluster", - "snax_KUL_xdma_cluster", + "snax_KUL_dse_cluster", "snax_hypercorex_cluster", // "snax_dimc_cluster" ], diff --git a/target/rtl/cfg/hemaia_tapeout.hjson b/target/rtl/cfg/hemaia_chiplet.hjson similarity index 87% rename from target/rtl/cfg/hemaia_tapeout.hjson rename to target/rtl/cfg/hemaia_chiplet.hjson index 076d6b3f..96039adb 100755 --- a/target/rtl/cfg/hemaia_tapeout.hjson +++ b/target/rtl/cfg/hemaia_chiplet.hjson @@ -5,13 +5,15 @@ remote_quadrants: [], // Multi-chip configuration hemaia_multichip: { - chip_id_width: 8, - single_chip: true, - 
single_chip_id: 0, - testbench_cfg: { - // Emulate a four-chips configuration + chip_id_width: 8, // Not verified, should never be changed + router_to_soc_iw: 5, // soc_to_router_iw + clog2(chips) + soc_to_router_iw: 4, // Not beneficial for iw > 6 + single_chip: false, + single_chip_id: 0, // Only used for single chip simulation + testbench_cfg: { // The total number of homogeneous chips (x * y) on the virtual interposer + // Emulate a two-chips configuration upper_left_coordinate: [0, 0], - lower_right_coordinate: [1, 1] + lower_right_coordinate: [1, 0] } } addr_width: 48, @@ -34,6 +36,8 @@ fall_through: false, }, cuts: { + soc_to_router: 1, + router_to_soc: 1, narrow_to_quad: 3, quad_to_narrow: 3, wide_to_quad: 3, @@ -115,10 +119,10 @@ cfg_base_offset: 65536 // 0x10000 }, clusters:[ - "snax_cgra_cluster", - "snax_KUL_xdma_cluster", + "snax_KUL_cluster", + "snax_KUL_dse_cluster", "snax_hypercorex_cluster", - "snax_dimc_cluster" + // "snax_dimc_cluster" ], // peripherals diff --git a/target/rtl/cfg/hemaia_minimal.hjson b/target/rtl/cfg/hemaia_minimal.hjson deleted file mode 100755 index 3e47f735..00000000 --- a/target/rtl/cfg/hemaia_minimal.hjson +++ /dev/null @@ -1,203 +0,0 @@ -{ - bender_target: ["cv64a6_imafdc_sv39", "occamy"], - // Remote CFG, about to be removed - is_remote_quadrant: false, - remote_quadrants: [], - // Multi-chip configuration - hemaia_multichip: { - chip_id_width: 8, - single_chip: true, - single_chip_id: 0, - testbench_cfg: { - // Emulate a four-chips configuration - upper_left_coordinate: [0, 0], - lower_right_coordinate: [1, 1] - } - } - addr_width: 48, - data_width: 64, - // XBARs - wide_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - quadrant_inter_xbar_slv_id_width_no_rocache: 3, - quadrant_inter_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - narrow_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - cuts: { - narrow_to_quad: 3, - 
quad_to_narrow: 3, - wide_to_quad: 3, - quad_to_wide: 3, - narrow_to_cva6: 2, - narrow_conv_to_spm_narrow_pre: 2, - narrow_conv_to_spm_narrow: 1, - narrow_and_pcie: 3, - narrow_and_wide: 1, - wide_conv_to_spm_wide: 3, - wide_to_wide_zero_mem: 0, - wide_to_hbm: 3, - wide_and_inter: 3, - wide_and_hbi: 3, - narrow_and_hbi: 3, - pre_to_hbmx: 3, - hbmx_to_hbm: 3, - atomic_adapter_narrow: 1, - atomic_adapter_narrow_wide: 1, - // Give some flexibility in peripheral xbar placement - periph_axi_lite_narrow: 2, - periph_axi_lite: 2, - periph_axi_lite_narrow_hbm_xbar_cfg: 2, - // Non-right-side chip peripherals - periph_axi_lite_narrow_hbm_cfg: 3, - periph_axi_lite_narrow_pcie_cfg: 3, - periph_axi_lite_narrow_chip_ctrl_cfg: 3, - periph_axi_lite_narrow_hbi_narrow_cfg: 3, - periph_axi_lite_narrow_hbi_wide_cfg: 3, - periph_axi_lite_narrow_bootrom_cfg: 3, - periph_axi_lite_narrow_fll_system_cfg: 3, - periph_axi_lite_narrow_fll_periph_cfg: 3, - periph_axi_lite_narrow_fll_hbm2e_cfg: 3, - // Right-side or latency-invariant chip peripherals - periph_axi_lite_narrow_soc_ctrl_cfg: 1, - periph_axi_lite_narrow_uart_cfg: 1, - periph_axi_lite_narrow_i2c_cfg: 1, - periph_axi_lite_narrow_gpio_cfg: 1, - periph_axi_lite_narrow_clint_cfg: 1, - periph_axi_lite_narrow_plic_cfg: 1, - periph_axi_lite_narrow_spim_cfg: 1, - periph_axi_lite_narrow_timer_cfg: 1, - }, - txns: { - wide_and_inter: 128, - wide_to_hbm: 128, - narrow_and_wide: 16, - rmq: 4, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 3, // clog2(total number of clusters) - nr_s1_quadrant: 1, - s1_quadrant: { - // number of pending transactions on the narrow/wide network - narrow_trans: 32, - wide_trans: 32, - // Disable for easier flow trials. 
- ro_cache_cfg: { - width: 1024, - count: 128, - sets: 2, - max_trans: 32, - address_regions: 4, - } - wide_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - wide_xbar_slv_id_width: 3 - narrow_xbar: { - max_slv_trans: 8, - max_mst_trans: 8, - fall_through: false, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 3, // clog2(total number of clusters) - cfg_base_addr: 184549376, // 0x0b000000 - cfg_base_offset: 65536 // 0x10000 - }, - clusters:[ - "snax_KUL_xdma_cluster" - ], - - // peripherals - peripherals: { - rom: { - address: 16777216, // 0x0100_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - clint: { - address: 67108864, // 0x0400_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - axi_lite_peripherals: [ - { - name: "debug", - address: 0, // 0x0000_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spis", // Only Master port, no slave port - } - ], - axi_lite_narrow_peripherals: [ - { - name: "soc_ctrl", - address: 33554432, // 0x0200_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "uart", - address: 33562624, // 0x0200_2000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "gpio", - address: 33566720, // 0x0200_3000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "i2c", - address: 33570816, // 0x0200_4000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "chip_ctrl", - address: 33574912, // 0x0200_5000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "timer", - address: 33579008, // 0x0200_6000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spim", - address: 50331648, // 0x0300_0000 - length: 131072, // 4 kiB 0x2_0000 - }, - { - name: "plic", - address: 201326592, // 0x0C00_0000 - length: 67108864, // 64 MiB 0x400_0000 - }, - ], - }, - // non-peripheral IPs - spm_narrow: { - address: 1879048192, // 0x7000_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - spm_wide: { - address: 2147483648, // 0x8000_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - wide_zero_mem: { - address: 68719476736, // 0x10_0000_0000 
- length: 8589934592, // 8 GiB 0x11_0000_0000 - }, - sys_idma_cfg: { - address: 285212672, // 0x1100_0000 - length: 65536, // 64 kiB 0x1_0000 - }, - // backup boot address - backup_boot_addr: 2147483648 // 0x8000_0000 -} diff --git a/target/rtl/cfg/hemaia_two_clusters.hjson b/target/rtl/cfg/hemaia_two_clusters.hjson deleted file mode 100755 index 90b461ba..00000000 --- a/target/rtl/cfg/hemaia_two_clusters.hjson +++ /dev/null @@ -1,205 +0,0 @@ -{ - bender_target: ["cv64a6_imafdc_sv39", "occamy"], - // Remote CFG, about to be removed - is_remote_quadrant: false, - remote_quadrants: [], - // Multi-chip configuration - hemaia_multichip: { - chip_id_width: 8, - single_chip: true, - single_chip_id: 0, - testbench_cfg: { - // Emulate a four-chips configuration - upper_left_coordinate: [0, 0], - lower_right_coordinate: [1, 1] - } - } - addr_width: 48, - data_width: 64, - // XBARs - wide_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - quadrant_inter_xbar_slv_id_width_no_rocache: 3, - quadrant_inter_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - narrow_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - cuts: { - narrow_to_quad: 3, - quad_to_narrow: 3, - wide_to_quad: 3, - quad_to_wide: 3, - narrow_to_cva6: 2, - narrow_conv_to_spm_narrow_pre: 2, - narrow_conv_to_spm_narrow: 1, - narrow_and_pcie: 3, - narrow_and_wide: 1, - wide_conv_to_spm_wide: 3, - wide_to_wide_zero_mem: 0, - wide_to_hbm: 3, - wide_and_inter: 3, - wide_and_hbi: 3, - narrow_and_hbi: 3, - pre_to_hbmx: 3, - hbmx_to_hbm: 3, - atomic_adapter_narrow: 1, - atomic_adapter_narrow_wide: 1, - // Give some flexibility in peripheral xbar placement - periph_axi_lite_narrow: 2, - periph_axi_lite: 2, - periph_axi_lite_narrow_hbm_xbar_cfg: 2, - // Non-right-side chip peripherals - periph_axi_lite_narrow_hbm_cfg: 3, - periph_axi_lite_narrow_pcie_cfg: 3, - periph_axi_lite_narrow_chip_ctrl_cfg: 3, - 
periph_axi_lite_narrow_hbi_narrow_cfg: 3, - periph_axi_lite_narrow_hbi_wide_cfg: 3, - periph_axi_lite_narrow_bootrom_cfg: 3, - periph_axi_lite_narrow_fll_system_cfg: 3, - periph_axi_lite_narrow_fll_periph_cfg: 3, - periph_axi_lite_narrow_fll_hbm2e_cfg: 3, - // Right-side or latency-invariant chip peripherals - periph_axi_lite_narrow_soc_ctrl_cfg: 1, - periph_axi_lite_narrow_uart_cfg: 1, - periph_axi_lite_narrow_i2c_cfg: 1, - periph_axi_lite_narrow_gpio_cfg: 1, - periph_axi_lite_narrow_clint_cfg: 1, - periph_axi_lite_narrow_plic_cfg: 1, - periph_axi_lite_narrow_spim_cfg: 1, - periph_axi_lite_narrow_timer_cfg: 1, - }, - txns: { - wide_and_inter: 128, - wide_to_hbm: 128, - narrow_and_wide: 16, - rmq: 4, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 3, // clog2(total number of clusters) - nr_s1_quadrant: 1, - s1_quadrant: { - // number of pending transactions on the narrow/wide network - narrow_trans: 32, - wide_trans: 32, - // Disable for easier flow trials. - ro_cache_cfg: { - width: 1024, - count: 128, - sets: 2, - max_trans: 32, - address_regions: 4, - } - wide_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - wide_xbar_slv_id_width: 3 - narrow_xbar: { - max_slv_trans: 8, - max_mst_trans: 8, - fall_through: false, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 3, // clog2(total number of clusters) - cfg_base_addr: 184549376, // 0x0b000000 - cfg_base_offset: 65536 // 0x10000 - }, - clusters:[ - "snax_KUL_cluster", - "snax_KUL_xdma_cluster" - ], - - // peripherals - peripherals: { - rom: { - address: 16777216, // 0x0100_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - clint: { - address: 67108864, // 0x0400_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - axi_lite_peripherals: [ - { - name: "debug", - address: 0, // 0x0000_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spis", // Only Master port, no slave port - } - ], - axi_lite_narrow_peripherals: [ - { - name: "soc_ctrl", - address: 33554432, 
// 0x0200_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "uart", - address: 33562624, // 0x0200_2000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "gpio", - address: 33566720, // 0x0200_3000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "i2c", - address: 33570816, // 0x0200_4000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "chip_ctrl", - address: 33574912, // 0x0200_5000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "timer", - address: 33579008, // 0x0200_6000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spim", - address: 50331648, // 0x0300_0000 - length: 131072, // 4 kiB 0x2_0000 - }, - { - name: "plic", - address: 201326592, // 0x0C00_0000 - length: 67108864, // 64 MiB 0x400_0000 - }, - ], - }, - // non-peripheral IPs - spm_narrow: { - address: 1879048192, // 0x7000_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - spm_wide: { - address: 2147483648, // 0x8000_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - wide_zero_mem: { - address: 68719476736, // 0x10_0000_0000 - length: 8589934592, // 8 GiB 0x11_0000_0000 - }, - sys_idma_cfg: { - address: 285212672, // 0x1100_0000 - length: 65536, // 64 kiB 0x1_0000 - }, - // backup boot address - backup_boot_addr: 2147483648 // 0x8000_0000 - -} diff --git a/target/sim_chip/Makefile b/target/sim_chip/Makefile index 95213224..3f2e9a7c 100644 --- a/target/sim_chip/Makefile +++ b/target/sim_chip/Makefile @@ -166,7 +166,7 @@ CFG = $(TARGET_RTL)/cfg/lru.hjson OCCAMYGEN ?= $(ROOT)/util/occamygen/occamygen.py -$(CFG): +$(CFG): FORCE @# If the LRU config file doesn't exist, we use the default config. @if [ ! 
-e $@ ] ; then \ DEFAULT_CFG="$(TARGET_RTL)/cfg/hemaia.hjson"; \ @@ -179,7 +179,7 @@ $(CFG): echo "Overriding config file with: $(CFG_OVERRIDE)"; \ cp $(CFG_OVERRIDE) $@; \ fi - +FORCE: testharness/testharness.sv: testharness/testharness.sv.tpl $(CFG) @echo "[OCCAMYGEN] Generate $@" diff --git a/target/sim_chip/testharness/testharness.sv.tpl b/target/sim_chip/testharness/testharness.sv.tpl index b1e3f0f5..bff07724 100644 --- a/target/sim_chip/testharness/testharness.sv.tpl +++ b/target/sim_chip/testharness/testharness.sv.tpl @@ -107,15 +107,28 @@ module testharness // Must be the frequency of i_uart0.clk_i in Hz localparam int unsigned UartDPIFreq = 1_000_000_000; + // Instantiate Chips +<% + axi_wires = {} +%> + % for i in x: % for j in y: /// Uart signals logic tx_${i}_${j}, rx_${i}_${j}; - <% +<% i_hex_string = "{:01x}".format(i) j_hex_string = "{:01x}".format(j) - %> + + if multichip_cfg['single_chip'] is False: + axi_wires["chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)] = \ + soc2router_bus.copy(name="chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)) \ + .declare(context) + axi_wires["chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)] = \ + router2soc_bus.copy(name="chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)) \ + .declare(context) +%> occamy_chip i_occamy_${i}_${j} ( .clk_i, @@ -126,6 +139,12 @@ module testharness .chip_id_i(8'h${i_hex_string}${j_hex_string}), .test_mode_i(1'b0), .boot_mode_i('0), +% if multichip_cfg['single_chip'] is False: + .soc2router_req_o(${axi_wires["chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)].req_name()}), + .soc2router_rsp_i(${axi_wires["chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)].rsp_name()}), + .router2soc_req_i(${axi_wires["chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)].req_name()}), + .router2soc_rsp_o(${axi_wires["chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)].rsp_name()}), +% endif .uart_tx_o(tx_${i}_${j}), .uart_rx_i(rx_${i}_${j}), 
.uart_rts_no(), @@ -175,4 +194,86 @@ module testharness % endfor % endfor +% if multichip_cfg['single_chip'] is False: + // AXI XBar as the temporary router and virtual interposer + // Wires to connect between chips and router + ${soc2router_bus.req_type()} [${(max(x) + 1) * (max(y) + 1) - 1}:0] soc2router_req; + ${soc2router_bus.rsp_type()} [${(max(x) + 1) * (max(y) + 1) - 1}:0] soc2router_rsp; + ${router2soc_bus.req_type()} [${(max(x) + 1) * (max(y) + 1) - 1}:0] router2soc_req; + ${router2soc_bus.rsp_type()} [${(max(x) + 1) * (max(y) + 1) - 1}:0] router2soc_rsp; + + // AXI XBar Configuration + localparam axi_pkg::xbar_cfg_t VInterposerCfg = '{ + NoSlvPorts: ${(max(x) + 1) * (max(y) + 1)}, + NoMstPorts: ${(max(x) + 1) * (max(y) + 1)}, + MaxSlvTrans: 64, + MaxMstTrans: 64, + FallThrough: 0, + LatencyMode: axi_pkg::CUT_ALL_PORTS, + PipelineStages: 0, + AxiIdWidthSlvPorts: ${multichip_cfg['soc_to_router_iw']}, + AxiIdUsedSlvPorts: ${multichip_cfg['soc_to_router_iw']}, + UniqueIds: 0, + AxiAddrWidth: 48, + AxiDataWidth: 512, + NoAddrRules: ${(max(x) + 1) * (max(y) + 1)} + }; + + // AXI Rules + xbar_rule_48_t [${(max(x) + 1) * (max(y) + 1) - 1}:0] VInterposerRule; + % for i in x: + % for j in y: + assign VInterposerRule[${i + j * (max(x) + 1)}] = '{ + idx: ${i + j * (max(x) + 1)}, start_addr: {4'd${i}, 4'd${j}, 40'h0}, end_addr: {4'd${i}, 4'd${j}, 40'hFFFFFFFFFF} + }; + % endfor + % endfor + + // Instantiation of the AXI XBar + axi_xbar #( + .Cfg (VInterposerCfg), + .Connectivity ('1), + .ATOPs (0), + .slv_aw_chan_t (${soc2router_bus.aw_chan_type()}), + .mst_aw_chan_t (${router2soc_bus.aw_chan_type()}), + .w_chan_t (${soc2router_bus.w_chan_type()}), + .slv_b_chan_t (${soc2router_bus.b_chan_type()}), + .mst_b_chan_t (${router2soc_bus.b_chan_type()}), + .slv_ar_chan_t (${soc2router_bus.ar_chan_type()}), + .mst_ar_chan_t (${router2soc_bus.ar_chan_type()}), + .slv_r_chan_t (${soc2router_bus.r_chan_type()}), + .mst_r_chan_t (${router2soc_bus.r_chan_type()}), + .slv_req_t 
(${soc2router_bus.req_type()}), + .slv_resp_t (${soc2router_bus.rsp_type()}), + .mst_req_t (${router2soc_bus.req_type()}), + .mst_resp_t (${router2soc_bus.rsp_type()}), + .rule_t (xbar_rule_48_t) + ) i_virtual_interposer ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .test_i (1'b0), + .slv_ports_req_i (soc2router_req), + .slv_ports_resp_o (soc2router_rsp), + .mst_ports_req_o (router2soc_req), + .mst_ports_resp_i (router2soc_rsp), + .addr_map_i (VInterposerRule), + .en_default_mst_port_i ('0), + .default_mst_port_i ('0) + ); + + // Assign the Wires + % for i in x: + % for j in y: +<% + i_hex_string = "{:01x}".format(i) + j_hex_string = "{:01x}".format(j) +%> + assign soc2router_req[${i + j * (max(x) + 1)}] = ${axi_wires["chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)].req_name()}; + assign ${axi_wires["chip_{}_{}_soc2router".format(i_hex_string, j_hex_string)].rsp_name()} = soc2router_rsp[${i + j * (max(x) + 1)}]; + assign ${axi_wires["chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)].req_name()} = router2soc_req[${i + j * (max(x) + 1)}]; + assign router2soc_rsp[${i + j * (max(x) + 1)}] = ${axi_wires["chip_{}_{}_router2soc".format(i_hex_string, j_hex_string)].rsp_name()}; + % endfor + % endfor +% endif + endmodule diff --git a/util/occamygen/occamy.py b/util/occamygen/occamy.py index f0262f45..8dc50948 100644 --- a/util/occamygen/occamy.py +++ b/util/occamygen/occamy.py @@ -363,7 +363,6 @@ def am_connect_soc_wide_xbar_quad(am, am_soc_narrow_xbar, am_wide_xbar_quadrant_ ) return am_clusters - def get_dts(occamy_cfg, am_clint, am_axi_lite_peripherals, am_axi_lite_narrow_peripherals): dts = device_tree.DeviceTree() @@ -404,7 +403,7 @@ def get_dts(occamy_cfg, am_clint, am_axi_lite_peripherals, am_axi_lite_narrow_pe return dts -def get_top_kwargs(occamy_cfg, cluster_generators, soc_axi_lite_narrow_periph_xbar, soc_wide_xbar, soc_narrow_xbar, name): +def get_top_kwargs(occamy_cfg, cluster_generators, soc_axi_lite_narrow_periph_xbar, soc_wide_xbar, 
soc_narrow_xbar, soc2router_bus, router2soc_bus, name): core_per_cluster_list = [cluster_generator.cfg["nr_cores"] for cluster_generator in cluster_generators] nr_cores_quadrant = sum(core_per_cluster_list) @@ -415,12 +414,14 @@ def get_top_kwargs(occamy_cfg, cluster_generators, soc_axi_lite_narrow_periph_xb "soc_axi_lite_narrow_periph_xbar": soc_axi_lite_narrow_periph_xbar, "soc_wide_xbar": soc_wide_xbar, "soc_narrow_xbar": soc_narrow_xbar, + "soc2router_bus": soc2router_bus, + "router2soc_bus": router2soc_bus, "cores": nr_s1_quadrants * nr_cores_quadrant + 1, } return top_kwargs -def get_soc_kwargs(occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xbar, util, name): +def get_soc_kwargs(occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xbar, soc2router_bus, router2soc_bus, util, name): core_per_cluster_list = [cluster_generator.cfg["nr_cores"] for cluster_generator in cluster_generators] nr_cores_quadrant = sum(core_per_cluster_list) @@ -431,6 +432,8 @@ def get_soc_kwargs(occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xba "occamy_cfg": occamy_cfg, "soc_narrow_xbar": soc_narrow_xbar, "soc_wide_xbar": soc_wide_xbar, + "soc2router_bus": soc2router_bus, + "router2soc_bus": router2soc_bus, "cores": nr_s1_quadrants * nr_cores_quadrant + 1, "nr_s1_quadrants": nr_s1_quadrants, "nr_cores_quadrant": nr_cores_quadrant @@ -654,14 +657,16 @@ def get_testharness_kwargs(soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, chip_ } return testharness_kwargs -def get_multichip_testharness_kwargs(occamy_cfg, name): +def get_multichip_testharness_kwargs(occamy_cfg, soc2router_bus, router2soc_bus, name): testharness_kwargs = { "name": name, - "multichip_cfg": occamy_cfg["hemaia_multichip"] + "multichip_cfg": occamy_cfg["hemaia_multichip"], + "soc2router_bus": soc2router_bus, + "router2soc_bus": router2soc_bus } return testharness_kwargs -def get_chip_kwargs(soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, occamy_cfg, cluster_generators, util, name): +def 
get_chip_kwargs(soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, soc2router_bus, router2soc_bus, occamy_cfg, cluster_generators, util, name): core_per_cluster_list = [cluster_generator.cfg["nr_cores"] for cluster_generator in cluster_generators] nr_cores_quadrant = sum(core_per_cluster_list) @@ -672,6 +677,8 @@ def get_chip_kwargs(soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, occamy_cfg, "occamy_cfg": occamy_cfg, "soc_wide_xbar": soc_wide_xbar, "soc_axi_lite_narrow_periph_xbar": soc_axi_lite_narrow_periph_xbar, + "soc2router_bus": soc2router_bus, + "router2soc_bus": router2soc_bus, "cores": nr_s1_quadrants * nr_cores_quadrant + 1 } return chip_kwargs diff --git a/util/occamygen/occamygen.py b/util/occamygen/occamygen.py index 9b1dceb5..cb529876 100755 --- a/util/occamygen/occamygen.py +++ b/util/occamygen/occamygen.py @@ -228,6 +228,7 @@ def main(): am_spm_narrow, am_sys_idma_cfg = occamy.am_connect_soc_narrow_xbar( am, am_soc_narrow_xbar, occamy_cfg) + am_spm_wide, am_wide_zero_mem = occamy.am_connect_soc_wide_xbar_mem( am, am_soc_wide_xbar, occamy_cfg) @@ -314,6 +315,9 @@ def main(): soc_axi_lite_narrow_periph_xbar.add_output_entry("bootrom", am_bootrom) soc_axi_lite_narrow_periph_xbar.add_output_entry("clint", am_clint) + ################# + # SoC Wide Xbar # + ################# soc_wide_xbar = solder.AxiXbar( 48, 512, @@ -332,6 +336,10 @@ def main(): context="soc", node=am_soc_wide_xbar) + if occamy_cfg["hemaia_multichip"]["single_chip"] is False: + # The chiplet output port does not have the rule; it is the default port + soc_wide_xbar.outputs.append("hemaia_multichip") + soc_wide_xbar.add_input("hemaia_multichip") soc_wide_xbar.add_output_entry("soc_narrow", am_soc_narrow_xbar) soc_wide_xbar.add_input("soc_narrow") soc_wide_xbar.add_input("sys_idma_mst") @@ -489,19 +497,42 @@ def main(): "ClusterAddressSpace") narrow_xbar_quadrant_s1.add_input("cluster_{}".format(i)) - # Generate the Verilog code. 
+ # Generate the Verilog code for occamy_pkg.sv (only the definitions related to xbars are included) solder.render() - cluster_cfgs = list() - nr_clusters = len(occamy_cfg["clusters"]) - for i in range(nr_clusters): - cluster_cfgs.append(cluster_generators[i].cfg) + + ############################################## + # Die2Die AXI Bus For Module I/O Declaration # + ############################################## + # The Die2Die buses are independent of the XBars inside one chip, so they are declared standalone and must be placed below solder.render() + soc2router_bus = solder.AxiBus( + clk=soc_wide_xbar.clk, + rst=soc_wide_xbar.rst, + aw=soc_wide_xbar.aw, + dw=soc_wide_xbar.dw, + iw=occamy_cfg["hemaia_multichip"]["soc_to_router_iw"], + uw=soc_wide_xbar.uw, + name="soc2router_bus", + # declared=True + ) + + router2soc_bus = solder.AxiBus( + clk=soc_wide_xbar.clk, + rst=soc_wide_xbar.rst, + aw=soc_wide_xbar.aw, + dw=soc_wide_xbar.dw, + iw=occamy_cfg["hemaia_multichip"]["router_to_soc_iw"], + uw=soc_wide_xbar.uw, + name="router2soc_bus", + # declared=True + ) + # Emit the code. 
############# # Top-Level # ############# if args.top_sv: top_kwargs = occamy.get_top_kwargs(occamy_cfg, cluster_generators, - soc_axi_lite_narrow_periph_xbar, soc_wide_xbar, soc_narrow_xbar, args.name) + soc_axi_lite_narrow_periph_xbar, soc_wide_xbar, soc_narrow_xbar, soc2router_bus, router2soc_bus, args.name) write_template(args.top_sv, outdir, fname="{}_top.sv".format(args.name), @@ -513,7 +544,7 @@ def main(): ########################### if args.soc_sv: soc_kwargs = occamy.get_soc_kwargs( - occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xbar, util, args.name) + occamy_cfg, cluster_generators, soc_narrow_xbar, soc_wide_xbar, soc2router_bus, router2soc_bus, util, args.name) write_template(args.soc_sv, outdir, module=solder.code_module['soc'], @@ -609,7 +640,8 @@ def main(): write_template(args.testharness_sv, outdir, **testharness_kwargs) if args.multichip_testharness_sv: - multichip_testharness_kwargs = occamy.get_multichip_testharness_kwargs(occamy_cfg, name) + multichip_testharness_kwargs = occamy.get_multichip_testharness_kwargs( + occamy_cfg, soc2router_bus, router2soc_bus, name) write_template(args.multichip_testharness_sv, outdir, **multichip_testharness_kwargs) ############ @@ -627,7 +659,7 @@ def main(): ######## if args.chip: chip_kwargs = occamy.get_chip_kwargs( - soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, occamy_cfg, cluster_generators, util, args.name) + soc_wide_xbar, soc_axi_lite_narrow_periph_xbar, soc2router_bus, router2soc_bus, occamy_cfg, cluster_generators, util, args.name) write_template(args.chip, outdir, **chip_kwargs) ########