diff --git a/docs/schema/snitch_cluster.schema.json b/docs/schema/snitch_cluster.schema.json index 6e8c1181a..27693d2e8 100644 --- a/docs/schema/snitch_cluster.schema.json +++ b/docs/schema/snitch_cluster.schema.json @@ -66,6 +66,16 @@ 64 ] }, + "alias_region_enable": { + "type": "boolean", + "description": "Whether to provide a hardwired alias region for the TCDM (and cluster) address space.", + "default": false + }, + "alias_region_base": { + "type": "number", + "description": "Base address of the hardwired alias region for the TCDM (and cluster) if enabled. The default setting is `0x1800_0000`.", + "default": 402653184 + }, "addr_width": { "type": "number", "description": "Length of the address, should be greater than 30. If the address is larger than 34 the data bus needs to be 64 bits in size.", diff --git a/hw/snitch_cluster/src/snitch_cc.sv b/hw/snitch_cluster/src/snitch_cc.sv index 5bb3b1b48..2c67c1974 100644 --- a/hw/snitch_cluster/src/snitch_cc.sv +++ b/hw/snitch_cluster/src/snitch_cc.sv @@ -99,6 +99,9 @@ module snitch_cc #( parameter snitch_pma_pkg::snitch_pma_t SnitchPMACfg = '{default: 0}, /// Enable debug support. parameter bit DebugSupport = 1, + /// Optional fixed TCDM alias. + parameter bit TCDMAliasEnable = 1'b0, + parameter logic [AddrWidth-1:0] TCDMAliasStart = '0, /// Derived parameter *Do not override* parameter int unsigned TCDMPorts = (NumSsrs > 1 ? 
NumSsrs : 1), parameter type addr_t = logic [AddrWidth-1:0], @@ -591,16 +594,23 @@ module snitch_cc #( logic [AddrWidth-1:0] mask; } reqrsp_rule_t; - reqrsp_rule_t addr_map; - assign addr_map = '{ + reqrsp_rule_t [TCDMAliasEnable:0] addr_map; + assign addr_map[0] = '{ idx: 1, base: tcdm_addr_base_i, mask: ({AddrWidth{1'b1}} << TCDMAddrWidth) }; + if (TCDMAliasEnable) begin : gen_tcdm_alias_rule + assign addr_map[1] = '{ + idx: 1, + base: TCDMAliasStart, + mask: ({AddrWidth{1'b1}} << TCDMAddrWidth) + }; + end addr_decode_napot #( .NoIndices (2), - .NoRules (1), + .NoRules (1 + TCDMAliasEnable), .addr_t (logic [AddrWidth-1:0]), .rule_t (reqrsp_rule_t) ) i_addr_decode_napot ( diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index 4f666075e..7536ab0ae 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -183,7 +183,10 @@ module snitch_cluster // additional cycle latency, which is taken into account here. parameter int unsigned MemoryMacroLatency = 1 + RegisterTCDMCuts, /// Enable debug support. - parameter bit DebugSupport = 1 + parameter bit DebugSupport = 1, + /// Optional fixed cluster alias region. + parameter bit AliasRegionEnable = 1'b0, + parameter logic [PhysicalAddrWidth-1:0] AliasRegionBase = '0 ) ( /// System clock. If `IsoCrossing` is enabled this port is the _fast_ clock. /// The slower, half-frequency clock, is derived internally. @@ -261,13 +264,16 @@ module snitch_cluster localparam int unsigned NarrowIdWidthOut = $clog2(NrNarrowMasters) + NarrowIdWidthIn; localparam int unsigned NrSlaves = 3; - localparam int unsigned NrRules = NrSlaves - 1; + localparam int unsigned NrRuleIdcs = NrSlaves - 1; + localparam int unsigned NrRules = (1 + AliasRegionEnable) * NrRuleIdcs; // DMA, SoC Request, `n` instruction caches. 
localparam int unsigned NrWideMasters = 2 + NrHives; localparam int unsigned WideIdWidthOut = $clog2(NrWideMasters) + WideIdWidthIn; // DMA X-BAR configuration localparam int unsigned NrWideSlaves = 3; + localparam int unsigned NrWideRuleIdcs = NrWideSlaves - 1; + localparam int unsigned NrWideRules = (1 + AliasRegionEnable) * NrWideRuleIdcs; // AXI Configuration localparam axi_pkg::xbar_cfg_t ClusterXbarCfg = '{ @@ -300,7 +306,7 @@ module snitch_cluster UniqueIds: 1'b0, AxiAddrWidth: PhysicalAddrWidth, AxiDataWidth: WideDataWidth, - NoAddrRules: 2 + NoAddrRules: NrWideRules }; function automatic int unsigned get_hive_size(int unsigned current_hive); @@ -436,9 +442,9 @@ module snitch_cluster logic [1:0] ptw_is_4mega; } hive_rsp_t; - // ----------- - // Assignments - // ----------- + // --------------------------- + // Cluster-internal Addressing + // --------------------------- // Calculate start and end address of TCDM based on the `cluster_base_addr_i`. addr_t tcdm_start_address, tcdm_end_address; assign tcdm_start_address = (cluster_base_addr_i & TCDMMask); @@ -452,6 +458,15 @@ module snitch_cluster assign zero_mem_start_address = cluster_periph_end_address; assign zero_mem_end_address = cluster_periph_end_address + ZeroMemorySize * 1024; + localparam addr_t TCDMAliasStart = AliasRegionBase & TCDMMask; + localparam addr_t TCDMAliasEnd = (TCDMAliasStart + TCDMSize) & TCDMMask; + + localparam addr_t PeriphAliasStart = TCDMAliasEnd; + localparam addr_t PeriphAliasEnd = TCDMAliasEnd + ClusterPeriphSize * 1024; + + localparam addr_t ZeroMemAliasStart = PeriphAliasEnd; + localparam addr_t ZeroMemAliasEnd = PeriphAliasEnd + ZeroMemorySize * 1024; + // ---------------- // Wire Definitions // ---------------- @@ -551,7 +566,7 @@ module snitch_cluster xbar_rule_t [DmaXbarCfg.NoAddrRules-1:0] dma_xbar_rule; assign dma_xbar_default_port = '{default: SoCDMAOut}; - assign dma_xbar_rule = '{ + assign dma_xbar_rule[NrWideRuleIdcs-1:0] = '{ '{ idx: TCDMDMA, start_addr: 
tcdm_start_address, @@ -563,6 +578,21 @@ module snitch_cluster end_addr: zero_mem_end_address } }; + if (AliasRegionEnable) begin : gen_dma_xbar_alias + assign dma_xbar_rule [NrWideRules-1:NrWideRuleIdcs] = '{ + '{ + idx: TCDMDMA, + start_addr: TCDMAliasStart, + end_addr: TCDMAliasEnd + }, + '{ + idx: ZeroMemory, + start_addr: ZeroMemAliasStart, + end_addr: ZeroMemAliasEnd + } + }; + end + localparam bit [DmaXbarCfg.NoSlvPorts-1:0] DMAEnableDefaultMstPort = '1; axi_xbar #( .Cfg (DmaXbarCfg), @@ -873,7 +903,9 @@ module snitch_cluster .RegisterFPUIn (RegisterFPUIn), .RegisterFPUOut (RegisterFPUOut), .TCDMAddrWidth (TCDMAddrWidth), - .DebugSupport (DebugSupport) + .DebugSupport (DebugSupport), + .TCDMAliasEnable (AliasRegionEnable), + .TCDMAliasStart (TCDMAliasStart) ) i_snitch_cc ( .clk_i, .clk_d2_i (clk_d2), @@ -1062,7 +1094,7 @@ module snitch_cluster cluster_xbar_default_port; xbar_rule_t [NrRules-1:0] cluster_xbar_rules; - assign cluster_xbar_rules = '{ + assign cluster_xbar_rules [NrRuleIdcs-1:0] = '{ '{ idx: TCDM, start_addr: tcdm_start_address, @@ -1074,6 +1106,20 @@ module snitch_cluster end_addr: cluster_periph_end_address } }; + if (AliasRegionEnable) begin : gen_cluster_xbar_alias + assign cluster_xbar_rules [NrRules-1:NrRuleIdcs] = '{ + '{ + idx: TCDM, + start_addr: TCDMAliasStart, + end_addr: TCDMAliasEnd + }, + '{ + idx: ClusterPeripherals, + start_addr: PeriphAliasStart, + end_addr: PeriphAliasEnd + } + }; + end localparam bit [ClusterXbarCfg.NoSlvPorts-1:0] ClusterEnableDefaultMstPort = '1; axi_xbar #( @@ -1183,9 +1229,6 @@ module snitch_cluster .rst_ni, .reg_req_i (reg_req), .reg_rsp_o (reg_rsp), - /// The TCDM always starts at the cluster base. 
- .tcdm_start_address_i (tcdm_start_address), - .tcdm_end_address_i (tcdm_end_address), .icache_prefetch_enable_o (icache_prefetch_enable), .cl_clint_o (cl_interrupt), .cluster_hart_base_id_i (hart_base_id_i), @@ -1245,6 +1288,8 @@ module snitch_cluster `ASSERT_INIT(CheckSuperBankFactor, (NrBanks % BanksPerSuperBank) == 0); // Check that the cluster base address aligns to the TCDMSize. `ASSERT(ClusterBaseAddrAlign, ((TCDMSize - 1) & cluster_base_addr_i) == 0) + // Check that the cluster alias address, if enabled, aligns to the TCDMSize. + `ASSERT_INIT(AliasRegionAddrAlign, ~AliasRegionEnable || ((TCDMSize - 1) & AliasRegionBase) == 0) // Make sure we only have one DMA in the system. `ASSERT_INIT(NumberDMA, $onehot0(Xdma)) diff --git a/hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv b/hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv index 144ee2c8a..998a1daba 100644 --- a/hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv +++ b/hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv @@ -27,8 +27,6 @@ module snitch_cluster_peripheral input reg_req_t reg_req_i, output reg_rsp_t reg_rsp_o, - input addr_t tcdm_start_address_i, - input addr_t tcdm_end_address_i, output logic icache_prefetch_enable_o, output logic [NrCores-1:0] cl_clint_o, input logic [9:0] cluster_hart_base_id_i, diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 6c1df0dc1..a7286c112 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -332,7 +332,9 @@ module ${cfg['name']}_wrapper ( .NarrowMaxSlvTrans (${cfg['narrow_trans']}), .sram_cfg_t (${cfg['pkg_name']}::sram_cfg_t), .sram_cfgs_t (${cfg['pkg_name']}::sram_cfgs_t), - .DebugSupport (${int(cfg['enable_debug'])}) + .DebugSupport (${int(cfg['enable_debug'])}), + .AliasRegionEnable 
(${int(cfg['alias_region_enable'])}), + .AliasRegionBase (${int(cfg['alias_region_base'])}) ) i_cluster ( .clk_i, .rst_ni, diff --git a/sw/tests/alias.c b/sw/tests/alias.c new file mode 100644 index 000000000..d27c96ce3 --- /dev/null +++ b/sw/tests/alias.c @@ -0,0 +1,46 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "snrt.h" + +uint32_t cluster_global_to_local_address(uint32_t global_addr) { + return global_addr - snrt_l1_start_addr() + ALIAS_TCDM_BASE_ADDR; +} + +const uint32_t n_inputs = 16; +volatile int errors = 2 * n_inputs; + +int main() { + // Get global and local memory aliases + volatile uint32_t *buffer_global = snrt_l1_next(); + volatile uint32_t *buffer_local = + (uint32_t *)cluster_global_to_local_address((uint32_t)buffer_global); + + // Test narrow cluster XBAR + if (snrt_cluster_core_idx() == 0) { + // Write to global buffer + for (uint32_t i = 0; i < n_inputs; i++) buffer_global[i] = i; + // Read from local buffer + for (uint32_t i = 0; i < n_inputs; i++) + if (buffer_local[i] == i) errors--; + } + + snrt_cluster_hw_barrier(); + + // Test wide DMA XBAR + if (snrt_is_dm_core()) { + // Read from local buffer using DMA + buffer_global += n_inputs; + snrt_dma_start_1d((void *)buffer_global, (void *)buffer_local, + n_inputs * sizeof(uint32_t)); + snrt_dma_wait_all(); + // Check results + for (uint32_t i = 0; i < n_inputs; i++) + if (buffer_global[i] == i) errors--; + } + + snrt_cluster_hw_barrier(); + + return errors; +} diff --git a/target/snitch_cluster/cfg/default.hjson b/target/snitch_cluster/cfg/default.hjson index 7f28a1073..76cca6e64 100644 --- a/target/snitch_cluster/cfg/default.hjson +++ b/target/snitch_cluster/cfg/default.hjson @@ -22,6 +22,7 @@ }, cluster_periph_size: 64, // kB zero_mem_size: 64, // kB + alias_region_enable: true, dma_data_width: 512, dma_axi_req_fifo_depth: 3, dma_req_fifo_depth: 3, 
diff --git a/target/snitch_cluster/src/banshee.yaml b/target/snitch_cluster/src/banshee.yaml index 3c39dcd6d..ba8045683 100644 --- a/target/snitch_cluster/src/banshee.yaml +++ b/target/snitch_cluster/src/banshee.yaml @@ -58,5 +58,7 @@ memory: size: 0x020000 # size of the TCDM offset: 0x40000 # offset to next cluster is start + size + offset latency: 1 - ssr: - num_dm: 3 + tcdm_alias: true + tcdm_alias_start: 0x18000000 +ssr: + num_dm: 3 diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index bf211effe..b75cb2149 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -3,6 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 runs: + - elf: tests/build/alias.elf + simulators: [vsim, vcs, verilator] # banshee does not model alias regions - elf: tests/build/atomics.elf simulators: [vsim, vcs, verilator] # banshee fails with exit code 0x4 - elf: tests/build/barrier.elf diff --git a/target/snitch_cluster/sw/runtime/common/snitch_cluster_addrmap.h.tpl b/target/snitch_cluster/sw/runtime/common/snitch_cluster_addrmap.h.tpl index 3d6b3b806..645c29129 100644 --- a/target/snitch_cluster/sw/runtime/common/snitch_cluster_addrmap.h.tpl +++ b/target/snitch_cluster/sw/runtime/common/snitch_cluster_addrmap.h.tpl @@ -7,3 +7,6 @@ #define CLUSTER_ZERO_MEM_START_ADDR (CLUSTER_PERIPH_BASE_ADDR + ${hex(cfg['cluster']['cluster_periph_size'] * 1024)}) #define CLUSTER_ZERO_MEM_END_ADDR (CLUSTER_ZERO_MEM_START_ADDR + ${hex(cfg['cluster']['zero_mem_size'] * 1024)}) #define CLINT_BASE_ADDR ${hex(cfg['peripherals']['clint']['address'])} +#define ALIAS_TCDM_BASE_ADDR ${hex(cfg['cluster']['alias_region_base'])} +#define ALIAS_PERIPH_BASE_ADDR (ALIAS_TCDM_BASE_ADDR + ${hex(cfg['cluster']['tcdm']['size'] * 1024)}) +#define ALIAS_ZERO_MEM_START_ADDR (ALIAS_PERIPH_BASE_ADDR + ${hex(cfg['cluster']['cluster_periph_size'] * 1024)})