From c4021579befd40305e9c010246034dff33f11c8f Mon Sep 17 00:00:00 2001 From: Ryan Antonio Date: Tue, 5 Dec 2023 15:05:48 +0100 Subject: [PATCH] Change acceleration integration to be fully configuration based (#51) * docs: Update schema to make snax_acc strings * hw: Move accelerators out of snitch_cluster.sv * hw: Process accelerators outside of snitch_cluster.sv * cfg: Update snax-mac.hjson config * hw: Take out unnecessary parameters * tpl: Fix python backend in mako tpl * cfg: Make snax-mac into string * cfg: Update snax-gemm.hjson to use string * docs: Update schema details * hw: Take out unnecessary comments * hw: Renamed snax_mac to snax_mac_wrapper * hw: Change module name to snax_mac_wrapper * bender: Change to use snax_mac_wrapper.sv * hw: Put comments on template * cfg: Add _wrapper affix --- Bender.yml | 2 +- docs/schema/snitch_cluster.schema.json | 6 +- .../src/{snax_mac.sv => snax_mac_wrapper.sv} | 2 +- hw/snitch_cluster/src/snitch_cluster.sv | 161 +++--------------- .../src/snitch_cluster_wrapper.sv.tpl | 128 +++++++++++++- target/snitch_cluster/cfg/snax-gemm.hjson | 2 +- target/snitch_cluster/cfg/snax-mac.hjson | 2 +- 7 files changed, 156 insertions(+), 147 deletions(-) rename hw/snax_hwpe_mac/src/{snax_mac.sv => snax_mac_wrapper.sv} (99%) diff --git a/Bender.yml b/Bender.yml index e06b33dc4..9aaf9b70b 100644 --- a/Bender.yml +++ b/Bender.yml @@ -192,7 +192,7 @@ sources: # Level 0 - hw/snax_hwpe_mac/src/snax_hwpe_ctrl.sv - hw/snax_hwpe_mac/src/snax_hwpe_to_reqrsp.sv - - hw/snax_hwpe_mac/src/snax_mac.sv + - hw/snax_hwpe_mac/src/snax_mac_wrapper.sv # snitch_vm - files: diff --git a/docs/schema/snitch_cluster.schema.json b/docs/schema/snitch_cluster.schema.json index e1ad5ed51..485d00be5 100644 --- a/docs/schema/snitch_cluster.schema.json +++ b/docs/schema/snitch_cluster.schema.json @@ -417,10 +417,10 @@ "default": false }, "snax_acc": { - "type": "number", + "type": "string", "title": "SNAX Accelerator Extension", - "description": "Enable specific 
Snitch Accelerator eXtension (SNAX). 1 for MAC engine and 2 for GEMM engine.", - "default": 0 + "description": "Enable specific Snitch Accelerator eXtension (SNAX). Simply put the string name of the accelerator wrapper.", + "default": "none" }, "snax_tcdm_ports": { "type": "number", diff --git a/hw/snax_hwpe_mac/src/snax_mac.sv b/hw/snax_hwpe_mac/src/snax_mac_wrapper.sv similarity index 99% rename from hw/snax_hwpe_mac/src/snax_mac.sv rename to hw/snax_hwpe_mac/src/snax_mac_wrapper.sv index 4d3c63755..52d916634 100644 --- a/hw/snax_hwpe_mac/src/snax_mac.sv +++ b/hw/snax_hwpe_mac/src/snax_mac_wrapper.sv @@ -7,7 +7,7 @@ // verilog_lint: waive-start line-length // verilog_lint: waive-start no-trailing-spaces -module snax_mac # ( +module snax_mac_wrapper # ( parameter int unsigned DataWidth = 32, parameter int unsigned SnaxTcdmPorts = 4, parameter type acc_req_t = logic, diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index 32dafa36f..793107ae5 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -96,10 +96,8 @@ module snitch_cluster /// FPU configuration. parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] = '{default: fpnew_pkg::fpu_implementation_t'(0)}, - /// Enable SNAX accelerators per core. Integer coded. - parameter int unsigned SNAX [NrCores] = '{default: 0}, - /// Number of SNAX TCDM ports per core - parameter int unsigned SnaxTcdmPorts [NrCores] = '{default: 0}, + /// Total Number of SNAX TCDM ports + parameter int unsigned TotalSnaxTcdmPorts = 0, /// Physical Memory Attribute Configuration parameter snitch_pma_pkg::snitch_pma_t SnitchPMACfg = '0, /// # Per-core parameters @@ -180,6 +178,11 @@ module snitch_cluster // Memory configuration input types; these vary depending on implementation. 
parameter type sram_cfg_t = logic, parameter type sram_cfgs_t = logic, + // Accelerator typedef + parameter type acc_req_t = logic, + parameter type acc_resp_t = logic, + parameter type tcdm_req_t = logic, + parameter type tcdm_rsp_t = logic, // Memory latency parameter. Most of the memories have a read latency of 1. In // case you have memory macros which are pipelined you want to adjust this // value here. This only applies to the TCDM. The instruction cache macros will break! @@ -221,6 +224,15 @@ module snitch_cluster /// Bypass half-frequency clock. (`d2` = divide-by-two). This signal is /// pseudo-static. input logic clk_d2_bypass_i, + /// SNAX ports + output acc_req_t [NrCores-1:0] snax_req_o, + output logic [NrCores-1:0] snax_qvalid_o, + input logic [NrCores-1:0] snax_qready_i, + input acc_resp_t [NrCores-1:0] snax_resp_i, + input logic [NrCores-1:0] snax_pvalid_i, + output logic [NrCores-1:0] snax_pready_o, + input tcdm_req_t [TotalSnaxTcdmPorts-1:0] snax_tcdm_req_i, + output tcdm_rsp_t [TotalSnaxTcdmPorts-1:0] snax_tcdm_rsp_o, /// AXI Core cluster in-port. 
input narrow_in_req_t narrow_in_req_i, output narrow_in_resp_t narrow_in_resp_o, @@ -256,29 +268,6 @@ module snitch_cluster return n; endfunction - function automatic int unsigned get_snax_tcdm_ports(int unsigned core); - return SnaxTcdmPorts[core]; - endfunction - - function automatic int unsigned get_snax_tcdm_port_offs(int unsigned core_idx); - automatic int n = 0; - for (int i = 0; i < core_idx; i++) n += get_snax_tcdm_ports(i); - return n; - endfunction - - function automatic int unsigned check_if_snax(int unsigned core_idx); - for (int i = 0; i < core_idx; i++) begin - if ( SNAX[i] > 0) begin - return 1; - end - end - return 0; - endfunction - - // SNAX TCDM - localparam int unsigned SnaxSystem = check_if_snax(NrCores); - localparam int unsigned TotalSnaxTcdmPorts = get_snax_tcdm_port_offs(NrCores); - localparam int unsigned NrTCDMPortsCores = get_tcdm_port_offs(NrCores); localparam int unsigned NumTCDMIn = NrTCDMPortsCores + 1; localparam logic [PhysicalAddrWidth-1:0] TCDMMask = ~(TCDMSize-1); @@ -379,7 +368,6 @@ module snitch_cluster `MEM_TYPEDEF_ALL(mem, tcdm_mem_addr_t, data_t, strb_t, tcdm_user_t) `MEM_TYPEDEF_ALL(mem_dma, tcdm_mem_addr_t, data_dma_t, strb_dma_t, logic) - `TCDM_TYPEDEF_ALL(tcdm, tcdm_addr_t, data_t, strb_t, tcdm_user_t) `TCDM_TYPEDEF_ALL(tcdm_dma, tcdm_addr_t, data_dma_t, strb_dma_t, logic) `REG_BUS_TYPEDEF_REQ(reg_req_t, addr_t, data_t, strb_t) @@ -414,21 +402,6 @@ module snitch_cluster addr_t end_addr; } xbar_rule_t; - typedef struct packed { - acc_addr_e addr; - logic [4:0] id; - logic [31:0] data_op; - data_t data_arga; - data_t data_argb; - addr_t data_argc; - } acc_req_t; - - typedef struct packed { - logic [4:0] id; - logic error; - data_t data; - } acc_resp_t; - `SNITCH_VM_TYPEDEF(PhysicalAddrWidth) typedef struct packed { @@ -513,10 +486,6 @@ module snitch_cluster tcdm_req_t [NrTCDMPortsCores-1:0] tcdm_req; tcdm_rsp_t [NrTCDMPortsCores-1:0] tcdm_rsp; - // Generation of SNAX wires - tcdm_req_t [TotalSnaxTcdmPorts-1:0 ] 
snax_tcdm_req; - tcdm_rsp_t [TotalSnaxTcdmPorts-1:0 ] snax_tcdm_rsp; - core_events_t [NrCores-1:0] core_events; tcdm_events_t tcdm_events; dma_events_t dma_events; @@ -788,7 +757,8 @@ module snitch_cluster end // generate TCDM for snax if any of the cores has SNAX enabled - if( SnaxSystem ) begin: gen_yes_snax_tcdm_interconnect + if( TotalSnaxTcdmPorts > 0 ) begin: gen_yes_snax_tcdm_interconnect + snitch_tcdm_interconnect #( .NumInp (NumTCDMIn + TotalSnaxTcdmPorts), .NumOut (NrBanks), @@ -805,12 +775,13 @@ module snitch_cluster ) i_tcdm_interconnect ( .clk_i, .rst_ni, - .req_i ({axi_soc_req, tcdm_req, snax_tcdm_req}), - .rsp_o ({axi_soc_rsp, tcdm_rsp, snax_tcdm_rsp}), + .req_i ({axi_soc_req, tcdm_req, snax_tcdm_req_i}), + .rsp_o ({axi_soc_rsp, tcdm_rsp, snax_tcdm_rsp_o}), .mem_req_o (ic_req), .mem_rsp_i (ic_rsp) ); end else begin: gen_no_snax_tcdm_interconnect + snitch_tcdm_interconnect #( .NumInp (NumTCDMIn), .NumOut (NrBanks), @@ -850,14 +821,6 @@ module snitch_cluster hive_req_t [NrCores-1:0] hive_req; hive_rsp_t [NrCores-1:0] hive_rsp; - // SNAX wiring - acc_req_t [NrCores-1:0] snax_req; - logic [NrCores-1:0] snax_qvalid; - logic [NrCores-1:0] snax_qready; - acc_resp_t [NrCores-1:0] snax_resp; - logic [NrCores-1:0] snax_pvalid; - logic [NrCores-1:0] snax_pready; - for (genvar i = 0; i < NrCores; i++) begin : gen_core localparam int unsigned TcdmPorts = get_tcdm_ports(i); localparam int unsigned TcdmPortsOffs = get_tcdm_port_offs(i); @@ -957,12 +920,12 @@ module snitch_cluster .axi_dma_busy_o (), .axi_dma_perf_o (), .axi_dma_events_o (dma_core_events), - .snax_req_o (snax_req[i]), - .snax_qvalid_o (snax_qvalid[i]), - .snax_qready_i (snax_qready[i]), - .snax_resp_i (snax_resp[i]), - .snax_pvalid_i (snax_pvalid[i]), - .snax_pready_o (snax_pready[i]), + .snax_req_o (snax_req_o[i]), + .snax_qvalid_o (snax_qvalid_o[i]), + .snax_qready_i (snax_qready_i[i]), + .snax_resp_i (snax_resp_i[i]), + .snax_pvalid_i (snax_pvalid_i[i]), + .snax_pready_o (snax_pready_o[i]), 
.core_events_o (core_events[i]), .tcdm_addr_base_i (tcdm_start_address), .barrier_o (barrier_in[i]), @@ -982,76 +945,6 @@ module snitch_cluster end end - for (genvar i = 0; i < NrCores; i++) begin : gen_snax_acc - - // Calculate exact count of SNAX TCDM ports - localparam int unsigned LocalSnaxTcdmPorts = get_snax_tcdm_ports(i); - localparam int unsigned LocalSnaxTcdmOffset = get_snax_tcdm_port_offs(i); - - tcdm_req_t [LocalSnaxTcdmPorts-1:0] hang_snax_tcdm_req; - tcdm_rsp_t [LocalSnaxTcdmPorts-1:0] hang_snax_tcdm_rsp; - - // Remap SNAX TCDM ports to prune cores that - // do not use SNAX TCDM ports - for (genvar j = 0; j < LocalSnaxTcdmPorts; j++) begin : gen_snax_tcdm_map_per_core - always_comb begin - snax_tcdm_req[LocalSnaxTcdmOffset+j] = hang_snax_tcdm_req[j]; - hang_snax_tcdm_rsp[j] = snax_tcdm_rsp[LocalSnaxTcdmOffset+j]; - end - end - - if(SNAX[i] == 1) begin: gen_snax_mac - - snax_mac # ( - .DataWidth ( 32 ), - .SnaxTcdmPorts ( LocalSnaxTcdmPorts ), - .acc_req_t ( acc_req_t ), - .acc_rsp_t ( acc_resp_t ), - .tcdm_req_t ( tcdm_req_t ), - .tcdm_rsp_t ( tcdm_rsp_t ) - ) i_snax_mac ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .snax_req_i ( snax_req[i] ), - .snax_qvalid_i ( snax_qvalid[i] ), - .snax_qready_o ( snax_qready[i] ), - .snax_resp_o ( snax_resp[i] ), - .snax_pvalid_o ( snax_pvalid[i] ), - .snax_pready_i ( snax_pready[i] ), - .snax_tcdm_req_o ( hang_snax_tcdm_req ), - .snax_tcdm_rsp_i ( hang_snax_tcdm_rsp ) - ); - - end else if (SNAX[i] == 2) begin: gen_snax_gemm - - snax_gemm_wrapper # ( - .DataWidth ( NarrowDataWidth ), - .SnaxTcdmPorts ( LocalSnaxTcdmPorts ), - .acc_req_t ( acc_req_t ), - .acc_rsp_t ( acc_resp_t ), - .tcdm_req_t ( tcdm_req_t ), - .tcdm_rsp_t ( tcdm_rsp_t ) - ) i_snax_gemm ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .snax_req_i ( snax_req[i] ), - .snax_qvalid_i ( snax_qvalid[i] ), - .snax_qready_o ( snax_qready[i] ), - .snax_resp_o ( snax_resp[i] ), - .snax_pvalid_o ( snax_pvalid[i] ), - .snax_pready_i ( snax_pready[i] ), - 
.snax_tcdm_req_o ( hang_snax_tcdm_req ), - .snax_tcdm_rsp_i ( hang_snax_tcdm_rsp ) - ); - - end else begin: gen_no_snax_acc - // Tie these signal to low when no SNAX accelerator is present - assign snax_qready[i] = '0; - assign snax_resp[i] = '0; - assign snax_pvalid[i] = '0; - end - end - for (genvar i = 0; i < NrHives; i++) begin : gen_hive localparam int unsigned HiveSize = get_hive_size(i); diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 5a07b5b89..52bcf2cb6 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -41,11 +41,15 @@ ${',' if not loop.last else ''} % endfor \ +// These includes are necessary for pre-defined typedefs `include "axi/typedef.svh" +`include "tcdm_interface/typedef.svh" +// Main cluster package // verilog_lint: waive-start package-filename package ${cfg['pkg_name']}; + // Base and pre-calculated parameters localparam int unsigned NrCores = ${cfg['nr_cores']}; localparam int unsigned NrHives = ${cfg['nr_hives']}; @@ -61,6 +65,11 @@ package ${cfg['pkg_name']}; localparam int unsigned WideIdWidthIn = ${cfg['dma_id_width_in']}; localparam int unsigned WideIdWidthOut = $clog2(NrDmaMasters) + WideIdWidthIn; + localparam int unsigned CoreIDWidth = cf_math_pkg::idx_width(NrCores); + + localparam int unsigned TCDMDepth = ${cfg['tcdm']['depth']}; + localparam int unsigned NrBanks = ${cfg['tcdm']['banks']}; + localparam int unsigned NarrowUserWidth = ${cfg['user_width']}; localparam int unsigned WideUserWidth = ${cfg['dma_user_width']}; @@ -70,6 +79,7 @@ package ${cfg['pkg_name']}; localparam int unsigned Hive [NrCores] = '{${core_cfg('hive')}}; + // SRAM configurations typedef struct packed { % for field, width in cfg['sram_cfg_fields'].items(): logic [${width-1}:0] ${field}; @@ -82,6 +92,7 @@ package ${cfg['pkg_name']}; sram_cfg_t tcdm; } sram_cfgs_t; + // Re-defined typedefs used for other typedefs 
typedef logic [AddrWidth-1:0] addr_t; typedef logic [NarrowDataWidth-1:0] data_t; typedef logic [NarrowDataWidth/8-1:0] strb_t; @@ -94,11 +105,43 @@ package ${cfg['pkg_name']}; typedef logic [NarrowUserWidth-1:0] user_t; typedef logic [WideUserWidth-1:0] user_dma_t; + // Typedefs for the AXI connections `AXI_TYPEDEF_ALL(narrow_in, addr_t, narrow_in_id_t, data_t, strb_t, user_t) `AXI_TYPEDEF_ALL(narrow_out, addr_t, narrow_out_id_t, data_t, strb_t, user_t) `AXI_TYPEDEF_ALL(wide_in, addr_t, wide_in_id_t, data_dma_t, strb_dma_t, user_dma_t) `AXI_TYPEDEF_ALL(wide_out, addr_t, wide_out_id_t, data_dma_t, strb_dma_t, user_dma_t) + localparam int unsigned TCDMMemAddrWidth = $clog2(TCDMDepth); + localparam int unsigned TCDMSize = NrBanks * TCDMDepth * (NarrowDataWidth/8); + localparam int unsigned TCDMAddrWidth = $clog2(TCDMSize); + typedef logic [TCDMAddrWidth-1:0] tcdm_addr_t; + + // TCDM definitions + typedef struct packed { + logic [CoreIDWidth-1:0] core_id; + bit is_core; + } tcdm_user_t; + + `TCDM_TYPEDEF_ALL(tcdm, tcdm_addr_t, data_t, strb_t, tcdm_user_t) + + // Accelerator definitions for SNAX and other Snitch L0 accelerators + // (e.g., of L0 accelerators - DMA, FPSS, and so on) + typedef struct packed { + snitch_pkg::acc_addr_e addr; + logic [4:0] id; + logic [31:0] data_op; + data_t data_arga; + data_t data_argb; + addr_t data_argc; + } acc_req_t; + + typedef struct packed { + logic [4:0] id; + logic error; + data_t data; + } acc_resp_t; + + // Function pre-calculations function automatic snitch_pma_pkg::rule_t [snitch_pma_pkg::NrMaxRules-1:0] get_cached_regions(); automatic snitch_pma_pkg::rule_t [snitch_pma_pkg::NrMaxRules-1:0] cached_regions; cached_regions = '{default: '0}; @@ -114,6 +157,7 @@ package ${cfg['pkg_name']}; default: 0 }; + // FPU configurations per FPU that has a core localparam fpnew_pkg::fpu_implementation_t FPUImplementation [${cfg['nr_cores']}] = '{ % for c in cfg['cores']: '{ @@ -213,6 +257,7 @@ ${ssr_cfg(core, '{reg_idx}', '/*None*/ 0', 
',')}\ endpackage // verilog_lint: waive-stop package-filename +// Main snitch or SNAX cluster wrapper module ${cfg['name']}_wrapper ( input logic clk_i, input logic rst_ni, @@ -242,6 +287,7 @@ module ${cfg['name']}_wrapper ( output ${cfg['pkg_name']}::wide_in_resp_t wide_in_resp_o ); + // Internal local parameters to be hooked into the Snitch / SNAX cluster localparam int unsigned NumIntOutstandingLoads [${cfg['nr_cores']}] = '{${core_cfg('num_int_outstanding_loads')}}; localparam int unsigned NumIntOutstandingMem [${cfg['nr_cores']}] = '{${core_cfg('num_int_outstanding_mem')}}; localparam int unsigned NumFPOutstandingLoads [${cfg['nr_cores']}] = '{${core_cfg('num_fp_outstanding_loads')}}; @@ -251,9 +297,33 @@ module ${cfg['name']}_wrapper ( localparam int unsigned NumSequencerInstr [${cfg['nr_cores']}] = '{${core_cfg('num_sequencer_instructions')}}; localparam int unsigned NumSsrs [${cfg['nr_cores']}] = '{${core_cfg('num_ssrs')}}; localparam int unsigned SsrMuxRespDepth [${cfg['nr_cores']}] = '{${core_cfg('ssr_mux_resp_depth')}}; - localparam int unsigned SNAX [${cfg['nr_cores']}] = '{${core_cfg('snax_acc')}}; localparam int unsigned SnaxTcdmPorts [${cfg['nr_cores']}] = '{${core_cfg('snax_tcdm_ports')}}; + // SNAX accelerator ports + ${cfg['pkg_name']}::acc_req_t [${cfg['pkg_name']}::NrCores-1:0] snax_req; + logic [${cfg['pkg_name']}::NrCores-1:0] snax_qvalid; + logic [${cfg['pkg_name']}::NrCores-1:0] snax_qready; + ${cfg['pkg_name']}::acc_resp_t [${cfg['pkg_name']}::NrCores-1:0] snax_resp; + logic [${cfg['pkg_name']}::NrCores-1:0] snax_pvalid; + logic [${cfg['pkg_name']}::NrCores-1:0] snax_pready; + ## This set of lines are for the internal pre-calculations for SNAX ports + <% + extract_port_num_list = [] + for i in range(len(cfg['cores'])): + extract_port_num_list.append(cfg['cores'][i]['snax_tcdm_ports']) + + offset_list = [] + init_offset = 0 + + for i in range(len(extract_port_num_list)): + offset_list.append(init_offset) + init_offset += 
extract_port_num_list[i] + %> + // SNAX TCDM wires + // Wires need to be declared before use + ${cfg['pkg_name']}::tcdm_req_t [${init_offset-1}:0] snax_tcdm_req; + ${cfg['pkg_name']}::tcdm_rsp_t [${init_offset-1}:0] snax_tcdm_rsp; + // Snitch cluster under test. snitch_cluster #( .PhysicalAddrWidth (${cfg['addr_width']}), @@ -274,10 +344,10 @@ module ${cfg['name']}_wrapper ( .wide_in_resp_t (${cfg['pkg_name']}::wide_in_resp_t), .NrHives (${cfg['nr_hives']}), .NrCores (${cfg['nr_cores']}), - .TCDMDepth (${cfg['tcdm']['depth']}), + .TCDMDepth (${cfg['pkg_name']}::TCDMDepth), .ZeroMemorySize (${cfg['zero_mem_size']}), .ClusterPeriphSize (${cfg['cluster_periph_size']}), - .NrBanks (${cfg['tcdm']['banks']}), + .NrBanks (${cfg['pkg_name']}::NrBanks), .DMAAxiReqFifoDepth (${cfg['dma_axi_req_fifo_depth']}), .DMAReqFifoDepth (${cfg['dma_req_fifo_depth']}), .ICacheLineWidth (${cfg['pkg_name']}::ICacheLineWidth), @@ -297,8 +367,7 @@ module ${cfg['name']}_wrapper ( .Xdma (${core_cfg_flat('xdma')}), .Xssr (${core_cfg_flat('xssr')}), .Xfrep (${core_cfg_flat('xfrep')}), - .SNAX(SNAX), - .SnaxTcdmPorts(SnaxTcdmPorts), + .TotalSnaxTcdmPorts(${init_offset}), .FPUImplementation (${cfg['pkg_name']}::FPUImplementation), .SnitchPMACfg (${cfg['pkg_name']}::SnitchPMACfg), .NumIntOutstandingLoads (NumIntOutstandingLoads), @@ -336,7 +405,11 @@ module ${cfg['name']}_wrapper ( .NarrowMaxSlvTrans (${cfg['narrow_trans']}), .sram_cfg_t (${cfg['pkg_name']}::sram_cfg_t), .sram_cfgs_t (${cfg['pkg_name']}::sram_cfgs_t), - .DebugSupport (${int(cfg['enable_debug'])}) + .DebugSupport (${int(cfg['enable_debug'])}), + .acc_req_t (${cfg['pkg_name']}::acc_req_t), + .acc_resp_t (${cfg['pkg_name']}::acc_resp_t), + .tcdm_req_t (${cfg['pkg_name']}::tcdm_req_t), + .tcdm_rsp_t (${cfg['pkg_name']}::tcdm_rsp_t) ) i_cluster ( .clk_i, .rst_ni, @@ -360,6 +433,14 @@ module ${cfg['name']}_wrapper ( % else: .clk_d2_bypass_i (1'b0), % endif + .snax_req_o (snax_req), + .snax_qvalid_o (snax_qvalid), + .snax_qready_i 
(snax_qready), + .snax_resp_i (snax_resp), + .snax_pvalid_i (snax_pvalid), + .snax_pready_o (snax_pready), + .snax_tcdm_req_i (snax_tcdm_req), + .snax_tcdm_rsp_o (snax_tcdm_rsp), % if cfg['sram_cfg_expose']: .sram_cfgs_i (sram_cfgs_i), % else: @@ -374,4 +455,39 @@ module ${cfg['name']}_wrapper ( .wide_in_req_i, .wide_in_resp_o ); + + // Accelerator instances if there are accelerator ports + // If there are no accelerator ports, we tie the input signals to 0 +% for idx, c in enumerate(cfg['cores']): + % if c['snax_acc'] != "none": + + ${c['snax_acc']} # ( + .DataWidth ( ${cfg['pkg_name']}::NarrowDataWidth ), + .SnaxTcdmPorts ( SnaxTcdmPorts[${idx}] ), + .acc_req_t ( ${cfg['pkg_name']}::acc_req_t ), + .acc_rsp_t ( ${cfg['pkg_name']}::acc_resp_t ), + .tcdm_req_t ( ${cfg['pkg_name']}::tcdm_req_t ), + .tcdm_rsp_t ( ${cfg['pkg_name']}::tcdm_rsp_t ) + ) i_${c['snax_acc']}_${idx} ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .snax_req_i ( snax_req[${idx}] ), + .snax_qvalid_i ( snax_qvalid[${idx}] ), + .snax_qready_o ( snax_qready[${idx}] ), + .snax_resp_o ( snax_resp[${idx}] ), + .snax_pvalid_o ( snax_pvalid[${idx}] ), + .snax_pready_i ( snax_pready[${idx}] ), + .snax_tcdm_req_o ( snax_tcdm_req[${offset_list[idx] + c['snax_tcdm_ports'] - 1}:${offset_list[idx]}] ), + .snax_tcdm_rsp_i ( snax_tcdm_rsp[${offset_list[idx] + c['snax_tcdm_ports'] - 1}:${offset_list[idx]}] ) + ); + % else: + + assign snax_qready[${idx}] = '0; + assign snax_resp[${idx}] = '0; + assign snax_pvalid[${idx}] = '0; + + % endif +% endfor + + endmodule diff --git a/target/snitch_cluster/cfg/snax-gemm.hjson b/target/snitch_cluster/cfg/snax-gemm.hjson index 4215664b7..596fe623b 100644 --- a/target/snitch_cluster/cfg/snax-gemm.hjson +++ b/target/snitch_cluster/cfg/snax-gemm.hjson @@ -86,7 +86,7 @@ xf8alt: true, xfdotp: true, xfvec: true, - snax_acc: 2, + snax_acc: "snax_gemm_wrapper", snax_tcdm_ports: 24, num_int_outstanding_loads: 1, num_int_outstanding_mem: 4, diff --git a/target/snitch_cluster/cfg/snax-mac.hjson
b/target/snitch_cluster/cfg/snax-mac.hjson index 851d3b735..583c0b8d1 100644 --- a/target/snitch_cluster/cfg/snax-mac.hjson +++ b/target/snitch_cluster/cfg/snax-mac.hjson @@ -86,7 +86,7 @@ xf8alt: true, xfdotp: true, xfvec: true, - snax_acc: 1, + snax_acc: "snax_mac_wrapper", snax_tcdm_ports: 4, num_int_outstanding_loads: 1, num_int_outstanding_mem: 4,