Skip to content

Commit

Permalink
Change acceleration integration to be fully configuration based (pulp-platform#51)
Browse files Browse the repository at this point in the history

* docs: Update schema to make snax_acc strings

* hw: Move accelerators out of snitch_cluster.sv

* hw: Process accelerators outside of snitch_cluster.sv

* cfg: Update snax-mac.hjson config

* hw: Take out unnecessary parameters

* tpl: Fix python backend in mako tpl

* cfg: Make snax-mac into string

* cfg: Update snax-gemm.hjson to use string

* docs: Update schema details

* hw: Take out unnecessary comments

* hw: Renamed snax_mac to snax_mac_wrapper

* hw: Change module name to snax_mac_wrapper

* bender: Change to use snax_mac_wrapper.sv

* hw: Put comments on template

* cfg: Add _wrapper affix
  • Loading branch information
rgantonio authored and JosseVanDelm committed Dec 11, 2023
1 parent 833cd48 commit c402157
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 147 deletions.
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ sources:
# Level 0
- hw/snax_hwpe_mac/src/snax_hwpe_ctrl.sv
- hw/snax_hwpe_mac/src/snax_hwpe_to_reqrsp.sv
- hw/snax_hwpe_mac/src/snax_mac.sv
- hw/snax_hwpe_mac/src/snax_mac_wrapper.sv

# snitch_vm
- files:
Expand Down
6 changes: 3 additions & 3 deletions docs/schema/snitch_cluster.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,10 @@
"default": false
},
"snax_acc": {
"type": "number",
"type": "string",
"title": "SNAX Accelerator Extension",
"description": "Enable specific Snitch Accelerator eXtension (SNAX). 1 for MAC engine and 2 for GEMM engine.",
"default": 0
"description": "Enable specific Snitch Accelerator eXtension (SNAX). Simply put the string name of the accelerator wrapper.",
"default": "none"
},
"snax_tcdm_ports": {
"type": "number",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// verilog_lint: waive-start line-length
// verilog_lint: waive-start no-trailing-spaces

module snax_mac # (
module snax_mac_wrapper # (
parameter int unsigned DataWidth = 32,
parameter int unsigned SnaxTcdmPorts = 4,
parameter type acc_req_t = logic,
Expand Down
161 changes: 27 additions & 134 deletions hw/snitch_cluster/src/snitch_cluster.sv
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,8 @@ module snitch_cluster
/// FPU configuration.
parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] =
'{default: fpnew_pkg::fpu_implementation_t'(0)},
/// Enable SNAX accelerators per core. Integer coded.
parameter int unsigned SNAX [NrCores] = '{default: 0},
/// Number of SNAX TCDM ports per core
parameter int unsigned SnaxTcdmPorts [NrCores] = '{default: 0},
/// Total Number of SNAX TCDM ports
parameter int unsigned TotalSnaxTcdmPorts = 0,
/// Physical Memory Attribute Configuration
parameter snitch_pma_pkg::snitch_pma_t SnitchPMACfg = '0,
/// # Per-core parameters
Expand Down Expand Up @@ -180,6 +178,11 @@ module snitch_cluster
// Memory configuration input types; these vary depending on implementation.
parameter type sram_cfg_t = logic,
parameter type sram_cfgs_t = logic,
// Accelerator typedef
parameter type acc_req_t = logic,
parameter type acc_resp_t = logic,
parameter type tcdm_req_t = logic,
parameter type tcdm_rsp_t = logic,
// Memory latency parameter. Most of the memories have a read latency of 1. In
// case you have memory macros which are pipelined you want to adjust this
// value here. This only applies to the TCDM. The instruction cache macros will break!
Expand Down Expand Up @@ -221,6 +224,15 @@ module snitch_cluster
/// Bypass half-frequency clock. (`d2` = divide-by-two). This signal is
/// pseudo-static.
input logic clk_d2_bypass_i,
/// SNAX ports
output acc_req_t [NrCores-1:0] snax_req_o,
output logic [NrCores-1:0] snax_qvalid_o,
input logic [NrCores-1:0] snax_qready_i,
input acc_resp_t [NrCores-1:0] snax_resp_i,
input logic [NrCores-1:0] snax_pvalid_i,
output logic [NrCores-1:0] snax_pready_o,
input tcdm_req_t [TotalSnaxTcdmPorts-1:0] snax_tcdm_req_i,
output tcdm_rsp_t [TotalSnaxTcdmPorts-1:0] snax_tcdm_rsp_o,
/// AXI Core cluster in-port.
input narrow_in_req_t narrow_in_req_i,
output narrow_in_resp_t narrow_in_resp_o,
Expand Down Expand Up @@ -256,29 +268,6 @@ module snitch_cluster
return n;
endfunction

// Returns the number of SNAX TCDM ports configured for core `core`.
// This is a direct lookup in the per-core `SnaxTcdmPorts` parameter array.
function automatic int unsigned get_snax_tcdm_ports(int unsigned core);
  // Assigning to the function name sets the return value.
  get_snax_tcdm_ports = SnaxTcdmPorts[core];
endfunction

// Returns the index of the first SNAX TCDM port belonging to core `core_idx`,
// i.e. the sum of the SNAX TCDM port counts of all cores with a lower index.
// Calling it with `core_idx` equal to the number of cores yields the total
// number of SNAX TCDM ports.
function automatic int unsigned get_snax_tcdm_port_offs(int unsigned core_idx);
  automatic int offset = 0;
  // Accumulate the port counts of cores 0 .. core_idx-1.
  for (int core = 0; core < core_idx; core++) begin
    offset = offset + get_snax_tcdm_ports(core);
  end
  return offset;
endfunction

// Returns 1 if any of the cores 0 .. core_idx-1 has a non-zero entry in the
// `SNAX` parameter array, and 0 otherwise. Note that `core_idx` acts as an
// exclusive upper bound on the cores that are inspected.
function automatic int unsigned check_if_snax(int unsigned core_idx);
  automatic int unsigned any_snax = 0;
  for (int core = 0; core < core_idx; core++) begin
    // SNAX entries are unsigned, so "non-zero" is the same as "> 0".
    if (SNAX[core] != 0) any_snax = 1;
  end
  return any_snax;
endfunction

// SNAX TCDM
localparam int unsigned SnaxSystem = check_if_snax(NrCores);
localparam int unsigned TotalSnaxTcdmPorts = get_snax_tcdm_port_offs(NrCores);

localparam int unsigned NrTCDMPortsCores = get_tcdm_port_offs(NrCores);
localparam int unsigned NumTCDMIn = NrTCDMPortsCores + 1;
localparam logic [PhysicalAddrWidth-1:0] TCDMMask = ~(TCDMSize-1);
Expand Down Expand Up @@ -379,7 +368,6 @@ module snitch_cluster
`MEM_TYPEDEF_ALL(mem, tcdm_mem_addr_t, data_t, strb_t, tcdm_user_t)
`MEM_TYPEDEF_ALL(mem_dma, tcdm_mem_addr_t, data_dma_t, strb_dma_t, logic)

`TCDM_TYPEDEF_ALL(tcdm, tcdm_addr_t, data_t, strb_t, tcdm_user_t)
`TCDM_TYPEDEF_ALL(tcdm_dma, tcdm_addr_t, data_dma_t, strb_dma_t, logic)

`REG_BUS_TYPEDEF_REQ(reg_req_t, addr_t, data_t, strb_t)
Expand Down Expand Up @@ -414,21 +402,6 @@ module snitch_cluster
addr_t end_addr;
} xbar_rule_t;

typedef struct packed {
acc_addr_e addr;
logic [4:0] id;
logic [31:0] data_op;
data_t data_arga;
data_t data_argb;
addr_t data_argc;
} acc_req_t;

typedef struct packed {
logic [4:0] id;
logic error;
data_t data;
} acc_resp_t;

`SNITCH_VM_TYPEDEF(PhysicalAddrWidth)

typedef struct packed {
Expand Down Expand Up @@ -513,10 +486,6 @@ module snitch_cluster
tcdm_req_t [NrTCDMPortsCores-1:0] tcdm_req;
tcdm_rsp_t [NrTCDMPortsCores-1:0] tcdm_rsp;

// Generation of SNAX wires
tcdm_req_t [TotalSnaxTcdmPorts-1:0 ] snax_tcdm_req;
tcdm_rsp_t [TotalSnaxTcdmPorts-1:0 ] snax_tcdm_rsp;

core_events_t [NrCores-1:0] core_events;
tcdm_events_t tcdm_events;
dma_events_t dma_events;
Expand Down Expand Up @@ -788,7 +757,8 @@ module snitch_cluster
end

// generate TCDM for snax if any of the cores has SNAX enabled
if( SnaxSystem ) begin: gen_yes_snax_tcdm_interconnect
if( TotalSnaxTcdmPorts > 0 ) begin: gen_yes_snax_tcdm_interconnect

snitch_tcdm_interconnect #(
.NumInp (NumTCDMIn + TotalSnaxTcdmPorts),
.NumOut (NrBanks),
Expand All @@ -805,12 +775,13 @@ module snitch_cluster
) i_tcdm_interconnect (
.clk_i,
.rst_ni,
.req_i ({axi_soc_req, tcdm_req, snax_tcdm_req}),
.rsp_o ({axi_soc_rsp, tcdm_rsp, snax_tcdm_rsp}),
.req_i ({axi_soc_req, tcdm_req, snax_tcdm_req_i}),
.rsp_o ({axi_soc_rsp, tcdm_rsp, snax_tcdm_rsp_o}),
.mem_req_o (ic_req),
.mem_rsp_i (ic_rsp)
);
end else begin: gen_no_snax_tcdm_interconnect

snitch_tcdm_interconnect #(
.NumInp (NumTCDMIn),
.NumOut (NrBanks),
Expand Down Expand Up @@ -850,14 +821,6 @@ module snitch_cluster
hive_req_t [NrCores-1:0] hive_req;
hive_rsp_t [NrCores-1:0] hive_rsp;

// SNAX wiring
acc_req_t [NrCores-1:0] snax_req;
logic [NrCores-1:0] snax_qvalid;
logic [NrCores-1:0] snax_qready;
acc_resp_t [NrCores-1:0] snax_resp;
logic [NrCores-1:0] snax_pvalid;
logic [NrCores-1:0] snax_pready;

for (genvar i = 0; i < NrCores; i++) begin : gen_core
localparam int unsigned TcdmPorts = get_tcdm_ports(i);
localparam int unsigned TcdmPortsOffs = get_tcdm_port_offs(i);
Expand Down Expand Up @@ -957,12 +920,12 @@ module snitch_cluster
.axi_dma_busy_o (),
.axi_dma_perf_o (),
.axi_dma_events_o (dma_core_events),
.snax_req_o (snax_req[i]),
.snax_qvalid_o (snax_qvalid[i]),
.snax_qready_i (snax_qready[i]),
.snax_resp_i (snax_resp[i]),
.snax_pvalid_i (snax_pvalid[i]),
.snax_pready_o (snax_pready[i]),
.snax_req_o (snax_req_o[i]),
.snax_qvalid_o (snax_qvalid_o[i]),
.snax_qready_i (snax_qready_i[i]),
.snax_resp_i (snax_resp_i[i]),
.snax_pvalid_i (snax_pvalid_i[i]),
.snax_pready_o (snax_pready_o[i]),
.core_events_o (core_events[i]),
.tcdm_addr_base_i (tcdm_start_address),
.barrier_o (barrier_in[i]),
Expand All @@ -982,76 +945,6 @@ module snitch_cluster
end
end

// Per-core SNAX accelerator generation.
// For every core i, SNAX[i] selects which accelerator wrapper is instantiated:
//   1      -> snax_mac
//   2      -> snax_gemm_wrapper
//   others -> no accelerator; the core-facing handshake/response inputs are tied to zero.
// Each accelerator talks to its core through snax_req/snax_resp (plus valid/ready)
// and reaches the TCDM through its own slice of snax_tcdm_req/snax_tcdm_rsp.
for (genvar i = 0; i < NrCores; i++) begin : gen_snax_acc

// Number of SNAX TCDM ports of this core and the index of its first port
// inside the flat snax_tcdm_req/snax_tcdm_rsp arrays.
localparam int unsigned LocalSnaxTcdmPorts = get_snax_tcdm_ports(i);
localparam int unsigned LocalSnaxTcdmOffset = get_snax_tcdm_port_offs(i);

// Core-local TCDM request/response bundles connected to the accelerator instance.
// NOTE(review): when LocalSnaxTcdmPorts is 0 the range below is [0-1:0] - confirm
// that this is elaborated as intended by the tools in use.
tcdm_req_t [LocalSnaxTcdmPorts-1:0] hang_snax_tcdm_req;
tcdm_rsp_t [LocalSnaxTcdmPorts-1:0] hang_snax_tcdm_rsp;

// Map the core-local SNAX TCDM ports onto the flat cluster-level arrays.
// Cores without SNAX TCDM ports generate no mapping and therefore occupy
// no slots in snax_tcdm_req/snax_tcdm_rsp.
for (genvar j = 0; j < LocalSnaxTcdmPorts; j++) begin : gen_snax_tcdm_map_per_core
always_comb begin
snax_tcdm_req[LocalSnaxTcdmOffset+j] = hang_snax_tcdm_req[j];
hang_snax_tcdm_rsp[j] = snax_tcdm_rsp[LocalSnaxTcdmOffset+j];
end
end

// SNAX[i] == 1: MAC accelerator with a fixed 32-bit data width.
if(SNAX[i] == 1) begin: gen_snax_mac

snax_mac # (
.DataWidth ( 32 ),
.SnaxTcdmPorts ( LocalSnaxTcdmPorts ),
.acc_req_t ( acc_req_t ),
.acc_rsp_t ( acc_resp_t ),
.tcdm_req_t ( tcdm_req_t ),
.tcdm_rsp_t ( tcdm_rsp_t )
) i_snax_mac (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.snax_req_i ( snax_req[i] ),
.snax_qvalid_i ( snax_qvalid[i] ),
.snax_qready_o ( snax_qready[i] ),
.snax_resp_o ( snax_resp[i] ),
.snax_pvalid_o ( snax_pvalid[i] ),
.snax_pready_i ( snax_pready[i] ),
.snax_tcdm_req_o ( hang_snax_tcdm_req ),
.snax_tcdm_rsp_i ( hang_snax_tcdm_rsp )
);

// SNAX[i] == 2: GEMM accelerator wrapper, data width taken from NarrowDataWidth.
end else if (SNAX[i] == 2) begin: gen_snax_gemm

snax_gemm_wrapper # (
.DataWidth ( NarrowDataWidth ),
.SnaxTcdmPorts ( LocalSnaxTcdmPorts ),
.acc_req_t ( acc_req_t ),
.acc_rsp_t ( acc_resp_t ),
.tcdm_req_t ( tcdm_req_t ),
.tcdm_rsp_t ( tcdm_rsp_t )
) i_snax_gemm (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.snax_req_i ( snax_req[i] ),
.snax_qvalid_i ( snax_qvalid[i] ),
.snax_qready_o ( snax_qready[i] ),
.snax_resp_o ( snax_resp[i] ),
.snax_pvalid_o ( snax_pvalid[i] ),
.snax_pready_i ( snax_pready[i] ),
.snax_tcdm_req_o ( hang_snax_tcdm_req ),
.snax_tcdm_rsp_i ( hang_snax_tcdm_rsp )
);

end else begin: gen_no_snax_acc
// Tie these signals low when no SNAX accelerator is present.
// NOTE(review): hang_snax_tcdm_req is not driven in this branch; if
// SnaxTcdmPorts[i] is non-zero for a core without an accelerator, the mapped
// snax_tcdm_req entries are left undriven - confirm the configuration
// never produces that combination.
assign snax_qready[i] = '0;
assign snax_resp[i] = '0;
assign snax_pvalid[i] = '0;
end
end

for (genvar i = 0; i < NrHives; i++) begin : gen_hive
localparam int unsigned HiveSize = get_hive_size(i);

Expand Down
Loading

0 comments on commit c402157

Please sign in to comment.