Skip to content

Commit

Permalink
hw: Parameterize Omega network additions properly
Browse files Browse the repository at this point in the history
  • Loading branch information
paulsc96 committed Sep 25, 2023
1 parent f2638cd commit a38be41
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 28 deletions.
23 changes: 23 additions & 0 deletions docs/schema/snitch_cluster.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@
16,
32
]
},
"topology": {
"type": "string",
"description": "Network topology used for TCDM interconnect.",
"enum": ["LogarithmicInterconnect", "OmegaNet"],
"default": "LogarithmicInterconnect"
},
"radix": {
"type": "number",
"description": "Radix of switches in switch-based TCDM interconnect topologies (ignored for logarithmic interconnect).",
"enum": [2, 4],
"default": 4
},
"num_switch_nets": {
"type": "number",
"description": "Number of parallel networks in switch-based TCDM interconnect topologies (ignored for logarithmic interconnect).",
"minimum": 1,
"default": 4
},
"switch_lfsr_arbiter": {
"type": "boolean",
"description": "Whether to use pseudorandom (LFSR-generated) arbitration in switch-based TCDM interconnect topologies instead of pseudo-round-robin (ignored for logarithmic interconnect).",
"default": false
}
}
},
Expand Down
8 changes: 7 additions & 1 deletion hw/snitch_cluster/src/snitch_cluster.sv
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ module snitch_cluster
/// Radix of the individual switch points of the network.
/// Currently supported are `32'd2` and `32'd4`.
parameter int unsigned Radix = 32'd2,
/// Number of parallel networks for switch-based TCDM interconnect.
parameter int unsigned NumSwitchNets = 32'd2,
/// Whether to use an LFSR to arbitrate switch-based TCDM networks.
parameter bit SwitchLfsrArbiter = 1'b0,
/// ## Timing Tuning Parameters
/// Insert Pipeline registers into off-loading path (request)
parameter bit RegisterOffloadReq = 1'b0,
Expand Down Expand Up @@ -764,7 +768,9 @@ module snitch_cluster
.user_t (tcdm_user_t),
.MemoryResponseLatency (1 + RegisterTCDMCuts),
.Radix (Radix),
.Topology (Topology)
.Topology (Topology),
.NumSwitchNets (NumSwitchNets),
.SwitchLfsrArbiter (SwitchLfsrArbiter)
) i_tcdm_interconnect (
.clk_i,
.rst_ni,
Expand Down
6 changes: 4 additions & 2 deletions hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,10 @@ module ${cfg['name']}_wrapper (
.SsrCfgs (${cfg['pkg_name']}::SsrCfgs),
.NumSequencerInstr (NumSequencerInstr),
.Hive (${cfg['pkg_name']}::Hive),
.Topology (snitch_pkg::LogarithmicInterconnect),
.Radix (2),
.Topology (snitch_pkg::${cfg['tcdm']['topology']}),
.Radix (${int(cfg['tcdm']['radix'])}),
.NumSwitchNets (${int(cfg['tcdm']['num_switch_nets'])}),
.SwitchLfsrArbiter (${int(cfg['tcdm']['switch_lfsr_arbiter'])}),
.RegisterOffloadReq (${int(cfg['timing']['register_offload_req'])}),
.RegisterOffloadRsp (${int(cfg['timing']['register_offload_rsp'])}),
.RegisterCoreReq (${int(cfg['timing']['register_core_req'])}),
Expand Down
48 changes: 23 additions & 25 deletions hw/snitch_cluster/src/snitch_tcdm_interconnect.sv
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ module snitch_tcdm_interconnect #(
/// Radix of the individual switch points of the network.
/// Currently supported are `32'd2` and `32'd4`.
parameter int unsigned Radix = 32'd2,
/// Number of parallel networks for Omega interconnect
parameter int unsigned NumOmegaNets = 32'd2,
/// Whether to use an LFSR to arbitrate Omega networks
parameter bit OmegaLfsrArbiter = 1'b0,
/// Number of parallel networks for switch-based interconnects.
parameter int unsigned NumSwitchNets = 32'd2,
/// Whether to use an LFSR to arbitrate switch-based networks.
parameter bit SwitchLfsrArbiter = 1'b0,
/// Payload type of the data request ports.
parameter type tcdm_req_t = logic,
/// Payload type of the data response ports.
Expand Down Expand Up @@ -108,14 +108,12 @@ module snitch_tcdm_interconnect #(
assign mem_req_o[i].q = out_req[i];
end

localparam snitch_pkg::topo_e TOPO = snitch_pkg::OmegaNet;

// ------------
// Request Side
// ------------
// We need to arbitrate the requests coming from the input side and resolve
// potential bank conflicts. Therefore a full arbitration tree is needed.
if (TOPO == snitch_pkg::LogarithmicInterconnect) begin : gen_xbar
if (Topology == snitch_pkg::LogarithmicInterconnect) begin : gen_xbar
stream_xbar #(
.NumInp ( NumInp ),
.NumOut ( NumOut ),
Expand All @@ -138,20 +136,20 @@ module snitch_tcdm_interconnect #(
.valid_o ( mem_q_valid_flat ),
.ready_i ( mem_q_ready_flat )
);
end else if (TOPO == snitch_pkg::OmegaNet) begin : gen_omega_net
localparam int unsigned NumInpPerNet = cf_math_pkg::ceil_div(NumInp, NumOmegaNets);
end else if (Topology == snitch_pkg::OmegaNet) begin : gen_omega_net
localparam int unsigned NumInpPerNet = cf_math_pkg::ceil_div(NumInp, NumSwitchNets);

// Intermediate request signals for Omega-to-Xbar interface
mem_req_chan_t [NumOmegaNets-1:0][NumOut-1:0] oout_data;
logic [NumOmegaNets-1:0][NumOut-1:0] oout_valid, oout_ready;
mem_req_chan_t [NumSwitchNets-1:0][NumOut-1:0] oout_data;
logic [NumSwitchNets-1:0][NumOut-1:0] oout_valid, oout_ready;

// Arbitration for Omega and Xbar stages, respectively
logic [cf_math_pkg::idx_width(NumOut)-1:0] rr1;
logic [cf_math_pkg::idx_width(NumOmegaNets)-1:0] rr2;
logic [cf_math_pkg::idx_width(NumSwitchNets)-1:0] rr2;

// Use pseudorandom arbitration if desired. For reference, see:
// https://github.com/pulp-platform/cluster_interconnect/blob/master/rtl/tcdm_interconnect/tcdm_interconnect.sv
if (OmegaLfsrArbiter) begin : gen_omega_lsfr
if (SwitchLfsrArbiter) begin : gen_omega_lsfr
logic [cf_math_pkg::idx_width(NumInp)-1:0] rr;
lfsr #(
.LfsrWidth ( 64 ),
Expand All @@ -166,20 +164,20 @@ module snitch_tcdm_interconnect #(
);
// The upper bits of `rr1` are truncated iff not needed in Butterfly networks.
// TODO: Does this also hold for Omega networks? Verify!
assign rr1 = rr[$high(rr):$clog2(NumOmegaNets)];
assign rr2 = rr[$clog2(NumOmegaNets)-1:0];
assign rr1 = rr[$high(rr):$clog2(NumSwitchNets)];
assign rr2 = rr[$clog2(NumSwitchNets)-1:0];
end else begin : gen_no_omega_lsfr
assign rr1 = '0;
assign rr2 = '0;
end

// Work around enum incompatibility and expand signals for part selects
typedef logic [$bits(mem_req_chan_t)-1:0] data_t;
typedef data_t [NumOmegaNets-1:0][NumInpPerNet-1:0] flat_data_t;
typedef logic [NumOmegaNets-1:0][NumInpPerNet-1:0] flat_hs_t;
typedef data_t [NumSwitchNets-1:0][NumInpPerNet-1:0] flat_data_t;
typedef logic [NumSwitchNets-1:0][NumInpPerNet-1:0] flat_hs_t;

flat_data_t data_in;
select_t [NumOmegaNets-1:0][NumInpPerNet-1:0] in_sel;
select_t [NumSwitchNets-1:0][NumInpPerNet-1:0] in_sel;
flat_hs_t in_valid, in_ready;

assign data_in = in_req;
Expand All @@ -191,14 +189,14 @@ module snitch_tcdm_interconnect #(
// Generate Omega networks (first stage)
// TODO: Properly balance tying of unused ports throughout parallel networks;
// This should minimize imbalance and maximize performance.
for (genvar i = 0; i < NumOmegaNets; ++i) begin : gen_omega_nets
for (genvar i = 0; i < NumSwitchNets; ++i) begin : gen_omega_nets
data_t [NumOut-1:0] data_out;
assign oout_data[i] = data_out;
stream_omega_net #(
.NumInp ( NumInpPerNet ),
.NumOut ( NumOut ),
.payload_t ( data_t ),
.ExtPrio ( OmegaLfsrArbiter ),
.ExtPrio ( SwitchLfsrArbiter ),
.SpillReg ( 1'b0 ),
.AxiVldRdy ( 1'b1 ),
.LockIn ( 1'b1 ),
Expand All @@ -223,20 +221,20 @@ module snitch_tcdm_interconnect #(

// Generate per-output multiplexers (second stage)
for (genvar i = 0; i < NumOut; ++i) begin : gen_out_arbs
mem_req_chan_t [NumOmegaNets-1:0] rrin_data;
logic [NumOmegaNets-1:0] rrin_valid, rrin_ready;
mem_req_chan_t [NumSwitchNets-1:0] rrin_data;
logic [NumSwitchNets-1:0] rrin_valid, rrin_ready;

// Bundle Omega net request channels for this bank
for (genvar k = 0; k < NumOmegaNets; ++k) begin : gen_rrin_in
for (genvar k = 0; k < NumSwitchNets; ++k) begin : gen_rrin_in
assign rrin_data[k] = oout_data[k][i];
assign rrin_valid[k] = oout_valid[k][i];
assign oout_ready[k][i] = rrin_ready[k];
end

rr_arb_tree #(
.NumIn ( NumOmegaNets ),
.NumIn ( NumSwitchNets ),
.DataType ( mem_req_chan_t ),
.ExtPrio ( OmegaLfsrArbiter ),
.ExtPrio ( SwitchLfsrArbiter ),
.AxiVldRdy ( 1'b1 ),
.LockIn ( 1'b1 )
) i_rr_arb_tree (
Expand Down

0 comments on commit a38be41

Please sign in to comment.