From b602d2971bfab2db7f25de00a54df4cae740c69d Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Fri, 17 May 2024 17:59:15 +0200 Subject: [PATCH] wip --- hw/ip/spatz/src/spatz_vlsu.sv | 918 ++++++++++++++++++---------------- 1 file changed, 481 insertions(+), 437 deletions(-) diff --git a/hw/ip/spatz/src/spatz_vlsu.sv b/hw/ip/spatz/src/spatz_vlsu.sv index 38cf788c..b3f98d5c 100644 --- a/hw/ip/spatz/src/spatz_vlsu.sv +++ b/hw/ip/spatz/src/spatz_vlsu.sv @@ -36,11 +36,11 @@ module spatz_vlsu output vrf_be_t [NrInterfaces-1:0] vrf_wbe_o, input logic [NrInterfaces-1:0] vrf_wvalid_i, - output spatz_id_t [(3*NrInterfaces)-1:0] vrf_id_o, - output vrf_addr_t [(2*NrInterfaces)-1:0] vrf_raddr_o, - output logic [(2*NrInterfaces)-1:0] vrf_re_o, - input vrf_data_t [(2*NrInterfaces)-1:0] vrf_rdata_i, - input logic [(2*NrInterfaces)-1:0] vrf_rvalid_i, + output spatz_id_t [NrInterfaces-1:0] [2:0] vrf_id_o, + output vrf_addr_t [NrInterfaces-1:0] [1:0] vrf_raddr_o, + output logic [NrInterfaces-1:0] [1:0] vrf_re_o, + input vrf_data_t [NrInterfaces-1:0] [1:0] vrf_rdata_i, + input logic [NrInterfaces-1:0] [1:0] vrf_rvalid_i, // Memory Request output spatz_mem_req_t [NrMemPorts-1:0] spatz_mem_req_o, output logic [NrMemPorts-1:0] spatz_mem_req_valid_o, @@ -137,30 +137,35 @@ module spatz_vlsu `FF(state_q, state_d, VLSU_RunningLoad) - id_t [NrMemPorts-1:0] store_count_q; - id_t [NrMemPorts-1:0] store_count_d; + id_t [NrInterfaces-1:0] [N_FU-1:0] store_count_q; + id_t [NrInterfaces-1:0] [N_FU-1:0] store_count_d; - for (genvar port = 0; port < NrMemPorts; port++) begin: gen_store_count_q - `FF(store_count_q[port], store_count_d[port], '0) + for (genvar intf = 0; intf < NrInterfaces; intf++) begin : gen_store_cnt_intf + for (genvar fu = 0; fu < N_FU; fu++) begin : gen_store_cnt_intf_fu + `FF(store_count_q[intf][fu], store_count_d[intf][fu], '0) - end: gen_store_count_q + end: gen_store_cnt_intf_fu + end: gen_store_cnt_intf always_comb begin: proc_store_count // Maintain state store_count_d = store_count_q; - for (int port = 0; port < NrMemPorts; port++) begin - if (spatz_mem_req_o[port].write && spatz_mem_req_valid_o[port] && spatz_mem_req_ready_i[port]) - // Did we send a store? - store_count_d[port]++; - - // Did we get the ack of a store? - `ifdef MEMPOOL_SPATZ - if (store_count_q[port] != '0 && spatz_mem_rsp_valid_i[port] && spatz_mem_rsp_i[port].write) - store_count_d[port]--; - `else - if (store_count_q[port] != '0 && spatz_mem_rsp_valid_i[port]) - store_count_d[port]--; - `endif + for (int intf = 0; intf < NrInterfaces; intf++) begin + for (int fu = 0; fu < N_FU; fu++) begin + automatic int unsigned port = intf * N_FU + fu; + + if (spatz_mem_req_o[port].write && spatz_mem_req_valid_o[port] && spatz_mem_req_ready_i[port]) + // Did we send a store? + store_count_d[intf][fu]++; + + // Did we get the ack of a store? 
+`ifdef MEMPOOL_SPATZ + if (store_count_q[intf][fu] != '0 && spatz_mem_rsp_valid_i[port] && spatz_mem_rsp_i[port].write) + store_count_d[intf][fu]--; +`else + if (store_count_q[intf][fu] != '0 && spatz_mem_rsp_valid_i[port]) + store_count_d[intf][fu]--; +`endif + end end end: proc_store_count @@ -168,8 +173,6 @@ module spatz_vlsu // Reorder Buffer // ////////////////////// - // On the VRF side, each interface works independently because of the VRF structure - typedef logic [int'(MAXEW)-1:0] addr_offset_t; elen_t [NrInterfaces-1:0] [N_FU-1:0] rob_wdata; @@ -234,59 +237,59 @@ module spatz_vlsu // Memory request // ////////////////////// - // On the memory side, the interface boundary is transparent - // Is the memory operation valid and are we at the last one? - logic [NrMemPorts-1:0] mem_operation_valid; - logic [NrMemPorts-1:0] mem_operation_last; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_operation_valid; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_operation_last; // For each memory port we count how many bytes we have already loaded/stored (VLSU <-> MEM). // Multiple counters are needed all memory ports can work independent of each other. - vlen_t [NrMemPorts-1:0] mem_counter_max; - logic [NrMemPorts-1:0] mem_counter_en; - logic [NrMemPorts-1:0] mem_counter_load; - vlen_t [NrMemPorts-1:0] mem_counter_delta; - vlen_t [NrMemPorts-1:0] mem_counter_d; - vlen_t [NrMemPorts-1:0] mem_counter_q; - logic [NrMemPorts-1:0] mem_port_finished_q; - - vlen_t [NrMemPorts-1:0] mem_idx_counter_delta; - vlen_t [NrMemPorts-1:0] mem_idx_counter_d; - vlen_t [NrMemPorts-1:0] mem_idx_counter_q; - - for (genvar port = 0; port < NrMemPorts; port++) begin: gen_mem_counters - delta_counter #( - .WIDTH($bits(vlen_t)) - ) i_delta_counter_mem ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .clear_i (1'b0 ), - .en_i (mem_counter_en[port] ), - .load_i (mem_counter_load[port] ), - .down_i (1'b0 ), // We always count up - .delta_i (mem_counter_delta[port]), - .d_i (mem_counter_d[port] ), - .q_o (mem_counter_q[port] ), - .overflow_o(/* Unused */ ) - ); + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_counter_max; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_counter_en; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_counter_load; + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_counter_delta; + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_counter_d; + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_counter_q; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_port_finished_q; + + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_idx_counter_delta; + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_idx_counter_d; + vlen_t [NrInterfaces-1:0] [N_FU-1:0] mem_idx_counter_q; + + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_mem_counters_intf + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_mem_counters_intf_fu + delta_counter #( + .WIDTH($bits(vlen_t)) + ) i_delta_counter_mem ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (1'b0 ), + .en_i (mem_counter_en[intf][fu] ), + .load_i (mem_counter_load[intf][fu] ), + .down_i (1'b0 ), // We always count up + .delta_i (mem_counter_delta[intf][fu]), + .d_i (mem_counter_d[intf][fu] ), + .q_o (mem_counter_q[intf][fu] ), + .overflow_o(/* Unused */ ) + ); - delta_counter #( - .WIDTH($bits(vlen_t)) - ) i_delta_counter_mem_idx ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .clear_i (1'b0 ), - .en_i (mem_counter_en[port] ), - .load_i (mem_counter_load[port] ), - .down_i (1'b0 ), // We always count up - .delta_i (mem_idx_counter_delta[port]), - .d_i (mem_idx_counter_d[port] ), - .q_o (mem_idx_counter_q[port] ), - .overflow_o(/* Unused */ ) - 
); + delta_counter #( + .WIDTH($bits(vlen_t)) + ) i_delta_counter_mem_idx ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (1'b0 ), + .en_i (mem_counter_en[intf][fu] ), + .load_i (mem_counter_load[intf][fu] ), + .down_i (1'b0 ), // We always count up + .delta_i (mem_idx_counter_delta[intf][fu]), + .d_i (mem_idx_counter_d[intf][fu] ), + .q_o (mem_idx_counter_q[intf][fu] ), + .overflow_o(/* Unused */ ) + ); - assign mem_port_finished_q[port] = mem_spatz_req_valid && (mem_counter_q[port] == mem_counter_max[port]); - end: gen_mem_counters + assign mem_port_finished_q[intf][fu] = mem_spatz_req_valid && (mem_counter_q[intf][fu] == mem_counter_max[intf][fu]); + end: gen_mem_counters_intf_fu + end: gen_mem_counters_intf // Did the current instruction finished the memory requests? logic [NrParallelInstructions-1:0] mem_insn_finished_q, mem_insn_finished_d; @@ -421,7 +424,7 @@ module spatz_vlsu // Address Generation // //////////////////////// - elen_t [NrMemPorts-1:0] mem_req_addr; + elen_t [NrInterfaces-1:0] [N_FU-1:0] mem_req_addr; vrf_addr_t [NrInterfaces-1:0] vd_vreg_addr; vrf_addr_t [NrInterfaces-1:0] vs2_vreg_addr; @@ -432,53 +435,57 @@ module spatz_vlsu `FF(vs2_elem_id_q, vs2_elem_id_d, '0) // Pending indexes - logic [NrMemPorts-1:0] pending_index; + logic [NrInterfaces-1:0] [N_FU-1:0] pending_index; // Calculate the memory address for each memory port - addr_offset_t [NrMemPorts-1:0] mem_req_addr_offset; - for (genvar port = 0; port < NrMemPorts; port++) begin: gen_mem_req_addr - logic [31:0] addr; - logic [31:0] stride; - logic [31:0] offset; - - // Pre-shuffling index offset - typedef logic [int'(MAXEW)-1:0] maxew_t; - maxew_t idx_offset; - assign idx_offset = mem_idx_counter_q[port]; - - always_comb begin - stride = mem_is_strided ? mem_spatz_req.rs2 >> mem_spatz_req.vtype.vsew : 'd1; - - if (mem_is_indexed) begin - // What is the relationship between data and index width? - automatic logic [1:0] data_index_width_diff = int'(mem_spatz_req.vtype.vsew) - int'(mem_spatz_req.op_mem.ew); - - // Pointer to index - automatic logic [idx_width(N_FU*NrMemPorts*ELENB)-1:0] word_index = (port << (MAXEW - data_index_width_diff)) + (maxew_t'(idx_offset << data_index_width_diff) >> data_index_width_diff) + (maxew_t'(idx_offset >> (MAXEW - data_index_width_diff)) << (MAXEW - data_index_width_diff)) * NrMemPorts; - - // Index - unique case (mem_spatz_req.op_mem.ew) - EW_8 : offset = $signed(vrf_rdata_i[1][8 * word_index +: 8]); - EW_16: offset = $signed(vrf_rdata_i[1][8 * word_index +: 16]); - default: offset = $signed(vrf_rdata_i[1][8 * word_index +: 32]); - endcase - end else begin - offset = ({mem_counter_q[port][$bits(vlen_t)-1:MAXEW] << $clog2(NrMemPorts), mem_counter_q[port][int'(MAXEW)-1:0]} + (port << MAXEW)) * stride; - end + addr_offset_t [NrInterfaces-1:0] [N_FU-1:0] mem_req_addr_offset; + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_mem_req_addr_intf + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_mem_req_addr_intf_fu + automatic int unsigned port = intf * N_FU + fu; + + logic [31:0] addr; + logic [31:0] stride; + logic [31:0] offset; + + // Pre-shuffling index offset + typedef logic [int'(MAXEW)-1:0] maxew_t; + maxew_t idx_offset; + assign idx_offset = mem_idx_counter_q[intf][fu]; + + always_comb begin + stride = mem_is_strided ? mem_spatz_req.rs2 >> mem_spatz_req.vtype.vsew : 'd1; + + if (mem_is_indexed) begin + // What is the relationship between data and index width? 
+ automatic logic [1:0] data_index_width_diff = int'(mem_spatz_req.vtype.vsew) - int'(mem_spatz_req.op_mem.ew); + + // Pointer to index + automatic logic [idx_width(NrMemPorts*ELENB)-1:0] word_index = (port << (MAXEW - data_index_width_diff)) + (maxew_t'(idx_offset << data_index_width_diff) >> data_index_width_diff) + (maxew_t'(idx_offset >> (MAXEW - data_index_width_diff)) << (MAXEW - data_index_width_diff)) * NrMemPorts; + + // Index + unique case (mem_spatz_req.op_mem.ew) + EW_8 : offset = $signed(vrf_rdata_i[1][8 * word_index +: 8]); + EW_16: offset = $signed(vrf_rdata_i[1][8 * word_index +: 16]); + default: offset = $signed(vrf_rdata_i[1][8 * word_index +: 32]); + endcase + end else begin + offset = ({mem_counter_q[intf][fu][$bits(vlen_t)-1:MAXEW] << $clog2(NrMemPorts), mem_counter_q[intf][fu][int'(MAXEW)-1:0]} + (port << MAXEW)) * stride; + end - addr = mem_spatz_req.rs1 + offset; - mem_req_addr[port] = (addr >> MAXEW) << MAXEW; - mem_req_addr_offset[port] = addr[int'(MAXEW)-1:0]; + addr = mem_spatz_req.rs1 + offset; + mem_req_addr[intf][fu] = (addr >> MAXEW) << MAXEW; + mem_req_addr_offset[intf][fu] = addr[int'(MAXEW)-1:0]; - pending_index[port] = (mem_idx_counter_q[port][$clog2(NrWordsPerVector*ELENB)-1:0] >> MAXEW) != vs2_vreg_addr[$clog2(NrWordsPerVector)-1:0]; - end - end: gen_mem_req_addr + pending_index[intf][fu] = (mem_idx_counter_q[intf][fu][$clog2(NrWordsPerVector*ELENB)-1:0] >> MAXEW) != vs2_vreg_addr[$clog2(NrWordsPerVector)-1:0]; + end + end: gen_mem_req_addr_intf_fu + end: gen_mem_req_addr_intf // Calculate the register file addresses always_comb begin : gen_vreg_addr for (genvar intf = 0; intf < NrInterfaces; intf++) begin : gen_vreg_addr_intf - vd_vreg_addr[intf] = (commit_insn_q.vd << $clog2(NrWordsPerVector)) + ($unsigned(vd_elem_id) << 1) + $unsigned(intf); - vs2_vreg_addr[intf] = (mem_spatz_req.vs2 << $clog2(NrWordsPerVector)) + ($unsigned(vs2_elem_id_q) << 1) + $unsigned(intf); + vd_vreg_addr[intf] = (commit_insn_q.vd << $clog2(NrWordsPerVector)) + ($unsigned(vd_elem_id) << 1) + intf; + vs2_vreg_addr[intf] = (mem_spatz_req.vs2 << $clog2(NrWordsPerVector)) + ($unsigned(vs2_elem_id_q) << 1) + intf; end end @@ -494,9 +501,9 @@ module spatz_vlsu logic vlsu_finished_req; // Memory requests - spatz_mem_req_t [NrMemPorts-1:0] spatz_mem_req; - logic [NrMemPorts-1:0] spatz_mem_req_valid; - logic [NrMemPorts-1:0] spatz_mem_req_ready; + spatz_mem_req_t [NrInterfaces-1:0] [N_FU-1:0] spatz_mem_req; + logic [NrInterfaces-1:0] [N_FU-1:0] spatz_mem_req_valid; + logic [NrInterfaces-1:0] [N_FU-1:0] spatz_mem_req_ready; always_comb begin: control_proc // Maintain state @@ -522,8 +529,8 @@ module spatz_vlsu end: control_proc // Is the VRF operation valid and are we at the last one? - logic [NrMemPorts-1:0] commit_operation_valid; - logic [NrMemPorts-1:0] commit_operation_last; + logic [NrInterfaces-1:0] [N_FU-1:0] commit_operation_valid; + logic [NrInterfaces-1:0] [N_FU-1:0] commit_operation_last; // Is instruction a load? 
logic mem_is_load; @@ -571,26 +578,28 @@ module spatz_vlsu //////////////////// // Store the offsets of all loads, for realigning - addr_offset_t [N_FU*NrMemPorts-1:0] vreg_addr_offset; - logic [NrMemPorts-1:0] offset_queue_full; - for (genvar port = 0; port < NrMemPorts; port++) begin : gen_offset_queue - fifo_v3 #( - .DATA_WIDTH(int'(MAXEW) ), - .DEPTH (NrOutstandingLoads) - ) i_offset_queue ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_i (1'b0 ), - .testmode_i(1'b0 ), - .empty_o (/* Unused */ ), - .full_o (offset_queue_full[port] ), - .push_i (spatz_mem_req_valid[port] && spatz_mem_req_ready[port] && mem_is_load), - .data_i (mem_req_addr_offset[port] ), - .data_o (vreg_addr_offset[port] ), - .pop_i (rob_pop[port] && commit_insn_q.is_load ), - .usage_o (/* Unused */ ) - ); - end: gen_offset_queue + addr_offset_t [NrInterfaces-1:0] [N_FU-1:0] vreg_addr_offset; + logic [NrInterfaces-1:0] [N_FU-1:0] offset_queue_full; + for (genvar intf = 0; intf < NrInterfaces; intf++) begin : gen_offset_queue_intf + for (genvar fu = 0; fu < N_FU; fu++) begin : gen_offset_queue_intf_fu + fifo_v3 #( + .DATA_WIDTH(int'(MAXEW) ), + .DEPTH (NrOutstandingLoads) + ) i_offset_queue ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_i (1'b0 ), + .testmode_i(1'b0 ), + .empty_o (/* Unused */ ), + .full_o (offset_queue_full[intf][fu] ), + .push_i (spatz_mem_req_valid[intf][fu] && spatz_mem_req_ready[intf][fu] && mem_is_load), + .data_i (mem_req_addr_offset[intf][fu] ), + .data_o (vreg_addr_offset[intf][fu] ), + .pop_i (rob_pop[intf][fu] && commit_insn_q.is_load ), + .usage_o (/* Unused */ ) + ); + end: gen_offset_queue_intf_fu + end: gen_offset_queue_intf /////////////////////// // Output Register // /////////////////////// @@ -629,12 +638,17 @@ module spatz_vlsu assign vrf_we_o[intf] = vrf_req_valid_q[intf]; assign vrf_id_o[intf] = {vrf_req_q[intf].rsp.id, mem_spatz_req.id, commit_insn_q.id}; assign vrf_req_ready_q[intf] = vrf_wvalid_i[intf]; - end + // Remember if the interfaces finished writing back to the VRF. + // Clear this bookkeeping once the memory instruction is over. + `FFLARNC(vrf_req_q_rsp_valid_q[intf], 1'b1, vrf_req_q[intf].rsp_valid, vrf_commit_all_intf_valid, '0, clk_i, rst_ni) + assign vrf_commit_intf_valid[intf] = vrf_req_q[intf].rsp_valid | vrf_req_q_rsp_valid_q[intf]; + end - // Ack when the vector store finishes, or when the vector load commits to the VRF - assign vlsu_rsp_o = vrf_req_q.rsp_valid && vrf_req_valid_q ? vrf_req_q.rsp : '{id: commit_insn_q.id, default: '0}; - assign vlsu_rsp_valid_o = vrf_req_q.rsp_valid && vrf_req_valid_q ? vrf_req_ready_q : vlsu_finished_req && !commit_insn_q.is_load; + // Ack when the vector store finishes, or when the vector load commits to the VRF. + // With more than one interface, we need to wait until all the interfaces commit to the VRF. + assign vlsu_rsp_o = &vrf_commit_intf_valid && |vrf_req_valid_q ? vrf_req_q.rsp : '{id: commit_insn_q.id, default: '0}; + assign vlsu_rsp_valid_o = &vrf_commit_intf_valid && |vrf_req_valid_q ? |vrf_req_ready_q : vlsu_finished_req && !commit_insn_q.is_load; ////////////// // Counters // ////////////// @@ -643,75 +657,89 @@ module spatz_vlsu // Do we need to catch up to reach element idx parity? 
(Because of non-zero vstart) vlen_t vreg_start_0; assign vreg_start_0 = vlen_t'(commit_insn_q.vstart[$clog2(ELENB)-1:0]); - logic [N_FU*NrMemPorts-1:0] catchup; - for (genvar i = 0; i < N_FU*NrMemPorts; i++) begin: gen_catchup - assign catchup[i] = (commit_counter_q[i] < vreg_start_0) & (commit_counter_max[i] != commit_counter_q[i]); - end: gen_catchup - - for (genvar fu = 0; fu < N_FU*NrMemPorts; fu++) begin: gen_vreg_counter_proc - // The total amount of vector bytes we have to work through - vlen_t max_bytes; - - always_comb begin - // Default value - max_bytes = (commit_insn_q.vl >> $clog2(N_FU*NrMemPorts*ELENB)) << $clog2(ELENB); - - // Full transfer - if (commit_insn_q.vl[$clog2(ELENB) +: $clog2(N_FU*NrMemPorts)] > fu) - max_bytes += ELENB; - else if (commit_insn_q.vl[$clog2(N_FU*NrMemPorts*ELENB)-1:$clog2(ELENB)] == fu) - max_bytes += commit_insn_q.vl[$clog2(ELENB)-1:0]; - - commit_counter_load[port] = commit_insn_pop; - commit_counter_d[port] = (commit_insn_q.vstart >> $clog2(N_FU*NrMemPorts*ELENB)) << $clog2(ELENB); - if (commit_insn_q.vstart[$clog2(N_FU*NrMemPorts*ELENB)-1:$clog2(ELENB)] > fu) - commit_counter_d[port] += ELENB; - else if (commit_insn_q.vstart[idx_width(N_FU*NrMemPorts*ELENB)-1:$clog2(ELENB)] == fu) - commit_counter_d[port] += commit_insn_q.vstart[$clog2(ELENB)-1:0]; - commit_operation_valid[port] = commit_insn_valid && (commit_counter_q[port] != max_bytes) && (catchup[port] || (!catchup[port] && ~|catchup)); - commit_operation_last[port] = commit_operation_valid[port] && ((max_bytes - commit_counter_q[port]) <= (commit_is_single_element_operation ? commit_single_element_size : ELENB)); - commit_counter_delta[port] = !commit_operation_valid[port] ? vlen_t'('d0) : commit_is_single_element_operation ? vlen_t'(commit_single_element_size) : commit_operation_last[port] ? 
(max_bytes - commit_counter_q[port]) : vlen_t'(ELENB); - commit_counter_en[intf][fu] = commit_operation_valid[port] && (commit_insn_q.is_load && vrf_req_valid_d && vrf_req_ready_d) || (!commit_insn_q.is_load && vrf_rvalid_i[0] && vrf_re_o[0] && (!mem_is_indexed || vrf_rvalid_i[1])); - commit_counter_max[port] = max_bytes; + logic [NrInterfaces-1:0] [N_FU-1:0] catchup; + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_catchup_intf + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_catchup_intf_fu + assign catchup[intf][fu] = (commit_counter_q[intf][fu] < vreg_start_0) & (commit_counter_max[intf][fu] != commit_counter_q[intf][fu]); + end: gen_catchup_intf_fu + end: gen_catchup_intf + + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_vreg_counter_proc + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_vreg_counter_proc_fu + automatic int unsigned port = intf * N_FU + fu; + + // The total amount of vector bytes we have to work through + vlen_t max_bytes; + + always_comb begin + // Default value + max_bytes = (commit_insn_q.vl >> $clog2(NrMemPorts*ELENB)) << $clog2(ELENB); + + // Full transfer + if (commit_insn_q.vl[$clog2(ELENB) +: $clog2(NrMemPorts)] > port) + max_bytes += ELENB; + else if (commit_insn_q.vl[$clog2(NrMemPorts*ELENB)-1:$clog2(ELENB)] == port) + max_bytes += commit_insn_q.vl[$clog2(ELENB)-1:0]; + + commit_counter_load[intf][fu] = commit_insn_pop; + commit_counter_d[intf][fu] = (commit_insn_q.vstart >> $clog2(NrMemPorts*ELENB)) << $clog2(ELENB); + if (commit_insn_q.vstart[$clog2(NrMemPorts*ELENB)-1:$clog2(ELENB)] > port) + commit_counter_d[intf][fu] += ELENB; + else if (commit_insn_q.vstart[idx_width(NrMemPorts*ELENB)-1:$clog2(ELENB)] == port) + commit_counter_d[intf][fu] += commit_insn_q.vstart[$clog2(ELENB)-1:0]; + commit_operation_valid[intf][fu] = commit_insn_valid && (commit_counter_q[intf][fu] != max_bytes) && (catchup[intf][fu] || (!catchup[intf][fu] && ~|catchup)); + commit_operation_last[intf][fu] = commit_operation_valid[intf][fu] && ((max_bytes - commit_counter_q[intf][fu]) <= (commit_is_single_element_operation ? commit_single_element_size : ELENB)); + commit_counter_delta[intf][fu] = !commit_operation_valid[intf][fu] ? vlen_t'('d0) : commit_is_single_element_operation ? vlen_t'(commit_single_element_size) : commit_operation_last[intf][fu] ? (max_bytes - commit_counter_q[intf][fu]) : vlen_t'(ELENB); + commit_counter_en[intf][fu] = commit_operation_valid[intf][fu] && (commit_insn_q.is_load && vrf_req_valid_d && vrf_req_ready_d) || (!commit_insn_q.is_load && vrf_rvalid_i[0] && vrf_re_o[0] && (!mem_is_indexed || vrf_rvalid_i[1])); + commit_counter_max[intf][fu] = max_bytes; + end end end - assign vd_elem_id = (commit_counter_q[0] > vreg_start_0) ? 
commit_counter_q[0] >> $clog2(ELENB) : commit_counter_q[N_FU*NrMemPorts-1] >> $clog2(ELENB); - - for (genvar port = 0; port < NrMemPorts; port++) begin: gen_mem_counter_proc - // The total amount of vector bytes we have to work through - vlen_t max_bytes; - - always_comb begin - // Default value - max_bytes = (mem_spatz_req.vl >> $clog2(NrMemPorts*MemDataWidthB)) << $clog2(MemDataWidthB); - - if (NrMemPorts == 1) - max_bytes = mem_spatz_req.vl; - else - if (mem_spatz_req.vl[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] > port) - max_bytes += MemDataWidthB; - else if (mem_spatz_req.vl[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] == port) - max_bytes += mem_spatz_req.vl[$clog2(MemDataWidthB)-1:0]; - - mem_operation_valid[port] = mem_spatz_req_valid && (max_bytes != mem_counter_q[port]); - mem_operation_last[port] = mem_operation_valid[port] && ((max_bytes - mem_counter_q[port]) <= (mem_is_single_element_operation ? mem_single_element_size : MemDataWidthB)); - mem_counter_load[port] = mem_spatz_req_ready; - mem_counter_d[port] = (mem_spatz_req.vstart >> $clog2(NrMemPorts*MemDataWidthB)) << $clog2(MemDataWidthB); - if (NrMemPorts == 1) - mem_counter_d[port] = mem_spatz_req.vstart; - else - if (mem_spatz_req.vstart[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] > port) - mem_counter_d[port] += MemDataWidthB; - else if (mem_spatz_req.vstart[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] == port) - mem_counter_d[port] += mem_spatz_req.vstart[$clog2(MemDataWidthB)-1:0]; - mem_counter_delta[port] = !mem_operation_valid[port] ? 'd0 : mem_is_single_element_operation ? mem_single_element_size : mem_operation_last[port] ? (max_bytes - mem_counter_q[port]) : MemDataWidthB; - mem_counter_en[port] = spatz_mem_req_ready[port] && spatz_mem_req_valid[port]; - mem_counter_max[port] = max_bytes; - - // Index counter - mem_idx_counter_d[port] = mem_counter_d[port]; - mem_idx_counter_delta[port] = !mem_operation_valid[port] ? 'd0 : mem_idx_single_element_size; + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_vd_elem_id + assign vd_elem_id[intf] = (commit_counter_q[intf*N_FU] > vreg_start_0) + ? commit_counter_q[intf*N_FU] >> $clog2(ELENB) + : commit_counter_q[((intf+1)*N_FU)-1] >> $clog2(ELENB); + end + + for (genvar intf = 0; intf < NrInterfaces; intf++) begin: gen_mem_counter_proc_intf + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_mem_counter_proc_intf_fu + automatic int unsigned port = intf * N_FU + fu; + + // The total amount of vector bytes we have to work through + vlen_t max_bytes; + + always_comb begin + // Default value + max_bytes = (mem_spatz_req.vl >> $clog2(NrMemPorts*MemDataWidthB)) << $clog2(MemDataWidthB); + + if (NrMemPorts == 1) + max_bytes = mem_spatz_req.vl; + else + if (mem_spatz_req.vl[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] > port) + max_bytes += MemDataWidthB; + else if (mem_spatz_req.vl[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] == port) + max_bytes += mem_spatz_req.vl[$clog2(MemDataWidthB)-1:0]; + + mem_operation_valid[intf][fu] = mem_spatz_req_valid && (max_bytes != mem_counter_q[intf][fu]); + mem_operation_last[intf][fu] = mem_operation_valid[intf][fu] && ((max_bytes - mem_counter_q[intf][fu]) <= (mem_is_single_element_operation ? 
mem_single_element_size : MemDataWidthB)); + mem_counter_load[intf][fu] = mem_spatz_req_ready; + mem_counter_d[intf][fu] = (mem_spatz_req.vstart >> $clog2(NrMemPorts*MemDataWidthB)) << $clog2(MemDataWidthB); + if (NrMemPorts == 1) + mem_counter_d[intf][fu] = mem_spatz_req.vstart; + else + if (mem_spatz_req.vstart[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] > port) + mem_counter_d[intf][fu] += MemDataWidthB; + else if (mem_spatz_req.vstart[$clog2(MemDataWidthB) +: $clog2(NrMemPorts)] == port) + mem_counter_d[intf][fu] += mem_spatz_req.vstart[$clog2(MemDataWidthB)-1:0]; + mem_counter_delta[intf][fu] = !mem_operation_valid[intf][fu] ? 'd0 : mem_is_single_element_operation ? mem_single_element_size : mem_operation_last[intf][fu] ? (max_bytes - mem_counter_q[intf][fu]) : MemDataWidthB; + mem_counter_en[intf][fu] = spatz_mem_req_ready[intf][fu] && spatz_mem_req_valid[intf][fu]; + mem_counter_max[intf][fu] = max_bytes; + + // Index counter + mem_idx_counter_d[intf][fu] = mem_counter_d[intf][fu]; + mem_idx_counter_delta[intf][fu] = !mem_operation_valid[intf][fu] ? 'd0 : mem_idx_single_element_size; + end end end @@ -745,241 +773,252 @@ module spatz_vlsu ////////////////////////// // Memory request signals - id_t [NrMemPorts-1:0] mem_req_id; - logic [NrMemPorts-1:0][MemDataWidth-1:0] mem_req_data; - logic [NrMemPorts-1:0] mem_req_svalid; - logic [NrMemPorts-1:0][ELEN/8-1:0] mem_req_strb; - logic [NrMemPorts-1:0] mem_req_lvalid; - logic [NrMemPorts-1:0] mem_req_last; + id_t [NrInterfaces-1:0] [N_FU-1:0] mem_req_id; + logic [NrInterfaces-1:0] [N_FU-1:0][MemDataWidth-1:0] mem_req_data; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_req_svalid; + logic [NrInterfaces-1:0] [N_FU-1:0][ELEN/8-1:0] mem_req_strb; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_req_lvalid; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_req_last; // Number of pending requests - logic [NrMemPorts-1:0][idx_width(NrOutstandingLoads):0] mem_pending_d, mem_pending_q; - logic [NrMemPorts-1:0] mem_pending; + logic [NrInterfaces-1:0] [N_FU-1:0][idx_width(NrOutstandingLoads):0] mem_pending_d, mem_pending_q; + logic [NrInterfaces-1:0] [N_FU-1:0] mem_pending; `FF(mem_pending_q, mem_pending_d, '{default: '0}) always_comb begin // Maintain state mem_pending_d = mem_pending_q; - for (int port = 0; port < NrMemPorts; port++) begin - mem_pending[port] = mem_pending_q[port] != '0; + for (int intf = 0; intf < NrInterfaces; intf++) begin + for (int fu = 0; fu < N_FU; fu++) begin + mem_pending[intf][fu] = mem_pending_q[intf][fu] != '0; - // New request sent - if (mem_is_load && spatz_mem_req_valid[port] && spatz_mem_req_ready[port]) - mem_pending_d[port]++; + // New request sent + if (mem_is_load && spatz_mem_req_valid[intf][fu] && spatz_mem_req_ready[intf][fu]) + mem_pending_d[intf][fu]++; - // Response used - if (commit_insn_q.is_load && rob_rvalid[port] && rob_pop[port]) - mem_pending_d[port]--; + // Response used + if (commit_insn_q.is_load && rob_rvalid[intf][fu] && rob_pop[intf][fu]) + mem_pending_d[intf][fu]--; + end end end // verilator lint_off LATCH always_comb begin - vrf_raddr_o = {vs2_vreg_addr, vd_vreg_addr}; - vrf_re_o = '0; - vrf_req_d = '0; - vrf_req_valid_d = 1'b0; - - rob_wdata = '0; - rob_wid = '0; - rob_push = '0; - rob_pop = '0; - rob_req_id = '0; - - mem_req_id = '0; - mem_req_data = '0; - mem_req_strb = '0; - mem_req_svalid = '0; - mem_req_lvalid = '0; - mem_req_last = '0; - - // Propagate request ID - vrf_req_d.rsp.id = commit_insn_q.id; - vrf_req_d.rsp_valid = commit_insn_valid && &commit_finished_d && 
mem_insn_finished_d[commit_insn_q.id]; - - // Request indexes - vrf_re_o[1] = mem_is_indexed; - - // Count which vs2 element we should load (indexed loads) - vs2_elem_id_d = vs2_elem_id_q; - if (&(pending_index ^ ~mem_operation_valid) && mem_is_indexed) - vs2_elem_id_d = vs2_elem_id_q + 1; - if (mem_spatz_req_ready) - vs2_elem_id_d = '0; - - if (commit_insn_valid && commit_insn_q.is_load) begin - // If we have a valid element in the buffer, store it back to the register file - if (state_q == VLSU_RunningLoad && |commit_operation_valid) begin - // Enable write back to the VRF if we have a valid element in all buffers that still have to write something back. - vrf_req_d.waddr = vd_vreg_addr; - vrf_req_valid_d = &(rob_rvalid | ~mem_pending) && |mem_pending; - - for (int unsigned port = 0; port < NrMemPorts; port++) begin - automatic logic [63:0] data = rob_rdata[port]; - - // Shift data to correct position if we have an unaligned memory request - if (MAXEW == EW_32) - unique case ((commit_insn_q.is_strided || commit_insn_q.is_indexed) ? vreg_addr_offset[port] : commit_insn_q.rs1[1:0]) - 2'b01: data = {data[7:0], data[31:8]}; - 2'b10: data = {data[15:0], data[31:16]}; - 2'b11: data = {data[23:0], data[31:24]}; - default: data = data; - endcase - else - unique case ((commit_insn_q.is_strided || commit_insn_q.is_indexed) ? vreg_addr_offset[port] : commit_insn_q.rs1[2:0]) - 3'b001: data = {data[7:0], data[63:8]}; - 3'b010: data = {data[15:0], data[63:16]}; - 3'b011: data = {data[23:0], data[63:24]}; - 3'b100: data = {data[31:0], data[63:32]}; - 3'b101: data = {data[39:0], data[63:40]}; - 3'b110: data = {data[47:0], data[63:48]}; - 3'b111: data = {data[55:0], data[63:56]}; - default: data = data; - endcase - - // Pop stored element and free space in buffer - rob_pop[port] = rob_rvalid[port] && vrf_req_valid_d && vrf_req_ready_d && commit_counter_en[port]; - - // Shift data to correct position if we have a strided memory access - if (commit_insn_q.is_strided || commit_insn_q.is_indexed) + for (int intf = 0; intf < NrInterfaces; intf++) begin + vrf_raddr_o[intf] = {vs2_vreg_addr[intf], vd_vreg_addr[intf]}; + vrf_re_o[intf] = '0; + vrf_req_d[intf] = '0; + vrf_req_valid_d[intf] = '0; + + rob_wdata = '0; + rob_wid = '0; + rob_push = '0; + rob_pop = '0; + rob_req_id = '0; + + mem_req_id = '0; + mem_req_data = '0; + mem_req_strb = '0; + mem_req_svalid = '0; + mem_req_lvalid = '0; + mem_req_last = '0; + + // Propagate request ID + vrf_req_d[intf].rsp.id = commit_insn_q.id; + vrf_req_d[intf].rsp_valid = commit_insn_valid && &commit_finished_d[intf] && mem_insn_finished_d[commit_insn_q.id]; + + // Request indexes + vrf_re_o[intf][1] = mem_is_indexed; + + // Count which vs2 element we should load (indexed loads) + vs2_elem_id_d = vs2_elem_id_q; + if (&(pending_index ^ ~mem_operation_valid) && mem_is_indexed) + vs2_elem_id_d = vs2_elem_id_q + 1; + if (mem_spatz_req_ready) + vs2_elem_id_d = '0; + + if (commit_insn_valid && commit_insn_q.is_load) begin + // If we have a valid element in the buffer, store it back to the register file + if (state_q == VLSU_RunningLoad && |commit_operation_valid[intf]) begin + // Enable write back from an interface to the VRF if we have a valid element in all + // the interface buffers that still have to write something back. 
+ vrf_req_d[intf].waddr = vd_vreg_addr[intf]; + vrf_req_valid_d[intf] = &(rob_rvalid[intf] | ~mem_pending[intf]) && |mem_pending[intf]; + + for (int unsigned fu = 0; fu < N_FU; fu++) begin + automatic int unsigned port = intf * N_FU + fu; + + automatic logic [63:0] data = rob_rdata[intf][fu]; + + // Shift data to correct position if we have an unaligned memory request if (MAXEW == EW_32) - unique case ((commit_insn_q.is_strided || commit_insn_q.is_indexed) ? vreg_addr_offset[port] : commit_insn_q.rs1[1:0]) - 2'b01: data = {data[7:0], data[31:8]}; + unique case ((commit_insn_q.is_strided || commit_insn_q.is_indexed) ? vreg_addr_offset[intf][fu] : commit_insn_q.rs1[1:0]) + 2'b01: data = {data[7:0], data[31:8]}; 2'b10: data = {data[15:0], data[31:16]}; - 2'b11: data = {data[7:0], data[31:8]}; + 2'b11: data = {data[23:0], data[31:24]}; default: data = data; endcase else - unique case (commit_counter_q[port][2:0]) - 3'b001: data = {data[55:0], data[63:56]}; - 3'b010: data = {data[47:0], data[63:48]}; - 3'b011: data = {data[39:0], data[63:40]}; + unique case ((commit_insn_q.is_strided || commit_insn_q.is_indexed) ? vreg_addr_offset[intf][fu] : commit_insn_q.rs1[2:0]) + 3'b001: data = {data[7:0], data[63:8]}; + 3'b010: data = {data[15:0], data[63:16]}; + 3'b011: data = {data[23:0], data[63:24]}; 3'b100: data = {data[31:0], data[63:32]}; - 3'b101: data = {data[23:0], data[63:24]}; - 3'b110: data = {data[15:0], data[63:16]}; - 3'b111: data = {data[7:0], data[63:8]}; + 3'b101: data = {data[39:0], data[63:40]}; + 3'b110: data = {data[47:0], data[63:48]}; + 3'b111: data = {data[55:0], data[63:56]}; default: data = data; endcase - vrf_req_d.wdata[ELEN*port +: ELEN] = data; - - // Create write byte enable mask for register file - if (commit_counter_en[port]) - if (commit_is_single_element_operation) begin - automatic logic [$clog2(ELENB)-1:0] shift = commit_counter_q[port][$clog2(ELENB)-1:0]; - automatic logic [ELENB-1:0] mask = '1; - case (commit_insn_q.vsew) - EW_8 : mask = 1; - EW_16: mask = 3; - EW_32: mask = 15; - default: mask = '1; - endcase - vrf_req_d.wbe[ELENB*port +: ELENB] = mask << shift; - end else - for (int unsigned k = 0; k < ELENB; k++) - vrf_req_d.wbe[ELENB*port+k] = k < commit_counter_delta[port]; + + // Pop stored element and free space in buffer + rob_pop[intf][fu] = rob_rvalid[intf][fu] && vrf_req_valid_d[intf] && vrf_req_ready_d[intf] && commit_counter_en[intf][fu]; + + // Shift data to correct position if we have a strided memory access + if (commit_insn_q.is_strided || commit_insn_q.is_indexed) + if (MAXEW == EW_32) + unique case (commit_counter_q[intf][fu][1:0]) + 2'b01: data = {data[23:0], data[31:24]}; + 2'b10: data = {data[15:0], data[31:16]}; + 2'b11: data = {data[7:0], data[31:8]}; + default: data = data; + endcase + else + unique case (commit_counter_q[intf][fu][2:0]) + 3'b001: data = {data[55:0], data[63:56]}; + 3'b010: data = {data[47:0], data[63:48]}; + 3'b011: data = {data[39:0], data[63:40]}; + 3'b100: data = {data[31:0], data[63:32]}; + 3'b101: data = {data[23:0], data[63:24]}; + 3'b110: data = {data[15:0], data[63:16]}; + 3'b111: data = {data[7:0], data[63:8]}; + default: data = data; + endcase + vrf_req_d[intf].wdata[ELEN*port +: ELEN] = data; + + // Create write byte enable mask for register file + if (commit_counter_en[intf][fu]) + if (commit_is_single_element_operation) begin + automatic logic [$clog2(ELENB)-1:0] shift = commit_counter_q[intf][fu][$clog2(ELENB)-1:0]; + automatic logic [ELENB-1:0] mask = '1; + case (commit_insn_q.vsew) + EW_8 : mask = 1; + EW_16: mask = 3; + EW_32: mask = 15; + default: mask = '1; + endcase + 
vrf_req_d[intf].wbe[ELENB*port +: ELENB] = mask << shift; + end else + for (int unsigned k = 0; k < ELENB; k++) + vrf_req_d[intf].wbe[ELENB*port+k] = k < commit_counter_delta[intf][fu]; + end end - end - for (int unsigned port = 0; port < NrMemPorts; port++) begin - // Write the load result to the buffer - rob_wdata[port] = spatz_mem_rsp_i[port].data; + for (int unsigned fu = 0; fu < N_FU; fu++) begin + automatic int unsigned port = intf * N_FU + fu; + + // Write the load result to the buffer + rob_wdata[intf][fu] = spatz_mem_rsp_i[port].data; `ifdef MEMPOOL_SPATZ - rob_wid[port] = spatz_mem_rsp_i[port].id; - // Need to consider out-of-order memory response - rob_push[port] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && spatz_mem_rsp_i[port].write == '0; + rob_wid[intf][fu] = spatz_mem_rsp_i[port].id; + // Need to consider out-of-order memory response + rob_push[intf][fu] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && spatz_mem_rsp_i[port].write == '0; `else - rob_push[port] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && store_count_q[port] == '0; + rob_push[intf][fu] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && store_count_q[intf][fu] == '0; `endif - if (!rob_full[port] && !offset_queue_full[port] && mem_operation_valid[port]) begin - rob_req_id[port] = spatz_mem_req_ready[port] & spatz_mem_req_valid[port]; - mem_req_lvalid[port] = (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[port])) && mem_spatz_req.op_mem.is_load; - mem_req_id[port] = rob_id[port]; - mem_req_last[port] = mem_operation_last[port]; + if (!rob_full[intf][fu] && !offset_queue_full[intf][fu] && mem_operation_valid[intf][fu]) begin + rob_req_id[intf][fu] = spatz_mem_req_ready[intf][fu] & spatz_mem_req_valid[intf][fu]; + mem_req_lvalid[intf][fu] = (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[intf][fu])) && mem_spatz_req.op_mem.is_load; + mem_req_id[intf][fu] = rob_id[intf][fu]; + mem_req_last[intf][fu] = mem_operation_last[intf][fu]; + end end - end - // Store operation - end else begin - // Read new element from the register file and store it to the buffer - if (state_q == VLSU_RunningStore && !(|rob_full) && |commit_operation_valid) begin - vrf_re_o[0] = 1'b1; - - for (int unsigned port = 0; port < NrMemPorts; port++) begin - rob_wdata[port] = vrf_rdata_i[0][ELEN*port +: ELEN]; - rob_wid[port] = rob_id[port]; - rob_req_id[port] = vrf_rvalid_i[0] && (!mem_is_indexed || vrf_rvalid_i[1]); - rob_push[port] = rob_req_id[port]; + // Store operation + end else begin + // Read new element from the register file and store it to the buffer + if (state_q == VLSU_RunningStore && !(|rob_full) && |commit_operation_valid) begin + vrf_re_o[0] = 1'b1; + + for (int unsigned fu = 0; fu < N_FU; fu++) begin + automatic int unsigned port = intf * N_FU + fu; + + rob_wdata[intf][fu] = vrf_rdata_i[0][ELEN*port +: ELEN]; + rob_wid[intf][fu] = rob_id[intf][fu]; + rob_req_id[intf][fu] = vrf_rvalid_i[0] && (!mem_is_indexed || vrf_rvalid_i[1]); + rob_push[intf][fu] = rob_req_id[intf][fu]; + end end - end - - for (int unsigned port = 0; port < NrMemPorts; port++) begin - // Read element from buffer and execute memory request - if (mem_operation_valid[port]) begin - automatic logic [63:0] data = rob_rdata[port]; - // Shift data to lsb if we have a strided or indexed memory access - if (mem_is_strided || mem_is_indexed) + for (int unsigned fu = 0; fu < N_FU; fu++) begin + // Read element from buffer and execute memory request + if 
(mem_operation_valid[intf][fu]) begin + automatic logic [63:0] data = rob_rdata[intf][fu]; + + // Shift data to lsb if we have a strided or indexed memory access + if (mem_is_strided || mem_is_indexed) + if (MAXEW == EW_32) + unique case (mem_counter_q[intf][fu][1:0]) + 2'b01: data = {data[7:0], data[31:8]}; + 2'b10: data = {data[15:0], data[31:16]}; + 2'b11: data = {data[23:0], data[31:24]}; + default:; // Do nothing + endcase + else + unique case (mem_counter_q[intf][fu][2:0]) + 3'b001: data = {data[7:0], data[63:8]}; + 3'b010: data = {data[15:0], data[63:16]}; + 3'b011: data = {data[23:0], data[63:24]}; + 3'b100: data = {data[31:0], data[63:32]}; + 3'b101: data = {data[39:0], data[63:40]}; + 3'b110: data = {data[47:0], data[63:48]}; + 3'b111: data = {data[55:0], data[63:56]}; + default:; // Do nothing + endcase + + // Shift data to correct position if we have an unaligned memory request if (MAXEW == EW_32) - unique case (mem_counter_q[port][1:0]) - 2'b01: data = {data[7:0], data[31:8]}; - 2'b10: data = {data[15:0], data[31:16]}; - 2'b11: data = {data[23:0], data[31:24]}; - default:; // Do nothing + unique case ((mem_is_strided || mem_is_indexed) ? mem_req_addr_offset[intf][fu] : mem_spatz_req.rs1[1:0]) + 2'b01: mem_req_data[intf][fu] = {data[23:0], data[31:24]}; + 2'b10: mem_req_data[intf][fu] = {data[15:0], data[31:16]}; + 2'b11: mem_req_data[intf][fu] = {data[7:0], data[31:8]}; + default: mem_req_data[intf][fu] = data; endcase else - unique case (mem_counter_q[port][2:0]) - 3'b001: data = {data[7:0], data[63:8]}; - 3'b010: data = {data[15:0], data[63:16]}; - 3'b011: data = {data[23:0], data[63:24]}; - 3'b100: data = {data[31:0], data[63:32]}; - 3'b101: data = {data[39:0], data[63:40]}; - 3'b110: data = {data[47:0], data[63:48]}; - 3'b111: data = {data[55:0], data[63:56]}; - default:; // Do nothing + unique case ((mem_is_strided || mem_is_indexed) ? mem_req_addr_offset[intf][fu] : mem_spatz_req.rs1[2:0]) + 3'b001: mem_req_data[intf][fu] = {data[55:0], data[63:56]}; + 3'b010: mem_req_data[intf][fu] = {data[47:0], data[63:48]}; + 3'b011: mem_req_data[intf][fu] = {data[39:0], data[63:40]}; + 3'b100: mem_req_data[intf][fu] = {data[31:0], data[63:32]}; + 3'b101: mem_req_data[intf][fu] = {data[23:0], data[63:24]}; + 3'b110: mem_req_data[intf][fu] = {data[15:0], data[63:16]}; + 3'b111: mem_req_data[intf][fu] = {data[7:0], data[63:8]}; + default: mem_req_data[intf][fu] = data; endcase - // Shift data to correct position if we have an unaligned memory request - if (MAXEW == EW_32) - unique case ((mem_is_strided || mem_is_indexed) ? mem_req_addr_offset[port] : mem_spatz_req.rs1[1:0]) - 2'b01: mem_req_data[port] = {data[23:0], data[31:24]}; - 2'b10: mem_req_data[port] = {data[15:0], data[31:16]}; - 2'b11: mem_req_data[port] = {data[7:0], data[31:8]}; - default: mem_req_data[port] = data; - endcase - else - unique case ((mem_is_strided || mem_is_indexed) ? 
mem_req_addr_offset[port] : mem_spatz_req.rs1[2:0]) - 3'b001: mem_req_data[port] = {data[55:0], data[63:56]}; - 3'b010: mem_req_data[port] = {data[47:0], data[63:48]}; - 3'b011: mem_req_data[port] = {data[39:0], data[63:40]}; - 3'b100: mem_req_data[port] = {data[31:0], data[63:32]}; - 3'b101: mem_req_data[port] = {data[23:0], data[63:24]}; - 3'b110: mem_req_data[port] = {data[15:0], data[63:16]}; - 3'b111: mem_req_data[port] = {data[7:0], data[63:8]}; - default: mem_req_data[port] = data; - endcase - - mem_req_svalid[port] = rob_rvalid[port] && (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[port])) && !mem_spatz_req.op_mem.is_load; - mem_req_id[port] = rob_rid[port]; - mem_req_last[port] = mem_operation_last[port]; - rob_pop[port] = spatz_mem_req_valid[port] && spatz_mem_req_ready[port]; - - // Create byte enable signal for memory request - if (mem_is_single_element_operation) begin - automatic logic [$clog2(ELENB)-1:0] shift = (mem_is_strided || mem_is_indexed) ? mem_req_addr_offset[port] : mem_counter_q[port][$clog2(ELENB)-1:0] + commit_insn_q.rs1[int'(MAXEW)-1:0]; - automatic logic [MemDataWidthB-1:0] mask = '1; - case (mem_spatz_req.vtype.vsew) - EW_8 : mask = 1; - EW_16: mask = 3; - EW_32: mask = 15; - default: mask = '1; - endcase - mem_req_strb[port] = mask << shift; - end else - for (int unsigned k = 0; k < ELENB; k++) - mem_req_strb[port][k] = k < mem_counter_delta[port]; - end else begin - // Clear empty buffer id requests - if (!rob_empty[port]) - rob_pop[port] = 1'b1; + mem_req_svalid[intf][fu] = rob_rvalid[intf][fu] && (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[intf][fu])) && !mem_spatz_req.op_mem.is_load; + mem_req_id[intf][fu] = rob_rid[intf][fu]; + mem_req_last[intf][fu] = mem_operation_last[intf][fu]; + rob_pop[intf][fu] = spatz_mem_req_valid[intf][fu] && spatz_mem_req_ready[intf][fu]; + + // Create byte enable signal for memory request + if (mem_is_single_element_operation) begin + automatic logic [$clog2(ELENB)-1:0] shift = (mem_is_strided || mem_is_indexed) ? 
mem_req_addr_offset[intf][fu] : mem_counter_q[intf][fu][$clog2(ELENB)-1:0] + commit_insn_q.rs1[int'(MAXEW)-1:0]; + automatic logic [MemDataWidthB-1:0] mask = '1; + case (mem_spatz_req.vtype.vsew) + EW_8 : mask = 1; + EW_16: mask = 3; + EW_32: mask = 15; + default: mask = '1; + endcase + mem_req_strb[intf][fu] = mask << shift; + end else + for (int unsigned k = 0; k < ELENB; k++) + mem_req_strb[intf][fu][k] = k < mem_counter_delta[intf][fu]; + end else begin + // Clear empty buffer id requests + if (!rob_empty[intf][fu]) + rob_pop[intf][fu] = 1'b1; + end end end end @@ -987,40 +1026,45 @@ module spatz_vlsu // verilator lint_on LATCH // Create memory requests - for (genvar port = 0; port < NrMemPorts; port++) begin : gen_mem_req - spill_register #( - .T(spatz_mem_req_t) - ) i_spatz_mem_req_register ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .data_i (spatz_mem_req[port] ), - .valid_i (spatz_mem_req_valid[port] ), - .ready_o (spatz_mem_req_ready[port] ), - .data_o (spatz_mem_req_o[port] ), - .valid_o (spatz_mem_req_valid_o[port]), - .ready_i (spatz_mem_req_ready_i[port]) - ); + for (genvar intf = 0; intf < NrInterfaces; intf++) begin : gen_mem_req + for (genvar fu = 0; fu < N_FU; fu++) begin : gen_mem_req_fu + localparam int unsigned port = intf * N_FU + fu; + + spill_register #( + .T(spatz_mem_req_t) + ) i_spatz_mem_req_register ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .data_i (spatz_mem_req[intf][fu] ), + .valid_i (spatz_mem_req_valid[intf][fu] ), + .ready_o (spatz_mem_req_ready[intf][fu] ), + .data_o (spatz_mem_req_o[port] ), + .valid_o (spatz_mem_req_valid_o[port] ), + .ready_i (spatz_mem_req_ready_i[port] ) + ); `ifdef MEMPOOL_SPATZ - // ID is required in Mempool-Spatz - assign spatz_mem_req[port].id = mem_req_id[port]; - assign spatz_mem_req[port].addr = mem_req_addr[port]; - assign spatz_mem_req[port].mode = '0; // Request always uses user privilege level - assign spatz_mem_req[port].size = mem_spatz_req.vtype.vsew[1:0]; - assign spatz_mem_req[port].write = !mem_is_load; - assign spatz_mem_req[port].strb = mem_req_strb[port]; - assign spatz_mem_req[port].data = mem_req_data[port]; - assign spatz_mem_req[port].last = mem_req_last[port]; - assign spatz_mem_req[port].spec = 1'b0; // Request is never speculative - assign spatz_mem_req_valid[port] = mem_req_svalid[port] || mem_req_lvalid[port]; + // ID is required in Mempool-Spatz + assign spatz_mem_req[intf][fu].id = mem_req_id[intf][fu]; + assign spatz_mem_req[intf][fu].addr = mem_req_addr[intf][fu]; + assign spatz_mem_req[intf][fu].mode = '0; // Request always uses user privilege level + assign spatz_mem_req[intf][fu].size = mem_spatz_req.vtype.vsew[1:0]; + assign spatz_mem_req[intf][fu].write = !mem_is_load; + assign spatz_mem_req[intf][fu].strb = mem_req_strb[intf][fu]; + assign spatz_mem_req[intf][fu].data = mem_req_data[intf][fu]; + assign spatz_mem_req[intf][fu].last = mem_req_last[intf][fu]; + assign spatz_mem_req[intf][fu].spec = 1'b0; // Request is never speculative + assign spatz_mem_req_valid[intf][fu] = mem_req_svalid[intf][fu] || mem_req_lvalid[intf][fu]; `else - assign spatz_mem_req[port].addr = mem_req_addr[port]; - assign spatz_mem_req[port].write = !mem_is_load; - assign spatz_mem_req[port].amo = reqrsp_pkg::AMONone; - assign spatz_mem_req[port].data = mem_req_data[port]; - assign spatz_mem_req[port].strb = mem_req_strb[port]; - assign spatz_mem_req[port].user = '0; - assign spatz_mem_req_valid[port] = mem_req_svalid[port] || mem_req_lvalid[port]; + assign spatz_mem_req[intf][fu].addr = mem_req_addr[intf][fu]; + assign 
spatz_mem_req[intf][fu].write = !mem_is_load; + assign spatz_mem_req[intf][fu].amo = reqrsp_pkg::AMONone; + assign spatz_mem_req[intf][fu].data = mem_req_data[intf][fu]; + assign spatz_mem_req[intf][fu].strb = mem_req_strb[intf][fu]; + assign spatz_mem_req[intf][fu].user = '0; + assign spatz_mem_req_valid[intf][fu] = mem_req_svalid[intf][fu] || mem_req_lvalid[intf][fu]; `endif + end end ////////////////
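
For reference, the restructuring above consistently derives a flat memory-port index as port = intf * N_FU + fu while keeping the VRF side organised per interface. Below is a minimal, self-contained sketch of that index convention. It is not part of the patch: the module name vlsu_port_map_example and the signals req_valid_i/req_valid_o are invented for illustration, and NrMemPorts == NrInterfaces * N_FU is assumed, as the new code implies.

  // Illustrative only: maps the per-interface, per-FU view used on the VRF side
  // onto the flat per-port view used on the memory side (spatz_mem_req_o / spatz_mem_rsp_i).
  module vlsu_port_map_example #(
    parameter  int unsigned NrInterfaces = 2,
    parameter  int unsigned N_FU         = 4,
    // Flat number of memory ports assumed by the patch.
    localparam int unsigned NrMemPorts   = NrInterfaces * N_FU
  ) (
    input  logic [NrInterfaces-1:0][N_FU-1:0] req_valid_i, // per-interface, per-FU view
    output logic [NrMemPorts-1:0]             req_valid_o  // flat, memory-side view
  );
    for (genvar intf = 0; intf < NrInterfaces; intf++) begin : gen_intf
      for (genvar fu = 0; fu < N_FU; fu++) begin : gen_fu
        // Flat port index: with the default parameters, interface 1 / FU 2
        // drives flat port 1 * 4 + 2 = 6.
        localparam int unsigned port = intf * N_FU + fu;
        assign req_valid_o[port] = req_valid_i[intf][fu];
      end : gen_fu
    end : gen_intf
  endmodule

This is the same mapping the patch uses when the per-interface store counters and ROB logic index the flat spatz_mem_rsp_i[port] and spatz_mem_req_o[port] ports.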