From eac60af1a9d194fdc2c0aeb8e798353b00fa5fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=B4me?= <124148386+cathales@users.noreply.github.com> Date: Sun, 9 Jun 2024 20:47:09 +0200 Subject: [PATCH] superscalar: add a second issue port (#2209) --- core/branch_unit.sv | 2 - core/cache_subsystem/cache_ctrl.sv | 6 +- core/cva6.sv | 36 +- core/cva6_rvfi.sv | 46 +- core/cva6_rvfi_probes.sv | 12 +- core/ex_stage.sv | 134 +++-- core/include/build_config_pkg.sv | 2 +- core/include/rvfi_types.svh | 16 +- core/issue_read_operands.sv | 771 +++++++++++++++++------------ core/issue_stage.sv | 75 ++- core/scoreboard.sv | 237 ++++----- 11 files changed, 763 insertions(+), 574 deletions(-) diff --git a/core/branch_unit.sv b/core/branch_unit.sv index 43eabdb1e3..5a7439f84d 100644 --- a/core/branch_unit.sv +++ b/core/branch_unit.sv @@ -33,8 +33,6 @@ module branch_unit #( input logic [CVA6Cfg.VLEN-1:0] pc_i, // Instruction is compressed - ISSUE_STAGE input logic is_compressed_instr_i, - // any functional unit is valid, check that there is no accidental mis-predict - TO_BE_COMPLETED - input logic fu_valid_i, // Branch unit instruction is valid - ISSUE_STAGE input logic branch_valid_i, // ALU branch compare result - ALU diff --git a/core/cache_subsystem/cache_ctrl.sv b/core/cache_subsystem/cache_ctrl.sv index f787937257..445c4927d6 100644 --- a/core/cache_subsystem/cache_ctrl.sv +++ b/core/cache_subsystem/cache_ctrl.sv @@ -311,9 +311,9 @@ module cache_ctrl // two memory look-ups on a single-ported SRAM and therefore is non-atomic if (!mshr_index_matches_i) begin // store data, write dirty bit - req_o = hit_way_q; - addr_o = mem_req_q.index; - we_o = 1'b1; + req_o = hit_way_q; + addr_o = mem_req_q.index; + we_o = 1'b1; be_o.vldrty = hit_way_q; diff --git a/core/cva6.sv b/core/cva6.sv index df8ed3ec9e..e63601d3cb 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -370,13 +370,13 @@ module cva6 // -------------- // ISSUE <-> EX // -------------- - logic [CVA6Cfg.VLEN-1:0] 
rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda - logic [CVA6Cfg.VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb + logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda + logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb - fu_data_t fu_data_id_ex; + fu_data_t [SUPERSCALAR:0] fu_data_id_ex; logic [CVA6Cfg.VLEN-1:0] pc_id_ex; logic is_compressed_instr_id_ex; - logic [31:0] tinst_ex; + logic [SUPERSCALAR:0][31:0] tinst_ex; // fixed latency units logic flu_ready_ex_id; logic [CVA6Cfg.TRANS_ID_BITS-1:0] flu_trans_id_ex_id; @@ -384,14 +384,14 @@ module cva6 logic [CVA6Cfg.XLEN-1:0] flu_result_ex_id; exception_t flu_exception_ex_id; // ALU - logic alu_valid_id_ex; + logic [SUPERSCALAR:0] alu_valid_id_ex; // Branches and Jumps - logic branch_valid_id_ex; + logic [SUPERSCALAR:0] branch_valid_id_ex; branchpredict_sbe_t branch_predict_id_ex; logic resolve_branch_ex_id; // LSU - logic lsu_valid_id_ex; + logic [SUPERSCALAR:0] lsu_valid_id_ex; logic lsu_ready_ex_id; logic [CVA6Cfg.TRANS_ID_BITS-1:0] load_trans_id_ex_id; @@ -404,10 +404,10 @@ module cva6 logic store_valid_ex_id; exception_t store_exception_ex_id; // MULT - logic mult_valid_id_ex; + logic [SUPERSCALAR:0] mult_valid_id_ex; // FPU logic fpu_ready_ex_id; - logic fpu_valid_id_ex; + logic [SUPERSCALAR:0] fpu_valid_id_ex; logic [1:0] fpu_fmt_id_ex; logic [2:0] fpu_rm_id_ex; logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; @@ -427,7 +427,7 @@ module cva6 logic acc_resp_fflags_valid; logic single_step_acc_commit; // CSR - logic csr_valid_id_ex; + logic [SUPERSCALAR:0] csr_valid_id_ex; logic csr_hs_ld_st_inst_ex; // CVXIF logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_trans_id_ex_id; @@ -435,7 +435,7 @@ module cva6 logic x_valid_ex_id; exception_t x_exception_ex_id; logic x_we_ex_id; - logic x_issue_valid_id_ex; + logic [SUPERSCALAR:0] x_issue_valid_id_ex; logic 
x_issue_ready_ex_id; logic [31:0] x_off_instr_id_ex; // -------------- @@ -462,7 +462,7 @@ module cva6 // -------------- // RVFI // -------------- - logic [CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer; logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_commit_pointer; // -------------- // COMMIT <-> ID @@ -1396,7 +1396,7 @@ module cva6 .issue_instr_i (issue_instr_id_acc), .issue_instr_hs_i (issue_instr_hs_id_acc), .issue_stall_o (stall_acc_id), - .fu_data_i (fu_data_id_ex), + .fu_data_i (fu_data_id_ex[0]), .commit_instr_i (commit_instr_id_commit), .commit_st_barrier_i (fence_i_commit_controller | fence_commit_controller), .acc_trans_id_o (acc_trans_id_ex_id), @@ -1626,8 +1626,8 @@ module cva6 .commit_pointer_i(rvfi_commit_pointer), .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), - .decoded_instr_valid_i (issue_entry_valid_id_issue[0]), - .decoded_instr_ack_i (issue_instr_issue_id[0]), + .decoded_instr_valid_i (issue_entry_valid_id_issue), + .decoded_instr_ack_i (issue_instr_issue_id), .rs1_forwarding_i(rs1_forwarding_id_ex), .rs2_forwarding_i(rs2_forwarding_id_ex), @@ -1649,5 +1649,11 @@ module cva6 ); + //pragma translate_off + initial begin + assert (!(ariane_pkg::SUPERSCALAR && CVA6Cfg.EnableAccelerator)) + else $fatal(1, "Accelerator is not supported by superscalar pipeline"); + end + //pragma translate_on endmodule // ariane diff --git a/core/cva6_rvfi.sv b/core/cva6_rvfi.sv index bc4dd4d0a8..7fd2342c43 100644 --- a/core/cva6_rvfi.sv +++ b/core/cva6_rvfi.sv @@ -51,21 +51,21 @@ module cva6_rvfi localparam logic [63:0] SMODE_STATUS_READ_MASK = ariane_pkg::smode_status_read_mask(CVA6Cfg); logic flush; - logic issue_instr_ack; + logic [ariane_pkg::SUPERSCALAR:0] issue_instr_ack; logic [ariane_pkg::SUPERSCALAR:0] fetch_entry_valid; logic [ariane_pkg::SUPERSCALAR:0][31:0] instruction; logic [ariane_pkg::SUPERSCALAR:0] is_compressed; logic 
[ariane_pkg::SUPERSCALAR:0][31:0] truncated; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer; logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] commit_pointer; logic flush_unissued_instr; - logic decoded_instr_valid; - logic decoded_instr_ack; + logic [ariane_pkg::SUPERSCALAR:0] decoded_instr_valid; + logic [ariane_pkg::SUPERSCALAR:0] decoded_instr_ack; - logic [CVA6Cfg.XLEN-1:0] rs1_forwarding; - logic [CVA6Cfg.XLEN-1:0] rs2_forwarding; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs1_forwarding; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs2_forwarding; logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.VLEN-1:0] commit_instr_pc; fu_op [CVA6Cfg.NrCommitPorts-1:0] commit_instr_op; @@ -73,7 +73,7 @@ module cva6_rvfi logic [CVA6Cfg.NrCommitPorts-1:0][REG_ADDR_SIZE-1:0] commit_instr_rs2; logic [CVA6Cfg.NrCommitPorts-1:0][REG_ADDR_SIZE-1:0] commit_instr_rd; logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] commit_instr_result; - logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.VLEN-1:0] commit_instr_valid; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_instr_valid; logic [CVA6Cfg.XLEN-1:0] ex_commit_cause; logic ex_commit_valid; @@ -174,7 +174,11 @@ module cva6_rvfi issue_n = issue_q; took0 = 1'b0; - if (issue_instr_ack) issue_n[0].valid = 1'b0; + for (int unsigned i = 0; i <= ariane_pkg::SUPERSCALAR; i++) begin + if (issue_instr_ack[i]) begin + issue_n[i].valid = 1'b0; + end + end if (!issue_n[ariane_pkg::SUPERSCALAR].valid) begin issue_n[ariane_pkg::SUPERSCALAR].valid = fetch_entry_valid[0]; @@ -229,16 +233,18 @@ module cva6_rvfi always_comb begin : issue_fifo mem_n = mem_q; - if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin - mem_n[issue_pointer] = '{ - rs1_rdata: rs1_forwarding, - rs2_rdata: rs2_forwarding, - lsu_addr: '0, - lsu_rmask: '0, - lsu_wmask: '0, - lsu_wdata: '0, - instr: issue_q[0].instr - }; + for (int unsigned i = 0; i <= 
ariane_pkg::SUPERSCALAR; i++) begin + if (decoded_instr_valid[i] && decoded_instr_ack[i] && !flush_unissued_instr) begin + mem_n[issue_pointer[i]] = '{ + rs1_rdata: rs1_forwarding[i], + rs2_rdata: rs2_forwarding[i], + lsu_addr: '0, + lsu_rmask: '0, + lsu_wmask: '0, + lsu_wdata: '0, + instr: issue_q[i].instr + }; + end end if (lsu_rmask != 0) begin @@ -266,7 +272,7 @@ module cva6_rvfi always_ff @(posedge clk_i) begin for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin logic exception; - exception = commit_instr_valid[i][0] && ex_commit_valid; + exception = commit_instr_valid[i] && ex_commit_valid; rvfi_instr_o[i].valid <= (commit_ack[i] && !ex_commit_valid) || (exception && (ex_commit_cause == riscv::ENV_CALL_MMODE || ex_commit_cause == riscv::ENV_CALL_SMODE || @@ -350,7 +356,7 @@ module cva6_rvfi `CONNECT_RVFI_FULL(1'b1, mstatus, csr.mstatus_extended) bit [31:0] mstatush_q; - `CONNECT_RVFI_FULL(1'b1, mstatush, mstatush_q) + `CONNECT_RVFI_FULL(1'b1, mstatush, mstatush_q) `CONNECT_RVFI_FULL(1'b1, misa, IsaCode) diff --git a/core/cva6_rvfi_probes.sv b/core/cva6_rvfi_probes.sv index f0a9c5e21f..a150c320fb 100644 --- a/core/cva6_rvfi_probes.sv +++ b/core/cva6_rvfi_probes.sv @@ -28,15 +28,15 @@ module cva6_rvfi_probes input logic [SUPERSCALAR:0][31:0] instruction_i, input logic [SUPERSCALAR:0] is_compressed_i, - input logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_i, + input logic [ SUPERSCALAR : 0][CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_i, input logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] commit_pointer_i, input logic flush_unissued_instr_i, - input logic decoded_instr_valid_i, - input logic decoded_instr_ack_i, + input logic [SUPERSCALAR:0] decoded_instr_valid_i, + input logic [SUPERSCALAR:0] decoded_instr_ack_i, - input logic [CVA6Cfg.XLEN-1:0] rs1_forwarding_i, - input logic [CVA6Cfg.XLEN-1:0] rs2_forwarding_i, + input logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs1_forwarding_i, + input logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs2_forwarding_i, 
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, input exception_t ex_commit_i, @@ -63,7 +63,7 @@ module cva6_rvfi_probes instr = '0; instr.flush = flush_i; - instr.issue_instr_ack = issue_instr_ack_i[0]; + instr.issue_instr_ack = issue_instr_ack_i; instr.fetch_entry_valid = fetch_entry_valid_i; instr.instruction = instruction_i; instr.is_compressed = is_compressed_i; diff --git a/core/ex_stage.sv b/core/ex_stage.sv index a07a8ec7e3..5a8f71f5ce 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -39,17 +39,17 @@ module ex_stage // Debug mode is enabled - CSR_REGFILE input logic debug_mode_i, // rs1 forwarding - ISSUE_STAGE - input logic [CVA6Cfg.VLEN-1:0] rs1_forwarding_i, + input logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs1_forwarding_i, // rs2 forwarding - ISSUE_STAGE - input logic [CVA6Cfg.VLEN-1:0] rs2_forwarding_i, + input logic [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs2_forwarding_i, // FU data useful to execute instruction - ISSUE_STAGE - input fu_data_t fu_data_i, + input fu_data_t [SUPERSCALAR:0] fu_data_i, // PC of the current instruction - ISSUE_STAGE input logic [CVA6Cfg.VLEN-1:0] pc_i, - // Report whether isntruction is compressed - ISSUE_STAGE + // Report whether instruction is compressed - ISSUE_STAGE input logic is_compressed_instr_i, // Report instruction encoding - ISSUE_STAGE - input logic [31:0] tinst_i, + input logic [SUPERSCALAR:0][31:0] tinst_i, // Fixed Latency Unit result - ISSUE_STAGE output logic [CVA6Cfg.XLEN-1:0] flu_result_o, // ID of the scoreboard entry at which a=to write back - ISSUE_STAGE @@ -61,9 +61,9 @@ module ex_stage // FLU result is valid - ISSUE_STAGE output logic flu_valid_o, // ALU instruction is valid - ISSUE_STAGE - input logic alu_valid_i, + input logic [SUPERSCALAR:0] alu_valid_i, // Branch unit instruction is valid - ISSUE_STAGE - input logic branch_valid_i, + input logic [SUPERSCALAR:0] branch_valid_i, // Information of branch prediction - ISSUE_STAGE input branchpredict_sbe_t branch_predict_i, // 
The branch engine uses the write back from the ALU - several_modules @@ -71,17 +71,17 @@ module ex_stage // Signaling that we resolved the branch - ISSUE_STAGE output logic resolve_branch_o, // CSR instruction is valid - ISSUE_STAGE - input logic csr_valid_i, + input logic [SUPERSCALAR:0] csr_valid_i, // CSR address to write - COMMIT_STAGE output logic [11:0] csr_addr_o, // CSR commit - COMMIT_STAGE input logic csr_commit_i, // MULT instruction is valid - ISSUE_STAGE - input logic mult_valid_i, + input logic [SUPERSCALAR:0] mult_valid_i, // LSU is ready - ISSUE_STAGE output logic lsu_ready_o, // LSU instruction is valid - ISSUE_STAGE - input logic lsu_valid_i, + input logic [SUPERSCALAR:0] lsu_valid_i, // Load result is valid - ISSUE_STAGE output logic load_valid_o, // Load result valid - ISSUE_STAGE @@ -113,7 +113,7 @@ module ex_stage // FU is ready - ISSUE_STAGE output logic fpu_ready_o, // FPU instruction is ready - ISSUE_STAGE - input logic fpu_valid_i, + input logic [SUPERSCALAR:0] fpu_valid_i, // FPU format - ISSUE_STAGE input logic [1:0] fpu_fmt_i, // FPU rm - ISSUE_STAGE @@ -131,7 +131,7 @@ module ex_stage // FPU exception - ISSUE_STAGE output exception_t fpu_exception_o, // CVXIF instruction is valid - ISSUE_STAGE - input logic x_valid_i, + input logic [SUPERSCALAR:0] x_valid_i, // CVXIF is ready - ISSUE_STAGE output logic x_ready_o, // undecoded instruction - ISSUE_STAGE @@ -263,18 +263,29 @@ module ex_stage logic [CVA6Cfg.TRANS_ID_BITS-1:0] mult_trans_id; logic mult_valid; - // 1. ALU (combinatorial) - // data silence operation - fu_data_t alu_data; - assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0; + logic [SUPERSCALAR:0] one_cycle_select; + assign one_cycle_select = alu_valid_i | branch_valid_i | csr_valid_i; + + fu_data_t one_cycle_data; + always_comb begin + // data silence operation + one_cycle_data = one_cycle_select[0] ? 
fu_data_i[0] : '0; + + if (SUPERSCALAR) begin + if (one_cycle_select[1]) begin + one_cycle_data = fu_data_i[1]; + end + end + end + // 1. ALU (combinatorial) alu #( .CVA6Cfg (CVA6Cfg), .fu_data_t(fu_data_t) ) alu_i ( .clk_i, .rst_ni, - .fu_data_i (alu_data), + .fu_data_i (one_cycle_data), .result_o (alu_result), .alu_branch_res_o(alu_branch_res) ); @@ -293,14 +304,12 @@ module ex_stage .rst_ni, .v_i, .debug_mode_i, - .fu_data_i, + .fu_data_i (one_cycle_data), .pc_i, .is_compressed_instr_i, - // any functional unit is valid, check that there is no accidental mis-predict - .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) , - .branch_valid_i, - .branch_comp_res_i(alu_branch_res), - .branch_result_o(branch_result), + .branch_valid_i (|branch_valid_i), + .branch_comp_res_i (alu_branch_res), + .branch_result_o (branch_result), .branch_predict_i, .resolved_branch_o, .resolve_branch_o, @@ -315,26 +324,26 @@ module ex_stage .clk_i, .rst_ni, .flush_i, - .fu_data_i, - .csr_valid_i, + .fu_data_i (one_cycle_data), + .csr_valid_i (|csr_valid_i), .csr_ready_o (csr_ready), .csr_result_o(csr_result), .csr_commit_i, .csr_addr_o ); - assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid; + assign flu_valid_o = |one_cycle_select | mult_valid; // result MUX always_comb begin // Branch result as default case flu_result_o = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, branch_result}; - flu_trans_id_o = fu_data_i.trans_id; + flu_trans_id_o = one_cycle_data.trans_id; // ALU result - if (alu_valid_i) begin + if (|alu_valid_i) begin flu_result_o = alu_result; // CSR result - end else if (csr_valid_i) begin + end else if (|csr_valid_i) begin flu_result_o = csr_result; end else if (mult_valid) begin flu_result_o = mult_result; @@ -350,7 +359,14 @@ module ex_stage // 4. Multiplication (Sequential) fu_data_t mult_data; // input silencing of multiplier - assign mult_data = mult_valid_i ? 
fu_data_i : '0; + always_comb begin + mult_data = mult_valid_i[0] ? fu_data_i[0] : '0; + if (SUPERSCALAR) begin + if (mult_valid_i[1]) begin + mult_data = fu_data_i[1]; + end + end + end mult #( .CVA6Cfg (CVA6Cfg), @@ -359,7 +375,7 @@ module ex_stage .clk_i, .rst_ni, .flush_i, - .mult_valid_i, + .mult_valid_i (|mult_valid_i), .fu_data_i (mult_data), .result_o (mult_result), .mult_valid_o (mult_valid), @@ -373,7 +389,14 @@ module ex_stage generate if (CVA6Cfg.FpPresent) begin : fpu_gen fu_data_t fpu_data; - assign fpu_data = fpu_valid_i ? fu_data_i : '0; + always_comb begin + fpu_data = fpu_valid_i[0] ? fu_data_i[0] : '0; + if (SUPERSCALAR) begin + if (fpu_valid_i[1]) begin + fpu_data = fu_data_i[1]; + end + end + end fpu_wrap #( .CVA6Cfg(CVA6Cfg), @@ -383,7 +406,7 @@ module ex_stage .clk_i, .rst_ni, .flush_i, - .fpu_valid_i, + .fpu_valid_i(|fpu_valid_i), .fpu_ready_o, .fu_data_i(fpu_data), .fpu_fmt_i, @@ -391,7 +414,7 @@ module ex_stage .fpu_frm_i, .fpu_prec_i, .fpu_trans_id_o, - .result_o (fpu_result_o), + .result_o(fpu_result_o), .fpu_valid_o, .fpu_exception_o ); @@ -408,8 +431,18 @@ module ex_stage // Load-Store Unit // ---------------- fu_data_t lsu_data; + logic [31:0] lsu_tinst; + always_comb begin + lsu_data = lsu_valid_i[0] ? fu_data_i[0] : '0; + lsu_tinst = tinst_i[0]; - assign lsu_data = lsu_valid_i ? fu_data_i : '0; + if (SUPERSCALAR) begin + if (lsu_valid_i[1]) begin + lsu_data = fu_data_i[1]; + lsu_tinst = tinst_i[1]; + end + end + end load_store_unit #( .CVA6Cfg (CVA6Cfg), @@ -430,7 +463,7 @@ module ex_stage .no_st_pending_o, .fu_data_i (lsu_data), .lsu_ready_o, - .lsu_valid_i, + .lsu_valid_i (|lsu_valid_i), .load_trans_id_o, .load_result_o, .load_valid_o, @@ -479,7 +512,7 @@ module ex_stage .amo_valid_commit_i, .amo_req_o, .amo_resp_i, - .tinst_i, + .tinst_i (lsu_tinst), .pmpcfg_i, .pmpaddr_i, .rvfi_lsu_ctrl_o, @@ -488,7 +521,15 @@ module ex_stage if (CVA6Cfg.CvxifEn) begin : gen_cvxif fu_data_t cvxif_data; - assign cvxif_data = x_valid_i ? 
fu_data_i : '0; + always_comb begin + cvxif_data = x_valid_i[0] ? fu_data_i[0] : '0; + if (SUPERSCALAR) begin + if (x_valid_i[1]) begin + cvxif_data = fu_data_i[1]; + end + end + end + cvxif_fu #( .CVA6Cfg(CVA6Cfg), .exception_t(exception_t), @@ -496,9 +537,9 @@ module ex_stage ) cvxif_fu_i ( .clk_i, .rst_ni, - .fu_data_i, + .fu_data_i (cvxif_data), .priv_lvl_i(ld_st_priv_lvl_i), - .x_valid_i, + .x_valid_i (|x_valid_i), .x_ready_o, .x_off_instr_i, .x_trans_id_o, @@ -525,15 +566,16 @@ module ex_stage current_instruction_is_hfence_vvma <= 1'b0; current_instruction_is_hfence_gvma <= 1'b0; end else begin + // TODO handle this with superscalar (issue only one instruction in this case?) if (flush_i) begin current_instruction_is_sfence_vma <= 1'b0; current_instruction_is_hfence_vvma <= 1'b0; current_instruction_is_hfence_gvma <= 1'b0; - end else if ((fu_data_i.operation == SFENCE_VMA && !v_i) && csr_valid_i) begin + end else if ((fu_data_i[0].operation == SFENCE_VMA && !v_i) && |csr_valid_i) begin current_instruction_is_sfence_vma <= 1'b1; - end else if (((fu_data_i.operation == SFENCE_VMA && v_i) || fu_data_i.operation == HFENCE_VVMA) && csr_valid_i) begin + end else if (((fu_data_i[0].operation == SFENCE_VMA && v_i) || fu_data_i[0].operation == HFENCE_VVMA) && |csr_valid_i) begin current_instruction_is_hfence_vvma <= 1'b1; - end else if ((fu_data_i.operation == HFENCE_GVMA) && csr_valid_i) begin + end else if ((fu_data_i[0].operation == HFENCE_GVMA) && |csr_valid_i) begin current_instruction_is_hfence_gvma <= 1'b1; end end @@ -547,7 +589,7 @@ module ex_stage end else begin if (flush_i) begin current_instruction_is_sfence_vma <= 1'b0; - end else if (fu_data_i.operation == SFENCE_VMA && csr_valid_i) begin + end else if (fu_data_i[0].operation == SFENCE_VMA && |csr_valid_i) begin current_instruction_is_sfence_vma <= 1'b1; end end @@ -562,7 +604,7 @@ module ex_stage vaddr_to_be_flushed <= '0; gpaddr_to_be_flushed <= '0; // if the current instruction in EX_STAGE is a 
sfence.vma, in the next cycle no writes will happen - end else if ((~(current_instruction_is_sfence_vma || current_instruction_is_hfence_vvma || current_instruction_is_hfence_gvma)) && (~((fu_data_i.operation == SFENCE_VMA || fu_data_i.operation == HFENCE_VVMA || fu_data_i.operation == HFENCE_GVMA ) && csr_valid_i))) begin + end else if ((~(current_instruction_is_sfence_vma || current_instruction_is_hfence_vvma || current_instruction_is_hfence_gvma)) && (~((fu_data_i[0].operation == SFENCE_VMA || fu_data_i[0].operation == HFENCE_VVMA || fu_data_i[0].operation == HFENCE_GVMA ) && |csr_valid_i))) begin - vaddr_to_be_flushed <= rs1_forwarding_i; - gpaddr_to_be_flushed <= {2'b00, rs1_forwarding_i[CVA6Cfg.GPLEN-1:2]}; - asid_to_be_flushed <= rs2_forwarding_i[CVA6Cfg.ASID_WIDTH-1:0]; + vaddr_to_be_flushed <= rs1_forwarding_i[0]; + gpaddr_to_be_flushed <= {2'b00, rs1_forwarding_i[0][CVA6Cfg.GPLEN-1:2]}; + asid_to_be_flushed <= rs2_forwarding_i[0][CVA6Cfg.ASID_WIDTH-1:0]; @@ -578,7 +620,7 @@ module ex_stage asid_to_be_flushed <= '0; vaddr_to_be_flushed <= '0; // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen - end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin + end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i[0].operation == SFENCE_VMA) && |csr_valid_i))) begin - vaddr_to_be_flushed <= rs1_forwarding_i; - asid_to_be_flushed <= rs2_forwarding_i[CVA6Cfg.ASID_WIDTH-1:0]; + vaddr_to_be_flushed <= rs1_forwarding_i[0]; + asid_to_be_flushed <= rs2_forwarding_i[0][CVA6Cfg.ASID_WIDTH-1:0]; end diff --git a/core/include/build_config_pkg.sv b/core/include/build_config_pkg.sv index e9e6fccfe3..da366e17fe 100644 --- a/core/include/build_config_pkg.sv +++ b/core/include/build_config_pkg.sv @@ -77,7 +77,7 @@ package build_config_pkg; cfg.XF16Vec = bit'(XF16Vec); cfg.XF16ALTVec = bit'(XF16ALTVec); cfg.XF8Vec = bit'(XF8Vec); - cfg.NrRgprPorts = unsigned'(2); + cfg.NrRgprPorts = unsigned'(2 << ariane_pkg::SUPERSCALAR); cfg.NrWbPorts = unsigned'(NrWbPorts); cfg.EnableAccelerator = bit'(EnableAccelerator); cfg.PerfCounterEn = CVA6Cfg.PerfCounterEn; diff --git a/core/include/rvfi_types.svh b/core/include/rvfi_types.svh index 9d275c7a4e..3d33d212e4 100644 ---
a/core/include/rvfi_types.svh +++ b/core/include/rvfi_types.svh @@ -93,18 +93,18 @@ // RVFI PROBES `define RVFI_PROBES_INSTR_T(Cfg) struct packed { \ - logic [Cfg.TRANS_ID_BITS-1:0] issue_pointer; \ + logic [ariane_pkg::SUPERSCALAR:0][Cfg.TRANS_ID_BITS-1:0] issue_pointer; \ logic [Cfg.NrCommitPorts-1:0][Cfg.TRANS_ID_BITS-1:0] commit_pointer; \ logic flush_unissued_instr; \ - logic decoded_instr_valid; \ - logic decoded_instr_ack; \ + logic [ariane_pkg::SUPERSCALAR:0] decoded_instr_valid; \ + logic [ariane_pkg::SUPERSCALAR:0] decoded_instr_ack; \ logic flush; \ - logic issue_instr_ack; \ + logic [ariane_pkg::SUPERSCALAR:0] issue_instr_ack; \ logic [ariane_pkg::SUPERSCALAR:0] fetch_entry_valid; \ logic [ariane_pkg::SUPERSCALAR:0][31:0] instruction; \ logic [ariane_pkg::SUPERSCALAR:0] is_compressed; \ - logic [Cfg.XLEN-1:0] rs1_forwarding; \ - logic [Cfg.XLEN-1:0] rs2_forwarding; \ + logic [ariane_pkg::SUPERSCALAR:0][Cfg.VLEN-1:0] rs1_forwarding; \ + logic [ariane_pkg::SUPERSCALAR:0][Cfg.VLEN-1:0] rs2_forwarding; \ logic [Cfg.NrCommitPorts-1:0][Cfg.VLEN-1:0] commit_instr_pc; \ ariane_pkg::fu_op [Cfg.NrCommitPorts-1:0] commit_instr_op; \ logic [Cfg.NrCommitPorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] commit_instr_rs1; \ @@ -119,7 +119,7 @@ ariane_pkg::fu_t lsu_ctrl_fu; \ logic [(Cfg.XLEN/8)-1:0] lsu_ctrl_be; \ logic [Cfg.TRANS_ID_BITS-1:0] lsu_ctrl_trans_id; \ - logic [((Cfg.CvxifEn || Cfg.RVV) ? 
5 : 4)-1:0][Cfg.XLEN-1:0] wbdata; \ + logic [Cfg.NrWbPorts-1:0][Cfg.XLEN-1:0] wbdata; \ logic [Cfg.NrCommitPorts-1:0] commit_ack; \ logic [Cfg.PLEN-1:0] mem_paddr; \ logic debug_mode; \ @@ -161,4 +161,4 @@ logic [15:0][Cfg.PLEN-3:0] pmpaddr_q; \ } -`endif // RVFI_TYPES_SVH +`endif // RVFI_TYPES_SVH diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index 645599ae44..eef1b818f3 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -32,42 +32,42 @@ module issue_read_operands // Stall inserted by Acc dispatcher - ACC_DISPATCHER input logic stall_i, // TO_BE_COMPLETED - TO_BE_COMPLETED - input scoreboard_entry_t issue_instr_i, + input scoreboard_entry_t [SUPERSCALAR:0] issue_instr_i, // TO_BE_COMPLETED - TO_BE_COMPLETED - input logic [31:0] orig_instr_i, + input logic [SUPERSCALAR:0][31:0] orig_instr_i, // TO_BE_COMPLETED - TO_BE_COMPLETED - input logic issue_instr_valid_i, + input logic [SUPERSCALAR:0] issue_instr_valid_i, // Issue stage acknowledge - TO_BE_COMPLETED - output logic issue_ack_o, + output logic [SUPERSCALAR:0] issue_ack_o, // rs1 operand address - scoreboard - output logic [REG_ADDR_SIZE-1:0] rs1_o, + output logic [SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs1_o, // rs1 operand - scoreboard - input logic [CVA6Cfg.XLEN-1:0] rs1_i, + input logic [SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs1_i, // rs1 operand is valid - scoreboard - input logic rs1_valid_i, + input logic [SUPERSCALAR:0] rs1_valid_i, // rs2 operand address - scoreboard - output logic [REG_ADDR_SIZE-1:0] rs2_o, + output logic [SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs2_o, // rs2 operand - scoreboard - input logic [CVA6Cfg.XLEN-1:0] rs2_i, + input logic [SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs2_i, // rs2 operand is valid - scoreboard - input logic rs2_valid_i, + input logic [SUPERSCALAR:0] rs2_valid_i, // rs3 operand address - scoreboard - output logic [REG_ADDR_SIZE-1:0] rs3_o, + output logic [SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs3_o, // rs3 operand - scoreboard - input 
rs3_len_t rs3_i, + input rs3_len_t [SUPERSCALAR:0] rs3_i, // rs3 operand is valid - scoreboard - input logic rs3_valid_i, + input logic [SUPERSCALAR:0] rs3_valid_i, // get clobber input // TO_BE_COMPLETED - TO_BE_COMPLETED input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i, // TO_BE_COMPLETED - TO_BE_COMPLETED input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i, // TO_BE_COMPLETED - TO_BE_COMPLETED - output fu_data_t fu_data_o, + output fu_data_t [SUPERSCALAR:0] fu_data_o, // Unregistered version of fu_data_o.operanda - TO_BE_COMPLETED - output logic [CVA6Cfg.XLEN-1:0] rs1_forwarding_o, + output logic [SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs1_forwarding_o, // Unregistered version of fu_data_o.operandb - TO_BE_COMPLETED - output logic [CVA6Cfg.XLEN-1:0] rs2_forwarding_o, + output logic [SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs2_forwarding_o, // Instruction pc - TO_BE_COMPLETED output logic [CVA6Cfg.VLEN-1:0] pc_o, // Is compressed instruction - TO_BE_COMPLETED @@ -75,31 +75,31 @@ module issue_read_operands // Fixed Latency Unit ready to accept new request - TO_BE_COMPLETED input logic flu_ready_i, // ALU output is valid - TO_BE_COMPLETED - output logic alu_valid_o, + output logic [SUPERSCALAR:0] alu_valid_o, // Branch instruction is valid - TO_BE_COMPLETED - output logic branch_valid_o, + output logic [SUPERSCALAR:0] branch_valid_o, // Transformed instruction - TO_BE_COMPLETED - output logic [31:0] tinst_o, + output logic [SUPERSCALAR:0][31:0] tinst_o, // TO_BE_COMPLETED - TO_BE_COMPLETED output branchpredict_sbe_t branch_predict_o, // Load Store Unit is ready - TO_BE_COMPLETED input logic lsu_ready_i, // Load Store Unit result is valid - TO_BE_COMPLETED - output logic lsu_valid_o, + output logic [SUPERSCALAR:0] lsu_valid_o, // Mult result is valid - TO_BE_COMPLETED - output logic mult_valid_o, + output logic [SUPERSCALAR:0] mult_valid_o, // FPU is ready - TO_BE_COMPLETED input logic fpu_ready_i, // FPU result is valid - TO_BE_COMPLETED - output logic fpu_valid_o, + output 
logic [SUPERSCALAR:0] fpu_valid_o, // FPU fmt field from instruction - TO_BE_COMPLETED output logic [1:0] fpu_fmt_o, // FPU rm field from isntruction - TO_BE_COMPLETED output logic [2:0] fpu_rm_o, // CSR result is valid - TO_BE_COMPLETED - output logic csr_valid_o, + output logic [SUPERSCALAR:0] csr_valid_o, // CVXIF result is valid - TO_BE_COMPLETED - output logic cvxif_valid_o, + output logic [SUPERSCALAR:0] cvxif_valid_o, // CVXIF is ready - TO_BE_COMPLETED input logic cvxif_ready_i, // CVXIF offloaded instruction - TO_BE_COMPLETED @@ -116,51 +116,53 @@ module issue_read_operands // Stall signal, we do not want to fetch any more entries - TO_BE_COMPLETED output logic stall_issue_o ); - logic stall; - logic fu_busy; // functional unit is busy - logic [CVA6Cfg.XLEN-1:0] operand_a_regfile, operand_b_regfile; // operands coming from regfile - rs3_len_t - operand_c_regfile, - operand_c_fpr, - operand_c_gpr; // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + + localparam OPERANDS_PER_INSTR = CVA6Cfg.NrRgprPorts >> SUPERSCALAR; + + typedef struct packed { + logic none, load, store, alu, ctrl_flow, mult, csr, fpu, fpu_vec, cvxif, accel; + } fus_busy_t; + + logic [SUPERSCALAR:0] stall; + logic [SUPERSCALAR:0] fu_busy; // functional unit is busy + fus_busy_t [SUPERSCALAR:0] fus_busy; // which functional units are considered busy + // operands coming from regfile + logic [SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] operand_a_regfile, operand_b_regfile; + // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + rs3_len_t [SUPERSCALAR:0] operand_c_regfile, operand_c_gpr; + rs3_len_t operand_c_fpr; // output flipflop (ID <-> EX) - logic [CVA6Cfg.XLEN-1:0] - operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3; - - logic alu_valid_q; - logic mult_valid_q; - logic fpu_valid_q; - logic [ 1:0] fpu_fmt_q; - logic [ 2:0] fpu_rm_q; - logic lsu_valid_q; - logic csr_valid_q; - logic branch_valid_q; - logic cvxif_valid_q; - logic 
[31:0] cvxif_off_instr_q; - - logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; - fu_op operator_n, operator_q; // operation to perform - fu_t fu_n, fu_q; // functional unit to use - logic [31:0] tinst_n, tinst_q; // transformed instruction + fu_data_t [SUPERSCALAR:0] fu_data_n, fu_data_q; + logic [CVA6Cfg.XLEN-1:0] imm_forward_rs3; + + logic [ SUPERSCALAR:0] alu_valid_q; + logic [ SUPERSCALAR:0] mult_valid_q; + logic [ SUPERSCALAR:0] fpu_valid_q; + logic [ 1:0] fpu_fmt_q; + logic [ 2:0] fpu_rm_q; + logic [ SUPERSCALAR:0] lsu_valid_q; + logic [ SUPERSCALAR:0] csr_valid_q; + logic [ SUPERSCALAR:0] branch_valid_q; + logic [ SUPERSCALAR:0] cvxif_valid_q; + logic [ 31:0] cvxif_off_instr_q; + + logic [SUPERSCALAR:0][31:0] tinst_n, tinst_q; // transformed instruction // forwarding signals - logic forward_rs1, forward_rs2, forward_rs3; + logic [SUPERSCALAR:0] forward_rs1, forward_rs2, forward_rs3; // original instruction riscv::instruction_t orig_instr; - assign orig_instr = riscv::instruction_t'(orig_instr_i); + assign orig_instr = riscv::instruction_t'(orig_instr_i[0]); // ID <-> EX registers - assign rs1_forwarding_o = operand_a_n[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs1 value - assign rs2_forwarding_o = operand_b_n[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs2 value + for (genvar i = 0; i <= SUPERSCALAR; i++) begin + assign rs1_forwarding_o[i] = fu_data_n[i].operand_a[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs1 value + assign rs2_forwarding_o[i] = fu_data_n[i].operand_b[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs2 value + end - assign fu_data_o.operand_a = operand_a_q; - assign fu_data_o.operand_b = operand_b_q; - assign fu_data_o.fu = fu_q; - assign fu_data_o.operation = operator_q; - assign fu_data_o.trans_id = trans_id_q; - assign fu_data_o.imm = imm_q; + assign fu_data_o = fu_data_q; assign alu_valid_o = alu_valid_q; assign branch_valid_o = branch_valid_q; assign lsu_valid_o = lsu_valid_q; @@ -171,28 +173,97 @@ module 
issue_read_operands assign fpu_rm_o = fpu_rm_q; assign cvxif_valid_o = CVA6Cfg.CvxifEn ? cvxif_valid_q : '0; assign cvxif_off_instr_o = CVA6Cfg.CvxifEn ? cvxif_off_instr_q : '0; - assign stall_issue_o = stall; + assign stall_issue_o = stall[0]; assign tinst_o = CVA6Cfg.RVH ? tinst_q : '0; // --------------- // Issue Stage // --------------- + always_comb begin : structural_hazards + fus_busy = '0; + + if (!flu_ready_i) begin + fus_busy[0].alu = 1'b1; + fus_busy[0].ctrl_flow = 1'b1; + fus_busy[0].csr = 1'b1; + fus_busy[0].mult = 1'b1; + end + + // after a multiplication was issued we can only issue another multiplication + // otherwise we will get contentions on the fixed latency bus + if (mult_valid_q) begin + fus_busy[0].alu = 1'b1; + fus_busy[0].ctrl_flow = 1'b1; + fus_busy[0].csr = 1'b1; + end + + if (CVA6Cfg.FpPresent && !fpu_ready_i) begin + fus_busy[0].fpu = 1'b1; + fus_busy[0].fpu_vec = 1'b1; + end + + if (!lsu_ready_i) begin + fus_busy[0].load = 1'b1; + fus_busy[0].store = 1'b1; + end + + if (!cvxif_ready_i) begin + fus_busy[0].cvxif = 1'b1; + end + + if (SUPERSCALAR) begin + fus_busy[1] = fus_busy[0]; + + unique case (issue_instr_i[0].fu) + NONE: fus_busy[1].none = 1'b1; + CTRL_FLOW: begin + // There are no branch misses on a JAL + if (issue_instr_i[0].op == ariane_pkg::ADD) begin + fus_busy[1].alu = 1'b1; + fus_busy[1].ctrl_flow = 1'b1; + fus_busy[1].csr = 1'b1; + end else begin + // Control hazard + fus_busy[1] = '1; + end + end + ALU, CSR: begin + fus_busy[1].alu = 1'b1; + fus_busy[1].ctrl_flow = 1'b1; + fus_busy[1].csr = 1'b1; + end + MULT: fus_busy[1].mult = 1'b1; + FPU, FPU_VEC: begin + fus_busy[1].fpu = 1'b1; + fus_busy[1].fpu_vec = 1'b1; + end + LOAD, STORE: begin + fus_busy[1].load = 1'b1; + fus_busy[1].store = 1'b1; + end + CVXIF: fus_busy[1].cvxif = 1'b1; + endcase + end + end + // select the right busy signal // this obviously depends on the functional unit we need - always_comb begin : unit_busy - unique case (issue_instr_i.fu) - NONE: fu_busy 
= 1'b0; - ALU, CTRL_FLOW, CSR, MULT: fu_busy = ~flu_ready_i; - LOAD, STORE: fu_busy = ~lsu_ready_i; - CVXIF: fu_busy = ~cvxif_ready_i; - default: begin - if (CVA6Cfg.FpPresent && (issue_instr_i.fu == FPU || issue_instr_i.fu == FPU_VEC)) begin - fu_busy = ~fpu_ready_i; - end else begin - fu_busy = 1'b0; - end - end - endcase + for (genvar i = 0; i <= ariane_pkg::SUPERSCALAR; i++) begin + always_comb begin + unique case (issue_instr_i[i].fu) + NONE: fu_busy[i] = fus_busy[i].none; + ALU: fu_busy[i] = fus_busy[i].alu; + CTRL_FLOW: fu_busy[i] = fus_busy[i].ctrl_flow; + CSR: fu_busy[i] = fus_busy[i].csr; + MULT: fu_busy[i] = fus_busy[i].mult; + FPU: fu_busy[i] = fus_busy[i].fpu; + FPU_VEC: fu_busy[i] = fus_busy[i].fpu_vec; + LOAD: fu_busy[i] = fus_busy[i].load; + STORE: fu_busy[i] = fus_busy[i].store; + CVXIF: fu_busy[i] = fus_busy[i].cvxif; + default: fu_busy[i] = 1'b0; + endcase + end end // --------------- @@ -201,126 +272,160 @@ module issue_read_operands // check that all operands are available, otherwise stall // forward corresponding register always_comb begin : operands_available - stall = stall_i; + stall = '{default: stall_i}; // operand forwarding signals - forward_rs1 = 1'b0; - forward_rs2 = 1'b0; - forward_rs3 = 1'b0; // FPR only - // poll the scoreboard for those values - rs1_o = issue_instr_i.rs1; - rs2_o = issue_instr_i.rs2; - rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field - - // 0. check that we are not using the zimm type in RS1 - // as this is an immediate we do not have to wait on anything here - // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) - // 2. poll the scoreboard - if (!issue_instr_i.use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i.op - )) ? 
rd_clobber_fpr_i[issue_instr_i.rs1] != NONE : - rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin - // check if the clobbering instruction is not a CSR instruction, CSR instructions can only - // be fetched through the register file since they can't be forwarded - // if the operand is available, forward it. CSRs don't write to/from FPR - if (rs1_valid_i && (CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i.op - ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) || - (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin - forward_rs1 = 1'b1; - end else begin // the operand is not available -> stall - stall = 1'b1; + forward_rs1 = '0; + forward_rs2 = '0; + forward_rs3 = '0; // FPR only + + for (int unsigned i = 0; i <= SUPERSCALAR; i++) begin + // poll the scoreboard for those values + rs1_o[i] = issue_instr_i[i].rs1; + rs2_o[i] = issue_instr_i[i].rs2; + rs3_o[i] = issue_instr_i[i].result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + + // 0. check that we are not using the zimm type in RS1 + // as this is an immediate we do not have to wait on anything here + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) + // 2. poll the scoreboard + if (!issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i[i].op + )) ? rd_clobber_fpr_i[issue_instr_i[i].rs1] != NONE : + rd_clobber_gpr_i[issue_instr_i[i].rs1] != NONE)) begin + // check if the clobbering instruction is not a CSR instruction, CSR instructions can only + // be fetched through the register file since they can't be forwarded + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i[i] && (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i[i].op + ) ? 
1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs1] != CSR) || + (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin + forward_rs1[i] = 1'b1; + end else begin // the operand is not available -> stall + stall[i] = 1'b1; + end end - end - if ((CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i.op - )) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE : - rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin - // if the operand is available, forward it. CSRs don't write to/from FPR - if (rs2_valid_i && (CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i.op - ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) || - (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin - forward_rs2 = 1'b1; - end else begin // the operand is not available -> stall - stall = 1'b1; + if ((CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + )) ? rd_clobber_fpr_i[issue_instr_i[i].rs2] != NONE : + rd_clobber_gpr_i[issue_instr_i[i].rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i[i] && (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs2] != CSR) || + (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin + forward_rs2[i] = 1'b1; + end else begin // the operand is not available -> stall + stall[i] = 1'b1; + end + end + + // Only check clobbered gpr for OFFLOADED instruction + if ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i[i].op + )) ? rd_clobber_fpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : + issue_instr_i[i].op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ? + rd_clobber_gpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin + // if the operand is available, forward it. 
CSRs don't write to/from FPR so no need to check + if (rs3_valid_i[i]) begin + forward_rs3[i] = 1'b1; + end else begin // the operand is not available -> stall + stall[i] = 1'b1; + end end end - // Only check clobbered gpr for OFFLOADED instruction - if ((CVA6Cfg.FpPresent && is_imm_fpr( - issue_instr_i.op - )) ? rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : - issue_instr_i.op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ? - rd_clobber_gpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin - // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check - if (rs3_valid_i) begin - forward_rs3 = 1'b1; - end else begin // the operand is not available -> stall - stall = 1'b1; + if (SUPERSCALAR) begin + if (!issue_instr_i[1].use_zimm && (!CVA6Cfg.FpPresent || (is_rs1_fpr( + issue_instr_i[1].op + ) == is_rd_fpr( + issue_instr_i[0].op + ))) && issue_instr_i[1].rs1 == issue_instr_i[0].rd && issue_instr_i[1].rs1 != '0) begin + stall[1] = 1'b1; + end + + if ((!CVA6Cfg.FpPresent || (is_rs2_fpr( + issue_instr_i[1].op + ) == is_rd_fpr( + issue_instr_i[0].op + ))) && issue_instr_i[1].rs2 == issue_instr_i[0].rd && issue_instr_i[1].rs2 != '0) begin + stall[1] = 1'b1; + end + + // Only check clobbered gpr for OFFLOADED instruction + if ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i[1].op + )) ? is_rd_fpr( + issue_instr_i[0].op + ) && issue_instr_i[0].rd == issue_instr_i[1].result[REG_ADDR_SIZE-1:0] : + issue_instr_i[1].op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ? 
+ issue_instr_i[0].rd == issue_instr_i[1].result[REG_ADDR_SIZE-1:0] : 1'b0) begin + stall[1] = 1'b1; end end end // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_rs3 - assign imm_forward_rs3 = rs3_i; + assign imm_forward_rs3 = rs3_i[0]; end else begin : gen_fp_rs3 - assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i}; + assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i[0]}; end // Forwarding/Output MUX - always_comb begin : forwarding_operand_select - // default is regfiles (gpr or fpr) - operand_a_n = operand_a_regfile; - operand_b_n = operand_b_regfile; - // immediates are the third operands in the store case - // for FP operations, the imm field can also be the third operand from the regfile - if (CVA6Cfg.NrRgprPorts == 3) begin - imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? - {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : - issue_instr_i.op == OFFLOAD ? operand_c_regfile : issue_instr_i.result; - end else begin - imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? 
- {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result; - end - trans_id_n = issue_instr_i.trans_id; - fu_n = issue_instr_i.fu; - operator_n = issue_instr_i.op; - if (CVA6Cfg.RVH) begin - tinst_n = issue_instr_i.ex.tinst; - end - // or should we forward - if (forward_rs1) begin - operand_a_n = rs1_i; - end - - if (forward_rs2) begin - operand_b_n = rs2_i; - end + for (genvar i = 0; i <= SUPERSCALAR; i++) begin + always_comb begin : forwarding_operand_select + // default is regfiles (gpr or fpr) + fu_data_n[i].operand_a = operand_a_regfile[i]; + fu_data_n[i].operand_b = operand_b_regfile[i]; + + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile + if (CVA6Cfg.NrRgprPorts == 3) begin + fu_data_n[i].imm = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i[i].op)) ? + {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile[i]} : + issue_instr_i[i].op == OFFLOAD ? operand_c_regfile[i] : issue_instr_i[i].result; + end else begin + fu_data_n[i].imm = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i[i].op)) ? 
+ {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile[i]} : issue_instr_i[i].result; + end + fu_data_n[i].trans_id = issue_instr_i[i].trans_id; + fu_data_n[i].fu = issue_instr_i[i].fu; + fu_data_n[i].operation = issue_instr_i[i].op; + if (CVA6Cfg.RVH) begin + tinst_n[i] = issue_instr_i[i].ex.tinst; + end - if (CVA6Cfg.FpPresent && forward_rs3) begin - imm_n = imm_forward_rs3; - end + // or should we forward + if (forward_rs1[i]) begin + fu_data_n[i].operand_a = rs1_i[i]; + end + if (forward_rs2[i]) begin + fu_data_n[i].operand_b = rs2_i[i]; + end + if (CVA6Cfg.FpPresent && forward_rs3[i]) begin + fu_data_n[i].imm = imm_forward_rs3; + end - // use the PC as operand a - if (issue_instr_i.use_pc) begin - operand_a_n = { - {CVA6Cfg.XLEN - CVA6Cfg.VLEN{issue_instr_i.pc[CVA6Cfg.VLEN-1]}}, issue_instr_i.pc - }; - end + // use the PC as operand a + if (issue_instr_i[i].use_pc) begin + fu_data_n[i].operand_a = { + {CVA6Cfg.XLEN - CVA6Cfg.VLEN{issue_instr_i[i].pc[CVA6Cfg.VLEN-1]}}, issue_instr_i[i].pc + }; + end - // use the zimm as operand a - if (issue_instr_i.use_zimm) begin - // zero extend operand a - operand_a_n = {{CVA6Cfg.XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]}; - end - // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions - // also make sure operand B is not already used as an FP operand - if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && (issue_instr_i.fu != ACCEL) && !(CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i.op - ))) begin - operand_b_n = issue_instr_i.result; + // use the zimm as operand a + if (issue_instr_i[i].use_zimm) begin + // zero extend operand a + fu_data_n[i].operand_a = {{CVA6Cfg.XLEN - 5{1'b0}}, issue_instr_i[i].rs1[4:0]}; + end + // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions + // also make sure operand B is not already used as an FP operand + if 
(issue_instr_i[i].use_imm && (issue_instr_i[i].fu != STORE) && (issue_instr_i[i].fu != CTRL_FLOW) && (issue_instr_i[i].fu != ACCEL) && !(CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + ))) begin + fu_data_n[i].operand_b = issue_instr_i[i].result; + end end end @@ -328,65 +433,67 @@ module issue_read_operands // This needs to be like this to make verilator happy. I know its ugly. always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin - alu_valid_q <= 1'b0; - lsu_valid_q <= 1'b0; - mult_valid_q <= 1'b0; - fpu_valid_q <= 1'b0; - fpu_fmt_q <= 2'b0; - fpu_rm_q <= 3'b0; - csr_valid_q <= 1'b0; - branch_valid_q <= 1'b0; + alu_valid_q <= '0; + lsu_valid_q <= '0; + mult_valid_q <= '0; + fpu_valid_q <= '0; + fpu_fmt_q <= '0; + fpu_rm_q <= '0; + csr_valid_q <= '0; + branch_valid_q <= '0; end else begin - alu_valid_q <= 1'b0; - lsu_valid_q <= 1'b0; - mult_valid_q <= 1'b0; - fpu_valid_q <= 1'b0; - fpu_fmt_q <= 2'b0; - fpu_rm_q <= 3'b0; - csr_valid_q <= 1'b0; - branch_valid_q <= 1'b0; + alu_valid_q <= '0; + lsu_valid_q <= '0; + mult_valid_q <= '0; + fpu_valid_q <= '0; + fpu_fmt_q <= '0; + fpu_rm_q <= '0; + csr_valid_q <= '0; + branch_valid_q <= '0; // Exception pass through: // If an exception has occurred simply pass it through // we do not want to issue this instruction - if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin - case (issue_instr_i.fu) - ALU: begin - alu_valid_q <= 1'b1; - end - CTRL_FLOW: begin - branch_valid_q <= 1'b1; - end - MULT: begin - mult_valid_q <= 1'b1; - end - LOAD, STORE: begin - lsu_valid_q <= 1'b1; - end - CSR: begin - csr_valid_q <= 1'b1; - end - default: begin - if (issue_instr_i.fu == FPU && CVA6Cfg.FpPresent) begin - fpu_valid_q <= 1'b1; - fpu_fmt_q <= orig_instr.rftype.fmt; // fmt bits from instruction - fpu_rm_q <= orig_instr.rftype.rm; // rm bits from instruction - end else if (issue_instr_i.fu == FPU_VEC && CVA6Cfg.FpPresent) begin - fpu_valid_q <= 1'b1; - fpu_fmt_q <= orig_instr.rvftype.vfmt; // 
vfmt bits from instruction - fpu_rm_q <= {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + for (int unsigned i = 0; i <= SUPERSCALAR; i++) begin + if (!issue_instr_i[i].ex.valid && issue_instr_valid_i[i] && issue_ack_o[i]) begin + case (issue_instr_i[i].fu) + ALU: begin + alu_valid_q[i] <= 1'b1; end - end - endcase + CTRL_FLOW: begin + branch_valid_q[i] <= 1'b1; + end + MULT: begin + mult_valid_q[i] <= 1'b1; + end + LOAD, STORE: begin + lsu_valid_q[i] <= 1'b1; + end + CSR: begin + csr_valid_q[i] <= 1'b1; + end + default: begin + if (issue_instr_i[i].fu == FPU && CVA6Cfg.FpPresent) begin + fpu_valid_q[i] <= 1'b1; + fpu_fmt_q <= orig_instr.rftype.fmt; // fmt bits from instruction + fpu_rm_q <= orig_instr.rftype.rm; // rm bits from instruction + end else if (issue_instr_i[i].fu == FPU_VEC && CVA6Cfg.FpPresent) begin + fpu_valid_q[i] <= 1'b1; + fpu_fmt_q <= orig_instr.rvftype.vfmt; // vfmt bits from instruction + fpu_rm_q <= {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + end + end + endcase + end end // if we got a flush request, de-assert the valid flag, otherwise we will start this // functional unit with the wrong inputs if (flush_i) begin - alu_valid_q <= 1'b0; - lsu_valid_q <= 1'b0; - mult_valid_q <= 1'b0; - fpu_valid_q <= 1'b0; - csr_valid_q <= 1'b0; - branch_valid_q <= 1'b0; + alu_valid_q <= '0; + lsu_valid_q <= '0; + mult_valid_q <= '0; + fpu_valid_q <= '0; + csr_valid_q <= '0; + branch_valid_q <= '0; end end end @@ -394,22 +501,24 @@ module issue_read_operands if (CVA6Cfg.CvxifEn) begin always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin - cvxif_valid_q <= 1'b0; + cvxif_valid_q <= '0; cvxif_off_instr_q <= 32'b0; end else begin - cvxif_valid_q <= 1'b0; + cvxif_valid_q <= '0; cvxif_off_instr_q <= 32'b0; - if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin - case (issue_instr_i.fu) - CVXIF: begin - cvxif_valid_q <= 1'b1; - cvxif_off_instr_q <= orig_instr; - end - default: ; - endcase + for 
(int unsigned i = 0; i <= SUPERSCALAR; i++) begin + if (!issue_instr_i[i].ex.valid && issue_instr_valid_i[i] && issue_ack_o[i]) begin + case (issue_instr_i[i].fu) + CVXIF: begin + cvxif_valid_q[i] <= 1'b1; + cvxif_off_instr_q <= orig_instr; + end + default: ; + endcase + end end if (flush_i) begin - cvxif_valid_q <= 1'b0; + cvxif_valid_q <= '0; cvxif_off_instr_q <= 32'b0; end end @@ -420,51 +529,59 @@ module issue_read_operands // destination register. // We also need to check if there is an unresolved branch in the scoreboard. always_comb begin : issue_scoreboard - // default assignment - issue_ack_o = 1'b0; - // check that we didn't stall, that the instruction we got is valid - // and that the functional unit we need is not busy - if (issue_instr_valid_i) begin - // check that the corresponding functional unit is not busy - if (!stall && !fu_busy) begin - // ----------------------------------------- - // WAW - Write After Write Dependency Check - // ----------------------------------------- - // no other instruction has the same destination register -> issue the instruction - if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - issue_instr_i.op - )) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) : - (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin - issue_ack_o = 1'b1; + for (int unsigned i = 0; i <= SUPERSCALAR; i++) begin + // default assignment + issue_ack_o[i] = 1'b0; + // check that we didn't stall, that the instruction we got is valid + // and that the functional unit we need is not busy + if (issue_instr_valid_i[i] && !fu_busy[i]) begin + // check that the corresponding functional unit is not busy + if (!stall[i]) begin + // ----------------------------------------- + // WAW - Write After Write Dependency Check + // ----------------------------------------- + // no other instruction has the same destination register -> issue the instruction + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i[i].op + )) ? 
(rd_clobber_fpr_i[issue_instr_i[i].rd] == NONE) : + (rd_clobber_gpr_i[issue_instr_i[i].rd] == NONE)) begin + issue_ack_o[i] = 1'b1; + end + // or check that the target destination register will be written in this cycle by the + // commit stage + for (int unsigned c = 0; c < CVA6Cfg.NrCommitPorts; c++) begin + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i[i].op + )) ? (we_fpr_i[c] && waddr_i[c] == issue_instr_i[i].rd[4:0]) : + (we_gpr_i[c] && waddr_i[c] == issue_instr_i[i].rd[4:0])) begin + issue_ack_o[i] = 1'b1; + end + end + if (i > 0) begin + if ((issue_instr_i[i].rd[4:0] == issue_instr_i[i-1].rd[4:0]) && (issue_instr_i[i].rd[4:0] != '0)) begin + issue_ack_o[i] = 1'b0; + end + end end - // or check that the target destination register will be written in this cycle by the - // commit stage - for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) - if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - issue_instr_i.op - )) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0]) : - (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0])) begin - issue_ack_o = 1'b1; + // we can also issue the instruction under the following two circumstances: + // we can do this even if we are stalled or no functional unit is ready (as we don't need one) + // the decoder needs to make sure that the instruction is marked as valid when it does not + // need any functional unit or if an exception occurred previous to the execute stage. + // 1. we already got an exception + if (issue_instr_i[i].ex.valid) begin + issue_ack_o[i] = 1'b1; + end + // 2. 
it is an instruction which does not need any functional unit + if (issue_instr_i[i].fu == NONE) begin + issue_ack_o[i] = 1'b1; end - - end - // we can also issue the instruction under the following two circumstances: - // we can do this even if we are stalled or no functional unit is ready (as we don't need one) - // the decoder needs to make sure that the instruction is marked as valid when it does not - // need any functional unit or if an exception occurred previous to the execute stage. - // 1. we already got an exception - if (issue_instr_i.ex.valid) begin - issue_ack_o = 1'b1; - end - // 2. it is an instruction which does not need any functional unit - if (issue_instr_i.fu == NONE) begin - issue_ack_o = 1'b1; end end - // after a multiplication was issued we can only issue another multiplication - // otherwise we will get contentions on the fixed latency bus - if (mult_valid_q && issue_instr_i.fu inside {ALU, CTRL_FLOW, CSR}) begin - issue_ack_o = 1'b0; + + if (SUPERSCALAR) begin + if (!issue_ack_o[0]) begin + issue_ack_o[1] = 1'b0; + end end end @@ -479,10 +596,12 @@ module issue_read_operands logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_pack; logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; - if (CVA6Cfg.NrRgprPorts == 3) begin : gen_rs3 - assign raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; - end else begin : gen_no_rs3 - assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + for (genvar i = 0; i <= SUPERSCALAR; i++) begin + assign raddr_pack[i*OPERANDS_PER_INSTR+0] = issue_instr_i[i].rs1[4:0]; + assign raddr_pack[i*OPERANDS_PER_INSTR+1] = issue_instr_i[i].rs2[4:0]; + if (OPERANDS_PER_INSTR == 3) begin + assign raddr_pack[i*OPERANDS_PER_INSTR+2] = issue_instr_i[i].result[4:0]; + end end for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_write_back_port @@ -531,11 +650,22 @@ module issue_read_operands logic [2:0][4:0] fp_raddr_pack; logic 
[CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] fp_wdata_pack; + always_comb begin : assign_fp_raddr_pack + fp_raddr_pack = { + issue_instr_i[0].result[4:0], issue_instr_i[0].rs2[4:0], issue_instr_i[0].rs1[4:0] + }; + + if (SUPERSCALAR) begin + if (!(issue_instr_i[0].fu inside {FPU, FPU_VEC})) begin + fp_raddr_pack = { + issue_instr_i[1].result[4:0], issue_instr_i[1].rs2[4:0], issue_instr_i[1].rs1[4:0] + }; + end + end + end + generate if (CVA6Cfg.FpPresent) begin : float_regfile_gen - assign fp_raddr_pack = { - issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0] - }; for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_fp_wdata_pack assign fp_wdata_pack[i] = {wdata_i[i][CVA6Cfg.FLen-1:0]}; end @@ -577,33 +707,32 @@ module issue_read_operands if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c assign operand_c_fpr = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[2]}; - assign operand_c_gpr = rdata[2]; end else begin assign operand_c_fpr = fprdata[2]; end - assign operand_a_regfile = (CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i.op - )) ? {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0]; - assign operand_b_regfile = (CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i.op - )) ? {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1]; - assign operand_c_regfile = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr( - issue_instr_i.op - )) ? operand_c_fpr : operand_c_gpr) : operand_c_fpr; + for (genvar i = 0; i <= SUPERSCALAR; i++) begin + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c + assign operand_c_gpr[i] = rdata[i*OPERANDS_PER_INSTR+2]; + end + assign operand_a_regfile[i] = (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i[i].op + )) ? {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[i*OPERANDS_PER_INSTR+0]; + assign operand_b_regfile[i] = (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + )) ? 
{{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[i*OPERANDS_PER_INSTR+1]; + assign operand_c_regfile[i] = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i[i].op + )) ? operand_c_fpr : operand_c_gpr[i]) : operand_c_fpr; + end // ---------------------- // Registers (ID <-> EX) // ---------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin - operand_a_q <= '{default: 0}; - operand_b_q <= '{default: 0}; - imm_q <= '0; - fu_q <= NONE; - operator_q <= ADD; - trans_id_q <= '0; + fu_data_q <= '0; if (CVA6Cfg.RVH) begin tinst_q <= '0; end @@ -611,24 +740,28 @@ module issue_read_operands is_compressed_instr_o <= 1'b0; branch_predict_o <= {cf_t'(0), {CVA6Cfg.VLEN{1'b0}}}; end else begin - operand_a_q <= operand_a_n; - operand_b_q <= operand_b_n; - imm_q <= imm_n; - fu_q <= fu_n; - operator_q <= operator_n; - trans_id_q <= trans_id_n; + fu_data_q <= fu_data_n; if (CVA6Cfg.RVH) begin tinst_q <= tinst_n; end - pc_o <= issue_instr_i.pc; - is_compressed_instr_o <= issue_instr_i.is_compressed; - branch_predict_o <= issue_instr_i.bp; + if (SUPERSCALAR) begin + if (issue_instr_i[1].fu == CTRL_FLOW) begin + pc_o <= issue_instr_i[1].pc; + is_compressed_instr_o <= issue_instr_i[1].is_compressed; + branch_predict_o <= issue_instr_i[1].bp; + end + end + if (issue_instr_i[0].fu == CTRL_FLOW) begin + pc_o <= issue_instr_i[0].pc; + is_compressed_instr_o <= issue_instr_i[0].is_compressed; + branch_predict_o <= issue_instr_i[0].bp; + end end end //pragma translate_off initial begin - assert (CVA6Cfg.NrRgprPorts == 2 || (CVA6Cfg.NrRgprPorts == 3 && CVA6Cfg.CvxifEn)) + assert (CVA6Cfg.NrRgprPorts == 2 || (CVA6Cfg.NrRgprPorts == 3 && CVA6Cfg.CvxifEn) || SUPERSCALAR) else $fatal( 1, @@ -636,14 +769,14 @@ module issue_read_operands ); end - assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown( - operand_a_q - ) && !$isunknown( - operand_b_q - ))) - else $warning("Got unknown value in one of the operands"); - + 
for (genvar i = 0; i <= SUPERSCALAR; i++) begin + assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown( + fu_data_q[i].operand_a + ) && !$isunknown( + fu_data_q[i].operand_b + ))) + else $warning("Got unknown value in one of the operands"); + end //pragma translate_on -endmodule - +endmodule diff --git a/core/issue_stage.sv b/core/issue_stage.sv index 1e3aaf8e41..6d8da085cd 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -47,45 +47,45 @@ module issue_stage // Handshake's acknowlege with decode stage - ID_STAGE output logic [SUPERSCALAR:0] decoded_instr_ack_o, // rs1 forwarding - EX_STAGE - output [CVA6Cfg.VLEN-1:0] rs1_forwarding_o, + output [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs1_forwarding_o, // rs2 forwarding - EX_STAGE - output [CVA6Cfg.VLEN-1:0] rs2_forwarding_o, + output [SUPERSCALAR:0][CVA6Cfg.VLEN-1:0] rs2_forwarding_o, // FU data useful to execute instruction - EX_STAGE - output fu_data_t fu_data_o, + output fu_data_t [SUPERSCALAR:0] fu_data_o, // Program Counter - EX_STAGE output logic [CVA6Cfg.VLEN-1:0] pc_o, // Is compressed instruction - EX_STAGE output logic is_compressed_instr_o, // Transformed trap instruction - EX_STAGE - output logic [31:0] tinst_o, + output logic [SUPERSCALAR:0][31:0] tinst_o, // Fixed Latency Unit is ready - EX_STAGE input logic flu_ready_i, // ALU FU is valid - EX_STAGE - output logic alu_valid_o, + output logic [SUPERSCALAR:0] alu_valid_o, // Signaling that we resolved the branch - EX_STAGE input logic resolve_branch_i, // Load store unit FU is ready - EX_STAGE input logic lsu_ready_i, // Load store unit FU is valid - EX_STAGE - output logic lsu_valid_o, + output logic [SUPERSCALAR:0] lsu_valid_o, // Branch unit is valid - EX_STAGE - output logic branch_valid_o, + output logic [SUPERSCALAR:0] branch_valid_o, // Information of branch prediction - EX_STAGE output branchpredict_sbe_t branch_predict_o, // Mult FU is valid - EX_STAGE - output logic mult_valid_o, + output logic [SUPERSCALAR:0] 
mult_valid_o, // FPU FU is ready - EX_STAGE input logic fpu_ready_i, // FPU FU is valid - EX_STAGE - output logic fpu_valid_o, + output logic [SUPERSCALAR:0] fpu_valid_o, // FPU fmt field - EX_STAGE output logic [1:0] fpu_fmt_o, // FPU rm field - EX_STAGE output logic [2:0] fpu_rm_o, // CSR is valid - EX_STAGE - output logic csr_valid_o, + output logic [SUPERSCALAR:0] csr_valid_o, // CVXIF FU is valid - EX_STAGE - output logic x_issue_valid_o, + output logic [SUPERSCALAR:0] x_issue_valid_o, // CVXIF is FU ready - EX_STAGE input logic x_issue_ready_i, // CVXIF offloader instruction value - EX_STAGE @@ -121,7 +121,7 @@ module issue_stage // Issue stall - PERF_COUNTERS output logic stall_issue_o, // Information dedicated to RVFI - RVFI - output logic [CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, + output logic [SUPERSCALAR:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, // Information dedicated to RVFI - RVFI output logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_commit_pointer_o ); @@ -130,31 +130,33 @@ module issue_stage // --------------------------------------------------- typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? 
CVA6Cfg.XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; - fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; - fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; - logic [ REG_ADDR_SIZE-1:0] rs1_iro_sb; - logic [ CVA6Cfg.XLEN-1:0] rs1_sb_iro; - logic rs1_valid_sb_iro; + logic [ SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs1_iro_sb; + logic [ SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs1_sb_iro; + logic [ SUPERSCALAR:0] rs1_valid_sb_iro; - logic [ REG_ADDR_SIZE-1:0] rs2_iro_sb; - logic [ CVA6Cfg.XLEN-1:0] rs2_sb_iro; - logic rs2_valid_iro_sb; + logic [ SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs2_iro_sb; + logic [ SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs2_sb_iro; + logic [ SUPERSCALAR:0] rs2_valid_iro_sb; - logic [ REG_ADDR_SIZE-1:0] rs3_iro_sb; - rs3_len_t rs3_sb_iro; - logic rs3_valid_iro_sb; + logic [ SUPERSCALAR:0][REG_ADDR_SIZE-1:0] rs3_iro_sb; + rs3_len_t [ SUPERSCALAR:0] rs3_sb_iro; + logic [ SUPERSCALAR:0] rs3_valid_iro_sb; - scoreboard_entry_t [ SUPERSCALAR:0] issue_instr_sb_iro; - logic [ SUPERSCALAR:0][31:0] orig_instr_sb_iro; - logic [ SUPERSCALAR:0] issue_instr_valid_sb_iro; - logic [ SUPERSCALAR:0] issue_ack_iro_sb; + scoreboard_entry_t [ SUPERSCALAR:0] issue_instr_sb_iro; + logic [ SUPERSCALAR:0][ 31:0] orig_instr_sb_iro; + logic [ SUPERSCALAR:0] issue_instr_valid_sb_iro; + logic [ SUPERSCALAR:0] issue_ack_iro_sb; - logic [ CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen; - logic [ CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen; + logic [ SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen; + logic [ SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen; - assign rs1_forwarding_o = rs1_forwarding_xlen[CVA6Cfg.VLEN-1:0]; - assign rs2_forwarding_o = rs2_forwarding_xlen[CVA6Cfg.VLEN-1:0]; + for (genvar i = 0; i <= SUPERSCALAR; i++) begin + assign rs1_forwarding_o[i] = rs1_forwarding_xlen[i][CVA6Cfg.VLEN-1:0]; + assign rs2_forwarding_o[i] = rs2_forwarding_xlen[i][CVA6Cfg.VLEN-1:0]; + end assign 
issue_instr_o = issue_instr_sb_iro[0]; assign issue_instr_hs_o = issue_instr_valid_sb_iro[0] & issue_ack_iro_sb[0]; @@ -209,10 +211,10 @@ module issue_stage .rs3_len_t(rs3_len_t) ) i_issue_read_operands ( .flush_i (flush_unissued_instr_i), - .issue_instr_i (issue_instr_sb_iro[0]), - .orig_instr_i (orig_instr_sb_iro[0]), - .issue_instr_valid_i(issue_instr_valid_sb_iro[0]), - .issue_ack_o (issue_ack_iro_sb[0]), + .issue_instr_i (issue_instr_sb_iro), + .orig_instr_i (orig_instr_sb_iro), + .issue_instr_valid_i(issue_instr_valid_sb_iro), + .issue_ack_o (issue_ack_iro_sb), .fu_data_o (fu_data_o), .flu_ready_i (flu_ready_i), .rs1_o (rs1_iro_sb), @@ -239,8 +241,5 @@ module issue_stage .tinst_o (tinst_o), .* ); - if (SUPERSCALAR) begin - assign issue_ack_iro_sb[1] = 1'b0; - end endmodule diff --git a/core/scoreboard.sv b/core/scoreboard.sv index 47f0f67519..8ad3b0a392 100644 --- a/core/scoreboard.sv +++ b/core/scoreboard.sv @@ -35,25 +35,25 @@ module scoreboard #( output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o, // rs1 operand address - issue_read_operands - input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, + input logic [ariane_pkg::SUPERSCALAR:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, // rs1 operand - issue_read_operands - output logic [ CVA6Cfg.XLEN-1:0] rs1_o, + output logic [ariane_pkg::SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs1_o, // rs1 operand is valid - issue_read_operands - output logic rs1_valid_o, + output logic [ariane_pkg::SUPERSCALAR:0] rs1_valid_o, // rs2 operand address - issue_read_operands - input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, + input logic [ariane_pkg::SUPERSCALAR:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, // rs2 operand - issue_read_operands - output logic [ CVA6Cfg.XLEN-1:0] rs2_o, + output logic [ariane_pkg::SUPERSCALAR:0][ CVA6Cfg.XLEN-1:0] rs2_o, // rs2 operand is valid - issue_read_operands - output logic rs2_valid_o, + output logic [ariane_pkg::SUPERSCALAR:0] rs2_valid_o, // rs3 operand address - 
issue_read_operands - input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, + input logic [ariane_pkg::SUPERSCALAR:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, // rs3 operand - issue_read_operands - output rs3_len_t rs3_o, + output rs3_len_t [ariane_pkg::SUPERSCALAR:0] rs3_o, // rs3 operand is valid - issue_read_operands - output logic rs3_valid_o, + output logic [ariane_pkg::SUPERSCALAR:0] rs3_valid_o, // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer // TO_BE_COMPLETED - TO_BE_COMPLETED @@ -96,7 +96,7 @@ module scoreboard #( input logic x_we_i, // TO_BE_COMPLETED - RVFI - output logic [CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, + output logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, // TO_BE_COMPLETED - RVFI output logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] rvfi_commit_pointer_o ); @@ -120,9 +120,13 @@ module scoreboard #( // the issue queue is full don't issue any new instructions // works since aligned to power of 2 - assign issue_full = (issue_cnt_q[CVA6Cfg.TRANS_ID_BITS] == 1'b1); + if (ariane_pkg::SUPERSCALAR) begin + assign issue_full = (issue_cnt_q[CVA6Cfg.TRANS_ID_BITS] == 1'b1) || &issue_cnt_q[CVA6Cfg.TRANS_ID_BITS-1:0]; + end else begin + assign issue_full = (issue_cnt_q[CVA6Cfg.TRANS_ID_BITS] == 1'b1); + end - assign sb_full_o = issue_full; + assign sb_full_o = issue_full; // output commit instruction directly always_comb begin : commit_ports @@ -146,8 +150,6 @@ module scoreboard #( // make sure we assign the correct trans ID issue_instr_o[i].trans_id = issue_pointer[i]; - // we are ready if we are not full and don't have any unresolved branches, but it can be - // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1) issue_instr_valid_o[i] = decoded_instr_valid_i[i] & ~issue_full; decoded_instr_ack_o[i] = issue_ack_i[i] & ~issue_full; end @@ -337,112 +339,115 @@ module scoreboard #( // Read Operands (a.k.a 
forwarding) // ---------------------------------- // read operand interface: same logic as register file - logic [CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; - logic [CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; - logic rs1_valid, rs2_valid, rs3_valid; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] + rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; + logic [ariane_pkg::SUPERSCALAR:0] rs1_valid, rs2_valid, rs3_valid; // WB ports have higher prio than entries - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb - assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[0].op - ))); - assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[0].op - ))); - assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[0].op - ))); - assign rs_data[k] = wbdata_i[k]; - end - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries - assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[0].op - ))); - assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[0].op + for (genvar i = 0; i <= ariane_pkg::SUPERSCALAR; i++) 
begin + for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb + assign rs1_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i[i]) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o[i].op + ))); + assign rs2_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i[i]) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o[i].op + ))); + assign rs3_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i[i]) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o[i].op + ))); + assign rs_data[i][k] = wbdata_i[k]; + end + for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries + assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i[i]) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o[i].op + ))); + assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i[i]) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o[i].op + ))); + assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i[i]) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o[i].op + ))); + assign rs_data[i][k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result; + end + + // check whether we are accessing GPR[0] + assign rs1_valid_o[i] = rs1_valid[i] & ((|rs1_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_o[i].op ))); - assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[0].op 
+ assign rs2_valid_o[i] = rs2_valid[i] & ((|rs2_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_o[i].op ))); - assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result; - end + assign rs3_valid_o[i] = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid[i] & ((|rs3_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_o[i].op + ))) : rs3_valid[i]; - // check whether we are accessing GPR[0] - assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[0].op - ))); - assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[0].op - ))); - assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[0].op - ))) : rs3_valid; - - // use fixed prio here - // this implicitly gives higher prio to WB ports - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs1 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs1_fwd_req), - .gnt_o (), - .data_i (rs_data), - .gnt_i (1'b1), - .req_o (rs1_valid), - .data_o (rs1_o), - .idx_o () - ); - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs2 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs2_fwd_req), - .gnt_o (), - .data_i (rs_data), - .gnt_i (1'b1), - .req_o (rs2_valid), - .data_o (rs2_o), - .idx_o () - ); - - logic [CVA6Cfg.XLEN-1:0] rs3; - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs3 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs3_fwd_req), - .gnt_o (), - .data_i (rs_data), - .gnt_i (1'b1), - .req_o (rs3_valid), - .data_o (rs3), - .idx_o () - ); - - if 
(CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port - assign rs3_o = rs3[CVA6Cfg.XLEN-1:0]; - end else begin : gen_fp_three_port - assign rs3_o = rs3[CVA6Cfg.FLen-1:0]; + // use fixed prio here + // this implicitly gives higher prio to WB ports + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs1 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs1_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs1_valid[i]), + .data_o (rs1_o[i]), + .idx_o () + ); + + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs2 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs2_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs2_valid[i]), + .data_o (rs2_o[i]), + .idx_o () + ); + + logic [ariane_pkg::SUPERSCALAR:0][CVA6Cfg.XLEN-1:0] rs3; + + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs3 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs3_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs3_valid[i]), + .data_o (rs3[i]), + .idx_o () + ); + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port + assign rs3_o[i] = rs3[i][CVA6Cfg.XLEN-1:0]; + end else begin : gen_fp_three_port + assign rs3_o[i] = rs3[i][CVA6Cfg.FLen-1:0]; + end end @@ -462,7 +467,7 @@ module scoreboard #( end //RVFI - assign rvfi_issue_pointer_o = issue_pointer_q; + assign rvfi_issue_pointer_o = issue_pointer[ariane_pkg::SUPERSCALAR:0]; assign rvfi_commit_pointer_o = commit_pointer_q; //pragma translate_off