From e8f721b7a0d15885f119f3d06a5a973524535b65 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Tue, 5 Nov 2024 17:56:42 +0100 Subject: [PATCH 01/13] acc_dispatcher, load_store_unit: add mmu interface for accelerator --- core/acc_dispatcher.sv | 104 +++++++++++++++++----------------------- core/cva6.sv | 14 ++++++ core/ex_stage.sv | 5 ++ core/load_store_unit.sv | 14 +++++- 4 files changed, 76 insertions(+), 61 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index d00e7eb5f0..481db6025a 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -23,36 +23,10 @@ module acc_dispatcher parameter type exception_t = logic, parameter type fu_data_t = logic, parameter type scoreboard_entry_t = logic, - localparam type accelerator_req_t = struct packed { - logic req_valid; - logic resp_ready; - riscv::instruction_t insn; - logic [CVA6Cfg.XLEN-1:0] rs1; - logic [CVA6Cfg.XLEN-1:0] rs2; - fpnew_pkg::roundmode_e frm; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; - logic store_pending; - // Invalidation interface - logic acc_cons_en; - logic inval_ready; - }, + localparam type accelerator_req_t = acc_pkg::cva6_to_acc_t, parameter type acc_req_t = accelerator_req_t, - parameter type acc_resp_t = struct packed { - logic req_ready; - logic resp_valid; - logic [CVA6Cfg.XLEN-1:0] result; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; - exception_t exception; - // Metadata - logic store_pending; - logic store_complete; - logic load_complete; - logic [4:0] fflags; - logic fflags_valid; - // Invalidation interface - logic inval_valid; - logic [63:0] inval_addr; - }, + localparam type accelerator_resp_t = acc_pkg::acc_to_cva6_t, + parameter type acc_resp_t = accelerator_resp_t, parameter type acc_cfg_t = logic, parameter acc_cfg_t AccCfg = '0 ) ( @@ -69,6 +43,7 @@ module acc_dispatcher input logic [CVA6Cfg.NrPMPEntries-1:0][CVA6Cfg.PLEN-3:0] pmpaddr_i, input logic [2:0] fcsr_frm_i, output logic dirty_v_state_o, + input logic acc_mmu_en_i, // Interface with the issue stage input scoreboard_entry_t issue_instr_i, input logic issue_instr_hs_i, @@ -88,6 +63,9 @@ module acc_dispatcher output logic acc_stall_st_pending_o, input logic acc_no_st_pending_i, input dcache_req_i_t [2:0] dcache_req_ports_i, + // Interface with the MMU + output acc_pkg::acc_mmu_req_t acc_mmu_req_o, + input acc_pkg::acc_mmu_resp_t acc_mmu_resp_i, // Interface with the controller output logic ctrl_halt_o, input logic [11:0] csr_addr_i, @@ -219,20 +197,20 @@ module acc_dispatcher end // An accelerator instruction was issued. - if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0; + if (acc_req_o.acc_req.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0; end : p_non_speculative_ff /************************* * Accelerator request * *************************/ - accelerator_req_t acc_req; - logic acc_req_valid; - logic acc_req_ready; + acc_req_t acc_req; + logic acc_req_valid; + logic acc_req_ready; - accelerator_req_t acc_req_int; + acc_req_t acc_req_int; fall_through_register #( - .T(accelerator_req_t) + .T(acc_req_t) ) i_accelerator_req_register ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -242,18 +220,22 @@ module acc_dispatcher .valid_i (acc_req_valid), .ready_o (acc_req_ready), .data_o (acc_req_int), - .valid_o (acc_req_o.req_valid), + .valid_o (acc_req_o.acc_req.req_valid), .ready_i (acc_resp_i.req_ready) ); - assign acc_req_o.insn = acc_req_int.insn; - assign acc_req_o.rs1 = acc_req_int.rs1; - assign acc_req_o.rs2 = acc_req_int.rs2; - assign acc_req_o.frm = acc_req_int.frm; - assign acc_req_o.trans_id = acc_req_int.trans_id; - assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i; - assign acc_req_o.acc_cons_en = acc_cons_en_i; - assign acc_req_o.inval_ready = inval_ready_i; + assign acc_req_o.acc_req.insn = acc_req_int.insn; + assign acc_req_o.acc_req.rs1 = acc_req_int.rs1; + assign acc_req_o.acc_req.rs2 = acc_req_int.rs2; + assign acc_req_o.acc_req.frm = acc_req_int.frm; + assign acc_req_o.acc_req.trans_id = acc_req_int.trans_id; + assign acc_req_o.acc_req.store_pending = !acc_no_st_pending_i && acc_cons_en_i; + assign acc_req_o.acc_req.acc_cons_en = acc_cons_en_i; + assign acc_req_o.acc_req.inval_ready = inval_ready_i; + + // MMU interface + assign acc_req_o.acc_mmu_resp = acc_mmu_resp_i; + assign acc_req_o.acc_mmu_en = acc_mmu_en_i; always_comb begin : accelerator_req_dispatcher // Do not fetch from the instruction queue @@ -263,7 +245,7 @@ module acc_dispatcher acc_req = '0; acc_req_valid = 1'b0; - // Unpack fu_data_t into accelerator_req_t + // Unpack fu_data_t into acc_req_t if (!acc_insn_queue_empty) begin acc_req = '{ // Instruction is forwarded from the decoder as an immediate @@ -297,23 +279,27 @@ module acc_dispatcher logic acc_ld_disp; logic acc_st_disp; - assign acc_trans_id_o = acc_resp_i.trans_id; - assign acc_result_o = acc_resp_i.result; - assign acc_valid_o = acc_resp_i.resp_valid; - assign acc_exception_o = acc_resp_i.exception; + assign acc_trans_id_o = acc_resp_i.acc_resp.trans_id; + assign acc_result_o = acc_resp_i.acc_resp.result; + assign acc_valid_o = acc_resp_i.acc_resp.resp_valid; + assign acc_exception_o = acc_resp_i.acc_resp.exception; // Unpack the accelerator response - assign acc_fflags_valid_o = acc_resp_i.fflags_valid; - assign acc_fflags_o = acc_resp_i.fflags; + assign acc_fflags_valid_o = acc_resp_i.acc_resp.fflags_valid; + assign acc_fflags_o = acc_resp_i.acc_resp.fflags; + + // MMU interface + assign acc_mmu_req_o = acc_resp_i.acc_mmu_req; + // Always ready to receive responses - assign acc_req_o.resp_ready = 1'b1; + assign acc_req_o.acc_req.resp_ready = 1'b1; // Signal dispatched load/store to issue stage assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); // Cache invalidation - assign inval_valid_o = acc_resp_i.inval_valid; - assign inval_addr_o = acc_resp_i.inval_addr; + assign inval_valid_o = acc_resp_i.acc_resp.inval_valid; + assign inval_addr_o = acc_resp_i.acc_resp.inval_addr; /************************** * Accelerator commit * @@ -351,7 +337,7 @@ module acc_dispatcher `FF(wait_acc_store_q, wait_acc_store_d, '0) // Set on store barrier. Clear when no store is pending. - assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending; + assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.acc_resp.store_pending; assign ctrl_halt_o = wait_acc_store_q; /************************** @@ -390,9 +376,9 @@ module acc_dispatcher .clk_i (clk_i), .rst_ni (rst_ni), .clear_i (1'b0), - .en_i (acc_ld_disp ^ acc_resp_i.load_complete), + .en_i (acc_ld_disp ^ acc_resp_i.acc_resp.load_complete), .load_i (1'b0), - .down_i (acc_resp_i.load_complete), + .down_i (acc_resp_i.acc_resp.load_complete), .d_i ('0), .q_o (acc_disp_loads_pending), .overflow_o(acc_disp_loads_overflow) @@ -435,9 +421,9 @@ module acc_dispatcher .clk_i (clk_i), .rst_ni (rst_ni), .clear_i (1'b0), - .en_i (acc_st_disp ^ acc_resp_i.store_complete), + .en_i (acc_st_disp ^ acc_resp_i.acc_resp.store_complete), .load_i (1'b0), - .down_i (acc_resp_i.store_complete), + .down_i (acc_resp_i.acc_resp.store_complete), .d_i ('0), .q_o (acc_disp_stores_pending), .overflow_o(acc_disp_stores_overflow) diff --git a/core/cva6.sv b/core/cva6.sv index ffb6d28460..13903f86b0 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -500,6 +500,11 @@ module cva6 // ACCEL Commit logic acc_valid_acc_ex; // -------------- + // EX <-> ACC_DISP + // -------------- + acc_pkg::acc_mmu_req_t acc_mmu_req; + acc_pkg::acc_mmu_resp_t acc_mmu_resp; + // -------------- // ID <-> COMMIT // -------------- scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_id_commit; @@ -981,6 +986,9 @@ module cva6 .x_result_ready_o (x_result_ready), // Accelerator .acc_valid_i (acc_valid_acc_ex), + // Accelerator MMU access + .acc_mmu_req_i (acc_mmu_req), + .acc_mmu_resp_o (acc_mmu_resp), // Performance counters .itlb_miss_o (itlb_miss_ex_perf), .dtlb_miss_o (dtlb_miss_ex_perf), @@ -1476,6 +1484,7 @@ module cva6 .pmpcfg_i (pmpcfg), .pmpaddr_i (pmpaddr), .fcsr_frm_i (frm_csr_id_issue_ex), + .acc_mmu_en_i (enable_translation_csr_ex), .dirty_v_state_o (dirty_v_state), .issue_instr_i (issue_instr_id_acc), .issue_instr_hs_i (issue_instr_hs_id_acc), @@ -1492,6 +1501,8 @@ module cva6 .acc_stall_st_pending_o(stall_st_pending_ex), .acc_no_st_pending_i (no_st_pending_commit), .dcache_req_ports_i (dcache_req_ports_ex_cache), + .acc_mmu_req_o (acc_mmu_req), + .acc_mmu_resp_i (acc_mmu_resp), .ctrl_halt_o (halt_acc_ctrl), .csr_addr_i (csr_addr_ex_csr), .acc_dcache_req_ports_o(dcache_req_ports_acc_cache), @@ -1521,6 +1532,9 @@ module cva6 // D$ connection is unused assign dcache_req_ports_acc_cache = '0; + // MMU access is unused + assign acc_mmu_req = '0; + // No invalidation interface assign inval_valid = '0; assign inval_addr = '0; diff --git a/core/ex_stage.sv b/core/ex_stage.sv index 115c17f1c5..b488633018 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -159,6 +159,9 @@ module ex_stage input logic x_transaction_rejected_i, // accelerate port result is valid - ACC_DISPATCHER input logic acc_valid_i, + // Accelerator MMU access + input acc_pkg::acc_mmu_req_t acc_mmu_req_i, + output acc_pkg::acc_mmu_resp_t acc_mmu_resp_o, // Enable virtual memory translation - CSR_REGFILE input logic enable_translation_i, // Enable G-Stage memory translation - CSR_REGFILE @@ -551,6 +554,8 @@ module ex_stage .enable_g_translation_i, .en_ld_st_translation_i, .en_ld_st_g_translation_i, + .acc_mmu_req_i, + .acc_mmu_resp_o, .icache_areq_i, .icache_areq_o, .priv_lvl_i, diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 134b6a3dc1..6ecc043742 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -82,6 +82,10 @@ module load_store_unit // Enable G-Stage memory translation for load/stores - TO_BE_COMPLETED input logic en_ld_st_g_translation_i, + // Accelerator request for CVA6's MMU + input acc_pkg::acc_mmu_req_t acc_mmu_req_i, + output acc_pkg::acc_mmu_resp_t acc_mmu_resp_o, + // Instruction cache input request - CACHES input icache_arsp_t icache_areq_i, // Instruction cache output request - CACHES @@ -237,6 +241,14 @@ module load_store_unit logic [CVA6Cfg.PPNW-1:0] satp_ppn[2:0]; logic [CVA6Cfg.ASID_WIDTH-1:0] asid[2:0], asid_to_be_flushed[1:0]; logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed[1:0]; + + // Accelerator's request for the MMU + assign acc_mmu_resp_o.acc_mmu_dtlb_hit_o = '0; + assign acc_mmu_resp_o.acc_mmu_dtlb_ppn_o = '0; + assign acc_mmu_resp_o.acc_mmu_valid_o = '0; + assign acc_mmu_resp_o.acc_mmu_paddr_o = '0; + assign acc_mmu_resp_o.acc_mmu_exception_o = '0; + // ------------------- // MMU e.g.: TLBs/PTW // ------------------- @@ -766,5 +778,3 @@ module load_store_unit assign rvfi_lsu_ctrl_o = lsu_ctrl; endmodule - - From 0817830c0bab85ca259e11da91c15c10415899ac Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Tue, 5 Nov 2024 18:18:06 +0100 Subject: [PATCH 02/13] load_store_unit: add MMU accelerator port --- core/load_store_unit.sv | 284 ++++++++++++++++++++++++++-------------- 1 file changed, 187 insertions(+), 97 deletions(-) diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 6ecc043742..9004c0bd46 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -169,7 +169,7 @@ module load_store_unit // -------------------------------------- // those are the signals which are always correct // e.g.: they keep the value in the stall case - lsu_ctrl_t lsu_ctrl; + lsu_ctrl_t lsu_ctrl, lsu_ctrl_byp; logic pop_st; logic pop_ld; @@ -197,7 +197,7 @@ module load_store_unit logic st_valid_i; logic ld_valid_i; logic ld_translation_req; - logic st_translation_req; + logic st_translation_req, cva6_st_translation_req, acc_st_translation_req; logic [CVA6Cfg.VLEN-1:0] ld_vaddr; logic [ 31:0] ld_tinst; logic ld_hs_ld_st_inst; @@ -206,49 +206,41 @@ module load_store_unit logic [ 31:0] st_tinst; logic st_hs_ld_st_inst; logic st_hlvx_inst; - logic translation_req; - logic translation_valid; - logic [CVA6Cfg.VLEN-1:0] mmu_vaddr; - logic [CVA6Cfg.PLEN-1:0] mmu_paddr, lsu_paddr; - logic [ 31:0] mmu_tinst; - logic mmu_hs_ld_st_inst; - logic mmu_hlvx_inst; - exception_t mmu_exception; - exception_t pmp_exception; - icache_areq_t pmp_icache_areq_i; - logic pmp_translation_valid; - logic dtlb_hit; - logic [ CVA6Cfg.PPNW-1:0] dtlb_ppn; - - logic ld_valid; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] ld_trans_id; - logic [ CVA6Cfg.XLEN-1:0] ld_result; - logic st_valid; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] st_trans_id; - logic [ CVA6Cfg.XLEN-1:0] st_result; - - logic [ 11:0] page_offset; - logic page_offset_matches; - - exception_t misaligned_exception; - exception_t ld_ex; - exception_t st_ex; - - logic hs_ld_st_inst; - logic hlvx_inst; - + logic translation_req, cva6_translation_req, acc_translation_req; + logic translation_valid, cva6_translation_valid, acc_translataion_valid; + logic [CVA6Cfg.VLEN-1:0] mmu_vaddr, cva6_mmu_vaddr, acc_mmu_vaddr; + logic [CVA6Cfg.PLEN-1:0] mmu_paddr, cva6_mmu_paddr, acc_mmu_paddr, lsu_paddr; + logic [ 31:0] mmu_tinst; + logic mmu_hs_ld_st_inst; + logic mmu_hlvx_inst; + exception_t mmu_exception, cva6_mmu_exception, acc_mmu_exception; + exception_t pmp_exception; + icache_areq_t pmp_icache_areq_i; + logic pmp_translation_valid; + logic dtlb_hit, cva6_dtlb_hit, acc_dtlb_hit; + logic [ CVA6Cfg.PPNW-1:0] dtlb_ppn, cva6_dtlb_ppn, acc_dtlb_ppn; + + logic ld_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] ld_trans_id; + logic [ CVA6Cfg.XLEN-1:0] ld_result; + logic st_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] st_trans_id; + logic [ CVA6Cfg.XLEN-1:0] st_result; + + logic [ 11:0] page_offset; + logic page_offset_matches; + + exception_t misaligned_exception, cva6_misaligned_exception, acc_misaligned_exception; + exception_t ld_ex; + exception_t st_ex; + + logic hs_ld_st_inst; + logic hlvx_inst; logic [1:0] sum, mxr; logic [CVA6Cfg.PPNW-1:0] satp_ppn[2:0]; logic [CVA6Cfg.ASID_WIDTH-1:0] asid[2:0], asid_to_be_flushed[1:0]; logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed[1:0]; - // Accelerator's request for the MMU - assign acc_mmu_resp_o.acc_mmu_dtlb_hit_o = '0; - assign acc_mmu_resp_o.acc_mmu_dtlb_ppn_o = '0; - assign acc_mmu_resp_o.acc_mmu_valid_o = '0; - assign acc_mmu_resp_o.acc_mmu_paddr_o = '0; - assign acc_mmu_resp_o.acc_mmu_exception_o = '0; - // ------------------- // MMU e.g.: TLBs/PTW // ------------------- @@ -399,6 +391,104 @@ module load_store_unit .pmpaddr_i (pmpaddr_i) ); + // ------------------ + // External MMU port + // ------------------ + + if (CVA6Cfg.EnableAccelerator) begin + // The MMU can be connected to CVA6 or the ACCELERATOR + enum logic {CVA6, ACC} mmu_state_d, mmu_state_q; + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mmu_state_q <= CVA6; + end else begin + mmu_state_q <= mmu_state_d; + end + end + // Straightforward and slow-reactive MMU arbitration logic + // This logic can be optimized to reduce answer latency and contention + always_comb begin + // Maintain state + mmu_state_d = mmu_state_q; + // Serve CVA6 and gate the accelerator by default + // MMU input + misaligned_exception = cva6_misaligned_exception; + st_translation_req = cva6_st_translation_req; + translation_req = cva6_translation_req; + mmu_vaddr = cva6_mmu_vaddr; + // MMU output + cva6_translation_valid = translation_valid; + cva6_mmu_paddr = mmu_paddr; + cva6_mmu_exception = mmu_exception; + cva6_dtlb_hit = dtlb_hit; + cva6_dtlb_ppn = dtlb_ppn; + acc_mmu_resp_o.acc_mmu_valid = '0; + acc_mmu_resp_o.acc_mmu_paddr = '0; + acc_mmu_resp_o.acc_mmu_exception = '0; + acc_mmu_resp_o.acc_mmu_dtlb_hit = '0; + acc_mmu_resp_o.acc_mmu_dtlb_ppn = '0; + unique case (mmu_state_q) + CVA6: begin + // Only the accelerator is requesting, and the lsu bypass queue is empty. + if (acc_mmu_req_i.acc_mmu_req && !lsu_valid_i && lsu_ready_o) begin + // Lock the MMU to the accelerator. + // If the issue stage is firing a mem op in this cycle, + // the bypass queue will buffer it. + mmu_state_d = ACC; + end + // Make this a mealy FSM to cut some latency. + // It should be okay timing-wise since cva6's requests already + // depend on lsu_valid_i. Moreover, lsu_ready_o is sequentially + // generated by the bypass and, in this first implementation, + // the acc request already depends combinatorially upon acc_mmu_req_i.acc_mmu_req. + end + ACC: begin + // MMU input + misaligned_exception = acc_mmu_req_i.acc_mmu_misaligned_ex; + st_translation_req = acc_mmu_req_i.acc_mmu_is_store; + translation_req = acc_mmu_req_i.acc_mmu_req; + mmu_vaddr = acc_mmu_req_i.acc_mmu_vaddr; + // MMU output + acc_mmu_resp_o.acc_mmu_valid = translation_valid; + acc_mmu_resp_o.acc_mmu_paddr = mmu_paddr; + acc_mmu_resp_o.acc_mmu_exception = mmu_exception; + acc_mmu_resp_o.acc_mmu_dtlb_hit = dtlb_hit; + acc_mmu_resp_o.acc_mmu_dtlb_ppn = dtlb_ppn; + cva6_translation_valid = '0; + cva6_mmu_paddr = '0; + cva6_mmu_exception = '0; + cva6_dtlb_hit = '0; + cva6_dtlb_ppn = '0; + // Get back to CVA6 after the translation + if (translation_valid) mmu_state_d = CVA6; + end + default: mmu_state_d = CVA6; + endcase + end + always_comb begin + // Feed forward + lsu_ctrl = lsu_ctrl_byp; + // Mask the lsu valid so that cva6's req gets buffered in the + // bypass queue when the MMU is being used by the accelerator. + lsu_ctrl.valid = (mmu_state_q == ACC) ? 1'b0 : lsu_ctrl_byp.valid; + end + end else begin + // MMU input + assign misaligned_exception = cva6_misaligned_exception; + assign st_translation_req = cva6_st_translation_req; + assign translation_req = cva6_translation_req; + assign mmu_vaddr = cva6_mmu_vaddr; + // MMU output + assign cva6_translation_valid = translation_valid; + assign cva6_mmu_paddr = mmu_paddr; + assign cva6_mmu_exception = mmu_exception; + assign cva6_dtlb_hit = dtlb_hit; + assign cva6_dtlb_ppn = dtlb_ppn; + // No accelerator + assign acc_mmu_resp_o = '0; + // Feed forward the lsu_ctrl bypass + assign lsu_ctrl = lsu_ctrl_byp; + end logic store_buffer_empty; // ------------------ @@ -430,15 +520,15 @@ module load_store_unit .result_o (st_result), .ex_o (st_ex), // MMU port - .translation_req_o (st_translation_req), + .translation_req_o (cva6_st_translation_req), .vaddr_o (st_vaddr), .rvfi_mem_paddr_o (rvfi_mem_paddr_o), .tinst_o (st_tinst), .hs_ld_st_inst_o (st_hs_ld_st_inst), .hlvx_inst_o (st_hlvx_inst), - .paddr_i (mmu_paddr), - .ex_i (mmu_exception), - .dtlb_hit_i (dtlb_hit), + .paddr_i (cva6_mmu_paddr), + .ex_i (cva6_mmu_exception), + .dtlb_hit_i (cva6_dtlb_hit), // Load Unit .page_offset_i (page_offset), .page_offset_matches_o(page_offset_matches), @@ -477,10 +567,10 @@ module load_store_unit .tinst_o (ld_tinst), .hs_ld_st_inst_o (ld_hs_ld_st_inst), .hlvx_inst_o (ld_hlvx_inst), - .paddr_i (mmu_paddr), - .ex_i (mmu_exception), - .dtlb_hit_i (dtlb_hit), - .dtlb_ppn_i (dtlb_ppn), + .paddr_i (cva6_mmu_paddr), + .ex_i (cva6_mmu_exception), + .dtlb_hit_i (cva6_dtlb_hit), + .dtlb_ppn_i (cva6_dtlb_ppn), // to store unit .page_offset_o (page_offset), .page_offset_matches_i(page_offset_matches), @@ -525,19 +615,19 @@ module load_store_unit ld_valid_i = 1'b0; st_valid_i = 1'b0; - translation_req = 1'b0; - mmu_vaddr = {CVA6Cfg.VLEN{1'b0}}; - mmu_tinst = {32{1'b0}}; - mmu_hs_ld_st_inst = 1'b0; - mmu_hlvx_inst = 1'b0; + cva6_translation_req = 1'b0; + cva6_mmu_vaddr = {CVA6Cfg.VLEN{1'b0}}; + mmu_tinst = {32{1'b0}}; + mmu_hs_ld_st_inst = 1'b0; + mmu_hlvx_inst = 1'b0; // check the operation to activate the right functional unit accordingly unique case (lsu_ctrl.fu) // all loads go here LOAD: begin - ld_valid_i = lsu_ctrl.valid; - translation_req = ld_translation_req; - mmu_vaddr = ld_vaddr; + ld_valid_i = lsu_ctrl.valid; + cva6_translation_req = ld_translation_req; + cva6_mmu_vaddr = ld_vaddr; if (CVA6Cfg.RVH) begin mmu_tinst = ld_tinst; mmu_hs_ld_st_inst = ld_hs_ld_st_inst; @@ -546,9 +636,9 @@ module load_store_unit end // all stores go here STORE: begin - st_valid_i = lsu_ctrl.valid; - translation_req = st_translation_req; - mmu_vaddr = st_vaddr; + st_valid_i = lsu_ctrl.valid; + cva6_translation_req = st_translation_req; + cva6_mmu_vaddr = st_vaddr; if (CVA6Cfg.RVH) begin mmu_tinst = st_tinst; mmu_hs_ld_st_inst = st_hs_ld_st_inst; @@ -606,7 +696,7 @@ module load_store_unit // the misaligned exception is passed to the functional unit via the MMU, which in case // can augment the exception if other memory related exceptions like a page fault or access errors always_comb begin : data_misaligned_detection - misaligned_exception = { + cva6_misaligned_exception = { {CVA6Cfg.XLEN{1'b0}}, {CVA6Cfg.XLEN{1'b0}}, {CVA6Cfg.GPLEN{1'b0}}, {32{1'b0}}, 1'b0, 1'b0 }; data_misaligned = 1'b0; @@ -652,26 +742,26 @@ module load_store_unit if (data_misaligned) begin case (lsu_ctrl.fu) LOAD: begin - misaligned_exception.cause = riscv::LD_ADDR_MISALIGNED; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::LD_ADDR_MISALIGNED; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end STORE: begin - misaligned_exception.cause = riscv::ST_ADDR_MISALIGNED; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::ST_ADDR_MISALIGNED; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end default: ; @@ -682,25 +772,25 @@ module load_store_unit case (lsu_ctrl.fu) LOAD: begin - misaligned_exception.cause = riscv::LOAD_PAGE_FAULT; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::LOAD_PAGE_FAULT; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end STORE: begin - misaligned_exception.cause = riscv::STORE_PAGE_FAULT; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::STORE_PAGE_FAULT; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end default: ; @@ -711,25 +801,25 @@ module load_store_unit case (lsu_ctrl.fu) LOAD: begin - misaligned_exception.cause = riscv::LOAD_GUEST_PAGE_FAULT; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::LOAD_GUEST_PAGE_FAULT; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end STORE: begin - misaligned_exception.cause = riscv::STORE_GUEST_PAGE_FAULT; - misaligned_exception.valid = 1'b1; + cva6_misaligned_exception.cause = riscv::STORE_GUEST_PAGE_FAULT; + cva6_misaligned_exception.valid = 1'b1; if (CVA6Cfg.TvalEn) - misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; + cva6_misaligned_exception.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}; if (CVA6Cfg.RVH) begin - misaligned_exception.tval2 = '0; - misaligned_exception.tinst = lsu_ctrl.tinst; - misaligned_exception.gva = ld_st_v_i; + cva6_misaligned_exception.tval2 = '0; + cva6_misaligned_exception.tinst = lsu_ctrl.tinst; + cva6_misaligned_exception.gva = ld_st_v_i; end end default: ; @@ -771,7 +861,7 @@ module load_store_unit .pop_ld_i (pop_ld), .pop_st_i (pop_st), - .lsu_ctrl_o(lsu_ctrl), + .lsu_ctrl_o(lsu_ctrl_byp), .ready_o (lsu_ready_o) ); From 9cfd27653cbde8c14e6b70b4b2ff95fa755845f3 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Tue, 5 Nov 2024 23:22:42 +0100 Subject: [PATCH 03/13] ara: fix parametrization --- core/acc_dispatcher.sv | 12 ++++++------ core/cva6.sv | 28 ++++++++++++++++++++++++---- core/ex_stage.sv | 12 ++++++++---- core/load_store_unit.sv | 8 +++++--- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index 481db6025a..637d91d7ed 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -23,10 +23,10 @@ module acc_dispatcher parameter type exception_t = logic, parameter type fu_data_t = logic, parameter type scoreboard_entry_t = logic, - localparam type accelerator_req_t = acc_pkg::cva6_to_acc_t, - parameter type acc_req_t = accelerator_req_t, - localparam type accelerator_resp_t = acc_pkg::acc_to_cva6_t, - parameter type acc_resp_t = accelerator_resp_t, + parameter type acc_req_t = logic, + parameter type acc_resp_t = logic, + parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_resp_t = logic, parameter type acc_cfg_t = logic, parameter acc_cfg_t AccCfg = '0 ) ( @@ -64,8 +64,8 @@ module acc_dispatcher input logic acc_no_st_pending_i, input dcache_req_i_t [2:0] dcache_req_ports_i, // Interface with the MMU - output acc_pkg::acc_mmu_req_t acc_mmu_req_o, - input acc_pkg::acc_mmu_resp_t acc_mmu_resp_i, + output acc_mmu_req_t acc_mmu_req_o, + input acc_mmu_resp_t acc_mmu_resp_i, // Interface with the controller output logic ctrl_halt_o, input logic [11:0] csr_addr_i, diff --git a/core/cva6.sv b/core/cva6.sv index 13903f86b0..a4e666f516 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -211,6 +211,22 @@ module cva6 logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] data_ruser; }, + // Accelerator - CVA6's MMU + localparam type acc_mmu_req_t = struct packed { + logic acc_mmu_misaligned_ex; + logic acc_mmu_req; + logic [CVA6Cfg.VLEN-1:0] acc_mmu_vaddr; + logic acc_mmu_is_store; + }, + + localparam type acc_mmu_resp_t = struct packed { + logic acc_mmu_dtlb_hit; + logic [CVA6Cfg.PPNW-1:0] acc_mmu_dtlb_ppn; + logic acc_mmu_valid; + logic [CVA6Cfg.PLEN-1:0] acc_mmu_paddr; + exception_t acc_mmu_exception; + }, + // AXI types parameter type axi_ar_chan_t = struct packed { logic [CVA6Cfg.AxiIdWidth-1:0] id; @@ -502,8 +518,8 @@ module cva6 // -------------- // EX <-> ACC_DISP // -------------- - acc_pkg::acc_mmu_req_t acc_mmu_req; - acc_pkg::acc_mmu_resp_t acc_mmu_resp; + acc_mmu_req_t acc_mmu_req; + acc_mmu_resp_t acc_mmu_resp; // -------------- // ID <-> COMMIT // -------------- @@ -903,7 +919,9 @@ module cva6 .icache_dreq_t(icache_dreq_t), .icache_drsp_t(icache_drsp_t), .lsu_ctrl_t(lsu_ctrl_t), - .x_result_t(x_result_t) + .x_result_t(x_result_t), + .acc_mmu_req_t(acc_mmu_req_t), + .acc_mmu_resp_t(acc_mmu_resp_t) ) ex_stage_i ( .clk_i(clk_i), .rst_ni(rst_ni), @@ -1468,7 +1486,9 @@ module cva6 .acc_cfg_t (acc_cfg_t), .AccCfg (AccCfg), .acc_req_t (cvxif_req_t), - .acc_resp_t (cvxif_resp_t) + .acc_resp_t (cvxif_resp_t), + .acc_mmu_req_t (acc_mmu_req_t), + .acc_mmu_resp_t (acc_mmu_resp_t) ) i_acc_dispatcher ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/ex_stage.sv b/core/ex_stage.sv index b488633018..b1a9984bc6 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -29,7 +29,9 @@ module ex_stage parameter type icache_dreq_t = logic, parameter type icache_drsp_t = logic, parameter type lsu_ctrl_t = logic, - parameter type x_result_t = logic + parameter type x_result_t = logic, + parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_resp_t = logic ) ( // Subsystem Clock - SUBSYSTEM input logic clk_i, @@ -160,8 +162,8 @@ module ex_stage // accelerate port result is valid - ACC_DISPATCHER input logic acc_valid_i, // Accelerator MMU access - input acc_pkg::acc_mmu_req_t acc_mmu_req_i, - output acc_pkg::acc_mmu_resp_t acc_mmu_resp_o, + input acc_mmu_req_t acc_mmu_req_i, + output acc_mmu_resp_t acc_mmu_resp_o, // Enable virtual memory translation - CSR_REGFILE input logic enable_translation_i, // Enable G-Stage memory translation - CSR_REGFILE @@ -529,7 +531,9 @@ module ex_stage .icache_arsp_t(icache_arsp_t), .icache_dreq_t(icache_dreq_t), .icache_drsp_t(icache_drsp_t), - .lsu_ctrl_t(lsu_ctrl_t) + .lsu_ctrl_t(lsu_ctrl_t), + .acc_mmu_req_t(acc_mmu_req_t), + .acc_mmu_resp_t(acc_mmu_resp_t) ) lsu_i ( .clk_i, .rst_ni, diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 9004c0bd46..978a18d471 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -25,7 +25,9 @@ module load_store_unit parameter type icache_arsp_t = logic, parameter type icache_dreq_t = logic, parameter type icache_drsp_t = logic, - parameter type lsu_ctrl_t = logic + parameter type lsu_ctrl_t = logic, + parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_resp_t = logic ) ( // Subsystem Clock - SUBSYSTEM input logic clk_i, @@ -83,8 +85,8 @@ module load_store_unit input logic en_ld_st_g_translation_i, // Accelerator request for CVA6's MMU - input acc_pkg::acc_mmu_req_t acc_mmu_req_i, - output acc_pkg::acc_mmu_resp_t acc_mmu_resp_o, + input acc_mmu_req_t acc_mmu_req_i, + output acc_mmu_resp_t acc_mmu_resp_o, // Instruction cache input request - CACHES input icache_arsp_t icache_areq_i, From 8cf71b19da84537d49b209bd8dc864219d8e1aba Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:01:33 +0100 Subject: [PATCH 04/13] tracer: parametrize interface on NrCommitPorts --- common/local/util/instr_tracer.sv | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/common/local/util/instr_tracer.sv b/common/local/util/instr_tracer.sv index 9083664069..850465c690 100644 --- a/common/local/util/instr_tracer.sv +++ b/common/local/util/instr_tracer.sv @@ -25,32 +25,32 @@ module instr_tracer #( parameter type exception_t = logic, parameter interrupts_t INTERRUPTS = '0 )( - input logic pck, - input logic rstn, - input logic flush_unissued, - input logic flush_all, - input logic [31:0] instruction, - input logic fetch_valid, - input logic fetch_ack, - input logic issue_ack, // issue acknowledged - input scoreboard_entry_t issue_sbe, // issue scoreboard entry - input logic [1:0][4:0] waddr, // WB stage - input logic [1:0][63:0] wdata, - input logic [1:0] we_gpr, - input logic [1:0] we_fpr, - input scoreboard_entry_t [1:0] commit_instr, // commit instruction - input logic [1:0] commit_ack, - input logic st_valid, // stores - address translation - input logic [CVA6Cfg.PLEN-1:0] st_paddr, - input logic ld_valid, // loads - input logic ld_kill, - input logic [CVA6Cfg.PLEN-1:0] ld_paddr, - input bp_resolve_t resolve_branch, // misprediction - input exception_t commit_exception, - input riscv::priv_lvl_t priv_lvl, // current privilege level - input logic debug_mode, + input logic pck, + input logic rstn, + input logic flush_unissued, + input logic flush_all, + input logic [31:0] instruction, + input logic fetch_valid, + input logic fetch_ack, + input logic issue_ack, // issue acknowledged + input scoreboard_entry_t issue_sbe, // issue scoreboard entry + input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr, // WB stage + input logic [CVA6Cfg.NrCommitPorts-1:0][63:0] wdata, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr, + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr, // commit instruction + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack, + input logic st_valid, // stores - address translation + input logic [CVA6Cfg.PLEN-1:0] st_paddr, + input logic ld_valid, // loads + input logic ld_kill, + input logic [CVA6Cfg.PLEN-1:0] ld_paddr, + input bp_resolve_t resolve_branch, // misprediction + input exception_t commit_exception, + input riscv::priv_lvl_t priv_lvl, // current privilege level + input logic debug_mode, - input logic[CVA6Cfg.XLEN-1:0] hart_id_i + input logic[CVA6Cfg.XLEN-1:0] hart_id_i ); // keep the decoded instructions in a queue From be4f21f4cf2f100f59a48f44d857f09e7b921ca2 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:02:33 +0100 Subject: [PATCH 05/13] cva6: :bug: guard xif signals within CvxifEn --- core/cva6.sv | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/cva6.sv b/core/cva6.sv index a4e666f516..da4ca032b0 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -765,16 +765,17 @@ module cva6 assign ex_ex_ex_id[FPU_WB] = fpu_exception_ex_id; assign wt_valid_ex_id[FPU_WB] = fpu_valid_ex_id; - always_comb begin : gen_cvxif_input_assignement - x_compressed_ready = cvxif_resp_i.compressed_ready; - x_compressed_resp = cvxif_resp_i.compressed_resp; - x_issue_ready = cvxif_resp_i.issue_ready; - x_issue_resp = cvxif_resp_i.issue_resp; - x_register_ready = cvxif_resp_i.register_ready; - x_result_valid = cvxif_resp_i.result_valid; - x_result = cvxif_resp_i.result; - end if (CVA6Cfg.CvxifEn) begin + always_comb begin : gen_cvxif_input_assignement + x_compressed_ready = cvxif_resp_i.compressed_ready; + x_compressed_resp = cvxif_resp_i.compressed_resp; + x_issue_ready = cvxif_resp_i.issue_ready; + x_issue_resp = cvxif_resp_i.issue_resp; + x_register_ready = cvxif_resp_i.register_ready; + x_result_valid = cvxif_resp_i.result_valid; + x_result = cvxif_resp_i.result; + end + always_comb begin : gen_cvxif_output_assignement cvxif_req.compressed_valid = x_compressed_valid; cvxif_req.compressed_req = x_compressed_req; From c992be3ebde423dec0f4fcea9b5670d8ab617a1b Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:18:40 +0100 Subject: [PATCH 06/13] acc_dispatcher: add accelerator_req interface definitions --- core/acc_dispatcher.sv | 8 +++++--- core/cva6.sv | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index 637d91d7ed..604322c86f 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -25,6 +25,8 @@ module acc_dispatcher parameter type scoreboard_entry_t = logic, parameter type acc_req_t = logic, parameter type acc_resp_t = logic, + parameter type accelerator_req_t = logic, + parameter type accelerator_resp_t = logic, parameter type acc_mmu_req_t = logic, parameter type acc_mmu_resp_t = logic, parameter type acc_cfg_t = logic, @@ -204,13 +206,13 @@ module acc_dispatcher * Accelerator request * *************************/ - acc_req_t acc_req; + accelerator_req_t acc_req; logic acc_req_valid; logic acc_req_ready; - acc_req_t acc_req_int; + accelerator_req_t acc_req_int; fall_through_register #( - .T(acc_req_t) + .T(accelerator_req_t) ) i_accelerator_req_register ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/cva6.sv b/core/cva6.sv index da4ca032b0..593878009a 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -211,6 +211,35 @@ module cva6 logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] data_ruser; }, + // Accelerator - CVA6's + localparam type accelerator_req_t = struct packed { + logic req_valid; + logic resp_ready; + riscv::instruction_t insn; + logic [CVA6Cfg.XLEN-1:0] rs1; + logic [CVA6Cfg.XLEN-1:0] rs2; + fpnew_pkg::roundmode_e frm; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + logic acc_cons_en; // Invalidation interface + logic inval_ready; // Invalidation interface + }, + + localparam type accelerator_resp_t = struct packed { + logic req_ready; + logic resp_valid; + logic [CVA6Cfg.XLEN-1:0] result; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + exception_t exception; + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + logic inval_valid; // Invalidation interface + logic [63:0] inval_addr; // Invalidation interface + }, + // Accelerator - CVA6's MMU localparam type acc_mmu_req_t = struct packed { logic acc_mmu_misaligned_ex; @@ -1488,6 +1517,8 @@ module cva6 .AccCfg (AccCfg), .acc_req_t (cvxif_req_t), .acc_resp_t (cvxif_resp_t), + .accelerator_req_t (accelerator_req_t), + .accelerator_resp_t(accelerator_resp_t), .acc_mmu_req_t (acc_mmu_req_t), .acc_mmu_resp_t (acc_mmu_resp_t) ) i_acc_dispatcher ( From 3752dceb7c1949da086380b9e6440f8ac91fa058 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:06:35 +0100 Subject: [PATCH 07/13] acc_dispatcher: fix signal names --- core/acc_dispatcher.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index 604322c86f..09f57d8c41 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -199,7 +199,7 @@ module acc_dispatcher end // An accelerator instruction was issued. - if (acc_req_o.acc_req.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0; + if (acc_req_o.acc_req.req_valid) insn_ready_d[acc_req_o.acc_req.trans_id] = 1'b0; end : p_non_speculative_ff /************************* @@ -223,7 +223,7 @@ module acc_dispatcher .ready_o (acc_req_ready), .data_o (acc_req_int), .valid_o (acc_req_o.acc_req.req_valid), - .ready_i (acc_resp_i.req_ready) + .ready_i (acc_resp_i.acc_resp.req_ready) ); assign acc_req_o.acc_req.insn = acc_req_int.insn; From 8ca94e1b086fce260a4c5f5f7683f1693f977f7e Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:48:46 +0100 Subject: [PATCH 08/13] icache: guard user-icache-line assignments with FETCH_USER_EN --- core/cache_subsystem/cva6_icache.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/cache_subsystem/cva6_icache.sv b/core/cache_subsystem/cva6_icache.sv index d97fed369a..a173af54ce 100644 --- a/core/cache_subsystem/cva6_icache.sv +++ b/core/cache_subsystem/cva6_icache.sv @@ -426,7 +426,7 @@ module cva6_icache for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; - assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH]; + assign cl_user[i] = CVA6Cfg.FETCH_USER_EN ? cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH] : '0; end @@ -441,10 +441,10 @@ module cva6_icache always_comb begin if (cmp_en_q) begin dreq_o.data = cl_sel[hit_idx]; - dreq_o.user = cl_user[hit_idx]; + dreq_o.user = CVA6Cfg.FETCH_USER_EN ? cl_user[hit_idx] : '0; end else begin dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; - dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH]; + dreq_o.user = CVA6Cfg.FETCH_USER_EN ? mem_rtrn_i.user[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH] : '0; end end From bf13267ab38f1c2edfc4bc9946d1332b17755e38 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 6 Nov 2024 17:49:16 +0100 Subject: [PATCH 09/13] cv64a6_imafdcv_sv39_config_pkg: use safe value of unused feature to avoid warnings --- core/include/cv64a6_imafdcv_sv39_config_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv index 2c642f5345..28291d5774 100644 --- a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv @@ -32,7 +32,7 @@ package cva6_config_pkg; localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigFetchUserEn = 0; - localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; + localparam CVA6ConfigFetchUserWidth = 1; // Just not to raise warnings localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; From cd91e7209974b9b4f89fb1ec9bcdcf892fe8e45d Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 11 Nov 2024 10:16:06 +0100 Subject: [PATCH 10/13] acc_dispatcher: cut combinatorial path from CVA6 to Ara --- core/acc_dispatcher.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index 09f57d8c41..f1f13764e8 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -211,13 +211,11 @@ module acc_dispatcher logic acc_req_ready; accelerator_req_t acc_req_int; - fall_through_register #( + spill_register #( .T(accelerator_req_t) ) i_accelerator_req_register ( .clk_i (clk_i), .rst_ni (rst_ni), - .clr_i (1'b0), - .testmode_i(1'b0), .data_i (acc_req), .valid_i (acc_req_valid), .ready_o (acc_req_ready), From 477c1c223e8ba255d90ee1887059f6dcf6e540a2 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 11 Nov 2024 10:31:03 +0100 Subject: [PATCH 11/13] cv64a6_imafdcv_sv39: 2 commit ports by default --- core/include/cv64a6_imafdcv_sv39_config_pkg.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv index 28291d5774..f088ec0ea0 100644 --- a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv @@ -12,6 +12,8 @@ package cva6_config_pkg; localparam CVA6ConfigXlen = 64; + localparam CVA6ConfigNrCommitPorts = 2; + localparam CVA6ConfigRVF = 1; localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16AltEn = 0; @@ -77,7 +79,7 @@ package cva6_config_pkg; FpgaAlteraEn: bit'(0), // for Altera (only) TechnoCut: bit'(0), SuperscalarEn: bit'(0), - NrCommitPorts: unsigned'(1), + NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth), From ab64842d8880f70c99918552b1dc6812af7ad769 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Wed, 4 Dec 2024 22:40:03 +0100 Subject: [PATCH 12/13] core: re-parametrize the RVV interfaces --- core/cva6.sv | 65 ++++------------- core/include/build_config_pkg.sv | 8 +-- core/include/config_pkg.sv | 6 ++ .../include/cv64a6_imafdcv_sv39_config_pkg.sv | 72 ++++++++++++++++++- 4 files changed, 95 insertions(+), 56 deletions(-) diff --git a/core/cva6.sv b/core/cva6.sv index 593878009a..8f77964c6d 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -31,15 +31,7 @@ module cva6 rvfi_probes_instr_t instr; }, - // branchpredict scoreboard entry - // this is the struct which we will inject into the pipeline to guide the various - // units towards the correct branch decision and resolve - localparam type branchpredict_sbe_t = struct packed { - cf_t cf; // type of control flow prediction - logic [CVA6Cfg.VLEN-1:0] predict_address; // target address at which to jump, or not - }, - - localparam type exception_t = struct packed { + parameter type exception_t = struct packed { logic [CVA6Cfg.XLEN-1:0] cause; // cause of exception logic [CVA6Cfg.XLEN-1:0] tval; // additional information of causing exception (e.g.: instruction causing it), // address of LD/ST fault @@ -49,6 +41,14 @@ module cva6 logic valid; }, + // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve + localparam type branchpredict_sbe_t = struct packed { + cf_t cf; // type of control flow prediction + logic [CVA6Cfg.VLEN-1:0] predict_address; // target address at which to jump, or not + }, + // cache request ports // I$ address translation requests localparam type icache_areq_t = struct packed { @@ -211,50 +211,13 @@ module cva6 logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] data_ruser; }, - // Accelerator - CVA6's - localparam type accelerator_req_t = struct packed { - logic req_valid; - logic resp_ready; - riscv::instruction_t insn; - logic [CVA6Cfg.XLEN-1:0] rs1; - logic [CVA6Cfg.XLEN-1:0] rs2; - fpnew_pkg::roundmode_e frm; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; - logic store_pending; - logic acc_cons_en; // Invalidation interface - logic inval_ready; // Invalidation interface - }, - - localparam type accelerator_resp_t = struct packed { - logic req_ready; - logic resp_valid; - logic [CVA6Cfg.XLEN-1:0] result; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; - exception_t exception; - logic store_pending; - logic store_complete; - logic load_complete; - logic [4:0] fflags; - logic fflags_valid; - logic inval_valid; // Invalidation interface - logic [63:0] inval_addr; // Invalidation interface - }, + // Accelerator - CVA6 + parameter type accelerator_req_t = logic, + parameter type accelerator_resp_t = logic, // Accelerator - CVA6's MMU - localparam type acc_mmu_req_t = struct packed { - logic acc_mmu_misaligned_ex; - logic acc_mmu_req; - logic [CVA6Cfg.VLEN-1:0] acc_mmu_vaddr; - logic acc_mmu_is_store; - }, - - localparam type acc_mmu_resp_t = struct packed { - logic acc_mmu_dtlb_hit; - logic [CVA6Cfg.PPNW-1:0] acc_mmu_dtlb_ppn; - logic acc_mmu_valid; - logic [CVA6Cfg.PLEN-1:0] acc_mmu_paddr; - exception_t acc_mmu_exception; - }, + parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_resp_t = logic, // AXI types parameter type axi_ar_chan_t = struct packed { diff --git a/core/include/build_config_pkg.sv b/core/include/build_config_pkg.sv index 5d4808bb1c..bb37c194ca 100644 --- a/core/include/build_config_pkg.sv +++ b/core/include/build_config_pkg.sv @@ -33,8 +33,8 @@ package build_config_pkg; cfg.XLEN = CVA6Cfg.XLEN; cfg.VLEN = CVA6Cfg.VLEN; - cfg.PLEN = (CVA6Cfg.XLEN == 32) ? 34 : 56; - cfg.GPLEN = (CVA6Cfg.XLEN == 32) ? 34 : 41; + cfg.PLEN = CVA6Cfg.PLEN; + cfg.GPLEN = CVA6Cfg.GPLEN; cfg.IS_XLEN32 = IS_XLEN32; cfg.IS_XLEN64 = IS_XLEN64; cfg.XLEN_ALIGN_BYTES = $clog2(CVA6Cfg.XLEN / 8); @@ -76,7 +76,7 @@ package build_config_pkg; cfg.RVZicntr = CVA6Cfg.RVZicntr; cfg.RVZihpm = CVA6Cfg.RVZihpm; cfg.NR_SB_ENTRIES = CVA6Cfg.NrScoreboardEntries; - cfg.TRANS_ID_BITS = $clog2(CVA6Cfg.NrScoreboardEntries); + cfg.TRANS_ID_BITS = CVA6Cfg.TRANS_ID_BITS; cfg.FpPresent = bit'(FpPresent); cfg.NSX = bit'(NSX); @@ -156,7 +156,7 @@ package build_config_pkg; cfg.ModeW = (CVA6Cfg.XLEN == 32) ? 1 : 4; cfg.ASIDW = (CVA6Cfg.XLEN == 32) ? 9 : 16; cfg.VMIDW = (CVA6Cfg.XLEN == 32) ? 7 : 14; - cfg.PPNW = (CVA6Cfg.XLEN == 32) ? 22 : 44; + cfg.PPNW = CVA6Cfg.PPNW; cfg.GPPNW = (CVA6Cfg.XLEN == 32) ? 22 : 29; cfg.MODE_SV = (CVA6Cfg.XLEN == 32) ? config_pkg::ModeSv32 : config_pkg::ModeSv39; cfg.SV = (cfg.MODE_SV == config_pkg::ModeSv32) ? 32 : 39; diff --git a/core/include/config_pkg.sv b/core/include/config_pkg.sv index d711d5f262..13e0f2efdd 100644 --- a/core/include/config_pkg.sv +++ b/core/include/config_pkg.sv @@ -50,6 +50,10 @@ package config_pkg; int unsigned XLEN; // Virtual address Size (in bits) int unsigned VLEN; + // + int unsigned PLEN; + int unsigned GPLEN; + int unsigned PPNW; // Atomic RISC-V extension bit RVA; // Bit manipulation RISC-V extension @@ -202,6 +206,8 @@ package config_pkg; bit unsigned UseSharedTlb; // MMU depth of shared TLB int unsigned SharedTlbDepth; + // + int unsigned TRANS_ID_BITS; } cva6_user_cfg_t; typedef struct packed { diff --git a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv index f088ec0ea0..3791c83a4e 100644 --- a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv @@ -75,6 +75,9 @@ package cva6_config_pkg; localparam config_pkg::cva6_user_cfg_t cva6_cfg = '{ XLEN: unsigned'(CVA6ConfigXlen), VLEN: unsigned'(64), + PLEN: unsigned'(56), + GPLEN: unsigned'(41), + PPNW: unsigned'(44), FpgaEn: bit'(0), // for Xilinx and Altera FpgaAlteraEn: bit'(0), // for Altera (only) TechnoCut: bit'(0), @@ -150,6 +153,73 @@ package cva6_config_pkg; SharedTlbDepth: int'(64), NrLoadPipeRegs: int'(CVA6ConfigNrLoadPipeRegs), NrStorePipeRegs: int'(CVA6ConfigNrStorePipeRegs), - DcacheIdWidth: int'(CVA6ConfigDcacheIdWidth) + DcacheIdWidth: int'(CVA6ConfigDcacheIdWidth), + TRANS_ID_BITS: $clog2(unsigned'(CVA6ConfigNrScoreboardEntries)) }; + + typedef struct packed { + logic [cva6_cfg.XLEN-1:0] cause; // cause of exception + logic [cva6_cfg.XLEN-1:0] tval; // additional information of causing exception (e.g.: instruction causing it), + // address of LD/ST fault + logic [cva6_cfg.GPLEN-1:0] tval2; // additional information when the causing exception in a guest exception + logic [31:0] tinst; // transformed instruction information + logic gva; // signals when a guest virtual address is written to tval + logic valid; + } exception_t; + + // Accelerator - CVA6's + typedef struct packed { + logic req_valid; + logic resp_ready; + logic [31:0] insn; + logic [cva6_cfg.XLEN-1:0] rs1; + logic [cva6_cfg.XLEN-1:0] rs2; + fpnew_pkg::roundmode_e frm; + logic [cva6_cfg.TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + logic acc_cons_en; // Invalidation interface + logic inval_ready; // Invalidation interface + } accelerator_req_t; + + typedef struct packed { + logic req_ready; + logic resp_valid; + logic [cva6_cfg.XLEN-1:0] result; + logic [cva6_cfg.TRANS_ID_BITS-1:0] trans_id; + exception_t exception; + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + logic inval_valid; // Invalidation interface + logic [63:0] inval_addr; // Invalidation interface + } accelerator_resp_t; + + // Accelerator - CVA6's MMU + typedef struct packed { + logic acc_mmu_misaligned_ex; + logic acc_mmu_req; + logic [cva6_cfg.VLEN-1:0] acc_mmu_vaddr; + logic acc_mmu_is_store; + } acc_mmu_req_t; + + typedef struct packed { + logic acc_mmu_dtlb_hit; + logic [cva6_cfg.PPNW-1:0] acc_mmu_dtlb_ppn; + logic acc_mmu_valid; + logic [cva6_cfg.PLEN-1:0] acc_mmu_paddr; + exception_t acc_mmu_exception; + } acc_mmu_resp_t; + + typedef struct packed { + accelerator_req_t acc_req; // Insn/mem + logic acc_mmu_en; // MMU + acc_mmu_resp_t acc_mmu_resp; // MMU + } cva6_to_acc_t; + + typedef struct packed { + accelerator_resp_t acc_resp; // Insn/mem + acc_mmu_req_t acc_mmu_req; // MMU + } acc_to_cva6_t; endpackage From d960b715fbbbdca7f8d8bf1c89e61c1d6dfc84d7 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Mon, 16 Dec 2024 14:04:17 +0100 Subject: [PATCH 13/13] treewide: verible pass --- core/acc_dispatcher.sv | 42 ++++---- core/cache_subsystem/cva6_icache.sv | 4 +- core/cache_subsystem/wt_dcache_missunit.sv | 4 +- core/cache_subsystem/wt_dcache_wbuffer.sv | 8 +- core/cva6.sv | 4 +- core/cva6_mmu/cva6_shared_tlb.sv | 6 +- core/ex_stage.sv | 4 +- core/frontend/frontend.sv | 3 +- core/include/build_config_pkg.sv | 6 +- .../include/cv64a6_imafdcv_sv39_config_pkg.sv | 40 +++---- core/load_store_unit.sv | 102 +++++++++--------- core/pmp/tb/tb_pkg.sv | 3 +- core/scoreboard.sv | 3 +- 13 files changed, 120 insertions(+), 109 deletions(-) diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index f1f13764e8..0f6bfdf6c9 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -27,7 +27,7 @@ module acc_dispatcher parameter type acc_resp_t = logic, parameter type accelerator_req_t = logic, parameter type accelerator_resp_t = logic, - parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_req_t = logic, parameter type acc_mmu_resp_t = logic, parameter type acc_cfg_t = logic, parameter acc_cfg_t AccCfg = '0 @@ -207,21 +207,21 @@ module acc_dispatcher *************************/ accelerator_req_t acc_req; - logic acc_req_valid; - logic acc_req_ready; + logic acc_req_valid; + logic acc_req_ready; accelerator_req_t acc_req_int; spill_register #( .T(accelerator_req_t) ) i_accelerator_req_register ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .data_i (acc_req), - .valid_i (acc_req_valid), - .ready_o (acc_req_ready), - .data_o (acc_req_int), - .valid_o (acc_req_o.acc_req.req_valid), - .ready_i (acc_resp_i.acc_resp.req_ready) + .clk_i (clk_i), + .rst_ni (rst_ni), + .data_i (acc_req), + .valid_i(acc_req_valid), + .ready_o(acc_req_ready), + .data_o (acc_req_int), + .valid_o(acc_req_o.acc_req.req_valid), + .ready_i(acc_resp_i.acc_resp.req_ready) ); assign acc_req_o.acc_req.insn = acc_req_int.insn; @@ -234,8 +234,8 @@ module acc_dispatcher assign acc_req_o.acc_req.inval_ready = inval_ready_i; // MMU interface - assign acc_req_o.acc_mmu_resp = acc_mmu_resp_i; - assign acc_req_o.acc_mmu_en = acc_mmu_en_i; + assign acc_req_o.acc_mmu_resp = acc_mmu_resp_i; + assign acc_req_o.acc_mmu_en = acc_mmu_en_i; always_comb begin : accelerator_req_dispatcher // Do not fetch from the instruction queue @@ -279,13 +279,13 @@ module acc_dispatcher logic acc_ld_disp; logic acc_st_disp; - assign acc_trans_id_o = acc_resp_i.acc_resp.trans_id; - assign acc_result_o = acc_resp_i.acc_resp.result; - assign acc_valid_o = acc_resp_i.acc_resp.resp_valid; - assign acc_exception_o = acc_resp_i.acc_resp.exception; + assign acc_trans_id_o = acc_resp_i.acc_resp.trans_id; + assign acc_result_o = acc_resp_i.acc_resp.result; + assign acc_valid_o = acc_resp_i.acc_resp.resp_valid; + assign acc_exception_o = acc_resp_i.acc_resp.exception; // Unpack the accelerator response assign acc_fflags_valid_o = acc_resp_i.acc_resp.fflags_valid; - assign acc_fflags_o = acc_resp_i.acc_resp.fflags; + assign acc_fflags_o = acc_resp_i.acc_resp.fflags; // MMU interface assign acc_mmu_req_o = acc_resp_i.acc_mmu_req; @@ -294,8 +294,8 @@ module acc_dispatcher assign acc_req_o.acc_req.resp_ready = 1'b1; // Signal dispatched load/store to issue stage - assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); - assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); + assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); + assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); // Cache invalidation assign inval_valid_o = acc_resp_i.acc_resp.inval_valid; @@ -338,7 +338,7 @@ module acc_dispatcher // Set on store barrier. Clear when no store is pending. assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.acc_resp.store_pending; - assign ctrl_halt_o = wait_acc_store_q; + assign ctrl_halt_o = wait_acc_store_q; /************************** * Load/Store tracking * diff --git a/core/cache_subsystem/cva6_icache.sv b/core/cache_subsystem/cva6_icache.sv index a173af54ce..be7becb0a0 100644 --- a/core/cache_subsystem/cva6_icache.sv +++ b/core/cache_subsystem/cva6_icache.sv @@ -424,8 +424,8 @@ module cva6_icache logic [CVA6Cfg.ICACHE_SET_ASSOC_WIDTH-1:0] hit_idx; for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel - assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; - assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; + assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; + assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; assign cl_user[i] = CVA6Cfg.FETCH_USER_EN ? cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH] : '0; end diff --git a/core/cache_subsystem/wt_dcache_missunit.sv b/core/cache_subsystem/wt_dcache_missunit.sv index 5eb202e08e..59f637fe7e 100644 --- a/core/cache_subsystem/wt_dcache_missunit.sv +++ b/core/cache_subsystem/wt_dcache_missunit.sv @@ -91,8 +91,8 @@ module wt_dcache_missunit // 010: word // 011: dword // 111: DCACHE line - function automatic logic [CVA6Cfg.PLEN-1:0] paddrSizeAlign(input logic [CVA6Cfg.PLEN-1:0] paddr, - input logic [2:0] size); + function automatic logic [CVA6Cfg.PLEN-1:0] paddrSizeAlign( + input logic [CVA6Cfg.PLEN-1:0] paddr, input logic [2:0] size); logic [CVA6Cfg.PLEN-1:0] out; out = paddr; unique case (size) diff --git a/core/cache_subsystem/wt_dcache_wbuffer.sv b/core/cache_subsystem/wt_dcache_wbuffer.sv index be2029952c..841dea9cbd 100644 --- a/core/cache_subsystem/wt_dcache_wbuffer.sv +++ b/core/cache_subsystem/wt_dcache_wbuffer.sv @@ -136,8 +136,8 @@ module wt_dcache_wbuffer // openpiton requires the data to be replicated in case of smaller sizes than dwords function automatic logic [CVA6Cfg.XLEN-1:0] repData64( - input logic [CVA6Cfg.XLEN-1:0] data, input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, - input logic [1:0] size); + input logic [CVA6Cfg.XLEN-1:0] data, + input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); logic [CVA6Cfg.XLEN-1:0] out; unique case (size) 2'b00: for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8]; // byte @@ -149,8 +149,8 @@ module wt_dcache_wbuffer endfunction : repData64 function automatic logic [CVA6Cfg.XLEN-1:0] repData32( - input logic [CVA6Cfg.XLEN-1:0] data, input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, - input logic [1:0] size); + input logic [CVA6Cfg.XLEN-1:0] data, + input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); logic [CVA6Cfg.XLEN-1:0] out; unique case (size) 2'b00: for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8]; // byte diff --git a/core/cva6.sv b/core/cva6.sv index 8f77964c6d..c1c9df5f0c 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -212,11 +212,11 @@ module cva6 }, // Accelerator - CVA6 - parameter type accelerator_req_t = logic, + parameter type accelerator_req_t = logic, parameter type accelerator_resp_t = logic, // Accelerator - CVA6's MMU - parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_req_t = logic, parameter type acc_mmu_resp_t = logic, // AXI types diff --git a/core/cva6_mmu/cva6_shared_tlb.sv b/core/cva6_mmu/cva6_shared_tlb.sv index 729d2194c5..2654cdb7f1 100644 --- a/core/cva6_mmu/cva6_shared_tlb.sv +++ b/core/cva6_mmu/cva6_shared_tlb.sv @@ -94,7 +94,8 @@ module cva6_shared_tlb #( shared_tag_t shared_tag_wr; shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd; - logic [CVA6Cfg.SharedTlbDepth-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d; + logic [CVA6Cfg.SharedTlbDepth-1:0][SHARED_TLB_WAYS-1:0] + shared_tag_valid_q, shared_tag_valid_d; logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid; @@ -122,7 +123,8 @@ module cva6_shared_tlb #( logic [ SHARED_TLB_WAYS-1:0] pte_we; logic [$clog2(CVA6Cfg.SharedTlbDepth)-1:0] pte_addr; - logic [CVA6Cfg.PtLevels+HYP_EXT-1:0][(CVA6Cfg.VpnLen/CVA6Cfg.PtLevels)-1:0] vpn_d, vpn_q; + logic [CVA6Cfg.PtLevels+HYP_EXT-1:0][(CVA6Cfg.VpnLen/CVA6Cfg.PtLevels)-1:0] + vpn_d, vpn_q; logic [SHARED_TLB_WAYS-1:0][CVA6Cfg.PtLevels-1:0] vpn_match; logic [SHARED_TLB_WAYS-1:0][CVA6Cfg.PtLevels-1:0] page_match; logic [SHARED_TLB_WAYS-1:0][CVA6Cfg.PtLevels-1:0] level_match; diff --git a/core/ex_stage.sv b/core/ex_stage.sv index b1a9984bc6..8570b02f73 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -30,7 +30,7 @@ module ex_stage parameter type icache_drsp_t = logic, parameter type lsu_ctrl_t = logic, parameter type x_result_t = logic, - parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_req_t = logic, parameter type acc_mmu_resp_t = logic ) ( // Subsystem Clock - SUBSYSTEM @@ -162,7 +162,7 @@ module ex_stage // accelerate port result is valid - ACC_DISPATCHER input logic acc_valid_i, // Accelerator MMU access - input acc_mmu_req_t acc_mmu_req_i, + input acc_mmu_req_t acc_mmu_req_i, output acc_mmu_resp_t acc_mmu_resp_o, // Enable virtual memory translation - CSR_REGFILE input logic enable_translation_i, diff --git a/core/frontend/frontend.sv b/core/frontend/frontend.sv index 3117e27654..99a1b4c93d 100644 --- a/core/frontend/frontend.sv +++ b/core/frontend/frontend.sv @@ -127,7 +127,8 @@ module frontend logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump; logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvi_imm; // RVC branching - logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] + rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvc_imm; // re-aligned instruction and address (coming from cache - combinationally) logic [CVA6Cfg.INSTR_PER_FETCH-1:0][ 31:0] instr; diff --git a/core/include/build_config_pkg.sv b/core/include/build_config_pkg.sv index bb37c194ca..94bae36d8f 100644 --- a/core/include/build_config_pkg.sv +++ b/core/include/build_config_pkg.sv @@ -123,7 +123,8 @@ package build_config_pkg; cfg.AxiBurstWriteEn = CVA6Cfg.AxiBurstWriteEn; cfg.ICACHE_SET_ASSOC = CVA6Cfg.IcacheSetAssoc; - cfg.ICACHE_SET_ASSOC_WIDTH = CVA6Cfg.IcacheSetAssoc > 1 ? $clog2(CVA6Cfg.IcacheSetAssoc) : CVA6Cfg.IcacheSetAssoc; + cfg.ICACHE_SET_ASSOC_WIDTH = CVA6Cfg.IcacheSetAssoc > 1 ? $clog2(CVA6Cfg.IcacheSetAssoc) : + CVA6Cfg.IcacheSetAssoc; cfg.ICACHE_INDEX_WIDTH = ICACHE_INDEX_WIDTH; cfg.ICACHE_TAG_WIDTH = cfg.PLEN - ICACHE_INDEX_WIDTH; cfg.ICACHE_LINE_WIDTH = CVA6Cfg.IcacheLineWidth; @@ -131,7 +132,8 @@ package build_config_pkg; cfg.DCacheType = CVA6Cfg.DCacheType; cfg.DcacheIdWidth = CVA6Cfg.DcacheIdWidth; cfg.DCACHE_SET_ASSOC = CVA6Cfg.DcacheSetAssoc; - cfg.DCACHE_SET_ASSOC_WIDTH = CVA6Cfg.DcacheSetAssoc > 1 ? $clog2(CVA6Cfg.DcacheSetAssoc) : CVA6Cfg.DcacheSetAssoc; + cfg.DCACHE_SET_ASSOC_WIDTH = CVA6Cfg.DcacheSetAssoc > 1 ? $clog2(CVA6Cfg.DcacheSetAssoc) : + CVA6Cfg.DcacheSetAssoc; cfg.DCACHE_INDEX_WIDTH = DCACHE_INDEX_WIDTH; cfg.DCACHE_TAG_WIDTH = cfg.PLEN - DCACHE_INDEX_WIDTH; cfg.DCACHE_LINE_WIDTH = CVA6Cfg.DcacheLineWidth; diff --git a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv index 3791c83a4e..1c24181b8a 100644 --- a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv @@ -34,7 +34,7 @@ package cva6_config_pkg; localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigFetchUserEn = 0; - localparam CVA6ConfigFetchUserWidth = 1; // Just not to raise warnings + localparam CVA6ConfigFetchUserWidth = 1; // Just not to raise warnings localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; @@ -177,23 +177,23 @@ package cva6_config_pkg; fpnew_pkg::roundmode_e frm; logic [cva6_cfg.TRANS_ID_BITS-1:0] trans_id; logic store_pending; - logic acc_cons_en; // Invalidation interface - logic inval_ready; // Invalidation interface + logic acc_cons_en; // Invalidation interface + logic inval_ready; // Invalidation interface } accelerator_req_t; typedef struct packed { - logic req_ready; - logic resp_valid; - logic [cva6_cfg.XLEN-1:0] result; - logic [cva6_cfg.TRANS_ID_BITS-1:0] trans_id; - exception_t exception; - logic store_pending; - logic store_complete; - logic load_complete; - logic [4:0] fflags; - logic fflags_valid; - logic inval_valid; // Invalidation interface - logic [63:0] inval_addr; // Invalidation interface + logic req_ready; + logic resp_valid; + logic [cva6_cfg.XLEN-1:0] result; + logic [cva6_cfg.TRANS_ID_BITS-1:0] trans_id; + exception_t exception; + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + logic inval_valid; // Invalidation interface + logic [63:0] inval_addr; // Invalidation interface } accelerator_resp_t; // Accelerator - CVA6's MMU @@ -213,13 +213,13 @@ package cva6_config_pkg; } acc_mmu_resp_t; typedef struct packed { - accelerator_req_t acc_req; // Insn/mem - logic acc_mmu_en; // MMU - acc_mmu_resp_t acc_mmu_resp; // MMU + accelerator_req_t acc_req; // Insn/mem + logic acc_mmu_en; // MMU + acc_mmu_resp_t acc_mmu_resp; // MMU } cva6_to_acc_t; typedef struct packed { - accelerator_resp_t acc_resp; // Insn/mem - acc_mmu_req_t acc_mmu_req; // MMU + accelerator_resp_t acc_resp; // Insn/mem + acc_mmu_req_t acc_mmu_req; // MMU } acc_to_cva6_t; endpackage diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 978a18d471..5134e13d5b 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -26,7 +26,7 @@ module load_store_unit parameter type icache_dreq_t = logic, parameter type icache_drsp_t = logic, parameter type lsu_ctrl_t = logic, - parameter type acc_mmu_req_t = logic, + parameter type acc_mmu_req_t = logic, parameter type acc_mmu_resp_t = logic ) ( // Subsystem Clock - SUBSYSTEM @@ -85,7 +85,7 @@ module load_store_unit input logic en_ld_st_g_translation_i, // Accelerator request for CVA6's MMU - input acc_mmu_req_t acc_mmu_req_i, + input acc_mmu_req_t acc_mmu_req_i, output acc_mmu_resp_t acc_mmu_resp_o, // Instruction cache input request - CACHES @@ -165,26 +165,26 @@ module load_store_unit ); // data is misaligned - logic data_misaligned; + logic data_misaligned; // -------------------------------------- // 1st register stage - (stall registers) // -------------------------------------- // those are the signals which are always correct // e.g.: they keep the value in the stall case - lsu_ctrl_t lsu_ctrl, lsu_ctrl_byp; + lsu_ctrl_t lsu_ctrl, lsu_ctrl_byp; - logic pop_st; - logic pop_ld; + logic pop_st; + logic pop_ld; // ------------------------------ // Address Generation Unit (AGU) // ------------------------------ // virtual address as calculated by the AGU in the first cycle - logic [ CVA6Cfg.VLEN-1:0] vaddr_i; - logic [ CVA6Cfg.XLEN-1:0] vaddr_xlen; - logic overflow; - logic g_overflow; - logic [(CVA6Cfg.XLEN/8)-1:0] be_i; + logic [ CVA6Cfg.VLEN-1:0] vaddr_i; + logic [ CVA6Cfg.XLEN-1:0] vaddr_xlen; + logic overflow; + logic g_overflow; + logic [(CVA6Cfg.XLEN/8)-1:0] be_i; assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a)); assign vaddr_i = vaddr_xlen[CVA6Cfg.VLEN-1:0]; @@ -196,10 +196,10 @@ module load_store_unit assign g_overflow = 1'b0; end - logic st_valid_i; - logic ld_valid_i; - logic ld_translation_req; - logic st_translation_req, cva6_st_translation_req, acc_st_translation_req; + logic st_valid_i; + logic ld_valid_i; + logic ld_translation_req; + logic st_translation_req, cva6_st_translation_req, acc_st_translation_req; logic [CVA6Cfg.VLEN-1:0] ld_vaddr; logic [ 31:0] ld_tinst; logic ld_hs_ld_st_inst; @@ -208,36 +208,36 @@ module load_store_unit logic [ 31:0] st_tinst; logic st_hs_ld_st_inst; logic st_hlvx_inst; - logic translation_req, cva6_translation_req, acc_translation_req; - logic translation_valid, cva6_translation_valid, acc_translataion_valid; + logic translation_req, cva6_translation_req, acc_translation_req; + logic translation_valid, cva6_translation_valid, acc_translataion_valid; logic [CVA6Cfg.VLEN-1:0] mmu_vaddr, cva6_mmu_vaddr, acc_mmu_vaddr; logic [CVA6Cfg.PLEN-1:0] mmu_paddr, cva6_mmu_paddr, acc_mmu_paddr, lsu_paddr; - logic [ 31:0] mmu_tinst; - logic mmu_hs_ld_st_inst; - logic mmu_hlvx_inst; - exception_t mmu_exception, cva6_mmu_exception, acc_mmu_exception; - exception_t pmp_exception; - icache_areq_t pmp_icache_areq_i; - logic pmp_translation_valid; - logic dtlb_hit, cva6_dtlb_hit, acc_dtlb_hit; - logic [ CVA6Cfg.PPNW-1:0] dtlb_ppn, cva6_dtlb_ppn, acc_dtlb_ppn; - - logic ld_valid; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] ld_trans_id; - logic [ CVA6Cfg.XLEN-1:0] ld_result; - logic st_valid; - logic [CVA6Cfg.TRANS_ID_BITS-1:0] st_trans_id; - logic [ CVA6Cfg.XLEN-1:0] st_result; - - logic [ 11:0] page_offset; - logic page_offset_matches; - - exception_t misaligned_exception, cva6_misaligned_exception, acc_misaligned_exception; - exception_t ld_ex; - exception_t st_ex; - - logic hs_ld_st_inst; - logic hlvx_inst; + logic [31:0] mmu_tinst; + logic mmu_hs_ld_st_inst; + logic mmu_hlvx_inst; + exception_t mmu_exception, cva6_mmu_exception, acc_mmu_exception; + exception_t pmp_exception; + icache_areq_t pmp_icache_areq_i; + logic pmp_translation_valid; + logic dtlb_hit, cva6_dtlb_hit, acc_dtlb_hit; + logic [CVA6Cfg.PPNW-1:0] dtlb_ppn, cva6_dtlb_ppn, acc_dtlb_ppn; + + logic ld_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] ld_trans_id; + logic [ CVA6Cfg.XLEN-1:0] ld_result; + logic st_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] st_trans_id; + logic [ CVA6Cfg.XLEN-1:0] st_result; + + logic [ 11:0] page_offset; + logic page_offset_matches; + + exception_t misaligned_exception, cva6_misaligned_exception, acc_misaligned_exception; + exception_t ld_ex; + exception_t st_ex; + + logic hs_ld_st_inst; + logic hlvx_inst; logic [1:0] sum, mxr; logic [CVA6Cfg.PPNW-1:0] satp_ppn[2:0]; logic [CVA6Cfg.ASID_WIDTH-1:0] asid[2:0], asid_to_be_flushed[1:0]; @@ -399,7 +399,11 @@ module load_store_unit if (CVA6Cfg.EnableAccelerator) begin // The MMU can be connected to CVA6 or the ACCELERATOR - enum logic {CVA6, ACC} mmu_state_d, mmu_state_q; + enum logic { + CVA6, + ACC + } + mmu_state_d, mmu_state_q; always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin mmu_state_q <= CVA6; @@ -411,7 +415,7 @@ module load_store_unit // This logic can be optimized to reduce answer latency and contention always_comb begin // Maintain state - mmu_state_d = mmu_state_q; + mmu_state_d = mmu_state_q; // Serve CVA6 and gate the accelerator by default // MMU input misaligned_exception = cva6_misaligned_exception; @@ -487,9 +491,9 @@ module load_store_unit assign cva6_dtlb_hit = dtlb_hit; assign cva6_dtlb_ppn = dtlb_ppn; // No accelerator - assign acc_mmu_resp_o = '0; + assign acc_mmu_resp_o = '0; // Feed forward the lsu_ctrl bypass - assign lsu_ctrl = lsu_ctrl_byp; + assign lsu_ctrl = lsu_ctrl_byp; end logic store_buffer_empty; @@ -614,8 +618,8 @@ module load_store_unit // determine whether this is a load or store always_comb begin : which_op - ld_valid_i = 1'b0; - st_valid_i = 1'b0; + ld_valid_i = 1'b0; + st_valid_i = 1'b0; cva6_translation_req = 1'b0; cva6_mmu_vaddr = {CVA6Cfg.VLEN{1'b0}}; diff --git a/core/pmp/tb/tb_pkg.sv b/core/pmp/tb/tb_pkg.sv index 9bcac61f7d..56127db85d 100644 --- a/core/pmp/tb/tb_pkg.sv +++ b/core/pmp/tb/tb_pkg.sv @@ -18,7 +18,8 @@ package tb_pkg; parameter WIDTH = 32, parameter PMP_LEN = 32 ); - static function logic [PMP_LEN-1:0] base_to_conf(logic [WIDTH-1:0] base, int unsigned size_i); + static function logic [PMP_LEN-1:0] base_to_conf(logic [WIDTH-1:0] base, + int unsigned size_i); logic [PMP_LEN-1:0] pmp_reg; pmp_reg = '0; diff --git a/core/scoreboard.sv b/core/scoreboard.sv index 6a1b3d094f..97b1bc800e 100644 --- a/core/scoreboard.sv +++ b/core/scoreboard.sv @@ -111,7 +111,8 @@ module scoreboard #( logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_n, issue_pointer_q; logic [CVA6Cfg.NrIssuePorts:0][CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer; - logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] commit_pointer_n, commit_pointer_q; + logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] + commit_pointer_n, commit_pointer_q; logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit; for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin