diff --git a/core/commit_stage.sv b/core/commit_stage.sv index d0dcc25be0..112641266b 100644 --- a/core/commit_stage.sv +++ b/core/commit_stage.sv @@ -36,13 +36,13 @@ module commit_stage input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // Acknowledge that we are indeed committing - ISSUE_STAGE output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, - // Register file write address - ID_STAGE + // Register file write address - ISSUE_STAGE output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, - // Register file write data - ID_STAGE + // Register file write data - ISSUE_STAGE output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, - // Register file write enable - ID_STAGE + // Register file write enable - ISSUE_STAGE output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, - // Floating point register enable - ID_STAGE + // Floating point register enable - ISSUE_STAGE output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // Result of AMO operation - CACHE input amo_resp_t amo_resp_i, diff --git a/core/compressed_decoder.sv b/core/compressed_decoder.sv index c218a83d3b..2c0a527172 100644 --- a/core/compressed_decoder.sv +++ b/core/compressed_decoder.sv @@ -22,9 +22,13 @@ module compressed_decoder #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty ) ( + // Input instruction coming from fetch stage - FRONTEND input logic [31:0] instr_i, + // Output instruction in uncompressed format - decoder output logic [31:0] instr_o, + // Input instruction is illegal - decoder output logic illegal_instr_o, + // Output instruction is compressed - decoder output logic is_compressed_o ); diff --git a/core/decoder.sv b/core/decoder.sv index 97812bffdc..0ebf6b9819 100644 --- a/core/decoder.sv +++ b/core/decoder.sv @@ -24,28 +24,48 @@ module decoder #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty ) ( - input logic debug_req_i, // external debug request - input logic [riscv::VLEN-1:0] pc_i, // PC from IF - input logic 
is_compressed_i, // is a compressed instruction - input logic [15:0] compressed_instr_i, // compressed form of instruction - input logic is_illegal_i, // illegal compressed instruction - input logic [31:0] instruction_i, // instruction from IF + // Debug (async) request - SUBSYSTEM + input logic debug_req_i, + // PC from fetch stage - FRONTEND + input logic [riscv::VLEN-1:0] pc_i, + // Is a compressed instruction - compressed_decoder + input logic is_compressed_i, + // Compressed form of instruction - FRONTEND + input logic [15:0] compressed_instr_i, + // Illegal compressed instruction - compressed_decoder + input logic is_illegal_i, + // Instruction from fetch stage - FRONTEND + input logic [31:0] instruction_i, + // Is a branch predict instruction - FRONTEND input branchpredict_sbe_t branch_predict_i, - input exception_t ex_i, // if an exception occured in if - input logic [1:0] irq_i, // external interrupt - input irq_ctrl_t irq_ctrl_i, // interrupt control and status information from CSRs - // From CSR - input riscv::priv_lvl_t priv_lvl_i, // current privilege level - input logic debug_mode_i, // we are in debug mode - input riscv::xs_t fs_i, // floating point extension status - input logic [2:0] frm_i, // floating-point dynamic rounding mode - input riscv::xs_t vs_i, // vector extension status - input logic tvm_i, // trap virtual memory - input logic tw_i, // timeout wait - input logic tsr_i, // trap sret - output scoreboard_entry_t instruction_o, // scoreboard entry to scoreboard - output logic [31:0] orig_instr_o, // instruction opcode to issue read operand for CVXIF - output logic is_control_flow_instr_o // this instruction will change the control flow + // If an exception occurred in fetch stage - FRONTEND + input exception_t ex_i, + // Level sensitive (async) interrupts - SUBSYSTEM + input logic [1:0] irq_i, + // Interrupt control status - CSR_REGFILE + input irq_ctrl_t irq_ctrl_i, + // Current privilege level - CSR_REGFILE + input riscv::priv_lvl_t 
priv_lvl_i, + // Is debug mode - CSR_REGFILE + input logic debug_mode_i, + // Floating point extension status - CSR_REGFILE + input riscv::xs_t fs_i, + // Floating-point dynamic rounding mode - CSR_REGFILE + input logic [2:0] frm_i, + // Vector extension status - CSR_REGFILE + input riscv::xs_t vs_i, + // Trap virtual memory - CSR_REGFILE + input logic tvm_i, + // Timeout wait - CSR_REGFILE + input logic tw_i, + // Trap sret - CSR_REGFILE + input logic tsr_i, + // Instruction to be added to scoreboard entry - ISSUE_STAGE + output scoreboard_entry_t instruction_o, + // Instruction - ISSUE_STAGE + output logic [31:0] orig_instr_o, + // Is a control flow instruction - ISSUE_STAGE + output logic is_control_flow_instr_o ); logic illegal_instr; logic illegal_instr_bm; diff --git a/core/ex_stage.sv b/core/ex_stage.sv index 2ce58a43db..e31f7b2670 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -28,67 +28,67 @@ module ex_stage input logic flush_i, // TO_BE_COMPLETED - CSR_REGFILE input logic debug_mode_i, - // TO_BE_COMPLETED - ID_STAGE + // rs1 forwarding - ISSUE_STAGE input logic [riscv::VLEN-1:0] rs1_forwarding_i, - // TO_BE_COMPLETED - ID_STAGE + // rs2 forwarding - ISSUE_STAGE input logic [riscv::VLEN-1:0] rs2_forwarding_i, - // TO_BE_COMPLETED - ID_STAGE + // FU data useful to execute instruction - ISSUE_STAGE input fu_data_t fu_data_i, - // PC of the current instruction - ID_STAGE + // PC of the current instruction - ISSUE_STAGE input logic [riscv::VLEN-1:0] pc_i, - // Report whether isntruction is compressed - ID_STAGE + // Report whether instruction is compressed - ISSUE_STAGE input logic is_compressed_instr_i, - // TO_BE_COMPLETED - ID_STAGE + // TO_BE_COMPLETED - ISSUE_STAGE output riscv::xlen_t flu_result_o, - // ID of the scoreboard entry at which a=to write back - ID_STAGE + // ID of the scoreboard entry at which to write back - ISSUE_STAGE output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, - // TO_BE_COMPLETED - ID_STAGE + // TO_BE_COMPLETED - 
ISSUE_STAGE output exception_t flu_exception_o, - // FLU is ready - ID_STAGE + // FLU is ready - ISSUE_STAGE output logic flu_ready_o, - // FLU result is valid - ID_STAGE + // FLU result is valid - ISSUE_STAGE output logic flu_valid_o, - // ALU result is valid - ID_STAGE + // ALU instruction is valid - ISSUE_STAGE input logic alu_valid_i, - // Branch unit result is valid - ID_STAGE + // Branch unit instruction is valid - ISSUE_STAGE input logic branch_valid_i, - // TO_BE_COMPLETED - ID_STAGE + // Information of branch prediction - ISSUE_STAGE input branchpredict_sbe_t branch_predict_i, - // The branch engine uses the write back from the ALU - CONTROLLER PERF_COUNTER FRONTEND ISSUE_STAGE + // The branch engine uses the write back from the ALU - several_modules output bp_resolve_t resolved_branch_o, - // ID signaling that we resolved the branch - ID_STAGE + // Signaling that we resolved the branch - ISSUE_STAGE output logic resolve_branch_o, - // TO_BE_COMPLETED - ID_STAGE + // CSR instruction is valid - ISSUE_STAGE input logic csr_valid_i, - // TO_BE_COMPLETED - CSR_REGISTERS + // CSR address to write - COMMIT_STAGE output logic [11:0] csr_addr_o, // TO_BE_COMPLETED - COMMIT_STAGE input logic csr_commit_i, - // MULT result is valid - ID_STAGE + // MULT instruction is valid - ISSUE_STAGE input logic mult_valid_i, - // FU is ready - ID_STAGE + // LSU is ready - ISSUE_STAGE output logic lsu_ready_o, - // LSU result is valid - ID_STAGE + // LSU instruction is valid - ISSUE_STAGE input logic lsu_valid_i, - // TO_BE_COMPLETED - ID_STAGE + // Load result is valid - ISSUE_STAGE output logic load_valid_o, - // TO_BE_COMPLETED - ID_STAGE + // Load result - ISSUE_STAGE output riscv::xlen_t load_result_o, - // TO_BE_COMPLETED - ID_STAGE + // Load instruction ID - ISSUE_STAGE output logic [TRANS_ID_BITS-1:0] load_trans_id_o, - // TO_BE_COMPLETED - ID_STAGE + // Exception generated by load instruction - ISSUE_STAGE output exception_t load_exception_o, - // TO_BE_COMPLETED - 
ID_STAGE + // Store result is valid - ISSUE_STAGE output logic store_valid_o, - // TO_BE_COMPLETED - ID_STAGE + // Store result - ISSUE_STAGE output riscv::xlen_t store_result_o, - // TO_BE_COMPLETED - ID_STAGE + // Store instruction ID - ISSUE_STAGE output logic [TRANS_ID_BITS-1:0] store_trans_id_o, - // TO_BE_COMPLETED - ID_STAGE + // Exception generated by store instruction - ISSUE_STAGE output exception_t store_exception_o, // TO_BE_COMPLETED - COMMIT_STAGE input logic lsu_commit_i, - // Commit queue is ready to accept another commit request - COMMIT_STAGE + // Commit queue ready to accept another commit request - COMMIT_STAGE output logic lsu_commit_ready_o, // TO_BE_COMPLETED - COMMIT_STAGE input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, @@ -96,49 +96,49 @@ module ex_stage input logic stall_st_pending_i, // TO_BE_COMPLETED - COMMIT_STAGE output logic no_st_pending_o, - // TO_BE_COMPLETED - COMMIT_STAGE + // Atomic result is valid - COMMIT_STAGE input logic amo_valid_commit_i, - // FU is ready - ID_STAGE + // FU is ready - ISSUE_STAGE output logic fpu_ready_o, - // Output is ready - ID_STAGE + // FPU instruction is valid - ISSUE_STAGE input logic fpu_valid_i, - // report FP format - ID_STAGE + // FPU format - ISSUE_STAGE input logic [1:0] fpu_fmt_i, - // FP rm - ID_STAGE + // FPU rm - ISSUE_STAGE input logic [2:0] fpu_rm_i, - // FP frm - ID_STAGE + // FPU frm - ISSUE_STAGE input logic [2:0] fpu_frm_i, - // FP precision control - CSR_REGFILE + // FPU precision control - CSR_REGFILE input logic [6:0] fpu_prec_i, - // TO_BE_COMPLETED - ID_STAGE + // FPU transaction ID - ISSUE_STAGE output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, - // TO_BE_COMPLETED - ID_STAGE + // FPU result - ISSUE_STAGE output riscv::xlen_t fpu_result_o, - // TO_BE_COMPLETED - ID_STAGE + // FPU valid - ISSUE_STAGE output logic fpu_valid_o, - // TO_BE_COMPLETED - ID_STAGE + // FPU exception - ISSUE_STAGE output exception_t fpu_exception_o, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF 
instruction is valid - ISSUE_STAGE input logic x_valid_i, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF is ready - ISSUE_STAGE output logic x_ready_o, - // TO_BE_COMPLETED - ID_STAGE + // undecoded instruction - ISSUE_STAGE input logic [31:0] x_off_instr_i, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF transaction ID - ISSUE_STAGE output logic [TRANS_ID_BITS-1:0] x_trans_id_o, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF exception - ISSUE_STAGE output exception_t x_exception_o, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF result - ISSUE_STAGE output riscv::xlen_t x_result_o, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF result valid - ISSUE_STAGE output logic x_valid_o, - // TO_BE_COMPLETED - ID_STAGE + // CVXIF write enable - ISSUE_STAGE output logic x_we_o, - // TO_BE_COMPLETED - SUBSYSTEM + // CVXIF request - SUBSYSTEM output cvxif_pkg::cvxif_req_t cvxif_req_o, - // TO_BE_COMPLETED - SUBSYSTEM + // CVXIF response - SUBSYSTEM input cvxif_pkg::cvxif_resp_t cvxif_resp_i, - // TO_BE_COMPLETED - ACC_DISPATCHER + // accelerate port result is valid - ACC_DISPATCHER input logic acc_valid_i, // TO_BE_COMPLETED - CSR_REGFILE input logic enable_translation_i, @@ -146,13 +146,13 @@ module ex_stage input logic en_ld_st_translation_i, // TO_BE_COMPLETED - CONTROLLER input logic flush_tlb_i, - // TO_BE_COMPLETED - CSR_REGFILE + // Privilege mode - CSR_REGFILE input riscv::priv_lvl_t priv_lvl_i, - // TO_BE_COMPLETED - CSR_REGFILE + // Privilege level at which load and stores should happen - CSR_REGFILE input riscv::priv_lvl_t ld_st_priv_lvl_i, - // TO_BE_COMPLETED - CSR_REGFILE + // Supervisor user memory - CSR_REGFILE input logic sum_i, - // TO_BE_COMPLETED - CSR_REGFILE + // Make executable readable - CSR_REGFILE input logic mxr_i, // TO_BE_COMPLETED - CSR_REGFILE input logic [riscv::PPNW-1:0] satp_ppn_i, @@ -183,9 +183,9 @@ module ex_stage input riscv::pmpcfg_t [15:0] pmpcfg_i, // Report the PMP addresses - CSR_REGFILE input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, - // Information 
dedicated to RVFI - SUBSYSTEM + // Information dedicated to RVFI - RVFI output lsu_ctrl_t rvfi_lsu_ctrl_o, - // Information dedicated to RVFI - SUBSYSTEM + // Information dedicated to RVFI - RVFI output [riscv::PLEN-1:0] rvfi_mem_paddr_o ); diff --git a/core/id_stage.sv b/core/id_stage.sv index e2460adfbf..7dfda5a8b2 100644 --- a/core/id_stage.sv +++ b/core/id_stage.sv @@ -32,7 +32,7 @@ module id_stage #( output logic fetch_entry_ready_o, // Handshake's data between decode and issue - ISSUE output ariane_pkg::scoreboard_entry_t issue_entry_o, - // instruction value - ISSUE + // Instruction value - ISSUE output logic [31:0] orig_instr_o, // Handshake's valid between decode and issue - ISSUE output logic issue_entry_valid_o, @@ -40,27 +40,27 @@ module id_stage #( output logic is_ctrl_flow_o, // Handshake's acknowlege between decode and issue - ISSUE input logic issue_instr_ack_i, - // Information dedicated to RVFI- SUBSYSTEM + // Information dedicated to RVFI - RVFI output logic rvfi_is_compressed_o, - // Report current privilege level - CSR + // Current privilege level - CSR_REGFILE input riscv::priv_lvl_t priv_lvl_i, - // Report floating point extension status - CSR + // Floating point extension status - CSR_REGFILE input riscv::xs_t fs_i, - // Report floating point dynamic rounding mode - CSR + // Floating point dynamic rounding mode - CSR_REGFILE input logic [2:0] frm_i, - // Report vector extension status - CSR + // Vector extension status - CSR_REGFILE input riscv::xs_t vs_i, // Level sensitive (async) interrupts - SUBSYSTEM input logic [1:0] irq_i, - // TBD - CSR + // Interrupt control status - CSR_REGFILE input ariane_pkg::irq_ctrl_t irq_ctrl_i, - // Report if current mode is debug - CSR + // Is current mode debug ? 
- CSR_REGFILE input logic debug_mode_i, - // TBD - CSR + // Trap virtual memory - CSR_REGFILE input logic tvm_i, - // TBD - CSR + // Timeout wait - CSR_REGFILE input logic tw_i, - // TBD- CSR + // Trap sret - CSR_REGFILE input logic tsr_i ); // ID/ISSUE register stage diff --git a/core/instr_realign.sv b/core/instr_realign.sv index 93d4382260..31a99a5561 100644 --- a/core/instr_realign.sv +++ b/core/instr_realign.sv @@ -43,7 +43,7 @@ module instr_realign output logic [INSTR_PER_FETCH-1:0] valid_o, // Instruction address - FRONTEND output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o, - // Instruction - instr_scan, instr_queue + // Instruction - instr_scan&instr_queue output logic [INSTR_PER_FETCH-1:0][31:0] instr_o ); // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions diff --git a/core/issue_stage.sv b/core/issue_stage.sv index b8dc224e1b..d046b24f68 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -23,67 +23,67 @@ module issue_stage input logic clk_i, // Asynchronous reset active low - SUBSYSTEM input logic rst_ni, - // TO_BE_COMPLETED - PERF_COUNTERS + // Is scoreboard full - PERF_COUNTERS output logic sb_full_o, // TO_BE_COMPLETED - CONTROLLER input logic flush_unissued_instr_i, // TO_BE_COMPLETED - CONTROLLER input logic flush_i, - // zero when accelerate port is disable - ACC_DISPATCHER + // Stall inserted by Acc dispatcher - ACC_DISPATCHER input logic stall_i, - // Handshake's data between decode and issue - ID_STAGE + // Handshake's data with decode stage - ID_STAGE input scoreboard_entry_t decoded_instr_i, // instruction value - ID_STAGE input logic [31:0] orig_instr_i, - // Handshake's valid between decode and issue - ID_STAGE + // Handshake's valid with decode stage - ID_STAGE input logic decoded_instr_valid_i, - // Report if instruction is a control flow instruction - ID_STAGE + // Is instruction a control flow instruction - ID_STAGE input logic is_ctrl_flow_i, - // Handshake's acknowlege 
between decode and issue - ID_STAGE + // Handshake's acknowledge with decode stage - ID_STAGE output logic decoded_instr_ack_o, - // TO_BE_COMPLETED - EX_STAGE + // rs1 forwarding - EX_STAGE output [riscv::VLEN-1:0] rs1_forwarding_o, - // TO_BE_COMPLETED - EX_STAGE + // rs2 forwarding - EX_STAGE output [riscv::VLEN-1:0] rs2_forwarding_o, - // TO_BE_COMPLETED - EX_STAGE + // FU data useful to execute instruction - EX_STAGE output fu_data_t fu_data_o, // TO_BE_COMPLETED - EX_STAGE output logic [riscv::VLEN-1:0] pc_o, - // TO_BE_COMPLETED - EX_STAGE + // Is compressed instruction - EX_STAGE output logic is_compressed_instr_o, // TO_BE_COMPLETED - EX_STAGE input logic flu_ready_i, - // TO_BE_COMPLETED - EX_STAGE + // ALU FU is valid - EX_STAGE output logic alu_valid_o, // TO_BE_COMPLETED - EX_STAGE input logic resolve_branch_i, - // TO_BE_COMPLETED - EX_STAGE + // Load store unit FU is ready - EX_STAGE input logic lsu_ready_i, - // TO_BE_COMPLETED - EX_STAGE + // Load store unit FU is valid - EX_STAGE output logic lsu_valid_o, - // TO_BE_COMPLETED - EX_STAGE + // Branch unit is valid - EX_STAGE output logic branch_valid_o, - // TO_BE_COMPLETED - EX_STAGE + // Information of branch prediction - EX_STAGE output branchpredict_sbe_t branch_predict_o, - // TO_BE_COMPLETED - EX_STAGE + // Mult FU is valid - EX_STAGE output logic mult_valid_o, - // TO_BE_COMPLETED - EX_STAGE + // FPU FU is ready - EX_STAGE input logic fpu_ready_i, - // TO_BE_COMPLETED - EX_STAGE + // FPU FU is valid - EX_STAGE output logic fpu_valid_o, - // Report FP fmt field - EX_STAGE + // FPU fmt field - EX_STAGE output logic [1:0] fpu_fmt_o, - // report FP rm field - EX_STAGE + // FPU rm field - EX_STAGE output logic [2:0] fpu_rm_o, - // TO_BE_COMPLETED - EX_STAGE + // CSR is valid - EX_STAGE output logic csr_valid_o, - // TO_BE_COMPLETED - EX_STAGE + // CVXIF FU is valid - EX_STAGE output logic x_issue_valid_o, - // TO_BE_COMPLETED - EX_STAGE + // CVXIF FU is ready - EX_STAGE input logic x_issue_ready_i, 
- // TO_BE_COMPLETED - EX_STAGE + // CVXIF offloader instruction value - EX_STAGE output logic [31:0] x_off_instr_o, - // TO_BE_COMPLETED - ACC_DISPATCHER + // Issue scoreboard entry - ACC_DISPATCHER output scoreboard_entry_t issue_instr_o, // TO_BE_COMPLETED - ACC_DISPATCHER output logic issue_instr_hs_o, @@ -93,7 +93,7 @@ module issue_stage input bp_resolve_t resolved_branch_i, // TO_BE_COMPLETED - EX_STAGE input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, - // exception from execute stage or CVXIF offloaded instruction - EX_STAGE + // exception from execute stage or CVXIF - EX_STAGE input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_i, // TO_BE_COMPLETED - EX_STAGE input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, @@ -113,9 +113,9 @@ module issue_stage input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // Issue stall - PERF_COUNTERS output logic stall_issue_o, - // Information dedicated to RVFI - SUBSYSTEM + // Information dedicated to RVFI - RVFI output logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, - // Information dedicated to RVFI - SUBSYSTEM + // Information dedicated to RVFI - RVFI output logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer_o ); // --------------------------------------------------- diff --git a/docs/01_cva6_user/RISCV_Instructions.rst b/docs/01_cva6_user/RISCV_Instructions.rst index 414f883ba7..a2d9cdd245 100644 --- a/docs/01_cva6_user/RISCV_Instructions.rst +++ b/docs/01_cva6_user/RISCV_Instructions.rst @@ -20,8 +20,8 @@ *This chapter is applicable to all configurations.* -RISC-V Instructions -=================== +CVA6 RISC-V Instructions +======================== Introduction ------------------ diff --git a/docs/04_cv32a65x_design/images/CV32A65X_subsystems.png b/docs/04_cv32a65x_design/images/CV32A65X_subsystems.png new file mode 100644 index 0000000000..baa7f7e9ae Binary files /dev/null and b/docs/04_cv32a65x_design/images/CV32A65X_subsystems.png differ diff --git 
a/docs/04_cv32a65x_design/images/ex_stage_modules.png b/docs/04_cv32a65x_design/images/ex_stage_modules.png new file mode 100644 index 0000000000..bd1f029b2c Binary files /dev/null and b/docs/04_cv32a65x_design/images/ex_stage_modules.png differ diff --git a/docs/04_cv32a65x_design/images/id_stage_modules.png b/docs/04_cv32a65x_design/images/id_stage_modules.png new file mode 100644 index 0000000000..8e0115f39e Binary files /dev/null and b/docs/04_cv32a65x_design/images/id_stage_modules.png differ diff --git a/docs/04_cv32a65x_design/images/issue_stage_modules.png b/docs/04_cv32a65x_design/images/issue_stage_modules.png new file mode 100644 index 0000000000..d0f14e46a4 Binary files /dev/null and b/docs/04_cv32a65x_design/images/issue_stage_modules.png differ diff --git a/docs/04_cv32a65x_design/images/load_store_unit_modules.png b/docs/04_cv32a65x_design/images/load_store_unit_modules.png new file mode 100644 index 0000000000..f368dbaa85 Binary files /dev/null and b/docs/04_cv32a65x_design/images/load_store_unit_modules.png differ diff --git a/docs/04_cv32a65x_design/images/mult_modules.png b/docs/04_cv32a65x_design/images/mult_modules.png new file mode 100644 index 0000000000..4ba4bbe25e Binary files /dev/null and b/docs/04_cv32a65x_design/images/mult_modules.png differ diff --git a/docs/04_cv32a65x_design/images/subsystems.png b/docs/04_cv32a65x_design/images/subsystems.png index a43af542cc..1592d9b5c5 100644 Binary files a/docs/04_cv32a65x_design/images/subsystems.png and b/docs/04_cv32a65x_design/images/subsystems.png differ diff --git a/docs/04_cv32a65x_design/source/CSRs.rst b/docs/04_cv32a65x_design/source/CSRs.rst index ac96dfca7d..b55116a223 100644 --- a/docs/04_cv32a65x_design/source/CSRs.rst +++ b/docs/04_cv32a65x_design/source/CSRs.rst @@ -12,7 +12,7 @@ CSR === .. 
toctree:: - :hidden: + :maxdepth: 1 csr_list csr diff --git a/docs/04_cv32a65x_design/source/architecture.rst b/docs/04_cv32a65x_design/source/architecture.rst index 3df70d989b..671ef859f3 100644 --- a/docs/04_cv32a65x_design/source/architecture.rst +++ b/docs/04_cv32a65x_design/source/architecture.rst @@ -12,23 +12,27 @@ Architecture and Modules ======================== -The CV32A6 v0.1.0 subsystem is composed of 8 modules. +The CV32A65X is fully synthesizable. It has been designed mainly for ASIC designs, but FPGA synthesis is supported as well. + +For ASIC synthesis, the whole design is completely synchronous and uses positive-edge triggered flip-flops. The core occupies an area of about 80 kGE. The clock frequency can be more than 1GHz depending on the technology. + +The CV32A65X subsystem is composed of 8 modules. .. figure:: ../images/subsystems.png :name: CV32A6 v0.1.0 modules :align: center :alt: - CV32A6 v0.1.0 modules + CV32A65X modules Connections between modules are illustrated in the following block diagram. FRONTEND, DECODE, ISSUE, EXECUTE, COMMIT and CONTROLLER are part of the pipeline. And CACHES implements the instruction and data caches and CSRFILE contains registers. -.. figure:: ../images/CVA6_subsystems.png - :name: CVA6 subsystem +.. figure:: ../images/CV32A65X_subsystems.png + :name: CV32A65X subsystem :align: center :alt: - CV32A6 v0.1.0 pipeline and modules + CV32A65X pipeline and modules .. toctree:: :hidden: diff --git a/docs/04_cv32a65x_design/source/cv32a6_execute.rst b/docs/04_cv32a65x_design/source/cv32a6_execute.rst index 6fb676c816..f1c163a7c9 100644 --- a/docs/04_cv32a65x_design/source/cv32a6_execute.rst +++ b/docs/04_cv32a65x_design/source/cv32a6_execute.rst @@ -8,11 +8,14 @@ EX_STAGE Module Description *********** -The EX_STAGE module implements ... TO BE COMPLETED +The EX_STAGE module is a logical stage which implements the execute stage. 
+It encapsulates the following functional units: +ALU, Branch Unit, CSR buffer, Mult, load and store and CVXIF. The module is connected to: -* TO BE COMPLETED +* ID_STAGE module provides scoreboard entry. +* .. include:: port_ex_stage.rst @@ -22,1602 +25,97 @@ Functionality TO BE COMPLETED -*************************** -Architecture and Submodules -*************************** +********** +Submodules +********** -ALU -=== - -Branch Unit -=========== - -Load Store Unit (LSU) -===================== - ----------------------- -Memory Management Unit ----------------------- - -The Memory Management Unit (MMU) SV32 module is a crucial component in the RISC-V-based processor, serving as the backbone for virtual memory management and address translation. - -.. figure:: ../images/mmu_in_out.png - :name: **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 - :align: center - :width: 70% - :alt: mmu_in_out - - **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 - -At its core, the MMU SV32 plays a pivotal role in translating virtual addresses into their corresponding physical counterparts. This translation process is paramount for providing memory protection, isolation, and efficient memory management in modern computer systems. Importantly, it handles both instruction and data accesses, ensuring a seamless interaction between the processor and virtual memory. Within the MMU, several major blocks play pivotal roles in this address translation process. These includes: - -* Instruction TLB (ITLB) -* Data TLB (DTLB) -* Shared TLB -* Page Table Walker (PTW) - -.. figure:: ../images/mmu_major_blocks.png - :name: **Figure 2:** Major Blocks in CVA6 MMU SV32 - :align: center - :width: 60% - :alt: mmu_major_blocks - - **Figure 2:** Major Blocks in CVA6 MMU SV32 - -The MMU SV32 manages privilege levels and access control, enforcing permissions for user and supervisor modes while handling access exceptions. 
It employs Translation Lookaside Buffers (TLBs) for efficient address translation, reducing the need for page table access. TLB hits yield quick translations, but on misses, the shared TLB is consulted, and if necessary, the Page Table Walker (PTW) performs page table walks, updating TLBs and managing exceptions during the process. - -In addition to these functionalities, the MMU SV32 seamlessly integrates support for Physical Memory Protection (PMP), enabling it to enforce access permissions and memory protection configurations as specified by the PMP settings. This additional layer of security and control enhances the management of memory accesses - -.. raw:: html - - Instruction and Data Interfaces - -The MMU SV32 maintains interfaces with the instruction cache (ICache) and the load-store unit (LSU). It receives virtual addresses from these components and proceeds to translate them into physical addresses, a fundamental task for ensuring proper program execution and memory access. - -.. raw:: html - - Signal Description of MMU - -.. raw:: html - -
Table 1: CVA6 MMU SV32 Input Output Signals
- -.. list-table:: - :header-rows: 1 - - * - Signal - - IO - - Connection Type - - Type - - Description - - * - ``clk_i`` - - in - - Subsystem - - logic - - Subsystem Clock - - * - ``rst_ni`` - - in - - Subsystem - - logic - - Asynchronous reset active low - - * - ``flush_i`` - - in - - Controller - - logic - - Sfence Committed - - * - ``enable_translation_i`` - - in - - CSR RegFile - - logic - - Indicate address translation request for instruction - - * - ``en_ld_st_translation_i`` - - in - - CSR RegFile - - logic - - Indicate address translation request for load or store - - * - ``icache_areq_i`` - - in - - Cache Subsystem - - icache_arsp_t - - Icache Response - - * - ``icache_areq_o`` - - out - - Cache Subsystem - - icache_areq_t - - Icache Request - - * - ``misaligned_ex_i`` - - in - - Load Store Unit - - exception_t - - Indicate misaligned exception - - * - ``lsu_req_i`` - - in - - Load Store Unit - - logic - - Request address translation - - * - ``lsu_vaddr_i`` - - in - - Load Store Unit - - logic [riscv::VLEN-1:0] - - Virtual Address In - - * - ``lsu_is_store_i`` - - in - - Store Unit - - logic - - Translation is requested by a store - - * - ``lsu_dtlb_hit_o`` - - out - - Store / Load Unit - - logic - - Indicate a DTLB hit - - * - ``lsu_dtlb_ppn_o`` - - out - - Load Unit - - logic [riscv::PPNW-1:0] - - Send PNN to LSU - - * - ``lsu_valid_o`` - - out - - Load Store Unit - - logic - - Indicate a valid translation - - * - ``lsu_paddr_o`` - - out - - Store / Load Unit - - logic [riscv::PLEN-1:0] - - Translated Address - - * - ``lsu_exception_o`` - - out - - Store / Load Unit - - exception_t - - Address Translation threw an exception - - * - ``priv_lvl_i`` - - in - - CSR RegFile - - riscv::priv_lvl_t - - Privilege level for instruction fetch interface - - * - ``ld_st_priv_lvl_i`` - - in - - CSR RegFile - - riscv::priv_lvl_t - - Privilege Level for Data Interface - - * - ``sum_i`` - - in - - CSR RegFile - - logic - - Supervisor User Memory Access bit in xSTATUS 
CSR register - - * - ``mxr_i`` - - in - - CSR RegFile - - logic - - Make Executable Readable bit in xSTATUS CSR register - - * - ``satp_ppn_I`` - - in - - CSR RegFile - - logic [riscv::PPNW-1:0] - - PPN of top level page table from SATP register - - * - ``asid_i`` - - in - - CSR RegFile - - logic [ASID_WIDTH-1:0] - - ASID to for the lookup - - * - ``asid_to_be_flushed`` - - in - - Execute Stage - - logic [ASID_WIDTH-1:0] - - ASID of the entry to be flushed. - - * - ``vaddr_to_be_flushed_i`` - - in - - Execute Stage - - logic [riscv::VLEN-1:0] - - Virtual address of the entry to be flushed. - - * - ``flush_tlb_i`` - - in - - Controller - - logic - - SFENCE.VMA committed - - * - ``itlb_miss_o`` - - out - - Performance Counter - - logic - - Indicate an ITLB miss - - * - ``dtlb_miss_o`` - - out - - Performance Counter - - logic - - Indicate a DTLB miss - - * - ``req_port_i`` - - in - - Cache Subsystem - - dcache_req_o_t - - D Cache Data Requests - - * - ``req_port_o`` - - out - - Cache Subsystem - - dcache_req_i_t - - D Cache Data Response - - * - ``pmpcfg_i`` - - in - - CSR RegFile - - riscv::pmpcfg_t [15:0] - - PMP configurations - - * - ``pmpaddr_i`` - - in - - CSR RegFile - - logic [15:0][riscv::PLEN-3:0] - - PMP Address - -.. raw:: html - - Struct Description - -.. raw:: html - -Table 2: I Cache Request Struct (icache_areq_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``fetch_valid`` - - logic - - Address Translation Valid - - * - ``fetch_paddr`` - - logic [riscv::PLEN-1:0] - - Physical Address In - - * - ``fetch_exception`` - - exception_t - - Exception occurred during fetch - -.. raw:: html - -Table 3: I Cache Response Struct (icache_arsq_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``fetch_req`` - - logic - - Address Translation Request - - * - ``fetch_vaddr`` - - logic [riscv::VLEN-1:0] - - Virtual Address out - -.. raw:: html - -Table 4: Exception Struct (exception_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``cause`` - - riscv::xlen_t - - Cause of exception - - * - ``tval`` - - riscv::xlen_t - - Additional information of causing exception (e.g. instruction causing it), address of LD/ST fault - - * - ``valid`` - - logic - - Indicate that exception is valid - -.. raw:: html - -Table 5: PMP Configuration Struct (pmpcfg_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``locked`` - - logic - - Lock this configuration - - * - ``reserved`` - - logic[1:0] - - Reserved bits in pmpcfg CSR - - * - ``addr_mode`` - - pmp_addr_mode_t - - Addressing Modes: OFF, TOR, NA4, NAPOT - - * - ``access_type`` - - pmpcfg_access_t - - None, read, write, execute - -.. raw:: html - - Control Flow in MMU SV32 Module - -.. figure:: ../images/mmu_control_flow.png - :name: **Figure 3:** Control Flow in CVA6 MMU SV32 - :align: center - :width: 95% - :alt: mmu_control_flow - - **Figure 3:** Control Flow in CVA6 MMU SV32 - -.. raw:: html - - Exception Sources with Address Translation Enabled - -Two potential exception sources exist: - -* Hardware Page Table Walker (HPTW) throwing an exception, signifying a page fault exception. -* Access error due to insufficient permissions of PMP, known as an access exception. - -.. raw:: html - - Instruction Fetch Interface - -The IF stage initiates a request to retrieve memory content at a specific virtual address. When the MMU is disabled, the instruction fetch request is directly passed to the I$ without modifications. - -.. raw:: html - - Address Translation in Instruction Interface - -If virtual memory translation is enabled for instruction fetches, the following operations are performed in the instruction interface: - -* Compatibility of requested virtual address with selected page based address translation scheme is checked. -* For 4K page translation, the module determines the fetch physical address by combining the physical page number (PPN) from ITLB content and the offset from the virtual address. -* In the case of Mega page translation, if the ITLB indicates a 4M page, the VPN0 from the fetch virtual address is written to the PPN0 of the fetch physical address to ensure alignment for superpage translation. 
-* If the Instruction TLB (ITLB) lookup hits, the fetch valid signal (which indicates a valid physical address) is activated in response to the input fetch request. Memory region accessibility is checked from the perspective of the fetch operation, potentially triggering a page fault exception in case of an access error or insufficient PMP permission. -* In case of an ITLB miss, if the page table walker (PTW) is active (only active if there is a shared TLB miss) and handling instruction fetches, the fetch valid signal is determined based on PTW errors or access exceptions. - -If the fetch physical address doesn't match any execute region, an Instruction Access Fault is raised. When not translating, PMPs are immediately checked against the physical address for access verification. - -.. raw:: html - - Data Interface - -.. raw:: html - - Address Translation in Data Interface - -If address translation is enabled for load or store, and no misaligned exception has occurred, the following operations are performed in the data interface: - -* Initially, translation is assumed to be invalid, signified by the MMU to LSU. -* The translated physical address is formed by combining the PPN from the Page Table Entry (PTE) and the offset from the virtual address requiring translation. This address is sent one cycle later due to the additional bank of registers, which delays the MMU’s answer. The PPN from the PTE is also shared separately with LSU in the same cycle as the hit. -* In the case of superpage translation, as in SV32, known as the 4M page, PPN0 of the translated physical address and the separately shared PPN are updated with the VPN0 of the virtual address. - -If a Data TLB (DTLB) hit occurs, it indicates a valid translation, and various fault checks are performed depending on whether it's a load or store request. - -* For store requests, if the page is not writable, the dirty flag isn't set, or privileges are violated, it results in a page fault corresponding to the store access.
If PMPs are also violated, it leads to an access fault corresponding to the store access. Page faults take precedence over access faults. -* For load requests, a page fault is triggered if there are insufficient access privileges. PMPs are checked again during load access, resulting in an access fault corresponding to load access if PMPs are violated. - -In case of a DTLB miss, potential exceptions are monitored during the page table walk. If the PTW indicates a page fault, the corresponding page fault related to the requested type is signaled. If the PTW indicates an access exception, the load access fault is indicated through address translation because the page table walker can only throw load access faults. - -.. raw:: html - - Address Translation is Disabled - -When address translation is not enabled, the physical address is immediately checked against Physical Memory Protections (PMPs). If there is a request from LSU, no misaligned exception, and PMPs are violated, it results in an access fault corresponding to the request being indicated. - ----------------------------- -Translation Lookaside Buffer ----------------------------- - -Page tables are accessed for translating virtual memory addresses to physical memory addresses. This translation needs to be carried out for every load and store instruction and also for every instruction fetch. Since page tables are resident in physical memory, accessing these tables in all these situations has a significant impact on performance. Page table accesses occur in patterns that are closely related in time. Furthermore, the spatial and temporal locality of data accesses or instruction fetches mean that the same page is referenced repeatedly. Taking advantage of these access patterns the processor keeps the information of recent address translations, to enable fast retrieval, in a small cache called the Translation Lookaside Buffer (TLB) or an address-translation cache. 
- -The CVA6 TLB is structured as a fully associative cache, where the virtual address that needs to be translated is compared against all the individual TLB entries. Given a virtual address, the processor examines the TLB (TLB lookup) to determine if the virtual page number (VPN) of the page being accessed is in the TLB. When a TLB entry is found (TLB hit), the TLB returns the corresponding physical page number (PPN) which is used to calculate the target physical address. If no TLB entry is found (TLB miss) the processor has to read individual page table entries from memory (Table walk). In CVA6 table walking is supported by dedicated hardware. Once the processor finishes the table walk it has the Physical Page Number (PPN) corresponding to the Virtual Page Number (VPN) that needs to be translated. The processor adds an entry for this address translation to the TLB so future translations of that virtual address will happen quickly through the TLB. During the table walk the processor may find out that the corresponding physical page is not resident in memory. At this stage a page table exception (Page Fault) is generated which gets handled by the operating system. The operating system places the appropriate page in memory, updates the appropriate page tables and returns execution to the instruction which generated the exception. - -The input and output signals of the TLB are shown in the following figure. - -.. figure:: ../images/in_out_tlb.png - :name: **Figure 4:** Inputs and Outputs of CVA6 TLB - :align: center - :width: 65% - :alt: in_out_tlb - - **Figure 4:** Inputs and Outputs of CVA6 TLB - -.. raw:: html - - Signal Description of TLB - -.. raw:: html - -Table 6: CVA6 TLB Input Output Signals
- -.. list-table:: - :header-rows: 1 - - * - Signal - - IO - - Connection - - Type - - Description - - * - ``clk_i`` - - in - - SUBSYSTEM - - logic - - Subsystem Clock - - * - ``rst_ni`` - - in - - SUBSYSTEM - - logic - - Asynchronous reset active low - - * - ``flush_i`` - - in - - Controller - - logic - - TLB flush request - - * - ``update_i`` - - in - - Shared TLB - - tlb_update_sv32_t - - Updated tag and content of TLB - - * - ``lu_access_i`` - - in - - Cache Subsystem - - logic - - Signal indicating a lookup access is being requested - - * - ``lu_asid_i`` - - in - - CSR RegFile - - logic[ASID_WIDTH-1:0] - - ASID (Address Space Identifier) for the lookup - - * - ``lu_vaddr_i`` - - in - - Cache Subsystem - - logic[riscv::VLEN-1:0] - - Virtual address for the lookup - - * - ``lu_content_o`` - - out - - MMU SV32 - - riscv::pte_sv32_t - - Output for the content of the TLB entry - - * - ``asid_to_be_flushed_i`` - - in - - Execute Stage - - logic[ASID_WIDTH-1:0] - - ASID of the entry to be flushed - - * - ``vaddr_to_be_flushed_i`` - - in - - Execute Stage - - logic[riscv::VLEN-1:0] - - Virtual address of the entry to be flushed - - * - ``lu_is_4M_o`` - - out - - MMU SV32 - - logic - - Output indicating whether the TLB entry corresponds to a 4MB page - - * - ``lu_hit_o`` - - out - - MMU SV32 - - logic - - Output indicating whether the lookup resulted in a hit or miss - -.. raw:: html - - Struct Description - -.. raw:: html - -Table 7: SV32 TLB Update Struct (tlb_update_sv32_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``valid`` - - logic - - Indicates whether the TLB update entry is valid or not - - * - ``is_4M`` - - logic - - Indicates if the TLB entry corresponds to a 4MB page - - * - ``vpn`` - - logic[19:0] - - Virtual Page Number (VPN) used for updating the TLB, consisting of 20 bits - - * - ``asid`` - - logic[8:0] - - Address Space Identifier (ASID) used for updating the TLB, with a length of 9 bits for Sv32 MMU - - * - ``content`` - - riscv::pte_sv32_t - - Content of the TLB update entry, defined by the structure - -.. raw:: html - -Table 8: SV32 PTE Struct (riscv::pte_sv32_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``ppn`` - - logic[21:0] - - 22 bit Physical Page Number (PPN) - - * - ``rsw`` - - logic[1:0] - - Reserved for use by supervisor software - - * - ``d`` - - logic - - | Dirty bit indicating whether the page has been modified (dirty) or not - | 0: Page is clean i.e., has not been written - | 1: Page is dirty i.e., has been written - - * - ``a`` - - logic - - | Accessed bit indicating whether the page has been accessed - | 0: Virtual page has not been accessed since the last time A bit was cleared - | 1: Virtual page has been read, written, or fetched from since the last time the A bit was cleared - - * - ``g`` - - logic - - | Global bit marking a page as part of a global address space valid for all ASIDs - | 0: Translation is valid for specific ASID - | 1: Translation is valid for all ASIDs - - * - ``u`` - - logic - - | User bit indicating privilege level of the page - | 0: Page is not accessible in user mode but in supervisor mode - | 1: Page is accessible in user mode but not in supervisor mode - - * - ``x`` - - logic - - | Execute bit which allows execution of code from the page - | 0: Code execution is not allowed - | 1: Code execution is permitted - - * - ``w`` - - logic - - | Write bit allows the page to be written - | 0: Write operations are not allowed - | 1: Write operations are permitted - - * - ``r`` - - logic - - | Read bit allows read access to the page - | 0: Read operations are not allowed - | 1: Read operations are permitted - - * - ``v`` - - logic - - | Valid bit indicating the page table entry is valid - | 0: Page is invalid i.e. page is not in DRAM, translation is not valid - | 1: Page is valid i.e. page resides in the DRAM, translation is valid - -.. raw:: html - - TLB Entry Fields - -The number of TLB entries can be changed via a design parameter. In 32-bit configurations of CVA6 only 2 TLB entries are instantiated. 
Each TLB entry is made up of two fields: Tag and Content. The Tag field holds the virtual page number (VPN1, VPN0), ASID, page size (is_4M) along with a valid bit (VALID) indicating that the entry is valid. The SV32 virtual page number, which is supported by CV32A6X, is further split into two separate virtual page numbers VPN1 and VPN0. The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page. Note that the V bit in the Content field is the V bit which is present in the page table in memory. It is copied from the page table, as is, and the VALID bit in the Tag is set based on its value. The TLB entry fields are shown in **Figure 5**. - -.. figure:: ../images/cva6_tlb_entry.png - :name: **Figure 5:** Fields in CVA6 TLB entry - :align: center - :width: 80% - :alt: cva6_tlb_entry - - **Figure 5:** Fields in CVA6 TLB entry - -.. raw:: html - - CVA6 TLB Management / Implementation - -The CVA6 TLB implements the following three functions: - -* **Translation:** This function implements the address lookup and match logic. -* **Update and Flush:** This function implements the update and flush logic. -* **Pseudo Least Recently Used Replacement Policy:** This function implements the replacement policy for TLB entries. - -.. raw:: html - - Translation - -This function takes in the virtual address and certain other fields, examines the TLB to determine if the virtual page number of the page being accessed is in the TLB or not. If a TLB entry is found (TLB hit), the TLB returns the corresponding physical page number (PPN) which is then used to calculate the target physical address. The following checks are done as part of this lookup function to find a match in the TLB: - -* **Validity Check:** For a TLB hit, the associated TLB entry must be valid. -* **ASID and Global Flag Check:** The TLB entry's ASID must match the given ASID (ASID associated with the Virtual address).
If the TLB entry’s Global bit (G) is set then this check is not done. This ensures that the translation is either specific to the provided ASID or it is globally applicable. -* **Level 1 VPN match:** SV32 implements a two-level page table. As such the virtual address is broken up into three parts which are the virtual page number 1, virtual page number 0 and displacement. So the condition that is checked next is that the virtual page number 1 of the virtual address matches the virtual page number 1 (VPN1) of the TLB entry. -* **Level 0 VPN match or 4-Mega Page:** The last condition to be checked, for a TLB hit, is that the virtual page number 0 of the virtual address matches the virtual page number 0 of the TLB entry (VPN0). This match is ignored if the is_4M bit in the Tag is set which implies a super 4M page. - -All the conditions listed above are checked against every TLB entry. If there is a TLB hit then the corresponding bit in the hit array is set. **Figure 6** illustrates the TLB hit/miss process listed above. - -.. figure:: ../images/cva6_tlb_hit.png - :name: **Figure 6:** Block diagram of CVA6 TLB hit or miss +.. figure:: ../images/ex_stage_modules.png + :name: EX_STAGE submodules :align: center - :width: 75% - :alt: cva6_tlb_hit - - **Figure 6:** Block diagram of CVA6 TLB hit or miss - -.. raw:: html - - Flushing TLB entries - -The SFENCE.VMA instruction can be used with certain specific source register specifiers (rs1 & rs2) to flush a specific TLB entry, some set of TLB entries or all TLB entries. Like all instructions this action only takes place when the SFENCE.VMA instruction is committed (shown via the commit_sfence signal in the following figures.)
The behavior of the instruction is as follows: + :alt: -* **If rs1 is not equal to x0 and rs2 is not equal to x0:** Invalidate all TLB entries which contain leaf page table entries corresponding to the virtual address in rs1 (shown below as Virtual Address to be flushed) and that match the address space identifier as specified by integer register rs2 (shown below as asid_to_be_flushed_i), except for entries containing global mappings. This is referred to as the “SFENCE.VMA vaddr asid” case. - -.. figure:: ../images/sfence_vaddr_asid.png - :name: **Figure 7:** Invalidate TLB entry if ASID and virtual address match - :align: center - :width: 75% - :alt: sfence_vaddr_asid - - **Figure 7:** Invalidate TLB entry if ASID and virtual address match - -* **If rs1 is equal to x0 and rs2 is equal to x0:** Invalidate all TLB entries for all address spaces. This is referred to as the "SFENCE.VMA x0 x0" case. - -.. figure:: ../images/sfence_x0_x0.png - :name: **Figure 8:** Invalidate all TLB entries if both source register specifiers are x0 - :align: center - :width: 62% - :alt: sfence_x0_x0 - - **Figure 8:** Invalidate all TLB entries if both source register specifiers are x0 - -* **If rs1 is not equal to x0 and rs2 is equal to x0:** invalidate all TLB entries that contain leaf page table entries corresponding to the virtual address in rs1, for all address spaces. This is referred to as the “SFENCE.VMA vaddr x0” case. - -.. figure:: ../images/sfence_vaddr_x0.png - :name: **Figure 9:** Invalidate TLB entry with matching virtual address for all address spaces - :align: center - :width: 75% - :alt: sfence_vaddr_x0 - - **Figure 9:** Invalidate TLB entry with matching virtual address for all address spaces - -* **If rs1 is equal to x0 and rs2 is not equal to x0:** Invalidate all TLB entries matching the address space identified by integer register rs2, except for entries containing global mappings. This is referred to as the “SFENCE.VMA 0 asid” case. - -.. 
figure:: ../images/sfence_x0_asid.png - :name: **Figure 10:** Invalidate TLB entry for matching ASIDs - :align: center - :width: 75% - :alt: sfence_x0_asid + EX_STAGE submodules - **Figure 10:** Invalidate TLB entry for matching ASIDs - -.. raw:: html - - Updating TLB - -When a TLB valid update request is signaled by the shared TLB, and the replacement policy select the update of a specific TLB entry, the corresponding entry's tag is updated with the new tag, and its associated content is refreshed with the information from the update request. This ensures that the TLB entry accurately reflects the new translation information. - -.. raw:: html - - Pseudo Least Recently Used Replacement Policy - -Cache replacement algorithms are used to determine which TLB entry should be replaced, because it is not likely to be used in the near future. The Pseudo-Least-Recently-Used (PLRU) is a cache entry replacement algorithm, derived from Least-Recently-Used (LRU) cache entry replacement algorithm, used by the TLB. Instead of precisely tracking recent usage as the LRU algorithm does, PLRU employs an approximate measure to determine which entry in the cache has not been recently used and as such can be replaced. - -CVA6 implements the PLRU algorithm via the Tree-PLRU method which implements a binary tree. The TLB entries are the leaf nodes of the tree. Each internal node, of the tree, consists of a single bit, referred to as the state bit or plru bit, indicating which subtree contains the (pseudo) least recently used entry (the PLRU); 0 for the left hand tree and 1 for the right hand tree. Following this traversal, the leaf node reached, corresponds to the PLRU entry which can be replaced. Having accessed an entry (so as to replace it) we need to promote that entry to be the Most Recently Used (MRU) entry. This is done by updating the value of each node along the access path to point away from that entry. 
If the accessed entry is a right child i.e., its parent node value is 1, it is set to 0, and if the parent is the left child of its parent (the grandparent of the accessed node) then its node value is set to 1 and so on all the way up to the root node. - -The PLRU binary tree is implemented as an array of node values. Nodes are organized in the array based on levels, with those from lower levels appearing before higher ones. Furthermore those on the left side of a node appear before those on the right side of a node. The figure below shows a tree and the corresponding array. - -.. figure:: ../images/plru_tree_indexing.png - :name: **Figure 11:** PLRU Tree Indexing - :align: center - :width: 60% - :alt: plru_tree_indexing - - **Figure 11:** PLRU Tree Indexing - -For n-way associative, we require n - 1 internal nodes in the tree. With those nodes, two operations need to be performed efficiently. - -* Promote the accessed entry to be MRU -* Identify which entry to replace (i.e. the PLRU entry) - -.. raw:: html - - Updating the PLRU-Tree - -For a TLB entry which is accessed, the following steps are taken to make it the MRU: - -1. Iterate through each level of the binary tree. -2. Calculate the index of the leftmost child within the current level. Let us call that index the index base. -3. Calculate the shift amount to identify the relevant node based on the level and TLB entry index. -4. Calculate the new value that the node should have in order to make the accessed entry the Most Recently Used (MRU). The new value of the root node is the opposite of the TLB entry index, MSB at the root node, MSB - 1 at node at next level and so on. -5. Assign this new value to the relevant node, ensuring that the hit entry becomes the MRU within the binary tree structure. - -At level 0, no bit of the TLB entry’s index determines the offset from the index base because it’s a root node. At level 1, MSB of entry’s index determines the amount of offset from index base at that level. 
At level 2, the first two bits of the entry's index from MSB side determine the offset from the index base because there are 4 nodes at the level 2 and so on. - -.. figure:: ../images/update_tree.png - :name: **Figure 12:** Promote Entry to be MRU - :align: center - :width: 82% - :alt: update_tree - - **Figure 12:** Promote Entry to be MRU - -In the above figure, the entry at index 5 is accessed. To make it the MRU entry, every node along the access path should point away from it. Entry 5 is a right child, so its parent’s plru bit is set to 0; its parent is a left child, so its grandparent’s plru bit is set to 1; and its great-grandparent’s plru bit is set to 0. - -.. raw:: html - - Entry Selection for Replacement - -Every TLB entry is checked for the replacement entry. The following steps are taken: - -1. Iterate through each level of the binary tree. -2. Calculate the index of the leftmost child within the current level. Let us call that index the index base. -3. Calculate the shift amount to identify the relevant node based on the level and TLB entry index. -4. If the corresponding bit of the entry's index matches the value of the node being traversed at the current level, keep the replacement signal high for that entry; otherwise, set the replacement signal to low. - -.. figure:: ../images/replacement_entry.png - :name: **Figure 13:** Possible path traverse for entry selection for replacement - :align: center - :width: 65% - :alt: replacement_entry - **Figure 13:** Possible path traverse for entry selection for replacement - -The figure above shows every possible path that can be traversed to find the PLRU entry. If the plru bit at each level matches the corresponding bit of the entry's index, that’s the next entry to replace. The table below shows the entry selection for replacement. - -.. raw:: html - -Table 9: Entry Selection for Replacement
- -+-------------------+---------------+----------------------+ -| **Path Traverse** | **PLRU Bits** | **Entry to replace** | -+-------------------+---------------+----------------------+ -| 0 -> 1 -> 3 | 000 | 0 | -| +---------------+----------------------+ -| | 001 | 1 | -+-------------------+---------------+----------------------+ -| 0 -> 1 -> 4 | 010 | 2 | -| +---------------+----------------------+ -| | 011 | 3 | -+-------------------+---------------+----------------------+ -| 0 -> 2 -> 5 | 100 | 4 | -| +---------------+----------------------+ -| | 101 | 5 | -+-------------------+---------------+----------------------+ -| 0 -> 2 -> 6 | 110 | 6 | -| +---------------+----------------------+ -| | 111 | 7 | -+-------------------+---------------+----------------------+ - ------------------------------------ -Shared Translation Lookaside Buffer ------------------------------------ - -The CVA6 shared TLB is structured as a 2-way associative cache, where the virtual address requiring translation is compared with the set indicated by the virtual page number. The shared TLB is looked up in case of an Instruction TLB (ITLB) or data TLB (DTLB) miss, signaled by these TLBs. If the entry is found in the shared TLB set, the respective TLB, whose translation is being requested, is updated. If the entry is not found in the shared TLB, then the processor has to perform a page table walk. Once the processor obtains a PPN corresponding to the VPN, the shared TLB is updated with this information. If the physical page is not found in the page table, it results in a page fault, which is handled by the operating system. The operating system will then place the corresponding physical page in memory. - -The inputs and output signals of the shared TLB are shown in the following two figures. - -.. 
figure:: ../images/shared_tlb_in_out.png - :name: **Figure 14:** Inputs and outputs of CVA6 shared TLB - :align: center - :width: 60% - :alt: shared_tlb_in_out - - **Figure 14:** Inputs and outputs of CVA6 shared TLB - -.. raw:: html - - Signal Description - -.. raw:: html - -Table 10: Signal Description of CVA6 shared TLB
- -.. list-table:: - :header-rows: 1 - - * - Signal - - IO - - Connection - - Type - - Description - - * - ``clk_i`` - - in - - Subsystem - - logic - - Subsystem Clock - - * - ``rst_ni`` - - in - - Subsystem - - logic - - Asynchronous reset active low - - * - ``flush_i`` - - in - - Controller - - logic - - TLB flush request - - * - ``enable_translation_i`` - - in - - CSR Regfile - - logic - - CSRs indicate to enable Sv32 - - * - ``en_ld_st_translation_i`` - - in - - CSR Regfile - - logic - - Enable virtual memory translation for load/stores - - * - ``asid_i`` - - in - - CSR Regfile - - logic - - ASID for the lookup - - * - ``itlb_access_i`` - - in - - Cache Subsystem - - logic - - Signal indicating a lookup access in ITLB is being requested. - - * - ``itlb_hit_i`` - - in - - ITLB - - logic - - Signal indicating an ITLB hit - - * - ``itlb_vaddr_i`` - - in - - Cache Subsystem - - logic[31:0] - - Virtual address lookup in ITLB - - * - ``dtlb_access_i`` - - in - - Load/Store Unit - - logic - - Signal indicating a lookup access in DTLB is being requested. 
- - * - ``dtlb_hit_i`` - - in - - DTLB - - logic - - Signal indicating a DTLB hit - - * - ``dtlb_vaddr_i`` - - in - - Load/Store Unit - - logic[31:0] - - Virtual address lookup in DTLB - - * - ``itlb_update_o`` - - out - - ITLB - - tlb_update_sv32_t - - Tag and content to update ITLB - - * - ``dtlb_update_o`` - - out - - DTLB - - tlb_update_sv32_t - - Tag and content to update DTLB - - * - ``itlb_miss_o`` - - out - - Performance Counter - - logic - - Signal indicating an ITLB miss - - * - ``dtlb_miss_o`` - - out - - Performance Counter - - logic - - Signal indicating a DTLB miss - - * - ``shared_tlb_access_o`` - - out - - PTW - - logic - - Signal indicating a lookup access in shared TLB is being requested - - * - ``shared_tlb_hit_o`` - - out - - PTW - - logic - - Signal indicating a shared TLB hit - - * - ``shared_tlb_vadd_o`` - - out - - PTW - - logic[31:0] - - Virtual address lookup in shared TLB - - * - ``itlb_req_o`` - - out - - PTW - - logic - - ITLB Request Output - - * - ``shared_tlb_update_i`` - - in - - PTW - - tlb_update_sv32_t - - Updated tag and content of shared TLB - -.. raw:: html - - Struct Description - -.. raw:: html - -Table 11: Shared TLB Update Struct (shared_tag_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``is_4M`` - - logic - - Indicates if the shared TLB entry corresponds to a 4MB page. - - * - ``vpn1`` - - logic[9:0] - - Virtual Page Number (VPN) represents the index of PTE in the page table level 1. - - * - ``vpn0`` - - logic[9:0] - - Virtual Page Number (VPN) represents the index of PTE in the page table level 0. - - * - ``asid`` - - logic - - Address Space Identifier (ASID) used to identify different address spaces - -.. raw:: html - - Shared TLB Entry Structure - -Shared TLB is 2-way associative, with a depth of 64. A single entry in the set contains the valid bit, tag and the content. The Tag segment stores details such as the virtual page number (VPN1, VPN0), ASID, and page size (is_4M). The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page. - -.. figure:: ../images/shared_tlb.png - :name: **Figure 15:** CVA6 Shared TLB Structure - :align: center - :width: 60% - :alt: shared_tlb - - **Figure 15:** CVA6 Shared TLB Structure - -.. raw:: html - - Shared TLB Implementation in CVA6 - -The implementation of a shared TLB in CVA6 is described in the following sections: - -* **ITLB and DTLB Miss:** Prepare a shared TLB lookup if the entry is not found in ITLB or DTLB. -* **Tag Comparison:** Look up the provided virtual address in the shared TLB. -* **Update and Flush:** Flush the shared TLB or update it. -* **Replacement Policies:** First non-valid entry and random replacement policy. - -.. raw:: html - - ITLB and DTLB Miss - -Consider a scenario where an entry is found in the ITLB or DTLB. In this case, there is no need to perform a lookup in the shared TLB since the entry has already been found. Next, there are two scenarios: an ITLB miss or a DTLB miss. - -To identify an ITLB miss, the following conditions need to be fulfilled: - -* Address translation must be enabled. 
-* There must be an access request to the ITLB. -* The ITLB should indicate an ITLB miss. -* There should be no access request to the DTLB. - -During an ITLB miss, access is granted to read the tag and content of the shared TLB from their respective sram. The address for reading the tag and content of the shared TLB entry is calculated using the virtual address for which translation is not found in the ITLB. The ITLB miss is also explicitly indicated by the shared TLB. A request for shared TLB access is initiated. - -To identify the DTLB miss, the following conditions need to be fulfilled: - -* Address translation for load and stores must be enabled. -* There must be an access request to the DTLB. -* The DTLB should indicate a DTLB miss. - -In the case of a DTLB miss, the same logic is employed as described for an ITLB miss. - -.. raw:: html - - Tag Comparison - -Shared TLB lookup for a hit occurs under the same conditions as described for the TLB modules used as ITLB and DTLB. However, there are some distinctions. In both the ITLB and DTLB, the virtual address requiring translation is compared against all TLB entries. In contrast, the shared TLB only compares the tag and content of the set indicated by the provided virtual page number. The index of the set is extracted from VPN0 of the requested virtual address. Given that the shared TLB is 2-way associative, each set contains two entries. Consequently, both of these entries are compared. Below figure illustrates how the set is opted for the lookup. - -.. figure:: ../images/shared_tlb_set.png - :name: **Figure 16:** Set opted for lookup in shared TLB - :align: center - :width: 60% - :alt: shared_tlb_set - - **Figure 16:** Set opted for lookup in shared TLB - -.. raw:: html - - Update and Flush - -Differing from the ITLB and DTLB, a specific virtual address or addressing space cannot be flushed in the shared TLB. When SFENCE.VMA is committed, all entries in the shared TLB are invalidated. 
(Cases of SFENCE.VMA should also be added in shared TLB) - -.. raw:: html - - Updating Shared TLB - -When the Page Table Walker signals a valid update request, the shared TLB is updated by selecting an entry through the replacement policy and marking it as valid. This also triggers the writing of the new tag and content to the respective SRAM. - -.. raw:: html - - Replacement Policy Implemented in CVA6 Shared TLB - -In CVA6's shared TLB, two replacement policies are employed for replacements based on a specific condition. These replacement policies select the entry within the set indicated by the virtual page number. The two policies are: - -* First non-valid encounter replacement policy -* Random replacement policy - -The first policy fails if all ways are valid; in that case, the random replacement policy is used. - -.. raw:: html - - First non-valid encounter replacement policy - -The module implemented in CVA6 to find the first non-valid entry in the shared TLB is the Leading Zero Counter (LZC). It takes three parameters as input: - -1. **WIDTH:** The width of the input vector. -2. **MODE:** Mode selection - 0 for trailing zero, 1 for leading zero. -3. **CNT WIDTH:** Width of the output signal containing the zero count. - -The input signal is the vector to be counted, and the output represents the count of trailing/leading zeros. If all bits in the input vector are zero, this is also indicated. - -When instantiating the module, the width of the input vector is set to the number of shared TLB ways. The trailing zero counter mode is selected. The vector of valid bits is set as the input vector, but with negation. This is because we want the index of the first non-valid entry, and LZC returns the count of trailing zeros, which actually corresponds to the index of the first occurrence of 1 from the least significant bit (LSB). If there is at least one non-valid entry, that entry is selected for replacement; if not, this is signaled by the LZC. - -..
figure:: ../images/LZC.png - :name: **Figure 17:** Replacement of First invalid entry. - :align: center - :width: 60% - :alt: LZC - - **Figure 17:** Replacement of First invalid entry. - -.. raw:: html - - Random replacement policy - -If all ways are valid, a random replacement policy is employed for the replacement process. The Linear Feedback Shift Register (LFSR) is utilized to select the replacement entry randomly. LFSR is commonly used in generating sequences of pseudo-random numbers. When the enable signal is active, the current state of the LFSR undergoes a transformation. Specifically, the state is shifted right by one bit, and the result is combined with a predetermined masking pattern. This masking pattern is derived from the predefined “Masks” array, introducing a non-linear behavior to the sequence generation of the LFSR. The masking process involves XOR operations between the shifted state bits and specific pattern bits, contributing to the complexity and unpredictability of the generated sequence. - -.. figure:: ../images/RR.png - :name: **Figure 18:** Entry selection for replacement using LFSR - :align: center - :width: 95% - :alt: RR - - **Figure 18:** Entry selection for replacement using LFSR - ------------------ -Page Table Walker ------------------ - -The "CVA6 Page Table Walker (PTW) for MMU Sv32" is a hardware module developed for the CV32A6 processor architecture, designed to facilitate the translation of virtual addresses into physical addresses, a crucial task in memory access management. - -.. figure:: ../images/ptw_in_out.png - :name: **Figure 19:** Input and Outputs of Page Table Walker - :align: center - :width: 60% - :alt: ptw_in_out - - **Figure 19:** Input and Outputs of Page Table Walker - -.. raw:: html - - Operation of PTW Module - -The PTW module operates through various states, each with its specific function, such as handling memory access requests, validating page table entries, and responding to errors. - -.. 
raw:: html - - Key Features and Capabilities - -Key features of this PTW module include support for two levels of page tables (LVL1 and LVL2) in the Sv32 standard, accommodating instruction and data page table walks. It rigorously validates and verifies page table entries (PTEs) to ensure translation accuracy and adherence to access permissions. This module seamlessly integrates with the CV32A6 processor's memory management unit (MMU), which governs memory access control. It also takes into account global mapping, access flags, and privilege levels during the translation process, ensuring that memory access adheres to the processor's security and privilege settings. - -.. raw:: html - - Exception Handling - -In addition to its translation capabilities, the PTW module is equipped to detect and manage errors, including page-fault exceptions and access exceptions, contributing to the robustness of the memory access system. It works harmoniously with physical memory protection (PMP) configurations, a critical aspect of modern processors' memory security. Moreover, the module efficiently processes virtual addresses, generating corresponding physical addresses, all while maintaining speculative translation, a feature essential for preserving processor performance during memory access operations. - -.. raw:: html - - Signal Description - -.. raw:: html - -Table 12: Signal Description of PTW
- -.. list-table:: - :header-rows: 1 - - * - Signal - - IO - - Connection - - Type - - Description - - * - ``clk_i`` - - in - - Subsystem - - logic - - Subsystem Clock - - * - ``rst_ni`` - - in - - Subsystem - - logic - - Asynchronous reset active low - - * - ``flush_i`` - - in - - Controller - - logic - - Sfence Committed - - * - ``ptw_active_o`` - - out - - MMU - - logic - - Output signal indicating whether the Page Table Walker (PTW) is currently active - - * - ``walking_instr_o`` - - out - - MMU - - logic - - Indicating it's an instruction page table walk or not - - * - ``ptw_error_o`` - - out - - MMU - - logic - - Output signal indicating that an error occurred during PTW operation - - * - ``ptw_access_exception_o`` - - out - - MMU - - logic - - Output signal indicating that a PMP (Physical Memory Protection) access exception occurred during PTW operation. - - * - ``lsu_is_store_i`` - - in - - Store Unit - - logic - - Input signal indicating whether the translation was triggered by a store operation. 
- - * - ``req_port_i`` - - in - - Cache Subsystem - - dcache_req_o_t - - D Cache Data Requests - - * - ``req_port_o`` - - out - - Cache Subsystem / Perf Counter - - dcache_req_u_t - - D Cache Data Response - - * - ``shared_tlb_update_o`` - - out - - Shared TLB - - tlb_update_sv32_t - - Updated tag and content of shared TLB - - * - ``update_vaddr_o`` - - out - - MMU - - logic[riscv::VLEN-1:0] - - Updated VADDR from shared TLB - - * - ``asid_i`` - - in - - CSR RegFile - - logic[ASID_WIDTH-1:0] - - ASID for the lookup - - * - ``shared_tlb_access_i`` - - in - - Shared TLB - - logic - - Access request of shared TLB - - * - ``shared_tlb_hit_i`` - - in - - Shared TLB - - logic - - Indicate shared TLB hit - - * - ``shared_tlb_vaddr_i`` - - in - - Shared TLB - - logic[riscv::VLEN-1:0] - - Virtual Address from shared TLB - - * - ``itlb_req_i`` - - in - - Shared TLB - - logic - - Indicate request to ITLB - - * - ``satp_ppn_i`` - - in - - CSR RegFile - - logic[riscv::PPNW-1:0] - - PPN of top level page table from SATP register - - * - ``mxr_i`` - - in - - CSR RegFile - - logic - - Make Executable Readable bit in xSTATUS CSR register - - * - ``shared_tlb_miss_o`` - - out - - OPEN - - logic - - Indicate a shared TLB miss - - * - ``pmpcfg_i`` - - in - - CSR RegFile - - riscv::pmpcfg_t[15:0] - - PMP configuration - - * - ``pmpaddr_i`` - - in - - CSR RegFile - - logic[15:0][riscv::PLEN-3:0] - - PMP Address - - * - ``bad_paddr_o`` - - out - - MMU - - logic[riscv::PLEN-1:0] - - Bad Physical Address in case of access exception - -.. raw:: html - - Struct Description - -.. raw:: html - -Table 13: D Cache Response Struct (dcache_req_i_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``address_index`` - - logic [DCACHE_INDEX_WIDTH-1:0] - - Index of the Dcache Line - - * - ``address_tag`` - - logic [DCACHE_TAG_WIDTH-1:0] - - Tag of the Dcache Line - - * - ``data_wdata`` - - riscv::xlen_t - - Data to write in the Dcache - - * - ``data_wuser`` - - logic [DCACHE_USER_WIDTH-1:0] - - data_wuser - - * - ``data_req`` - - logic - - Data Request - - * - ``data_we`` - - logic - - Data Write enabled - - * - ``data_be`` - - logic [(riscv::XLEN/8)-1:0] - - Data Byte enable - - * - ``data_size`` - - logic [1:0] - - Size of data - - * - ``data_id`` - - logic [DCACHE_TID_WIDTH-1:0] - - Data ID - - * - ``kill_req`` - - logic - - Kill the D cache request - - * - ``tag_valid`` - - logic - - Indicate that teh tag is valid - -.. raw:: html - -Table 14: D Cache Request Struct (dcache_req_o_t)
- -.. list-table:: - :header-rows: 1 - - * - Signal - - Type - - Description - - * - ``data_gnt`` - - logic - - Grant of data is given in response to the data request - - * - ``data_rvalid`` - - logic - - Indicate that data is valid which is sent by D cache - - * - ``data_rid`` - - logic [DCACHE_TID_WIDTH-1:0] - - Requested data ID - - * - ``data_rdata`` - - riscv::xlen_t - - Data from D cache - - * - ``data_ruser`` - - logic [DCACHE_USER_WIDTH-1:0] - - Requested data user - -.. raw:: html - - PTW State Machine - -Page Table Walker is implemented as a finite state machine. It listens to shared TLB for incoming translation requests. If there is a shared TLB miss, it saves the virtual address and starts the page table walk. Page table walker transition between 7 states in CVA6. - -* **IDLE:** The initial state where the PTW is awaiting a trigger, often a Shared TLB miss, to initiate a memory access request. -* **WAIT_GRANT:** Request memory access and wait for data grant -* **PTE_LOOKUP:** Once granted access, the PTW examines the valid Page Table Entry (PTE), checking attributes to determine the appropriate course of action. -* **PROPOGATE_ERROR:** If the PTE is invalid, this state handles the propagation of an error, often leading to a page-fault exception due to non-compliance with access conditions -* **PROPOGATE_ACCESS_ERROR:** Propagate access fault if access is not allowed from a PMP perspective -* **WAIT_RVALID:** After processing a PTE, the PTW waits for a valid data signal, indicating that relevant data is ready for further processing. -* **LATENCY:** Introduces a delay to account for synchronization or timing requirements between states. - -.. figure:: ../images/ptw_state_diagram.png - :name: **Figure 20:** State Machine Diagram of CVA6 PTW - :align: center - :width: 95% - :alt: ptw_state_diagram - - **Figure 20:** State Machine Diagram of CVA6 PTW - -.. 
raw:: html - - IDLE state - -In the IDLE state of the Page Table Walker (PTW) finite state machine, the system awaits a trigger to initiate the page table walk process. This trigger is often prompted by a Shared Translation Lookaside Buffer (TLB) miss, indicating that the required translation is not present in the shared TLB cache. The PTW's behavior in this state is explained as follows: - -1. The top-most page table is selected for the page table walk. In the case of SV32, which implements a two-level page table, the level 1 page table is chosen. -2. In the IDLE state, translations are assumed to be invalid in all addressing spaces. -3. The signal indicating the instruction page table walk is set to 0. -4. A conditional check is performed: if there is a shared TLB access request and the entry is not found in the shared TLB (indicating a shared TLB miss), the following steps are executed: - - a. The address of the desired Page Table Entry within the level 1 page table is calculated by multiplying the Physical Page Number (PPN) of the level 1 page table from the SATP register by the page size (4kB). This result is then added to the product of the Virtual Page Number (VPN1), and the size of a page table entry(4 bytes). - -.. figure:: ../images/ptw_idle.png - :name: **Figure 21:** Address of Desired PTE at Level 1 - :align: center - :width: 68% - :alt: ptw_idle - - **Figure 21:** Address of Desired PTE at Level 1 - -.. _example: - - b. The signal indicating whether it's an instruction page table walk is updated based on the ITLB miss. - c. The ASID and virtual address are saved for the page table walk. - d. A shared TLB miss is indicated. - -.. raw:: html - - WAIT GRANT state - -In the **WAIT_GRANT** state of the Page Table Walker's finite state machine, a data request is sent to retrieve memory information. It waits for a data grant signal from the Dcache controller, remaining in this state until granted. 
Once granted, it activates a tag valid signal, marking data validity. The state then transitions to "PTE_LOOKUP" for page table entry lookup. - -.. raw:: html +ALU +=== - PTE LOOKUP state +TO BE COMPLETED -In the **PTE_LOOKUP** state of the Page Table Walker (PTW) finite state machine, the PTW performs the actual lookup and evaluation of the page table entry (PTE) based on the virtual address translation. The behavior and operations performed in this state are detailed as follows: -1. The state waits for a valid signal indicating that the data from the memory subsystem, specifically the page table entry, is available for processing. -2. Upon receiving the valid signal, the PTW proceeds with examining the retrieved page table entry to determine its properties and validity. -3. The state checks if the global mapping bit in the PTE is set, and if so, sets the global mapping signal to indicate that the translation applies globally across all address spaces. -4. The state distinguishes between two cases: Invalid PTE and Valid PTE. +Branch Unit +=========== - a. If the valid bit of the PTE is not set, or if the PTE has reserved RWX field encodings, it signifies an Invalid PTE. In such cases, the state transitions to the "PROPAGATE_ERROR" state, indicating a page-fault exception due to an invalid translation. +TO BE COMPLETED -.. figure:: ../images/ptw_pte_1.png - :name: **Figure 22:** Invalid PTE and reserved RWX encoding leads to page fault - :align: center - :width: 70% - :alt: ptw_pte_1 - **Figure 22:** Invalid PTE and reserved RWX encoding leads to page fault +CSR Buffer +========== -.. _example1: +TO BE COMPLETED - b. If the PTE is valid, the state advances to the "LATENCY" state, indicating a period of processing latency. Additionally, if the "read" flag (pte.r) or the "execute" flag (pte.x) is set, the PTE is considered valid. -5. 
Within the Valid PTE scenario, the state performs further checks based on whether the translation is intended for instruction fetching or data access: +Mult +==== - a. For instruction page table walk, if the page is not executable (pte.x is not set) or not marked as accessible (pte.a is not set), the state transitions to the "PROPAGATE_ERROR" state. +TO BE COMPLETED -.. figure:: ../images/ptw_iptw.png - :name: **Figure 23:** For Instruction Page Table Walk +.. figure:: ../images/mult_modules.png + :name: mult submodules :align: center - :width: 70% - :alt: ptw_iptw - - **Figure 23:** For Instruction Page Table Walk - -.. _example2: - - b. For data page table walk, the state checks if the page is readable (pte.r is set) or if the page is executable only but made readable by setting the MXR bit in xSTATUS CSR register. If either condition is met, it indicates a valid translation. If not, the state transitions to the "PROPAGATE_ERROR" state. + :alt: -.. figure:: ../images/ptw_dptw.png - :name: **Figure 24:** Data Access Page Table Walk - :width: 70% - :alt: ptw_dptw + mult submodules - **Figure 24:** Data Access Page Table Walk +---------- +multiplier +---------- -.. _example3: - - c. If the access is intended for storing data, additional checks are performed: If the page is not writable (pte.w is not set) or if it is not marked as dirty (pte.d is not set), the state transitions to the "PROPAGATE_ERROR" state. - -.. figure:: ../images/ptw_dptw_s.png - :name: **Figure 25:** Data Access Page Table Walk, Store requested - :align: center - :width: 70% - :alt: ptw_dptw_s - - **Figure 25:** Data Access Page Table Walk, Store requested - -6. The state also checks for potential misalignment issues in the translation: If the current page table level is the first level (LVL1) and if the PPN0 of in PTE is not zero, it indicates a misaligned superpage, leading to a transition to the "PROPAGATE_ERROR" state. +TO BE COMPLETED -.. 
figure:: ../images/ptw_mis_sup.png - :name: **Figure 26:** Misaligned Superpage Check - :align: center - :width: 70% - :alt: ptw_mis_sup - **Figure 26:** Misaligned Superpage Check +------ +serdiv +------ -7. If the PTE is valid but the page is neither readable nor executable, the PTW recognizes the PTE as a pointer to the next level of the page table, indicating that additional translation information can be found in the referenced page table at a lower level. -8. If the current page table level is the first level (LVL1), the PTW proceeds to switch to the second level (LVL2) page table, updating the next level pointer and calculating the address for the next page table entry using the Physical Page Number from the PTE and the index of the level 2 page table from virtual address. +TO BE COMPLETED -.. figure:: ../images/ptw_nlvl.png - :name: **Figure 27:** Address of desired PTE at next level of Page Table - :align: center - :width: 70% - :alt: ptw_nlvl - **Figure 27:** Address of desired PTE at next level of Page Table +Load Store Unit (LSU) +===================== -9. The state then transitions to the "WAIT_GRANT" state, indicating that the PTW is awaiting the grant signal to proceed with requesting the next level page table entry. -10. If the current level is already the second level (LVL2), an error is flagged, and the state transitions to the "PROPAGATE_ERROR" state, signifying an unexpected situation where the PTW is already at the last level page table. -11. If the translation access is found to be restricted by the Physical Memory Protection (PMP) settings (allow_access is false), the state updates the shared TLB update signal to indicate that the TLB entry should not be updated. Additionally, the saved address for the page table walk is restored to its previous value, and the state transitions to the "PROPAGATE_ACCESS_ERROR" state. -12. 
Lastly, if the data request for the page table entry was granted, the state indicates to the cache subsystem that the tag associated with the data is now valid. +TO BE COMPLETED -.. figure:: ../images/ptw_pte_flowchart.png - :name: **Figure 28:** Flow Chart of PTE LOOKUP State +.. figure:: ../images/load_store_unit_modules.png + :name: load_store_unit submodules :align: center - :alt: ptw_pte_flowchart - - **Figure 28:** Flow Chart of PTE LOOKUP State - -.. raw:: html + :alt: - PROPAGATE ERROR state + load_store_unit submodules -This state indicates a detected error in the page table walk process, and an error signal is asserted to indicate the Page Table Walker's error condition, triggering a transition to the "LATENCY" state for error signal propagation. +---------- +store_unit +---------- -.. raw:: html - - PROPAGATE ACCESS ERROR state - -This state indicates a detected access error in the page table walk process, and an access error signal is asserted to indicate the Page Table Walker's access error condition, triggering a transition to the "LATENCY" state for access error signal propagation. - -.. raw:: html - - WAIT RVALID state - -This state waits until it gets the "read valid" signal, and when it does, it's ready to start a new page table walk. - -.. raw:: html - - LATENCY state - -The LATENCY state introduces a latency period to allow for necessary system actions or signals to stabilize. After the latency period, the FSM transitions back to the IDLE state, indicating that the system is prepared for a new translation request. +TO BE COMPLETED -.. raw:: html - Flush Scenario +--------- +load unit +--------- -The first step when a flush is triggered is to check whether the Page Table Entry (PTE) lookup process is currently in progress. If the PTW (Page Table Walker) module is indeed in the middle of a PTE lookup operation, the code then proceeds to evaluate a specific aspect of this operation. 
+TO BE COMPLETED -* **Check for Data Validity (rvalid):** Within the PTE lookup operation, it's important to ensure that the data being used for the translation is valid. In other words, the code checks whether the "rvalid" signal (which likely indicates the validity of the data) is not active. If the data is not yet valid, it implies that the PTW module is waiting for the data to become valid before completing the lookup. In such a case, the code takes appropriate action to wait for the data to become valid before proceeding further. -* **Check for Waiting on Grant:** The second condition the code checks for during a flush scenario is whether the PTW module is currently waiting for a "grant." This "grant" signal is typically used to indicate permission or authorization to proceed with an operation. If the PTW module is indeed in a state of waiting for this grant signal, it implies that it requires authorization before continuing its task. +---------- +lsu_bypass +---------- - * **Waiting for Grant:** If the PTW module is in a state of waiting for the grant signal, the code ensures that it continues to wait for the grant signal to be asserted before proceeding further. +TO BE COMPLETED -* **Return to Idle State if Neither Condition is Met:** After evaluating the above two conditions, the code determines whether either of these conditions is true. If neither of these conditions applies, it suggests that the PTW module can return to its idle state, indicating that it can continue normal operations without any dependencies on the flush condition. 
-PMA/PMP Checks -============== -Multipler -========= +CVXIF_fu +======== -CSR Buffer -========== +TO BE COMPLETED diff --git a/docs/04_cv32a65x_design/source/cv32a6_frontend.rst b/docs/04_cv32a65x_design/source/cv32a6_frontend.rst index a7d9250849..c650dd305c 100644 --- a/docs/04_cv32a65x_design/source/cv32a6_frontend.rst +++ b/docs/04_cv32a65x_design/source/cv32a6_frontend.rst @@ -15,11 +15,19 @@ FRONTEND Module Description ----------- -The FRONTEND module implements two first stages of the cva6 pipeline, PC gen and Fetch stages. +The FRONTEND module implements two first stages of the cva6 pipeline, +PC gen and Fetch stages. -PC gen stage is responsible for generating the next program counter hosting a Branch Target Buffer (BTB) a Branch History Table (BHT) and a Return Address Stack (RAS) to speculate on the branch target address. +PC gen stage is responsible for generating the next program counter +hosting a Branch Target Buffer (BTB) a Branch History Table (BHT) and +a Return Address Stack (RAS) to speculate on the branch target address. -Fetch stage requests data to the CACHE module, realigns the data to store them in instruction queue and transmits the instructions to the DECODE module. FRONTEND can fetch up to 2 instructions per cycles when C extension instructions is used, but as instruction queue limits the data rate, up to one instruction per cycle can be sent to DECODE. +Fetch stage requests data to the CACHE module, realigns the data to +store them in instruction queue and transmits the instructions to the +DECODE module. +FRONTEND can fetch up to 2 instructions per cycles when +C extension instructions is used, but as instruction queue limits the +data rate, up to one instruction per cycle can be sent to DECODE. The module is connected to: @@ -80,8 +88,8 @@ Memory *and MMU (MMU is not enabled in CV32A6 v0.1.0)* can feedback potential ex -Architecture and Submodules ---------------------------- +Submodules +---------- .. 
figure:: ../images/frontend_modules.png :name: FRONTEND submodules @@ -94,20 +102,16 @@ Architecture and Submodules Instr_realign submodule ~~~~~~~~~~~~~~~~~~~~~~~ -.. include:: port_instr_realign.rst - - The 32-bit aligned block coming from the CACHE module enters the instr_realign submodule. This submodule extracts the instructions from the 32-bit blocks, up to two instructions because it is possible to fetch two instructions when C extension is used. If the instructions are not compressed, it is possible that the instruction is not aligned on the block size but rather interleaved with two cache blocks. In that case, two cache accesses are needed. The instr_realign submodule provides at maximum one instruction per cycle. Not complete instruction is stored in instr_realign submodule before being provided in the next cycles. In case of mispredict, flush, replay or branch predict, the instr_realign is re-initialized, the internal register storing the instruction alignment state is reset. +.. include:: port_instr_realign.rst + Instr_queue submodule ~~~~~~~~~~~~~~~~~~~~~ -.. include:: port_instr_queue.rst - - The instr_queue receives 32bit block from CACHES to create a valid stream of instructions to be decoded (by DECODE), to be issued (by ISSUE) and executed (by EXECUTE). FRONTEND pushes in FIFO to store the instructions and related information needed in case of mispredict or exception: instructions, instruction control flow type, exception, exception address and predicted address. DECODE pops them when decode stage is ready and indicates to the FRONTEND the instruction has been consummed. The instruction queue contains max 4 instructions. @@ -118,28 +122,32 @@ If the instruction queue is full, a replay request is sent to inform the fetch m The instruction queue can be flushed by CONTROLLER. +.. include:: port_instr_queue.rst Instr_scan submodule ~~~~~~~~~~~~~~~~~~~~ -.. 
include:: port_instr_scan.rst - - The instr_scan submodule pre-decodes the fetched instructions, instructions could be compressed or not. The outputs are used by the branch prediction feature. The instr_scan submodule tells if the instruction is compressed and provides the intruction type: branch, jump, return, jalr, imm, call or others. +.. include:: port_instr_scan.rst + BHT (Branch History Table) submodule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. include:: port_bht.rst -When a branch instruction is resolved by the EXECUTE, the relative information is stored in the Branch History Table. +When a branch instruction is resolved by the EXECUTE, the relative +information is stored in the Branch History Table. The information is stored in a 1024 entry table. -The Branch History table is a two-bit saturation counter that takes the virtual address of the current fetched instruction by the CACHE. It states whether the current branch request should be taken or not. The two bit counter is updated by the successive execution of the current instructions as shown in the following figure. +The Branch History table is a two-bit saturation counter that takes the +virtual address of the current fetched instruction by the CACHE. +It states whether the current branch request should be taken or not. +The two bit counter is updated by the successive execution of the current +instructions as shown in the following figure. .. figure:: ../images/bht.png :name: BHT saturation @@ -150,40 +158,53 @@ The Branch History table is a two-bit saturation counter that takes the virtual The BHT is not updated if processor is in debug mode. -When a branch instruction is pre-decoded by instr_scan submodule, the BHT informs whether the PC address is in the BHT. In this case, the BHT predicts whether the branch is taken and provides the corresponding target address. +When a branch instruction is pre-decoded by instr_scan submodule, the BHT +informs whether the PC address is in the BHT. 
In this case, the BHT +predicts whether the branch is taken and provides the corresponding target +address. The BHT is never flushed. +.. include:: port_bht.rst + BTB (Branch Target Buffer) submodule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. include:: port_btb.rst - -When a unconditional jumps to a register (JALR instruction) is mispredicted by the EXECUTE, the relative information is stored into the BTB, that is to say the JALR PC and the target address. +When an unconditional jump to a register (JALR instruction) is mispredicted +by the EXECUTE, the relative information is stored into the BTB, that is +to say the JALR PC and the target address. The information is stored in a 8 entry table. The BTB is not updated if processor is in debug mode. -When a branch instruction is pre-decoded by instr_scan submodule, the BTB informs whether the input PC address is in BTB. In this case, the BTB provides the corresponding target address. +When a branch instruction is pre-decoded by instr_scan submodule, the BTB +informs whether the input PC address is in BTB. In this case, the BTB +provides the corresponding target address. The BTB is never flushed. +.. include:: port_btb.rst + RAS (Return Address Stack) submodule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. include:: port_ras.rst - -When an unconditional jumps to a known target address (JAL instruction) is consummed by the instr_queue, the next pc after the JAL instruction and the return address are stored into a FIFO. +When an unconditional jump to a known target address (JAL instruction) +is consumed by the instr_queue, the next pc after the JAL instruction +and the return address are stored into a FIFO. The RAS FIFO depth is 2. -When a branch instruction is pre-decoded by instr_scan submodule, the RAS informs whether the input PC address is in RAS. In this case, the RAS provides the corresponding target address.
+When a branch instruction is pre-decoded by instr_scan submodule, the +RAS informs whether the input PC address is in RAS. In this case, the +RAS provides the corresponding target address. The RAS is never flushed. +.. include:: port_ras.rst + diff --git a/docs/04_cv32a65x_design/source/cva6_commit_stage.rst b/docs/04_cv32a65x_design/source/cva6_commit_stage.rst index b11b18880c..c401f7e2e8 100644 --- a/docs/04_cv32a65x_design/source/cva6_commit_stage.rst +++ b/docs/04_cv32a65x_design/source/cva6_commit_stage.rst @@ -27,3 +27,4 @@ Functionality ------------- TO BE COMPLETED + diff --git a/docs/04_cv32a65x_design/source/cva6_controller.rst b/docs/04_cv32a65x_design/source/cva6_controller.rst index c8ec9bd60a..468970856e 100644 --- a/docs/04_cv32a65x_design/source/cva6_controller.rst +++ b/docs/04_cv32a65x_design/source/cva6_controller.rst @@ -27,3 +27,4 @@ Functionality ------------- TO BE COMPLETED + diff --git a/docs/04_cv32a65x_design/source/cva6_id_stage.rst b/docs/04_cv32a65x_design/source/cva6_id_stage.rst index 93024551a3..bb221cac24 100644 --- a/docs/04_cv32a65x_design/source/cva6_id_stage.rst +++ b/docs/04_cv32a65x_design/source/cva6_id_stage.rst @@ -15,15 +15,68 @@ ID_STAGE Module Description ----------- -The ID_STAGE module implements ... TO BE COMPLETED +The ID_STAGE module implements the decode stage of the pipeline. +Its main purpose is to decode RISC-V instructions coming from FRONTEND module +(fetch stage) and send them to the ISSUE_STAGE module (issue stage). + +The compressed_decoder module checks whether the incoming instruction is +compressed and output the corresponding uncompressed instruction. +Then the decoder module decodes the instruction and send it to the +issue stage. 
+ The module is connected to: -* TO BE COMPLETED +* CONTROLLER module can flush ID_STAGE decode stage +* FRONTEND module sends instructions to ID_STAGE module +* ISSUE module receives the decoded instruction from ID_STAGE module +* CSR_REGFILE module sends status information about privilege mode, traps, extension support. .. include:: port_id_stage.rst + + Functionality ------------- TO BE COMPLETED + + +Submodules +---------- + +.. figure:: ../images/id_stage_modules.png + :name: ID_STAGE submodules + :align: center + :alt: + + ID_STAGE submodules + + +Compressed_decoder +~~~~~~~~~~~~~~~~~~ + +The compressed_decoder module decompresses all the compressed +instructions taking a 16-bit compressed instruction and expanding it +to its 32-bit equivalent. +All compressed instructions have a 32-bit equivalent. + +.. include:: port_compressed_decoder.rst + +Decoder +~~~~~~~ + +The decoder module takes the output of compressed_decoder module and decodes +it. +It transforms the instruction to the most fundamental control structure +in pipeline, a scoreboard entry. + +The scoreboard entry contains an exception entry which is composed of a +valid field, a cause and a value called TVAL. +As TVALEn configuration parameter is zero, the TVAL field is not implemented. +A potential illegal instruction exception can be detected during decoding. +If no exception has happened previously in fetch stage, the decoder will +validate the exception and add the cause and tval value to the scoreboard entry. + +.. include:: port_decoder.rst + diff --git a/docs/04_cv32a65x_design/source/cva6_issue_stage.rst b/docs/04_cv32a65x_design/source/cva6_issue_stage.rst index ae0b31e7f3..34846dc383 100644 --- a/docs/04_cv32a65x_design/source/cva6_issue_stage.rst +++ b/docs/04_cv32a65x_design/source/cva6_issue_stage.rst @@ -9,13 +9,25 @@ .. _CVA6_ISSUE_STAGE: -ID_STAGE Module -=============== +ISSUE_STAGE Module +================== Description ----------- -The ISSUE_STAGE module implements ...
TO BE COMPLETED +The execution can be roughly divided into four parts: issue(1), +read operands(2), execute(3) and write-back(4). +The ISSUE_STAGE module handles steps one, two and four. +The ISSUE_STAGE module receives the decoded instructions and issues them +to the various functional units. + +A data-structure called scoreboard is used to keep track of data related +to the issue instruction: which functional unit it is in and which +register it will write-back to. +The scoreboard handles the write-back data received from the COMMIT_STAGE module. + +Furthermore it contains the CPU’s register file. + The module is connected to: @@ -27,3 +39,28 @@ Functionality ------------- TO BE COMPLETED + + +Submodules +---------- + +.. figure:: ../images/issue_stage_modules.png + :name: ISSUE_STAGE submodules + :align: center + :alt: + + ISSUE_STAGE submodules + +Scoreboard +~~~~~~~~~~ + +TO BE COMPLETED + +.. include:: port_scoreboard.rst + +Issue_read_operands +~~~~~~~~~~~~~~~~~~~ + +TO BE COMPLETED + +.. include:: port_issue_read_operands.rst diff --git a/docs/04_cv32a65x_design/source/functionality.rst b/docs/04_cv32a65x_design/source/functionality.rst index f6bcc0d050..71198e13ee 100644 --- a/docs/04_cv32a65x_design/source/functionality.rst +++ b/docs/04_cv32a65x_design/source/functionality.rst @@ -13,7 +13,7 @@ Functionality ============= ..
toctree:: - :hidden: + :maxdepth: 1 instructions traps diff --git a/docs/04_cv32a65x_design/source/index.rst b/docs/04_cv32a65x_design/source/index.rst index 13cab6d172..0395eafd73 100644 --- a/docs/04_cv32a65x_design/source/index.rst +++ b/docs/04_cv32a65x_design/source/index.rst @@ -17,7 +17,6 @@ Editor: **Jean Roch Coulon** :caption: Contents: intro - overview subsystem functionality architecture diff --git a/docs/04_cv32a65x_design/source/instructions.rst b/docs/04_cv32a65x_design/source/instructions.rst index 75235ad42c..79c15be36d 100644 --- a/docs/04_cv32a65x_design/source/instructions.rst +++ b/docs/04_cv32a65x_design/source/instructions.rst @@ -7,5 +7,25 @@ Original Author: Jean-Roch COULON - Thales +Instructions +============ -.. include:: ../../01_cva6_user/RISCV_Instructions.rst +The first subchapter below lists the extensions implemented in CVA6. +By configuration, we can enable/disable the extensions. +CV32A65X supports the extensions described in the next subchapters. +RVZicond, RV32A and RVZifencei extensions are not supported by CV32A65X. + + +.. toctree:: + :maxdepth: 1 + + ../../01_cva6_user/RISCV_Instructions + ../../01_cva6_user/RISCV_Instructions_RV32I + ../../01_cva6_user/RISCV_Instructions_RV32M + ../../01_cva6_user/RISCV_Instructions_RV32C + ../../01_cva6_user/RISCV_Instructions_RV32ZCb + ../../01_cva6_user/RISCV_Instructions_RVZba + ../../01_cva6_user/RISCV_Instructions_RVZbb + ../../01_cva6_user/RISCV_Instructions_RVZbc + ../../01_cva6_user/RISCV_Instructions_RVZbs + ../../01_cva6_user/RISCV_Instructions_RVZicsr diff --git a/docs/04_cv32a65x_design/source/mmu.rst b/docs/04_cv32a65x_design/source/mmu.rst new file mode 100644 index 0000000000..137c3e8ae1 --- /dev/null +++ b/docs/04_cv32a65x_design/source/mmu.rst @@ -0,0 +1,1580 @@ +..
_CVA6_MMU: + + +---------------------- +Memory Management Unit +---------------------- + +The Memory Management Unit (MMU) SV32 module is a crucial component in the RISC-V-based processor, serving as the backbone for virtual memory management and address translation. + +.. figure:: ../images/mmu_in_out.png + :name: **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 + :align: center + :width: 70% + :alt: mmu_in_out + + **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 + +At its core, the MMU SV32 plays a pivotal role in translating virtual addresses into their corresponding physical counterparts. This translation process is paramount for providing memory protection, isolation, and efficient memory management in modern computer systems. Importantly, it handles both instruction and data accesses, ensuring a seamless interaction between the processor and virtual memory. Within the MMU, several major blocks play pivotal roles in this address translation process. These include: + +* Instruction TLB (ITLB) +* Data TLB (DTLB) +* Shared TLB +* Page Table Walker (PTW) + +.. figure:: ../images/mmu_major_blocks.png + :name: **Figure 2:** Major Blocks in CVA6 MMU SV32 + :align: center + :width: 60% + :alt: mmu_major_blocks + + **Figure 2:** Major Blocks in CVA6 MMU SV32 + +The MMU SV32 manages privilege levels and access control, enforcing permissions for user and supervisor modes while handling access exceptions. It employs Translation Lookaside Buffers (TLBs) for efficient address translation, reducing the need for page table access. TLB hits yield quick translations, but on misses, the shared TLB is consulted, and if necessary, the Page Table Walker (PTW) performs page table walks, updating TLBs and managing exceptions during the process. + +In addition to these functionalities, the MMU SV32 seamlessly integrates support for Physical Memory Protection (PMP), enabling it to enforce access permissions and memory protection configurations as specified by the PMP settings.
This additional layer of security and control enhances the management of memory accesses. + +.. raw:: html + + Instruction and Data Interfaces + +The MMU SV32 maintains interfaces with the instruction cache (ICache) and the load-store unit (LSU). It receives virtual addresses from these components and proceeds to translate them into physical addresses, a fundamental task for ensuring proper program execution and memory access. + +.. raw:: html + + Signal Description of MMU + +.. raw:: html + +Table 1: CVA6 MMU SV32 Input Output Signals
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - IO + - Connection Type + - Type + - Description + + * - ``clk_i`` + - in + - Subsystem + - logic + - Subsystem Clock + + * - ``rst_ni`` + - in + - Subsystem + - logic + - Asynchronous reset active low + + * - ``flush_i`` + - in + - Controller + - logic + - Sfence Committed + + * - ``enable_translation_i`` + - in + - CSR RegFile + - logic + - Indicate address translation request for instruction + + * - ``en_ld_st_translation_i`` + - in + - CSR RegFile + - logic + - Indicate address translation request for load or store + + * - ``icache_areq_i`` + - in + - Cache Subsystem + - icache_arsp_t + - Icache Response + + * - ``icache_areq_o`` + - out + - Cache Subsystem + - icache_areq_t + - Icache Request + + * - ``misaligned_ex_i`` + - in + - Load Store Unit + - exception_t + - Indicate misaligned exception + + * - ``lsu_req_i`` + - in + - Load Store Unit + - logic + - Request address translation + + * - ``lsu_vaddr_i`` + - in + - Load Store Unit + - logic [riscv::VLEN-1:0] + - Virtual Address In + + * - ``lsu_is_store_i`` + - in + - Store Unit + - logic + - Translation is requested by a store + + * - ``lsu_dtlb_hit_o`` + - out + - Store / Load Unit + - logic + - Indicate a DTLB hit + + * - ``lsu_dtlb_ppn_o`` + - out + - Load Unit + - logic [riscv::PPNW-1:0] + - Send PPN to LSU + + * - ``lsu_valid_o`` + - out + - Load Store Unit + - logic + - Indicate a valid translation + + * - ``lsu_paddr_o`` + - out + - Store / Load Unit + - logic [riscv::PLEN-1:0] + - Translated Address + + * - ``lsu_exception_o`` + - out + - Store / Load Unit + - exception_t + - Address Translation threw an exception + + * - ``priv_lvl_i`` + - in + - CSR RegFile + - riscv::priv_lvl_t + - Privilege level for instruction fetch interface + + * - ``ld_st_priv_lvl_i`` + - in + - CSR RegFile + - riscv::priv_lvl_t + - Privilege Level for Data Interface + + * - ``sum_i`` + - in + - CSR RegFile + - logic + - Supervisor User Memory Access bit in xSTATUS
CSR register + + * - ``mxr_i`` + - in + - CSR RegFile + - logic + - Make Executable Readable bit in xSTATUS CSR register + + * - ``satp_ppn_i`` + - in + - CSR RegFile + - logic [riscv::PPNW-1:0] + - PPN of top level page table from SATP register + + * - ``asid_i`` + - in + - CSR RegFile + - logic [ASID_WIDTH-1:0] + - ASID for the lookup + + * - ``asid_to_be_flushed_i`` + - in + - Execute Stage + - logic [ASID_WIDTH-1:0] + - ASID of the entry to be flushed. + + * - ``vaddr_to_be_flushed_i`` + - in + - Execute Stage + - logic [riscv::VLEN-1:0] + - Virtual address of the entry to be flushed. + + * - ``flush_tlb_i`` + - in + - Controller + - logic + - SFENCE.VMA committed + + * - ``itlb_miss_o`` + - out + - Performance Counter + - logic + - Indicate an ITLB miss + + * - ``dtlb_miss_o`` + - out + - Performance Counter + - logic + - Indicate a DTLB miss + + * - ``req_port_i`` + - in + - Cache Subsystem + - dcache_req_o_t + - D Cache Data Requests + + * - ``req_port_o`` + - out + - Cache Subsystem + - dcache_req_i_t + - D Cache Data Response + + * - ``pmpcfg_i`` + - in + - CSR RegFile + - riscv::pmpcfg_t [15:0] + - PMP configurations + + * - ``pmpaddr_i`` + - in + - CSR RegFile + - logic [15:0][riscv::PLEN-3:0] + - PMP Address + +.. raw:: html + + Struct Description + +.. raw:: html + +Table 2: I Cache Request Struct (icache_areq_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``fetch_valid`` + - logic + - Address Translation Valid + + * - ``fetch_paddr`` + - logic [riscv::PLEN-1:0] + - Physical Address In + + * - ``fetch_exception`` + - exception_t + - Exception occurred during fetch + +.. raw:: html + +Table 3: I Cache Response Struct (icache_arsp_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``fetch_req`` + - logic + - Address Translation Request + + * - ``fetch_vaddr`` + - logic [riscv::VLEN-1:0] + - Virtual Address out + +.. raw:: html + +Table 4: Exception Struct (exception_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``cause`` + - riscv::xlen_t + - Cause of exception + + * - ``tval`` + - riscv::xlen_t + - Additional information of causing exception (e.g. instruction causing it), address of LD/ST fault + + * - ``valid`` + - logic + - Indicate that exception is valid + +.. raw:: html + +Table 5: PMP Configuration Struct (pmpcfg_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``locked`` + - logic + - Lock this configuration + + * - ``reserved`` + - logic[1:0] + - Reserved bits in pmpcfg CSR + + * - ``addr_mode`` + - pmp_addr_mode_t + - Addressing Modes: OFF, TOR, NA4, NAPOT + + * - ``access_type`` + - pmpcfg_access_t + - None, read, write, execute + +.. raw:: html + + Control Flow in MMU SV32 Module + +.. figure:: ../images/mmu_control_flow.png + :name: **Figure 3:** Control Flow in CVA6 MMU SV32 + :align: center + :width: 95% + :alt: mmu_control_flow + + **Figure 3:** Control Flow in CVA6 MMU SV32 + +.. raw:: html + + Exception Sources with Address Translation Enabled + +Two potential exception sources exist: + +* Hardware Page Table Walker (HPTW) throwing an exception, signifying a page fault exception. +* Access error due to insufficient permissions of PMP, known as an access exception. + +.. raw:: html + + Instruction Fetch Interface + +The IF stage initiates a request to retrieve memory content at a specific virtual address. When the MMU is disabled, the instruction fetch request is directly passed to the I$ without modifications. + +.. raw:: html + + Address Translation in Instruction Interface + +If virtual memory translation is enabled for instruction fetches, the following operations are performed in the instruction interface: + +* Compatibility of requested virtual address with selected page based address translation scheme is checked. +* For 4K page translation, the module determines the fetch physical address by combining the physical page number (PPN) from ITLB content and the offset from the virtual address. +* In the case of Mega page translation, if the ITLB indicates a 4M page, the VPN0 from the fetch virtual address is written to the PPN0 of the fetch physical address to ensure alignment for superpage translation. 
+* If the Instruction TLB (ITLB) lookup hits, the fetch valid signal (which indicates a valid physical address) is activated in response to the input fetch request. Memory region accessibility is checked from the perspective of the fetch operation, potentially triggering a page fault exception in case of an access error or insufficient PMP permission. +* In case of an ITLB miss, if the page table walker (PTW) is active (only active if there is a shared TLB miss) and handling instruction fetches, the fetch valid signal is determined based on PTW errors or access exceptions. + +If the fetch physical address doesn't match any execute region, an Instruction Access Fault is raised. When not translating, PMPs are immediately checked against the physical address for access verification. + +.. raw:: html + + Data Interface + +.. raw:: html + + Address Translation in Data Interface + +If address translation is enabled for load or store, and no misaligned exception has occurred, the following operations are performed in the data interface: + +* Initially, translation is assumed to be invalid, signified by the MMU to LSU. +* The translated physical address is formed by combining the PPN from the Page Table Entry (PTE) and the offset from the virtual address requiring translation. This address is sent one cycle later due to the additional bank of registers, which delays the MMU’s answer. The PPN from the PTE is also shared separately with LSU in the same cycle as the hit. +* In the case of superpage translation, as in SV32, known as the 4M page, PPN0 of the translated physical address and the separately shared PPN are updated with the VPN0 of the virtual address. + +If a Data TLB (DTLB) hit occurs, it indicates a valid translation, and various fault checks are performed depending on whether it's a load or store request. + +* For store requests, if the page is not writable, the dirty flag isn't set, or privileges are violated, it results in a page fault corresponding to the store access.
If PMPs are also violated, it leads to an access fault corresponding to the store access. Page faults take precedence over access faults. +* For load requests, a page fault is triggered if there are insufficient access privileges. PMPs are checked again during load access, resulting in an access fault corresponding to load access if PMPs are violated. + +In case of a DTLB miss, potential exceptions are monitored during the page table walk. If the PTW indicates a page fault, the corresponding page fault related to the requested type is signaled. If the PTW indicates an access exception, the load access fault is indicated through address translation because the page table walker can only throw load access faults. + +.. raw:: html + + Address Translation is Disabled + +When address translation is not enabled, the physical address is immediately checked against Physical Memory Protections (PMPs). If there is a request from LSU, no misaligned exception, and PMPs are violated, it results in an access fault corresponding to the request being indicated. + +---------------------------- +Translation Lookaside Buffer +---------------------------- + +Page tables are accessed for translating virtual memory addresses to physical memory addresses. This translation needs to be carried out for every load and store instruction and also for every instruction fetch. Since page tables are resident in physical memory, accessing these tables in all these situations has a significant impact on performance. Page table accesses occur in patterns that are closely related in time. Furthermore, the spatial and temporal locality of data accesses or instruction fetches mean that the same page is referenced repeatedly. Taking advantage of these access patterns the processor keeps the information of recent address translations, to enable fast retrieval, in a small cache called the Translation Lookaside Buffer (TLB) or an address-translation cache. 
+ +The CVA6 TLB is structured as a fully associative cache, where the virtual address that needs to be translated is compared against all the individual TLB entries. Given a virtual address, the processor examines the TLB (TLB lookup) to determine if the virtual page number (VPN) of the page being accessed is in the TLB. When a TLB entry is found (TLB hit), the TLB returns the corresponding physical page number (PPN) which is used to calculate the target physical address. If no TLB entry is found (TLB miss) the processor has to read individual page table entries from memory (Table walk). In CVA6 table walking is supported by dedicated hardware. Once the processor finishes the table walk it has the Physical Page Number (PPN) corresponding to the Virtual Page Number (VPN) that needs to be translated. The processor adds an entry for this address translation to the TLB so future translations of that virtual address will happen quickly through the TLB. During the table walk the processor may find out that the corresponding physical page is not resident in memory. At this stage a page table exception (Page Fault) is generated which gets handled by the operating system. The operating system places the appropriate page in memory, updates the appropriate page tables and returns execution to the instruction which generated the exception. + +The input and output signals of the TLB are shown in the following figure. + +.. figure:: ../images/in_out_tlb.png + :name: **Figure 4:** Inputs and Outputs of CVA6 TLB + :align: center + :width: 65% + :alt: in_out_tlb + + **Figure 4:** Inputs and Outputs of CVA6 TLB + +.. raw:: html + + Signal Description of TLB + +.. raw:: html + +Table 6: CVA6 TLB Input Output Signals
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - IO + - Connection + - Type + - Description + + * - ``clk_i`` + - in + - SUBSYSTEM + - logic + - Subsystem Clock + + * - ``rst_ni`` + - in + - SUBSYSTEM + - logic + - Asynchronous reset active low + + * - ``flush_i`` + - in + - Controller + - logic + - TLB flush request + + * - ``update_i`` + - in + - Shared TLB + - tlb_update_sv32_t + - Updated tag and content of TLB + + * - ``lu_access_i`` + - in + - Cache Subsystem + - logic + - Signal indicating a lookup access is being requested + + * - ``lu_asid_i`` + - in + - CSR RegFile + - logic[ASID_WIDTH-1:0] + - ASID (Address Space Identifier) for the lookup + + * - ``lu_vaddr_i`` + - in + - Cache Subsystem + - logic[riscv::VLEN-1:0] + - Virtual address for the lookup + + * - ``lu_content_o`` + - out + - MMU SV32 + - riscv::pte_sv32_t + - Output for the content of the TLB entry + + * - ``asid_to_be_flushed_i`` + - in + - Execute Stage + - logic[ASID_WIDTH-1:0] + - ASID of the entry to be flushed + + * - ``vaddr_to_be_flushed_i`` + - in + - Execute Stage + - logic[riscv::VLEN-1:0] + - Virtual address of the entry to be flushed + + * - ``lu_is_4M_o`` + - out + - MMU SV32 + - logic + - Output indicating whether the TLB entry corresponds to a 4MB page + + * - ``lu_hit_o`` + - out + - MMU SV32 + - logic + - Output indicating whether the lookup resulted in a hit or miss + +.. raw:: html + + Struct Description + +.. raw:: html + +Table 7: SV32 TLB Update Struct (tlb_update_sv32_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``valid`` + - logic + - Indicates whether the TLB update entry is valid or not + + * - ``is_4M`` + - logic + - Indicates if the TLB entry corresponds to a 4MB page + + * - ``vpn`` + - logic[19:0] + - Virtual Page Number (VPN) used for updating the TLB, consisting of 20 bits + + * - ``asid`` + - logic[8:0] + - Address Space Identifier (ASID) used for updating the TLB, with a length of 9 bits for Sv32 MMU + + * - ``content`` + - riscv::pte_sv32_t + - Content of the TLB update entry, defined by the structure + +.. raw:: html + +Table 8: SV32 PTE Struct (riscv::pte_sv32_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``ppn`` + - logic[21:0] + - 22 bit Physical Page Number (PPN) + + * - ``rsw`` + - logic[1:0] + - Reserved for use by supervisor software + + * - ``d`` + - logic + - | Dirty bit indicating whether the page has been modified (dirty) or not + | 0: Page is clean i.e., has not been written + | 1: Page is dirty i.e., has been written + + * - ``a`` + - logic + - | Accessed bit indicating whether the page has been accessed + | 0: Virtual page has not been accessed since the last time A bit was cleared + | 1: Virtual page has been read, written, or fetched from since the last time the A bit was cleared + + * - ``g`` + - logic + - | Global bit marking a page as part of a global address space valid for all ASIDs + | 0: Translation is valid for specific ASID + | 1: Translation is valid for all ASIDs + + * - ``u`` + - logic + - | User bit indicating privilege level of the page + | 0: Page is not accessible in user mode but in supervisor mode + | 1: Page is accessible in user mode but not in supervisor mode + + * - ``x`` + - logic + - | Execute bit which allows execution of code from the page + | 0: Code execution is not allowed + | 1: Code execution is permitted + + * - ``w`` + - logic + - | Write bit allows the page to be written + | 0: Write operations are not allowed + | 1: Write operations are permitted + + * - ``r`` + - logic + - | Read bit allows read access to the page + | 0: Read operations are not allowed + | 1: Read operations are permitted + + * - ``v`` + - logic + - | Valid bit indicating the page table entry is valid + | 0: Page is invalid i.e. page is not in DRAM, translation is not valid + | 1: Page is valid i.e. page resides in the DRAM, translation is valid + +.. raw:: html + + TLB Entry Fields + +The number of TLB entries can be changed via a design parameter. In 32-bit configurations of CVA6 only 2 TLB entries are instantiated. 
Each TLB entry is made up of two fields: Tag and Content. The Tag field holds the virtual page number (VPN1, VPN0), ASID, page size (is_4M) along with a valid bit (VALID) indicating that the entry is valid. The SV32 virtual page number, which is supported by CV32A6X, is further split into two separate virtual page numbers VPN1 and VPN0. The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page. Note that the V bit in the Content field is the V bit which is present in the page table in memory. It is copied from the page table, as is, and the VALID bit in the Tag is set based on its value. The TLB entry fields are shown in **Figure 5**. + +.. figure:: ../images/cva6_tlb_entry.png + :name: **Figure 5:** Fields in CVA6 TLB entry + :align: center + :width: 80% + :alt: cva6_tlb_entry + + **Figure 5:** Fields in CVA6 TLB entry + +.. raw:: html + + CVA6 TLB Management / Implementation + +The CVA6 TLB implements the following three functions: + +* **Translation:** This function implements the address lookup and match logic. +* **Update and Flush:** This function implements the update and flush logic. +* **Pseudo Least Recently Used Replacement Policy:** This function implements the replacement policy for TLB entries. + +.. raw:: html + + Translation + +This function takes in the virtual address and certain other fields, examines the TLB to determine if the virtual page number of the page being accessed is in the TLB or not. If a TLB entry is found (TLB hit), the TLB returns the corresponding physical page number (PPN) which is then used to calculate the target physical address. The following checks are done as part of this lookup function to find a match in the TLB: + +* **Validity Check:** For a TLB hit, the associated TLB entry must be valid. +* **ASID and Global Flag Check:** The TLB entry's ASID must match the given ASID (ASID associated with the Virtual address).
If the TLB entry’s Global (G) bit is set then this check is not done. This ensures that the translation is either specific to the provided ASID or it is globally applicable. +* **Level 1 VPN match:** SV32 implements a two-level page table. As such the virtual address is broken up into three parts which are the virtual page number 1, virtual page number 0 and displacement. So the condition that is checked next is that the virtual page number 1 of the virtual address matches the virtual page number 1 (VPN1) of the TLB entry. +* **Level 0 VPN match or 4-Mega Page:** The last condition to be checked, for a TLB hit, is that the virtual page number 0 of the virtual address matches the virtual page number 0 of the TLB entry (VPN0). This match is ignored if the is_4M bit in the Tag is set which implies a super 4M page. + +All the conditions listed above are checked against every TLB entry. If there is a TLB hit then the corresponding bit in the hit array is set. **Figure 6** illustrates the TLB hit/miss process listed above. + +.. figure:: ../images/cva6_tlb_hit.png + :name: **Figure 6:** Block diagram of CVA6 TLB hit or miss + :align: center + :width: 75% + :alt: cva6_tlb_hit + + **Figure 6:** Block diagram of CVA6 TLB hit or miss + +.. raw:: html + + Flushing TLB entries + +The SFENCE.VMA instruction can be used with certain specific source register specifiers (rs1 & rs2) to flush a specific TLB entry, some set of TLB entries or all TLB entries. Like all instructions this action only takes place when the SFENCE.VMA instruction is committed (shown via the commit_sfence signal in the following figures.)
The behavior of the instruction is as follows: + +* **If rs1 is not equal to x0 and rs2 is not equal to x0:** Invalidate all TLB entries which contain leaf page table entries corresponding to the virtual address in rs1 (shown below as Virtual Address to be flushed) and that match the address space identifier as specified by integer register rs2 (shown below as asid_to_be_flushed_i), except for entries containing global mappings. This is referred to as the “SFENCE.VMA vaddr asid” case. + +.. figure:: ../images/sfence_vaddr_asid.png + :name: **Figure 7:** Invalidate TLB entry if ASID and virtual address match + :align: center + :width: 75% + :alt: sfence_vaddr_asid + + **Figure 7:** Invalidate TLB entry if ASID and virtual address match + +* **If rs1 is equal to x0 and rs2 is equal to x0:** Invalidate all TLB entries for all address spaces. This is referred to as the "SFENCE.VMA x0 x0" case. + +.. figure:: ../images/sfence_x0_x0.png + :name: **Figure 8:** Invalidate all TLB entries if both source register specifiers are x0 + :align: center + :width: 62% + :alt: sfence_x0_x0 + + **Figure 8:** Invalidate all TLB entries if both source register specifiers are x0 + +* **If rs1 is not equal to x0 and rs2 is equal to x0:** Invalidate all TLB entries that contain leaf page table entries corresponding to the virtual address in rs1, for all address spaces. This is referred to as the “SFENCE.VMA vaddr x0” case. + +.. figure:: ../images/sfence_vaddr_x0.png + :name: **Figure 9:** Invalidate TLB entry with matching virtual address for all address spaces + :align: center + :width: 75% + :alt: sfence_vaddr_x0 + + **Figure 9:** Invalidate TLB entry with matching virtual address for all address spaces + +* **If rs1 is equal to x0 and rs2 is not equal to x0:** Invalidate all TLB entries matching the address space identified by integer register rs2, except for entries containing global mappings. This is referred to as the “SFENCE.VMA x0 asid” case. + +..
figure:: ../images/sfence_x0_asid.png + :name: **Figure 10:** Invalidate TLB entry for matching ASIDs + :align: center + :width: 75% + :alt: sfence_x0_asid + + **Figure 10:** Invalidate TLB entry for matching ASIDs + +.. raw:: html + + Updating TLB + +When a TLB valid update request is signaled by the shared TLB, and the replacement policy selects the update of a specific TLB entry, the corresponding entry's tag is updated with the new tag, and its associated content is refreshed with the information from the update request. This ensures that the TLB entry accurately reflects the new translation information. + +.. raw:: html + + Pseudo Least Recently Used Replacement Policy + +Cache replacement algorithms are used to determine which TLB entry should be replaced, because it is not likely to be used in the near future. The Pseudo-Least-Recently-Used (PLRU) is a cache entry replacement algorithm, derived from Least-Recently-Used (LRU) cache entry replacement algorithm, used by the TLB. Instead of precisely tracking recent usage as the LRU algorithm does, PLRU employs an approximate measure to determine which entry in the cache has not been recently used and as such can be replaced. + +CVA6 implements the PLRU algorithm via the Tree-PLRU method which implements a binary tree. The TLB entries are the leaf nodes of the tree. Each internal node, of the tree, consists of a single bit, referred to as the state bit or plru bit, indicating which subtree contains the (pseudo) least recently used entry (the PLRU); 0 for the left hand tree and 1 for the right hand tree. Following this traversal, the leaf node reached, corresponds to the PLRU entry which can be replaced. Having accessed an entry (so as to replace it) we need to promote that entry to be the Most Recently Used (MRU) entry. This is done by updating the value of each node along the access path to point away from that entry.
If the accessed entry is a right child i.e., its parent node value is 1, it is set to 0, and if the parent is the left child of its parent (the grandparent of the accessed node) then its node value is set to 1 and so on all the way up to the root node. + +The PLRU binary tree is implemented as an array of node values. Nodes are organized in the array based on levels, with those from lower levels appearing before higher ones. Furthermore those on the left side of a node appear before those on the right side of a node. The figure below shows a tree and the corresponding array. + +.. figure:: ../images/plru_tree_indexing.png + :name: **Figure 11:** PLRU Tree Indexing + :align: center + :width: 60% + :alt: plru_tree_indexing + + **Figure 11:** PLRU Tree Indexing + +For n-way associative, we require n - 1 internal nodes in the tree. With those nodes, two operations need to be performed efficiently. + +* Promote the accessed entry to be MRU +* Identify which entry to replace (i.e. the PLRU entry) + +.. raw:: html + + Updating the PLRU-Tree + +For a TLB entry which is accessed, the following steps are taken to make it the MRU: + +1. Iterate through each level of the binary tree. +2. Calculate the index of the leftmost child within the current level. Let us call that index the index base. +3. Calculate the shift amount to identify the relevant node based on the level and TLB entry index. +4. Calculate the new value that the node should have in order to make the accessed entry the Most Recently Used (MRU). The new value of the root node is the opposite of the TLB entry index, MSB at the root node, MSB - 1 at node at next level and so on. +5. Assign this new value to the relevant node, ensuring that the hit entry becomes the MRU within the binary tree structure. + +At level 0, no bit of the TLB entry’s index determines the offset from the index base because it’s a root node. At level 1, MSB of entry’s index determines the amount of offset from index base at that level. 
At level 2, the first two bits of the entry's index from MSB side determine the offset from the index base because there are 4 nodes at the level 2 and so on. + +.. figure:: ../images/update_tree.png + :name: **Figure 12:** Promote Entry to be MRU + :align: center + :width: 82% + :alt: update_tree + + **Figure 12:** Promote Entry to be MRU + +In the above figure, the entry at index 5 is accessed. To make it the MRU entry, every node along the access path should point away from it. Entry 5 is a right child, therefore its parent’s plru bit is set to 0; its parent is a left child, so its grandparent’s plru bit is set to 1, and the great-grandparent’s plru bit is set to 0. + +.. raw:: html + + Entry Selection for Replacement + +Every TLB entry is checked for the replacement entry. The following steps are taken: + +1. Iterate through each level of the binary tree. +2. Calculate the index of the leftmost child within the current level. Let us call that index the index base. +3. Calculate the shift amount to identify the relevant node based on the level and TLB entry index. +4. If the corresponding bit of the entry's index matches the value of the node being traversed at the current level, keep the replacement signal high for that entry; otherwise, set the replacement signal to low. + +.. figure:: ../images/replacement_entry.png + :name: **Figure 13:** Possible path traverse for entry selection for replacement + :align: center + :width: 65% + :alt: replacement_entry + + **Figure 13:** Possible path traverse for entry selection for replacement + +The figure above shows every possible path that can be traversed to find the PLRU entry. If the plru bit at each level matches the corresponding bit of the entry's index, that’s the next entry to replace. The table below shows the entry selection for replacement. + +.. raw:: html + +Table 9: Entry Selection for Replacement
+ ++-------------------+---------------+----------------------+ +| **Path Traverse** | **PLRU Bits** | **Entry to replace** | ++-------------------+---------------+----------------------+ +| 0 -> 1 -> 3 | 000 | 0 | +| +---------------+----------------------+ +| | 001 | 1 | ++-------------------+---------------+----------------------+ +| 0 -> 1 -> 4 | 010 | 2 | +| +---------------+----------------------+ +| | 011 | 3 | ++-------------------+---------------+----------------------+ +| 0 -> 2 -> 5 | 100 | 4 | +| +---------------+----------------------+ +| | 101 | 5 | ++-------------------+---------------+----------------------+ +| 0 -> 2 -> 6 | 110 | 6 | +| +---------------+----------------------+ +| | 111 | 7 | ++-------------------+---------------+----------------------+ + +----------------------------------- +Shared Translation Lookaside Buffer +----------------------------------- + +The CVA6 shared TLB is structured as a 2-way associative cache, where the virtual address requiring translation is compared with the set indicated by the virtual page number. The shared TLB is looked up in case of an Instruction TLB (ITLB) or data TLB (DTLB) miss, signaled by these TLBs. If the entry is found in the shared TLB set, the respective TLB, whose translation is being requested, is updated. If the entry is not found in the shared TLB, then the processor has to perform a page table walk. Once the processor obtains a PPN corresponding to the VPN, the shared TLB is updated with this information. If the physical page is not found in the page table, it results in a page fault, which is handled by the operating system. The operating system will then place the corresponding physical page in memory. + +The inputs and output signals of the shared TLB are shown in the following two figures. + +.. 
figure:: ../images/shared_tlb_in_out.png + :name: **Figure 14:** Inputs and outputs of CVA6 shared TLB + :align: center + :width: 60% + :alt: shared_tlb_in_out + + **Figure 14:** Inputs and outputs of CVA6 shared TLB + +.. raw:: html + + Signal Description + +.. raw:: html + +Table 10: Signal Description of CVA6 shared TLB
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - IO + - Connection + - Type + - Description + + * - ``clk_i`` + - in + - Subsystem + - logic + - Subsystem Clock + + * - ``rst_ni`` + - in + - Subsystem + - logic + - Asynchronous reset active low + + * - ``flush_i`` + - in + - Controller + - logic + - TLB flush request + + * - ``enable_translation_i`` + - in + - CSR Regfile + - logic + - CSRs indicate to enable Sv32 + + * - ``en_ld_st_translation_i`` + - in + - CSR Regfile + - logic + - Enable virtual memory translation for load/stores + + * - ``asid_i`` + - in + - CSR Regfile + - logic + - ASID for the lookup + + * - ``itlb_access_i`` + - in + - Cache Subsystem + - logic + - Signal indicating a lookup access in ITLB is being requested. + + * - ``itlb_hit_i`` + - in + - ITLB + - logic + - Signal indicating an ITLB hit + + * - ``itlb_vaddr_i`` + - in + - Cache Subsystem + - logic[31:0] + - Virtual address lookup in ITLB + + * - ``dtlb_access_i`` + - in + - Load/Store Unit + - logic + - Signal indicating a lookup access in DTLB is being requested. 
+ + * - ``dtlb_hit_i`` + - in + - DTLB + - logic + - Signal indicating a DTLB hit + + * - ``dtlb_vaddr_i`` + - in + - Load/Store Unit + - logic[31:0] + - Virtual address lookup in DTLB + + * - ``itlb_update_o`` + - out + - ITLB + - tlb_update_sv32_t + - Tag and content to update ITLB + + * - ``dtlb_update_o`` + - out + - DTLB + - tlb_update_sv32_t + - Tag and content to update DTLB + + * - ``itlb_miss_o`` + - out + - Performance Counter + - logic + - Signal indicating an ITLB miss + + * - ``dtlb_miss_o`` + - out + - Performance Counter + - logic + - Signal indicating a DTLB miss + + * - ``shared_tlb_access_o`` + - out + - PTW + - logic + - Signal indicating a lookup access in shared TLB is being requested + + * - ``shared_tlb_hit_o`` + - out + - PTW + - logic + - Signal indicating a shared TLB hit + + * - ``shared_tlb_vaddr_o`` + - out + - PTW + - logic[31:0] + - Virtual address lookup in shared TLB + + * - ``itlb_req_o`` + - out + - PTW + - logic + - ITLB Request Output + + * - ``shared_tlb_update_i`` + - in + - PTW + - tlb_update_sv32_t + - Updated tag and content of shared TLB + +.. raw:: html + + Struct Description + +.. raw:: html + +Table 11: Shared TLB Update Struct (shared_tag_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``is_4M`` + - logic + - Indicates if the shared TLB entry corresponds to a 4MB page. + + * - ``vpn1`` + - logic[9:0] + - Virtual Page Number (VPN) represents the index of PTE in the page table level 1. + + * - ``vpn0`` + - logic[9:0] + - Virtual Page Number (VPN) represents the index of PTE in the page table level 0. + + * - ``asid`` + - logic + - Address Space Identifier (ASID) used to identify different address spaces + +.. raw:: html + + Shared TLB Entry Structure + +Shared TLB is 2-way associative, with a depth of 64. A single entry in the set contains the valid bit, tag and the content. The Tag segment stores details such as the virtual page number (VPN1, VPN0), ASID, and page size (is_4M). The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page. + +.. figure:: ../images/shared_tlb.png + :name: **Figure 15:** CVA6 Shared TLB Structure + :align: center + :width: 60% + :alt: shared_tlb + + **Figure 15:** CVA6 Shared TLB Structure + +.. raw:: html + + Shared TLB Implementation in CVA6 + +The implementation of a shared TLB in CVA6 is described in the following sections: + +* **ITLB and DTLB Miss:** Prepare a shared TLB lookup if the entry is not found in ITLB or DTLB. +* **Tag Comparison:** Look up the provided virtual address in the shared TLB. +* **Update and Flush:** Flush the shared TLB or update it. +* **Replacement Policies:** First non-valid entry and random replacement policy. + +.. raw:: html + + ITLB and DTLB Miss + +Consider a scenario where an entry is found in the ITLB or DTLB. In this case, there is no need to perform a lookup in the shared TLB since the entry has already been found. Next, there are two scenarios: an ITLB miss or a DTLB miss. + +To identify an ITLB miss, the following conditions need to be fulfilled: + +* Address translation must be enabled. 
+* There must be an access request to the ITLB. +* The ITLB should indicate an ITLB miss. +* There should be no access request to the DTLB. + +During an ITLB miss, access is granted to read the tag and content of the shared TLB from their respective SRAMs. The address for reading the tag and content of the shared TLB entry is calculated using the virtual address for which translation is not found in the ITLB. The ITLB miss is also explicitly indicated by the shared TLB. A request for shared TLB access is initiated. + +To identify the DTLB miss, the following conditions need to be fulfilled: + +* Address translation for loads and stores must be enabled. +* There must be an access request to the DTLB. +* The DTLB should indicate a DTLB miss. + +In the case of a DTLB miss, the same logic is employed as described for an ITLB miss. + +.. raw:: html + + Tag Comparison + +Shared TLB lookup for a hit occurs under the same conditions as described for the TLB modules used as ITLB and DTLB. However, there are some distinctions. In both the ITLB and DTLB, the virtual address requiring translation is compared against all TLB entries. In contrast, the shared TLB only compares the tag and content of the set indicated by the provided virtual page number. The index of the set is extracted from VPN0 of the requested virtual address. Given that the shared TLB is 2-way associative, each set contains two entries. Consequently, both of these entries are compared. The figure below illustrates how the set is selected for the lookup. + +.. figure:: ../images/shared_tlb_set.png + :name: **Figure 16:** Set opted for lookup in shared TLB + :align: center + :width: 60% + :alt: shared_tlb_set + + **Figure 16:** Set opted for lookup in shared TLB + +.. raw:: html + + Update and Flush + +Differing from the ITLB and DTLB, a specific virtual address or address space cannot be flushed in the shared TLB. When SFENCE.VMA is committed, all entries in the shared TLB are invalidated.
 (Cases of SFENCE.VMA should also be added in shared TLB) + +.. raw:: html + + Updating Shared TLB + +When the Page Table Walker signals a valid update request, the shared TLB is updated by selecting an entry through the replacement policy and marking it as valid. This also triggers the writing of the new tag and content to the respective SRAM. + +.. raw:: html + + Replacement Policy Implemented in CVA6 Shared TLB + +In CVA6's shared TLB, two replacement policies are employed for replacements based on a specific condition. These replacement policies select the entry within the set indicated by the virtual page number. The two policies are: + +* First non-valid encounter replacement policy +* Random replacement policy + +The first policy cannot select an entry if all ways are valid. In that case, the random replacement policy is used instead. + +.. raw:: html + + First non-valid encounter replacement policy + +The module implemented in CVA6 to find the first non-valid entry in the shared TLB is the Leading Zero Counter (LZC). It takes three parameters as input: + +1. **WIDTH:** The width of the input vector. +2. **MODE:** Mode selection - 0 for trailing zero, 1 for leading zero. +3. **CNT WIDTH:** Width of the output signal containing the zero count. + +The input signal is the vector to be counted, and the output represents the count of trailing/leading zeros. If all bits in the input vector are zero, it will also be indicated. + +When instantiating the module, the width of the input vector is set to the number of shared TLB ways. The trailing zero counter mode is selected. The vector of valid bits is set as the input vector, but with negation. This is because we want the index of the first non-valid entry, and LZC returns the count of trailing zeros, which actually corresponds to the index of the first occurrence of 1 from the least significant bit (LSB). If there is at least one non-valid entry, that entry is selected for replacement; if not, this is signaled by the LZC. + +.. 
figure:: ../images/LZC.png + :name: **Figure 17:** Replacement of First invalid entry. + :align: center + :width: 60% + :alt: LZC + + **Figure 17:** Replacement of First invalid entry. + +.. raw:: html + + Random replacement policy + +If all ways are valid, a random replacement policy is employed for the replacement process. The Linear Feedback Shift Register (LFSR) is utilized to select the replacement entry randomly. LFSR is commonly used in generating sequences of pseudo-random numbers. When the enable signal is active, the current state of the LFSR undergoes a transformation. Specifically, the state is shifted right by one bit, and the result is combined with a predetermined masking pattern. This masking pattern is derived from the predefined “Masks” array, introducing a non-linear behavior to the sequence generation of the LFSR. The masking process involves XOR operations between the shifted state bits and specific pattern bits, contributing to the complexity and unpredictability of the generated sequence. + +.. figure:: ../images/RR.png + :name: **Figure 18:** Entry selection for replacement using LFSR + :align: center + :width: 95% + :alt: RR + + **Figure 18:** Entry selection for replacement using LFSR + +----------------- +Page Table Walker +----------------- + +The "CVA6 Page Table Walker (PTW) for MMU Sv32" is a hardware module developed for the CV32A6 processor architecture, designed to facilitate the translation of virtual addresses into physical addresses, a crucial task in memory access management. + +.. figure:: ../images/ptw_in_out.png + :name: **Figure 19:** Input and Outputs of Page Table Walker + :align: center + :width: 60% + :alt: ptw_in_out + + **Figure 19:** Input and Outputs of Page Table Walker + +.. raw:: html + + Operation of PTW Module + +The PTW module operates through various states, each with its specific function, such as handling memory access requests, validating page table entries, and responding to errors. + +.. 
raw:: html + + Key Features and Capabilities + +Key features of this PTW module include support for two levels of page tables (LVL1 and LVL2) in the Sv32 standard, accommodating instruction and data page table walks. It rigorously validates and verifies page table entries (PTEs) to ensure translation accuracy and adherence to access permissions. This module seamlessly integrates with the CV32A6 processor's memory management unit (MMU), which governs memory access control. It also takes into account global mapping, access flags, and privilege levels during the translation process, ensuring that memory access adheres to the processor's security and privilege settings. + +.. raw:: html + + Exception Handling + +In addition to its translation capabilities, the PTW module is equipped to detect and manage errors, including page-fault exceptions and access exceptions, contributing to the robustness of the memory access system. It works harmoniously with physical memory protection (PMP) configurations, a critical aspect of modern processors' memory security. Moreover, the module efficiently processes virtual addresses, generating corresponding physical addresses, all while maintaining speculative translation, a feature essential for preserving processor performance during memory access operations. + +.. raw:: html + + Signal Description + +.. raw:: html + +Table 12: Signal Description of PTW
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - IO + - Connection + - Type + - Description + + * - ``clk_i`` + - in + - Subsystem + - logic + - Subsystem Clock + + * - ``rst_ni`` + - in + - Subsystem + - logic + - Asynchronous reset active low + + * - ``flush_i`` + - in + - Controller + - logic + - Sfence Committed + + * - ``ptw_active_o`` + - out + - MMU + - logic + - Output signal indicating whether the Page Table Walker (PTW) is currently active + + * - ``walking_instr_o`` + - out + - MMU + - logic + - Indicating it's an instruction page table walk or not + + * - ``ptw_error_o`` + - out + - MMU + - logic + - Output signal indicating that an error occurred during PTW operation + + * - ``ptw_access_exception_o`` + - out + - MMU + - logic + - Output signal indicating that a PMP (Physical Memory Protection) access exception occurred during PTW operation. + + * - ``lsu_is_store_i`` + - in + - Store Unit + - logic + - Input signal indicating whether the translation was triggered by a store operation. 
+ + * - ``req_port_i`` + - in + - Cache Subsystem + - dcache_req_o_t + - D Cache Data Response + + * - ``req_port_o`` + - out + - Cache Subsystem / Perf Counter + - dcache_req_i_t + - D Cache Data Requests + + * - ``shared_tlb_update_o`` + - out + - Shared TLB + - tlb_update_sv32_t + - Updated tag and content of shared TLB + + * - ``update_vaddr_o`` + - out + - MMU + - logic[riscv::VLEN-1:0] + - Updated VADDR from shared TLB + + * - ``asid_i`` + - in + - CSR RegFile + - logic[ASID_WIDTH-1:0] + - ASID for the lookup + + * - ``shared_tlb_access_i`` + - in + - Shared TLB + - logic + - Access request of shared TLB + + * - ``shared_tlb_hit_i`` + - in + - Shared TLB + - logic + - Indicate shared TLB hit + + * - ``shared_tlb_vaddr_i`` + - in + - Shared TLB + - logic[riscv::VLEN-1:0] + - Virtual Address from shared TLB + + * - ``itlb_req_i`` + - in + - Shared TLB + - logic + - Indicate request to ITLB + + * - ``satp_ppn_i`` + - in + - CSR RegFile + - logic[riscv::PPNW-1:0] + - PPN of top level page table from SATP register + + * - ``mxr_i`` + - in + - CSR RegFile + - logic + - Make Executable Readable bit in xSTATUS CSR register + + * - ``shared_tlb_miss_o`` + - out + - OPEN + - logic + - Indicate a shared TLB miss + + * - ``pmpcfg_i`` + - in + - CSR RegFile + - riscv::pmpcfg_t[15:0] + - PMP configuration + + * - ``pmpaddr_i`` + - in + - CSR RegFile + - logic[15:0][riscv::PLEN-3:0] + - PMP Address + + * - ``bad_paddr_o`` + - out + - MMU + - logic[riscv::PLEN-1:0] + - Bad Physical Address in case of access exception + +.. raw:: html + + Struct Description + +.. raw:: html + +Table 13: D Cache Request Struct (dcache_req_i_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``address_index`` + - logic [DCACHE_INDEX_WIDTH-1:0] + - Index of the Dcache Line + + * - ``address_tag`` + - logic [DCACHE_TAG_WIDTH-1:0] + - Tag of the Dcache Line + + * - ``data_wdata`` + - riscv::xlen_t + - Data to write in the Dcache + + * - ``data_wuser`` + - logic [DCACHE_USER_WIDTH-1:0] + - data_wuser + + * - ``data_req`` + - logic + - Data Request + + * - ``data_we`` + - logic + - Data Write enabled + + * - ``data_be`` + - logic [(riscv::XLEN/8)-1:0] + - Data Byte enable + + * - ``data_size`` + - logic [1:0] + - Size of data + + * - ``data_id`` + - logic [DCACHE_TID_WIDTH-1:0] + - Data ID + + * - ``kill_req`` + - logic + - Kill the D cache request + + * - ``tag_valid`` + - logic + - Indicate that the tag is valid + +.. raw:: html + +Table 14: D Cache Response Struct (dcache_req_o_t)
+ +.. list-table:: + :header-rows: 1 + + * - Signal + - Type + - Description + + * - ``data_gnt`` + - logic + - Grant of data is given in response to the data request + + * - ``data_rvalid`` + - logic + - Indicate that data is valid which is sent by D cache + + * - ``data_rid`` + - logic [DCACHE_TID_WIDTH-1:0] + - Requested data ID + + * - ``data_rdata`` + - riscv::xlen_t + - Data from D cache + + * - ``data_ruser`` + - logic [DCACHE_USER_WIDTH-1:0] + - Requested data user + +.. raw:: html + + PTW State Machine + +The Page Table Walker is implemented as a finite state machine. It listens to the shared TLB for incoming translation requests. If there is a shared TLB miss, it saves the virtual address and starts the page table walk. The Page Table Walker transitions between 7 states in CVA6. + +* **IDLE:** The initial state where the PTW is awaiting a trigger, often a Shared TLB miss, to initiate a memory access request. +* **WAIT_GRANT:** Request memory access and wait for data grant +* **PTE_LOOKUP:** Once granted access, the PTW examines the valid Page Table Entry (PTE), checking attributes to determine the appropriate course of action. +* **PROPAGATE_ERROR:** If the PTE is invalid, this state handles the propagation of an error, often leading to a page-fault exception due to non-compliance with access conditions +* **PROPAGATE_ACCESS_ERROR:** Propagate access fault if access is not allowed from a PMP perspective +* **WAIT_RVALID:** After processing a PTE, the PTW waits for a valid data signal, indicating that relevant data is ready for further processing. +* **LATENCY:** Introduces a delay to account for synchronization or timing requirements between states. + +.. figure:: ../images/ptw_state_diagram.png + :name: **Figure 20:** State Machine Diagram of CVA6 PTW + :align: center + :width: 95% + :alt: ptw_state_diagram + + **Figure 20:** State Machine Diagram of CVA6 PTW + +.. 
raw:: html + + IDLE state + +In the IDLE state of the Page Table Walker (PTW) finite state machine, the system awaits a trigger to initiate the page table walk process. This trigger is often prompted by a Shared Translation Lookaside Buffer (TLB) miss, indicating that the required translation is not present in the shared TLB cache. The PTW's behavior in this state is explained as follows: + +1. The top-most page table is selected for the page table walk. In the case of SV32, which implements a two-level page table, the level 1 page table is chosen. +2. In the IDLE state, translations are assumed to be invalid in all addressing spaces. +3. The signal indicating the instruction page table walk is set to 0. +4. A conditional check is performed: if there is a shared TLB access request and the entry is not found in the shared TLB (indicating a shared TLB miss), the following steps are executed: + + a. The address of the desired Page Table Entry within the level 1 page table is calculated by multiplying the Physical Page Number (PPN) of the level 1 page table from the SATP register by the page size (4kB). This result is then added to the product of the Virtual Page Number (VPN1), and the size of a page table entry(4 bytes). + +.. figure:: ../images/ptw_idle.png + :name: **Figure 21:** Address of Desired PTE at Level 1 + :align: center + :width: 68% + :alt: ptw_idle + + **Figure 21:** Address of Desired PTE at Level 1 + +.. _example: + + b. The signal indicating whether it's an instruction page table walk is updated based on the ITLB miss. + c. The ASID and virtual address are saved for the page table walk. + d. A shared TLB miss is indicated. + +.. raw:: html + + WAIT GRANT state + +In the **WAIT_GRANT** state of the Page Table Walker's finite state machine, a data request is sent to retrieve memory information. It waits for a data grant signal from the Dcache controller, remaining in this state until granted. 
Once granted, it activates a tag valid signal, marking data validity. The state then transitions to "PTE_LOOKUP" for page table entry lookup. + +.. raw:: html + + PTE LOOKUP state + +In the **PTE_LOOKUP** state of the Page Table Walker (PTW) finite state machine, the PTW performs the actual lookup and evaluation of the page table entry (PTE) based on the virtual address translation. The behavior and operations performed in this state are detailed as follows: + +1. The state waits for a valid signal indicating that the data from the memory subsystem, specifically the page table entry, is available for processing. +2. Upon receiving the valid signal, the PTW proceeds with examining the retrieved page table entry to determine its properties and validity. +3. The state checks if the global mapping bit in the PTE is set, and if so, sets the global mapping signal to indicate that the translation applies globally across all address spaces. +4. The state distinguishes between two cases: Invalid PTE and Valid PTE. + + a. If the valid bit of the PTE is not set, or if the PTE has reserved RWX field encodings, it signifies an Invalid PTE. In such cases, the state transitions to the "PROPAGATE_ERROR" state, indicating a page-fault exception due to an invalid translation. + +.. figure:: ../images/ptw_pte_1.png + :name: **Figure 22:** Invalid PTE and reserved RWX encoding leads to page fault + :align: center + :width: 70% + :alt: ptw_pte_1 + + **Figure 22:** Invalid PTE and reserved RWX encoding leads to page fault + +.. _example1: + + b. If the PTE is valid, the state advances to the "LATENCY" state, indicating a period of processing latency. Additionally, if the "read" flag (pte.r) or the "execute" flag (pte.x) is set, the PTE is considered valid. + +5. Within the Valid PTE scenario, the state performs further checks based on whether the translation is intended for instruction fetching or data access: + + a. 
 For instruction page table walk, if the page is not executable (pte.x is not set) or not marked as accessed (pte.a is not set), the state transitions to the "PROPAGATE_ERROR" state. + +.. figure:: ../images/ptw_iptw.png + :name: **Figure 23:** For Instruction Page Table Walk + :align: center + :width: 70% + :alt: ptw_iptw + + **Figure 23:** For Instruction Page Table Walk + +.. _example2: + + b. For data page table walk, the state checks if the page is readable (pte.r is set) or if the page is executable only but made readable by setting the MXR bit in xSTATUS CSR register. If either condition is met, it indicates a valid translation. If not, the state transitions to the "PROPAGATE_ERROR" state. + +.. figure:: ../images/ptw_dptw.png + :name: **Figure 24:** Data Access Page Table Walk + :width: 70% + :alt: ptw_dptw + + **Figure 24:** Data Access Page Table Walk + +.. _example3: + + c. If the access is intended for storing data, additional checks are performed: If the page is not writable (pte.w is not set) or if it is not marked as dirty (pte.d is not set), the state transitions to the "PROPAGATE_ERROR" state. + +.. figure:: ../images/ptw_dptw_s.png + :name: **Figure 25:** Data Access Page Table Walk, Store requested + :align: center + :width: 70% + :alt: ptw_dptw_s + + **Figure 25:** Data Access Page Table Walk, Store requested + +6. The state also checks for potential misalignment issues in the translation: If the current page table level is the first level (LVL1) and if PPN0 in the PTE is not zero, it indicates a misaligned superpage, leading to a transition to the "PROPAGATE_ERROR" state. + +.. figure:: ../images/ptw_mis_sup.png + :name: **Figure 26:** Misaligned Superpage Check + :align: center + :width: 70% + :alt: ptw_mis_sup + + **Figure 26:** Misaligned Superpage Check + +7. 
If the PTE is valid but the page is neither readable nor executable, the PTW recognizes the PTE as a pointer to the next level of the page table, indicating that additional translation information can be found in the referenced page table at a lower level. +8. If the current page table level is the first level (LVL1), the PTW proceeds to switch to the second level (LVL2) page table, updating the next level pointer and calculating the address for the next page table entry using the Physical Page Number from the PTE and the index of the level 2 page table from virtual address. + +.. figure:: ../images/ptw_nlvl.png + :name: **Figure 27:** Address of desired PTE at next level of Page Table + :align: center + :width: 70% + :alt: ptw_nlvl + + **Figure 27:** Address of desired PTE at next level of Page Table + +9. The state then transitions to the "WAIT_GRANT" state, indicating that the PTW is awaiting the grant signal to proceed with requesting the next level page table entry. +10. If the current level is already the second level (LVL2), an error is flagged, and the state transitions to the "PROPAGATE_ERROR" state, signifying an unexpected situation where the PTW is already at the last level page table. +11. If the translation access is found to be restricted by the Physical Memory Protection (PMP) settings (allow_access is false), the state updates the shared TLB update signal to indicate that the TLB entry should not be updated. Additionally, the saved address for the page table walk is restored to its previous value, and the state transitions to the "PROPAGATE_ACCESS_ERROR" state. +12. Lastly, if the data request for the page table entry was granted, the state indicates to the cache subsystem that the tag associated with the data is now valid. + +.. figure:: ../images/ptw_pte_flowchart.png + :name: **Figure 28:** Flow Chart of PTE LOOKUP State + :align: center + :alt: ptw_pte_flowchart + + **Figure 28:** Flow Chart of PTE LOOKUP State + +.. 
raw:: html + + PROPAGATE ERROR state + +This state indicates a detected error in the page table walk process, and an error signal is asserted to indicate the Page Table Walker's error condition, triggering a transition to the "LATENCY" state for error signal propagation. + +.. raw:: html + + PROPAGATE ACCESS ERROR state + +This state indicates a detected access error in the page table walk process, and an access error signal is asserted to indicate the Page Table Walker's access error condition, triggering a transition to the "LATENCY" state for access error signal propagation. + +.. raw:: html + + WAIT RVALID state + +This state waits until it gets the "read valid" signal, and when it does, it's ready to start a new page table walk. + +.. raw:: html + + LATENCY state + +The LATENCY state introduces a latency period to allow for necessary system actions or signals to stabilize. After the latency period, the FSM transitions back to the IDLE state, indicating that the system is prepared for a new translation request. + +.. raw:: html + + Flush Scenario + +The first step when a flush is triggered is to check whether the Page Table Entry (PTE) lookup process is currently in progress. If the PTW (Page Table Walker) module is indeed in the middle of a PTE lookup operation, the code then proceeds to evaluate a specific aspect of this operation. + +* **Check for Data Validity (rvalid):** Within the PTE lookup operation, it's important to ensure that the data being used for the translation is valid. In other words, the code checks whether the "rvalid" signal (which likely indicates the validity of the data) is not active. If the data is not yet valid, it implies that the PTW module is waiting for the data to become valid before completing the lookup. In such a case, the code takes appropriate action to wait for the data to become valid before proceeding further. 
+ +* **Check for Waiting on Grant:** The second condition the code checks for during a flush scenario is whether the PTW module is currently waiting for a "grant." This "grant" signal is typically used to indicate permission or authorization to proceed with an operation. If the PTW module is indeed in a state of waiting for this grant signal, it implies that it requires authorization before continuing its task. + + * **Waiting for Grant:** If the PTW module is in a state of waiting for the grant signal, the code ensures that it continues to wait for the grant signal to be asserted before proceeding further. + +* **Return to Idle State if Neither Condition is Met:** After evaluating the above two conditions, the code determines whether either of these conditions is true. If neither of these conditions applies, it suggests that the PTW module can return to its idle state, indicating that it can continue normal operations without any dependencies on the flush condition. diff --git a/docs/04_cv32a65x_design/source/overview.rst b/docs/04_cv32a65x_design/source/overview.rst deleted file mode 100644 index 84880b1086..0000000000 --- a/docs/04_cv32a65x_design/source/overview.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. - Copyright 2022 Thales DIS design services SAS - Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - You may obtain a copy of the License at https://solderpad.org/licenses/ - - Original Author: Jean-Roch COULON - Thales - - - -Overview -======== - -[TO BE COMPLETED] - -The CVA6 core is fully synthesizable. It has been designed mainly for ASIC designs, but FPGA synthesis is supported as well. - -For ASIC synthesis, the whole design is completely synchronous and uses positive-edge triggered flip-flops. The core occupies an area of about 80 kGE. The clock frequency can be more than 1GHz depending of technology. 
- diff --git a/docs/04_cv32a65x_design/source/parameters_cv32a65x.rst b/docs/04_cv32a65x_design/source/parameters_cv32a65x.rst index c79a8669f6..fe20c4fdb8 100644 --- a/docs/04_cv32a65x_design/source/parameters_cv32a65x.rst +++ b/docs/04_cv32a65x_design/source/parameters_cv32a65x.rst @@ -13,7 +13,7 @@ :header-rows: 1 * - Name - - Description + - description - Value * - NrCommitPorts diff --git a/docs/04_cv32a65x_design/source/port_bht.rst b/docs/04_cv32a65x_design/source/port_bht.rst index 66f695d0bc..91a5f4a449 100644 --- a/docs/04_cv32a65x_design/source/port_bht.rst +++ b/docs/04_cv32a65x_design/source/port_bht.rst @@ -14,48 +14,47 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in - - CONTROLLER - - logic - Fetch flush request - - * - ``debug_mode_i`` - - in - - CSR + - CONTROLLER - logic - - Debug mode state * - ``vpc_i`` - in + - Virtual PC - CACHE - logic[riscv::VLEN-1:0] - - Virtual PC * - ``bht_update_i`` - in + - Update bht with resolved address - EXECUTE - ariane_pkg::bht_update_t - - Update bht with resolved address * - ``bht_prediction_o`` - out + - Prediction from bht - FRONTEND - ariane_pkg::bht_prediction_t[ariane_pkg::INSTR_PER_FETCH-1:0] - - Prediction from bht + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_mode_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_btb.rst b/docs/04_cv32a65x_design/source/port_btb.rst index cfb7362c2f..225d583514 100644 --- a/docs/04_cv32a65x_design/source/port_btb.rst +++ b/docs/04_cv32a65x_design/source/port_btb.rst @@ -14,48 +14,47 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in - - CONTROLLER - - logic - Fetch flush request - - * - ``debug_mode_i`` - - in - - CSR + - CONTROLLER - logic - - Debug mode state * - ``vpc_i`` - in + - Virtual PC - CACHE - logic[riscv::VLEN-1:0] - - Virtual PC * - ``btb_update_i`` - in + - Update BTB with resolved address - EXECUTE - ariane_pkg::btb_update_t - - Update BTB with resolved address * - ``btb_prediction_o`` - out + - BTB Prediction - FRONTEND - ariane_pkg::btb_prediction_t[ariane_pkg::INSTR_PER_FETCH-1:0] - - BTB Prediction + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_mode_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_commit_stage.rst b/docs/04_cv32a65x_design/source/port_commit_stage.rst index c32c404637..ea35aa4dc1 100644 --- a/docs/04_cv32a65x_design/source/port_commit_stage.rst +++ b/docs/04_cv32a65x_design/source/port_commit_stage.rst @@ -14,186 +14,185 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``halt_i`` - in + - Request to halt the core - CONTROLLER - logic - - Request to halt the core * - ``flush_dcache_i`` - in + - request to flush dcache, also flush the pipeline - CACHE - logic - - request to flush dcache, also flush the pipeline * - ``exception_o`` - out + - TO_BE_COMPLETED - EX_STAGE - exception_t - - TO_BE_COMPLETED * - ``dirty_fp_state_o`` - out + - Mark the F state as dirty - CSR_REGFILE - logic - - Mark the F state as dirty * - ``single_step_i`` - in + - TO_BE_COMPLETED - CSR_REGFILE - logic - - TO_BE_COMPLETED * - ``commit_instr_i`` - in + - The instruction we want to commit - ISSUE_STAGE - scoreboard_entry_t[CVA6Cfg.NrCommitPorts-1:0] - - The instruction we want to commit * - ``commit_ack_o`` - out + - Acknowledge that we are indeed committing - ISSUE_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - Acknowledge that we are indeed committing * - ``waddr_o`` - out + - Register file write address - ID_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0][4:0] - - Register file write address * - ``wdata_o`` - out + - Register file write data - ID_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] - - Register file write data * - ``we_gpr_o`` - out + - Register file write enable - ID_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - Register file write enable * - ``we_fpr_o`` - out 
+ - Floating point register enable - ID_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - Floating point register enable - - * - ``amo_resp_i`` - - in - - CACHE - - amo_resp_t - - Result of AMO operation * - ``pc_o`` - out + - TO_BE_COMPLETED - FRONTEND_CSR - logic[riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``csr_op_o`` - out + - Decoded CSR operation - CSR_REGFILE - fu_op - - Decoded CSR operation * - ``csr_wdata_o`` - out + - Data to write to CSR - CSR_REGFILE - riscv::xlen_t - - Data to write to CSR * - ``csr_rdata_i`` - in + - Data to read from CSR - CSR_REGFILE - riscv::xlen_t - - Data to read from CSR * - ``csr_exception_i`` - in + - Exception or interrupt occurred in CSR stage (the same as commit) - CSR_REGFILE - exception_t - - Exception or interrupt occurred in CSR stage (the same as commit) * - ``csr_write_fflags_o`` - out + - Write the fflags CSR - CSR_REGFILE - logic - - Write the fflags CSR * - ``commit_lsu_o`` - out + - Commit the pending store - EX_STAGE - logic - - Commit the pending store * - ``commit_lsu_ready_i`` - in + - Commit buffer of LSU is ready - EX_STAGE - logic - - Commit buffer of LSU is ready * - ``commit_tran_id_o`` - out + - Transaction id of first commit port - ID_STAGE - logic[TRANS_ID_BITS-1:0] - - Transaction id of first commit port * - ``amo_valid_commit_o`` - out + - Valid AMO in commit stage - EX_STAGE - logic - - Valid AMO in commit stage * - ``no_st_pending_i`` - in + - no store is pending - EX_STAGE - logic - - no store is pending * - ``commit_csr_o`` - out + - Commit the pending CSR instruction - EX_STAGE - logic - - Commit the pending CSR instruction * - ``fence_i_o`` - out + - Flush I$ and pipeline - CONTROLLER - logic - - Flush I$ and pipeline * - ``fence_o`` - out + - Flush D$ and pipeline - CONTROLLER - logic - - Flush D$ and pipeline * - ``flush_commit_o`` - out + - Request a pipeline flush - CONTROLLER - logic - - Request a pipeline flush * - ``sfence_vma_o`` - out + - Flush TLBs and pipeline - CONTROLLER - logic - - Flush 
TLBs and pipeline + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +| As RVA = 0, +| ``amo_resp_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_compressed_decoder.rst b/docs/04_cv32a65x_design/source/port_compressed_decoder.rst new file mode 100644 index 0000000000..e42dd8e0fb --- /dev/null +++ b/docs/04_cv32a65x_design/source/port_compressed_decoder.rst @@ -0,0 +1,47 @@ +.. + Copyright 2024 Thales DIS France SAS + Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); + you may not use this file except in compliance with the License. + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + You may obtain a copy of the License at https://solderpad.org/licenses/ + + Original Author: Jean-Roch COULON - Thales + +.. _CVA6_compressed_decoder_ports: + +.. list-table:: compressed_decoder module IO ports + :header-rows: 1 + + * - Signal + - IO + - Description + - connexion + - Type + + * - ``instr_i`` + - in + - Input instruction coming from fetch stage + - FRONTEND + - logic[31:0] + + * - ``instr_o`` + - out + - Output instruction in uncompressed format + - decoder + - logic[31:0] + + * - ``illegal_instr_o`` + - out + - Input instruction is illegal + - decoder + - logic + + * - ``is_compressed_o`` + - out + - Output instruction is compressed + - decoder + - logic + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_controller.rst b/docs/04_cv32a65x_design/source/port_controller.rst index 2d036c997a..75e285168e 100644 --- a/docs/04_cv32a65x_design/source/port_controller.rst +++ b/docs/04_cv32a65x_design/source/port_controller.rst @@ -14,156 +14,146 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``set_pc_commit_o`` - out + - Set PC om PC Gen - FRONTEND - logic - - Set PC om PC Gen * - ``flush_if_o`` - out + - Flush the IF stage - FRONTEND - logic - - Flush the IF stage * - ``flush_unissued_instr_o`` - out + - Flush un-issued instructions of the scoreboard - FRONTEND - logic - - Flush un-issued instructions of the scoreboard * - ``flush_id_o`` - out + - Flush ID stage - ID_STAGE - logic - - Flush ID stage * - ``flush_ex_o`` - out + - Flush EX stage - EX_STAGE - logic - - Flush EX stage * - ``flush_bp_o`` - out + - Flush branch predictors - FRONTEND - logic - - Flush branch predictors * - ``flush_icache_o`` - out + - Flush ICache - CACHE - logic - - Flush ICache * - ``flush_dcache_o`` - out + - Flush DCache - CACHE - logic - - Flush DCache * - ``flush_dcache_ack_i`` - in + - Acknowledge the whole DCache Flush - CACHE - logic - - Acknowledge the whole DCache Flush * - ``flush_tlb_o`` - out + - Flush TLBs - EX_STAGE - logic - - Flush TLBs * - ``halt_csr_i`` - in - - CSR_REGFILE - - logic - Halt request from CSR (WFI instruction) - - * - ``halt_acc_i`` - - in - - ACC_DISPATCHER + - CSR_REGFILE - logic - - Halt request from accelerator dispatcher * - ``halt_o`` - out + - Halt signal to commit stage - COMMIT_STAGE - logic - - Halt signal to commit stage * - ``eret_i`` - in + - Return from exception - CSR_REGFILE - logic - - Return from 
exception * - ``ex_valid_i`` - in - - FRONTEND - - logic - We got an exception, flush the pipeline - - * - ``set_debug_pc_i`` - - in - FRONTEND - logic - - set the debug pc from CSR * - ``resolved_branch_i`` - in + - We got a resolved branch, check if we need to flush the front-end - EX_STAGE - bp_resolve_t - - We got a resolved branch, check if we need to flush the front-end * - ``flush_csr_i`` - in + - We got an instruction which altered the CSR, flush the pipeline - CSR_REGFILE - logic - - We got an instruction which altered the CSR, flush the pipeline * - ``fence_i_i`` - in + - fence.i in - ACC_DISPATCH - logic - - fence.i in * - ``fence_i`` - in + - fence in - ACC_DISPATCH - logic - - fence in * - ``sfence_vma_i`` - in + - We got an instruction to flush the TLBs and pipeline - COMMIT_STAGE - logic - - We got an instruction to flush the TLBs and pipeline * - ``flush_commit_i`` - in + - Flush request from commit stage - COMMIT_STAGE - logic - - Flush request from commit stage - * - ``flush_acc_i`` - - in - - ACC_DISPATCHER - - logic - - Flush request from accelerator +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As EnableAccelerator = 0, +| ``halt_acc_i`` input is tied to 0 +| ``flush_acc_i`` input is tied to 0 +| As DebugEn = 0, +| ``set_debug_pc_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_csr_regfile.rst b/docs/04_cv32a65x_design/source/port_csr_regfile.rst index 69a54429af..5aaf3bafe1 100644 --- a/docs/04_cv32a65x_design/source/port_csr_regfile.rst +++ b/docs/04_cv32a65x_design/source/port_csr_regfile.rst @@ -14,348 +14,294 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``time_irq_i`` - in + - Timer threw a interrupt - SUBSYSTEM - logic - - Timer threw a interrupt * - ``flush_o`` - out + - send a flush request out when a CSR with a side effect changes - CONTROLLER - logic - - send a flush request out when a CSR with a side effect changes * - ``halt_csr_o`` - out + - halt requested - CONTROLLER - logic - - halt requested * - ``commit_instr_i`` - in + - Instruction to be committed - ID_STAGE - scoreboard_entry_t[CVA6Cfg.NrCommitPorts-1:0] - - Instruction to be committed * - ``commit_ack_i`` - in + - Commit acknowledged a instruction -> increase instret CSR - COMMIT_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - Commit acknowledged a instruction -> increase instret CSR * - ``boot_addr_i`` - in + - Address from which to start booting, mtvec is set to the same address - SUBSYSTEM - logic[riscv::VLEN-1:0] - - Address from which to start booting, mtvec is set to the same address * - ``hart_id_i`` - in + - Hart id in a multicore environment (reflected in a CSR) - SUBSYSTEM - logic[riscv::XLEN-1:0] - - Hart id in a multicore environment (reflected in a CSR) * - ``ex_i`` - in + - We've got an exception from the commit stage, take it - COMMIT_STAGE - exception_t - 
- We've got an exception from the commit stage, take it * - ``csr_op_i`` - in + - Operation to perform on the CSR file - COMMIT_STAGE - fu_op - - Operation to perform on the CSR file * - ``csr_addr_i`` - in + - Address of the register to read/write - EX_STAGE - logic[11:0] - - Address of the register to read/write * - ``csr_wdata_i`` - in + - Write data in - COMMIT_STAGE - logic[riscv::XLEN-1:0] - - Write data in * - ``csr_rdata_o`` - out + - Read data out - COMMIT_STAGE - logic[riscv::XLEN-1:0] - - Read data out * - ``dirty_fp_state_i`` - in + - Mark the FP sate as dirty - COMMIT_STAGE - logic - - Mark the FP sate as dirty * - ``csr_write_fflags_i`` - in - - COMMIT_STAGE - - logic - Write fflags register e.g.: we are retiring a floating point instruction - - * - ``dirty_v_state_i`` - - in - - ACC_DISPATCHER + - COMMIT_STAGE - logic - - Mark the V state as dirty * - ``pc_i`` - in + - PC of instruction accessing the CSR - COMMIT_STAGE - logic[riscv::VLEN-1:0] - - PC of instruction accessing the CSR * - ``csr_exception_o`` - out + - attempts to access a CSR without appropriate privilege - COMMIT_STAGE - exception_t - - attempts to access a CSR without appropriate privilege * - ``epc_o`` - out + - Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly - FRONTEND - logic[riscv::VLEN-1:0] - - Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly * - ``eret_o`` - out + - Return from exception, set the PC of epc_o - FRONTEND - logic - - Return from exception, set the PC of epc_o * - ``trap_vector_base_o`` - out + - Output base of exception vector, correct CSR is output (mtvec, stvec) - FRONTEND - logic[riscv::VLEN-1:0] - - Output base of exception vector, correct CSR is output (mtvec, stvec) * - ``priv_lvl_o`` - out + - Current privilege level the CPU is in - EX_STAGE - riscv::priv_lvl_t - - Current privilege level the CPU is in - - * - ``acc_fflags_ex_i`` - - in - - ACC_DISPATCHER - - logic[4:0] - - Imprecise FP 
exception from the accelerator (fcsr.fflags format) - - * - ``acc_fflags_ex_valid_i`` - - in - - ACC_DISPATCHER - - logic - - An FP exception from the accelerator occurred * - ``fs_o`` - out + - Floating point extension status - ID_STAGE - riscv::xs_t - - Floating point extension status * - ``fflags_o`` - out + - Floating-Point Accured Exceptions - COMMIT_STAGE - logic[4:0] - - Floating-Point Accured Exceptions * - ``frm_o`` - out + - Floating-Point Dynamic Rounding Mode - EX_STAGE - logic[2:0] - - Floating-Point Dynamic Rounding Mode * - ``fprec_o`` - out + - Floating-Point Precision Control - EX_STAGE - logic[6:0] - - Floating-Point Precision Control * - ``vs_o`` - out + - Vector extension status - ID_STAGE - riscv::xs_t - - Vector extension status * - ``irq_ctrl_o`` - out + - interrupt management to id stage - ID_STAGE - irq_ctrl_t - - interrupt management to id stage * - ``en_translation_o`` - out + - enable VA translation - EX_STAGE - logic - - enable VA translation * - ``en_ld_st_translation_o`` - out + - enable VA translation for load and stores - EX_STAGE - logic - - enable VA translation for load and stores * - ``ld_st_priv_lvl_o`` - out + - Privilege level at which load and stores should happen - EX_STAGE - riscv::priv_lvl_t - - Privilege level at which load and stores should happen * - ``sum_o`` - out + - TO_BE_COMPLETED - EX_STAGE - logic - - TO_BE_COMPLETED * - ``mxr_o`` - out + - TO_BE_COMPLETED - EX_STAGE - logic - - TO_BE_COMPLETED * - ``satp_ppn_o`` - out + - TO_BE_COMPLETED - EX_STAGE - logic[riscv::PPNW-1:0] - - TO_BE_COMPLETED * - ``asid_o`` - out + - TO_BE_COMPLETED - EX_STAGE - logic[AsidWidth-1:0] - - TO_BE_COMPLETED * - ``irq_i`` - in + - external interrupt in - SUBSYSTEM - logic[1:0] - - external interrupt in * - ``ipi_i`` - in - - SUBSYSTEM - - logic - inter processor interrupt -> connected to machine mode sw - - * - ``debug_req_i`` - - in - - ID_STAGE + - SUBSYSTEM - logic - - debug request in * - ``set_debug_pc_o`` - out + - 
TO_BE_COMPLETED - FRONTEND - logic - - TO_BE_COMPLETED * - ``tvm_o`` - out + - trap virtual memory - ID_STAGE - logic - - trap virtual memory * - ``tw_o`` - out + - timeout wait - ID_STAGE - logic - - timeout wait * - ``tsr_o`` - out + - trap sret - ID_STAGE - logic - - trap sret * - ``debug_mode_o`` - out + - we are in debug mode -> that will change some decoding - EX_STAGE - logic - - we are in debug mode -> that will change some decoding * - ``single_step_o`` - out + - we are in single-step mode - COMMIT_STAGE - logic - - we are in single-step mode * - ``icache_en_o`` - out + - L1 ICache Enable - CACHE - logic - - L1 ICache Enable * - ``dcache_en_o`` - out - - CACHE - - logic - L1 DCache Enable - - * - ``acc_cons_en_o`` - - out - - ACC_DISPATCHER - - logic - - Accelerator memory consistent mode - - * - ``perf_addr_o`` - - out - - PERF_COUNTERS - - logic[11:0] - - read/write address to performance counter module - - * - ``perf_data_o`` - - out - - PERF_COUNTERS - - logic[riscv::XLEN-1:0] - - write data to performance counter module - - * - ``perf_data_i`` - - in - - PERF_COUNTERS - - logic[riscv::XLEN-1:0] - - read data from performance counter module - - * - ``perf_we_o`` - - out - - PERF_COUNTERS + - CACHE - logic - - TO_BE_COMPLETED - - * - ``pmpcfg_o`` - - out - - ACC_DISPATCHER - - riscv::pmpcfg_t[15:0] - - PMP configuration containing pmpcfg for max 16 PMPs - * - ``pmpaddr_o`` - - out - - ACC_DISPATCHER - - logic[15:0][riscv::PLEN-3:0] - - PMP addresses - - * - ``mcountinhibit_o`` - - out - - PERF_COUNTERS - - logic[31:0] - - TO_BE_COMPLETED +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As EnableAccelerator = 0, +| ``dirty_v_state_i`` input is tied to 0 +| ``acc_fflags_ex_i`` input is tied to 0 +| ``acc_fflags_ex_valid_i`` input is tied to 0 +| ``acc_cons_en_o`` output is tied to 0 +| ``pmpcfg_o`` output is tied to 0 +| ``pmpaddr_o`` output is tied to 0 +| As DebugEn = 0, +| ``debug_req_i`` input is tied to 0 +| As PerfCounterEn = 0, +| ``perf_addr_o`` output is tied to 0 +| ``perf_data_o`` output is tied to 0 +| ``perf_data_i`` input is tied to 0 +| ``perf_we_o`` output is tied to 0 +| ``mcountinhibit_o`` output is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_cva6.rst b/docs/04_cv32a65x_design/source/port_cva6.rst index 2afd967873..a439bf3a8c 100644 --- a/docs/04_cv32a65x_design/source/port_cva6.rst +++ b/docs/04_cv32a65x_design/source/port_cva6.rst @@ -14,84 +14,83 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``boot_addr_i`` - in + - Reset boot address - SUBSYSTEM - logic[riscv::VLEN-1:0] - - Reset boot address * - ``hart_id_i`` - in + - Hard ID reflected as CSR - SUBSYSTEM - logic[riscv::XLEN-1:0] - - Hard ID reflected as CSR * - ``irq_i`` - in + - Level sensitive (async) interrupts - SUBSYSTEM - logic[1:0] - - Level sensitive (async) interrupts * - ``ipi_i`` - in + - Inter-processor (async) interrupt - SUBSYSTEM - logic - - Inter-processor (async) interrupt * - ``time_irq_i`` - in - - SUBSYSTEM - - logic - Timer (async) interrupt - - * - ``debug_req_i`` - - in - SUBSYSTEM - logic - - Debug (async) request * - ``rvfi_probes_o`` - out + - Probes to build RVFI, can be left open when not used - SUBSYSTEM - rvfi_probes_t - - Probes to build RVFI, can be left open when not used * - ``cvxif_req_o`` - out + - CVXIF request - SUBSYSTEM - 
cvxif_req_t - - CVXIF request * - ``cvxif_resp_i`` - in + - CVXIF response - SUBSYSTEM - cvxif_resp_t - - CVXIF response * - ``noc_req_o`` - out + - noc request, can be AXI or OpenPiton - SUBSYSTEM - noc_req_t - - noc request, can be AXI or OpenPiton * - ``noc_resp_i`` - in + - noc response, can be AXI or OpenPiton - SUBSYSTEM - noc_resp_t - - noc response, can be AXI or OpenPiton + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_req_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_decoder.rst b/docs/04_cv32a65x_design/source/port_decoder.rst new file mode 100644 index 0000000000..cf3f108d45 --- /dev/null +++ b/docs/04_cv32a65x_design/source/port_decoder.rst @@ -0,0 +1,122 @@ +.. + Copyright 2024 Thales DIS France SAS + Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); + you may not use this file except in compliance with the License. + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + You may obtain a copy of the License at https://solderpad.org/licenses/ + + Original Author: Jean-Roch COULON - Thales + +.. _CVA6_decoder_ports: + +.. 
list-table:: decoder module IO ports + :header-rows: 1 + + * - Signal + - IO + - Description + - connexion + - Type + + * - ``pc_i`` + - in + - PC from fetch stage + - FRONTEND + - logic[riscv::VLEN-1:0] + + * - ``is_compressed_i`` + - in + - Is a compressed instruction + - compressed_decoder + - logic + + * - ``compressed_instr_i`` + - in + - Compressed form of instruction + - FRONTEND + - logic[15:0] + + * - ``is_illegal_i`` + - in + - Illegal compressed instruction + - compressed_decoder + - logic + + * - ``instruction_i`` + - in + - Instruction from fetch stage + - FRONTEND + - logic[31:0] + + * - ``branch_predict_i`` + - in + - Is a branch predict instruction + - FRONTEND + - branchpredict_sbe_t + + * - ``ex_i`` + - in + - If an exception occurred in fetch stage + - FRONTEND + - exception_t + + * - ``irq_i`` + - in + - Level sensitive (async) interrupts + - SUBSYSTEM + - logic[1:0] + + * - ``irq_ctrl_i`` + - in + - Interrupt control status + - CSR_REGFILE + - irq_ctrl_t + + * - ``tvm_i`` + - in + - Trap virtual memory + - CSR_REGFILE + - logic + + * - ``tw_i`` + - in + - Timeout wait + - CSR_REGFILE + - logic + + * - ``tsr_i`` + - in + - Trap sret + - CSR_REGFILE + - logic + + * - ``instruction_o`` + - out + - Instruction to be added to scoreboard entry + - ISSUE_STAGE + - scoreboard_entry_t + + * - ``orig_instr_o`` + - out + - Instruction + - ISSUE_STAGE + - logic[31:0] + + * - ``is_control_flow_instr_o`` + - out + - Is a control flow instruction + - ISSUE_STAGE + - logic + +Due to cv32a65x configuration, some ports are tied to a static value.
These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_req_i`` input is tied to 0 +| ``debug_mode_i`` input is tied to 0 +| As PRIV = MachineOnly, +| ``priv_lvl_i`` input is tied to MachineMode +| As RVF = 0, +| ``fs_i`` input is tied to 0 +| ``frm_i`` input is tied to 0 +| As RVV = 0, +| ``vs_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_ex_stage.rst b/docs/04_cv32a65x_design/source/port_ex_stage.rst index 355aa20108..3fc330347e 100644 --- a/docs/04_cv32a65x_design/source/port_ex_stage.rst +++ b/docs/04_cv32a65x_design/source/port_ex_stage.rst @@ -14,504 +14,400 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in - - CONTROLLER - - logic - Fetch flush request - - * - ``debug_mode_i`` - - in - - CSR_REGFILE + - CONTROLLER - logic - - TO_BE_COMPLETED * - ``rs1_forwarding_i`` - in + - rs1 forwarding - ID_STAGE - logic[riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``rs2_forwarding_i`` - in + - rs2 forwarding - ID_STAGE - logic[riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``fu_data_i`` - in + - FU data useful to execute instruction - ID_STAGE - fu_data_t - - TO_BE_COMPLETED * - ``pc_i`` - in + - PC of the current instruction - ID_STAGE - logic[riscv::VLEN-1:0] - - PC of the current instruction * - ``is_compressed_instr_i`` - in + - Report whether isntruction is compressed - ID_STAGE - logic - - Report whether isntruction is compressed * - ``flu_result_o`` - out + - TO_BE_COMPLETED - ID_STAGE - riscv::xlen_t - - TO_BE_COMPLETED * - ``flu_trans_id_o`` - out + - ID of the scoreboard entry at which a=to write back - ID_STAGE - logic[TRANS_ID_BITS-1:0] - - ID of the scoreboard entry at which a=to write back * - ``flu_exception_o`` - out + - TO_BE_COMPLETED - ID_STAGE - exception_t 
- - TO_BE_COMPLETED * - ``flu_ready_o`` - out + - FLU is ready - ID_STAGE - logic - - FLU is ready * - ``flu_valid_o`` - out + - FLU result is valid - ID_STAGE - logic - - FLU result is valid * - ``alu_valid_i`` - in + - ALU result is valid - ID_STAGE - logic - - ALU result is valid * - ``branch_valid_i`` - in + - Branch unit result is valid - ID_STAGE - logic - - Branch unit result is valid * - ``branch_predict_i`` - in + - Information of branch prediction - ID_STAGE - branchpredict_sbe_t - - TO_BE_COMPLETED * - ``resolved_branch_o`` - out - - none + - The branch engine uses the write back from the ALU + - several_modules - bp_resolve_t - - none * - ``resolve_branch_o`` - out + - ID signaling that we resolved the branch - ID_STAGE - logic - - ID signaling that we resolved the branch * - ``csr_valid_i`` - in + - CSR result is valid - ID_STAGE - logic - - TO_BE_COMPLETED * - ``csr_addr_o`` - out + - TO_BE_COMPLETED - CSR_REGISTERS - logic[11:0] - - TO_BE_COMPLETED * - ``csr_commit_i`` - in + - TO_BE_COMPLETED - COMMIT_STAGE - logic - - TO_BE_COMPLETED * - ``mult_valid_i`` - in + - MULT result is valid - ID_STAGE - logic - - MULT result is valid * - ``lsu_ready_o`` - out + - FU is ready - ID_STAGE - logic - - FU is ready * - ``lsu_valid_i`` - in + - LSU result is valid - ID_STAGE - logic - - LSU result is valid * - ``load_valid_o`` - out + - Load result is valid - ID_STAGE - logic - - TO_BE_COMPLETED * - ``load_result_o`` - out + - Load result valid - ID_STAGE - riscv::xlen_t - - TO_BE_COMPLETED * - ``load_trans_id_o`` - out + - Load instruction ID - ID_STAGE - logic[TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED * - ``load_exception_o`` - out + - Exception generated by load instruction - ID_STAGE - exception_t - - TO_BE_COMPLETED * - ``store_valid_o`` - out + - Store result is valid - ID_STAGE - logic - - TO_BE_COMPLETED * - ``store_result_o`` - out + - Store result - ID_STAGE - riscv::xlen_t - - TO_BE_COMPLETED * - ``store_trans_id_o`` - out + - Store instruction ID - 
ID_STAGE - logic[TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED * - ``store_exception_o`` - out + - Exception generated by store instruction - ID_STAGE - exception_t - - TO_BE_COMPLETED * - ``lsu_commit_i`` - in + - TO_BE_COMPLETED - COMMIT_STAGE - logic - - TO_BE_COMPLETED * - ``lsu_commit_ready_o`` - out + - Commit queue ready to accept another commit request - COMMIT_STAGE - logic - - Commit queue is ready to accept another commit request * - ``commit_tran_id_i`` - in + - TO_BE_COMPLETED - COMMIT_STAGE - logic[TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED - - * - ``stall_st_pending_i`` - - in - - ACC_DISPATCHER - - logic - - TO_BE_COMPLETED * - ``no_st_pending_o`` - out - - COMMIT_STAGE - - logic - TO_BE_COMPLETED - - * - ``amo_valid_commit_i`` - - in - COMMIT_STAGE - logic - - TO_BE_COMPLETED - - * - ``fpu_ready_o`` - - out - - ID_STAGE - - logic - - FU is ready - - * - ``fpu_valid_i`` - - in - - ID_STAGE - - logic - - Output is ready - - * - ``fpu_fmt_i`` - - in - - ID_STAGE - - logic[1:0] - - report FP format - - * - ``fpu_rm_i`` - - in - - ID_STAGE - - logic[2:0] - - FP rm - - * - ``fpu_frm_i`` - - in - - ID_STAGE - - logic[2:0] - - FP frm - - * - ``fpu_prec_i`` - - in - - CSR_REGFILE - - logic[6:0] - - FP precision control - - * - ``fpu_trans_id_o`` - - out - - ID_STAGE - - logic[TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED - - * - ``fpu_result_o`` - - out - - ID_STAGE - - riscv::xlen_t - - TO_BE_COMPLETED - - * - ``fpu_valid_o`` - - out - - ID_STAGE - - logic - - TO_BE_COMPLETED - - * - ``fpu_exception_o`` - - out - - ID_STAGE - - exception_t - - TO_BE_COMPLETED * - ``x_valid_i`` - in - - ID_STAGE + - CVXIF instruction is valid + - ISSUE_STAGE - logic - - TO_BE_COMPLETED * - ``x_ready_o`` - out - - ID_STAGE + - CVXIF is ready + - ISSUE_STAGE - logic - - TO_BE_COMPLETED * - ``x_off_instr_i`` - in + - TO_BE_COMPLETED - ID_STAGE - logic[31:0] - - TO_BE_COMPLETED * - ``x_trans_id_o`` - out + - TO_BE_COMPLETED - ID_STAGE - logic[TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED * - 
``x_exception_o`` - out + - TO_BE_COMPLETED - ID_STAGE - exception_t - - TO_BE_COMPLETED * - ``x_result_o`` - out + - TO_BE_COMPLETED - ID_STAGE - riscv::xlen_t - - TO_BE_COMPLETED * - ``x_valid_o`` - out + - TO_BE_COMPLETED - ID_STAGE - logic - - TO_BE_COMPLETED * - ``x_we_o`` - out + - TO_BE_COMPLETED - ID_STAGE - logic - - TO_BE_COMPLETED * - ``cvxif_req_o`` - out + - TO_BE_COMPLETED - SUBSYSTEM - cvxif_pkg::cvxif_req_t - - TO_BE_COMPLETED * - ``cvxif_resp_i`` - in + - TO_BE_COMPLETED - SUBSYSTEM - cvxif_pkg::cvxif_resp_t - - TO_BE_COMPLETED - - * - ``acc_valid_i`` - - in - - ACC_DISPATCHER - - logic - - TO_BE_COMPLETED * - ``enable_translation_i`` - in - - CSR_REGFILE - - logic - TO_BE_COMPLETED - - * - ``en_ld_st_translation_i`` - - in - CSR_REGFILE - logic - - TO_BE_COMPLETED - - * - ``flush_tlb_i`` - - in - - CONTROLLER - - logic - - TO_BE_COMPLETED - * - ``priv_lvl_i`` + * - ``en_ld_st_translation_i`` - in - - CSR_REGFILE - - riscv::priv_lvl_t - TO_BE_COMPLETED - - * - ``ld_st_priv_lvl_i`` - - in - CSR_REGFILE - - riscv::priv_lvl_t - - TO_BE_COMPLETED + - logic * - ``sum_i`` - in + - Supervisor user memory - CSR_REGFILE - logic - - TO_BE_COMPLETED * - ``mxr_i`` - in + - Make executable readable - CSR_REGFILE - logic - - TO_BE_COMPLETED * - ``satp_ppn_i`` - in + - TO_BE_COMPLETED - CSR_REGFILE - logic[riscv::PPNW-1:0] - - TO_BE_COMPLETED * - ``asid_i`` - in + - TO_BE_COMPLETED - CSR_REGFILE - logic[ASID_WIDTH-1:0] - - TO_BE_COMPLETED * - ``icache_areq_i`` - in + - icache translation response - CACHE - icache_arsp_t - - icache translation response * - ``icache_areq_o`` - out + - icache translation request - CACHE - icache_areq_t - - icache translation request * - ``dcache_req_ports_i`` - in + - TO_BE_COMPLETED - CACHE - dcache_req_o_t[2:0] - - TO_BE_COMPLETED * - ``dcache_req_ports_o`` - out + - TO_BE_COMPLETED - CACHE - dcache_req_i_t[2:0] - - TO_BE_COMPLETED * - ``dcache_wbuffer_empty_i`` - in + - TO_BE_COMPLETED - CACHE - logic - - TO_BE_COMPLETED * - 
``dcache_wbuffer_not_ni_i`` - in - - CACHE - - logic - TO_BE_COMPLETED - - * - ``amo_req_o`` - - out - CACHE - - amo_req_t - - AMO request - - * - ``amo_resp_i`` - - in - - CACHE - - amo_resp_t - - AMO response from cache - - * - ``itlb_miss_o`` - - out - - PERF_COUNTERS - logic - - To count the instruction TLB misses - - * - ``dtlb_miss_o`` - - out - - PERF_COUNTERS - - logic - - To count the data TLB misses * - ``pmpcfg_i`` - in + - Report the PMP configuration - CSR_REGFILE - riscv::pmpcfg_t[15:0] - - Report the PMP configuration * - ``pmpaddr_i`` - in + - Report the PMP addresses - CSR_REGFILE - logic[15:0][riscv::PLEN-3:0] - - Report the PMP addresses - * - ``rvfi_lsu_ctrl_o`` - - out - - SUBSYSTEM - - lsu_ctrl_t - - Information dedicated to RVFI - - * - ``rvfi_mem_paddr_o`` - - out - - SUBSYSTEM - - [riscv::PLEN-1:0] - - Information dedicated to RVFI +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_mode_i`` input is tied to 0 +| As EnableAccelerator = 0, +| ``stall_st_pending_i`` input is tied to 0 +| ``acc_valid_i`` input is tied to 0 +| As RVA = 0, +| ``amo_valid_commit_i`` input is tied to 0 +| ``amo_req_o`` output is tied to 0 +| ``amo_resp_i`` input is tied to 0 +| As RVF = 0, +| ``fpu_ready_o`` output is tied to 0 +| ``fpu_valid_i`` input is tied to 0 +| ``fpu_fmt_i`` input is tied to 0 +| ``fpu_rm_i`` input is tied to 0 +| ``fpu_frm_i`` input is tied to 0 +| ``fpu_prec_i`` input is tied to 0 +| ``fpu_trans_id_o`` output is tied to 0 +| ``fpu_result_o`` output is tied to 0 +| ``fpu_valid_o`` output is tied to 0 +| ``fpu_exception_o`` output is tied to 0 +| As MMUPresent = 0, +| ``flush_tlb_i`` input is tied to 0 +| As PRIV = MachineOnly, +| ``priv_lvl_i`` input is tied to MachineMode +| ``ld_st_priv_lvl_i`` input is tied to MachineMode +| As PerfCounterEn = 0, +| ``itlb_miss_o`` output is tied to 0 +| ``dtlb_miss_o`` output is tied
to 0 +| As IsRVFI = 0, +| ``rvfi_lsu_ctrl_o`` output is tied to 0 +| ``rvfi_mem_paddr_o`` output is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_frontend.rst b/docs/04_cv32a65x_design/source/port_frontend.rst index 1d8b6b31a3..49405e9d24 100644 --- a/docs/04_cv32a65x_design/source/port_frontend.rst +++ b/docs/04_cv32a65x_design/source/port_frontend.rst @@ -14,126 +14,116 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in - - CONTROLLER - - logic - Fetch flush request - - * - ``flush_bp_i`` - - in - - zero + - CONTROLLER - logic - - flush branch prediction * - ``halt_i`` - in - - CONTROLLER - - logic - halt commit stage - - * - ``debug_mode_i`` - - in - - CSR + - CONTROLLER - logic - - Debug mode state * - ``boot_addr_i`` - in + - Next PC when reset - SUBSYSTEM - logic[riscv::VLEN-1:0] - - Next PC when reset * - ``resolved_branch_i`` - in + - mispredict event and next PC - EXECUTE - bp_resolve_t - - mispredict event and next PC * - ``set_pc_commit_i`` - in + - Set the PC coming from COMMIT as next PC - CONTROLLER - logic - - Set the PC coming from COMMIT as next PC * - ``pc_commit_i`` - in + - Next PC when flushing pipeline - COMMIT - logic[riscv::VLEN-1:0] - - Next PC when flushing pipeline * - ``epc_i`` - in + - Next PC when returning from exception - CSR - logic[riscv::VLEN-1:0] - - Next PC when returning from exception * - ``eret_i`` - in + - Return from exception event - CSR - logic - - Return from exception event * - ``trap_vector_base_i`` - in + - Next PC when jumping into exception - CSR - logic[riscv::VLEN-1:0] - - Next PC when jumping into exception * - ``ex_valid_i`` - in - - COMMIT - - logic - Exception event - - * - ``set_debug_pc_i`` - - in - - CSR + - COMMIT - logic - - Debug event * - ``icache_dreq_o`` - 
out + - Handshake between CACHE and FRONTEND (fetch) - CACHES - icache_dreq_t - - Handshake between CACHE and FRONTEND (fetch) * - ``icache_dreq_i`` - in + - Handshake between CACHE and FRONTEND (fetch) - CACHES - icache_drsp_t - - Handshake between CACHE and FRONTEND (fetch) * - ``fetch_entry_o`` - out + - Handshake's data between fetch and decode - ID_STAGE - fetch_entry_t - - Handshake's data between fetch and decode * - ``fetch_entry_valid_o`` - out + - Handshake's valid between fetch and decode - ID_STAGE - logic - - Handshake's valid between fetch and decode * - ``fetch_entry_ready_i`` - in + - Handshake's ready between fetch and decode - ID_STAGE - logic - - Handshake's ready between fetch and decode + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +| For any HW configuration, +| ``flush_bp_i`` input is tied to zero +| As DebugEn = 0, +| ``debug_mode_i`` input is tied to 0 +| ``set_debug_pc_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_id_stage.rst b/docs/04_cv32a65x_design/source/port_id_stage.rst index dccc51ed53..aaaa305437 100644 --- a/docs/04_cv32a65x_design/source/port_id_stage.rst +++ b/docs/04_cv32a65x_design/source/port_id_stage.rst @@ -14,144 +14,117 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in - - CONTROLLER - - logic - Fetch flush request - - * - ``debug_req_i`` - - in - - SUBSYSTEM + - CONTROLLER - logic - - Debug (async) request * - ``fetch_entry_i`` - in + - Handshake's data between fetch and decode - FRONTEND - ariane_pkg::fetch_entry_t - - Handshake's data between fetch and decode * - ``fetch_entry_valid_i`` - in + - Handshake's valid between fetch and decode - FRONTEND - logic - - 
Handshake's valid between fetch and decode * - ``fetch_entry_ready_o`` - out + - Handshake's ready between fetch and decode - FRONTEND - logic - - Handshake's ready between fetch and decode * - ``issue_entry_o`` - out + - Handshake's data between decode and issue - ISSUE - ariane_pkg::scoreboard_entry_t - - Handshake's data between decode and issue * - ``orig_instr_o`` - out + - Instruction value - ISSUE - logic[31:0] - - instruction value * - ``issue_entry_valid_o`` - out + - Handshake's valid between decode and issue - ISSUE - logic - - Handshake's valid between decode and issue * - ``is_ctrl_flow_o`` - out + - Report if instruction is a control flow instruction - ISSUE - logic - - Report if instruction is a control flow instruction * - ``issue_instr_ack_i`` - in - - ISSUE - - logic - Handshake's acknowlege between decode and issue - - * - ``rvfi_is_compressed_o`` - - out - - none + - ISSUE - logic - - none - - * - ``priv_lvl_i`` - - in - - CSR - - riscv::priv_lvl_t - - Report current privilege level - - * - ``fs_i`` - - in - - CSR - - riscv::xs_t - - Report floating point extension status - - * - ``frm_i`` - - in - - CSR - - logic[2:0] - - Report floating point dynamic rounding mode - - * - ``vs_i`` - - in - - CSR - - riscv::xs_t - - Report vector extension status * - ``irq_i`` - in + - Level sensitive (async) interrupts - SUBSYSTEM - logic[1:0] - - Level sensitive (async) interrupts * - ``irq_ctrl_i`` - in - - CSR + - Interrupt control status + - CSR_REGFILE - ariane_pkg::irq_ctrl_t - - TBD - - * - ``debug_mode_i`` - - in - - CSR - - logic - - Report if current mode is debug * - ``tvm_i`` - in - - CSR + - Trap virtual memory + - CSR_REGFILE - logic - - TBD * - ``tw_i`` - in - - CSR + - Timeout wait + - CSR_REGFILE - logic - - TBD * - ``tsr_i`` - in - - none + - Trap sret + - CSR_REGFILE - logic - - none + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As DebugEn = 0, +| ``debug_req_i`` input is tied to 0 +| ``debug_mode_i`` input is tied to 0 +| As IsRVFI = 0, +| ``rvfi_is_compressed_o`` output is tied to 0 +| As PRIV = MachineOnly, +| ``priv_lvl_i`` input is tied to MachineMode +| As RVF = 0, +| ``fs_i`` input is tied to 0 +| ``frm_i`` input is tied to 0 +| As RVV = 0, +| ``vs_i`` input is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_instr_queue.rst b/docs/04_cv32a65x_design/source/port_instr_queue.rst index 04c5d2818d..6041227c20 100644 --- a/docs/04_cv32a65x_design/source/port_instr_queue.rst +++ b/docs/04_cv32a65x_design/source/port_instr_queue.rst @@ -14,108 +14,112 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in + - Fetch flush request - CONTROLLER - logic - - Fetch flush request * - ``instr_i`` - in + - Instruction - instr_realign - logic[ariane_pkg::INSTR_PER_FETCH-1:0][31:0] - - Instruction * - ``addr_i`` - in + - Instruction address - instr_realign - logic[ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] - - Instruction address * - ``valid_i`` - in + - Instruction is valid - instr_realign - logic[ariane_pkg::INSTR_PER_FETCH-1:0] - - Instruction is valid * - ``ready_o`` - out + - Handshake’s ready with CACHE - CACHE - logic - - Handshake’s ready with CACHE * - ``consumed_o`` - out + - Indicates instructions consummed, or popped by ID_STAGE - FRONTEND - logic[ariane_pkg::INSTR_PER_FETCH-1:0] - - Indicates instructions consummed, or popped by ID_STAGE * - ``exception_i`` - in + - Exception (which is page-table fault) - CACHE - ariane_pkg::frontend_exception_t - - Exception (which is page-table fault) * - ``exception_addr_i`` - in + - Exception address - CACHE - logic[riscv::VLEN-1:0] 
- - Exception address * - ``predict_address_i`` - in + - Branch predict - FRONTEND - logic[riscv::VLEN-1:0] - - Branch predict * - ``cf_type_i`` - in + - Instruction predict address - FRONTEND - ariane_pkg::cf_t[ariane_pkg::INSTR_PER_FETCH-1:0] - - Instruction predict address * - ``replay_o`` - out + - Replay instruction because one of the FIFO was full - FRONTEND - logic - - Replay instruction because one of the FIFO was full * - ``replay_addr_o`` - out + - Address at which to replay the fetch - FRONTEND - logic[riscv::VLEN-1:0] - - Address at which to replay the fetch * - ``fetch_entry_o`` - out + - Handshake’s data with ID_STAGE - ID_STAGE - ariane_pkg::fetch_entry_t - - Handshake’s data with ID_STAGE * - ``fetch_entry_valid_o`` - out + - Handshake’s valid with ID_STAGE - ID_STAGE - logic - - Handshake’s valid with ID_STAGE * - ``fetch_entry_ready_i`` - in + - Handshake’s ready with ID_STAGE - ID_STAGE - logic - - Handshake’s ready with ID_STAGE + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_instr_realign.rst b/docs/04_cv32a65x_design/source/port_instr_realign.rst index 30f87a5d0b..baf4e921bf 100644 --- a/docs/04_cv32a65x_design/source/port_instr_realign.rst +++ b/docs/04_cv32a65x_design/source/port_instr_realign.rst @@ -14,66 +14,70 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in + - Fetch flush request - CONTROLLER - logic - - Fetch flush request * - ``valid_i`` - in + - 32-bit block is valid - CACHE - logic - - 32-bit block is valid * - ``serving_unaligned_o`` - out + - Instruction is unaligned - FRONTEND - logic - - Instruction is unaligned * - ``address_i`` - in + - 32-bit block address - CACHE - logic[riscv::VLEN-1:0] - - 32-bit block address * - ``data_i`` - in + - 32-bit block - CACHE - logic[FETCH_WIDTH-1:0] - - 32-bit block * - ``valid_o`` - out + - instruction is valid - FRONTEND - logic[INSTR_PER_FETCH-1:0] - - instruction is valid * - ``addr_o`` - out + - Instruction address - FRONTEND - logic[INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] - - Instruction address * - ``instr_o`` - out - - none + - Instruction + - instr_scan&instr_queue - logic[INSTR_PER_FETCH-1:0][31:0] - - none + +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_instr_scan.rst b/docs/04_cv32a65x_design/source/port_instr_scan.rst index 4a88596fb1..a11b0b0d50 100644 --- a/docs/04_cv32a65x_design/source/port_instr_scan.rst +++ b/docs/04_cv32a65x_design/source/port_instr_scan.rst @@ -14,90 +14,94 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``instr_i`` - in + - Instruction to be predecoded - instr_realign - logic[31:0] - - Instruction to be predecoded * - ``rvi_return_o`` - out + - Return instruction - FRONTEND - logic - - Return instruction * - ``rvi_call_o`` - out + - JAL instruction - FRONTEND - logic - - JAL instruction * - ``rvi_branch_o`` - out + - Branch instruction - FRONTEND - logic - - Branch instruction * - ``rvi_jalr_o`` - out + - JALR instruction - FRONTEND - logic - - JALR instruction * - ``rvi_jump_o`` - out + - Unconditional jump instruction - FRONTEND - logic - - Unconditional jump instruction * - ``rvi_imm_o`` - out + - Instruction immediat - FRONTEND - logic[riscv::VLEN-1:0] - - Instruction immediat * - ``rvc_branch_o`` - out + - Branch compressed instruction - FRONTEND - logic - - Branch compressed instruction * - ``rvc_jump_o`` - out + - Unconditional jump compressed instruction - FRONTEND - logic - - Unconditional jump compressed instruction * - ``rvc_jr_o`` - out + - JR compressed instruction - FRONTEND - logic - - JR compressed instruction * - ``rvc_return_o`` - out + - Return compressed instruction - FRONTEND - logic - - Return compressed instruction * - ``rvc_jalr_o`` - out + - JALR compressed instruction - FRONTEND - logic - - JALR compressed instruction * - ``rvc_call_o`` - out + - JAL compressed instruction - FRONTEND - logic - - JAL compressed instruction * - ``rvc_imm_o`` - out + - Instruction compressed immediat - FRONTEND - logic[riscv::VLEN-1:0] - - Instruction compressed immediat + +Due to cv32a65x configuration, some ports are 
tied to a static value. These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_issue_read_operands.rst b/docs/04_cv32a65x_design/source/port_issue_read_operands.rst new file mode 100644 index 0000000000..e68a5d41f8 --- /dev/null +++ b/docs/04_cv32a65x_design/source/port_issue_read_operands.rst @@ -0,0 +1,287 @@ +.. + Copyright 2024 Thales DIS France SAS + Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); + you may not use this file except in compliance with the License. + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + You may obtain a copy of the License at https://solderpad.org/licenses/ + + Original Author: Jean-Roch COULON - Thales + +.. _CVA6_issue_read_operands_ports: + +.. list-table:: issue_read_operands module IO ports + :header-rows: 1 + + * - Signal + - IO + - Description + - connexion + - Type + + * - ``clk_i`` + - in + - Clock + - none + - logic + + * - ``rst_ni`` + - in + - Asynchronous reset active low + - none + - logic + + * - ``flush_i`` + - in + - none + - none + - logic + + * - ``stall_i`` + - in + - none + - none + - logic + + * - ``issue_instr_i`` + - in + - none + - none + - scoreboard_entry_t + + * - ``orig_instr_i`` + - in + - none + - none + - logic[31:0] + + * - ``issue_instr_valid_i`` + - in + - none + - none + - logic + + * - ``issue_ack_o`` + - out + - none + - none + - logic + + * - ``rs1_o`` + - out + - none + - none + - logic[REG_ADDR_SIZE-1:0] + + * - ``rs1_i`` + - in + - none + - none + - riscv::xlen_t + + * - ``rs1_valid_i`` + - in + - none + - none + - logic + + * - ``rs2_o`` + - out + - none + - none + - logic[REG_ADDR_SIZE-1:0] + + * - ``rs2_i`` + - in + - none + - none + - riscv::xlen_t + + * - ``rs2_valid_i`` + - in + - none + - none + - logic + + * - ``rs3_o`` + - out + - none + - none + - logic[REG_ADDR_SIZE-1:0] + + * - ``rs3_i`` + - in + - none + - none + - rs3_len_t + + * - ``rs3_valid_i`` + - in + - none + - 
none + - logic + + * - ``rd_clobber_gpr_i`` + - in + - none + - none + - fu_t[2**REG_ADDR_SIZE-1:0] + + * - ``rd_clobber_fpr_i`` + - in + - none + - none + - fu_t[2**REG_ADDR_SIZE-1:0] + + * - ``fu_data_o`` + - out + - none + - none + - fu_data_t + + * - ``rs1_forwarding_o`` + - out + - unregistered version of fu_data_o.operanda + - none + - riscv::xlen_t + + * - ``rs2_forwarding_o`` + - out + - unregistered version of fu_data_o.operandb + - none + - riscv::xlen_t + + * - ``pc_o`` + - out + - none + - none + - logic[riscv::VLEN-1:0] + + * - ``is_compressed_instr_o`` + - out + - none + - none + - logic + + * - ``flu_ready_i`` + - in + - Fixed latency unit ready to accept a new request + - none + - logic + + * - ``alu_valid_o`` + - out + - Output is valid + - none + - logic + + * - ``branch_valid_o`` + - out + - this is a valid branch instruction + - none + - logic + + * - ``branch_predict_o`` + - out + - none + - none + - branchpredict_sbe_t + + * - ``lsu_ready_i`` + - in + - FU is ready + - none + - logic + + * - ``lsu_valid_o`` + - out + - Output is valid + - none + - logic + + * - ``mult_valid_o`` + - out + - Output is valid + - none + - logic + + * - ``fpu_ready_i`` + - in + - FU is ready + - none + - logic + + * - ``fpu_valid_o`` + - out + - Output is valid + - none + - logic + + * - ``fpu_fmt_o`` + - out + - FP fmt field from instr. + - none + - logic[1:0] + + * - ``fpu_rm_o`` + - out + - FP rm field from instr. + - none + - logic[2:0] + + * - ``csr_valid_o`` + - out + - Output is valid + - none + - logic + + * - ``cvxif_valid_o`` + - out + - none + - none + - logic + + * - ``cvxif_ready_i`` + - in + - none + - none + - logic + + * - ``cvxif_off_instr_o`` + - out + - none + - none + - logic[31:0] + + * - ``waddr_i`` + - in + - none + - none + - logic[CVA6Cfg.NrCommitPorts-1:0][4:0] + + * - ``wdata_i`` + - in + - none + - none + - logic[CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] + + * - ``we_gpr_i`` + - in + - 
none + - none + - logic[CVA6Cfg.NrCommitPorts-1:0] + + * - ``we_fpr_i`` + - in + - none + - none + - logic[CVA6Cfg.NrCommitPorts-1:0] + + * - ``stall_issue_o`` + - out + - stall signal, we do not want to fetch any more entries + - none + - logic + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_issue_stage.rst b/docs/04_cv32a65x_design/source/port_issue_stage.rst index 8326d90c4b..4e91674586 100644 --- a/docs/04_cv32a65x_design/source/port_issue_stage.rst +++ b/docs/04_cv32a65x_design/source/port_issue_stage.rst @@ -14,300 +14,257 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in - - SUBSYSTEM - - logic - Asynchronous reset active low - - * - ``sb_full_o`` - - out - - PERF_COUNTERS + - SUBSYSTEM - logic - - TO_BE_COMPLETED * - ``flush_unissued_instr_i`` - in + - TO_BE_COMPLETED - CONTROLLER - logic - - TO_BE_COMPLETED * - ``flush_i`` - in - - CONTROLLER - - logic - TO_BE_COMPLETED - - * - ``stall_i`` - - in - - ACC_DISPATCHER + - CONTROLLER - logic - - zero when accelerate port is disable * - ``decoded_instr_i`` - in + - Handshake's data with decode stage - ID_STAGE - scoreboard_entry_t - - Handshake's data between decode and issue * - ``orig_instr_i`` - in + - instruction value - ID_STAGE - logic[31:0] - - instruction value * - ``decoded_instr_valid_i`` - in + - Handshake's valid with decode stage - ID_STAGE - logic - - Handshake's valid between decode and issue * - ``is_ctrl_flow_i`` - in + - Is instruction a control flow instruction - ID_STAGE - logic - - Report if instruction is a control flow instruction * - ``decoded_instr_ack_o`` - out + - Handshake's acknowledge with decode stage - ID_STAGE - logic - - Handshake's acknowlege between decode and issue * - ``rs1_forwarding_o`` - out + - rs1 
forwarding - EX_STAGE - [riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``rs2_forwarding_o`` - out + - rs2 forwarding - EX_STAGE - [riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``fu_data_o`` - out + - FU data useful to execute instruction - EX_STAGE - fu_data_t - - TO_BE_COMPLETED * - ``pc_o`` - out + - TO_BE_COMPLETED - EX_STAGE - logic[riscv::VLEN-1:0] - - TO_BE_COMPLETED * - ``is_compressed_instr_o`` - out + - Is compressed instruction - EX_STAGE - logic - - TO_BE_COMPLETED * - ``flu_ready_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic - - TO_BE_COMPLETED * - ``alu_valid_o`` - out + - ALU FU is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``resolve_branch_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic - - TO_BE_COMPLETED * - ``lsu_ready_i`` - in + - Load store unit FU is ready - EX_STAGE - logic - - TO_BE_COMPLETED * - ``lsu_valid_o`` - out + - Load store unit FU is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``branch_valid_o`` - out + - Branch unit is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``branch_predict_o`` - out + - Information of branch prediction - EX_STAGE - branchpredict_sbe_t - - TO_BE_COMPLETED * - ``mult_valid_o`` - out + - Mult FU is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``fpu_ready_i`` - in + - FPU FU is ready - EX_STAGE - logic - - TO_BE_COMPLETED - - * - ``fpu_valid_o`` - - out - - EX_STAGE - - logic - - TO_BE_COMPLETED - - * - ``fpu_fmt_o`` - - out - - EX_STAGE - - logic[1:0] - - Report FP fmt field - - * - ``fpu_rm_o`` - - out - - EX_STAGE - - logic[2:0] - - report FP rm field * - ``csr_valid_o`` - out + - CSR is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``x_issue_valid_o`` - out + - CVXIF FU is valid - EX_STAGE - logic - - TO_BE_COMPLETED * - ``x_issue_ready_i`` - in + - CVXIF is FU ready - EX_STAGE - logic - - TO_BE_COMPLETED * - ``x_off_instr_o`` - out + - CVXIF offloader instruction value - EX_STAGE - logic[31:0] - - TO_BE_COMPLETED - - * - ``issue_instr_o`` - - out - - ACC_DISPATCHER - - scoreboard_entry_t - - 
TO_BE_COMPLETED - - * - ``issue_instr_hs_o`` - - out - - ACC_DISPATCHER - - logic - - TO_BE_COMPLETED * - ``trans_id_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] - - TO_BE_COMPLETED * - ``resolved_branch_i`` - in + - TO_BE_COMPLETED - EX_STAGE - bp_resolve_t - - TO_BE_COMPLETED * - ``wbdata_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] - - TO_BE_COMPLETED * - ``ex_ex_i`` - in + - exception from execute stage or CVXIF - EX_STAGE - exception_t[CVA6Cfg.NrWbPorts-1:0] - - exception from execute stage or CVXIF offloaded instruction * - ``wt_valid_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrWbPorts-1:0] - - TO_BE_COMPLETED * - ``x_we_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic - - TO_BE_COMPLETED * - ``waddr_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0][4:0] - - TO_BE_COMPLETED * - ``wdata_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] - - TO_BE_COMPLETED * - ``we_gpr_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - TO_BE_COMPLETED * - ``we_fpr_i`` - in + - TO_BE_COMPLETED - EX_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - TO_BE_COMPLETED * - ``commit_instr_o`` - out + - TO_BE_COMPLETED - COMMIT_STAGE - scoreboard_entry_t[CVA6Cfg.NrCommitPorts-1:0] - - TO_BE_COMPLETED * - ``commit_ack_i`` - in + - TO_BE_COMPLETED - COMMIT_STAGE - logic[CVA6Cfg.NrCommitPorts-1:0] - - TO_BE_COMPLETED - - * - ``stall_issue_o`` - - out - - PERF_COUNTERS - - logic - - Issue stall - * - ``rvfi_issue_pointer_o`` - - out - - SUBSYSTEM - - logic[TRANS_ID_BITS-1:0] - - Information dedicated to RVFI - - * - ``rvfi_commit_pointer_o`` - - out - - SUBSYSTEM - - logic[CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] - - Information dedicated to RVFI +Due to cv32a65x configuration, some ports are tied to a static value. 
These ports do not appear in the above table, they are listed below + +| As PerfCounterEn = 0, +| ``sb_full_o`` output is tied to 0 +| ``stall_issue_o`` output is tied to 0 +| As EnableAccelerator = 0, +| ``stall_i`` input is tied to 0 +| ``issue_instr_o`` output is tied to 0 +| ``issue_instr_hs_o`` output is tied to 0 +| As RVF = 0, +| ``fpu_valid_o`` output is tied to 0 +| ``fpu_fmt_o`` output is tied to 0 +| ``fpu_rm_o`` output is tied to 0 +| As IsRVFI = 0, +| ``rvfi_issue_pointer_o`` output is tied to 0 +| ``rvfi_commit_pointer_o`` output is tied to 0 diff --git a/docs/04_cv32a65x_design/source/port_ras.rst b/docs/04_cv32a65x_design/source/port_ras.rst index f6d2372f23..f91820b462 100644 --- a/docs/04_cv32a65x_design/source/port_ras.rst +++ b/docs/04_cv32a65x_design/source/port_ras.rst @@ -14,48 +14,52 @@ * - Signal - IO - - Connection - - Type - Description + - connexion + - Type * - ``clk_i`` - in + - Subsystem Clock - SUBSYSTEM - logic - - Subsystem Clock * - ``rst_ni`` - in + - Asynchronous reset active low - SUBSYSTEM - logic - - Asynchronous reset active low * - ``flush_i`` - in + - Fetch flush request - CONTROLLER - logic - - Fetch flush request * - ``push_i`` - in + - Push address in RAS - FRONTEND - logic - - Push address in RAS * - ``pop_i`` - in + - Pop address from RAS - FRONTEND - logic - - Pop address from RAS * - ``data_i`` - in + - Data to be pushed - FRONTEND - logic[riscv::VLEN-1:0] - - Data to be pushed * - ``data_o`` - out + - Popped data - FRONTEND - ariane_pkg::ras_t - - Popped data + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_scoreboard.rst b/docs/04_cv32a65x_design/source/port_scoreboard.rst new file mode 100644 index 0000000000..25f96423de --- /dev/null +++ b/docs/04_cv32a65x_design/source/port_scoreboard.rst @@ -0,0 +1,233 @@ +.. 
+ Copyright 2024 Thales DIS France SAS + Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); + you may not use this file except in compliance with the License. + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + You may obtain a copy of the License at https://solderpad.org/licenses/ + + Original Author: Jean-Roch COULON - Thales + +.. _CVA6_scoreboard_ports: + +.. list-table:: scoreboard module IO ports + :header-rows: 1 + + * - Signal + - IO + - Description + - connexion + - Type + + * - ``clk_i`` + - in + - Clock + - none + - logic + + * - ``rst_ni`` + - in + - Asynchronous reset active low + - none + - logic + + * - ``sb_full_o`` + - out + - none + - none + - logic + + * - ``flush_unissued_instr_i`` + - in + - flush only un-issued instructions + - none + - logic + + * - ``flush_i`` + - in + - flush whole scoreboard + - none + - logic + + * - ``unresolved_branch_i`` + - in + - we have an unresolved branch + - none + - logic + + * - ``rd_clobber_gpr_o`` + - out + - none + - none + - ariane_pkg::fu_t[2**ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rd_clobber_fpr_o`` + - out + - none + - none + - ariane_pkg::fu_t[2**ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs1_i`` + - in + - none + - none + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs1_o`` + - out + - none + - none + - riscv::xlen_t + + * - ``rs1_valid_o`` + - out + - none + - none + - logic + + * - ``rs2_i`` + - in + - none + - none + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs2_o`` + - out + - none + - none + - riscv::xlen_t + + * - ``rs2_valid_o`` + - out + - none + - none + - logic + + * - ``rs3_i`` + - in + - none + - none + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs3_o`` + - out + - none + - none + - rs3_len_t + + * - ``rs3_valid_o`` + - out + - none + - none + - logic + + * - ``commit_instr_o`` + - out + - none + - none + - ariane_pkg::scoreboard_entry_t[CVA6Cfg.NrCommitPorts-1:0] + + * - ``commit_ack_i`` + - in + - none + - none + - 
logic[CVA6Cfg.NrCommitPorts-1:0] + + * - ``decoded_instr_i`` + - in + - none + - none + - ariane_pkg::scoreboard_entry_t + + * - ``orig_instr_i`` + - in + - none + - none + - logic[31:0] + + * - ``decoded_instr_valid_i`` + - in + - none + - none + - logic + + * - ``decoded_instr_ack_o`` + - out + - none + - none + - logic + + * - ``issue_instr_o`` + - out + - none + - none + - ariane_pkg::scoreboard_entry_t + + * - ``orig_instr_o`` + - out + - none + - none + - logic[31:0] + + * - ``issue_instr_valid_o`` + - out + - none + - none + - logic + + * - ``issue_ack_i`` + - in + - none + - none + - logic + + * - ``resolved_branch_i`` + - in + - none + - none + - ariane_pkg::bp_resolve_t + + * - ``trans_id_i`` + - in + - transaction ID at which to write the result back + - none + - logic[CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] + + * - ``wbdata_i`` + - in + - write data in + - none + - logic[CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] + + * - ``ex_i`` + - in + - exception from a functional unit (e.g.: ld/st exception) + - none + - ariane_pkg::exception_t[CVA6Cfg.NrWbPorts-1:0] + + * - ``wt_valid_i`` + - in + - data in is valid + - none + - logic[CVA6Cfg.NrWbPorts-1:0] + + * - ``x_we_i`` + - in + - cvxif we for writeback + - none + - logic + + * - ``rvfi_issue_pointer_o`` + - out + - none + - none + - logic[ariane_pkg::TRANS_ID_BITS-1:0] + + * - ``rvfi_commit_pointer_o`` + - out + - none + - none + - logic[CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] + +Due to cv32a65x configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below + +none diff --git a/docs/04_cv32a65x_design/source/port_scoreboard.rst.ori b/docs/04_cv32a65x_design/source/port_scoreboard.rst.ori new file mode 100644 index 0000000000..ebd3fc64ef --- /dev/null +++ b/docs/04_cv32a65x_design/source/port_scoreboard.rst.ori @@ -0,0 +1,229 @@ +.. 
+ Copyright 2024 Thales DIS France SAS + Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); + you may not use this file except in compliance with the License. + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + You may obtain a copy of the License at https://solderpad.org/licenses/ + + Original Author: Jean-Roch COULON - Thales + +.. _CVA6_scoreboard_ports: + +.. list-table:: scoreboard module IO ports + :header-rows: 1 + + * - Signal + - IO + - Description + - Connection + - Type + + * - ``clk_i`` + - in + - Clock + - TO_BE_COMPLETED + - logic + + * - ``rst_ni`` + - in + - Asynchronous reset active low + - TO_BE_COMPLETED + - logic + + * - ``sb_full_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``flush_unissued_instr_i`` + - in + - flush only un-issued instructions + - TO_BE_COMPLETED + - logic + + * - ``flush_i`` + - in + - flush whole scoreboard + - TO_BE_COMPLETED + - logic + + * - ``unresolved_branch_i`` + - in + - we have an unresolved branch + - TO_BE_COMPLETED + - logic + + * - ``rd_clobber_gpr_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - ariane_pkg::fu_t[2**ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rd_clobber_fpr_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - ariane_pkg::fu_t[2**ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs1_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs1_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - riscv::xlen_t + + * - ``rs1_valid_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``rs2_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs2_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - riscv::xlen_t + + * - ``rs2_valid_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``rs3_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[ariane_pkg::REG_ADDR_SIZE-1:0] + + * - ``rs3_o`` + - out + - 
TO_BE_COMPLETED + - TO_BE_COMPLETED + - rs3_len_t + + * - ``rs3_valid_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``commit_instr_o`` + - out + - TO_BE_COMPLETED + - COMMIT_STAGE + - ariane_pkg::scoreboard_entry_t[CVA6Cfg.NrCommitPorts-1:0] + + * - ``commit_ack_i`` + - in + - Advance the commit pointer when acknowledge + - COMMIT_STAGE + - logic[CVA6Cfg.NrCommitPorts-1:0] + + * - ``decoded_instr_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - ariane_pkg::scoreboard_entry_t + + * - ``orig_instr_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[31:0] + + * - ``decoded_instr_valid_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``decoded_instr_ack_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``issue_instr_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - ariane_pkg::scoreboard_entry_t + + * - ``orig_instr_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[31:0] + + * - ``issue_instr_valid_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``issue_ack_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic + + * - ``resolved_branch_i`` + - in + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - ariane_pkg::bp_resolve_t + + * - ``trans_id_i`` + - in + - transaction ID at which to write the result back + - TO_BE_COMPLETED + - logic[CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] + + * - ``wbdata_i`` + - in + - write data in + - TO_BE_COMPLETED + - logic[CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] + + * - ``ex_i`` + - in + - exception from a functional unit (e.g.: ld/st exception) + - TO_BE_COMPLETED + - ariane_pkg::exception_t[CVA6Cfg.NrWbPorts-1:0] + + * - ``wt_valid_i`` + - in + - data in is valid + - TO_BE_COMPLETED + - logic[CVA6Cfg.NrWbPorts-1:0] + + * - ``x_we_i`` + - in + - cvxif we for writeback + - TO_BE_COMPLETED + - logic + + * - ``rvfi_issue_pointer_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - 
logic[ariane_pkg::TRANS_ID_BITS-1:0] + + * - ``rvfi_commit_pointer_o`` + - out + - TO_BE_COMPLETED + - TO_BE_COMPLETED + - logic[CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] diff --git a/docs/04_cv32a65x_design/source/subsystem.rst b/docs/04_cv32a65x_design/source/subsystem.rst index 784771bfd1..4f507d71bc 100644 --- a/docs/04_cv32a65x_design/source/subsystem.rst +++ b/docs/04_cv32a65x_design/source/subsystem.rst @@ -26,7 +26,12 @@ At least 6 cycles are needed to execute one instruction. Connection with other sub-systems --------------------------------- -[TO BE COMPLETED] +The submodule is connected to : + +* NOC interconnect provides memory content +* COPROCESSOR connects through CV-X-IF coprocessor interface protocol +* TRACER provides support for verification +* TRAP provides traps inputs Parameter configuration diff --git a/docs/scripts/classes.py b/docs/scripts/classes.py new file mode 100644 index 0000000000..cfc79cb145 --- /dev/null +++ b/docs/scripts/classes.py @@ -0,0 +1,38 @@ +# Copyright 2024 Thales DIS France SAS +# +# Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); +# you may not use this file except in compliance with the License. 
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# You may obtain a copy of the License at https://solderpad.org/licenses/ +# +# Original Author: Jean-Roch COULON - Thales + +#!/usr/bin/python3 + + +class Parameter:  # Plain record for one parameter: stores datatype, description and value as given. + def __init__( + self, + datatype, + description, + value, + ): + self.datatype = datatype + self.description = description + self.value = value + + +class PortIO:  # Plain record for one module port: stores name, direction, data_type, description and connexion as given. + def __init__( + self, + name, + direction, + data_type, + description, + connexion, + ): + self.name = name + self.direction = direction + self.data_type = data_type + self.description = description + self.connexion = connexion diff --git a/docs/scripts/define_blacklist.py b/docs/scripts/define_blacklist.py new file mode 100644 index 0000000000..af08fd488c --- /dev/null +++ b/docs/scripts/define_blacklist.py @@ -0,0 +1,84 @@ +# Copyright 2024 Thales DIS France SAS +# +# Licensed under the Solderpad Hardware License, Version 2.1 (the "License"); +# you may not use this file except in compliance with the License. 
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
# You may obtain a copy of the License at https://solderpad.org/licenses/
#
# Original Author: Jean-Roch COULON - Thales

#!/usr/bin/python3


def define_blacklist(parameters):
    """Build the table of names that are tied to a static value.

    Args:
        parameters: mapping from parameter name to an object exposing a
            ``value`` attribute. The entries ``DebugEn``, ``RVV``,
            ``EnableAccelerator``, ``RVF`` and ``RVA`` are read, in that
            order; a missing entry raises ``KeyError``.

    Returns:
        dict mapping a name to a two-item list ``[reason, tied_value]``,
        where ``reason`` is the text ``"As <param> = <value>"`` (or a fixed
        sentence for ``flush_bp_i``) and ``tied_value`` is the static value
        as text.
    """
    black_list = {"flush_bp_i": ["For any HW configuration", "zero"]}

    def tie_off(param, paramvalue, names, tied_value="0"):
        # Each name gets its own fresh list so entries never alias each other.
        for name in names:
            black_list[name] = [f"As {param} = {paramvalue}", tied_value]

    # IsRVFI is not looked up in `parameters`; its value is fixed to "0" here.
    tie_off("IsRVFI", "0", ["RVFI"])

    # Parameters looked up in `parameters`. Names are blacklisted only when
    # the extracted value is exactly the string "0".
    from_config = (
        ("DebugEn", ["set_debug_pc_i", "debug_mode_i", "debug_req_i"]),
        ("RVV", ["vs_i"]),
        ("EnableAccelerator", ["ACC_DISPATCHER"]),
        (
            "RVF",
            [
                "fs_i",
                "frm_i",
                "fpu_valid_o",
                "fpu_ready_o",
                "fpu_fmt_o",
                "fpu_rm_o",
                "fpu_valid_i",
                "fpu_fmt_i",
                "fpu_rm_i",
                "fpu_frm_i",
                "fpu_prec_i",
                "fpu_trans_id_o",
                "fpu_result_o",
                "fpu_exception_o",
            ],
        ),
        ("RVA", ["amo_req_o", "amo_resp_i", "amo_valid_commit_i"]),
    )
    for param, names in from_config:
        paramvalue = parameters[param].value
        if paramvalue == "0":
            tie_off(param, paramvalue, names)

    # TODO PRIV to be added to RTL parameters
    # Both privilege-level ports report the same tied value; the first one
    # previously carried the misspelling "MAchineMode".
    tie_off(
        "PRIV",
        "MachineOnly",
        ["ld_st_priv_lvl_i", "priv_lvl_i"],
        tied_value="MachineMode",
    )
    # tvm_i, tw_i and tsr_i are deliberately left out of the PRIV group
    # (their entries were commented out in the original list).

    # TODO PerfCounterEn to be added to RTL parameters
    tie_off("PerfCounterEn", "0", ["PERF_COUNTERS"])

    # TODO the MMUPresent to be added to RTL parameters
    tie_off("MMUPresent", "0", ["flush_tlb_i"])

    return black_list
"r", encoding="utf-8") as fin: - PRINT_ENABLE = 0 - DESCRIPT = "TO_BE_COMPLETED" + print("Input file " + file_in) + with open(file_in, "r", encoding="utf-8") as fin: + print_enable = 0 + descript = "TO_BE_COMPLETED" for line in fin: if "typedef struct packed" in line: - PRINT_ENABLE = 1 + print_enable = 1 if "cva6_cfg_t" in line: - PRINT_ENABLE = 0 + print_enable = 0 d = re.match(r"^ *(.*) ([\S]*);\n", line) h = re.match(r"^ *\/\/ (.*)\n", line) - if h and PRINT_ENABLE: - DESCRIPT = h.group(1) - if d and PRINT_ENABLE: + if h and print_enable: + descript = h.group(1) + if d and print_enable: parameters[d.group(2)] = Parameter( - d.group(1), DESCRIPT, "TO_BE_COMPLETED" + d.group(1), descript, "TO_BE_COMPLETED" ) - DESCRIPT = "TO_BE_COMPLETED" + descript = "TO_BE_COMPLETED" - FILE_IN = "../core/include/cv32a65x_config_pkg.sv" - a = re.match(r".*\/(.*)_config_pkg.sv", FILE_IN) + file_in = f"../core/include/{target}_config_pkg.sv" + a = re.match(r".*\/(.*)_config_pkg.sv", file_in) module = a.group(1) - fileout = "./04_cv32a65x_design/source/parameters_" + module + ".rst" - print("Input file " + FILE_IN) + fileout = f"./{spec_number}_{target}_design/source/parameters_{module}.rst" + print("Input file " + file_in) print("Output file " + fileout) - with open(FILE_IN, "r", encoding="utf-8") as fin: + with open(file_in, "r", encoding="utf-8") as fin: for line in fin: e = re.match(r"^ +([\S]*): (.*)(?:,|)\n", line) if e: parameters[e.group(1)].value = e.group(2) - with open(FILE_IN, "r", encoding="utf-8") as fin: + with open(file_in, "r", encoding="utf-8") as fin: for line in fin: c = re.match(r"^ +localparam ([\S]*) = (.*);\n", line) if c: @@ -79,6 +69,11 @@ def __init__( variable = variable.replace(",", "") parameters[name].value = variable + return parameters + + +def writeout_parameter_table(fileout, parameters, module): + with open(fileout, "w") as fout: fout.write("..\n") fout.write(" Copyright 2024 Thales DIS France SAS\n") @@ -98,7 +93,7 @@ def __init__( fout.write(" 
# Copyright 2024 Thales DIS France SAS
#
# Licensed under the Solderpad Hardware License, Version 2.1 (the "License");
# you may not use this file except in compliance with the License.
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
# You may obtain a copy of the License at https://solderpad.org/licenses/
#
# Original Author: Jean-Roch COULON - Thales

#!/usr/bin/python3

import re

from classes import Parameter
from classes import PortIO
from define_blacklist import define_blacklist
from parameters_extractor import parameters_extractor
from parameters_extractor import writeout_parameter_table


def _record_tied_port(comments, reason, tied_value, name, direction):
    """Add a "tied to" note for one port, grouping notes that share a reason.

    ``comments`` is a list of ``[reason, text]`` pairs and is updated in
    place: the note is appended to the first pair with the same reason, or a
    new pair is created when none exists yet.
    """
    for comment in comments:
        if comment[0] == reason:
            comment[1] = (
                comment[1]
                + f"\n| ``{name}`` {direction}put is tied to {tied_value}"
            )
            break
    else:
        comments.append(
            [reason, f"``{name}`` {direction}put is tied to {tied_value}"]
        )


if __name__ == "__main__":

    PATH = "04_cv32a65x"
    [spec_number, target] = PATH.split("_")

    print(spec_number, target)
    parameters = parameters_extractor(spec_number, target)

    fileout = f"./{spec_number}_{target}_design/source/parameters_{target}.rst"
    writeout_parameter_table(fileout, parameters, target)

    file = [
        "../core/cva6.sv",
        "../core/frontend/frontend.sv",
        "../core/frontend/bht.sv",
        "../core/frontend/btb.sv",
        "../core/frontend/ras.sv",
        "../core/frontend/instr_queue.sv",
        "../core/frontend/instr_scan.sv",
        "../core/instr_realign.sv",
        "../core/id_stage.sv",
        "../core/issue_stage.sv",
        "../core/ex_stage.sv",
        "../core/commit_stage.sv",
        "../core/controller.sv",
        "../core/csr_regfile.sv",
        "../core/decoder.sv",
        "../core/compressed_decoder.sv",
        "../core/scoreboard.sv",
        "../core/issue_read_operands.sv",
    ]

    black_list = define_blacklist(parameters)

    for filein in file:
        # Notes about tied-off ports are collected per input file.
        comments = []
        # Module name is the file's base name without the ".sv" suffix.
        a = re.match(r".*\/(.*)\.sv", filein)
        module = a.group(1)
        # Built from the same spec_number/target as the parameters table so
        # both outputs always land in the same design directory.
        fileout = f"./{spec_number}_{target}_design/source/port_{module}.rst"
        print("Input file " + filein)
        print("Output file " + fileout)
        ports = []
        with open(filein, "r", encoding="utf-8") as fin:
            description = "none"
            connexion = "none"
            for line in fin:
                # e: a port declaration (direction, data type, name).
                # d: a "// <description> - <connexion>" comment line.
                e = re.match(r"^ +(?:(in|out))put +([\S]*(?: +.* *|)) ([\S]*)\n", line)
                d = re.match(r"^ +\/\/ (.*) - ([\S]*)\n", line)
                if d:
                    description = d.group(1)
                    connexion = d.group(2)
                if e:
                    direction = e.group(1)
                    name = e.group(3).replace(",", "")
                    data_type = e.group(2).replace(" ", "")
                    # A blacklisted connexion takes precedence over a
                    # blacklisted port name.
                    if connexion in black_list:
                        key = connexion
                    elif name in black_list:
                        key = name
                    else:
                        key = None
                    if key is None:
                        ports.append(
                            PortIO(name, direction, data_type, description, connexion)
                        )
                    else:
                        _record_tied_port(
                            comments,
                            black_list[key][0],
                            black_list[key][1],
                            name,
                            direction,
                        )
                    # A description/connexion comment applies to one port only.
                    description = "none"
                    connexion = "none"

        # NOTE(review): whitespace inside the string literals below is
        # reproduced exactly as visible in this view of the file - confirm
        # the intended RST indentation before relying on the output layout.
        with open(fileout, "w", encoding="utf-8") as fout:
            fout.write("..\n")
            fout.write(" Copyright 2024 Thales DIS France SAS\n")
            fout.write(
                ' Licensed under the Solderpad Hardware License, Version 2.1 (the "License");\n'
            )
            fout.write(
                " you may not use this file except in compliance with the License.\n"
            )
            fout.write(" SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1\n")
            fout.write(
                " You may obtain a copy of the License at https://solderpad.org/licenses/\n\n"
            )
            fout.write(" Original Author: Jean-Roch COULON - Thales\n\n")
            fout.write(f".. _CVA6_{module}_ports:\n\n")
            fout.write(f".. list-table:: {module} module IO ports\n")
            fout.write(" :header-rows: 1\n")
            fout.write("\n")
            fout.write(" * - Signal\n")
            fout.write(" - IO\n")
            fout.write(" - Description\n")
            fout.write(" - connexion\n")
            fout.write(" - Type\n")
            for port in ports:
                fout.write("\n")
                fout.write(f" * - ``{port.name}``\n")
                fout.write(f" - {port.direction}\n")
                fout.write(f" - {port.description}\n")
                fout.write(f" - {port.connexion}\n")
                fout.write(f" - {port.data_type}\n")
            fout.write("\n")
            fout.write(
                f"Due to {target} configuration, some ports are tied to a static value. These ports do not appear in the above table, they are listed below\n"
            )
            fout.write("\n")
            for comment in comments:
                fout.write(f"| {comment[0]},\n| {comment[1]}\n")
            if len(comments) == 0:
                fout.write("none\n")
-# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -# You may obtain a copy of the License at https://solderpad.org/licenses/ -# -# Original Author: Jean-Roch COULON - Thales - -#!/usr/bin/python3 - -import re - - -class PortIO: - def __init__( - self, - name, - direction, - data_type, - description, - connection, - ): - self.name = name - self.direction = direction - self.data_type = data_type - self.description = description - self.connection = connection - - -if __name__ == "__main__": - file = [] - file.append("../core/cva6.sv") - file.append("../core/frontend/frontend.sv") - file.append("../core/frontend/bht.sv") - file.append("../core/frontend/btb.sv") - file.append("../core/frontend/ras.sv") - file.append("../core/frontend/instr_queue.sv") - file.append("../core/frontend/instr_scan.sv") - file.append("../core/instr_realign.sv") - file.append("../core/id_stage.sv") - file.append("../core/issue_stage.sv") - file.append("../core/ex_stage.sv") - file.append("../core/commit_stage.sv") - file.append("../core/controller.sv") - file.append("../core/csr_regfile.sv") - - for filein in file: - a = re.match(r".*\/(.*).sv", filein) - module = a.group(1) - fileout = "./04_cv32a65x_design/source/port_" + module + ".rst" - print("Input file " + filein) - print("Output file " + fileout) - ports = [] - with open(filein, "r", encoding="utf-8") as fin: - description = "none" - connection = "none" - for line in fin: - e = re.match(r"^ +(?:(in|out))put +([\S]*(?: +.* *|)) ([\S]*)\n", line) - d = re.match(r"^ +\/\/ (.*) - ([\S]*)\n", line) - if d: - description = d.group(1) - connection = d.group(2) - if e: - name = e.group(3) - name = name.replace(",", "") - data_type = e.group(2) - data_type = data_type.replace(" ", "") - ports.append( - PortIO(name, e.group(1), data_type, description, connection) - ) - description = "none" - connection = "none" - - with open(fileout, "w", encoding="utf-8") as fout: - fout.write("..\n") - fout.write(" Copyright 2024 Thales DIS France SAS\n") - 
fout.write( - ' Licensed under the Solderpad Hardware License, Version 2.1 (the "License");\n' - ) - fout.write( - " you may not use this file except in compliance with the License.\n" - ) - fout.write(" SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1\n") - fout.write( - " You may obtain a copy of the License at https://solderpad.org/licenses/\n\n" - ) - fout.write(" Original Author: Jean-Roch COULON - Thales\n\n") - fout.write(f".. _CVA6_{module}_ports:\n\n") - fout.write(f".. list-table:: {module} module IO ports\n") - fout.write(" :header-rows: 1\n") - fout.write("\n") - fout.write(" * - Signal\n") - fout.write(" - IO\n") - fout.write(" - Connection\n") - fout.write(" - Type\n") - fout.write(" - Description\n") - for i, port in enumerate(ports): - fout.write("\n") - fout.write(f" * - ``{port.name}``\n") - fout.write(f" - {port.direction}\n") - fout.write(f" - {port.connection}\n") - fout.write(f" - {port.data_type}\n") - fout.write(f" - {port.description}\n")