diff --git a/Bender.yml b/Bender.yml index c21ec54dc8..1f978d1b97 100644 --- a/Bender.yml +++ b/Bender.yml @@ -48,9 +48,10 @@ sources: - core/include/cv32a6_imac_sv0_config_pkg.sv - core/include/riscv_pkg.sv - core/include/ariane_pkg.sv - - core/mmu_sv32/cva6_tlb_sv32.sv - - core/mmu_sv32/cva6_mmu_sv32.sv + - core/mmu_unify/cva6_tlb.sv + - core/mmu_unify/cva6_mmu.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/mmu_unify/cva6_ptw.sv - core/cva6_accel_first_pass_decoder_stub.sv - target: cv32a6_imac_sv32 @@ -58,9 +59,10 @@ sources: - core/include/cv32a6_imac_sv32_config_pkg.sv - core/include/riscv_pkg.sv - core/include/ariane_pkg.sv - - core/mmu_sv32/cva6_tlb_sv32.sv - - core/mmu_sv32/cva6_mmu_sv32.sv + - core/mmu_unify/cva6_tlb.sv + - core/mmu_unify/cva6_mmu.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/mmu_unify/cva6_ptw.sv - core/cva6_accel_first_pass_decoder_stub.sv - target: cv32a6_imafc_sv32 @@ -68,9 +70,10 @@ sources: - core/include/cv32a6_imafc_sv32_config_pkg.sv - core/include/riscv_pkg.sv - core/include/ariane_pkg.sv - - core/mmu_sv32/cva6_tlb_sv32.sv - - core/mmu_sv32/cva6_mmu_sv32.sv + - core/mmu_unify/cva6_tlb.sv + - core/mmu_unify/cva6_mmu.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/mmu_unify/cva6_ptw.sv - core/cva6_accel_first_pass_decoder_stub.sv # included via target core/include/${TARGET_CFG}_config_pkg.sv diff --git a/ariane.core b/ariane.core index 501f296c14..577605276a 100644 --- a/ariane.core +++ b/ariane.core @@ -35,18 +35,21 @@ filesets: - src/miss_handler.sv - src/mmu_sv39/mmu.sv - src/mmu_sv32/cva6_mmu_sv32.sv + - src/mmu_unify/cva6_mmu.sv - src/mult.sv - src/nbdcache.sv - src/pcgen_stage.sv - src/perf_counters.sv - src/mmu_sv39/ptw.sv - src/mmu_sv32/cva6_ptw_sv32.sv + - src/mmu_unify/cva6_ptw.sv - src/regfile_ff.sv - src/scoreboard.sv - src/store_buffer.sv - src/store_unit.sv - src/mmu_sv39/tlb.sv - src/mmu_sv32/cva6_tlb_sv32.sv + - src/mmu_unify/cva6_tlb.sv file_type : systemVerilogSource depend : - pulp-platform.org::axi_mem_if diff --git a/core/Flist.cva6 b/core/Flist.cva6 index 46a541603e..636398304d 100644 --- a/core/Flist.cva6 +++ b/core/Flist.cva6 @@ -176,15 +176,10 @@ ${CVA6_REPO_DIR}/common/local/util/tc_sram_wrapper.sv ${CVA6_REPO_DIR}/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv ${CVA6_REPO_DIR}/common/local/util/sram.sv -// MMU Sv39 -${CVA6_REPO_DIR}/core/mmu_sv39/mmu.sv -${CVA6_REPO_DIR}/core/mmu_sv39/ptw.sv -${CVA6_REPO_DIR}/core/mmu_sv39/tlb.sv - -// MMU Sv32 -${CVA6_REPO_DIR}/core/mmu_sv32/cva6_mmu_sv32.sv -${CVA6_REPO_DIR}/core/mmu_sv32/cva6_ptw_sv32.sv -${CVA6_REPO_DIR}/core/mmu_sv32/cva6_tlb_sv32.sv -${CVA6_REPO_DIR}/core/mmu_sv32/cva6_shared_tlb_sv32.sv +// MMU Unify +${CVA6_REPO_DIR}/core/mmu_unify/cva6_mmu.sv +${CVA6_REPO_DIR}/core/mmu_unify/cva6_tlb.sv +${CVA6_REPO_DIR}/core/mmu_unify/cva6_shared_tlb.sv +${CVA6_REPO_DIR}/core/mmu_unify/cva6_ptw.sv // end of manifest diff --git a/core/include/cv64a6_imafdc_sv39_config_pkg.sv b/core/include/cv64a6_imafdc_sv39_config_pkg.sv index 0865c0431d..7644769996 100644 --- a/core/include/cv64a6_imafdc_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -55,8 +55,8 @@ package cva6_config_pkg; localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrLoadBufEntries = 2; - localparam CVA6ConfigInstrTlbEntries = 16; - localparam CVA6ConfigDataTlbEntries = 16; + localparam CVA6ConfigInstrTlbEntries = 2; + localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigBTBEntries = 32; diff --git a/core/include/riscv_pkg.sv 
b/core/include/riscv_pkg.sv index 2a9d919c1a..da1ebe00eb 100644 --- a/core/include/riscv_pkg.sv +++ b/core/include/riscv_pkg.sv @@ -39,7 +39,9 @@ package riscv; // Warning: VLEN must be superior or equal to PLEN localparam VLEN = (XLEN == 32) ? 32 : 64; // virtual address length localparam PLEN = (XLEN == 32) ? 34 : 56; // physical address length - + localparam GPLEN = (XLEN == 32) ? 34 : 41; + localparam GPPNW = (XLEN == 32) ? 22 : 29; + localparam GPPN2 = (XLEN == 32) ? riscv::VLEN - 33 : 10; localparam IS_XLEN32 = (XLEN == 32) ? 1'b1 : 1'b0; localparam IS_XLEN64 = (XLEN == 32) ? 1'b0 : 1'b1; localparam ModeW = (XLEN == 32) ? 1 : 4; @@ -326,19 +328,24 @@ package riscv; // ---------------------- localparam logic [XLEN-1:0] INSTR_ADDR_MISALIGNED = 0; localparam logic [XLEN-1:0] INSTR_ACCESS_FAULT = 1; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2; - localparam logic [XLEN-1:0] BREAKPOINT = 3; - localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4; - localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6; - localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8; // environment call from user mode - localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode - localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11; // environment call from machine mode - localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12; // Instruction page fault - localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13; // Load page fault - localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15; // Store page fault - localparam logic [XLEN-1:0] DEBUG_REQUEST = 24; // Debug request + localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2; + localparam logic [XLEN-1:0] BREAKPOINT = 3; + localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4; + localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs + localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6; + localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs + localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8; // environment call from user mode or virtual user mode + localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9; // environment call from hypervisor-extended supervisor mode + localparam logic [XLEN-1:0] ENV_CALL_VSMODE = 10; // environment call from virtual supervisor mode + localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11; // environment call from machine mode + localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12; // Instruction page fault + localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13; // Load page fault + localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15; // Store page fault + localparam logic [XLEN-1:0] INSTR_GUEST_PAGE_FAULT= 20; // Instruction guest-page fault + localparam logic [XLEN-1:0] LOAD_GUEST_PAGE_FAULT = 21; // Load guest-page fault + localparam logic [XLEN-1:0] VIRTUAL_INSTRUCTION = 22; // virtual instruction + localparam logic [XLEN-1:0] STORE_GUEST_PAGE_FAULT= 23; // Store guest-page fault + localparam logic [XLEN-1:0] DEBUG_REQUEST = 24; // Debug request localparam int unsigned IRQ_S_SOFT = 1; localparam int unsigned IRQ_M_SOFT = 3; @@ -361,6 +368,14 @@ package riscv; localparam logic [XLEN-1:0] S_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_EXT); localparam logic [XLEN-1:0] M_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_EXT); + // 
---------------------- + // PseudoInstructions Codes + // ---------------------- + localparam logic [XLEN-1:0] READ_32_PSEUDOINSTRUCTION = 32'h00002000; + localparam logic [XLEN-1:0] WRITE_32_PSEUDOINSTRUCTION = 32'h00002020; + localparam logic [XLEN-1:0] READ_64_PSEUDOINSTRUCTION = 64'h00003000; + localparam logic [XLEN-1:0] WRITE_64_PSEUDOINSTRUCTION = 64'h00003020; + // ----- // CSRs // ----- diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 94e1c4f119..5abed4452e 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -140,63 +140,65 @@ module load_store_unit // ------------------- // MMU e.g.: TLBs/PTW // ------------------- - if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39 - mmu #( - .CVA6Cfg (CVA6Cfg), - .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), - .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) - ) i_cva6_mmu ( - // misaligned bypass - .misaligned_ex_i(misaligned_exception), - .lsu_is_store_i (st_translation_req), - .lsu_req_i (translation_req), - .lsu_vaddr_i (mmu_vaddr), - .lsu_valid_o (translation_valid), - .lsu_paddr_o (mmu_paddr), - .lsu_exception_o(mmu_exception), - .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request - .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request - // connecting PTW to D$ IF - .req_port_i (dcache_req_ports_i[0]), - .req_port_o (dcache_req_ports_o[0]), - // icache address translation requests - .icache_areq_i (icache_areq_i), - .asid_to_be_flushed_i, - .vaddr_to_be_flushed_i, - .icache_areq_o (icache_areq_o), - .pmpcfg_i, - .pmpaddr_i, - .* - ); - end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32 - cva6_mmu_sv32 #( + if (MMU_PRESENT) begin : gen_mmu + + localparam HYP_EXT = 0; //CVA6Cfg.CVA6ConfigHExtEn + localparam VPN_LEN = (riscv::XLEN == 64) ? (HYP_EXT ? 29 : 27) : 20; + localparam PT_LEVELS = (riscv::XLEN == 64) ? 
3 : 2; + localparam int unsigned mmu_ASID_WIDTH[HYP_EXT:0] = {ASID_WIDTH}; + + + cva6_mmu #( .CVA6Cfg (CVA6Cfg), .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .HYP_EXT (HYP_EXT), + .ASID_WIDTH (mmu_ASID_WIDTH), + .VPN_LEN (VPN_LEN), + .PT_LEVELS (PT_LEVELS) ) i_cva6_mmu ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .enable_translation_i ({enable_translation_i}), + .en_ld_st_translation_i({en_ld_st_translation_i}), + .icache_areq_i (icache_areq_i), + .icache_areq_o (icache_areq_o), // misaligned bypass - .misaligned_ex_i(misaligned_exception), - .lsu_is_store_i (st_translation_req), - .lsu_req_i (translation_req), - .lsu_vaddr_i (mmu_vaddr), + .misaligned_ex_i (misaligned_exception), + .lsu_req_i (translation_req), + .lsu_vaddr_i (mmu_vaddr), + .lsu_tinst_i (0), + .lsu_is_store_i (st_translation_req), + .csr_hs_ld_st_inst_o (), + .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request + .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request + .lsu_valid_o (translation_valid), .lsu_paddr_o (mmu_paddr), .lsu_exception_o(mmu_exception), - .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request - .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request - // connecting PTW to D$ IF - .req_port_i (dcache_req_ports_i[0]), - .req_port_o (dcache_req_ports_o[0]), - // icache address translation requests - .icache_areq_i (icache_areq_i), - .asid_to_be_flushed_i, - .vaddr_to_be_flushed_i, - .icache_areq_o (icache_areq_o), + + .priv_lvl_i (priv_lvl_i), + .ld_st_priv_lvl_i(ld_st_priv_lvl_i), + + .sum_i ({sum_i}), + .mxr_i ({mxr_i}), + .hlvx_inst_i (0), + .hs_ld_st_inst_i(0), + + .satp_ppn_i ({satp_ppn_i}), + .asid_i ({asid_i}), + .asid_to_be_flushed_i ({asid_to_be_flushed_i}), + .vaddr_to_be_flushed_i({vaddr_to_be_flushed_i}), + .flush_tlb_i ({flush_tlb_i}), + + .itlb_miss_o(itlb_miss_o), + .dtlb_miss_o(dtlb_miss_o), + + .req_port_i(dcache_req_ports_i[0]), + .req_port_o(dcache_req_ports_o[0]), .pmpcfg_i, - .pmpaddr_i, - .* + .pmpaddr_i ); end else begin : gen_no_mmu diff --git a/core/mmu_unify/README.md b/core/mmu_unify/README.md new file mode 100644 index 0000000000..43a6523f6d --- /dev/null +++ b/core/mmu_unify/README.md @@ -0,0 +1 @@ +Unification of MMUs: sv32, sv39 and sv39x4 \ No newline at end of file diff --git a/core/mmu_unify/cva6_mmu.sv b/core/mmu_unify/cva6_mmu.sv new file mode 100644 index 0000000000..ccd30badb8 --- /dev/null +++ b/core/mmu_unify/cva6_mmu.sv @@ -0,0 +1,916 @@ +// Copyright (c) 2021 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Angela Gonzalez, PlanV Technology +// Date: 14/02/2024 +// +// Description: Memory Management Unit for CVA6, contains TLB and +// address translation unit. SV32 SV39 and SV39x4 as defined in RISC-V +// privilege specification 1.11-WIP. 
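+// (sv32: PT_LEVELS = 2 with 32-bit virtual addresses; sv39: PT_LEVELS = 3 with
+// 39-bit virtual addresses; sv39x4: sv39 plus a G-stage translation yielding
+// 41-bit guest physical addresses when the hypervisor extension is enabled.)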
+// This module is a merge of the MMU Sv39 developed
+// by Florian Zaruba, the MMU Sv32 developed by Sebastien Jacq and the MMU Sv39x4 developed by Bruno Sá.
+
+
+module cva6_mmu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    // parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig, //This is the required config param in the hypervisor version for now
+    parameter int unsigned INSTR_TLB_ENTRIES = 4,
+    parameter int unsigned DATA_TLB_ENTRIES = 4,
+    parameter logic HYP_EXT = 0,
+    parameter int unsigned ASID_WIDTH[HYP_EXT:0],
+    parameter int unsigned VPN_LEN = 1,
+    parameter int unsigned PT_LEVELS = 1
+
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,
+    input logic [HYP_EXT*2:0] enable_translation_i,  //[v_i,enable_g_translation,enable_translation]
+    input logic [HYP_EXT*2:0] en_ld_st_translation_i,  // enable virtual memory translation for ld/st
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // input icache_areq_o_t icache_areq_i, this is the data type in the hypervisor version for now
+    // output icache_areq_i_t icache_areq_o,
+
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input riscv::xlen_t lsu_tinst_i,  // transformed instruction in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    output logic csr_hs_ld_st_inst_o,  // hyp load store instruction
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic lsu_dtlb_hit_o,  // sent in same cycle as the request if translation hits in DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,
+    input logic [HYP_EXT:0] sum_i,
+    input logic [HYP_EXT:0] mxr_i,
+    input logic hlvx_inst_i,
+    input logic hs_ld_st_inst_i,
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i[HYP_EXT*2:0],  //[hgatp,vsatp,satp]
+
+    input logic [ASID_WIDTH[0]-1:0] asid_i[HYP_EXT*2:0],  //[vmid,vs_asid,asid]
+    input logic [ASID_WIDTH[0]-1:0] asid_to_be_flushed_i[HYP_EXT:0],
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i[HYP_EXT:0],
+
+    input logic [HYP_EXT*2:0] flush_tlb_i,
+
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+  logic [ASID_WIDTH[0]-1:0] dtlb_mmu_asid_i[HYP_EXT:0];
+  logic [ASID_WIDTH[0]-1:0] itlb_mmu_asid_i[HYP_EXT:0];
+
+  genvar b;
+  generate
+    for (b = 0; b < HYP_EXT + 1; b++) begin : gen_tlbs_asid
+      assign dtlb_mmu_asid_i[b] = b==0 ?
+          ((en_ld_st_translation_i[2*HYP_EXT] || flush_tlb_i[HYP_EXT]) ? asid_i[HYP_EXT] : asid_i[0]):
+          asid_i[HYP_EXT*2];
+      assign itlb_mmu_asid_i[b] = b==0 ?
+          (enable_translation_i[2*HYP_EXT] ?
asid_i[HYP_EXT] : asid_i[0]): + asid_i[HYP_EXT*2]; + end + endgenerate + + // memory management, pte for cva6 + localparam type pte_cva6_t = struct packed { + logic [riscv::PPNW-1:0] ppn; // PPN length for + logic [1:0] rsw; + logic d; + logic a; + logic g; + logic u; + logic x; + logic w; + logic r; + logic v; + }; + + localparam type tlb_update_cva6_t = struct packed { + logic valid; + logic [PT_LEVELS-2:0][HYP_EXT:0] is_page; + logic [VPN_LEN-1:0] vpn; + logic [HYP_EXT:0][ASID_WIDTH[0]-1:0] asid; + logic [HYP_EXT*2:0] v_st_enbl; // v_i,g-stage enabled, s-stage enabled + pte_cva6_t [HYP_EXT:0] content; + }; + + logic [HYP_EXT:0] iaccess_err; // insufficient privilege to access this instruction page + logic [HYP_EXT:0] daccess_err; // insufficient privilege to access this data page + logic ptw_active; // PTW is currently walking a page table + logic walking_instr; // PTW is walking because of an ITLB miss + logic [HYP_EXT*2:0] ptw_error; // PTW threw an exception + logic ptw_access_exception; // PTW threw an access exception (PMPs) + logic [HYP_EXT:0][riscv::PLEN-1:0] ptw_bad_paddr; // PTW guest page fault bad guest physical addr + + logic [riscv::VLEN-1:0] update_vaddr, shared_tlb_vaddr; + + tlb_update_cva6_t update_itlb, update_dtlb, update_shared_tlb; + + logic itlb_lu_access; + pte_cva6_t [ HYP_EXT:0] itlb_content; + logic [ PT_LEVELS-2:0] itlb_is_page; + logic itlb_lu_hit; + logic [ riscv::GPLEN-1:0] itlb_gpaddr; + logic [ASID_WIDTH[0]-1:0] itlb_lu_asid; + + logic dtlb_lu_access; + pte_cva6_t [ HYP_EXT:0] dtlb_content; + logic [ PT_LEVELS-2:0] dtlb_is_page; + logic [ASID_WIDTH[0]-1:0] dtlb_lu_asid; + logic dtlb_lu_hit; + logic [ riscv::GPLEN-1:0] dtlb_gpaddr; + + logic shared_tlb_access; + logic shared_tlb_hit, itlb_req; + + // Assignments + + assign itlb_lu_access = icache_areq_i.fetch_req; + assign dtlb_lu_access = lsu_req_i; + + + cva6_tlb #( + .pte_cva6_t (pte_cva6_t), + .tlb_update_cva6_t(tlb_update_cva6_t), + .TLB_ENTRIES (INSTR_TLB_ENTRIES), + .HYP_EXT (HYP_EXT), + .ASID_WIDTH (ASID_WIDTH), + .VPN_LEN (VPN_LEN), + .PT_LEVELS (PT_LEVELS) + ) i_itlb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_tlb_i), + .v_st_enbl_i (enable_translation_i), + .update_i (update_itlb), + .lu_access_i (itlb_lu_access), + .lu_asid_i (itlb_mmu_asid_i), + .asid_to_be_flushed_i (asid_to_be_flushed_i), + .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i), + .lu_vaddr_i (icache_areq_i.fetch_vaddr), + .lu_content_o (itlb_content), + .lu_gpaddr_o (itlb_gpaddr), + .lu_is_page_o (itlb_is_page), + .lu_hit_o (itlb_lu_hit) + ); + + cva6_tlb #( + .pte_cva6_t (pte_cva6_t), + .tlb_update_cva6_t(tlb_update_cva6_t), + .TLB_ENTRIES (DATA_TLB_ENTRIES), + .HYP_EXT (HYP_EXT), + .ASID_WIDTH (ASID_WIDTH), + .VPN_LEN (VPN_LEN), + .PT_LEVELS (PT_LEVELS) + ) i_dtlb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_tlb_i), + .v_st_enbl_i (en_ld_st_translation_i), + .update_i (update_dtlb), + .lu_access_i (dtlb_lu_access), + .lu_asid_i (dtlb_mmu_asid_i), + .asid_to_be_flushed_i (asid_to_be_flushed_i), + .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i), + .lu_vaddr_i (lsu_vaddr_i), + .lu_content_o (dtlb_content), + .lu_gpaddr_o (dtlb_gpaddr), + .lu_is_page_o (dtlb_is_page), + .lu_hit_o (dtlb_lu_hit) + ); + + + cva6_shared_tlb #( + .SHARED_TLB_DEPTH (64), + .SHARED_TLB_WAYS (2), + .HYP_EXT (HYP_EXT), + .ASID_WIDTH (ASID_WIDTH), + .VPN_LEN (VPN_LEN), + .PT_LEVELS (PT_LEVELS), + .pte_cva6_t (pte_cva6_t), + .tlb_update_cva6_t(tlb_update_cva6_t) + ) i_shared_tlb ( + .clk_i(clk_i), + .rst_ni(rst_ni), + 
.flush_i(flush_tlb_i), + .v_st_enbl_i({enable_translation_i, en_ld_st_translation_i}), + + .dtlb_asid_i (dtlb_mmu_asid_i), + .itlb_asid_i (itlb_mmu_asid_i), + // from TLBs + // did we miss? + .itlb_access_i(itlb_lu_access), + .itlb_hit_i (itlb_lu_hit), + .itlb_vaddr_i (icache_areq_i.fetch_vaddr), + + .dtlb_access_i(dtlb_lu_access), + .dtlb_hit_i (dtlb_lu_hit), + .dtlb_vaddr_i (lsu_vaddr_i), + + // to TLBs, update logic + .itlb_update_o(update_itlb), + .dtlb_update_o(update_dtlb), + + // Performance counters + .itlb_miss_o(itlb_miss_o), + .dtlb_miss_o(dtlb_miss_o), + + .shared_tlb_access_o(shared_tlb_access), + .shared_tlb_hit_o (shared_tlb_hit), + .shared_tlb_vaddr_o (shared_tlb_vaddr), + + .itlb_req_o (itlb_req), + // to update shared tlb + .shared_tlb_update_i(update_shared_tlb) + ); + + cva6_ptw #( + .CVA6Cfg (CVA6Cfg), + // .ArianeCfg ( ArianeCfg ), this is the configuration needed in the hypervisor extension for now + .pte_cva6_t (pte_cva6_t), + .tlb_update_cva6_t(tlb_update_cva6_t), + .HYP_EXT (HYP_EXT), + .ASID_WIDTH (ASID_WIDTH), + .VPN_LEN (VPN_LEN), + .PT_LEVELS (PT_LEVELS) + ) i_ptw ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(flush_i), + + .ptw_active_o (ptw_active), + .walking_instr_o (walking_instr), + .ptw_error_o (ptw_error), + .ptw_access_exception_o(ptw_access_exception), + + .enable_translation_i (enable_translation_i), + .en_ld_st_translation_i(en_ld_st_translation_i), + + .lsu_is_store_i(lsu_is_store_i), + // PTW memory interface + .req_port_i (req_port_i), + .req_port_o (req_port_o), + // .enable_translation_i ( enable_translation_i ), + // .en_ld_st_translation_i ( en_ld_st_translation_i), + .asid_i (asid_i), + + .update_vaddr_o(update_vaddr), + + // to Shared TLB, update logic + .shared_tlb_update_o(update_shared_tlb), + + + // from shared TLB + // did we miss? + .shared_tlb_access_i(shared_tlb_access), + .shared_tlb_hit_i (shared_tlb_hit), + .shared_tlb_vaddr_i (shared_tlb_vaddr), + + .itlb_req_i (itlb_req), + // .dtlb_access_i ( dtlb_lu_access ), + // .dtlb_hit_i ( dtlb_lu_hit ), + // .dtlb_vaddr_i ( lsu_vaddr_i ), + .hlvx_inst_i(hlvx_inst_i), + // from CSR file + .satp_ppn_i (satp_ppn_i), + .mxr_i (mxr_i), + + // Performance counters + .shared_tlb_miss_o(), //open for now + + // PMP + .pmpcfg_i (pmpcfg_i), + .pmpaddr_i (pmpaddr_i), + .bad_paddr_o(ptw_bad_paddr) + + ); + + // ila_1 i_ila_1 ( + // .clk(clk_i), // input wire clk + // .probe0({req_port_o.address_tag, req_port_o.address_index}), + // .probe1(req_port_o.data_req), // input wire [63:0] probe1 + // .probe2(req_port_i.data_gnt), // input wire [0:0] probe2 + // .probe3(req_port_i.data_rdata), // input wire [0:0] probe3 + // .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4 + // .probe5(ptw_error), // input wire [1:0] probe5 + // .probe6(update_vaddr), // input wire [0:0] probe6 + // .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7 + // .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8 + // .probe9(dtlb_lu_access), // input wire [0:0] probe9 + // .probe10(lsu_vaddr_i), // input wire [0:0] probe10 + // .probe11(dtlb_lu_hit), // input wire [0:0] probe11 + // .probe12(itlb_lu_access), // input wire [0:0] probe12 + // .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13 + // .probe14(itlb_lu_hit) // input wire [0:0] probe13 + // ); + + //----------------------- + // Instruction Interface + //----------------------- + logic match_any_execute_region; + logic pmp_instr_allow; + localparam int PPNWMin = (riscv::PPNW - 1 > 29) ? 
29 : riscv::PPNW - 1; + + assign icache_areq_o.fetch_paddr[11:0] = icache_areq_i.fetch_vaddr[11:0]; + assign icache_areq_o.fetch_paddr[riscv::PLEN-1:PPNWMin+1] = // + (|enable_translation_i[HYP_EXT:0]) ? // + (enable_translation_i[HYP_EXT] ? itlb_content[HYP_EXT].ppn[riscv::PPNW-1:(riscv::PPNW - (riscv::PLEN - PPNWMin-1))] : + itlb_content[0].ppn[riscv::PPNW-1:(riscv::PPNW - (riscv::PLEN - PPNWMin-1))] ): // + (riscv::PLEN-PPNWMin-1)'(icache_areq_i.fetch_vaddr[((riscv::PLEN > riscv::VLEN) ? riscv::VLEN : riscv::PLEN )-1:PPNWMin+1]); + genvar a; + generate + + for (a = 0; a < PT_LEVELS - 1; a++) begin : gen_fetch_paddr + assign icache_areq_o.fetch_paddr [PPNWMin-((VPN_LEN/PT_LEVELS)*(a)):PPNWMin-((VPN_LEN/PT_LEVELS)*(a+1))+1] = // + (|enable_translation_i[HYP_EXT:0] && (|itlb_is_page[a:0] == 0)) ? // + (enable_translation_i[HYP_EXT] ? itlb_content[HYP_EXT].ppn [(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(a))-1):(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(a+1)))]: + itlb_content[0].ppn [(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(a))-1):(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(a+1)))]) : // + icache_areq_i.fetch_vaddr[PPNWMin-((VPN_LEN/PT_LEVELS)*(a)):PPNWMin-((VPN_LEN/PT_LEVELS)*(a+1))+1]; + end + endgenerate + +// The instruction interface is a simple request response interface +always_comb begin : instr_interface + // MMU disabled: just pass through + icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + // icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation + // // two potential exception sources: + // 1. HPTW threw an exception -> signal with a page fault exception + // 2. We got an access error because of insufficient permissions -> throw an access exception + icache_areq_o.fetch_exception = '0; + // Check whether we are allowed to access this memory region from a fetch perspective + iaccess_err[0] = icache_areq_i.fetch_req && (enable_translation_i[0] || HYP_EXT == 0) && // + (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content[0].u) // + || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content[0].u)); + + if (HYP_EXT == 1) + iaccess_err[HYP_EXT] = icache_areq_i.fetch_req && enable_translation_i[HYP_EXT] && !itlb_content[HYP_EXT].u; + // MMU enabled: address from TLB, request delayed until hit. Error when TLB + // hit and no access right or TLB hit and translated address not valid (e.g. + // AXI decode error), or when PTW performs walk due to ITLB miss and raises + // an error. 
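+  // Concretely, for sv39 (VLEN = 64, SV = 39) the check below requires bits [63:38]
+  // of the fetch address to be all ones or all zeros (a sign extension of bit 38):
+  // 64'h0000_0020_0000_0000 (bit 37 set) is a legal address, while
+  // 64'h0000_0040_0000_0000 (bit 38 set, bits 63:39 clear) is rejected with an access fault.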
+ if ((|enable_translation_i[HYP_EXT:0])) begin + // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal + if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) + if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {riscv::XLEN'(icache_areq_i.fetch_vaddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[HYP_EXT*2], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, {riscv::XLEN'(icache_areq_i.fetch_vaddr)}, 1'b1 + }; + + icache_areq_o.fetch_valid = 1'b0; + // ---------// + // ITLB Hit + // --------// + // if we hit the ITLB output the request signal immediately + if (itlb_lu_hit) begin + icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + if (HYP_EXT == 1 && iaccess_err[HYP_EXT]) + icache_areq_o.fetch_exception = { + riscv::INSTR_GUEST_PAGE_FAULT, + {riscv::XLEN'(icache_areq_i.fetch_vaddr)}, + itlb_gpaddr[riscv::GPLEN-1:0], + {riscv::XLEN{1'b0}}, + enable_translation_i[HYP_EXT*2], + 1'b1 + }; + // we got an access error + else if (iaccess_err[0]) + // throw a page fault + if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, + {riscv::XLEN'(icache_areq_i.fetch_vaddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[HYP_EXT*2], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + 1'b1 + }; + else if (!pmp_instr_allow) + if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {riscv::XLEN'(icache_areq_i.fetch_vaddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[HYP_EXT*2], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, riscv::XLEN'(icache_areq_i.fetch_vaddr), 1'b1 + }; + end else if (ptw_active && walking_instr) begin + // ---------// + // ITLB Miss + // ---------// + // watch out for exceptions happening during walking the page table + icache_areq_o.fetch_valid = ptw_error[0] | ptw_access_exception; + if (ptw_error[0]) + if (HYP_EXT == 1 && ptw_error[HYP_EXT]) + icache_areq_o.fetch_exception = { + riscv::INSTR_GUEST_PAGE_FAULT, + {riscv::XLEN'(update_vaddr)}, + ptw_bad_paddr[HYP_EXT][riscv::GPLEN-1:0], + (ptw_error[HYP_EXT*2] ? (riscv::IS_XLEN64 ? 
riscv::READ_64_PSEUDOINSTRUCTION : riscv::READ_32_PSEUDOINSTRUCTION) : {riscv::XLEN{1'b0}}), + enable_translation_i[2*HYP_EXT], + 1'b1 + }; + else if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, + {riscv::XLEN'(update_vaddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[2*HYP_EXT], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_PAGE_FAULT, {riscv::XLEN'(update_vaddr)}, 1'b1 + }; + else if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {riscv::XLEN'(update_vaddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[2*HYP_EXT], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + ptw_bad_paddr[0][riscv::PLEN-1:(riscv::PLEN>riscv::VLEN)?(riscv::PLEN-riscv::VLEN) : 0], + 1'b1 + }; + end + end + + // if it didn't match any execute region throw an `Instruction Access Fault` + // or: if we are not translating, check PMPs immediately on the paddr + if ((!match_any_execute_region && (!ptw_error[0]|| HYP_EXT==0) ) || (!(|enable_translation_i[HYP_EXT:0]) && !pmp_instr_allow)) + if (HYP_EXT == 1) + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + {riscv::XLEN'(icache_areq_o.fetch_paddr)}, + {riscv::GPLEN{1'b0}}, + {riscv::XLEN{1'b0}}, + enable_translation_i[2*HYP_EXT], + 1'b1 + }; + else + icache_areq_o.fetch_exception = { + riscv::INSTR_ACCESS_FAULT, + riscv::VLEN'(icache_areq_o.fetch_paddr[riscv::PLEN-1:(riscv::PLEN > riscv::VLEN) ? (riscv::PLEN - riscv::VLEN) : 0]), + 1'b1 + }; +end + +// check for execute flag on memory +assign match_any_execute_region = config_pkg::is_inside_execute_regions( + CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr} +); +// assign match_any_execute_region = ariane_pkg::is_inside_execute_regions(ArianeCfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}); this is the package used in the hypervisor extension for now + +// Instruction fetch +pmp #( + .CVA6Cfg (CVA6Cfg), //comment for hypervisor extension + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + // .NR_ENTRIES ( ArianeCfg.NrPMPEntries ) configuration used in hypervisor extension +) i_pmp_if ( + .addr_i (icache_areq_o.fetch_paddr), + .priv_lvl_i, + // we will always execute on the instruction fetch port + .access_type_i(riscv::ACCESS_EXEC), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (pmp_instr_allow) +); + +//----------------------- +// Data Interface +//----------------------- +logic [HYP_EXT:0][riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; +logic [riscv::XLEN-1:0] lsu_tinst_n, lsu_tinst_q; +logic hs_ld_st_inst_n, hs_ld_st_inst_q; +pte_cva6_t [HYP_EXT:0] dtlb_pte_n, dtlb_pte_q; +exception_t misaligned_ex_n, misaligned_ex_q; +logic lsu_req_n, lsu_req_q; +logic lsu_is_store_n, lsu_is_store_q; +logic dtlb_hit_n, dtlb_hit_q; +logic [PT_LEVELS-2:0] dtlb_is_page_n, dtlb_is_page_q; + +// check if we need to do translation or if we are always ready (e.g.: we are not translating anything) +assign lsu_dtlb_hit_o = (|en_ld_st_translation_i[HYP_EXT:0]) ? dtlb_lu_hit : 1'b1; + +// Wires to PMP checks +riscv::pmp_access_t pmp_access_type; +logic pmp_data_allow; + +assign lsu_paddr_o[11:0] = lsu_vaddr_q[0][11:0]; +assign lsu_paddr_o [riscv::PLEN-1:PPNWMin+1] = (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid) ? // + (en_ld_st_translation_i[HYP_EXT] ? 
dtlb_pte_q[HYP_EXT].ppn[riscv::PPNW-1:(riscv::PPNW - (riscv::PLEN - PPNWMin-1))]: + dtlb_pte_q[0].ppn[riscv::PPNW-1:(riscv::PPNW - (riscv::PLEN - PPNWMin-1))] ): // + (riscv::PLEN-PPNWMin-1)'(lsu_vaddr_q[0][((riscv::PLEN > riscv::VLEN) ? riscv::VLEN : riscv::PLEN )-1:PPNWMin+1]); + +assign lsu_dtlb_ppn_o[11:0] = (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid) ? // + (en_ld_st_translation_i[HYP_EXT] ? dtlb_content[HYP_EXT].ppn[11:0]: + dtlb_content[0].ppn[11:0]) : // + lsu_vaddr_n[0][23:12]; + +genvar i; +generate + + for (i = 0; i < PT_LEVELS - 1; i++) begin : gen_paddr_ppn_o + assign lsu_paddr_o [PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1] = // + (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid && (|dtlb_is_page_q[i:0]==0)) ? // + (en_ld_st_translation_i[HYP_EXT] ? dtlb_pte_q[HYP_EXT].ppn [(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(i))-1):(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(i+1)))]: + dtlb_pte_q[0].ppn [(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(i))-1):(riscv::PPNW - (riscv::PLEN - PPNWMin-1)-((VPN_LEN/PT_LEVELS)*(i+1)))] ): // + lsu_vaddr_q[0][PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1]; + + assign lsu_dtlb_ppn_o[PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1] = // + (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid && (|dtlb_is_page_q[i:0]==0)) ? // + (en_ld_st_translation_i[HYP_EXT] ? dtlb_content[HYP_EXT].ppn[PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1]: + dtlb_content[0].ppn[PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1] ): // + (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid && (|dtlb_is_page_q[i:0]!=0)? + lsu_vaddr_n[0][PPNWMin-((VPN_LEN/PT_LEVELS)*(i)):PPNWMin-((VPN_LEN/PT_LEVELS)*(i+1))+1]:// + (VPN_LEN/PT_LEVELS)'(lsu_vaddr_n[0][((riscv::PLEN > riscv::VLEN) ? riscv::VLEN -1 : (24 + (VPN_LEN/PT_LEVELS)*(PT_LEVELS-i-1) ) -1): (riscv::PLEN > riscv::VLEN) ? 24 :24 + (VPN_LEN/PT_LEVELS)*(PT_LEVELS-i-2)])); + end + if (riscv::IS_XLEN64) begin : gen_ppn_64 + assign lsu_dtlb_ppn_o[riscv::PPNW-1:PPNWMin+1] = (|en_ld_st_translation_i[HYP_EXT:0] && !misaligned_ex_q.valid) ? + (en_ld_st_translation_i[HYP_EXT] ? dtlb_content[HYP_EXT].ppn[riscv::PPNW-1:PPNWMin+1]: + dtlb_content[0].ppn[riscv::PPNW-1:PPNWMin+1] ): + lsu_vaddr_n[0][riscv::PLEN-1:PPNWMin+1] ; + end + +endgenerate + +// The data interface is simpler and only consists of a request/response interface +always_comb begin : data_interface + // save request and DTLB response + lsu_vaddr_n[0] = lsu_vaddr_i; + lsu_tinst_n = lsu_tinst_i; + + lsu_req_n = lsu_req_i; + hs_ld_st_inst_n = hs_ld_st_inst_i; + misaligned_ex_n = misaligned_ex_i; + dtlb_pte_n = dtlb_content; + dtlb_hit_n = dtlb_lu_hit; + lsu_is_store_n = lsu_is_store_i; + dtlb_is_page_n = dtlb_is_page; + + if (HYP_EXT == 1) begin + lsu_vaddr_n[HYP_EXT] = dtlb_gpaddr; + end + + lsu_valid_o = lsu_req_q; + lsu_exception_o = misaligned_ex_q; + csr_hs_ld_st_inst_o = hs_ld_st_inst_i || hs_ld_st_inst_q; + pmp_access_type = lsu_is_store_q ? 
riscv::ACCESS_WRITE : riscv::ACCESS_READ; + + // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions + misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i; + + // Check if the User flag is set, then we may only access it in supervisor mode + // if SUM is enabled + daccess_err[0] = (en_ld_st_translation_i[0] || HYP_EXT==0)&& + ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && (en_ld_st_translation_i[HYP_EXT*2] ? !sum_i[HYP_EXT] : !sum_i[0] ) && dtlb_pte_q[0].u) || // SUM is not set and we are trying to access a user page in supervisor mode + (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q[0].u)); + + if (HYP_EXT == 1) daccess_err[HYP_EXT] = en_ld_st_translation_i[HYP_EXT] && !dtlb_pte_q[1].u; + // translation is enabled and no misaligned exception occurred + if ((|en_ld_st_translation_i[HYP_EXT:0]) && !misaligned_ex_q.valid) begin + lsu_valid_o = 1'b0; + + // --------- + // DTLB Hit + // -------- + if (dtlb_hit_q && lsu_req_q) begin + lsu_valid_o = 1'b1; + // exception priority: + // PAGE_FAULTS have higher priority than ACCESS_FAULTS + // virtual memory based exceptions are PAGE_FAULTS + // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP) + + // this is a store + if (lsu_is_store_q) begin + // check if the page is write-able and we are not violating privileges + // also check if the dirty flag is set + if(HYP_EXT==1 && en_ld_st_translation_i[HYP_EXT] && (!dtlb_pte_q[HYP_EXT].w || daccess_err[HYP_EXT] || !dtlb_pte_q[HYP_EXT].d)) begin + lsu_exception_o = { + riscv::STORE_GUEST_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + lsu_vaddr_q[1][riscv::GPLEN-1:0], + {riscv::XLEN{1'b0}}, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else if ((en_ld_st_translation_i[0] || HYP_EXT==0) && (!dtlb_pte_q[0].w || daccess_err[0] || !dtlb_pte_q[0].d)) begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + 1'b1 + }; + end + // Check if any PMPs are violated + end else if (!pmp_data_allow) begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, + {riscv::XLEN'(lsu_paddr_o)}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, + riscv::XLEN'(lsu_paddr_o[riscv::PLEN-1:(riscv::PLEN > riscv::VLEN) ? 
(riscv::PLEN - riscv::VLEN) : 0]), + 1'b1 + }; + end + end + + // this is a load + end else begin + if (HYP_EXT == 1 && daccess_err[HYP_EXT]) begin + lsu_exception_o = { + riscv::LOAD_GUEST_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + lsu_vaddr_q[1][riscv::GPLEN-1:0], + {riscv::XLEN{1'b0}}, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + // check for sufficient access privileges - throw a page fault if necessary + end else if (daccess_err[0]) begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + 1'b1 + }; + end + // Check if any PMPs are violated + end else if (!pmp_data_allow) begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, lsu_vaddr_q[0]}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + lsu_paddr_o[riscv::PLEN-1:(riscv::PLEN>riscv::VLEN)?(riscv::PLEN-riscv::VLEN) : 0], + 1'b1 + }; + end + end + end + end else + + // --------- + // DTLB Miss + // --------- + // watch out for exceptions + if (ptw_active && !walking_instr) begin + // page table walker threw an exception + if (ptw_error[0]) begin + // an error makes the translation valid + lsu_valid_o = 1'b1; + // the page table walker can only throw page faults + if (lsu_is_store_q) begin + if (HYP_EXT == 1 && ptw_error[HYP_EXT]) begin + lsu_exception_o = { + riscv::STORE_GUEST_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + ptw_bad_paddr[HYP_EXT][riscv::GPLEN-1:0], + (ptw_error[HYP_EXT*2] ? (riscv::IS_XLEN64 ? riscv::READ_64_PSEUDOINSTRUCTION : riscv::READ_32_PSEUDOINSTRUCTION) : {riscv::XLEN{1'b0}}), + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::STORE_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + 1'b1 + }; + end + end + end else begin + if (HYP_EXT == 1 && ptw_error[HYP_EXT]) begin + lsu_exception_o = { + riscv::LOAD_GUEST_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + ptw_bad_paddr[HYP_EXT][riscv::GPLEN-1:0], + (ptw_error[HYP_EXT*2] ? (riscv::IS_XLEN64 ? 
riscv::READ_64_PSEUDOINSTRUCTION : riscv::READ_32_PSEUDOINSTRUCTION) : {riscv::XLEN{1'b0}}), + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LOAD_PAGE_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + 1'b1 + }; + end + end + end + end + + if (ptw_access_exception) begin + // an error makes the translation valid + lsu_valid_o = 1'b1; + // the page table walker can only throw page faults + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + ptw_bad_paddr[0][riscv::PLEN-1:(riscv::PLEN > riscv::VLEN) ? (riscv::PLEN - riscv::VLEN) : 0], + 1'b1 + }; + end + end + end + end // If translation is not enabled, check the paddr immediately against PMPs + else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin + if (lsu_is_store_q) begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else + lsu_exception_o = { + riscv::ST_ACCESS_FAULT, + lsu_paddr_o[riscv::PLEN-1:(riscv::PLEN>riscv::VLEN)?(riscv::PLEN-riscv::VLEN) : 0], + 1'b1 + }; + end else begin + if (HYP_EXT == 1) begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[0][riscv::VLEN-1]}}, update_vaddr}, + {riscv::GPLEN{1'b0}}, + lsu_tinst_q, + en_ld_st_translation_i[HYP_EXT*2], + 1'b1 + }; + end else begin + lsu_exception_o = { + riscv::LD_ACCESS_FAULT, + lsu_paddr_o[riscv::PLEN-1:(riscv::PLEN>riscv::VLEN)?(riscv::PLEN-riscv::VLEN) : 0], + 1'b1 + }; + end + end + end +end + +// Load/store PMP check +pmp #( + .CVA6Cfg (CVA6Cfg), // COMMENT IN HYPERVISOR EXTENSION + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + // .NR_ENTRIES ( ArianeCfg.NrPMPEntries ) CONFIGURATION USED IN HYPERVISOR EXTENSION +) i_pmp_data ( + .addr_i (lsu_paddr_o), + .priv_lvl_i (ld_st_priv_lvl_i), + .access_type_i(pmp_access_type), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (pmp_data_allow) +); + +// ---------- +// Registers +// ---------- +always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + lsu_vaddr_q <= '0; + lsu_tinst_q <= '0; + hs_ld_st_inst_q <= '0; + lsu_req_q <= '0; + misaligned_ex_q <= '0; + dtlb_pte_q <= '0; + dtlb_hit_q <= '0; + lsu_is_store_q <= '0; + dtlb_is_page_q <= '0; + end else begin + lsu_vaddr_q <= lsu_vaddr_n; + lsu_tinst_q <= lsu_tinst_n; + hs_ld_st_inst_q <= hs_ld_st_inst_n; + lsu_req_q <= lsu_req_n; + misaligned_ex_q <= misaligned_ex_n; + dtlb_pte_q <= dtlb_pte_n; + dtlb_hit_q <= dtlb_hit_n; + lsu_is_store_q <= lsu_is_store_n; + dtlb_is_page_q <= dtlb_is_page_n; + end +end +endmodule \ No newline at end of file diff --git a/core/mmu_unify/cva6_ptw.sv b/core/mmu_unify/cva6_ptw.sv new file mode 100644 index 0000000000..f6e278bf24 --- /dev/null +++ b/core/mmu_unify/cva6_ptw.sv @@ -0,0 +1,603 @@ +// Copyright (c) 2021 Thales. 
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Angela Gonzalez, PlanV Technology
+// Date: 02/02/2024
+// Description: Hardware-PTW (Page-Table-Walker) for CVA6 supporting sv32, sv39 and sv39x4.
+//              This module is a merge of the PTW Sv39 developed by Florian Zaruba,
+//              the PTW Sv32 developed by Sebastien Jacq and the PTW Sv39x4 by Bruno Sá.
+
+/* verilator lint_off WIDTH */
+
+module cva6_ptw
+  import ariane_pkg::*;
+#(
+    parameter type pte_cva6_t = logic,
+    parameter type tlb_update_cva6_t = logic,
+    parameter int unsigned HYP_EXT = 0,
+    parameter int unsigned ASID_WIDTH[HYP_EXT:0] = {1},
+    parameter int unsigned VPN_LEN = 1,
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned PT_LEVELS = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // flush everything, we need to do this because
+    // actually everything we do is speculative at this stage
+    // e.g.: there could be a CSR instruction that changes everything
+    output logic ptw_active_o,
+    output logic walking_instr_o,  // set when walking for TLB
+    output logic [HYP_EXT*2:0] ptw_error_o,  // set when an error occurred
+    output logic ptw_access_exception_o,  // set when a PMP access exception occurred
+    input logic [HYP_EXT*2:0] enable_translation_i,  //[v_i,enable_g_translation,enable_translation]
+    input logic [HYP_EXT*2:0] en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    input logic hlvx_inst_i,  // is a HLVX load/store instruction
+
+    input logic lsu_is_store_i,  // this translation was triggered by a store
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+
+
+    // to TLBs, update logic
+    output tlb_update_cva6_t shared_tlb_update_o,
+
+    output logic [riscv::VLEN-1:0] update_vaddr_o,
+
+    input logic [ASID_WIDTH[0]-1:0] asid_i[HYP_EXT*2:0],  //[vmid,vs_asid,asid]
+
+    // from TLBs
+    // did we miss?
+ input logic shared_tlb_access_i, + input logic shared_tlb_hit_i, + input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i, + + input logic itlb_req_i, + + // from CSR file + input logic [riscv::PPNW-1:0] satp_ppn_i[HYP_EXT*2:0], //[hgatp,vsatp,satp] + input logic [ HYP_EXT:0] mxr_i, + + // Performance counters + output logic shared_tlb_miss_o, + + // PMP + + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + output logic [HYP_EXT:0][riscv::PLEN-1:0] bad_paddr_o + +); + + // input registers + logic data_rvalid_q; + riscv::xlen_t data_rdata_q; + + pte_cva6_t [HYP_EXT*2:0] pte; //[gpte_d,gpte_q,pte] + // register to perform context switch between stages + // pte_cva6_t gpte_q, gpte_d; + assign pte[0] = pte_cva6_t'(data_rdata_q[riscv::PPNW+9:0]); + + enum logic [2:0] { + IDLE, + WAIT_GRANT, + PTE_LOOKUP, + WAIT_RVALID, + PROPAGATE_ERROR, + PROPAGATE_ACCESS_ERROR, + LATENCY + } + state_q, state_d; + + logic [PT_LEVELS-1:0] misaligned_page; + logic [HYP_EXT:0][PT_LEVELS-2:0] ptw_lvl_n, ptw_lvl_q; + + // define 3 PTW stages to be used in sv39x4. sv32 and sv39 are always in S_STAGE + // S_STAGE -> S/VS-stage normal translation controlled by the satp/vsatp CSRs + // G_INTERMED_STAGE -> Converts the S/VS-stage non-leaf GPA pointers to HPA (controlled by hgatp) + // G_FINAL_STAGE -> Converts the S/VS-stage final GPA to HPA (controlled by hgatp) + enum logic [1:0] { + S_STAGE, + G_INTERMED_STAGE, + G_FINAL_STAGE + } + ptw_stage_q, ptw_stage_d; + + // is this an instruction page table walk? + logic is_instr_ptw_q, is_instr_ptw_n; + logic global_mapping_q, global_mapping_n; + // latched tag signal + logic tag_valid_n, tag_valid_q; + // register the ASIDs + logic [HYP_EXT:0][ASID_WIDTH[0]-1:0] tlb_update_asid_q, tlb_update_asid_n; + // register the VPN we need to walk, SV39 defines a 39 bit virtual address + logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + logic [HYP_EXT*2:0][PT_LEVELS-2:0][(VPN_LEN/PT_LEVELS)-1:0] vaddr_lvl; + // register the VPN we need to walk, SV39x4 defines a 41 bit virtual address for the G-Stage + logic [riscv::GPLEN-1:0] gpaddr_q, gpaddr_n, gpaddr_base; + logic [PT_LEVELS-2:0][riscv::GPLEN-1:0] gpaddr; + // 4 byte aligned physical pointer + logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + logic [riscv::PLEN-1:0] gptw_pptr_q, gptw_pptr_n; + + // Assignments + assign update_vaddr_o = vaddr_q; + + assign ptw_active_o = (state_q != IDLE); + assign walking_instr_o = is_instr_ptw_q; + // directly output the correct physical address + assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + // we are never going to kill this request + assign req_port_o.kill_req = '0; + // we are never going to write with the HPTW + assign req_port_o.data_wdata = '0; + // we only issue one single request at a time + assign req_port_o.data_id = '0; + + // ----------- + // TLB Update + // ----------- + + assign gpaddr_base = {pte[0].ppn[riscv::GPPNW-1:0], vaddr_q[11:0]}; + + genvar z, w; + generate + for (z = 0; z < PT_LEVELS - 1; z++) begin + + // check if the ppn is correctly aligned: + // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault + // exception. + assign misaligned_page[z] = (ptw_lvl_q[0] == (z)) && (pte[0].ppn[(VPN_LEN/PT_LEVELS)*(PT_LEVELS-1-z)-1:0] != '0); + + //record the vaddr corresponding to each level + for (w = 0; w < HYP_EXT * 2 + 1; w++) begin + assign vaddr_lvl[w][z] = w==0 ? 
vaddr_q[12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-1))-1:12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-2))] : + w==1 ? gptw_pptr_q[12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-1))-1:12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-2))]: + gpaddr_q[12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-1))-1:12+((VPN_LEN/PT_LEVELS)*(PT_LEVELS-z-2))]; + end + + assign gpaddr[z][VPN_LEN-(VPN_LEN/PT_LEVELS):0]= (ptw_lvl_q[0] == z) ? vaddr_q[VPN_LEN-(VPN_LEN/PT_LEVELS):0] : gpaddr_base[VPN_LEN-(VPN_LEN/PT_LEVELS):0]; + assign gpaddr[z][VPN_LEN:VPN_LEN-(VPN_LEN/PT_LEVELS)+1]= (ptw_lvl_q[0] == 0) ? vaddr_q[VPN_LEN:VPN_LEN-(VPN_LEN/PT_LEVELS)+1] : gpaddr_base[VPN_LEN:VPN_LEN-(VPN_LEN/PT_LEVELS)+1]; + assign gpaddr[z][riscv::GPLEN-1:VPN_LEN+1] = gpaddr_base[riscv::GPLEN-1:VPN_LEN+1]; + + + end + endgenerate + + always_comb begin : tlb_update + // update the correct page table level + for (int unsigned y = 0; y < HYP_EXT + 1; y++) begin + for (int unsigned x = 0; x < PT_LEVELS - 1; x++) begin + if((&enable_translation_i[HYP_EXT:0] || &en_ld_st_translation_i[HYP_EXT:0])&& HYP_EXT==1) begin + shared_tlb_update_o.is_page[x][y] = (ptw_lvl_q[y==HYP_EXT?0 : 1] == x); + end else if (enable_translation_i[0] || en_ld_st_translation_i[0] || HYP_EXT == 0) begin + shared_tlb_update_o.is_page[x][y] = y == 0 ? (ptw_lvl_q[0] == x) : 1'b0; + end else begin + shared_tlb_update_o.is_page[x][y] = y != 0 ? (ptw_lvl_q[0] == x) : 1'b0; + end + end + + // set the global mapping bit + if ((enable_translation_i[HYP_EXT] || en_ld_st_translation_i[HYP_EXT]) && HYP_EXT == 1) begin + shared_tlb_update_o.content[y] = y == 0 ? pte[HYP_EXT] | (global_mapping_q << 5) : pte[0]; + end else begin + shared_tlb_update_o.content[y] = y == 0 ? (pte[0] | (global_mapping_q << 5)) : '0; + end + end + // output the correct ASIDs + shared_tlb_update_o.asid = tlb_update_asid_q; + + bad_paddr_o[0] = ptw_access_exception_o ? ptw_pptr_q : 'b0; + if (HYP_EXT == 1) + bad_paddr_o[HYP_EXT][riscv::GPLEN:0] = ptw_error_o[HYP_EXT] ? ((ptw_stage_q == G_INTERMED_STAGE) ? gptw_pptr_q[riscv::GPLEN:0] : gpaddr_q) : 'b0; + end + + assign req_port_o.tag_valid = tag_valid_q; + + logic allow_access; + + + + pmp #( + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + ) i_pmp_ptw ( + .addr_i (ptw_pptr_q), + // PTW access are always checked as if in S-Mode... + .priv_lvl_i (riscv::PRIV_LVL_S), + // ...and they are always loads + .access_type_i(riscv::ACCESS_READ), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (allow_access) + ); + + + assign req_port_o.data_be = riscv::XLEN == 32 ? be_gen_32( + req_port_o.address_index[1:0], req_port_o.data_size + ) : be_gen( + req_port_o.address_index[2:0], req_port_o.data_size + ); + + assign shared_tlb_update_o.vpn = VPN_LEN'(vaddr_q[riscv::SV+HYP_EXT*2-1:12]); + + //------------------- + // Page table walker + //------------------- + // A virtual address va is translated into a physical address pa as follows: + // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, + // PAGESIZE=2^12 and LEVELS=3.) + // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For + // Sv32, PTESIZE=4.) + // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, or if any bits or encodings + // that are reserved for future standard use are set within pte, stop and raise + // a page-fault exception corresponding to the original access type. + // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5. + // Otherwise, this PTE is a pointer to the next level of the page table. + // Let i=i-1. 
If i < 0, stop and raise an access exception. Otherwise, let + // a = pte.ppn × PAGESIZE and go to step 2. + // 5. A leaf PTE has been found. Determine if the requested memory access + // is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and + // raise an access exception. Otherwise, the translation is successful. + // Set pte.a to 1, and, if the memory access is a store, set pte.d to 1. + // The translated physical address is given as follows: + // - pa.pgoff = va.pgoff. + // - If i > 0, then this is a superpage translation and + // pa.ppn[i-1:0] = va.vpn[i-1:0]. + // - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. + always_comb begin : ptw + automatic logic [riscv::PLEN-1:0] pptr; + // automatic logic [riscv::GPLEN-1:0] gpaddr; + // default assignments + // PTW memory interface + tag_valid_n = 1'b0; + req_port_o.data_req = 1'b0; + req_port_o.data_size = 2'(PT_LEVELS); + req_port_o.data_we = 1'b0; + ptw_error_o = '0; + ptw_access_exception_o = 1'b0; + shared_tlb_update_o.valid = 1'b0; + is_instr_ptw_n = is_instr_ptw_q; + ptw_lvl_n = ptw_lvl_q; + ptw_pptr_n = ptw_pptr_q; + gptw_pptr_n = gptw_pptr_q; + state_d = state_q; + ptw_stage_d = ptw_stage_q; + global_mapping_n = global_mapping_q; + // input registers + tlb_update_asid_n = tlb_update_asid_q; + vaddr_n = vaddr_q; + gpaddr_n = gpaddr_q; + pptr = ptw_pptr_q; + // gpaddr = gpaddr_q; + + shared_tlb_miss_o = 1'b0; + + if (HYP_EXT == 1) pte[HYP_EXT*2] = pte[HYP_EXT]; + + case (state_q) + + IDLE: begin + // by default we start with the top-most page table + ptw_lvl_n = '0; + global_mapping_n = 1'b0; + is_instr_ptw_n = 1'b0; + gpaddr_n = '0; + + if (HYP_EXT == 1) pte[HYP_EXT*2] = '0; + // if we got an ITLB miss + if (((|enable_translation_i[HYP_EXT:0]) || |en_ld_st_translation_i[HYP_EXT:0]) && shared_tlb_access_i && ~shared_tlb_hit_i) begin + if ((&enable_translation_i[HYP_EXT:0] || &en_ld_st_translation_i[HYP_EXT:0]) && HYP_EXT==1) begin + ptw_stage_d = G_INTERMED_STAGE; + pptr = { + satp_ppn_i[HYP_EXT], + shared_tlb_vaddr_i[riscv::SV-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + gptw_pptr_n = pptr; + ptw_pptr_n = { + satp_ppn_i[HYP_EXT*2][riscv::PPNW-1:2], + pptr[riscv::SV+HYP_EXT*2-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + end else if (((|enable_translation_i[HYP_EXT:0] && !enable_translation_i[0]) || (|en_ld_st_translation_i[HYP_EXT:0] && !en_ld_st_translation_i[0])) && HYP_EXT==1) begin + ptw_stage_d = G_FINAL_STAGE; + gpaddr_n = shared_tlb_vaddr_i[riscv::SV+HYP_EXT*2-1:0]; + ptw_pptr_n = { + satp_ppn_i[HYP_EXT*2][riscv::PPNW-1:2], + shared_tlb_vaddr_i[riscv::SV+HYP_EXT*2-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + end else begin + ptw_stage_d = S_STAGE; + if((enable_translation_i[HYP_EXT*2] || en_ld_st_translation_i[HYP_EXT*2]) && HYP_EXT==1) + ptw_pptr_n = { + satp_ppn_i[HYP_EXT], + shared_tlb_vaddr_i[riscv::SV-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + else + ptw_pptr_n = { + satp_ppn_i[0], + shared_tlb_vaddr_i[riscv::SV-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + end + + is_instr_ptw_n = itlb_req_i; + vaddr_n = shared_tlb_vaddr_i; + state_d = WAIT_GRANT; + shared_tlb_miss_o = 1'b1; + + for (int unsigned b = 0; b < HYP_EXT + 1; b++) begin + tlb_update_asid_n[b] = b==0 ? ((enable_translation_i[2*HYP_EXT] || en_ld_st_translation_i[2*HYP_EXT]) ? 
asid_i[HYP_EXT] : asid_i[0]) : asid_i[HYP_EXT*2]; + end + end + end + + WAIT_GRANT: begin + // send a request out + req_port_o.data_req = 1'b1; + // wait for the WAIT_GRANT + if (req_port_i.data_gnt) begin + // send the tag valid signal one cycle later + tag_valid_n = 1'b1; + state_d = PTE_LOOKUP; + end + end + + PTE_LOOKUP: begin + // we wait for the valid signal + if (data_rvalid_q) begin + + // check if the global mapping bit is set + if (pte[0].g && ptw_stage_q == S_STAGE) global_mapping_n = 1'b1; + + // ------------- + // Invalid PTE + // ------------- + // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception. + if (!pte[0].v || (!pte[0].r && pte[0].w)) // || (|pte.reserved)) + state_d = PROPAGATE_ERROR; + // ----------- + // Valid PTE + // ----------- + else begin + state_d = LATENCY; + // it is a valid PTE + // if pte.r = 1 or pte.x = 1 it is a valid PTE + if (pte[0].r || pte[0].x) begin + case (ptw_stage_q) + S_STAGE: begin + if (HYP_EXT==1 && ((is_instr_ptw_q && enable_translation_i[HYP_EXT]) || (!is_instr_ptw_q && en_ld_st_translation_i[HYP_EXT]))) begin + state_d = WAIT_GRANT; + ptw_stage_d = G_FINAL_STAGE; + if (HYP_EXT == 1) pte[HYP_EXT*2] = pte[0]; + ptw_lvl_n[HYP_EXT] = ptw_lvl_q[0]; + gpaddr_n = gpaddr[ptw_lvl_q[0]]; + ptw_pptr_n = { + satp_ppn_i[HYP_EXT*2][riscv::PPNW-1:2], + gpaddr[ptw_lvl_q[0]][riscv::SV+HYP_EXT*2-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + ptw_lvl_n[0] = 0; + end + end + G_INTERMED_STAGE: begin + state_d = WAIT_GRANT; + ptw_stage_d = S_STAGE; + ptw_lvl_n[0] = ptw_lvl_q[HYP_EXT]; + pptr = {pte[0].ppn[riscv::GPPNW-1:0], gptw_pptr_q[11:0]}; + if (ptw_lvl_q[0] == 1) pptr[20:0] = gptw_pptr_q[20:0]; + if (ptw_lvl_q[0] == 0) pptr[29:0] = gptw_pptr_q[29:0]; + ptw_pptr_n = pptr; + end + default: ; + endcase + // Valid translation found (either 1G, 2M or 4K entry) + if (is_instr_ptw_q) begin + // ------------ + // Update ITLB + // ------------ + // If page is not executable, we can directly raise an error. This + // doesn't put a useless entry into the TLB. The same idea applies + // to the access flag since we let the access flag be managed by SW. + if (!pte[0].x || !pte[0].a) begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + end else if((ptw_stage_q == G_FINAL_STAGE) || !enable_translation_i[HYP_EXT] || HYP_EXT==0) + shared_tlb_update_o.valid = 1'b1; + + end else begin + // ------------ + // Update DTLB + // ------------ + // Check if the access flag has been set, otherwise throw a page-fault + // and let the software handle those bits. + // If page is not readable (there are no write-only pages) + // we can directly raise an error. This doesn't put a useless + // entry into the TLB. 
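+            // "Readable" here means pte.r is set, or pte.x is set and MXR
+            // (make executable readable) applies; HLVX accesses are the exception
+            // and require execute rather than read permission.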
+ if (pte[0].a && ((pte[0].r && !hlvx_inst_i) || (pte[0].x && (mxr_i[0] || hlvx_inst_i || (ptw_stage_q == S_STAGE && mxr_i[HYP_EXT] && en_ld_st_translation_i[HYP_EXT*2] && HYP_EXT==1))))) begin + if((ptw_stage_q == G_FINAL_STAGE) || !en_ld_st_translation_i[HYP_EXT] || HYP_EXT==0) + shared_tlb_update_o.valid = 1'b1; + end else begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + end + // Request is a store: perform some additional checks + // If the request was a store and the page is not writable, raise an error + // the same applies if the dirty flag is not set + if (lsu_is_store_i && (!pte[0].w || !pte[0].d)) begin + shared_tlb_update_o.valid = 1'b0; + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + end + end + + // if there is a misaligned page, propagate error + if (|misaligned_page) begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + shared_tlb_update_o.valid = 1'b0; + end + + // check if 63:41 are all zeros + if (HYP_EXT==1 && ((enable_translation_i[HYP_EXT*2] && is_instr_ptw_q) || (en_ld_st_translation_i[HYP_EXT*2] && !is_instr_ptw_q)) && ptw_stage_q == S_STAGE && !((|pte[0].ppn[riscv::PPNW-HYP_EXT:riscv::GPPNW]) == 1'b0)) begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = G_FINAL_STAGE; + end + // this is a pointer to the next page table level + end else begin + // pointer to next level of page table + + if (ptw_lvl_q[0] == PT_LEVELS - 1) begin + // Should already be the last level page table => Error + ptw_lvl_n[0] = PT_LEVELS - 1; + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + + + end else begin + ptw_lvl_n[0] = ptw_lvl_q[0] + 1; + state_d = WAIT_GRANT; + + case (ptw_stage_q) + S_STAGE: begin + if (HYP_EXT==1 && ((is_instr_ptw_q && enable_translation_i[HYP_EXT]) || (!is_instr_ptw_q && en_ld_st_translation_i[HYP_EXT]))) begin + ptw_stage_d = G_INTERMED_STAGE; + if (HYP_EXT == 1) pte[HYP_EXT*2] = pte[0]; + ptw_lvl_n[HYP_EXT] = ptw_lvl_q[0] + 1; + pptr = {pte[0].ppn, vaddr_lvl[0][ptw_lvl_q[0]], (PT_LEVELS)'(0)}; + gptw_pptr_n = pptr; + ptw_pptr_n = { + satp_ppn_i[HYP_EXT*2][riscv::PPNW-1:2], + pptr[riscv::SV+HYP_EXT*2-1:riscv::SV-(VPN_LEN/PT_LEVELS)], + (PT_LEVELS)'(0) + }; + ptw_lvl_n[0] = 0; + end else begin + ptw_pptr_n = {pte[0].ppn, vaddr_lvl[0][ptw_lvl_q[0]], (PT_LEVELS)'(0)}; + end + end + G_INTERMED_STAGE: begin + ptw_pptr_n = {pte[0].ppn, vaddr_lvl[HYP_EXT][ptw_lvl_q[0]], (PT_LEVELS)'(0)}; + end + G_FINAL_STAGE: begin + ptw_pptr_n = {pte[0].ppn, vaddr_lvl[HYP_EXT*2][ptw_lvl_q[0]], (PT_LEVELS)'(0)}; + end + endcase + + if (HYP_EXT == 1 && (pte[0].a || pte[0].d || pte[0].u)) begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + end + + end + + // check if 63:41 are all zeros + if (HYP_EXT==1 && (((enable_translation_i[HYP_EXT*2] && is_instr_ptw_q) || (en_ld_st_translation_i[HYP_EXT*2] && !is_instr_ptw_q)) && ptw_stage_q == S_STAGE && !((|pte[0].ppn[riscv::PPNW-1:riscv::GPPNW]) == 1'b0))) begin + state_d = PROPAGATE_ERROR; + ptw_stage_d = ptw_stage_q; + end + end + end + + // Check if this access was actually allowed from a PMP perspective + if (!allow_access) begin + shared_tlb_update_o.valid = 1'b0; + // we have to return the failed address in bad_addr + ptw_pptr_n = ptw_pptr_q; + ptw_stage_d = ptw_stage_q; + state_d = PROPAGATE_ACCESS_ERROR; + end + end + // we've got a data grant so tell the cache that the tag is valid + end + // Propagate error to MMU/LSU + PROPAGATE_ERROR: begin + state_d = LATENCY; + ptw_error_o[0] = 1'b1; + if (HYP_EXT == 1) begin + ptw_error_o[HYP_EXT] = (ptw_stage_q != S_STAGE) ?
1'b1 : 1'b0; + ptw_error_o[HYP_EXT*2] = (ptw_stage_q == G_INTERMED_STAGE) ? 1'b1 : 1'b0; + end + end + PROPAGATE_ACCESS_ERROR: begin + state_d = LATENCY; + ptw_access_exception_o = 1'b1; + end + // wait for the rvalid before going back to IDLE + WAIT_RVALID: begin + if (data_rvalid_q) state_d = IDLE; + end + LATENCY: begin + state_d = IDLE; + end + default: begin + state_d = IDLE; + end + endcase + + // ------- + // Flush + // ------- + // if a flush arrived before we got an rvalid, wait for it before going back to IDLE + if (flush_i) begin + // on a flush check whether we are + // 1. in the PTE lookup and still need to wait for an rvalid + // 2. waiting for a grant, if so: wait for it + // if not, go back to idle + if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) || ((state_q == WAIT_GRANT) && req_port_i.data_gnt)) + state_d = WAIT_RVALID; + else state_d = LATENCY; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + ptw_stage_q <= S_STAGE; + is_instr_ptw_q <= 1'b0; + ptw_lvl_q <= '0; + tag_valid_q <= 1'b0; + tlb_update_asid_q <= '0; + vaddr_q <= '0; + gpaddr_q <= '0; + ptw_pptr_q <= '0; + gptw_pptr_q <= '0; + global_mapping_q <= 1'b0; + data_rdata_q <= '0; + data_rvalid_q <= 1'b0; + if (HYP_EXT == 1) pte[HYP_EXT] <= '0; + end else begin + state_q <= state_d; + ptw_stage_q <= ptw_stage_d; + ptw_pptr_q <= ptw_pptr_n; + gptw_pptr_q <= gptw_pptr_n; + is_instr_ptw_q <= is_instr_ptw_n; + ptw_lvl_q <= ptw_lvl_n; + tag_valid_q <= tag_valid_n; + tlb_update_asid_q <= tlb_update_asid_n; + vaddr_q <= vaddr_n; + gpaddr_q <= gpaddr_n; + global_mapping_q <= global_mapping_n; + data_rdata_q <= req_port_i.data_rdata; + data_rvalid_q <= req_port_i.data_rvalid; + + if (HYP_EXT == 1) pte[HYP_EXT] <= pte[HYP_EXT*2]; + end + end + +endmodule +/* verilator lint_on WIDTH */ diff --git a/core/mmu_unify/cva6_shared_tlb.sv b/core/mmu_unify/cva6_shared_tlb.sv new file mode 100644 index 0000000000..7f24856ede --- /dev/null +++ b/core/mmu_unify/cva6_shared_tlb.sv @@ -0,0 +1,467 @@ +// Copyright (c) 2023 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Angela Gonzalez PlanV Technology +// Date: 24/11/2023 +// +// Description: N-way associative shared TLB, which allows reducing the number
+// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2024-02-05 0.2 A.Gonzalez Generic shared TLB for CVA6 with Hypervisor support +// =========================================================================== // + +/* verilator lint_off WIDTH */ + +module cva6_shared_tlb #( + parameter type pte_cva6_t = logic, + parameter type tlb_update_cva6_t = logic, + parameter int SHARED_TLB_DEPTH = 64, + parameter int SHARED_TLB_WAYS = 2, + parameter int unsigned HYP_EXT = 0, + parameter int unsigned ASID_WIDTH[HYP_EXT:0] = {1}, //[vmid_width,asid_width] + parameter int unsigned VPN_LEN = 1, + parameter int unsigned PT_LEVELS = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic [HYP_EXT*2:0] flush_i, // Flush signal [g_stage,vs stage, normal translation signal] + input logic [1:0][HYP_EXT*2:0] v_st_enbl_i, // v_i,g-stage enabled, s-stage enabled + + input logic [ASID_WIDTH[0]-1:0] dtlb_asid_i[HYP_EXT:0], //[vmid,vs_asid,asid] + input logic [ASID_WIDTH[0]-1:0] itlb_asid_i[HYP_EXT:0], //[vmid,vs_asid,asid] + + // from TLBs + // did we miss? + input logic itlb_access_i, + input logic itlb_hit_i, + input logic [riscv::VLEN-1:0] itlb_vaddr_i, + + input logic dtlb_access_i, + input logic dtlb_hit_i, + input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + + // to TLBs, update logic + output tlb_update_cva6_t itlb_update_o, + output tlb_update_cva6_t dtlb_update_o, + + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + + output logic shared_tlb_access_o, + output logic shared_tlb_hit_o, + output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o, + + output logic itlb_req_o, + + // Update shared TLB in case of miss + input tlb_update_cva6_t shared_tlb_update_i + +); + + tlb_update_cva6_t shared_tlb_update_delayed, shared_tlb_update_delayed2; + logic shared_tlb_update_valid_delayed, shared_tlb_update_valid_delayed2; + + function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(input logic [$clog2(SHARED_TLB_WAYS +)-1:0] in); + logic [SHARED_TLB_WAYS-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + typedef struct packed { + logic [HYP_EXT:0][ASID_WIDTH[0]-1:0] asid; + logic [PT_LEVELS+HYP_EXT-1:0][(VPN_LEN/PT_LEVELS)-1:0] vpn; + logic [PT_LEVELS-2:0][HYP_EXT:0] is_page; + logic [HYP_EXT*2:0] v_st_enbl; // v_i,g-stage enabled, s-stage enabled + } shared_tag_t; + + shared_tag_t shared_tag_wr; + shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd; + + logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d; + + logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid; + + logic [ SHARED_TLB_WAYS-1:0] tag_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr; + logic [ $bits(shared_tag_t)-1:0] tag_wr_data; + + logic [ SHARED_TLB_WAYS-1:0] tag_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr; + logic [ $bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0]; + + logic [ SHARED_TLB_WAYS-1:0] tag_req; + logic [ SHARED_TLB_WAYS-1:0] tag_we; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr; + + logic [ SHARED_TLB_WAYS-1:0] pte_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr; + logic [ $bits(pte_cva6_t)-1:0] pte_wr_data [ HYP_EXT:0]; + + logic [ SHARED_TLB_WAYS-1:0] pte_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr; + logic [ $bits(pte_cva6_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0] [HYP_EXT:0]; + + logic [ SHARED_TLB_WAYS-1:0] pte_req; + logic [ SHARED_TLB_WAYS-1:0] pte_we; + logic 
[$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr; + + logic [PT_LEVELS+HYP_EXT-1:0][(VPN_LEN/PT_LEVELS)-1:0] vpn_d, vpn_q; + logic [SHARED_TLB_WAYS-1:0][PT_LEVELS-1:0] vpn_match; + logic [SHARED_TLB_WAYS-1:0][PT_LEVELS-1:0] page_match; + logic [SHARED_TLB_WAYS-1:0][PT_LEVELS-1:0] level_match; + + logic [SHARED_TLB_WAYS-1:0][HYP_EXT:0] match_asid; + logic [SHARED_TLB_WAYS-1:0] match_stage; + + pte_cva6_t [SHARED_TLB_WAYS-1:0][HYP_EXT:0] pte; + + logic [riscv::VLEN-1-12:0] itlb_vpn_q; + logic [riscv::VLEN-1-12:0] dtlb_vpn_q; + + logic [ASID_WIDTH[0]-1:0] tlb_update_asid_q[HYP_EXT:0], tlb_update_asid_d[HYP_EXT:0]; + + logic shared_tlb_access_q, shared_tlb_access_d; + logic shared_tlb_hit_d; + logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d; + + logic itlb_req_d, itlb_req_q; + logic dtlb_req_d, dtlb_req_q; + + int i_req_d, i_req_q; + + // replacement strategy + logic [SHARED_TLB_WAYS-1:0] way_valid; + logic update_lfsr; // shift the LFSR + logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered + logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement + logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace + logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot) + logic all_ways_valid; // we need to switch repl strategy since all are valid + + assign shared_tlb_access_o = shared_tlb_access_q; + assign shared_tlb_hit_o = shared_tlb_hit_d; + assign shared_tlb_vaddr_o = shared_tlb_vaddr_q; + + assign itlb_req_o = itlb_req_q; + + genvar i, x; + generate + for (i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_match_tlb_ways + //identify page_match for all TLB Entries + + for (x = 0; x < PT_LEVELS; x++) begin : gen_match + assign page_match[i][x] = x==0 ? 1 :((HYP_EXT==0 || x==(PT_LEVELS-1)) ? // PAGE_MATCH CONTAINS THE MATCH INFORMATION FOR EACH TAG OF is_1G and is_2M in sv39x4. HIGHER LEVEL (Giga page), THEN THERE IS THE Mega page AND AT THE LOWER LEVEL IS ALWAYS 1 + &(shared_tag_rd[i].is_page[PT_LEVELS-1-x] | (~v_st_enbl_i[i_req_q][HYP_EXT:0])): + ((&v_st_enbl_i[i_req_q][HYP_EXT:0]) ? + ((shared_tag_rd[i].is_page[PT_LEVELS-1-x][0] && (shared_tag_rd[i].is_page[PT_LEVELS-2-x][HYP_EXT] || shared_tag_rd[i].is_page[PT_LEVELS-1-x][HYP_EXT])) + || (shared_tag_rd[i].is_page[PT_LEVELS-1-x][HYP_EXT] && (shared_tag_rd[i].is_page[PT_LEVELS-2-x][0] || shared_tag_rd[i].is_page[PT_LEVELS-1-x][0]))): + shared_tag_rd[i].is_page[PT_LEVELS-1-x][0] && v_st_enbl_i[i_req_q][0] || shared_tag_rd[i].is_page[PT_LEVELS-1-x][HYP_EXT] && v_st_enbl_i[i_req_q][HYP_EXT])); + + //identify if vpn matches at all PT levels for all TLB entries + assign vpn_match[i][x] = (HYP_EXT==1 && x==(PT_LEVELS-1) && ~v_st_enbl_i[i_req_q][0]) ? // + vpn_q[x] == shared_tag_rd[i].vpn[x] && vpn_q[x+1][(VPN_LEN%PT_LEVELS)-1:0] == shared_tag_rd[i].vpn[x+1][(VPN_LEN%PT_LEVELS)-1:0]: // + vpn_q[x] == shared_tag_rd[i].vpn[x]; + + //identify if there is a hit at each PT level for all TLB entries + assign level_match[i][x] = &vpn_match[i][PT_LEVELS-1:x] && page_match[i][x]; + + end + end + endgenerate + + genvar w; + generate + for (w = 0; w < PT_LEVELS; w++) begin + assign vpn_d[w] = ((|v_st_enbl_i[1][HYP_EXT:0]) && itlb_access_i && ~itlb_hit_i && ~dtlb_access_i) ? // + itlb_vaddr_i[12+((VPN_LEN/PT_LEVELS)*(w+1))-1:12+((VPN_LEN/PT_LEVELS)*w)] : // + (((|v_st_enbl_i[0][HYP_EXT:0]) && dtlb_access_i && ~dtlb_hit_i) ? 
// + dtlb_vaddr_i[12+((VPN_LEN/PT_LEVELS)*(w+1))-1:12+((VPN_LEN/PT_LEVELS)*w)] : vpn_q[w]); + end + endgenerate + + if (HYP_EXT == 1) //THIS UPDATES THE EXTRA BITS OF VPN IN SV39x4 + assign vpn_d[PT_LEVELS][(VPN_LEN%PT_LEVELS)-1:0] = ((|v_st_enbl_i[1][HYP_EXT:0]) && itlb_access_i && ~itlb_hit_i && ~dtlb_access_i) ? // + itlb_vaddr_i[VPN_LEN-1:VPN_LEN-(VPN_LEN%PT_LEVELS)] : // + (((|v_st_enbl_i[0][HYP_EXT:0]) && dtlb_access_i && ~dtlb_hit_i) ? // + dtlb_vaddr_i[VPN_LEN-1: VPN_LEN-(VPN_LEN%PT_LEVELS)] : vpn_q[PT_LEVELS][(VPN_LEN%PT_LEVELS)-1:0]); + + /////////////////////////////////////////////////////// + // tag comparison, hit generation + /////////////////////////////////////////////////////// + always_comb begin : itlb_dtlb_miss + itlb_miss_o = 1'b0; + dtlb_miss_o = 1'b0; + + tag_rd_en = '0; + pte_rd_en = '0; + + itlb_req_d = 1'b0; + dtlb_req_d = 1'b0; + + tlb_update_asid_d = tlb_update_asid_q; + + shared_tlb_access_d = '0; + shared_tlb_vaddr_d = shared_tlb_vaddr_q; + + tag_rd_addr = '0; + pte_rd_addr = '0; + i_req_d = i_req_q; + + // if we got an ITLB miss + if ((|v_st_enbl_i[1][HYP_EXT:0]) & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin + tag_rd_en = '1; + tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + itlb_miss_o = 1'b1; + itlb_req_d = 1'b1; + tlb_update_asid_d = itlb_asid_i; + + shared_tlb_access_d = '1; + shared_tlb_vaddr_d = itlb_vaddr_i; + i_req_d = 1; + + // we got an DTLB miss + end else if ((|v_st_enbl_i[0][HYP_EXT:0]) & dtlb_access_i & ~dtlb_hit_i) begin + tag_rd_en = '1; + tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + dtlb_miss_o = 1'b1; + dtlb_req_d = 1'b1; + tlb_update_asid_d = dtlb_asid_i; + + shared_tlb_access_d = '1; + shared_tlb_vaddr_d = dtlb_vaddr_i; + i_req_d = 0; + end + end //itlb_dtlb_miss + + always_comb begin : tag_comparison + shared_tlb_hit_d = 1'b0; + dtlb_update_o = '0; + itlb_update_o = '0; + + //number of ways + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + // first level match, this may be a giga page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + match_asid[i][0] = (((tlb_update_asid_q[0][ASID_WIDTH[0]-1:0] == shared_tag_rd[i].asid[0][ASID_WIDTH[0]-1:0]) || pte[i][0].g) && v_st_enbl_i[i_req_q][0]) || !v_st_enbl_i[i_req_q][0]; + + if (HYP_EXT == 1) begin + match_asid[i][HYP_EXT] = (tlb_update_asid_q[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == shared_tag_rd[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] && v_st_enbl_i[i_req_q][HYP_EXT]) || !v_st_enbl_i[i_req_q][HYP_EXT]; + end + + // check if translation is a: S-Stage and G-Stage, S-Stage only or G-Stage only translation and virtualization mode is on/off + match_stage[i] = shared_tag_rd[i].v_st_enbl == v_st_enbl_i[i_req_q]; + + if (shared_tag_valid[i] && &match_asid[i] && match_stage[i]) begin + if (|level_match[i]) begin + shared_tlb_hit_d = 1'b1; + if (itlb_req_q) begin + itlb_update_o.valid = 1'b1; + itlb_update_o.vpn = itlb_vpn_q; + itlb_update_o.is_page = shared_tag_rd[i].is_page; + itlb_update_o.content = pte[i]; + itlb_update_o.v_st_enbl = shared_tag_rd[i].v_st_enbl; + for (int unsigned a = 0; a < HYP_EXT + 1; a++) begin + itlb_update_o.asid[a] = tlb_update_asid_q[a]; + end + end else if (dtlb_req_q) begin + dtlb_update_o.valid = 1'b1; + dtlb_update_o.vpn = dtlb_vpn_q; + dtlb_update_o.is_page = shared_tag_rd[i].is_page; + 
dtlb_update_o.content = pte[i]; + dtlb_update_o.v_st_enbl = shared_tag_rd[i].v_st_enbl; + for (int unsigned a = 0; a < HYP_EXT + 1; a++) begin + dtlb_update_o.asid[a] = tlb_update_asid_q[a]; + end + end + end + end + end + end //tag_comparison + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + itlb_vpn_q <= '0; + dtlb_vpn_q <= '0; + tlb_update_asid_q <= '{default: 0}; + shared_tlb_access_q <= '0; + shared_tlb_vaddr_q <= '0; + shared_tag_valid_q <= '0; + vpn_q <= 0; + itlb_req_q <= '0; + dtlb_req_q <= '0; + i_req_q <= 0; + shared_tag_valid <= '0; + shared_tlb_update_valid_delayed <= '0; + shared_tlb_update_valid_delayed2 <= '0; + shared_tlb_update_delayed <= '0; + shared_tlb_update_delayed2 <= '0; + end else begin + itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12]; + dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12]; + tlb_update_asid_q <= tlb_update_asid_d; + shared_tlb_access_q <= shared_tlb_access_d; + shared_tlb_vaddr_q <= shared_tlb_vaddr_d; + shared_tag_valid_q <= shared_tag_valid_d; + vpn_q <= vpn_d; + itlb_req_q <= itlb_req_d; + dtlb_req_q <= dtlb_req_d; + i_req_q <= i_req_d; + shared_tag_valid <= shared_tag_valid_q[tag_rd_addr]; + + if (shared_tlb_update_i.valid) begin + shared_tlb_update_valid_delayed <= shared_tlb_update_i.valid; + shared_tlb_update_delayed <= shared_tlb_update_i; + end + + shared_tlb_update_valid_delayed2 <= shared_tlb_update_valid_delayed; + shared_tlb_update_delayed2 <= shared_tlb_update_delayed; + end + end + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + shared_tag_valid_d = shared_tag_valid_q; + tag_wr_en = '0; + pte_wr_en = '0; + + if (|flush_i) begin + shared_tag_valid_d = '0; + end else if (shared_tlb_update_i.valid) begin + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + if (repl_way_oh_d[i]) begin + shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1; + tag_wr_en[i] = 1'b1; + pte_wr_en[i] = 1'b1; + end + end + end + end //update_flush + + assign shared_tag_wr.asid = shared_tlb_update_i.asid; + assign shared_tag_wr.is_page = shared_tlb_update_i.is_page; + assign shared_tag_wr.v_st_enbl = v_st_enbl_i[i_req_q]; + + genvar z; + generate + for (z = 0; z < PT_LEVELS; z++) begin : gen_shared_tag + assign shared_tag_wr.vpn[z] = shared_tlb_update_i.vpn[((VPN_LEN/PT_LEVELS)*(z+1))-1:((VPN_LEN/PT_LEVELS)*z)]; + end + if (HYP_EXT == 1) begin : gen_shared_tag_hyp + //THIS UPDATES THE EXTRA BITS OF VPN IN SV39x4 + assign shared_tag_wr.vpn[PT_LEVELS][(VPN_LEN%PT_LEVELS)-1:0] = shared_tlb_update_i.vpn[VPN_LEN-1: VPN_LEN-(VPN_LEN%PT_LEVELS)]; + end + endgenerate + + + assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + assign tag_wr_data = shared_tag_wr; + + assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + + genvar h; + generate + for (h = 0; h < HYP_EXT + 1; h++) begin : gen_pte_wr_data + assign pte_wr_data[h] = shared_tlb_update_i.content[h]; + end + endgenerate + + assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]]; + assign repl_way = (all_ways_valid) ? rnd_way : inv_way; + assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid; + assign repl_way_oh_d = (shared_tlb_update_i.valid) ? 
shared_tlb_way_bin2oh(repl_way) : '0; + + lzc #( + .WIDTH(SHARED_TLB_WAYS) + ) i_lzc ( + .in_i (~way_valid), + .cnt_o (inv_way), + .empty_o(all_ways_valid) + ); + + lfsr #( + .LfsrWidth(8), + .OutWidth ($clog2(SHARED_TLB_WAYS)) + ) i_lfsr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .en_i (update_lfsr), + .out_o (rnd_way) + ); + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + assign tag_req = tag_wr_en | tag_rd_en; + assign tag_we = tag_wr_en; + assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr; + + assign pte_req = pte_wr_en | pte_rd_en; + assign pte_we = pte_wr_en; + assign pte_addr = pte_wr_en ? pte_wr_addr : pte_rd_addr; + + for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram + // Tag RAM + sram #( + .DATA_WIDTH($bits(shared_tag_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (tag_req[i]), + .we_i (tag_we[i]), + .addr_i (tag_addr), + .wuser_i('0), + .wdata_i(tag_wr_data), + .be_i ('1), + .ruser_o(), + .rdata_o(tag_rd_data[i]) + ); + + assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]); + + for (genvar a = 0; a < HYP_EXT + 1; a++) begin : g_content_sram + // PTE RAM + sram #( + .DATA_WIDTH($bits(pte_cva6_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) pte_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (pte_req[i]), + .we_i (pte_we[i]), + .addr_i (pte_addr), + .wuser_i('0), + .wdata_i(pte_wr_data[a]), + .be_i ('1), + .ruser_o(), + .rdata_o(pte_rd_data[i][a]) + ); + assign pte[i][a] = pte_cva6_t'(pte_rd_data[i][a]); + end + end +endmodule + +/* verilator lint_on WIDTH */ diff --git a/core/mmu_unify/cva6_tlb.sv b/core/mmu_unify/cva6_tlb.sv new file mode 100644 index 0000000000..bb39a8e8d6 --- /dev/null +++ b/core/mmu_unify/cva6_tlb.sv @@ -0,0 +1,462 @@ +// Copyright (c) 2021 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Angela Gonzalez PlanV Technology +// Date: 20/11/2023 +// +// Description: Translation Lookaside Buffer, parameterizable to Sv32, Sv39 +// or Sv39x4, fully set-associative. +// This module is a merge of the Sv32 TLB developed by Sebastien +// Jacq (Thales Research & Technology), the Sv39 TLB developed +// by Florian Zaruba and David Schaffenrath, and the Sv39x4 TLB by Bruno Sá.
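+//
+// A hypothetical instantiation for a plain Sv39 configuration (the values
+// below are illustrative assumptions, not taken from this patch):
+//
+//   cva6_tlb #(
+//     .pte_cva6_t       (riscv::pte_t),
+//     .tlb_update_cva6_t(tlb_update_cva6_t),
+//     .TLB_ENTRIES      (16),
+//     .HYP_EXT          (0),       // no hypervisor support
+//     .ASID_WIDTH       ('{16}),   // [asid_width] only, no VMID
+//     .VPN_LEN          (27),      // Sv39: three 9-bit VPN slices
+//     .PT_LEVELS        (3)        // Sv39: three page-table levels
+//   ) i_itlb ( /* ports elided */ );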
+// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2024-01-25 0.2 A.Gonzalez Generic TLB for CVA6 with Hypervisor support +// =========================================================================== // +module cva6_tlb + import ariane_pkg::*; +#( + parameter type pte_cva6_t = logic, + parameter type tlb_update_cva6_t = logic, + parameter int unsigned TLB_ENTRIES = 4, + parameter int unsigned HYP_EXT = 0, + parameter int unsigned ASID_WIDTH[HYP_EXT:0] = {1}, //[vmid_width,asid_width] + parameter int unsigned VPN_LEN = 1, + parameter int unsigned PT_LEVELS = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic [HYP_EXT*2:0] flush_i, // Flush signal [g_stage,vs stage, normal translation signal] + input logic [HYP_EXT*2:0] v_st_enbl_i, // v_i,g-stage enabled, s-stage enabled + // Update TLB + input tlb_update_cva6_t update_i, + // Lookup signals + input logic lu_access_i, + input logic [ASID_WIDTH[0]-1:0] lu_asid_i[HYP_EXT:0], //[lu_vmid,lu_asid] + input logic [riscv::VLEN-1:0] lu_vaddr_i, + output logic [riscv::GPLEN-1:0] lu_gpaddr_o, + output pte_cva6_t [HYP_EXT:0] lu_content_o, + input logic [ASID_WIDTH[0]-1:0] asid_to_be_flushed_i[HYP_EXT:0], //[vmid,asid] + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i[HYP_EXT:0], // [gpaddr,vaddr] + output logic [PT_LEVELS-2:0] lu_is_page_o, + output logic lu_hit_o +); + + // computes the paddr based on the page size, ppn and offset + function automatic logic [(riscv::GPLEN-1):0] make_gpaddr( + input logic s_st_enbl, input logic is_1G, input logic is_2M, + input logic [(riscv::VLEN-1):0] vaddr, input riscv::pte_t pte); + logic [(riscv::GPLEN-1):0] gpaddr; + if (s_st_enbl) begin + gpaddr = {pte.ppn[(riscv::GPPNW-1):0], vaddr[11:0]}; + // Giga page + if (is_1G) gpaddr[29:12] = vaddr[29:12]; + // Mega page + if (is_2M) gpaddr[20:12] = vaddr[20:12]; + end else begin + gpaddr = vaddr[(riscv::GPLEN-1):0]; + end + return gpaddr; + endfunction : make_gpaddr + + // computes the final gppn based on the guest physical address + function automatic logic [(riscv::GPPNW-1):0] make_gppn(input logic s_st_enbl, input logic is_1G, + input logic is_2M, input logic [28:0] vpn, + input riscv::pte_t pte); + logic [(riscv::GPPNW-1):0] gppn; + if (s_st_enbl) begin + gppn = pte.ppn[(riscv::GPPNW-1):0]; + if (is_2M) gppn[8:0] = vpn[8:0]; + if (is_1G) gppn[17:0] = vpn[17:0]; + end else begin + gppn = vpn; + end + return gppn; + endfunction : make_gppn + + // SV39 defines three levels of page tables + struct packed { + logic [HYP_EXT:0][ASID_WIDTH[0]-1:0] asid; + logic [PT_LEVELS+HYP_EXT-1:0][(VPN_LEN/PT_LEVELS)-1:0] vpn; + logic [PT_LEVELS-2:0][HYP_EXT:0] is_page; + logic [HYP_EXT*2:0] v_st_enbl; // v_i,g-stage enabled, s-stage enabled + logic valid; + } [TLB_ENTRIES-1:0] + tags_q, tags_n; + + pte_cva6_t [TLB_ENTRIES-1:0][HYP_EXT:0] content_q, content_n; + + logic [TLB_ENTRIES-1:0][PT_LEVELS-1:0] vpn_match; + logic [TLB_ENTRIES-1:0][PT_LEVELS-1:0] level_match; + logic [TLB_ENTRIES-1:0][HYP_EXT:0][PT_LEVELS-1:0] vaddr_vpn_match; + logic [TLB_ENTRIES-1:0][HYP_EXT:0][PT_LEVELS-1:0] vaddr_level_match; + logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic + logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy + logic [TLB_ENTRIES-1:0][HYP_EXT:0] match_asid; + logic [TLB_ENTRIES-1:0][PT_LEVELS-1:0] page_match; + logic [TLB_ENTRIES-1:0][HYP_EXT:0][PT_LEVELS-1:0] vpage_match; + logic 
[TLB_ENTRIES-1:0][PT_LEVELS-2:0] is_page_o; + logic [TLB_ENTRIES-1:0] match_stage, tag_valid; + pte_cva6_t g_content; + logic [TLB_ENTRIES-1:0][(riscv::GPPNW-1):0] gppn; + logic [ HYP_EXT*2:0] v_st_enbl; + + assign v_st_enbl = (HYP_EXT == 1) ? v_st_enbl_i : '1; + //------------- + // Translation + //------------- + + genvar i, x, z, w; + generate + for (i = 0; i < TLB_ENTRIES; i++) begin + for (x = 0; x < PT_LEVELS; x++) begin + //identify page_match for all TLB Entries + assign page_match[i][x] = x==0 ? 1 :((HYP_EXT==0 || x==(PT_LEVELS-1)) ? // PAGE_MATCH CONTAINS THE MATCH INFORMATION FOR EACH TAG OF is_1G and is_2M in sv39x4. HIGHER LEVEL (Giga page), THEN THERE IS THE Mega page AND AT THE LOWER LEVEL IS ALWAYS 1 + &(tags_q[i].is_page[PT_LEVELS-1-x] | (~v_st_enbl[HYP_EXT:0])): + ((&v_st_enbl[HYP_EXT:0]) ? + ((tags_q[i].is_page[PT_LEVELS-1-x][0] && (tags_q[i].is_page[PT_LEVELS-2-x][HYP_EXT] || tags_q[i].is_page[PT_LEVELS-1-x][HYP_EXT])) + || (tags_q[i].is_page[PT_LEVELS-1-x][HYP_EXT] && (tags_q[i].is_page[PT_LEVELS-2-x][0] || tags_q[i].is_page[PT_LEVELS-1-x][0]))): + tags_q[i].is_page[PT_LEVELS-1-x][0] && v_st_enbl[0] || tags_q[i].is_page[PT_LEVELS-1-x][HYP_EXT] && v_st_enbl[HYP_EXT])); + + //identify if vpn matches at all PT levels for all TLB entries + assign vpn_match[i][x] = (HYP_EXT == 1 && x == (PT_LEVELS - 1) && ~v_st_enbl[0]) ? // + lu_vaddr_i[12+((VPN_LEN/PT_LEVELS)*(x+1))-1:12+((VPN_LEN/PT_LEVELS)*x)] == tags_q[i].vpn[x] && lu_vaddr_i[12+VPN_LEN-1: 12+VPN_LEN-(VPN_LEN%PT_LEVELS)] == tags_q[i].vpn[x+1][(VPN_LEN%PT_LEVELS)-1:0]: // + lu_vaddr_i[12+((VPN_LEN/PT_LEVELS)*(x+1))-1:12+((VPN_LEN/PT_LEVELS)*x)] == tags_q[i].vpn[x]; + + //identify if there is a hit at each PT level for all TLB entries + assign level_match[i][x] = &vpn_match[i][PT_LEVELS-1:x] && page_match[i][x]; + + //identify vpage_match for all TLB Entries and vaddr_level match (if there is a hit at each PT level for all TLB entries on the vaddr) + for (z = 0; z < HYP_EXT + 1; z++) begin + assign vpage_match[i][z][x] = x == 0 ? 1 : tags_q[i].is_page[PT_LEVELS-1-x][z]; + assign vaddr_level_match[i][z][x]= &vaddr_vpn_match[i][z][PT_LEVELS-1:x] && vpage_match[i][z][x]; + + end + //identify if virtual address vpn matches at all PT levels for all TLB entries + assign vaddr_vpn_match[i][0][x] = vaddr_to_be_flushed_i[0][12+((VPN_LEN/PT_LEVELS)*(x+1))-1:12+((VPN_LEN/PT_LEVELS)*x)] == tags_q[i].vpn[x]; + + //update vpn field in tags_n for each TLB when the update is valid and the tag needs to be replaced + assign tags_n[i].vpn[x] = ((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? update_i.vpn[(1+x)*(VPN_LEN/PT_LEVELS)-1:x*(VPN_LEN/PT_LEVELS)] : tags_q[i].vpn[x]; + + end + + assign tags_n[i].asid = ((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? update_i.asid : tags_q[i].asid; + assign tags_n[i].is_page = ((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? update_i.is_page : tags_q[i].is_page; + + assign tags_n[i].v_st_enbl = ((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? update_i.v_st_enbl: tags_q[i].v_st_enbl; + assign tags_n[i].valid = ((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? 1'b1 : tag_valid[i]; + + + if (HYP_EXT == 1) begin + //THIS UPDATES THE EXTRA BITS OF VPN IN SV39x4 + assign tags_n[i].vpn[PT_LEVELS][(VPN_LEN%PT_LEVELS)-1:0] =((~(|flush_i)) && update_i.valid && replace_en[i] && !lu_hit_o) ? 
update_i.vpn[VPN_LEN-1: VPN_LEN-(VPN_LEN%PT_LEVELS)] : tags_q[i].vpn[PT_LEVELS][(VPN_LEN%PT_LEVELS)-1:0]; + //identify if GPADDR matches the GPPN + assign vaddr_vpn_match[i][HYP_EXT][0] = (vaddr_to_be_flushed_i[HYP_EXT][20:12] == gppn[i][8:0]); + assign vaddr_vpn_match[i][HYP_EXT][HYP_EXT] = (vaddr_to_be_flushed_i[HYP_EXT][29:21] == gppn[i][17:9]); + assign vaddr_vpn_match[i][HYP_EXT][HYP_EXT*2] = (vaddr_to_be_flushed_i[HYP_EXT][30+riscv::GPPN2:30] == gppn[i][18+riscv::GPPN2:18]); + + end + + for (w = 0; w < PT_LEVELS - 1; w++) begin + assign is_page_o[i][w] = page_match[i][PT_LEVELS - 1 - w]; //THIS REORGANIZES THE PAGES TO MATCH THE OUTPUT STRUCTURE (2M,1G) + end + end + endgenerate + + always_comb begin : translation + + // default assignment + lu_hit = '{default: 0}; + lu_hit_o = 1'b0; + lu_content_o = '{default: 0}; + lu_is_page_o = '{default: 0}; + match_asid = '{default: 0}; + match_stage = '{default: 0}; + g_content = '{default: 0}; + lu_gpaddr_o = '{default: 0}; + + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + // first level match, this may be a giga page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + match_asid[i][0] = (((lu_asid_i[0][ASID_WIDTH[0]-1:0] == tags_q[i].asid[0][ASID_WIDTH[0]-1:0]) || content_q[i][0].g) && v_st_enbl[0]) || !v_st_enbl[0]; + + if (HYP_EXT == 1) begin + match_asid[i][HYP_EXT] = (lu_asid_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] && v_st_enbl[HYP_EXT]) || !v_st_enbl[HYP_EXT]; + end + + // check if translation is a: S-Stage and G-Stage, S-Stage only or G-Stage only translation and virtualization mode is on/off + match_stage[i] = tags_q[i].v_st_enbl == v_st_enbl; + + if (tags_q[i].valid && &match_asid[i] && match_stage[i]) begin + + if (HYP_EXT == 1 && vpn_match[i][HYP_EXT*2]) + lu_gpaddr_o = make_gpaddr( + v_st_enbl[0], + tags_q[i].is_page[0][0], + tags_q[i].is_page[1][0], + lu_vaddr_i, + content_q[i][0] + ); + + if (|level_match[i]) begin + lu_is_page_o = is_page_o[i]; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + + if (HYP_EXT == 1) begin + // Compute G-Stage PPN based on the gpaddr + g_content = content_q[i][HYP_EXT]; + if (tags_q[i].is_page[HYP_EXT][HYP_EXT]) g_content.ppn[8:0] = lu_gpaddr_o[20:12]; + if (tags_q[i].is_page[0][HYP_EXT]) g_content.ppn[17:0] = lu_gpaddr_o[29:12]; + // Output G-stage and S-stage content + lu_content_o[HYP_EXT] = level_match[i][PT_LEVELS-1] ? 
content_q[i][HYP_EXT] : g_content; + end + end + end + end + end + + + + logic [HYP_EXT:0]asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high + logic [HYP_EXT:0] vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high + + localparam int unsigned VADDR_WIDTH[1:0] = {riscv::GPLEN, riscv::VLEN}; + + genvar a; + generate + for (a = 0; a < HYP_EXT + 1; a++) begin + assign asid_to_be_flushed_is0[a] = ~(|asid_to_be_flushed_i[a][ASID_WIDTH[a]-1:0]); + assign vaddr_to_be_flushed_is0[a] = ~(|vaddr_to_be_flushed_i[a][VADDR_WIDTH[a]-1:0]); + end + endgenerate + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + content_n = content_q; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + + tag_valid[i] = tags_q[i].valid; + + if (HYP_EXT == 1) begin + gppn[i] = make_gppn( + tags_q[i].v_st_enbl[0], + tags_q[i].is_page[0][0], + tags_q[i].is_page[1][0], + { + tags_q[i].vpn[3][(VPN_LEN%PT_LEVELS)-1:0], + tags_q[i].vpn[2], + tags_q[i].vpn[1], + tags_q[i].vpn[0] + }, + content_q[i][0] + ); + end + + + if (flush_i[0]) begin + if (!tags_q[i].v_st_enbl[HYP_EXT*2] || HYP_EXT == 0) begin + // invalidate logic + // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) + if (asid_to_be_flushed_is0[0] && vaddr_to_be_flushed_is0[0]) tag_valid[i] = 1'b0; + // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0[0] && (|vaddr_level_match[i][0] ) && (~vaddr_to_be_flushed_is0[0])) + tag_valid[i] = 1'b0; + // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) + else if ((!content_q[i][0].g) && (|vaddr_level_match[i][0]) && (asid_to_be_flushed_i[0][ASID_WIDTH[0]-1:0] == tags_q[i].asid[0][ASID_WIDTH[0]-1:0] ) && (!vaddr_to_be_flushed_is0[0]) && (!asid_to_be_flushed_is0[0])) + tag_valid[i] = 1'b0; + // the entry is flushed if it's not global, and the asid matches and vaddr is 0. 
("SFENCE.VMA 0 asid" case) + else if ((!content_q[i][0].g) && (vaddr_to_be_flushed_is0[0]) && (asid_to_be_flushed_i[0][ASID_WIDTH[0]-1:0] == tags_q[i].asid[0][ASID_WIDTH[0]-1:0] ) && (!asid_to_be_flushed_is0[0])) + tag_valid[i] = 1'b0; + end + end else if (flush_i[HYP_EXT] && HYP_EXT == 1) begin + if (tags_q[i].v_st_enbl[HYP_EXT*2] && tags_q[i].v_st_enbl[0]) begin + // invalidate logic + // flush everything if current VMID matches and ASID is 0 and vaddr is 0 ("SFENCE.VMA/HFENCE.VVMA x0 x0" case) + if (asid_to_be_flushed_is0[0] && vaddr_to_be_flushed_is0[0] && ((tags_q[i].v_st_enbl[HYP_EXT] && lu_asid_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0]) || !tags_q[i].v_st_enbl[HYP_EXT])) + tag_valid[i] = 1'b0; + // flush vaddr in all addressing space if current VMID matches ("SFENCE.VMA/HFENCE.VVMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0[0] && (|vaddr_level_match[i][0]) && (~vaddr_to_be_flushed_is0[0]) && ((tags_q[i].v_st_enbl[HYP_EXT] && lu_asid_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[1][ASID_WIDTH[HYP_EXT]-1:0]) || !tags_q[i].v_st_enbl[HYP_EXT])) + tag_valid[i] = 1'b0; + // the entry is flushed if it's not global and asid and vaddr and current VMID matches with the entry to be flushed ("SFENCE.VMA/HFENCE.VVMA vaddr asid" case) + else if ((!content_q[i][0].g) && (|vaddr_level_match[i][0]) && (asid_to_be_flushed_i[0][ASID_WIDTH[0]-1:0] == tags_q[i].asid[0][ASID_WIDTH[0]-1:0] && ((tags_q[i].v_st_enbl[HYP_EXT] && lu_asid_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0]) || !tags_q[i].v_st_enbl[HYP_EXT])) && (!vaddr_to_be_flushed_is0[0]) && (!asid_to_be_flushed_is0[0])) + tag_valid[i] = 1'b0; + // the entry is flushed if it's not global, and the asid and the current VMID matches and vaddr is 0. ("SFENCE.VMA/HFENCE.VVMA 0 asid" case) + else if ((!content_q[i][0].g) && (vaddr_to_be_flushed_is0[0]) && (asid_to_be_flushed_i[0][ASID_WIDTH[0]-1:0] == tags_q[i].asid[0][ASID_WIDTH[0]-1:0] && ((tags_q[i].v_st_enbl[HYP_EXT] && lu_asid_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0]) || !tags_q[i].v_st_enbl[HYP_EXT])) && (!asid_to_be_flushed_is0[0])) + tag_valid[i] = 1'b0; + end + end else if (flush_i[HYP_EXT*2] && HYP_EXT == 1) begin + if (tags_q[i].v_st_enbl[HYP_EXT]) begin + // invalidate logic + // flush everything if vmid is 0 and addr is 0 ("HFENCE.GVMA x0 x0" case) + if (asid_to_be_flushed_is0[HYP_EXT] && vaddr_to_be_flushed_is0[HYP_EXT]) + tag_valid[i] = 1'b0; + // flush gpaddr in all addressing space ("HFENCE.GVMA gpaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0[HYP_EXT] && (|vaddr_level_match[i][HYP_EXT] ) && (~vaddr_to_be_flushed_is0[HYP_EXT])) + tag_valid[i] = 1'b0; + // the entry vmid and gpaddr both matches with the entry to be flushed ("HFENCE.GVMA gpaddr vmid" case) + else if ((|vaddr_level_match[i][HYP_EXT]) && (asid_to_be_flushed_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0]) && (~vaddr_to_be_flushed_is0[HYP_EXT]) && (~asid_to_be_flushed_is0[HYP_EXT])) + tag_valid[i] = 1'b0; + // the entry is flushed if the vmid matches and gpaddr is 0. 
("HFENCE.GVMA 0 vmid" case) + else if ((vaddr_to_be_flushed_is0[HYP_EXT]) && (asid_to_be_flushed_i[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0] == tags_q[i].asid[HYP_EXT][ASID_WIDTH[HYP_EXT]-1:0]) && (!asid_to_be_flushed_is0[HYP_EXT])) + tag_valid[i] = 1'b0; + end + // normal replacement + end else if (update_i.valid & replace_en[i] && !lu_hit_o) begin + // update content as well + content_n[i] = update_i.content; + end + end + end + + // ----------------------------------------------- + // PLRU - Pseudo Least Recently Used Replacement + // ----------------------------------------------- + logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; + always_comb begin : plru_replacement + plru_tree_n = plru_tree_q; + // The PLRU-tree indexing: + // lvl0 0 + // / \ + // / \ + // lvl1 1 2 + // / \ / \ + // lvl2 3 4 5 6 + // / \ /\/\ /\ + // ... ... ... ... + // Just predefine which nodes will be set/cleared + // E.g. for a TLB with 8 entries, the for-loop is semantically + // equivalent to the following pseudo-code: + // unique case (1'b1) + // lu_hit[7]: plru_tree_n[0, 2, 6] = {1, 1, 1}; + // lu_hit[6]: plru_tree_n[0, 2, 6] = {1, 1, 0}; + // lu_hit[5]: plru_tree_n[0, 2, 5] = {1, 0, 1}; + // lu_hit[4]: plru_tree_n[0, 2, 5] = {1, 0, 0}; + // lu_hit[3]: plru_tree_n[0, 1, 4] = {0, 1, 1}; + // lu_hit[2]: plru_tree_n[0, 1, 4] = {0, 1, 0}; + // lu_hit[1]: plru_tree_n[0, 1, 3] = {0, 0, 1}; + // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; + // default: begin /* No hit */ end + // endcase + for ( + int unsigned i = 0; i < TLB_ENTRIES; i++ + ) begin + automatic int unsigned idx_base, shift, new_index; + // we got a hit so update the pointer as it was least recently used + if (lu_hit[i] & lu_access_i) begin + // Set the nodes to the values we would expect + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + // to circumvent the 32 bit integer arithmetic assignment + new_index = ~((i >> (shift - 1)) & 32'b1); + plru_tree_n[idx_base+(i>>shift)] = new_index[0]; + end + end + end + // Decode tree to write enable signals + // Next for-loop basically creates the following logic for e.g. an 8 entry + // TLB (note: pseudo-code obviously): + // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1} + // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0} + // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1} + // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0} + // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1} + // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0} + // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1} + // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0} + // For each entry traverse the tree. If every tree-node matches, + // the corresponding bit of the entry's index, this is + // the next entry to replace. + for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin + automatic logic en; + automatic int unsigned idx_base, shift, new_index; + en = 1'b1; + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... 
+ shift = $clog2(TLB_ENTRIES) - lvl; + + // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1); + new_index = (i >> (shift - 1)) & 32'b1; + if (new_index[0]) begin + en &= plru_tree_q[idx_base+(i>>shift)]; + end else begin + en &= ~plru_tree_q[idx_base+(i>>shift)]; + end + end + replace_en[i] = en; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + tags_q <= '{default: 0}; + content_q <= '{default: 0}; + plru_tree_q <= '{default: 0}; + end else begin + tags_q <= tags_n; + content_q <= content_n; + plru_tree_q <= plru_tree_n; + end + end + //-------------- + // Sanity checks + //-------------- + + //pragma translate_off +`ifndef VERILATOR + + initial begin : p_assertions + assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1)) + else begin + $error("TLB size must be a multiple of 2 and greater than 1"); + $stop(); + end + assert (ASID_WIDTH[0] >= 1) + else begin + $error("ASID width must be at least 1"); + $stop(); + end + end + + // Just for checking + function int countSetBits(logic [TLB_ENTRIES-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1)) + else begin + $error("More than one hit in TLB!"); + $stop(); + end + assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1)) + else begin + $error("More than one TLB entry selected for next replace!"); + $stop(); + end + +`endif + //pragma translate_on + +endmodule diff --git a/docs/01_cva6_user/Programmer_View.rst b/docs/01_cva6_user/Programmer_View.rst index e54d152272..ba28e09ff5 100644 --- a/docs/01_cva6_user/Programmer_View.rst +++ b/docs/01_cva6_user/Programmer_View.rst @@ -61,10 +61,14 @@ Note: The addition of the H Extension is in the process. After that, HS, VS, and RISC-V Virtual Memory --------------------- -CV32A6 supports the RISC-V **Sv32** virtual memory when the ``MMUEn`` parameter is set to 1 (and ``Xlen`` is set to 32). +CV32A6 supports the RISC-V **Sv32** virtual memory when the ``MMUEn`` parameter is set to 1 (and ``Xlen`` is set to 32). CV64A6 supports the RISC-V **Sv39** virtual memory when the ``MMUEn`` parameter is set to 1 (and ``Xlen`` is set to 64). +Within CV64A6, the hypervisor extension, which supports **Sv39x4** virtual memory, is enabled when the ``CVA6ConfigHExtEn`` parameter is set to 1 (and ``Xlen`` is set to 64). + +**PENDING: WHAT IS THE RIGHT PART NUMBER FOR THE HYPERVISOR EXTENSION? HOW IS IT ENABLED VIA SW?** + By default, CV32A6 and CV64A6 are in RISC-V **Bare** mode. **Sv32** or **Sv39** are enabled by writing 1 to ``satp[0]`` register bit. When the ``MMUEn`` parameter is set to 0, CV32A6 and CV64A6 are always in RISC-V **Bare** mode; ``satp[0]`` remains at 0 and writes to this register are ignored. @@ -73,9 +77,7 @@ Notes for the integrator: * The virtual memory is implemented by a memory management unit (MMU) that accelerates the translation from virtual memory addresses (as handled by the core) to physical memory addresses. The MMU integrates translation lookaside buffers (TLB) and a hardware page table walker (PTW). The number of instruction and data TLB entries are configured with ``InstrTlbEntries`` and ``DataTlbEntries``. -* The CV32A6 MMU will evolve with a microarchitectural optimization featuring two levels of TLB: level 1 TBL (sized by ``InstrTlbEntries`` and ``DataTlbEntries``) and a shared level 2 TLB. This optimization remains to be implemented in CV64A6.
The optimization has no consequences on the programmer's view. - -* The addition of the hypervisor support will come with **Sv39x4** virtual memory that is not yet documented here. +* The MMU is implemented with a microarchitectural optimization featuring two levels of TLB: level 1 TLB (sized by ``InstrTlbEntries`` and ``DataTlbEntries``) and a shared level 2 TLB. The optimization has no consequences on the programmer's view. Memory Alignment ---------------- diff --git a/docs/04_cv32a6_design/images/cva6_tlb_entry.png b/docs/04_cv32a6_design/images/cva6_tlb_entry.png index cb691d6e87..3ef5140772 100644 Binary files a/docs/04_cv32a6_design/images/cva6_tlb_entry.png and b/docs/04_cv32a6_design/images/cva6_tlb_entry.png differ diff --git a/docs/04_cv32a6_design/images/cva6_tlb_hit.png b/docs/04_cv32a6_design/images/cva6_tlb_hit.png index d24b61e5c0..5773d3cd8e 100644 Binary files a/docs/04_cv32a6_design/images/cva6_tlb_hit.png and b/docs/04_cv32a6_design/images/cva6_tlb_hit.png differ diff --git a/docs/04_cv32a6_design/images/in_out_tlb.png b/docs/04_cv32a6_design/images/in_out_tlb.png index 31858be9d8..b85d5d8f06 100644 Binary files a/docs/04_cv32a6_design/images/in_out_tlb.png and b/docs/04_cv32a6_design/images/in_out_tlb.png differ diff --git a/docs/04_cv32a6_design/images/mmu_in_out.png b/docs/04_cv32a6_design/images/mmu_in_out.png index ae98acbe59..e9e6c14367 100644 Binary files a/docs/04_cv32a6_design/images/mmu_in_out.png and b/docs/04_cv32a6_design/images/mmu_in_out.png differ diff --git a/docs/04_cv32a6_design/images/ptw_in_out.png b/docs/04_cv32a6_design/images/ptw_in_out.png index ab5aad7bbe..e8aa3d8986 100644 Binary files a/docs/04_cv32a6_design/images/ptw_in_out.png and b/docs/04_cv32a6_design/images/ptw_in_out.png differ diff --git a/docs/04_cv32a6_design/images/ptw_nlvl.png b/docs/04_cv32a6_design/images/ptw_nlvl.png index 1de2811465..efb35a596d 100644 Binary files a/docs/04_cv32a6_design/images/ptw_nlvl.png and b/docs/04_cv32a6_design/images/ptw_nlvl.png differ diff --git a/docs/04_cv32a6_design/images/ptw_pte_1.png b/docs/04_cv32a6_design/images/ptw_pte_1.png index 7be987971d..78dc6cabd6 100644 Binary files a/docs/04_cv32a6_design/images/ptw_pte_1.png and b/docs/04_cv32a6_design/images/ptw_pte_1.png differ diff --git a/docs/04_cv32a6_design/images/sfence_vaddr_asid.png b/docs/04_cv32a6_design/images/sfence_vaddr_asid.png index 12c8363c1e..ae9102b725 100644 Binary files a/docs/04_cv32a6_design/images/sfence_vaddr_asid.png and b/docs/04_cv32a6_design/images/sfence_vaddr_asid.png differ diff --git a/docs/04_cv32a6_design/images/sfence_vaddr_x0.png b/docs/04_cv32a6_design/images/sfence_vaddr_x0.png index f185b8d775..4aea436b08 100644 Binary files a/docs/04_cv32a6_design/images/sfence_vaddr_x0.png and b/docs/04_cv32a6_design/images/sfence_vaddr_x0.png differ diff --git a/docs/04_cv32a6_design/images/sfence_x0_asid.png b/docs/04_cv32a6_design/images/sfence_x0_asid.png index 6324be4678..4dc0a4c861 100644 Binary files a/docs/04_cv32a6_design/images/sfence_x0_asid.png and b/docs/04_cv32a6_design/images/sfence_x0_asid.png differ diff --git a/docs/04_cv32a6_design/images/sfence_x0_x0.png b/docs/04_cv32a6_design/images/sfence_x0_x0.png index 3c5c857665..22abcc7317 100644 Binary files a/docs/04_cv32a6_design/images/sfence_x0_x0.png and b/docs/04_cv32a6_design/images/sfence_x0_x0.png differ diff --git a/docs/04_cv32a6_design/images/shared_tlb_in_out.png b/docs/04_cv32a6_design/images/shared_tlb_in_out.png index 3073acfae5..96b69f7382 100644 Binary files
a/docs/04_cv32a6_design/images/shared_tlb_in_out.png and b/docs/04_cv32a6_design/images/shared_tlb_in_out.png differ diff --git a/docs/04_cv32a6_design/source/cv32a6_execute.rst b/docs/04_cv32a6_design/source/cv32a6_execute.rst index 092cb3257b..f252ba7d4d 100644 --- a/docs/04_cv32a6_design/source/cv32a6_execute.rst +++ b/docs/04_cv32a6_design/source/cv32a6_execute.rst @@ -29,7 +29,7 @@ Load Store Unit (LSU) Memory Management Unit ---------------------- -The Memory Management Unit (MMU) SV32 module is a crucial component in the RISC-V-based processor, serving as the backbone for virtual memory management and address translation. +The Memory Management Unit (MMU) module is a crucial component in the RISC-V-based processor, serving as the backbone for virtual memory management and address translation. The MMU block can be parameterized to support Sv32, Sv39 and Sv39x4 virtual memory. .. figure:: ../images/mmu_in_out.png :name: **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 @@ -39,7 +39,7 @@ The Memory Management Unit (MMU) SV32 module is a crucial component in the RISC- **Figure 1:** Inputs and Outputs of CVA6 MMU SV32 -At its core, the MMU SV32 plays a pivotal role in translating virtual addresses into their corresponding physical counterparts. This translation process is paramount for providing memory protection, isolation, and efficient memory management in modern computer systems. Importantly, it handles both instruction and data accesses, ensuring a seamless interaction between the processor and virtual memory. Within the MMU, several major blocks play pivotal roles in this address translation process. These includes: +At its core, the MMU plays a pivotal role in translating virtual addresses into their corresponding physical counterparts. This translation process is paramount for providing memory protection, isolation, and efficient memory management in modern computer systems. Importantly, it handles both instruction and data accesses, ensuring a seamless interaction between the processor and virtual memory. Within the MMU, several major blocks play pivotal roles in this address translation process. These include: * Instruction TLB (ITLB) * Data TLB (DTLB) @@ -47,22 +47,22 @@ At its core, the MMU SV32 plays a pivotal role in translating virtual addresses * Page Table Walker (PTW) .. figure:: ../images/mmu_major_blocks.png - :name: **Figure 2:** Major Blocks in CVA6 MMU SV32 + :name: **Figure 2:** Major Blocks in CVA6 MMU :align: center :width: 60% :alt: mmu_major_blocks - **Figure 2:** Major Blocks in CVA6 MMU SV32 + **Figure 2:** Major Blocks in CVA6 MMU -The MMU SV32 manages privilege levels and access control, enforcing permissions for user and supervisor modes while handling access exceptions. It employs Translation Lookaside Buffers (TLBs) for efficient address translation, reducing the need for page table access. +The MMU manages privilege levels and access control, enforcing permissions for user and supervisor modes while handling access exceptions. It employs Translation Lookaside Buffers (TLBs) for efficient address translation, reducing the need for page table access.
TLB hits yield quick translations, but on misses, the shared TLB is consulted, and if necessary, the Page Table Walker (PTW) performs page table walks, updating TLBs and managing exceptions during the process. -In addition to these functionalities, the MMU SV32 seamlessly integrates support for Physical Memory Protection (PMP), enabling it to enforce access permissions and memory protection configurations as specified by the PMP settings. This additional layer of security and control enhances the management of memory accesses +In addition to these functionalities, the MMU seamlessly integrates support for Physical Memory Protection (PMP), enabling it to enforce access permissions and memory protection configurations as specified by the PMP settings. This additional layer of security and control enhances the management of memory accesses. .. raw:: html Instruction and Data Interfaces -The MMU SV32 maintains interfaces with the instruction cache (ICache) and the load-store unit (LSU). It receives virtual addresses from these components and proceeds to translate them into physical addresses, a fundamental task for ensuring proper program execution and memory access. +The MMU maintains interfaces with the instruction cache (ICache) and the load-store unit (LSU). It receives virtual addresses from these components and proceeds to translate them into physical addresses, a fundamental task for ensuring proper program execution and memory access. .. raw:: html @@ -70,7 +70,7 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo .. raw:: html -

Table 1: CVA6 MMU SV32 Input Output Signals

+

Table 1: CVA6 MMU Input Output Signals

.. list-table:: :header-rows: 1 @@ -102,14 +102,14 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo * - ``enable_translation_i`` - in - CSR RegFile - - logic - - Indicate address translation request for instruction + - logic [HYP_EXT*2:0] + - Bit 0 indicates address translation request for instruction. In Hypervisor mode, bit 1 enables virtual memory translation for instructions, and bit 2 indicates the virtualization mode state * - ``en_ld_st_translation_i`` - in - CSR RegFile - logic - - Indicate address translation request for load or store + - Bit 0 indicates address translation request for load or store. In Hypervisor mode, bit 1 enables virtual memory translation for load or store, and bit 2 indicates the virtualization mode at which loads and stores should happen * - ``icache_areq_i`` - in @@ -141,12 +141,24 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo - logic [riscv::VLEN-1:0] - Virtual Address In + * - ``lsu_tinst_i`` + - in + - Load Store Unit + - riscv::xlen_t + - Transformed Instruction In when Hypervisor Extension is enabled. Set to 0 (unused) when not. + * - ``lsu_is_store_i`` - in - Store Unit - logic - Translation is requested by a store + * - ``csr_hs_ld_st_inst_o`` + - out + - CSR RegFile + - logic + - Indicates a hypervisor load/store instruction. + * - ``lsu_dtlb_hit_o`` - out - Store / Load Unit @@ -192,44 +204,56 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo * - ``sum_i`` - in - CSR RegFile - - logic - - Supervisor User Memory Access bit in xSTATUS CSR register + - logic [HYP_EXT:0] + - Bit 0 is the Supervisor User Memory Access bit in xSTATUS CSR register. Bit 1 is the analogous one for virtual supervisor when Hypervisor extension is enabled. * - ``mxr_i`` - in - CSR RegFile - - logic - - Make Executable Readable bit in xSTATUS CSR register + - logic [HYP_EXT:0] + - Bit 0 is the Make Executable Readable bit in xSTATUS CSR register. Bit 1 is the analogous one for virtual supervisor when Hypervisor extension is enabled. - * - ``satp_ppn_I`` + * - ``hlvx_inst_i`` + - in + - Store / Load Unit + - logic [HYP_EXT:0] + - Indicates that the instruction is a hypervisor load/store with execute permissions + + * - ``hs_ld_st_inst_i`` - in - CSR RegFile - - logic [riscv::PPNW-1:0] - - PPN of top level page table from SATP register + - logic [HYP_EXT:0] + - Indicates that the instruction is a hypervisor load/store instruction + + * - ``satp_ppn_i`` + - in + - CSR RegFile + - logic [HYP_EXT*2:0][riscv::PPNW-1:0] + - Vector 0 is the PPN of top level page table from SATP register. Vectors 1 and 2 are the analogous ones for virtual supervisor and hypervisor when Hypervisor extension is enabled. * - ``asid_i`` - in - CSR RegFile - - logic [ASID_WIDTH-1:0] - - ASID to for the lookup + - logic [HYP_EXT*2:0][ASID_WIDTH-1:0] + - Vector 0 is the ASID for the lookup. Vectors 1 and 2 are the analogous ones for virtual supervisor and hypervisor when Hypervisor extension is enabled. - * - ``asid_to_be_flushed`` + * - ``asid_to_be_flushed_i`` - in - Execute Stage - - logic [ASID_WIDTH-1:0] - - ASID of the entry to be flushed. + - logic [HYP_EXT:0][ASID_WIDTH-1:0] + - Vector 0 is the ASID of the entry to be flushed. Vector 1 is the analogous one for virtual supervisor when Hypervisor extension is enabled. * - ``vaddr_to_be_flushed_i`` - in - Execute Stage - - logic [riscv::VLEN-1:0] - - Virtual address of the entry to be flushed.
+    - logic [HYP_EXT:0][riscv::VLEN-1:0]
+    - Vector 0 is the virtual address of the entry to be flushed. Vector 1 is the analogous one for virtual supervisor when the Hypervisor extension is enabled.

   * - ``flush_tlb_i``
     - in
     - Controller
-    - logic
-    - SFENCE.VMA committed
+    - logic [HYP_EXT*2:0]
+    - Bit 0 indicates SFENCE.VMA committed. When the Hypervisor extension is enabled, bits 1 and 2 respectively indicate SFENCE.VVMA and SFENCE.GVMA committed.

   * - ``itlb_miss_o``
     - out
@@ -332,6 +356,18 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo
     - riscv::xlen_t
     - Additional information of causing exception (e.g. instruction causing it), address of LD/ST fault

+  * - ``tval2``
+    - logic [riscv::GPLEN-1:0]
+    - Additional information when the causing exception is a guest exception (used only in hypervisor mode)
+
+  * - ``tinst``
+    - riscv::xlen_t
+    - Transformed instruction information
+
+  * - ``gva``
+    - logic
+    - Signals when a guest virtual address is written to tval
+
   * - ``valid``
     - logic
     - Indicate that exception is valid
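Taken together, the exception-related fields above map naturally onto a packed struct. The sketch below is illustrative only: the field set follows the table, but the exact declaration in ``ariane_pkg`` (naming, ordering and packing) may differ.

.. code-block:: systemverilog

   // Sketch only: field set follows the exception entries in the table above.
   typedef struct packed {
     riscv::xlen_t            cause; // exception cause code
     riscv::xlen_t            tval;  // additional info, e.g. faulting address or instruction
     logic [riscv::GPLEN-1:0] tval2; // guest info for guest exceptions (hypervisor only)
     riscv::xlen_t            tinst; // transformed instruction
     logic                    gva;   // tval holds a guest virtual address
     logic                    valid; // the exception is valid
   } exception_t;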
@@ -365,15 +401,15 @@ The MMU SV32 maintains interfaces with the instruction cache (ICache) and the lo
.. raw:: html

-   Control Flow in MMU SV32 Module
+   Control Flow in MMU Module

.. figure:: ../images/mmu_control_flow.png
-   :name: **Figure 3:** Control Flow in CVA6 MMU SV32
+   :name: **Figure 3:** Control Flow in CVA6 MMU
   :align: center
   :width: 95%
   :alt: mmu_control_flow

-   **Figure 3:** Control Flow in CVA6 MMU SV32
+   **Figure 3:** Control Flow in CVA6 MMU

.. raw:: html

@@ -397,8 +433,8 @@ The IF stage initiates a request to retrieve memory content at a specific virtua
If virtual memory translation is enabled for instruction fetches, the following operations are performed in the instruction interface:

* Compatibility of the requested virtual address with the selected page-based address translation scheme is checked.
-* For 4K page translation, the module determines the fetch physical address by combining the physical page number (PPN) from ITLB content and the offset from the virtual address.
-* In the case of Mega page translation, if the ITLB indicates a 4M page, the VPN0 from the fetch virtual address is written to the PPN0 of the fetch physical address to ensure alignment for superpage translation.
+* For page translation, the module determines the fetch physical address by combining the physical page number (PPN) from ITLB content and the offset from the virtual address.
+* Depending on the size of the identified page, the PPN of the fetch physical address is updated with the corresponding bits of the VPN to ensure alignment for superpage translation.
* If the Instruction TLB (ITLB) lookup hits, the fetch valid signal (which indicates a valid physical address) is activated in response to the input fetch request. Memory region accessibility is checked from the perspective of the fetch operation, potentially triggering a page fault exception in case of an access error or insufficient PMP permission.
* In case of an ITLB miss, if the page table walker (PTW) is active (only active if there is a shared TLB miss) and handling instruction fetches, the fetch valid signal is determined based on PTW errors or access exceptions.
@@ -416,7 +452,7 @@ If address translation is enabled for load or store, and no misaligned exception
* Initially, translation is assumed to be invalid, signified by the MMU to the LSU.
* The translated physical address is formed by combining the PPN from the Page Table Entry (PTE) and the offset from the virtual address requiring translation. It is sent one cycle later, due to the additional bank of registers that delays the MMU's answer. The PPN from the PTE is also shared separately with the LSU in the same cycle as the hit.
-* In the case of superpage translation, as in SV32, known as the 4M page, PPN0 of the translated physical address and the separately shared PPN are updated with the VPN0 of the virtual address.
+* In the case of superpage translation, the PPN of the translated physical address and the separately shared PPN are updated with the VPN of the virtual address.

If a Data TLB (DTLB) hit occurs, it indicates a valid translation, and various fault checks are performed depending on whether it's a load or store request.

@@ -481,13 +517,20 @@ The inputs and output signals of the TLB are shown in the following two figures.
   * - ``flush_i``
     - in
     - Controller
-    - logic
-    - Asynchronous reset active low
+    - logic [HYP_EXT*2:0]
+    - SFENCE committed
+
+  * - ``v_st_enbl_i``
+    - in
+    - Controller
+    - logic [HYP_EXT*2:0]
+    - Used only in Hypervisor mode. Bit 0 indicates an address translation request (S-stage), bit 1 enables virtual memory translation (G-stage), and bit 2 indicates the virtualization mode state
+
   * - ``update_i``
     - in
     - Shared TLB
-    - tlb_update_sv32_t
+    - tlb_update_cva6_t
     - Updated tag and content of TLB

   * - ``lu_access_i``
     - in
@@ -498,9 +541,9 @@ The inputs and output signals of the TLB are shown in the following two figures.
   * - ``lu_asid_i``
     - in
-    - CSR RegFile
-    - logic[ASID_WIDTH-1:0]
-    - ASID (Address Space Identifier) for the lookup
+    - CVA6 MMU
+    - logic[ASID_WIDTH[0]-1:0]
+    - Vector 0 is the ASID (Address Space Identifier) for the lookup. Vector 1 is the analogous one for virtual supervisor or hypervisor when the Hypervisor extension is enabled.

   * - ``lu_vaddr_i``
     - in
@@ -510,31 +553,31 @@ The inputs and output signals of the TLB are shown in the following two figures.
   * - ``lu_content_o``
     - out
-    - MMU SV32
-    - riscv::pte_sv32_t
+    - CVA6 MMU
+    - pte_cva6_t [HYP_EXT:0]
     - Output for the content of the TLB entry

   * - ``asid_to_be_flushed_i``
     - in
     - Execute Stage
-    - logic[ASID_WIDTH-1:0]
-    - ASID of the entry to be flushed
+    - logic [HYP_EXT:0][ASID_WIDTH[0]-1:0]
+    - Vector 0 is the ASID of the entry to be flushed. Vector 1 is the analogous one for virtual supervisor when the Hypervisor extension is enabled.

   * - ``vaddr_to_be_flushed_i``
     - in
     - Execute Stage
-    - logic[riscv::VLEN-1:0]
-    - Virtual address of the entry to be flushed
+    - logic [HYP_EXT:0][riscv::VLEN-1:0]
+    - Vector 0 is the virtual address of the entry to be flushed. Vector 1 is the analogous one for virtual supervisor when the Hypervisor extension is enabled.

-  * - ``lu_is_4M_o``
+  * - ``lu_is_page_o``
     - out
-    - MMU SV32
-    - logic
-    - Output indicating whether the TLB entry corresponds to a 4MB page
+    - CVA6 MMU
+    - logic [PT_LEVELS-2:0]
+    - Output indicating whether the TLB entry corresponds to a page at the different levels

   * - ``lu_hit_o``
     - out
-    - MMU SV32
+    - CVA6 MMU
     - logic
     - Output indicating whether the lookup resulted in a hit or miss

@@ -544,7 +587,7 @@ The inputs and output signals of the TLB are shown in the following two figures.
.. raw:: html

-

Table 7: SV32 TLB Update Struct (tlb_update_sv32_t)

+

Table 7: CVA6 TLB Update Struct (tlb_update_cva6_t)

.. list-table::
   :header-rows: 1

@@ -557,25 +600,25 @@ The inputs and output signals of the TLB are shown in the following two figures.
     - logic
     - Indicates whether the TLB update entry is valid or not

-  * - ``is_4M``
-    - logic
-    - Indicates if the TLB entry corresponds to a 4MB page
+  * - ``is_page``
+    - logic [PT_LEVELS-2:0][HYP_EXT:0]
+    - Indicates if the TLB entry corresponds to a page at the different levels. When the Hypervisor extension is used, it also includes information for the G-stage.

   * - ``vpn``
-    - logic[19:0]
-    - Virtual Page Number (VPN) used for updating the TLB, consisting of 20 bits
+    - logic[VPN_LEN-1:0]
+    - Virtual Page Number (VPN) used for updating the TLB

   * - ``asid``
-    - logic[8:0]
-    - Address Space Identifier (ASID) used for updating the TLB, with a length of 9 bits for Sv32 MMU
+    - logic[HYP_EXT:0][ASID_WIDTH[0]-1:0]
+    - Vector 0 is the Address Space Identifier (ASID) used for updating the TLB. Vector 1 is the analogous one for virtual supervisor or hypervisor when the Hypervisor extension is enabled.

   * - ``content``
-    - riscv::pte_sv32_t
-    - Content of the TLB update entry, defined by the structure
+    - pte_cva6_t [HYP_EXT:0]
+    - Content of the TLB update entry (for both the G and S stages when applicable), defined by the structure

.. raw:: html

-

Table 8: SV32 PTE Struct (riscv::pte_sv32_t)

+

Table 8: CVA6 PTE Struct (pte_cva6_t)

.. list-table::
   :header-rows: 1

@@ -585,8 +628,8 @@ The inputs and output signals of the TLB are shown in the following two figures.
     - Description

   * - ``ppn``
-    - logic[21:0]
-    - 22 bit Physical Page Number (PPN)
+    - logic[riscv::PPNW-1:0]
+    - Physical Page Number (PPN)

   * - ``rsw``
     - logic[1:0]
@@ -644,7 +687,7 @@ The inputs and output signals of the TLB are shown in the following two figures.

TLB Entry Fields

-The number of TLB entries can be changed via a design parameter. In 32-bit configurations of CVA6 only 2 TLB entries are instantiated. Each TLB entry is made up of two fields: Tag and Content. The Tag field holds the virtual page number (VPN1, VPN0), ASID, page size (is_4M) along with a valid bit (VALID) indicating that the entry is valid. The SV32 virtual page number, which is supported by CV32A6X, is further split into two separate virtual page numbers VPN1 and VPN0. The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page. Note that the V bit in the Content field is the V bit which is present in the page table in memory. It is copied from the page table, as is, and the VALID bit in the Tag is set based on its value.The TLB entry fields are shown in **Figure 2**.
+The number of TLB entries can be changed via a design parameter. Each TLB entry is made up of two fields: Tag and Content. The Tag field holds the virtual page number, ASID and page size, along with a valid bit (VALID) indicating that the entry is valid. The virtual page number is further split into several separate virtual page numbers according to the number of PT_LEVELS used in each configuration. The Content field contains the physical page numbers along with a number of bits which specify various attributes of the physical page. Note that the V bit in the Content field is the V bit which is present in the page table in memory. It is copied from the page table, as is, and the VALID bit in the Tag is set based on its value. The TLB entry fields are shown in **Figure 2**.

.. figure:: ../images/cva6_tlb_entry.png
   :name: **Figure 5:** Fields in CVA6 TLB entry
@@ -672,8 +715,10 @@ This function takes in the virtual address and certain other fields, examines th

* **Validity Check:** For a TLB hit, the associated TLB entry must be valid.
* **ASID and Global Flag Check:** The TLB entry's ASID must match the given ASID (the ASID associated with the virtual address). If the TLB entry's Global (G) bit is set, then this check is not done. This ensures that the translation is either specific to the provided ASID or globally applicable.
-* **Level 1 VPN match:** SV32 implements a two-level page table. As such the virtual address is broken up into three parts which are the virtual page number 1, virtual page number 0 and displacement. So the condition that is checked next is that the virtual page number 1 of the virtual address matches the virtual page number 1(VPN1) of the TLB entry.
-* **Level 0 VPN match or 4-Mega Page:** The last condition to be checked, for a TLB hit, is that the virtual page number 0 of the virtual address matches the virtual page number 0 of the TLB entry (VPN0). This match is ignored if the is_4M bit in the Tag is set which implies a super 4M page.
+* **Level VPN match:** CVA6 implements a multi-level page table. As such, the virtual address is broken up into multiple parts, which are the virtual page numbers used at the different levels. So the condition that is checked next is that the virtual page number of the virtual address matches the virtual page number of the TLB entry at each level.
+* **Page match:** Without the Hypervisor extension, there is a match at a certain level X if the is_page component of the tag is set to 1 at level PT_LEVELS-X. At level 0, page_match is always set to 1. With the Hypervisor extension, is_page carries one bit per translation stage at each level, and the page match is evaluated against the stages that are currently enabled (see ``v_st_enbl_i``), so an entry matches at a given level only if the enabled stages map a page of that size there.
+* **Level match:** The last condition to be checked at each page level, for a TLB hit, is that there is a VPN match for the current level and the higher ones, together with a page match at the current one. E.g., if PT_LEVELS=2, a match at level 2 will occur if there is a VPN match at level 2 and a page match at level 2. For level 1, there will be a match if there is a VPN match at levels 2 and 1, together with a page match at level 1.

All the conditions listed above are checked against every TLB entry. If there is a TLB hit then the corresponding bit in the hit array is set. **Figure 3** illustrates the TLB hit/miss process listed above. A condensed sketch of this per-entry hit condition, together with the structs of Tables 7 and 8, is shown below.
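To make the structures and the lookup concrete, the sketch below restates the structs of Tables 7 and 8 and a per-entry hit check, specialized to PT_LEVELS = 2 with the Hypervisor extension disabled (HYP_EXT = 0). All local names (``tags_q``, ``content_q``, ``vpn_match``, ``page_match``, ``level_match``) are illustrative and not necessarily those used in ``cva6_tlb.sv``.

.. code-block:: systemverilog

   // Struct shapes follow Tables 7 and 8 (illustrative, not the verbatim RTL).
   typedef struct packed {
     logic [riscv::PPNW-1:0] ppn;  // physical page number
     logic [1:0]             rsw;  // reserved for software
     logic d, a, g, u, x, w, r, v; // dirty/accessed/global/user/exec/write/read/valid
   } pte_cva6_t;

   typedef struct packed {
     logic                                valid;   // update entry is valid
     logic [PT_LEVELS-2:0][HYP_EXT:0]     is_page; // per-level page-size flags (S and G stage)
     logic [VPN_LEN-1:0]                  vpn;     // virtual page number being installed
     logic [HYP_EXT:0][ASID_WIDTH[0]-1:0] asid;    // ASID (plus VMID when HYP_EXT = 1)
     pte_cva6_t [HYP_EXT:0]               content; // S-stage (and G-stage) PTE
   } tlb_update_cva6_t;

   // Hit check for one TLB entry i, with PT_LEVELS = 2 and HYP_EXT = 0.
   localparam int unsigned VPN_W = VPN_LEN / PT_LEVELS; // VPN bits per level

   // VPN comparison per level (index 1 = top level)
   assign vpn_match[1] = lu_vaddr_i[12+2*VPN_W-1 -: VPN_W] == tags_q[i].vpn[2*VPN_W-1 -: VPN_W];
   assign vpn_match[0] = lu_vaddr_i[12+VPN_W-1   -: VPN_W] == tags_q[i].vpn[VPN_W-1   -: VPN_W];

   // Page match: a superpage entry matches at the top level; level 0 always matches.
   assign page_match[1] = tags_q[i].is_page[0]; // is_page index = PT_LEVELS - level
   assign page_match[0] = 1'b1;

   // Level match: VPN match at this level and above, page match at this level.
   assign level_match[1] = vpn_match[1] & page_match[1];
   assign level_match[0] = vpn_match[1] & vpn_match[0] & page_match[0];

   // The entry hits if it is valid, the ASID matches (or the page is global),
   // and there is a match at some level.
   assign lu_hit[i] = tags_q[i].valid
                    & ((tags_q[i].asid == lu_asid_i) | content_q[i].g)
                    & (|level_match);

With the Hypervisor extension, the same checks are repeated per enabled stage, using the per-stage ``is_page`` bits and the VMID alongside the ASID.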
@@ -835,7 +880,7 @@ Shared Translation Lookaside Buffer
The CVA6 shared TLB is structured as a 2-way associative cache, where the virtual address requiring translation is compared with the set indicated by the virtual page number. The shared TLB is looked up in case of an Instruction TLB (ITLB) or data TLB (DTLB) miss, signaled by these TLBs. If the entry is found in the shared TLB set, the respective TLB, whose translation is being requested, is updated. If the entry is not found in the shared TLB, then the processor has to perform a page table walk. Once the processor obtains a PPN corresponding to the VPN, the shared TLB is updated with this information. If the physical page is not found in the page table, it results in a page fault, which is handled by the operating system. The operating system will then place the corresponding physical page in memory.

-The inputs and output signals of the shared TLB are shown in the following two figures.
+The input and output signals of the shared TLB are shown in the following two figures.

.. figure:: ../images/shared_tlb_in_out.png
   :name: **Figure 14:** Inputs and outputs of CVA6 shared TLB
@@ -877,26 +922,27 @@ The inputs and output signals of the shared TLB are shown in the following two f
   * - ``flush_i``
     - in
     - Controller
-    - logic
+    - logic [HYP_EXT*2:0]
     - TLB flush request

   * - ``enable_translation_i``
     - in
-    - CSR Regfile
-    - logic
-    - CSRs indicate to enable Sv32
+    - CSR RegFile
+    - logic [HYP_EXT*2:0]
+    - Bit 0 indicates an address translation request for instructions. In Hypervisor mode, bit 1 enables virtual memory translation for instructions, and bit 2 indicates the virtualization mode state

   * - ``en_ld_st_translation_i``
     - in
-    - CSR Regfile
+    - CSR RegFile
     - logic
-    - Enable virtual memory translation for load/stores
+    - Bit 0 indicates an address translation request for loads or stores. In Hypervisor mode, bit 1 enables virtual memory translation for loads or stores, and bit 2 indicates the virtualization mode at which loads and stores should happen

   * - ``asid_i``
     - in
     - CSR Regfile
-    - logic
-    - ASID for the lookup
+    - logic [HYP_EXT*2:0][ASID_WIDTH[0]-1:0]
+    - Vector 0 is the ASID for the lookup. Vectors 1 and 2 are the analogous ones for virtual supervisor and hypervisor when the Hypervisor extension is enabled.

   * - ``itlb_access_i``
     - in
@@ -913,7 +959,7 @@ The inputs and output signals of the shared TLB are shown in the following two f
   * - ``itlb_vaddr_i``
     - in
     - Cache Subsystem
-    - logic[31:0]
+    - logic [riscv::VLEN-1:0]
     - Virtual address lookup in ITLB

   * - ``dtlb_access_i``
     - in
@@ -931,19 +977,19 @@ The inputs and output signals of the shared TLB are shown in the following two f
   * - ``dtlb_vaddr_i``
     - in
     - Load/Store Unit
-    - logic[31:0]
+    - logic [riscv::VLEN-1:0]
     - Virtual address lookup in DTLB

   * - ``itlb_update_o``
     - out
     - ITLB
-    - tlb_update_sv32_t
+    - tlb_update_cva6_t
     - Tag and content to update ITLB

   * - ``dtlb_update_o``
     - out
     - DTLB
-    - tlb_update_sv32_t
+    - tlb_update_cva6_t
     - Tag and content to update DTLB

   * - ``itlb_miss_o``
     - out
@@ -970,10 +1016,10 @@ The inputs and output signals of the shared TLB are shown in the following two f
     - logic
     - Signal indicating a shared TLB hit

-  * - ``shared_tlb_vadd_o``
+  * - ``shared_tlb_vaddr_o``
     - out
     - PTW
-    - logic[31:0]
+    - logic [riscv::VLEN-1:0]
     - Virtual address lookup in shared TLB

   * - ``itlb_req_o``
     - out
@@ -985,7 +1031,7 @@ The inputs and output signals of the shared TLB are shown in the following two f
   * - ``shared_tlb_update_i``
     - in
     - PTW
-    - tlb_update_sv32_t
+    - tlb_update_cva6_t
     - Updated tag and content of shared TLB

.. raw:: html

@@ -1003,17 +1049,13 @@ The inputs and output signals of the shared TLB are shown in the following two f
     - Type
     - Description

-  * - ``is_4M``
-    - logic
-    - Indicates if the shared TLB entry corresponds to a 4MB page.
-
-  * - ``vpn1``
-    - logic[9:0]
-    - Virtual Page Number (VPN) represents the index of PTE in the page table level 1.
+  * - ``is_page``
+    - logic [PT_LEVELS-2:0][HYP_EXT:0]
+    - Indicates if the shared TLB entry corresponds to a page at the different levels. When the Hypervisor extension is used, it includes information for the G-stage too.

-  * - ``vpn0``
-    - logic[9:0]
-    - Virtual Page Number (VPN) represents the index of PTE in the page table level 0.
+  * - ``vpn``
+    - logic[PT_LEVELS+HYP_EXT-1:0][(VPN_LEN/PT_LEVELS)-1:0]
+    - Virtual Page Number (VPN); represents the index of the PTE in each page table level.

   * - ``asid``
     - logic
@@ -1023,7 +1065,7 @@ The inputs and output signals of the shared TLB are shown in the following two f
Shared TLB Entry Structure

-Shared TLB is 2-way associative, with a depth of 64. A single entry in the set contains the valid bit, tag and the content. The Tag segment stores details such as the virtual page number (VPN1, VPN0), ASID, and page size (is_4M). The Content field contains two physical page numbers (PPN1, PPN0) along with a number of bits which specify various attributes of the physical page.
+The shared TLB is 2-way associative, with a depth of 64. A single entry in a set contains the valid bit, the tag and the content. The Tag segment stores details such as the virtual page number, ASID, and page size. The Content field contains the physical page numbers along with a number of bits which specify various attributes of the physical page. The way a set is selected is sketched below.

.. figure:: ../images/shared_tlb.png
   :name: **Figure 15:** CVA6 Shared TLB Structure
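As a concrete illustration of the set selection referenced above: with 64 sets, the lowest bits of the virtual page number pick the set, and both ways of that set are then compared against the full tag. The slice below assumes 4 KiB base pages; the exact expression in ``cva6_shared_tlb.sv`` may differ.

.. code-block:: systemverilog

   // Illustrative set-index computation for the 2-way, depth-64 shared TLB.
   // With 4 KiB base pages the page offset occupies bits [11:0], so the
   // lowest 6 VPN bits select one of the 64 sets.
   logic [5:0] set_idx;
   assign set_idx = itlb_vaddr_i[12 +: 6];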
+The "CVA6 Page Table Walker (PTW)" is a hardware module designed to facilitate the translation of virtual addresses into physical addresses, a crucial task in memory access management. .. figure:: ../images/ptw_in_out.png :name: **Figure 19:** Input and Outputs of Page Table Walker @@ -1164,7 +1206,7 @@ The PTW module operates through various states, each with its specific function, Key Features and Capabilities -Key features of this PTW module include support for two levels of page tables (LVL1 and LVL2) in the Sv32 standard, accommodating instruction and data page table walks. It rigorously validates and verifies page table entries (PTEs) to ensure translation accuracy and adherence to access permissions. This module seamlessly integrates with the CV32A6 processor's memory management unit (MMU), which governs memory access control. It also takes into account global mapping, access flags, and privilege levels during the translation process, ensuring that memory access adheres to the processor's security and privilege settings. +Key features of this PTW module include support for multiple levels of page tables (PT_LEVELS), accommodating instruction and data page table walks. It rigorously validates and verifies page table entries (PTEs) to ensure translation accuracy and adherence to access permissions. This module seamlessly integrates with the CVA6 processor's memory management unit (MMU), which governs memory access control. It also takes into account global mapping, access flags, and privilege levels during the translation process, ensuring that memory access adheres to the processor's security and privilege settings. .. raw:: html @@ -1222,7 +1264,7 @@ In addition to its translation capabilities, the PTW module is equipped to detec * - ``ptw_error_o`` - out - MMU - - logic + - logic [HYP_EXT*2:0] - Output signal indicating that an error occurred during PTW operation * - ``ptw_access_exception_o`` @@ -1231,7 +1273,25 @@ In addition to its translation capabilities, the PTW module is equipped to detec - logic - Output signal indicating that a PMP (Physical Memory Protection) access exception occurred during PTW operation. - * - ``lsu_is_store_i`` + * - ``enable_translation_i`` + - in + - CSR RegFile + - logic [HYP_EXT*2:0] + - Bit 0 indicates address translation request for instruction. In Hypervisor mode, bit 1 enables virtual memory translation for instrucionts, and bit 2 indicates the virtualization mode state + + * - ``en_ld_st_translation_i`` + - in + - CSR RegFile + - logic + - Bit 0 indicates address translation request for load or store. In Hypervisor mode, bit 1 enables virtual memory translation for load or store, and bit 2 indicates the virtualization mode at which load and stores should happen + + * - ``hlvx_inst_i`` + - in + - Store / Load Unit + - logic [HYP_EXT:0] + - Indicates that Instruction is a hypervisor load store with execute permissions + + * - ``lsu_is_store_i`` - in - Store Unit - logic @@ -1252,7 +1312,7 @@ In addition to its translation capabilities, the PTW module is equipped to detec * - ``shared_tlb_update_o`` - out - Shared TLB - - tlb_update_sv32_t + - tlb_update_cva6_t - Updated tag and content of shared TLB * - ``update_vaddr_o`` @@ -1263,9 +1323,9 @@ In addition to its translation capabilities, the PTW module is equipped to detec * - ``asid_i`` - in - - CSR RegFile - - logic[ASID_WIDTH-1:0] - - ASID for the lookup + - CSR Regfile + - logic [HYP_EXT*2:0][ASID_WIDTH[0]-1:0] + - Vector 0 is the ASID for the lookup. 
+      Vectors 1 and 2 are the analogous ones for virtual supervisor and hypervisor when the Hypervisor extension is enabled.

   * - ``shared_tlb_access_i``
     - in
@@ -1294,14 +1354,14 @@ In addition to its translation capabilities, the PTW module is equipped to detec
   * - ``satp_ppn_i``
     - in
     - CSR RegFile
-    - logic[riscv::PPNW-1:0]
-    - PPN of top level page table from SATP register
+    - logic [HYP_EXT*2:0][riscv::PPNW-1:0]
+    - PPN of the top-level page table from the SATP register (bit 0), the VSATP register (bit 1 when the Hypervisor extension is enabled) and the HGATP register (bit 2 when the Hypervisor extension is enabled).

   * - ``mxr_i``
     - in
     - CSR RegFile
-    - logic
-    - Make Executable Readable bit in xSTATUS CSR register
+    - logic [HYP_EXT:0]
+    - Bit 0 is the Make Executable Readable bit in the xSTATUS CSR register. Bit 1 is the analogous one for virtual supervisor when the Hypervisor extension is enabled.

   * - ``shared_tlb_miss_o``
     - out
@@ -1324,8 +1384,8 @@ In addition to its translation capabilities, the PTW module is equipped to detec
   * - ``bad_paddr_o``
     - out
     - MMU
-    - logic[riscv::PLEN-1:0]
-    - Bad Physical Address in case of access exception
+    - logic[HYP_EXT:0][riscv::PLEN-1:0]
+    - Bad physical address in case of an access exception. Vector 1 is the analogous one for the G-stage when the Hypervisor extension is enabled.

.. raw:: html

@@ -1423,10 +1483,10 @@
Page Table Walker is implemented as a finite state machine. It listens to shared TLB for incoming translation requests. If there is a shared TLB miss, it saves the virtual address and starts the page table walk. The page table walker transitions between 7 states in CVA6.

-* **IDLE:** The initial state where the PTW is awaiting a trigger, often a Shared TLB miss, to initiate a memory access request.
+* **IDLE:** The initial state where the PTW is awaiting a trigger, often a Shared TLB miss, to initiate a memory access request. In the case of the Hypervisor extension, the stage to which the translation belongs is determined by the enable_translation_i and en_ld_st_translation_i signals. There are 3 possible stages: G_INTERMED_STAGE, G_FINAL_STAGE and S_STAGE. When the Hypervisor extension is not enabled, the PTW is always in S_STAGE.
* **WAIT_GRANT:** Request memory access and wait for the data grant
-* **PTE_LOOKUP:** Once granted access, the PTW examines the valid Page Table Entry (PTE), checking attributes to determine the appropriate course of action.
-* **PROPOGATE_ERROR:** If the PTE is invalid, this state handles the propagation of an error, often leading to a page-fault exception due to non-compliance with access conditions
+* **PTE_LOOKUP:** Once granted access, the PTW examines the valid Page Table Entry (PTE), checking attributes to determine the appropriate course of action. Depending on the stage determined in the previous state, pptr and other attributes are updated accordingly.
+* **PROPAGATE_ERROR:** If the PTE is invalid, this state handles the propagation of an error, often leading to a page-fault exception due to non-compliance with access conditions.
* **PROPAGATE_ACCESS_ERROR:** Propagate an access fault if the access is not allowed from a PMP perspective
* **WAIT_RVALID:** After processing a PTE, the PTW waits for a valid data signal, indicating that relevant data is ready for further processing.
* **LATENCY:** Introduces a delay to account for synchronization or timing requirements between states. A sketch of the corresponding state register is shown after this list.
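As referenced above, the state set can be captured in an enum like the following; the encoding and ordering here are illustrative, not necessarily those of ``cva6_ptw.sv``.

.. code-block:: systemverilog

   // Illustrative PTW state register covering the seven documented states.
   typedef enum logic [2:0] {
     IDLE,                   // wait for a shared TLB miss
     WAIT_GRANT,             // request memory access and wait for the grant
     PTE_LOOKUP,             // examine the fetched page table entry
     WAIT_RVALID,            // wait for valid data after processing a PTE
     PROPAGATE_ERROR,        // page-fault path
     PROPAGATE_ACCESS_ERROR, // PMP access-fault path
     LATENCY                 // synchronization delay between states
   } state_e;

   state_e state_q, state_d;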
@@ -1445,24 +1505,24 @@ Page Table Walker is implemented as a finite state machine. It listens to shared
In the IDLE state of the Page Table Walker (PTW) finite state machine, the system awaits a trigger to initiate the page table walk process. This trigger is often prompted by a Shared Translation Lookaside Buffer (TLB) miss, indicating that the required translation is not present in the shared TLB cache. The PTW's behavior in this state is explained as follows:

-1. The top-most page table is selected for the page table walk. In the case of SV32, which implements a two-level page table, the level 1 page table is chosen.
+1. The top-most page table is selected for the page table walk. In all configurations, the walk starts at level 0.
2. In the IDLE state, translations are assumed to be invalid in all addressing spaces.
3. The signal indicating the instruction page table walk is set to 0.
4. A conditional check is performed: if there is a shared TLB access request and the entry is not found in the shared TLB (indicating a shared TLB miss), the following steps are executed:

-   a. The address of the desired Page Table Entry within the level 1 page table is calculated by multiplying the Physical Page Number (PPN) of the level 1 page table from the SATP register by the page size (4kB). This result is then added to the product of the Virtual Page Number (VPN1), and the size of a page table entry(4 bytes).
+   a. The address of the desired Page Table Entry within the level 0 page table is calculated by multiplying the Physical Page Number (PPN) of the level 0 page table from the SATP register by the page size. This result is then added to the product of the Virtual Page Number and the size of a page table entry. Depending on the translation indicated by enable_translation_i and en_ld_st_translation_i at the different levels [HYP_EXT*2:0], the corresponding register (satp_ppn_i[HYP_EXT*2:0]) and the corresponding bits of the VPN are used. This computation is sketched below.

.. figure:: ../images/ptw_idle.png
-   :name: **Figure 21:** Address of Desired PTE at Level 1
+   :name: **Figure 21:** Address of Desired PTE at Level 0
   :align: center
   :width: 68%
   :alt: ptw_idle

-   **Figure 21:** Address of Desired PTE at Level 1
+   **Figure 21:** Address of Desired PTE at Level 0
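For the non-hypervisor case, the computation in step (a) reduces to the sketch below; ``vaddr_q``, ``vpn`` and the localparams are illustrative names.

.. code-block:: systemverilog

   // Illustrative first PTE address in IDLE (non-hypervisor case):
   // base of the root page table plus the byte offset of the selected entry.
   localparam int unsigned PAGE_SIZE = 4096;            // 2**12
   localparam int unsigned PTE_SIZE  = riscv::XLEN / 8; // 4 bytes for Sv32, 8 for Sv39

   logic [riscv::PLEN-1:0] ptw_pptr;
   assign ptw_pptr = satp_ppn_i[0] * PAGE_SIZE + vpn[PT_LEVELS-1] * PTE_SIZE;
   // For Sv32 this is equivalent to {satp_ppn_i[0], vaddr_q[31:22], 2'b00}.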
.. _example:

-   b. The signal indicating whether it's an instruction page table walk is updated based on the ITLB miss.
+   b. The signal indicating whether it's an instruction page table walk is updated based on the itlb_req_i signal.

   c. The ASID and virtual address are saved for the page table walk.
   d. A shared TLB miss is indicated.

@@ -1495,11 +1555,11 @@ In the **PTE_LOOKUP** state of the Page Table Walker (PTW) finite state machine,

.. _example1:

-   b. If the PTE is valid, the state advances to the "LATENCY" state, indicating a period of processing latency. Additionally, if the "read" flag (pte.r) or the "execute" flag (pte.x) is set, the PTE is considered valid.
+   b. If the PTE is valid, by default, the state advances to the "LATENCY" state, indicating a period of processing latency. Additionally, if the "read" flag (pte.r) or the "execute" flag (pte.x) is set, the PTE is considered valid.

-5. Within the Valid PTE scenario, the state performs further checks based on whether the translation is intended for instruction fetching or data access:
+5. Within the Valid PTE scenario, the ptw_stage is checked to decide the next state. When no Hypervisor extension is used, the stage is always S_STAGE and has no impact on the progress of the table walk. However, when the Hypervisor extension is used, if the stage is not G_FINAL_STAGE, the walk has to continue advancing through the different stages before proceeding with the translation; in this case, the state machine goes back to the WAIT_GRANT state. Afterwards, the state performs further checks based on whether the translation is intended for instruction fetching or data access:

-   a. For instruction page table walk, if the page is not executable (pte.x is not set) or not marked as accessible (pte.a is not set), the state transitions to the "PROPAGATE_ERROR" state.
+   a. For an instruction page table walk, if the page is not executable (pte.x is not set) or not marked as accessible (pte.a is not set), the state transitions to the "PROPAGATE_ERROR" state. Otherwise, the translation is valid. In case the Hypervisor extension is enabled, a valid translation additionally requires being in G_FINAL_STAGE, or the G-stage being disabled.

.. figure:: ../images/ptw_iptw.png
   :name: **Figure 23:** For Instruction Page Table Walk
@@ -1511,7 +1571,7 @@ In the **PTE_LOOKUP** state of the Page Table Walker (PTW) finite state machine,

.. _example2:

-   b. For data page table walk, the state checks if the page is readable (pte.r is set) or if the page is executable only but made readable by setting the MXR bit in xSTATUS CSR register. If either condition is met, it indicates a valid translation. If not, the state transitions to the "PROPAGATE_ERROR" state.
+   b. For a data page table walk, the state checks if the page is readable (pte.r is set) or if the page is executable only but made readable by setting the MXR bit in the xSTATUS CSR register. If either condition is met, it indicates a valid translation. If not, the state transitions to the "PROPAGATE_ERROR" state. When the Hypervisor extension is enabled, a valid translation also requires being in G_FINAL_STAGE or the G-stage being disabled.

.. figure:: ../images/ptw_dptw.png
   :name: **Figure 24:** Data Access Page Table Walk
@@ -1532,7 +1592,7 @@ In the **PTE_LOOKUP** state of the Page Table Walker (PTW) finite state machine,

   **Figure 25:** Data Access Page Table Walk, Store requested

-6. The state also checks for potential misalignment issues in the translation: If the current page table level is the first level (LVL1) and if the PPN0 of in PTE is not zero, it indicates a misaligned superpage, leading to a transition to the "PROPAGATE_ERROR" state.
+6. The state also checks for potential misalignment issues in the translation: if the current page table level is the first level and the lower bits of the PPN in the PTE are not zero, it indicates a misaligned superpage, leading to a transition to the "PROPAGATE_ERROR" state.

.. figure:: ../images/ptw_mis_sup.png
   :name: **Figure 26:** Misaligned Superpage Check

   **Figure 26:** Misaligned Superpage Check

7. If the PTE is valid but the page is neither readable nor executable, the PTW recognizes the PTE as a pointer to the next level of the page table, indicating that additional translation information can be found in the referenced page table at a lower level.
+8. If the current page table level is not the last level, the PTW proceeds to switch to the next-level page table, updating the next-level pointer and calculating the address for the next page table entry using the Physical Page Number from the PTE and the index from the virtual address. Depending on the level and the ptw_stage, the pptr is updated accordingly.

.. figure:: ../images/ptw_nlvl.png
   :name: **Figure 27:** Address of desired PTE at next level of Page Table
@@ -1553,8 +1613,8 @@

   **Figure 27:** Address of desired PTE at next level of Page Table

-9. The state then transitions to the "WAIT_GRANT" state, indicating that the PTW is awaiting the grant signal to proceed with requesting the next level page table entry.
-10. If the current level is already the second level (LVL2), an error is flagged, and the state transitions to the "PROPAGATE_ERROR" state, signifying an unexpected situation where the PTW is already at the last level page table.
+9. The state then transitions to the "WAIT_GRANT" state, indicating that the PTW is awaiting the grant signal to proceed with requesting the next-level page table entry. If the Hypervisor extension is used and the page has already been accessed, is dirty, or is accessible only in user mode, the state goes to PROPAGATE_ERROR.
+10. If the current level is already the last level, an error is flagged, and the state transitions to the "PROPAGATE_ERROR" state, signifying an unexpected situation where the PTW is already at the last-level page table.

11. If the translation access is found to be restricted by the Physical Memory Protection (PMP) settings (allow_access is false), the state updates the shared TLB update signal to indicate that the TLB entry should not be updated. Additionally, the saved address for the page table walk is restored to its previous value, and the state transitions to the "PROPAGATE_ACCESS_ERROR" state.
12. Lastly, if the data request for the page table entry was granted, the state indicates to the cache subsystem that the tag associated with the data is now valid.

diff --git a/src_files.yml b/src_files.yml
index 84173c67ca..b391245227 100644
--- a/src_files.yml
+++ b/src_files.yml
@@ -36,6 +36,7 @@ ariane:
      src/miss_handler.sv,
      src/mmu_sv39/mmu.sv,
      src/mmu_sv32/cva6_mmu_sv32.sv,
+      src/mmu_unify/cva6_mmu.sv,
      src/mult.sv,
      src/nbdcache.sv,
      src/vdregs.sv,
@@ -45,12 +46,14 @@ ariane:
      src/perf_counters.sv,
      src/mmu_sv39/ptw.sv,
      src/mmu_sv32/cva6_ptw_sv32.sv,
+      src/mmu_unify/cva6_ptw.sv,
      src/re_name.sv,
      src/scoreboard.sv,
      src/store_buffer.sv,
      src/store_unit.sv,
      src/mmu_sv39/tlb.sv,
      src/mmu_sv32/cva6_tlb_sv32.sv,
+      src/mmu_unify/cva6_tlb.sv,
      src/acc_dispatcher.sv,
      src/debug/dm_csrs.sv,
      src/debug/dm_mem.sv,