diff --git a/core/ariane_regfile_fpga.sv b/core/ariane_regfile_fpga.sv index 22d5aaa353..9169482e3a 100644 --- a/core/ariane_regfile_fpga.sv +++ b/core/ariane_regfile_fpga.sv @@ -1,4 +1,5 @@ // Copyright 2018 ETH Zurich and University of Bologna. +// Copyright 2024 - PlanV Technologies for additional contribution. // Copyright and related rights are licensed under the Solderpad Hardware // License, Version 0.51 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at @@ -15,7 +16,7 @@ // Noam Gallmann - gnoam@live.com // Felipe Lisboa Malaquias // Henry Suzukawa -// +// Angela Gonzalez - PlanV Technologies // // Description: This register file is optimized for implementation on // FPGAs. The register file features one distributed RAM block per implemented @@ -50,11 +51,16 @@ module ariane_regfile_fpga #( localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts); // Distributed RAM usually supports one write port per block - duplicate for each write port. 
- logic [ NUM_WORDS-1:0][ DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts]; + logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem[CVA6Cfg.NrCommitPorts]; + + logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec; + logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; + logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; + logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_reg; + logic [NR_READ_PORTS-1:0] read_after_write; - logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec; - logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; - logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; + logic [NR_READ_PORTS-1:0][4:0] raddr_q; + logic [NR_READ_PORTS-1:0][4:0] raddr; // write adress decoder (for block selector) always_comb begin @@ -88,36 +94,55 @@ module ariane_regfile_fpga #( always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin mem_block_sel_q <= '0; + raddr_q <= '0; end else begin mem_block_sel_q <= mem_block_sel; + if (CVA6Cfg.FpgaAlteraEn) raddr_q <= raddr_i; + else raddr_q <= '0; end end // distributed RAM blocks logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts]; + logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read_sync[CVA6Cfg.NrCommitPorts]; for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block always_ff @(posedge clk_i) begin if (we_i[j] && ~waddr_i[j] != 0) begin mem[j][waddr_i[j]] <= wdata_i[j]; + if (CVA6Cfg.FpgaAlteraEn) + wdata_reg[j] <= wdata_i[j]; // register data written in case is needed to read next cycle + else wdata_reg[j] <= '0; + end + if (CVA6Cfg.FpgaAlteraEn) begin + for (int k = 0; k < NR_READ_PORTS; k++) begin : block_read + mem_read_sync[j][k] = mem[j][raddr_i[k]]; // synchronous RAM + read_after_write[k] <= '0; + if (waddr_i[j] == raddr_i[k]) + read_after_write[k] <= we_i[j] && ~waddr_i[j] != 0; // Identify if we need to read the content that was written + end end end for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read - assign 
mem_read[j][k] = mem[j][raddr_i[k]]; + assign mem_read[j][k] = CVA6Cfg.FpgaAlteraEn ? ( read_after_write[k] ? wdata_reg[j]: mem_read_sync[j][k]) : mem[j][raddr_i[k]]; end end + //with synchronous ram there is the need to adjust which address is used at the output MUX + assign raddr = CVA6Cfg.FpgaAlteraEn ? raddr_q : raddr_i; // output MUX logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr; for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port - assign block_addr[k] = mem_block_sel_q[raddr_i[k]]; - assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k]; + assign block_addr[k] = mem_block_sel_q[raddr[k]]; + assign rdata_o[k] = (ZERO_REG_ZERO && raddr[k] == '0) ? '0 : mem_read[block_addr[k]][k]; end // random initialization of the memory to suppress assert warnings on Questa. initial begin for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (int j = 0; j < NUM_WORDS; j++) begin - mem[i][j] = $random(); + if (!CVA6Cfg.FpgaAlteraEn) + mem[i][j] = $random(); //quartus does not support this random statement on synthesis + else mem[i][j] = '0; end end end diff --git a/core/cva6.sv b/core/cva6.sv index 4c6b3bf7d6..ffb6d28460 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -401,7 +401,7 @@ module cva6 // -------------- // ID <-> ISSUE // -------------- - scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_id_issue; + scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_id_issue, issue_entry_id_issue_prev; logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_id_issue; logic [CVA6Cfg.NrIssuePorts-1:0] issue_entry_valid_id_issue; logic [CVA6Cfg.NrIssuePorts-1:0] is_ctrl_fow_id_issue; @@ -690,6 +690,7 @@ module cva6 .fetch_entry_ready_o(fetch_ready_id_if), .issue_entry_o (issue_entry_id_issue), + .issue_entry_o_prev (issue_entry_id_issue_prev), .orig_instr_o (orig_instr_id_issue), .issue_entry_valid_o(issue_entry_valid_id_issue), .is_ctrl_flow_o (is_ctrl_fow_id_issue), @@ -806,6 +807,7 @@ module cva6 
.stall_i (stall_acc_id), // ID Stage .decoded_instr_i (issue_entry_id_issue), + .decoded_instr_i_prev (issue_entry_id_issue_prev), .orig_instr_i (orig_instr_id_issue), .decoded_instr_valid_i (issue_entry_valid_id_issue), .is_ctrl_flow_i (is_ctrl_fow_id_issue), diff --git a/core/id_stage.sv b/core/id_stage.sv index 864c74b329..43583bde01 100644 --- a/core/id_stage.sv +++ b/core/id_stage.sv @@ -41,6 +41,7 @@ module id_stage #( output logic [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_ready_o, // Handshake's data between decode and issue - ISSUE output scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_o, + output scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_o_prev, // Instruction value - ISSUE output logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_o, // Handshake's valid between decode and issue - ISSUE @@ -266,6 +267,7 @@ module id_stage #( // ------------------ for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin assign issue_entry_o[i] = issue_q[i].sbe; + assign issue_entry_o_prev[i] = CVA6Cfg.FpgaAlteraEn ? 
issue_n[i].sbe : '0; assign issue_entry_valid_o[i] = issue_q[i].valid; assign is_ctrl_flow_o[i] = issue_q[i].is_ctrl_flow; assign orig_instr_o[i] = issue_q[i].orig_instr; diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index 56045cdc5a..fa357f42fb 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -39,6 +39,7 @@ module issue_read_operands input logic stall_i, // Entry about the instruction to issue - SCOREBOARD input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_i, + input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_i_prev, // Instruction to issue - SCOREBOARD input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i, // Is there an instruction to issue - SCOREBOARD @@ -954,11 +955,12 @@ module issue_read_operands logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_pack; logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; - for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin - assign raddr_pack[i*OPERANDS_PER_INSTR+0] = issue_instr_i[i].rs1; - assign raddr_pack[i*OPERANDS_PER_INSTR+1] = issue_instr_i[i].rs2; + //adjust address to read from register file (when synchronous RAM is used reads take one cycle, so we advance the address) + for (genvar i = 0; i <= CVA6Cfg.NrIssuePorts - 1; i++) begin + assign raddr_pack[i*OPERANDS_PER_INSTR+0] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? issue_instr_i_prev[i].rs1[4:0] : issue_instr_i[i].rs1[4:0]; + assign raddr_pack[i*OPERANDS_PER_INSTR+1] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? issue_instr_i_prev[i].rs2[4:0] : issue_instr_i[i].rs2[4:0]; if (OPERANDS_PER_INSTR == 3) begin - assign raddr_pack[i*OPERANDS_PER_INSTR+2] = issue_instr_i[i].result[4:0]; + assign raddr_pack[i*OPERANDS_PER_INSTR+2] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? 
issue_instr_i_prev[i].result[4:0] : issue_instr_i[i].result[4:0]; end end diff --git a/core/issue_stage.sv b/core/issue_stage.sv index 788cbe09b4..ea922d5ce0 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -43,6 +43,7 @@ module issue_stage input logic stall_i, // Handshake's data with decode stage - ID_STAGE input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] decoded_instr_i, + input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] decoded_instr_i_prev, // instruction value - ID_STAGE input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i, // Handshake's valid with decode stage - ID_STAGE @@ -253,6 +254,7 @@ module issue_stage .flush_i (flush_unissued_instr_i), .stall_i, .issue_instr_i (issue_instr_sb_iro), + .issue_instr_i_prev (decoded_instr_i_prev), .orig_instr_i (orig_instr_sb_iro), .issue_instr_valid_i (issue_instr_valid_sb_iro), .issue_ack_o (issue_ack_iro_sb),