diff --git a/Bender.yml b/Bender.yml index c4455ee5..60c24834 100644 --- a/Bender.yml +++ b/Bender.yml @@ -18,179 +18,187 @@ dependencies: tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 } export_include_dirs: + - hw/ip/reqrsp_interface/include - hw/ip/mem_interface/include - hw/ip/snitch/include - - hw/ip/reqrsp_interface/include - hw/ip/tcdm_interface/include sources: ## hw/ip/reqrsp_interface ## - - # Level 0: - - hw/ip/reqrsp_interface/src/reqrsp_pkg.sv - # Level 1: - - hw/ip/reqrsp_interface/src/reqrsp_intf.sv - # Level 2: - - hw/ip/reqrsp_interface/src/axi_to_reqrsp.sv - - hw/ip/reqrsp_interface/src/reqrsp_cut.sv - - hw/ip/reqrsp_interface/src/reqrsp_demux.sv - - hw/ip/reqrsp_interface/src/reqrsp_iso.sv - - hw/ip/reqrsp_interface/src/reqrsp_mux.sv - - hw/ip/reqrsp_interface/src/reqrsp_to_axi.sv - - target: simulation - files: - - hw/ip/reqrsp_interface/src/reqrsp_test.sv - - target: test + - target: not(mempool) files: - # Level 0 - - hw/ip/reqrsp_interface/test/axi_to_reqrsp_tb.sv - - hw/ip/reqrsp_interface/test/reqrsp_demux_tb.sv - - hw/ip/reqrsp_interface/test/reqrsp_idempotent_tb.sv - - hw/ip/reqrsp_interface/test/reqrsp_mux_tb.sv - - hw/ip/reqrsp_interface/test/reqrsp_to_axi_tb.sv - - ## hw/ip/mem_interface ## + # Level 0: + - hw/ip/reqrsp_interface/src/reqrsp_pkg.sv + # Level 1: + - hw/ip/reqrsp_interface/src/reqrsp_intf.sv + # Level 2: + - hw/ip/reqrsp_interface/src/axi_to_reqrsp.sv + - hw/ip/reqrsp_interface/src/reqrsp_cut.sv + - hw/ip/reqrsp_interface/src/reqrsp_demux.sv + - hw/ip/reqrsp_interface/src/reqrsp_iso.sv + - hw/ip/reqrsp_interface/src/reqrsp_mux.sv + - hw/ip/reqrsp_interface/src/reqrsp_to_axi.sv + - target: simulation + files: + - hw/ip/reqrsp_interface/src/reqrsp_test.sv + - target: test + files: + # Level 0 + - hw/ip/reqrsp_interface/test/axi_to_reqrsp_tb.sv + - hw/ip/reqrsp_interface/test/reqrsp_demux_tb.sv + - hw/ip/reqrsp_interface/test/reqrsp_idempotent_tb.sv + - 
hw/ip/reqrsp_interface/test/reqrsp_mux_tb.sv + - hw/ip/reqrsp_interface/test/reqrsp_to_axi_tb.sv + + ## hw/ip/mem_interface ## + + - hw/ip/mem_interface/src/mem_wide_narrow_mux.sv + - hw/ip/mem_interface/src/mem_interface.sv + - target: simulation + files: + - hw/ip/mem_interface/src/mem_test.sv + - target: test + files: + # Level 0 + - hw/ip/mem_interface/test/mem_wide_narrow_mux_tb.sv + + ## hw/ip/tcdm_interface ## + + # Level 0 + - hw/ip/tcdm_interface/src/tcdm_interface.sv + # Level 1 + - hw/ip/tcdm_interface/src/axi_to_tcdm.sv + - hw/ip/tcdm_interface/src/reqrsp_to_tcdm.sv + - hw/ip/tcdm_interface/src/tcdm_mux.sv + - target: simulation + files: + - hw/ip/tcdm_interface/src/tcdm_test.sv + - target: test + files: + # Level 0 + - hw/ip/tcdm_interface/test/reqrsp_to_tcdm_tb.sv + - hw/ip/tcdm_interface/test/tcdm_mux_tb.sv + + ## hw/ip/snitch ## + + # Level 0: + - hw/ip/snitch/src/snitch_pma_pkg.sv + - hw/ip/snitch/src/riscv_instr.sv + # Level 1: + - hw/ip/snitch/src/snitch_pkg.sv + # Level 2: + - hw/ip/snitch/src/snitch_regfile_ff.sv + # - hw/ip/snitch/src/snitch_regfile_fpga.sv + # - hw/ip/snitch/src/snitch_regfile_latch.sv + - hw/ip/snitch/src/snitch_lsu.sv + - hw/ip/snitch/src/snitch_l0_tlb.sv + # Level 1: + - target: not(disable_pmcs) + defines: + SNITCH_ENABLE_PERF: + files: + - hw/ip/snitch/src/snitch.sv + # Disable the performance monitoring counters to save area. 
+ - target: disable_pmcs + files: + - hw/ip/snitch/src/snitch.sv + + - target: test + files: + - hw/ip/snitch/test/snitch_l0_tlb_tb.sv + + ## hw/ip/snitch_icache ## + + # Level 0: + - hw/ip/snitch_icache/src/snitch_icache_pkg.sv + # Level 1: + - hw/ip/snitch_icache/src/snitch_icache_l0.sv + - hw/ip/snitch_icache/src/snitch_icache_refill.sv + - hw/ip/snitch_icache/src/snitch_icache_lfsr.sv + - hw/ip/snitch_icache/src/snitch_icache_lookup.sv + # Level 2: + - hw/ip/snitch_icache/src/snitch_icache_handler.sv + # Level 3: + - hw/ip/snitch_icache/src/snitch_icache.sv + - target: test + files: + - hw/ip/snitch_icache/test/snitch_icache_l0_tb.sv + + ## hw/ip/snitch_test ## + + # Level 1: + - target: any(simulation, verilator) + files: + - hw/ip/snitch_test/src/tb_memory_regbus.sv + # Level 2 + - hw/ip/snitch_test/src/tb_memory_axi.sv + # Level 3: + - target: snitch_test + files: + - hw/ip/snitch_test/src/tb_bin.sv - - hw/ip/mem_interface/src/mem_wide_narrow_mux.sv - - hw/ip/mem_interface/src/mem_interface.sv - - target: simulation - files: - - hw/ip/mem_interface/src/mem_test.sv - - target: test + ## hw/ip/spatz ## + - target: spatz files: - # Level 0 - - hw/ip/mem_interface/test/mem_wide_narrow_mux_tb.sv - - ## hw/ip/tcdm_interface ## + # Level 0 + - hw/ip/spatz/src/reorder_buffer.sv + - hw/ip/spatz/src/rvv_pkg.sv + # Level 1 + - hw/ip/spatz/src/generated/spatz_pkg.sv + + - hw/ip/spatz/src/spatz_serdiv.sv + # Level 2 + - hw/ip/spatz/src/spatz_decoder.sv + - hw/ip/spatz/src/spatz_simd_lane.sv + - hw/ip/spatz/src/vregfile.sv + # Level 3 + - hw/ip/spatz/src/spatz_fpu_sequencer.sv + - hw/ip/spatz/src/spatz_ipu.sv + - hw/ip/spatz/src/spatz_vfu.sv + - hw/ip/spatz/src/spatz_vlsu.sv + - hw/ip/spatz/src/spatz_vrf.sv + - hw/ip/spatz/src/spatz_vsldu.sv + # Level 4 + - hw/ip/spatz/src/spatz_controller.sv + # Level 5 + - hw/ip/spatz/src/spatz.sv + # Level 6 + - target: mempool + files: + - hw/ip/spatz_cc/src/spatz_mempool_cc.sv - # Level 0 - - 
hw/ip/tcdm_interface/src/tcdm_interface.sv - # Level 1 - - hw/ip/tcdm_interface/src/axi_to_tcdm.sv - - hw/ip/tcdm_interface/src/reqrsp_to_tcdm.sv - - hw/ip/tcdm_interface/src/tcdm_mux.sv - - target: simulation - files: - - hw/ip/tcdm_interface/src/tcdm_test.sv - - target: test - files: + ## hw/ip/spatz_cc ## + - target: not(mempool) + files: # Level 0 - - hw/ip/tcdm_interface/test/reqrsp_to_tcdm_tb.sv - - hw/ip/tcdm_interface/test/tcdm_mux_tb.sv - - ## hw/ip/snitch ## - - # Level 0: - - hw/ip/snitch/src/snitch_pma_pkg.sv - - hw/ip/snitch/src/riscv_instr.sv - # Level 1: - - hw/ip/snitch/src/snitch_pkg.sv - # Level 2: - - hw/ip/snitch/src/snitch_regfile_ff.sv - # - hw/ip/snitch/src/snitch_regfile_fpga.sv - # - hw/ip/snitch/src/snitch_regfile_latch.sv - - hw/ip/snitch/src/snitch_lsu.sv - - hw/ip/snitch/src/snitch_l0_tlb.sv - # Level 1: - - target: not(disable_pmcs) - defines: - SNITCH_ENABLE_PERF: - files: - - hw/ip/snitch/src/snitch.sv - # Disable the performance monitoring counters to save area. 
- - target: disable_pmcs - files: - - hw/ip/snitch/src/snitch.sv - - - target: test - files: - - hw/ip/snitch/test/snitch_l0_tlb_tb.sv - - ## hw/ip/snitch_icache ## - - # Level 0: - - hw/ip/snitch_icache/src/snitch_icache_pkg.sv - # Level 1: - - hw/ip/snitch_icache/src/snitch_icache_l0.sv - - hw/ip/snitch_icache/src/snitch_icache_refill.sv - - hw/ip/snitch_icache/src/snitch_icache_lfsr.sv - - hw/ip/snitch_icache/src/snitch_icache_lookup.sv - # Level 2: - - hw/ip/snitch_icache/src/snitch_icache_handler.sv - # Level 3: - - hw/ip/snitch_icache/src/snitch_icache.sv - - target: test - files: - - hw/ip/snitch_icache/test/snitch_icache_l0_tb.sv - - ## hw/ip/snitch_test ## - - # Level 1: - - target: any(simulation, verilator) - files: - - hw/ip/snitch_test/src/tb_memory_regbus.sv + - hw/ip/spatz_cc/src/axi_dma_pkg.sv + # Level 1 + - hw/ip/spatz_cc/src/axi_dma_perf_counters.sv + - hw/ip/spatz_cc/src/axi_dma_twod_ext.sv # Level 2 - - hw/ip/snitch_test/src/tb_memory_axi.sv - # Level 3: - - target: snitch_test - files: - - hw/ip/snitch_test/src/tb_bin.sv + - hw/ip/spatz_cc/src/axi_dma_tc_snitch_fe.sv + # Level 3 + - hw/ip/spatz_cc/src/spatz_cc.sv - ## hw/ip/spatz ## - - # Level 0 - - hw/ip/spatz/src/reorder_buffer.sv - - hw/ip/spatz/src/rvv_pkg.sv - # Level 1 - - hw/ip/spatz/src/generated/spatz_pkg.sv - - hw/ip/spatz/src/spatz_serdiv.sv - # Level 2 - - hw/ip/spatz/src/spatz_decoder.sv - - hw/ip/spatz/src/spatz_simd_lane.sv - - hw/ip/spatz/src/vregfile.sv - # Level 3 - - hw/ip/spatz/src/spatz_fpu_sequencer.sv - - hw/ip/spatz/src/spatz_ipu.sv - - hw/ip/spatz/src/spatz_vfu.sv - - hw/ip/spatz/src/spatz_vlsu.sv - - hw/ip/spatz/src/spatz_vrf.sv - - hw/ip/spatz/src/spatz_vsldu.sv - # Level 4 - - hw/ip/spatz/src/spatz_controller.sv - # Level 5 - - hw/ip/spatz/src/spatz.sv - - ## hw/ip/spatz_cc ## + ## system/spatz_cluster ## - # Level 0 - - hw/ip/spatz_cc/src/axi_dma_pkg.sv - # Level 1 - - hw/ip/spatz_cc/src/axi_dma_perf_counters.sv - - hw/ip/spatz_cc/src/axi_dma_twod_ext.sv - # Level 
2 - - hw/ip/spatz_cc/src/axi_dma_tc_snitch_fe.sv - # Level 3 - - hw/ip/spatz_cc/src/spatz_cc.sv - - ## system/spatz_cluster ## - - # Level 0 - - hw/system/spatz_cluster/src/generated/bootrom.sv - - hw/system/spatz_cluster/src/spatz_amo_shim.sv - - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv - - hw/system/spatz_cluster/src/spatz_tcdm_interconnect.sv - # Level 1 - - hw/system/spatz_cluster/src/spatz_barrier.sv - - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv - # Level 2 - - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral.sv - # Level 3 - - hw/system/spatz_cluster/src/spatz_cluster.sv - # Level 4 - - hw/system/spatz_cluster/src/generated/spatz_cluster_wrapper.sv - - - target: spatz_test - files: + # Level 0 + - hw/system/spatz_cluster/src/generated/bootrom.sv + - hw/system/spatz_cluster/src/spatz_amo_shim.sv + - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv + - hw/system/spatz_cluster/src/spatz_tcdm_interconnect.sv # Level 1 - - hw/system/spatz_cluster/tb/testharness.sv + - hw/system/spatz_cluster/src/spatz_barrier.sv + - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv + # Level 2 + - hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral.sv + # Level 3 + - hw/system/spatz_cluster/src/spatz_cluster.sv + # Level 4 + - hw/system/spatz_cluster/src/generated/spatz_cluster_wrapper.sv + + - target: spatz_test + files: + # Level 1 + - hw/system/spatz_cluster/tb/testharness.sv diff --git a/hw/ip/spatz/src/generated/spatz_pkg.sv b/hw/ip/spatz/src/generated/spatz_pkg.sv index b54a65e6..f2191eaa 100644 --- a/hw/ip/spatz/src/generated/spatz_pkg.sv +++ b/hw/ip/spatz/src/generated/spatz_pkg.sv @@ -339,7 +339,9 @@ package spatz_pkg; Width : ELEN, EnableVectors: 1'b1, EnableNanBox : 1'b1, + // FP32 FP64 FP16 FP8 FP16a FP8a FpFmtMask : {1'b1, 1'b1, 1'b1, 1'b1, 
1'b1, 1'b1}, + // INT8 INT16 INT32 INT64 IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b1} } : // Single Precision FPU @@ -347,8 +349,10 @@ package spatz_pkg; Width : ELEN, EnableVectors: 1'b1, EnableNanBox : 1'b1, - FpFmtMask : {RVF, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}, - IntFmtMask : {1'b0, 1'b1, 1'b1, 1'b0} + // FP32 FP64 FP16 FP8 FP16a FP8a + FpFmtMask : {RVF, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0}, + // INT8 INT16 INT32 INT64 + IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b0} }; // FP format conversion diff --git a/hw/ip/spatz/src/spatz.sv b/hw/ip/spatz/src/spatz.sv index 72ecb4c3..e4c95a3a 100644 --- a/hw/ip/spatz/src/spatz.sv +++ b/hw/ip/spatz/src/spatz.sv @@ -53,6 +53,12 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( output logic [1:0] spatz_mem_finished_o, output logic [1:0] spatz_mem_str_finished_o, // FPU memory interface interface +`ifdef MEMPOOL_SPATZ + output logic fp_lsu_mem_req_valid_o, + input logic fp_lsu_mem_req_ready_i, + input logic fp_lsu_mem_rsp_valid_i, + output logic fp_lsu_mem_rsp_ready_o, +`endif output dreq_t fp_lsu_mem_req_o, input drsp_t fp_lsu_mem_rsp_i, // FPU side channel @@ -116,6 +122,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( assign issue_req = issue_req_i; assign issue_valid = issue_valid_i; assign issue_ready_o = issue_ready; + assign issue_rsp_o = issue_rsp; assign rsp_o = resp; assign rsp_valid_o = resp_valid; @@ -123,6 +130,10 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( // Tie the memory interface to zero assign fp_lsu_mem_req_o = '0; +`ifdef MEMPOOL_SPATZ + assign fp_lsu_mem_req_valid_o = 1'b0; + assign fp_lsu_mem_rsp_ready_o = 1'b0; +`endif assign fp_lsu_mem_finished = 1'b0; assign fp_lsu_mem_str_finished = 1'b0; end: gen_no_fpu_sequencer else begin: gen_fpu_sequencer @@ -134,32 +145,38 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( .spatz_rsp_t (spatz_rsp_t ), .NumOutstandingLoads(NumOutstandingLoads ) ) i_fpu_sequencer 
( - .clk_i (clk_i ), - .rst_ni (rst_ni ), + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), // Snitch interface - .issue_req_i (issue_req_i ), - .issue_valid_i (issue_valid_i ), - .issue_ready_o (issue_ready_o ), - .issue_rsp_o (issue_rsp_o ), - .resp_o (rsp_o ), - .resp_valid_o (rsp_valid_o ), - .resp_ready_i (rsp_ready_i ), + .issue_req_i ( issue_req_i ), + .issue_valid_i ( issue_valid_i ), + .issue_ready_o ( issue_ready_o ), + .issue_rsp_o ( issue_rsp_o ), + .resp_o ( rsp_o ), + .resp_valid_o ( rsp_valid_o ), + .resp_ready_i ( rsp_ready_i ), // Spatz interface - .issue_req_o (issue_req ), - .issue_valid_o (issue_valid ), - .issue_ready_i (issue_ready ), - .issue_rsp_i (issue_rsp ), - .resp_i (resp ), - .resp_valid_i (resp_valid ), - .resp_ready_o (resp_ready ), + .issue_req_o ( issue_req ), + .issue_valid_o ( issue_valid ), + .issue_ready_i ( issue_ready ), + .issue_rsp_i ( issue_rsp ), + .resp_i ( resp ), + .resp_valid_i ( resp_valid ), + .resp_ready_o ( resp_ready ), // Memory interface - .fp_lsu_mem_req_o (fp_lsu_mem_req_o ), - .fp_lsu_mem_rsp_i (fp_lsu_mem_rsp_i ), - .fp_lsu_mem_finished_o (fp_lsu_mem_finished ), - .fp_lsu_mem_str_finished_o(fp_lsu_mem_str_finished), +`ifdef MEMPOOL_SPATZ + .fp_lsu_mem_req_valid_o ( fp_lsu_mem_req_valid_o ), + .fp_lsu_mem_req_ready_i ( fp_lsu_mem_req_ready_i ), + .fp_lsu_mem_rsp_valid_i ( fp_lsu_mem_rsp_valid_i ), + .fp_lsu_mem_rsp_ready_o ( fp_lsu_mem_rsp_ready_o ), +`endif + .fp_lsu_mem_req_o ( fp_lsu_mem_req_o ), + .fp_lsu_mem_rsp_i ( fp_lsu_mem_rsp_i ), + .fp_lsu_mem_finished_o ( fp_lsu_mem_finished ), + .fp_lsu_mem_str_finished_o( fp_lsu_mem_str_finished), // Spatz VLSU side channel - .spatz_mem_finished_i (spatz_mem_finished ), - .spatz_mem_str_finished_i (spatz_mem_str_finished ) + .spatz_mem_finished_i ( spatz_mem_finished ), + .spatz_mem_str_finished_i ( spatz_mem_str_finished ) ); end: gen_fpu_sequencer @@ -211,7 +228,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( spatz_controller #( 
.NrVregfilePorts (NrReadPorts+NrWritePorts), .NrWritePorts (NrWritePorts ), - .RegisterRsp (RegisterRsp ), + .RegisterRsp (RegisterRsp ), .spatz_issue_req_t(spatz_issue_req_t ), .spatz_issue_rsp_t(spatz_issue_rsp_t ), .spatz_rsp_t (spatz_rsp_t ) @@ -237,7 +254,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( .vfu_rsp_valid_i (vfu_rsp_valid ), .vfu_rsp_ready_o (vfu_rsp_ready ), .vfu_rsp_i (vfu_rsp ), - // VFU + // VLSU .vlsu_req_ready_i (vlsu_req_ready ), .vlsu_rsp_valid_i (vlsu_rsp_valid ), .vlsu_rsp_i (vlsu_rsp ), diff --git a/hw/ip/spatz/src/spatz_controller.sv b/hw/ip/spatz/src/spatz_controller.sv index 49ee3013..26971dbb 100644 --- a/hw/ip/spatz/src/spatz_controller.sv +++ b/hw/ip/spatz/src/spatz_controller.sv @@ -174,7 +174,7 @@ module spatz_controller // Decode new instruction if new request arrives always_comb begin : proc_decode - decoder_req = '{default: '0}; + decoder_req = '0; decoder_req_valid = 1'b0; // Decode new instruction if one is received and spatz is ready @@ -466,7 +466,7 @@ module spatz_controller end: proc_next_insn_id // Respond to core about the decoded instruction. - always_comb begin : x_issue_resp + always_comb begin : acc_issue_resp issue_rsp_o = '0; // Is there something running on Spatz? 
If so, prevent Snitch from reading the fcsr register @@ -506,7 +506,7 @@ module spatz_controller // Do not accept it issue_rsp_o.accept = 1'b0; end - end // x_issue_resp + end // acc_issue_resp ////////////// // Retiring // @@ -554,6 +554,9 @@ module spatz_controller vfu_rsp_ready = 1'b0; if (retire_csr) begin +`ifdef MEMPOOL_SPATZ + rsp_d.write = 1'b1; +`endif // Read CSR and write back to cpu if (spatz_req.op == VCSR) begin if (spatz_req.use_rd) begin @@ -579,6 +582,9 @@ module spatz_controller end else if (vfu_rsp_valid) begin rsp_d.id = vfu_rsp.rd; rsp_d.data = vfu_rsp.result; +`ifdef MEMPOOL_SPATZ + rsp_d.write = 1'b1; +`endif rsp_valid_d = 1'b1; vfu_rsp_ready = 1'b1; end diff --git a/hw/ip/spatz/src/spatz_decoder.sv b/hw/ip/spatz/src/spatz_decoder.sv index 59f0bb17..28011d4a 100644 --- a/hw/ip/spatz/src/spatz_decoder.sv +++ b/hw/ip/spatz/src/spatz_decoder.sv @@ -1181,6 +1181,8 @@ module spatz_decoder spatz_req.vs2 = arith_s2; spatz_req.use_vs2 = 1'b1; spatz_req.op_arith.is_scalar = 1'b1; + // Keep default value (EW_8) if max element length is not 32 bit + spatz_req.vtype.vsew = (ELEN == 32) ? 
EW_32 : EW_8; end end diff --git a/hw/ip/spatz/src/spatz_fpu_sequencer.sv b/hw/ip/spatz/src/spatz_fpu_sequencer.sv index 4cf812b2..4fcecf7c 100644 --- a/hw/ip/spatz/src/spatz_fpu_sequencer.sv +++ b/hw/ip/spatz/src/spatz_fpu_sequencer.sv @@ -11,7 +11,8 @@ module spatz_fpu_sequencer import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; - import reqrsp_pkg::*; #( + import reqrsp_pkg::*; + import cf_math_pkg::idx_width; #( // Memory request parameter type dreq_t = logic, parameter type drsp_t = logic, @@ -22,7 +23,8 @@ module spatz_fpu_sequencer parameter int unsigned AddrWidth = 32, parameter int unsigned DataWidth = FLEN, - parameter int unsigned NumOutstandingLoads = 1 + parameter int unsigned NumOutstandingLoads = 1, + localparam int unsigned IdWidth = cf_math_pkg::idx_width(NumOutstandingLoads) ) ( input logic clk_i, input logic rst_ni, @@ -43,6 +45,12 @@ module spatz_fpu_sequencer input logic resp_valid_i, output logic resp_ready_o, // Memory interface +`ifdef MEMPOOL_SPATZ + output logic fp_lsu_mem_req_valid_o, + input logic fp_lsu_mem_req_ready_i, + input logic fp_lsu_mem_rsp_valid_i, + output logic fp_lsu_mem_rsp_ready_o, +`endif output dreq_t fp_lsu_mem_req_o, input drsp_t fp_lsu_mem_rsp_i, output logic fp_lsu_mem_finished_o, @@ -495,7 +503,7 @@ module spatz_fpu_sequencer logic [AddrWidth-1:0] fp_lsu_qaddr; logic [DataWidth-1:0] fp_lsu_qdata; logic [1:0] fp_lsu_qsize; - reqrsp_pkg::amo_op_e fp_lsu_qamo; + reqrsp_pkg::amo_op_e fp_lsu_qamo; logic fp_lsu_qvalid; logic fp_lsu_qready; @@ -504,12 +512,22 @@ module spatz_fpu_sequencer logic fp_lsu_pvalid; logic fp_lsu_pready; + // TODO: remove hardcoding + logic [AddrWidth-1:0] mem_qaddr; + logic mem_qwrite; + logic [DataWidth-1:0] mem_qdata; + logic [StrbWidth-1:0] mem_qstrb; + logic [IdWidth-1:0] mem_qid; + logic [DataWidth-1:0] mem_pdata; + logic mem_perror; + logic [IdWidth-1:0] mem_pid; + snitch_lsu #( - .DataWidth (FLEN ), .NaNBox (1 ), - .NumOutstandingLoads(NumOutstandingLoads), .dreq_t (dreq_t ), - 
.drsp_t (drsp_t ) + .drsp_t (drsp_t ), + .DataWidth (FLEN ), + .NumOutstandingLoads(NumOutstandingLoads) ) i_fp_lsu ( .clk_i (clk_i ), .rst_i (~rst_ni ), @@ -523,7 +541,6 @@ module spatz_fpu_sequencer .lsu_qamo_i (fp_lsu_qamo ), .lsu_qvalid_i (fp_lsu_qvalid ), .lsu_qready_o (fp_lsu_qready ), - .lsu_empty_o (/* Unused */ ), // Response interface .lsu_pdata_o (fp_lsu_pdata ), .lsu_ptag_o (fp_lsu_ptag ), @@ -531,8 +548,24 @@ module spatz_fpu_sequencer .lsu_pvalid_o (fp_lsu_pvalid ), .lsu_pready_i (fp_lsu_pready ), // Memory interface +`ifdef MEMPOOL_SPATZ + .data_qaddr_o (mem_qaddr ), + .data_qwrite_o(mem_qwrite ), + .data_qamo_o (/* Unused */ ), + .data_qdata_o (mem_qdata ), + .data_qstrb_o (mem_qstrb ), + .data_qid_o (mem_qid ), + .data_qvalid_o(fp_lsu_mem_req_valid_o ), + .data_qready_i(fp_lsu_mem_req_ready_i ), + .data_pdata_i (mem_pdata ), + .data_perror_i(mem_perror ), + .data_pid_i (mem_pid ), + .data_pvalid_i(fp_lsu_mem_rsp_valid_i ), + .data_pready_o(fp_lsu_mem_rsp_ready_o ) +`else .data_req_o (fp_lsu_mem_req_o), .data_rsp_i (fp_lsu_mem_rsp_i) +`endif ); // Number of memory operations in the accelerator @@ -567,7 +600,12 @@ module spatz_fpu_sequencer fp_lsu_qtag = fd; fp_lsu_qwrite = is_store; fp_lsu_qsigned = 1'b0; + // lsu in mempool-snitch will write to argb +`ifdef MEMPOOL_SPATZ + fp_lsu_qaddr = issue_req_i.data_argb; +`else fp_lsu_qaddr = issue_req_i.data_argc; +`endif fp_lsu_qdata = fpr_rdata[1]; fp_lsu_qsize = ls_size; fp_lsu_qamo = AMONone; @@ -579,6 +617,24 @@ module spatz_fpu_sequencer // Is the LSU stalling? 
lsu_stall = fp_lsu_qvalid && !fp_lsu_qready; +`ifdef MEMPOOL_SPATZ + // Assign TCDM data interface + fp_lsu_mem_req_o = '{ + id : mem_qid, + addr : mem_qaddr, + size : '1, + write : mem_qwrite, + strb : mem_qstrb, + data : mem_qdata, + last : 1'b1, + default: '0 + }; + + mem_pdata = fp_lsu_mem_rsp_i.data; + mem_perror = '0; + mem_pid = fp_lsu_mem_rsp_i.id; +`endif + if ((is_vector_load || is_vector_store) && issue_ready_i && issue_valid_o) acc_mem_cnt_d += 1; if (spatz_mem_finished_i) diff --git a/hw/ip/spatz/src/spatz_pkg.sv.tpl b/hw/ip/spatz/src/spatz_pkg.sv.tpl index efd41d6b..7d565323 100644 --- a/hw/ip/spatz/src/spatz_pkg.sv.tpl +++ b/hw/ip/spatz/src/spatz_pkg.sv.tpl @@ -12,15 +12,27 @@ package spatz_pkg; // Parameters // ////////////////// +% if cfg['mempool']: + // Number of IPUs in each VFU (between 1 and 8) + localparam int unsigned N_IPU = `ifdef N_IPU `N_IPU `else 4 `endif; + // Number of FPUs in each VFU (between 1 and 8) + localparam int unsigned N_FPU = `ifdef N_FPU `N_FPU `else 4 `endif; +% else : // Number of IPUs in each VFU (between 1 and 8) localparam int unsigned N_IPU = ${cfg['n_ipu']}; // Number of FPUs in each VFU (between 1 and 8) localparam int unsigned N_FPU = ${cfg['n_fpu']}; +% endif // Number of FUs in each VFU localparam int unsigned N_FU = N_IPU > N_FPU ? 
N_IPU : N_FPU; // FPU support localparam bit FPU = N_FPU != 0; -% if cfg['spatz_fpu']: +% if cfg['mempool']: + // Single-precision floating point support + localparam bit RVF = `ifdef RVF `RVF `else 0 `endif; + // Double-precision floating-point support + localparam bit RVD = `ifdef RVD `RVD `else 0 `endif; +% elif cfg['spatz_fpu']: // Single-precision floating point support localparam bit RVF = 1; // Double-precision floating-point support @@ -39,7 +51,11 @@ package spatz_pkg; // Maximum size of a single vector element in bytes localparam int unsigned ELENB = ELEN / 8; // Number of bits in a vector register +% if cfg['mempool']: + localparam int unsigned VLEN = `ifdef VLEN `VLEN `else 256 `endif; +% else : localparam int unsigned VLEN = ${cfg['vlen']}; +%endif // Number of bytes in a vector register localparam int unsigned VLENB = VLEN / 8; // Maximum vector length in elements @@ -291,6 +307,27 @@ package spatz_pkg; logic exc; } vlsu_rsp_t; +% if cfg['mempool']: + typedef struct packed { + logic [$clog2(NRVREG):0] id; + logic [31:0] addr; + logic [1:0] mode; + logic [1:0] size; + logic write; + logic [DataWidth/8-1:0] strb; + logic [DataWidth-1:0] data; + logic last; + logic spec; + } spatz_mem_req_t; + + typedef struct packed { + logic [$clog2(NRVREG)-1:0] id; + logic [DataWidth-1:0] data; + logic err; + logic write; + } spatz_mem_rsp_t; + +%endif //////////////////// // VSLDU Response // //////////////////// @@ -346,7 +383,9 @@ package spatz_pkg; Width : ELEN, EnableVectors: 1'b1, EnableNanBox : 1'b1, + // FP32 FP64 FP16 FP8 FP16a FP8a FpFmtMask : {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1}, + // INT8 INT16 INT32 INT64 IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b1} } : // Single Precision FPU @@ -354,10 +393,34 @@ package spatz_pkg; Width : ELEN, EnableVectors: 1'b1, EnableNanBox : 1'b1, - FpFmtMask : {RVF, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}, - IntFmtMask : {1'b0, 1'b1, 1'b1, 1'b0} + // FP32 FP64 FP16 FP8 FP16a FP8a + FpFmtMask : {RVF, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0}, + // INT8 INT16 
INT32 INT64 + IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b0} }; +% if cfg['mempool']: + localparam fpnew_pkg::fpu_implementation_t MemPoolFPUImpl = + '{ + // Pipeline stages + // FP32 FP64 FP16 FP8 FP16a FP8a + PipeRegs: '{'{ 1, 2, 1, 1, 0, 0}, // ADDMUL + '{ 1, 1, 1, 1, 1, 1}, // DIVSQRT + '{ 1, 1, 1, 1, 1, 1}, // NONCOMP + '{ 2, 2, 2, 2, 2, 2}, // CONV + '{ 2, 2, 2, 2, 2, 2}}, // DOTP + // MERGED: share one functional unit for all types + // PARALLEL: multiple functional units + // DISABLED: turn off + UnitTypes:'{'{ default: fpnew_pkg::MERGED}, // ADDMUL + '{ default: fpnew_pkg::DISABLED}, // DIVSQRT + '{ default: fpnew_pkg::PARALLEL}, // NONCOMP + '{ default: fpnew_pkg::MERGED}, // CONV + '{ default: fpnew_pkg::MERGED}}, // DOTP + PipeConfig: fpnew_pkg::BEFORE + }; + +% endif // FP format conversion typedef struct packed { logic [63:63] sign; diff --git a/hw/ip/spatz/src/spatz_vfu.sv b/hw/ip/spatz/src/spatz_vfu.sv index 342fea10..327126d2 100644 --- a/hw/ip/spatz/src/spatz_vfu.sv +++ b/hw/ip/spatz/src/spatz_vfu.sv @@ -409,8 +409,14 @@ module spatz_vfu reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); end default: begin - if (MAXEW == EW_64) reduction_d[0] = $unsigned(vrf_rdata_i[0][63:0]); - if (MAXEW == EW_64) reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + `ifdef MEMPOOL_SPATZ + reduction_d = '0; + `else + if (MAXEW == EW_64) begin + reduction_d[0] = $unsigned(vrf_rdata_i[0][63:0]); + reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + end + `endif end endcase // verilator lint_on SELRANGE @@ -446,8 +452,14 @@ module spatz_vfu reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); end default: begin - if (MAXEW == EW_64) reduction_d[0] = $unsigned(result[63:0]); - if (MAXEW == EW_64) reduction_d[1] = 
$unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + `ifdef MEMPOOL_SPATZ + reduction_d = '0; + `else + if (MAXEW == EW_64) begin + reduction_d[0] = $unsigned(result[63:0]); + reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + end + `endif end endcase // verilator lint_on SELRANGE diff --git a/hw/ip/spatz/src/spatz_vlsu.sv b/hw/ip/spatz/src/spatz_vlsu.sv index 9fff0894..6daf94c5 100644 --- a/hw/ip/spatz/src/spatz_vlsu.sv +++ b/hw/ip/spatz/src/spatz_vlsu.sv @@ -151,8 +151,13 @@ module spatz_vlsu store_count_d[port]++; // Did we get the ack of a store? + `ifdef MEMPOOL_SPATZ + if (store_count_q[port] != '0 && spatz_mem_rsp_valid_i[port] && spatz_mem_rsp_i[port].write) + store_count_d[port]--; + `else if (store_count_q[port] != '0 && spatz_mem_rsp_valid_i[port]) store_count_d[port]--; + `endif end end: proc_store_count @@ -163,16 +168,40 @@ module spatz_vlsu typedef logic [int'(MAXEW)-1:0] addr_offset_t; elen_t [NrMemPorts-1:0] rob_wdata; + id_t [NrMemPorts-1:0] rob_wid; logic [NrMemPorts-1:0] rob_push; logic [NrMemPorts-1:0] rob_rvalid; elen_t [NrMemPorts-1:0] rob_rdata; logic [NrMemPorts-1:0] rob_pop; + id_t [NrMemPorts-1:0] rob_rid; + logic [NrMemPorts-1:0] rob_req_id; + id_t [NrMemPorts-1:0] rob_id; logic [NrMemPorts-1:0] rob_full; logic [NrMemPorts-1:0] rob_empty; // The reorder buffer decouples the memory side from the register file side. // All elements from one side to the other go through it. 
for (genvar port = 0; port < NrMemPorts; port++) begin : gen_rob +`ifdef MEMPOOL_SPATZ + reorder_buffer #( + .DataWidth(ELEN ), + .NumWords (NrOutstandingLoads) + ) i_reorder_buffer ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .data_i (rob_wdata[port] ), + .id_i (rob_wid[port] ), + .push_i (rob_push[port] ), + .data_o (rob_rdata[port] ), + .valid_o (rob_rvalid[port]), + .id_read_o(rob_rid[port] ), + .pop_i (rob_pop[port] ), + .id_req_i (rob_req_id[port]), + .id_o (rob_id[port] ), + .full_o (rob_full[port] ), + .empty_o (rob_empty[port] ) + ); +`else fifo_v3 #( .DATA_WIDTH(ELEN ), .DEPTH (NrOutstandingLoads) @@ -189,8 +218,8 @@ module spatz_vlsu .empty_o (rob_empty[port] ), .usage_o (/* Unused */ ) ); - assign rob_rvalid[port] = !rob_empty[port]; +`endif end: gen_rob ////////////////////// @@ -698,10 +727,12 @@ module spatz_vlsu ////////////////////////// // Memory request signals + id_t [NrMemPorts-1:0] mem_req_id; logic [NrMemPorts-1:0][MemDataWidth-1:0] mem_req_data; logic [NrMemPorts-1:0] mem_req_svalid; logic [NrMemPorts-1:0][ELEN/8-1:0] mem_req_strb; logic [NrMemPorts-1:0] mem_req_lvalid; + logic [NrMemPorts-1:0] mem_req_last; // Number of pending requests logic [NrMemPorts-1:0][idx_width(NrOutstandingLoads):0] mem_pending_d, mem_pending_q; @@ -732,13 +763,17 @@ module spatz_vlsu vrf_req_valid_d = 1'b0; rob_wdata = '0; + rob_wid = '0; rob_push = '0; rob_pop = '0; + rob_req_id = '0; + mem_req_id = '0; mem_req_data = '0; mem_req_strb = '0; mem_req_svalid = '0; mem_req_lvalid = '0; + mem_req_last = '0; // Propagate request ID vrf_req_d.rsp.id = commit_insn_q.id; @@ -830,10 +865,19 @@ module spatz_vlsu for (int unsigned port = 0; port < NrMemPorts; port++) begin // Write the load result to the buffer rob_wdata[port] = spatz_mem_rsp_i[port].data; +`ifdef MEMPOOL_SPATZ + rob_wid[port] = spatz_mem_rsp_i[port].id; + // Need to consider out-of-order memory response + rob_push[port] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && 
spatz_mem_rsp_i[port].write == '0; +`else rob_push[port] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && store_count_q[port] == '0; - - if (!rob_full[port] && !offset_queue_full[port] && mem_operation_valid[port]) +`endif + if (!rob_full[port] && !offset_queue_full[port] && mem_operation_valid[port]) begin + rob_req_id[port] = spatz_mem_req_ready[port] & spatz_mem_req_valid[port]; mem_req_lvalid[port] = (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[port])) && mem_spatz_req.op_mem.is_load; + mem_req_id[port] = rob_id[port]; + mem_req_last[port] = mem_operation_last[port]; + end end // Store operation end else begin @@ -842,8 +886,10 @@ module spatz_vlsu vrf_re_o[0] = 1'b1; for (int unsigned port = 0; port < NrMemPorts; port++) begin - rob_wdata[port] = vrf_rdata_i[0][ELEN*port +: ELEN]; - rob_push[port] = vrf_rvalid_i[0] && (!mem_is_indexed || vrf_rvalid_i[1]); + rob_wdata[port] = vrf_rdata_i[0][ELEN*port +: ELEN]; + rob_wid[port] = rob_id[port]; + rob_req_id[port] = vrf_rvalid_i[0] && (!mem_is_indexed || vrf_rvalid_i[1]); + rob_push[port] = rob_req_id[port]; end end @@ -894,6 +940,8 @@ module spatz_vlsu endcase mem_req_svalid[port] = rob_rvalid[port] && (!mem_is_indexed || (vrf_rvalid_i[1] && !pending_index[port])) && !mem_spatz_req.op_mem.is_load; + mem_req_id[port] = rob_rid[port]; + mem_req_last[port] = mem_operation_last[port]; rob_pop[port] = spatz_mem_req_valid[port] && spatz_mem_req_ready[port]; // Create byte enable signal for memory request @@ -934,7 +982,19 @@ module spatz_vlsu .valid_o (spatz_mem_req_valid_o[port]), .ready_i (spatz_mem_req_ready_i[port]) ); - +`ifdef MEMPOOL_SPATZ + // ID is required in Mempool-Spatz + assign spatz_mem_req[port].id = mem_req_id[port]; + assign spatz_mem_req[port].addr = mem_req_addr[port]; + assign spatz_mem_req[port].mode = '0; // Request always uses user privilege level + assign spatz_mem_req[port].size = mem_spatz_req.vtype.vsew[1:0]; + assign spatz_mem_req[port].write = !mem_is_load; + 
assign spatz_mem_req[port].strb = mem_req_strb[port]; + assign spatz_mem_req[port].data = mem_req_data[port]; + assign spatz_mem_req[port].last = mem_req_last[port]; + assign spatz_mem_req[port].spec = 1'b0; // Request is never speculative + assign spatz_mem_req_valid[port] = mem_req_svalid[port] || mem_req_lvalid[port]; +`else assign spatz_mem_req[port].addr = mem_req_addr[port]; assign spatz_mem_req[port].write = !mem_is_load; assign spatz_mem_req[port].amo = reqrsp_pkg::AMONone; @@ -942,6 +1002,7 @@ module spatz_vlsu assign spatz_mem_req[port].strb = mem_req_strb[port]; assign spatz_mem_req[port].user = '0; assign spatz_mem_req_valid[port] = mem_req_svalid[port] || mem_req_lvalid[port]; +`endif end //////////////// diff --git a/hw/ip/spatz_cc/src/spatz_mempool_cc.sv b/hw/ip/spatz_cc/src/spatz_mempool_cc.sv new file mode 100644 index 00000000..13fce786 --- /dev/null +++ b/hw/ip/spatz_cc/src/spatz_mempool_cc.sv @@ -0,0 +1,499 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +module spatz_mempool_cc + import snitch_pkg::meta_id_t; +#( + parameter logic [31:0] BootAddr = 32'h0000_1000, + parameter logic [31:0] MTVEC = BootAddr, + parameter bit RVE = 0, // Reduced-register extension + parameter bit RVM = 1, // Enable Integer Multiplication & Division Extension + parameter bit RVV = 0, // Enable Vector Extension + parameter bit XFVEC = 0, + parameter bit XFDOTP = 0, + parameter bit XFAUX = 0, + /// Enable F Extension. + parameter bit RVF = 0, + /// Enable D Extension.
+ parameter bit RVD = 0, + parameter bit XF16 = 0, + parameter bit XF16ALT = 0, + parameter bit XF8 = 0, + parameter bit XF8ALT = 0, + /// Enable div/sqrt unit (buggy - use with caution) + parameter bit XDivSqrt = 0, + parameter bit RegisterOffloadReq = 1, + parameter bit RegisterOffloadResp = 1, + parameter bit RegisterTCDMReq = 0, + parameter bit RegisterTCDMResp = 0, + + parameter int unsigned TCDMPorts = 1, + parameter int unsigned NumMemPortsPerSpatz = 1 +) ( + input logic clk_i, + input logic rst_i, + input logic [31:0] hart_id_i, + // Instruction Port + output logic [31:0] inst_addr_o, + input logic [31:0] inst_data_i, + output logic inst_valid_o, + input logic inst_ready_i, + // TCDM Ports + output logic [TCDMPorts-1:0][31:0] data_qaddr_o, + output logic [TCDMPorts-1:0] data_qwrite_o, + output logic [TCDMPorts-1:0][3:0] data_qamo_o, + output logic [TCDMPorts-1:0][31:0] data_qdata_o, + output logic [TCDMPorts-1:0][3:0] data_qstrb_o, + output meta_id_t [TCDMPorts-1:0] data_qid_o, + output logic [TCDMPorts-1:0] data_qvalid_o, + input logic [TCDMPorts-1:0] data_qready_i, + input logic [TCDMPorts-1:0][31:0] data_pdata_i, + input logic [TCDMPorts-1:0] data_pwrite_i, + input logic [TCDMPorts-1:0] data_perror_i, + input meta_id_t [TCDMPorts-1:0] data_pid_i, + input logic [TCDMPorts-1:0] data_pvalid_i, + output logic [TCDMPorts-1:0] data_pready_o, + + input logic wake_up_sync_i, + // Core event strobes + output snitch_pkg::core_events_t core_events_o +); + + // -------- + // Typedefs + // -------- + import spatz_pkg::*; + + // TODO Diyou: dreq_t and drsp_t are not consistent in spatz, mempool and here + + typedef struct packed { + logic accept; + logic writeback; + logic loadstore; + logic exception; + logic isfloat; + } acc_issue_rsp_t; + + typedef logic [31:0] addr_t; + typedef logic [31:0] data_t; + typedef logic [3:0] strb_t; + + localparam fpnew_pkg::fpu_implementation_t FPUImplementation = spatz_pkg::MemPoolFPUImpl; + + + // ---------------- + // Wire 
Definitions + // ---------------- + + // Data port signals + snitch_pkg::dreq_t data_req_d, data_req_q, snitch_req, fp_lsu_req; + snitch_pkg::dresp_t data_resp_d, data_resp_q, snitch_resp, fp_lsu_rsp; + + logic data_req_d_valid, data_req_d_ready, data_resp_d_valid, data_resp_d_ready; + logic data_req_q_valid, data_req_q_ready, data_resp_q_valid, data_resp_q_ready; + logic snitch_req_valid, snitch_req_ready, snitch_resp_valid, snitch_resp_ready; + + // Accelerator signals + // TODO Diyou: do we need to change name to acc_issue_req_t to keep the same convention as in spatz? + snitch_pkg::acc_req_t acc_req_d, acc_req_q; + snitch_pkg::acc_resp_t acc_resp_d, acc_resp_q; + + logic acc_req_d_valid, acc_req_d_ready, acc_resp_d_valid, acc_resp_d_ready; + logic acc_req_q_valid, acc_req_q_ready, acc_resp_q_valid, acc_resp_q_ready; + + + // Spatz Memory consistency signals + logic [1:0] spatz_mem_finished; + logic [1:0] spatz_mem_str_finished; + + // Spatz floating point signals + fpnew_pkg::roundmode_e fpu_rnd_mode; + fpnew_pkg::fmt_mode_t fpu_fmt_mode; + fpnew_pkg::status_t fpu_status; + acc_issue_rsp_t acc_req_rsp; + + // Spatz floating point mem signals + // reqrsp_req_t fp_lsu_mem_req; + // reqrsp_rsp_t fp_lsu_mem_rsp; + + // Spatz TCDM mem ports + spatz_mem_req_t [NumMemPortsPerSpatz-1:0] spatz_mem_req; + logic [NumMemPortsPerSpatz-1:0] spatz_mem_req_valid; + logic [NumMemPortsPerSpatz-1:0] spatz_mem_req_ready; + spatz_mem_rsp_t [NumMemPortsPerSpatz-1:0] spatz_mem_rsp; + logic [NumMemPortsPerSpatz-1:0] spatz_mem_rsp_valid; + + spatz_mem_req_t fp_lsu_mem_req; + logic fp_lsu_mem_req_ready; + logic fp_lsu_mem_req_valid; + spatz_mem_rsp_t fp_lsu_mem_rsp; + logic fp_lsu_mem_rsp_ready; + logic fp_lsu_mem_rsp_valid; + + // Snitch Integer Core + snitch #( + .BootAddr ( BootAddr ), + .MTVEC ( MTVEC ), + .RVE ( RVE ), + .RVM ( RVM ), + .RVV ( RVV ), + .XFVEC ( XFVEC ), + .XFDOTP ( XFDOTP ), + .XFAUX ( XFAUX ), + .RVF ( RVF ), + .RVD ( RVD ), + .XF16 ( XF16 ), + .XF16ALT ( XF16ALT 
), + .XF8 ( XF8 ), + .XF8ALT ( XF8ALT ), + .XDivSqrt ( XDivSqrt ), + .acc_issue_rsp_t ( acc_issue_rsp_t ) + ) i_snitch ( + .clk_i ( clk_i ), + .rst_i ( rst_i ), + .hart_id_i ( hart_id_i ), + .inst_addr_o ( inst_addr_o ), + .inst_data_i ( inst_data_i ), + .inst_valid_o ( inst_valid_o ), + .inst_ready_i ( inst_ready_i ), + .acc_qaddr_o ( acc_req_d.addr ), + .acc_qid_o ( acc_req_d.id ), + .acc_qdata_op_o ( acc_req_d.data_op ), + .acc_qdata_arga_o ( acc_req_d.data_arga ), + .acc_qdata_argb_o ( acc_req_d.data_argb ), + .acc_qdata_argc_o ( acc_req_d.data_argc ), + .acc_qvalid_o ( acc_req_d_valid ), + .acc_qready_i ( acc_req_d_ready ), + .acc_pdata_i ( acc_resp_q.data ), + .acc_pid_i ( acc_resp_q.id ), + .acc_pwrite_i ( acc_resp_q.write ), + .acc_perror_i ( acc_resp_q.error ), + .acc_pvalid_i ( acc_resp_q_valid ), + .acc_pready_o ( acc_resp_q_ready ), + .acc_qdata_rsp_i ( acc_req_rsp ), + .acc_mem_finished_i ( spatz_mem_finished ), + .acc_mem_str_finished_i ( spatz_mem_str_finished ), + .data_qaddr_o ( snitch_req.addr ), + .data_qwrite_o ( snitch_req.write ), + .data_qamo_o ( snitch_req.amo ), + .data_qdata_o ( snitch_req.data ), + .data_qstrb_o ( snitch_req.strb ), + .data_qid_o ( snitch_req.id ), + .data_qvalid_o ( snitch_req_valid ), + .data_qready_i ( snitch_req_ready ), + .data_pdata_i ( snitch_resp.data ), + .data_perror_i ( snitch_resp.error ), + .data_pid_i ( snitch_resp.id ), + .data_pvalid_i ( snitch_resp_valid ), + .data_pready_o ( snitch_resp_ready ), + .wake_up_sync_i ( wake_up_sync_i ), + .fpu_fmt_mode_o ( fpu_fmt_mode ), + .fpu_rnd_mode_o ( fpu_rnd_mode ), + .fpu_status_i ( fpu_status ), + .core_events_o ( core_events_o ) + ); + + assign acc_req_q = acc_req_d; + assign acc_req_q_valid = acc_req_d_valid; + assign acc_req_d_ready = acc_req_q_ready; + + // Cut off-loading response path + spill_register #( + .T ( snitch_pkg::acc_resp_t ), + .Bypass ( !RegisterOffloadResp ) + ) i_spill_register_acc_resp ( + .clk_i , + .rst_ni ( ~rst_i ), + .valid_i ( 
acc_resp_d_valid ), + .ready_o ( acc_resp_d_ready ), + .data_i ( acc_resp_d ), + .valid_o ( acc_resp_q_valid ), + .ready_i ( acc_resp_q_ready ), + .data_o ( acc_resp_q ) + ); + + spatz #( + .NrMemPorts ( NumMemPortsPerSpatz ), + .NumOutstandingLoads( snitch_pkg::NumIntOutstandingLoads ), + .FPUImplementation ( FPUImplementation ), + .RegisterRsp ( 1'b1 ), + .spatz_mem_req_t ( spatz_mem_req_t ), + .spatz_mem_rsp_t ( spatz_mem_rsp_t ), + .dreq_t ( spatz_mem_req_t ), + .drsp_t ( spatz_mem_rsp_t ), + .spatz_issue_req_t ( snitch_pkg::acc_req_t ), + .spatz_issue_rsp_t ( acc_issue_rsp_t ), + .spatz_rsp_t ( snitch_pkg::acc_resp_t ) + ) i_spatz ( + .clk_i ( clk_i ), + .rst_ni ( ~rst_i ), + .testmode_i ( 1'b0 ), + .hart_id_i ( hart_id_i ), + .issue_valid_i ( acc_req_q_valid ), + .issue_ready_o ( acc_req_q_ready ), + .issue_req_i ( acc_req_q ), + .issue_rsp_o ( acc_req_rsp ), + .rsp_valid_o ( acc_resp_d_valid ), + .rsp_ready_i ( acc_resp_d_ready ), + .rsp_o ( acc_resp_d ), + .spatz_mem_req_o ( spatz_mem_req ), + .spatz_mem_req_valid_o ( spatz_mem_req_valid ), + .spatz_mem_req_ready_i ( spatz_mem_req_ready ), + .spatz_mem_rsp_i ( spatz_mem_rsp ), + .spatz_mem_rsp_valid_i ( spatz_mem_rsp_valid ),// ***notice no ready signal here*** + .spatz_mem_finished_o ( spatz_mem_finished ), + .spatz_mem_str_finished_o( spatz_mem_str_finished), + .fp_lsu_mem_req_o ( fp_lsu_mem_req ), + .fp_lsu_mem_req_valid_o ( fp_lsu_mem_req_valid ), + .fp_lsu_mem_req_ready_i ( fp_lsu_mem_req_ready ), + .fp_lsu_mem_rsp_i ( fp_lsu_mem_rsp ), + .fp_lsu_mem_rsp_valid_i ( fp_lsu_mem_rsp_valid ), + .fp_lsu_mem_rsp_ready_o ( fp_lsu_mem_rsp_ready ), + .fpu_rnd_mode_i ( fpu_rnd_mode ), + .fpu_fmt_mode_i ( fpu_fmt_mode ), + .fpu_status_o ( fpu_status ) + ); + + // TODO: Perhaps put it into a module + // Assign Spatz memory port i to TCDM port i+1 (port 0 is driven separately below); requires TCDMPorts >= NumMemPortsPerSpatz + 1 + for (genvar i = 0; i < NumMemPortsPerSpatz; i++) begin + assign data_qaddr_o[i+1] = spatz_mem_req[i].addr; + assign data_qwrite_o[i+1] = spatz_mem_req[i].write; + assign data_qamo_o[i+1] 
= '0; + assign data_qdata_o[i+1] = spatz_mem_req[i].data; + assign data_qstrb_o[i+1] = spatz_mem_req[i].strb; + assign data_qid_o[i+1] = spatz_mem_req[i].id; + assign data_qvalid_o[i+1] = spatz_mem_req_valid[i]; + assign spatz_mem_req_ready[i] = data_qready_i[i+1]; + assign spatz_mem_rsp[i].data = data_pdata_i[i+1]; + assign spatz_mem_rsp[i].write = data_pwrite_i[i+1]; + assign spatz_mem_rsp[i].id = data_pid_i[i+1]; + assign spatz_mem_rsp[i].err = data_perror_i[i+1]; + assign spatz_mem_rsp_valid[i] = data_pvalid_i[i+1]; + // *** no ready signal for spatz here, tie to 1 *** + assign data_pready_o[i+1] = '1; + end + + assign fp_lsu_req = '{ + addr : fp_lsu_mem_req.addr, + id : fp_lsu_mem_req.id, + write : fp_lsu_mem_req.write, + data : fp_lsu_mem_req.data, + strb : fp_lsu_mem_req.strb, + default: '0 + }; + + assign fp_lsu_mem_rsp = '{ + id : fp_lsu_rsp.id, + data: fp_lsu_rsp.data, + err : fp_lsu_rsp.error, + write: fp_lsu_rsp.write + }; + + if (RVF || RVD) begin: gen_id_remapper + // Merge Snitch and FP Subsequencer memory interfaces + tcdm_id_remapper #( + .NumIn(2) + ) i_id_remapper ( + .clk_i (clk_i ), + .rst_ni (~rst_i ), + .req_i ({fp_lsu_req, snitch_req} ), + .req_valid_i ({fp_lsu_mem_req_valid, snitch_req_valid} ), + .req_ready_o ({fp_lsu_mem_req_ready, snitch_req_ready} ), + .resp_o ({fp_lsu_rsp, snitch_resp} ), + .resp_valid_o({fp_lsu_mem_rsp_valid, snitch_resp_valid} ), + .resp_ready_i({fp_lsu_mem_rsp_ready, snitch_resp_ready} ), + .req_o (data_req_d ), + .req_valid_o (data_req_d_valid ), + .req_ready_i (data_req_d_ready ), + .resp_i (data_resp_q ), + .resp_valid_i(data_resp_q_valid ), + .resp_ready_o(data_resp_q_ready ) + ); + end: gen_id_remapper else begin: gen_id_remapper_bypass + // Bypass the remapper + assign data_req_d = snitch_req; + assign data_req_d_valid = snitch_req_valid; + assign snitch_req_ready = data_req_d_ready; + + assign snitch_resp = data_resp_q; + assign snitch_resp_valid = data_resp_q_valid; + assign data_resp_q_ready = 
snitch_resp_ready; + + assign fp_lsu_rsp = '0; + assign fp_lsu_mem_rsp_valid = 1'b0; + assign fp_lsu_mem_req_ready = 1'b0; + end: gen_id_remapper_bypass + + + // Cut TCDM data request path + spill_register #( + .T ( snitch_pkg::dreq_t ), + .Bypass ( !RegisterTCDMReq ) + ) i_spill_register_tcdm_req ( + .clk_i , + .rst_ni ( ~rst_i ), + .valid_i ( data_req_d_valid ), + .ready_o ( data_req_d_ready ), + .data_i ( data_req_d ), + .valid_o ( data_req_q_valid ), + .ready_i ( data_req_q_ready ), + .data_o ( data_req_q ) + ); + + // Cut TCDM data response path + spill_register #( + .T ( snitch_pkg::dresp_t ), + .Bypass ( !RegisterTCDMResp ) + ) i_spill_register_tcdm_resp ( + .clk_i , + .rst_ni ( ~rst_i ), + .valid_i ( data_resp_d_valid ), + .ready_o ( data_resp_d_ready ), + .data_i ( data_resp_d ), + .valid_o ( data_resp_q_valid ), + .ready_i ( data_resp_q_ready ), + .data_o ( data_resp_q ) + ); + + // Assign TCDM port 0 to the Snitch data path (shared with the FP LSU when RVF or RVD is set) + assign data_qaddr_o[0] = data_req_q.addr; + assign data_qwrite_o[0] = data_req_q.write; + assign data_qamo_o[0] = data_req_q.amo; + assign data_qdata_o[0] = data_req_q.data; + assign data_qstrb_o[0] = data_req_q.strb; + assign data_qid_o[0] = data_req_q.id; + assign data_qvalid_o[0] = data_req_q_valid; + assign data_req_q_ready = data_qready_i[0]; + assign data_resp_d.data = data_pdata_i[0]; + assign data_resp_d.id = data_pid_i[0]; + assign data_resp_d.write = '0; // Don't care here + assign data_resp_d.error = data_perror_i[0]; + assign data_resp_d_valid = data_pvalid_i[0]; + assign data_pready_o[0] = data_resp_d_ready; + + // -------------------------- + // Tracer + // -------------------------- + // pragma translate_off + int f; + string fn; + logic [63:0] cycle; + int unsigned stall, stall_ins, stall_raw, stall_lsu, stall_acc; + + always_ff @(posedge rst_i) begin + if(rst_i) begin + // Format in hex because vcs and vsim treat decimal differently + // Format with 8 digits because Verilator does not support anything else + $sformat(fn, 
"trace_hart_0x%08x.dasm", hart_id_i); + f = $fopen(fn, "w"); + $display("[Tracer] Logging Hart %d to %s", hart_id_i, fn); + end + end + + typedef enum logic [1:0] {SrcSnitch = 0, SrcFpu = 1, SrcFpuSeq = 2} trace_src_e; + localparam int SnitchTrace = `ifdef SNITCH_TRACE `SNITCH_TRACE `else 0 `endif; + + always_ff @(posedge clk_i or posedge rst_i) begin + automatic string trace_entry; + automatic string extras_str; + + if (!rst_i) begin + cycle <= cycle + 1; + // Trace snitch iff: + // Tracing enabled by CSR register + // we are not stalled <==> we have issued and processed an instruction (including offloads) + // OR we are retiring (issuing a writeback from) a load or accelerator instruction + if ((i_snitch.csr_trace_q || SnitchTrace) && (!i_snitch.stall || i_snitch.retire_load || i_snitch.retire_acc)) begin + // Manual loop unrolling for Verilator + // Data type keys for arrays are currently not supported in Verilator + extras_str = "{"; + // State + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "source", SrcSnitch); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "stall", i_snitch.stall); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_tot", stall); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_ins", stall_ins); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_raw", stall_raw); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_lsu", stall_lsu); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "stall_acc", stall_acc); + // Decoding + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rs1", i_snitch.rs1); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rs2", i_snitch.rs2); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "rd", i_snitch.rd); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "is_load", i_snitch.is_load); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "is_store", i_snitch.is_store); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, 
"is_branch", i_snitch.is_branch); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "pc_d", i_snitch.pc_d); + // Operands + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opa", i_snitch.opa); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "opb", i_snitch.opb); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "opa_select", i_snitch.opa_select); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "opb_select", i_snitch.opb_select); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "opc_select", i_snitch.opc_select); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "write_rd", i_snitch.write_rd); + extras_str = $sformatf("%s'%s': 0x%3x, ", extras_str, "csr_addr", i_snitch.inst_data_i[31:20]); + // Pipeline writeback + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "writeback", i_snitch.alu_writeback); + // Load/Store + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "gpr_rdata_1", i_snitch.gpr_rdata[1]); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "ls_size", i_snitch.ls_size); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "ld_result_32",i_snitch.ld_result[31:0]); + extras_str = $sformatf("%s'%s': 0x%2x, ", extras_str, "lsu_rd", i_snitch.lsu_rd); + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "retire_load", i_snitch.retire_load); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "alu_result", i_snitch.alu_result); + // Atomics + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "ls_amo", i_snitch.ls_amo); + // Accumulator + extras_str = $sformatf("%s'%s': 0x%1x, ", extras_str, "retire_acc", i_snitch.retire_acc); + extras_str = $sformatf("%s'%s': 0x%2x, ", extras_str, "acc_pid", i_snitch.acc_pid_i); + extras_str = $sformatf("%s'%s': 0x%8x, ", extras_str, "acc_pdata_32",i_snitch.acc_pdata_i[31:0]); + extras_str = $sformatf("%s}", extras_str); + + $timeformat(-9, 0, "", 10); + $sformat(trace_entry, "%t %8d 0x%h DASM(%h) #; %s\n", + $time, cycle, i_snitch.pc_q, 
i_snitch.inst_data_i, extras_str); + $fwrite(f, trace_entry); + end + + // Reset all stalls when we execute an instruction + if (!i_snitch.stall) begin + stall <= 0; + stall_ins <= 0; + stall_raw <= 0; + stall_lsu <= 0; + stall_acc <= 0; + end else begin + // We are currently stalled, let's count the stall causes + if (i_snitch.stall) begin + stall <= stall + 1; + end + if ((!i_snitch.inst_ready_i) && (i_snitch.inst_valid_o)) begin + stall_ins <= stall_ins + 1; + end + if ((!i_snitch.operands_ready) || (!i_snitch.dst_ready)) begin + stall_raw <= stall_raw + 1; + end + if (i_snitch.lsu_stall) begin + stall_lsu <= stall_lsu + 1; + end + if (i_snitch.acc_stall) begin + stall_acc <= stall_acc + 1; + end + end + end else begin + cycle <= '0; + stall <= 0; + stall_ins <= 0; + stall_raw <= 0; + stall_lsu <= 0; + stall_acc <= 0; + end + end + + final begin + $fclose(f); + end + // pragma translate_on + +endmodule diff --git a/hw/system/spatz_cluster/cfg/carfield.hjson b/hw/system/spatz_cluster/cfg/carfield.hjson index 88ea7e13..a8f3abd1 100644 --- a/hw/system/spatz_cluster/cfg/carfield.hjson +++ b/hw/system/spatz_cluster/cfg/carfield.hjson @@ -5,6 +5,7 @@ // Cluster configuration for a simple system. { cluster: { + mempool: 0, boot_addr: 4096, // 0x1000 cluster_base_addr: 1358954496, // 0x51000000 cluster_base_offset: 0, // 0x0 diff --git a/hw/system/spatz_cluster/cfg/mempool.hjson b/hw/system/spatz_cluster/cfg/mempool.hjson new file mode 100644 index 00000000..0ec04648 --- /dev/null +++ b/hw/system/spatz_cluster/cfg/mempool.hjson @@ -0,0 +1,95 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cluster configuration for a simple system. 
+{ + cluster: { + mempool: 1, + boot_addr: 4096, // 0x1000 + cluster_base_addr: 1048576, // 0x100000 + cluster_base_offset: 0, // 0x0 + cluster_base_hartid: 0, + addr_width: 32, + data_width: 64, + id_width_in: 2, + id_width_out: 4, + axi_cdc_enable: false, + tcdm: { + size: 128, + banks: 16, + }, + cluster_periph_size: 64, // kB + dma_data_width: 512, + dma_axi_req_fifo_depth: 3, + dma_req_fifo_depth: 3, + // Spatz parameters + vlen: 512, + n_fpu: 4, + n_ipu: 1, + spatz_fpu: true, + // Timing parameters + timing: { + lat_comp_fp32: 1, + lat_comp_fp64: 2, + lat_comp_fp16: 0, + lat_comp_fp16_alt: 0, + lat_comp_fp8: 0, + lat_comp_fp8_alt: 0, + lat_noncomp: 1, + lat_conv: 2, + lat_sdotp: 2, + fpu_pipe_config: "BEFORE" + xbar_latency: "CUT_ALL_PORTS", + + register_core_req: true, + register_core_rsp: true, + register_offload_rsp: true + }, + cores: [ + { $ref: "#/dma_core_template" }, + { $ref: "#/compute_core_template" }, + ], + icache: { + size: 4, // total instruction cache size in kByte + sets: 2, // number of ways + cacheline: 256 // word size in bits + } + } + + dram: { + // 0x8000_0000 + address: 2147483648, + // 0x8000_0000 + length: 2147483648 + }, + peripherals: { + }, + + // Templates. 
+ + compute_core_template: { + isa: "rv32imafd", + xf16: true, + xf8: true, + xfdotp: true, + xdma: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_spatz_outstanding_loads: 4, + num_dtlb_entries: 1, + num_itlb_entries: 1 + }, + dma_core_template: { + isa: "rv32imafd", + xdma: true + xf16: true, + xf8: true, + xfdotp: true, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_spatz_outstanding_loads: 4, + num_dtlb_entries: 1, + num_itlb_entries: 1 + } +} diff --git a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson index 3ac7f3a6..3177cdbb 100644 --- a/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson +++ b/hw/system/spatz_cluster/cfg/spatz_cluster.default.hjson @@ -5,6 +5,7 @@ // Cluster configuration for a simple system. { cluster: { + mempool: 0, boot_addr: 4096, // 0x1000 cluster_base_addr: 1048576, // 0x100000 cluster_base_offset: 0, // 0x0