From a45591bc981a6855472ddeab55375c3ad54bcaf2 Mon Sep 17 00:00:00 2001 From: "msc23h24 Diyou Shen (dishen)" Date: Fri, 16 Aug 2024 11:30:00 +0200 Subject: [PATCH] Update Spatz to support latest MemPool. --- hw/ip/spatz/src/spatz.sv | 6 +- hw/ip/spatz/src/spatz_controller.sv | 4 +- hw/ip/spatz/src/spatz_fpu_sequencer.sv | 9 +- hw/ip/spatz/src/spatz_pkg.sv.tpl | 21 ++- hw/ip/spatz/src/spatz_vlsu.sv | 8 +- hw/ip/spatz_cc/src/spatz_mempool_cc.sv | 3 + hw/system/spatz_cluster/cfg/mempool.hjson | 153 +++++++++++----------- 7 files changed, 113 insertions(+), 91 deletions(-) diff --git a/hw/ip/spatz/src/spatz.sv b/hw/ip/spatz/src/spatz.sv index cfe81ea2..8ab00e2d 100644 --- a/hw/ip/spatz/src/spatz.sv +++ b/hw/ip/spatz/src/spatz.sv @@ -53,7 +53,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( output logic [1:0] spatz_mem_finished_o, output logic [1:0] spatz_mem_str_finished_o, // FPU memory interface interface -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL output logic fp_lsu_mem_req_valid_o, input logic fp_lsu_mem_req_ready_i, input logic fp_lsu_mem_rsp_valid_i, @@ -130,7 +130,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( // Tie the memory interface to zero assign fp_lsu_mem_req_o = '0; -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL assign fp_lsu_mem_req_valid_o = 1'b0; assign fp_lsu_mem_rsp_ready_o = 1'b0; `endif @@ -164,7 +164,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( .resp_valid_i ( resp_valid ), .resp_ready_o ( resp_ready ), // Memory interface -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL .fp_lsu_mem_req_valid_o ( fp_lsu_mem_req_valid_o ), .fp_lsu_mem_req_ready_i ( fp_lsu_mem_req_ready_i ), .fp_lsu_mem_rsp_valid_i ( fp_lsu_mem_rsp_valid_i ), diff --git a/hw/ip/spatz/src/spatz_controller.sv b/hw/ip/spatz/src/spatz_controller.sv index 237c09b8..980077d7 100644 --- a/hw/ip/spatz/src/spatz_controller.sv +++ b/hw/ip/spatz/src/spatz_controller.sv @@ -556,7 +556,7 @@ module spatz_controller vfu_rsp_ready = 1'b0; if (retire_csr) begin -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL rsp_d.write = 1'b1; `endif // Read CSR and write back to cpu @@ -584,7 +584,7 @@ module spatz_controller end else if (vfu_rsp_valid) begin rsp_d.id = vfu_rsp.rd; rsp_d.data = vfu_rsp.result; -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL rsp_d.write = 1'b1; `endif rsp_valid_d = 1'b1; diff --git a/hw/ip/spatz/src/spatz_fpu_sequencer.sv b/hw/ip/spatz/src/spatz_fpu_sequencer.sv index 322a8754..5bdada90 100644 --- a/hw/ip/spatz/src/spatz_fpu_sequencer.sv +++ b/hw/ip/spatz/src/spatz_fpu_sequencer.sv @@ -45,7 +45,7 @@ module spatz_fpu_sequencer input logic resp_valid_i, output logic resp_ready_o, // Memory interface -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL output logic fp_lsu_mem_req_valid_o, input logic fp_lsu_mem_req_ready_i, input logic fp_lsu_mem_rsp_valid_i, @@ -550,8 +550,9 @@ module spatz_fpu_sequencer .lsu_perror_o (/* Unused */ ), .lsu_pvalid_o (fp_lsu_pvalid ), .lsu_pready_i (fp_lsu_pready ), + .lsu_empty_o (/* unused */ ), // Memory interface -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL .data_qaddr_o (mem_qaddr ), .data_qwrite_o(mem_qwrite ), .data_qamo_o (/* Unused */ ), @@ -604,7 +605,7 @@ module spatz_fpu_sequencer fp_lsu_qwrite = is_store; fp_lsu_qsigned = 1'b0; // lsu in mempool-snitch will write to argb -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL fp_lsu_qaddr = issue_req_i.data_argb; `else fp_lsu_qaddr = issue_req_i.data_argc; @@ -620,7 +621,7 @@ module spatz_fpu_sequencer // Is the LSU stalling? lsu_stall = fp_lsu_qvalid && !fp_lsu_qready; -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL // Assign TCDM data interface fp_lsu_mem_req_o = '{ id : mem_qid, diff --git a/hw/ip/spatz/src/spatz_pkg.sv.tpl b/hw/ip/spatz/src/spatz_pkg.sv.tpl index 7d565323..5cd67e48 100644 --- a/hw/ip/spatz/src/spatz_pkg.sv.tpl +++ b/hw/ip/spatz/src/spatz_pkg.sv.tpl @@ -388,18 +388,20 @@ package spatz_pkg; // INT8 INT16 INT32 INT64 IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b1} } : + +% if cfg['mempool']: + // Turn off several units to save area in MemPool // Single Precision FPU '{ Width : ELEN, EnableVectors: 1'b1, EnableNanBox : 1'b1, // FP32 FP64 FP16 FP8 FP16a FP8a - FpFmtMask : {RVF, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0}, + FpFmtMask : {RVF, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}, // INT8 INT16 INT32 INT64 - IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b0} + IntFmtMask : {1'b0, 1'b1, 1'b1, 1'b0} }; -% if cfg['mempool']: localparam fpnew_pkg::fpu_implementation_t MemPoolFPUImpl = '{ // Pipeline stages @@ -416,9 +418,20 @@ package spatz_pkg; '{ default: fpnew_pkg::DISABLED}, // DIVSQRT '{ default: fpnew_pkg::PARALLEL}, // NONCOMP '{ default: fpnew_pkg::MERGED}, // CONV - '{ default: fpnew_pkg::MERGED}}, // DOTP + '{ default: fpnew_pkg::DISABLED}}, // SDOTP PipeConfig: fpnew_pkg::BEFORE }; +% else : + // Single Precision FPU + '{ + Width : ELEN, + EnableVectors: 1'b1, + EnableNanBox : 1'b1, + // FP32 FP64 FP16 FP8 FP16a FP8a + FpFmtMask : {RVF, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0}, + // INT8 INT16 INT32 INT64 + IntFmtMask : {1'b1, 1'b1, 1'b1, 1'b0} + }; % endif // FP format conversion diff --git a/hw/ip/spatz/src/spatz_vlsu.sv b/hw/ip/spatz/src/spatz_vlsu.sv index 3326d00c..398a090d 100644 --- a/hw/ip/spatz/src/spatz_vlsu.sv +++ b/hw/ip/spatz/src/spatz_vlsu.sv @@ -152,7 +152,7 @@ module spatz_vlsu store_count_d[port]++; // Did we get the ack of a store? - `ifdef MEMPOOL_SPATZ + `ifdef TARGET_MEMPOOL if (store_count_q[port] != '0 && spatz_mem_rsp_valid_i[port] && spatz_mem_rsp_i[port].write) store_count_d[port]--; `else @@ -183,7 +183,7 @@ module spatz_vlsu // The reorder buffer decouples the memory side from the register file side. // All elements from one side to the other go through it. for (genvar port = 0; port < NrMemPorts; port++) begin : gen_rob -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL reorder_buffer #( .DataWidth(ELEN ), .NumWords (NrOutstandingLoads) @@ -866,7 +866,7 @@ module spatz_vlsu for (int unsigned port = 0; port < NrMemPorts; port++) begin // Write the load result to the buffer rob_wdata[port] = spatz_mem_rsp_i[port].data; -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL rob_wid[port] = spatz_mem_rsp_i[port].id; // Need to consider out-of-order memory response rob_push[port] = spatz_mem_rsp_valid_i[port] && (state_q == VLSU_RunningLoad) && spatz_mem_rsp_i[port].write == '0; @@ -983,7 +983,7 @@ module spatz_vlsu .valid_o (spatz_mem_req_valid_o[port]), .ready_i (spatz_mem_req_ready_i[port]) ); -`ifdef MEMPOOL_SPATZ +`ifdef TARGET_MEMPOOL // ID is required in Mempool-Spatz assign spatz_mem_req[port].id = mem_req_id[port]; assign spatz_mem_req[port].addr = mem_req_addr[port]; diff --git a/hw/ip/spatz_cc/src/spatz_mempool_cc.sv b/hw/ip/spatz_cc/src/spatz_mempool_cc.sv index eb9071cb..8bd8816b 100644 --- a/hw/ip/spatz_cc/src/spatz_mempool_cc.sv +++ b/hw/ip/spatz_cc/src/spatz_mempool_cc.sv @@ -153,10 +153,12 @@ module spatz_mempool_cc .clk_i ( clk_i ), .rst_i ( rst_i ), .hart_id_i ( hart_id_i ), + /// Instruction .inst_addr_o ( inst_addr_o ), .inst_data_i ( inst_data_i ), .inst_valid_o ( inst_valid_o ), .inst_ready_i ( inst_ready_i ), + /// Spatz .acc_qaddr_o ( acc_req_d.addr ), .acc_qid_o ( acc_req_d.id ), .acc_qdata_op_o ( acc_req_d.data_op ), @@ -174,6 +176,7 @@ module spatz_mempool_cc .acc_qdata_rsp_i ( acc_req_rsp ), .acc_mem_finished_i ( spatz_mem_finished ), .acc_mem_str_finished_i ( spatz_mem_str_finished ), + /// TCDM .data_qaddr_o ( snitch_req.addr ), .data_qwrite_o ( snitch_req.write ), .data_qamo_o ( snitch_req.amo ), diff --git a/hw/system/spatz_cluster/cfg/mempool.hjson b/hw/system/spatz_cluster/cfg/mempool.hjson index 0ec04648..a5898711 100644 --- a/hw/system/spatz_cluster/cfg/mempool.hjson +++ b/hw/system/spatz_cluster/cfg/mempool.hjson @@ -4,92 +4,97 @@ // Cluster configuration for a simple system. { - cluster: { - mempool: 1, - boot_addr: 4096, // 0x1000 - cluster_base_addr: 1048576, // 0x100000 - cluster_base_offset: 0, // 0x0 - cluster_base_hartid: 0, - addr_width: 32, - data_width: 64, - id_width_in: 2, - id_width_out: 4, - axi_cdc_enable: false, - tcdm: { - size: 128, - banks: 16, + "cluster": { + "mempool": 1, + "boot_addr": 4096, // 0x1000 + "cluster_base_addr": 1048576, // 0x100000 + "cluster_base_offset": 0, // 0x0 + "cluster_base_hartid": 0, + "addr_width": 32, + "data_width": 64, + "id_width_in": 2, + "id_width_out": 4, + "user_width": 10, + "cluster_default_axi_user": 7, + "axi_cdc_enable": false, + "sw_rst_enable": true, + "axi_isolate_enable": true, + "tcdm": { + "size": 128, + "banks": 16 }, - cluster_periph_size: 64, // kB - dma_data_width: 512, - dma_axi_req_fifo_depth: 3, - dma_req_fifo_depth: 3, + "cluster_periph_size": 64, // kB + "dma_data_width": 512, + "dma_axi_req_fifo_depth": 3, + "dma_req_fifo_depth": 3, // Spatz parameters - vlen: 512, - n_fpu: 4, - n_ipu: 1, - spatz_fpu: true, + "vlen": 512, + "n_fpu": 4, + "n_ipu": 1, + "spatz_fpu": true, // Timing parameters - timing: { - lat_comp_fp32: 1, - lat_comp_fp64: 2, - lat_comp_fp16: 0, - lat_comp_fp16_alt: 0, - lat_comp_fp8: 0, - lat_comp_fp8_alt: 0, - lat_noncomp: 1, - lat_conv: 2, - lat_sdotp: 2, - fpu_pipe_config: "BEFORE" - xbar_latency: "CUT_ALL_PORTS", + "timing": { + "lat_comp_fp32": 1, + "lat_comp_fp64": 2, + "lat_comp_fp16": 0, + "lat_comp_fp16_alt": 0, + "lat_comp_fp8": 0, + "lat_comp_fp8_alt": 0, + "lat_noncomp": 1, + "lat_conv": 2, + "lat_sdotp": 2, + "fpu_pipe_config": "BEFORE", + "xbar_latency": "CUT_ALL_PORTS", - register_core_req: true, - register_core_rsp: true, - register_offload_rsp: true + "register_tcdm_cuts": true, + "register_core_req": true, + "register_core_rsp": true, + "register_offload_rsp": true }, - cores: [ - { $ref: "#/dma_core_template" }, - { $ref: "#/compute_core_template" }, + "cores": [ + // DMA core + { + "isa": "rv32imafd", + "xdma": true, + "xf16": true, + "xf8": true, + "xfdotp": true, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + }, + + // Compute core + { + "isa": "rv32imafd", + "xf16": true, + "xf8": true, + "xfdotp": true, + "xdma": false, + "num_int_outstanding_loads": 1, + "num_int_outstanding_mem": 4, + "num_spatz_outstanding_loads": 4, + "num_dtlb_entries": 1, + "num_itlb_entries": 1 + } ], - icache: { - size: 4, // total instruction cache size in kByte - sets: 2, // number of ways - cacheline: 256 // word size in bits + "icache": { + "size": 4, // total instruction cache size in kByte + "sets": 2, // number of ways + "cacheline": 128 // word size in bits } - } + }, - dram: { + "dram": { // 0x8000_0000 - address: 2147483648, + "address": 2147483648, // 0x8000_0000 - length: 2147483648 - }, - peripherals: { + "length": 2147483648 }, - // Templates. + "peripherals": { - compute_core_template: { - isa: "rv32imafd", - xf16: true, - xf8: true, - xfdotp: true, - xdma: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 - }, - dma_core_template: { - isa: "rv32imafd", - xdma: true - xf16: true, - xf8: true, - xfdotp: true, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_spatz_outstanding_loads: 4, - num_dtlb_entries: 1, - num_itlb_entries: 1 } }