Skip to content

Commit

Permalink
[TeraPool] Rebase on the main branch; Update the variable control style in Makefile and config files
Browse files Browse the repository at this point in the history
  • Loading branch information
yichao-zh committed Dec 8, 2023
1 parent 1f4a893 commit 7fcf586
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 27 deletions.
7 changes: 7 additions & 0 deletions config/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,10 @@ xqueue_size ?= 0

# Enable the XpulpIMG extension
xpulpimg ?= 1

# These parameters are only used for TeraPool configurations
num_sub_groups_per_group ?= 1
remote_group_latency_cycles ?= 7

# Makefile RTL Filtering Control
# When this is 0, hardware/Makefile removes every line containing
# "mempool_sub_group" from the Bender-generated compile scripts. Any other
# value leaves the generated scripts untouched. Default: 0 (filter enabled).
subgroup_rtl ?= 0
7 changes: 5 additions & 2 deletions config/terapool.mk
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@ banking_factor ?= 4
remote_group_latency_cycles ?= 7

# Radix for hierarchical AXI interconnect
axi_hier_radix ?= 10
axi_hier_radix ?= 9

# Number of AXI masters per group
axi_masters_per_group ?= 4

# Number of DMA backends in each group
dmas_per_group ?= 8
dmas_per_group ?= 4

# L2 Banks/Channels
l2_banks = 16

# Makefile RTL Filtering Control
# Set to 1 so hardware/Makefile does not strip the "mempool_sub_group" lines
# from the Bender-generated compile scripts (the filter only runs when this
# value is 0).
subgroup_rtl = 1
16 changes: 7 additions & 9 deletions hardware/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ endif
vlog_args += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233
vlog_args += -work $(library)
# Defines
vlog_defs += -D$(config)
vlog_defs += -DNUM_CORES=$(num_cores) -DNUM_CORES_PER_TILE=$(num_cores_per_tile) -DNUM_GROUPS=$(num_groups) -DBANKING_FACTOR=$(banking_factor)
vlog_defs += -DL2_BASE=32\'d$(l2_base) -DL2_SIZE=32\'d$(l2_size) -DL2_BANKS=$(l2_banks)
vlog_defs += -DL1_BANK_SIZE=$(l1_bank_size)
Expand All @@ -97,14 +98,8 @@ vlog_defs += -DRO_LINE_WIDTH=$(ro_line_width)
vlog_defs += -DDMAS_PER_GROUP=$(dmas_per_group)
vlog_defs += -DAXI_HIER_RADIX=$(axi_hier_radix) -DAXI_MASTERS_PER_GROUP=$(axi_masters_per_group)
vlog_defs += -DSEQ_MEM_SIZE=$(seq_mem_size) -DXQUEUE_SIZE=$(xqueue_size)
ifdef terapool
subgroup_rtl = 1
vlog_defs += -DTERAPOOL=$(terapool)
vlog_defs += -DNUM_SUB_GROUPS_PER_GROUP=$(num_sub_groups_per_group)
vlog_defs += -DREMOTE_GROUP_LATENCY_CYCLES=$(remote_group_latency_cycles)
else
subgroup_rtl = 0
endif
# These parameters are only used for TeraPool configurations
vlog_defs += -DNUM_SUB_GROUPS_PER_GROUP=$(num_sub_groups_per_group) -DREMOTE_GROUP_LATENCY_CYCLES=$(remote_group_latency_cycles)

# Traffic generation enabled
ifdef tg
Expand Down Expand Up @@ -150,13 +145,14 @@ compile: dpi lib $(buildpath) $(buildpath)/compile.tcl update_opcodes
# Generate the QuestaSim compile script with Bender and compile the RTL.
#
# Steps:
#   1. Let Bender emit the vsim compile script into the target ($@).
#   2. Append "exit" so the batch-mode vsim run terminates after compiling.
#   3. If subgroup_rtl is 0, drop every line that mentions mempool_sub_group
#      from the script. The filtered copy is written to "$@.tmp" (next to the
#      target, name derived from the target) and then moved over the target,
#      so no fixed-name scratch file is left in or shared through the
#      current working directory.
#   4. Run the script with vsim in batch mode from inside the build directory.
$(buildpath)/compile.tcl: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f)
	$(bender) script vsim --vlog-arg="$(vlog_args)" $(vlog_defs) -t rtl -t mempool_vsim > $@
	echo "exit" >> $@
	if [ "${subgroup_rtl}" -eq "0" ]; then awk '!/mempool_sub_group/' $@ > $@.tmp && mv $@.tmp $@; fi
	cd $(buildpath) && $(questa_cmd) vsim -work $(library) -c -do compile.tcl

# Simulation
.PHONY: sim
sim: clean-dasm compile
cd $(buildpath) && \
$(questa_cmd) vsim -voptargs=+acc $(questa_args) $(library).$(top_level) -do ../scripts/questa/run.tcl
$(questa_cmd) vsim -voptargs=+acc $(questa_args) $(library).$(top_level) -do "set config ${config}" -do ../scripts/questa/run.tcl
./scripts/return_status.sh $(buildpath)/transcript

.PHONY: simc
Expand Down Expand Up @@ -187,6 +183,7 @@ elabvcs: dpivcs $(buildpath) $(buildpath)/compilevcs.sh update_opcodes
$(buildpath)/compilevcs.sh: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f)
$(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool_vsim > $(buildpath)/compilevcs.sh
echo "exit" >> $(buildpath)/compilevcs.sh
if [ "${subgroup_rtl}" -eq "0" ]; then awk '!/mempool_sub_group/' $(buildpath)/compilevcs.sh > tmpfile && mv tmpfile $(buildpath)/compilevcs.sh; fi
# Call VCS
cd $(buildpath) && \
chmod +x compilevcs.sh && \
Expand Down Expand Up @@ -260,6 +257,7 @@ $(VERILATOR_MK): $(VERILATOR_CONF) $(VERILATOR_WAIVE) $(MEMPOOL_DIR)/Bender.yml
$(eval boot_addr=$(l2_base))
# Create Bender script of all RTL files
$(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator > $(verilator_files)
if [ "${subgroup_rtl}" -eq "0" ]; then awk '!/mempool_sub_group/' $(verilator_files) > tmpfile && mv tmpfile $(verilator_files); fi
# Append the verilator library files
@echo '' >> $(verilator_files)
# Append the verilator library files: Includes
Expand Down
2 changes: 1 addition & 1 deletion hardware/src/mempool_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ package mempool_pkg;
localparam integer unsigned NumBanksPerSubGroup = NumBanksPerGroup / NumSubGroupsPerGroup;

// TeraPool Remote Groups Latency Control (in Cycles)
localparam integer unsigned RemoteGroupLatencyCycle = `ifdef REMOTE_GROUP_LATENCY_CYCLES `REMOTE_GROUP_LATENCY_CYCLES `else 9 `endif;
localparam integer unsigned RemoteGroupLatencyCycle = `ifdef REMOTE_GROUP_LATENCY_CYCLES `REMOTE_GROUP_LATENCY_CYCLES `else 7 `endif;

//TeraPool AXI/DMA Config
localparam integer unsigned NumAXIMastersPerSubGroup = NumAXIMastersPerGroup/NumSubGroupsPerGroup;
Expand Down
73 changes: 58 additions & 15 deletions hardware/tb/mempool_tb.sv
Original file line number Diff line number Diff line change
Expand Up @@ -350,36 +350,76 @@ module mempool_tb;
**************************************/
`ifndef TARGET_SYNTHESIS
`ifndef TARGET_VERILATOR
`ifndef POSTLAYOUT

// Cores
logic [NumCores-1:0] instruction_handshake, lsu_request, lsu_handshake;
int unsigned snitch_utilization, lsu_pressure, lsu_utilization;
assign snitch_utilization = $countones(instruction_handshake);
assign lsu_utilization = $countones(lsu_handshake);
assign lsu_pressure = $countones(lsu_request);
for (genvar g = 0; g < NumGroups; g++) begin
for (genvar t = 0; t < NumTilesPerGroup; t++) begin
for (genvar c = 0; c < NumCoresPerTile; c++) begin
logic valid_instr, stall;
logic lsu_valid, lsu_ready;
// Snitch
assign valid_instr = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
assign stall = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
assign instruction_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = valid_instr & !stall;
// Interconnect
assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
assign lsu_request[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & !lsu_ready;
assign lsu_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & lsu_ready;
`ifdef TERAPOOL
for (genvar g = 0; g < NumGroups; g++) begin
for (genvar sg = 0; sg < NumSubGroupsPerGroup; sg++) begin
for (genvar t = 0; t < NumTilesPerSubGroup; t++) begin
for (genvar c = 0; c < NumCoresPerTile; c++) begin
logic valid_instr, stall;
logic lsu_valid, lsu_ready;
// Snitch
assign valid_instr = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
assign stall = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
assign instruction_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = valid_instr & !stall;
// Interconnect
assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
assign lsu_request[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = lsu_valid & !lsu_ready;
assign lsu_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = lsu_valid & lsu_ready;
end
end
end
end
end
`else
for (genvar g = 0; g < NumGroups; g++) begin
for (genvar t = 0; t < NumTilesPerGroup; t++) begin
for (genvar c = 0; c < NumCoresPerTile; c++) begin
logic valid_instr, stall;
logic lsu_valid, lsu_ready;
// Snitch
assign valid_instr = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
assign stall = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
assign instruction_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = valid_instr & !stall;
// Interconnect
assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
assign lsu_request[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & !lsu_ready;
assign lsu_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & lsu_ready;
end
end
end
`endif

// DSPU
if (snitch_pkg::XPULPIMG) begin: gen_utilization
logic [NumCores-1:0] dspu_handshake, dspu_mac;
int unsigned dspu_utilization, mac_utilization;
assign dspu_utilization = $countones(dspu_handshake);
assign mac_utilization = $countones(dspu_mac);
`ifdef TERAPOOL
for (genvar g = 0; g < NumGroups; g++) begin
for (genvar sg = 0; sg < NumSubGroupsPerGroup; sg++) begin
for (genvar t = 0; t < NumTilesPerSubGroup; t++) begin
for (genvar c = 0; c < NumCoresPerTile; c++) begin
logic dsp_valid, dsp_ready, mac;
assign dsp_valid = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.in_valid_i;
assign dsp_ready = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.in_ready_o;
assign mac = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.operator_i ==? riscv_instr::P_MAC;
assign dspu_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = dsp_valid & dsp_ready;
assign dspu_mac[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = dsp_valid & dsp_ready & mac;
end
end
end
end
`else
for (genvar g = 0; g < NumGroups; g++) begin
for (genvar t = 0; t < NumTilesPerGroup; t++) begin
for (genvar c = 0; c < NumCoresPerTile; c++) begin
Expand All @@ -392,7 +432,9 @@ module mempool_tb;
end
end
end
`endif
end

// AXI
logic [NumGroups*NumAXIMastersPerGroup-1:0] w_valid, w_ready, r_ready, r_valid;
int unsigned axi_w_utilization, axi_r_utilization;
Expand All @@ -407,5 +449,6 @@ module mempool_tb;

`endif
`endif
`endif

endmodule : mempool_tb

0 comments on commit 7fcf586

Please sign in to comment.