From f24ba0e222009c6744da3f5013e312326b35f9d1 Mon Sep 17 00:00:00 2001 From: Yunhao Deng Date: Sun, 25 Aug 2024 16:14:33 +0200 Subject: [PATCH] Merge VPK180 and VCU128 FPGA deployment flow --- target/fpga/.gitignore | 1 + target/fpga_chip/hemaia_system/.gitignore | 2 +- target/fpga_chip/hemaia_system/Makefile | 2 +- .../hemaia_system/hemaia_system_vcu128.tcl | 26 +- .../hemaia_system/hemaia_system_vcu128_bd.tcl | 50 +- .../hemaia_system_vcu128_impl.xdc | 2 +- .../hemaia_system_vcu128_impl_ext_jtag.xdc | 2 +- target/rtl/Makefile | 16 +- .../cfg/cluster_cfg/snax_KUL_cluster.hjson | 30 +- ...ster.hjson => snax_KUL_xdma_cluster.hjson} | 439 ++++++++++-------- .../snax_streamer_gemm_add_c_cluster.hjson | 175 ------- .../cfg/occamy_cfg/snax_two_clusters.hjson | 2 +- target/rtl/snax_minimal.hjson | 198 -------- target/rtl/snax_two_clusters.hjson | 200 -------- 14 files changed, 309 insertions(+), 836 deletions(-) rename target/rtl/cfg/cluster_cfg/{snax_streamer_gemmX_xdma_cluster.hjson => snax_KUL_xdma_cluster.hjson} (72%) mode change 100755 => 100644 delete mode 100644 target/rtl/cfg/cluster_cfg/snax_streamer_gemm_add_c_cluster.hjson delete mode 100755 target/rtl/snax_minimal.hjson delete mode 100755 target/rtl/snax_two_clusters.hjson diff --git a/target/fpga/.gitignore b/target/fpga/.gitignore index 71733a18a..fb2bcdec7 100644 --- a/target/fpga/.gitignore +++ b/target/fpga/.gitignore @@ -4,6 +4,7 @@ vivado_ips/xgui .Xil/ xgui/ define_defines_includes_no_simset.tcl +probes.ltx *.jou *.log *.zip diff --git a/target/fpga_chip/hemaia_system/.gitignore b/target/fpga_chip/hemaia_system/.gitignore index 8328c7b50..3ee977d59 100644 --- a/target/fpga_chip/hemaia_system/.gitignore +++ b/target/fpga_chip/hemaia_system/.gitignore @@ -1,3 +1,3 @@ /hemaia_system -/hemaia_system_vcu128 /define_defines_includes_no_simset.tcl +/probes.ltx diff --git a/target/fpga_chip/hemaia_system/Makefile b/target/fpga_chip/hemaia_system/Makefile index 171945349..abef0c4cb 100644 --- 
a/target/fpga_chip/hemaia_system/Makefile +++ b/target/fpga_chip/hemaia_system/Makefile @@ -34,6 +34,6 @@ define_defines_includes_no_simset.tcl: ${BENDER} script vivado $(BENDER_TARGETS) --only-defines --only-includes --no-simset > $@ clean: - rm -rf .Xil hemaia_system_vcu128 *.jou *.log *.str define_defines_includes_no_simset.tcl + rm -rf .Xil hemaia_system *.jou *.log *.str define_defines_includes_no_simset.tcl .PHONY: program flash clean diff --git a/target/fpga_chip/hemaia_system/hemaia_system_vcu128.tcl b/target/fpga_chip/hemaia_system/hemaia_system_vcu128.tcl index 741f61263..fec31b502 100644 --- a/target/fpga_chip/hemaia_system/hemaia_system_vcu128.tcl +++ b/target/fpga_chip/hemaia_system/hemaia_system_vcu128.tcl @@ -15,7 +15,7 @@ if {$argc > 1 && [lindex $argv 1]} { set EXT_JTAG true } set nproc [exec nproc] # Create project -set project hemaia_system_vcu128 +set project hemaia_system create_project $project ./$project -force -part xcvu37p-fsvh2892-2L-e set_property board_part xilinx.com:vcu128:part0:1.0 [current_project] @@ -30,31 +30,31 @@ source hemaia_system_vcu128_bd.tcl # Add constraint files add_files -fileset constrs_1 -norecurse hemaia_system_vcu128_impl.xdc import_files -fileset constrs_1 hemaia_system_vcu128_impl.xdc -set_property used_in_synthesis false [get_files hemaia_system_vcu128/hemaia_system_vcu128.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl.xdc] +set_property used_in_synthesis false [get_files hemaia_system/hemaia_system.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl.xdc] if { $EXT_JTAG } { add_files -fileset constrs_1 -norecurse hemaia_system_vcu128_impl_ext_jtag.xdc import_files -fileset constrs_1 hemaia_system_vcu128_impl_ext_jtag.xdc - set_property used_in_synthesis false [get_files hemaia_system_vcu128/hemaia_system_vcu128.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl.xdc] + set_property used_in_synthesis false [get_files 
hemaia_system/hemaia_system.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc] } else { delete_bd_objs [get_bd_nets -of_objects [get_bd_ports "jtag_tck_i jtag_tdi_i jtag_tdo_o jtag_tms_i" ]] delete_bd_objs [get_bd_ports jtag_*] } # Generate wrapper -make_wrapper -files [get_files ./hemaia_system_vcu128/hemaia_system_vcu128.srcs/sources_1/bd/hemaia_system_vcu128/hemaia_system_vcu128.bd] -top -add_files -norecurse ./hemaia_system_vcu128/hemaia_system_vcu128.gen/sources_1/bd/hemaia_system_vcu128/hdl/hemaia_system_vcu128_wrapper.v +make_wrapper -files [get_files ./hemaia_system/hemaia_system.srcs/sources_1/bd/hemaia_system/hemaia_system.bd] -top +add_files -norecurse ./hemaia_system/hemaia_system.gen/sources_1/bd/hemaia_system/hdl/hemaia_system_wrapper.v update_compile_order -fileset sources_1 # Create runs -generate_target all [get_files ./hemaia_system_vcu128/hemaia_system_vcu128.srcs/sources_1/bd/hemaia_system_vcu128/hemaia_system_vcu128.bd] -export_ip_user_files -of_objects [get_files ./hemaia_system_vcu128/hemaia_system_vcu128.srcs/sources_1/bd/hemaia_system_vcu128/hemaia_system_vcu128.bd] -no_script -sync -force -quiet -create_ip_run [get_files -of_objects [get_fileset sources_1] ./hemaia_system_vcu128/hemaia_system_vcu128.srcs/sources_1/bd/hemaia_system_vcu128/hemaia_system_vcu128.bd] +generate_target all [get_files ./hemaia_system/hemaia_system.srcs/sources_1/bd/hemaia_system/hemaia_system.bd] +export_ip_user_files -of_objects [get_files ./hemaia_system/hemaia_system.srcs/sources_1/bd/hemaia_system/hemaia_system.bd] -no_script -sync -force -quiet +create_ip_run [get_files -of_objects [get_fileset sources_1] ./hemaia_system/hemaia_system.srcs/sources_1/bd/hemaia_system/hemaia_system.bd] # Re-add hemaia chip includes -set build hemaia_system_vcu128 +set build hemaia_system export_ip_user_files -of_objects [get_ips occamy_chip_0] -no_script -sync -force -quiet -eval [exec sed {s/current_fileset/get_filesets hemaia_system_vcu128_occamy_chip_0_0/} 
define_defines_includes_no_simset.tcl] +eval [exec sed {s/current_fileset/get_filesets hemaia_system_occamy_chip_0/} define_defines_includes_no_simset.tcl] # Do NOT insert BUFGs on high-fanout nets (e.g. reset). This will backfire during placement. set_param logicopt.enableBUFGinsertHFN no @@ -117,7 +117,7 @@ if ($DEBUG) { ## Clock set_property port_width 1 [get_debug_ports u_ila_0/clk] - connect_debug_port u_ila_0/clk [get_nets [list hemaia_system_vcu128_i/clk_wiz/inst/clk_core]] + connect_debug_port u_ila_0/clk [get_nets [list hemaia_system_i/clk_wiz/inst/clk_core]] set debugNets [lsort -dictionary [get_nets -hier -filter {MARK_DEBUG == 1}]] set netNameLast "" @@ -145,9 +145,9 @@ if ($DEBUG) { set netNameLast $netName } - set_property target_constrs_file hemaia_system_vcu128/hemaia_system_vcu128.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl.xdc [current_fileset -constrset] + set_property target_constrs_file hemaia_system/hemaia_system.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl.xdc [current_fileset -constrset] if { $EXT_JTAG } { - set_property target_constrs_file hemaia_system_vcu128/hemaia_system_vcu128.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc [current_fileset -constrset] + set_property target_constrs_file hemaia_system/hemaia_system.srcs/constrs_1/imports/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc [current_fileset -constrset] } save_constraints -force diff --git a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_bd.tcl b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_bd.tcl index dc28cbc3c..03443dd74 100644 --- a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_bd.tcl +++ b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_bd.tcl @@ -1,6 +1,6 @@ ################################################################ -# This is a generated script based on design: hemaia_system_vcu128 +# This is a generated script based on design: hemaia_system # # Though there are limitations 
about the generated script, # the main purpose of this utility is to make learning @@ -41,7 +41,7 @@ if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { ################################################################ # To test this script, run the following commands from Vivado Tcl console: -# source hemaia_system_vcu128_script.tcl +# source hemaia_system_script.tcl # If there is no project opened, this script will create a # project, but make sure you do not have an existing project @@ -56,7 +56,7 @@ if { $list_projs eq "" } { # CHANGE DESIGN NAME HERE variable design_name -set design_name hemaia_system_vcu128 +set design_name hemaia_system # If you do not already have an existing IP Integrator design open, # you can create a design using the following command: @@ -319,46 +319,46 @@ proc create_root_design { parentCell } { set_property CONFIG.DIN_FROM {7} $xlslice_1 - # Create instance: occamy_chip_0, and set properties - set occamy_chip_0 [ create_bd_cell -type ip -vlnv MICAS_KUL:user:occamy_chip:1.0 occamy_chip_0 ] + # Create instance: occamy_chip, and set properties + set occamy_chip [ create_bd_cell -type ip -vlnv MICAS_KUL:user:occamy_chip:1.0 occamy_chip ] # Create interface connections connect_bd_intf_net -intf_net default_100mhz_clk_1 [get_bd_intf_ports default_100mhz_clk] [get_bd_intf_pins clk_wiz/CLK_IN1_D] # Create port connections - connect_bd_net -net Net [get_bd_ports spim_sd_io] [get_bd_pins occamy_chip_0/spim_sd_io] - connect_bd_net -net Net1 [get_bd_ports i2c_sda_io] [get_bd_pins occamy_chip_0/i2c_sda_io] + connect_bd_net -net Net [get_bd_ports spim_sd_io] [get_bd_pins occamy_chip/spim_sd_io] + connect_bd_net -net Net1 [get_bd_ports i2c_sda_io] [get_bd_pins occamy_chip/i2c_sda_io] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets Net1] - connect_bd_net -net Net2 [get_bd_ports i2c_scl_io] [get_bd_pins occamy_chip_0/i2c_scl_io] + connect_bd_net -net Net2 [get_bd_ports i2c_scl_io] [get_bd_pins occamy_chip/i2c_scl_io] 
set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets Net2] - connect_bd_net -net c_high_dout [get_bd_pins c_high/dout] [get_bd_ports jtag_vdd_o] [get_bd_pins occamy_chip_0/jtag_trst_ni] - connect_bd_net -net clk_wiz_clk_core [get_bd_pins clk_wiz/clk_core] [get_bd_pins vio_sys/clk] [get_bd_pins occamy_chip_0/clk_i] [get_bd_pins occamy_chip_0/clk_periph_i] - connect_bd_net -net clk_wiz_clk_rtc [get_bd_pins clk_wiz/clk_rtc] [get_bd_pins occamy_chip_0/rtc_i] - connect_bd_net -net const_low_dout [get_bd_pins c_low/dout] [get_bd_ports jtag_gnd_o] [get_bd_pins occamy_chip_0/test_mode_i] [get_bd_pins occamy_chip_0/gpio_d_i] [get_bd_pins occamy_chip_0/ext_irq_i] - connect_bd_net -net jtag_tck_i_1 [get_bd_ports jtag_tck_i] [get_bd_pins occamy_chip_0/jtag_tck_i] + connect_bd_net -net c_high_dout [get_bd_pins c_high/dout] [get_bd_ports jtag_vdd_o] [get_bd_pins occamy_chip/jtag_trst_ni] + connect_bd_net -net clk_wiz_clk_core [get_bd_pins clk_wiz/clk_core] [get_bd_pins vio_sys/clk] [get_bd_pins occamy_chip/clk_i] [get_bd_pins occamy_chip/clk_periph_i] + connect_bd_net -net clk_wiz_clk_rtc [get_bd_pins clk_wiz/clk_rtc] [get_bd_pins occamy_chip/rtc_i] + connect_bd_net -net const_low_dout [get_bd_pins c_low/dout] [get_bd_ports jtag_gnd_o] [get_bd_pins occamy_chip/test_mode_i] [get_bd_pins occamy_chip/gpio_d_i] [get_bd_pins occamy_chip/ext_irq_i] + connect_bd_net -net jtag_tck_i_1 [get_bd_ports jtag_tck_i] [get_bd_pins occamy_chip/jtag_tck_i] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets jtag_tck_i_1] - connect_bd_net -net jtag_tdi_i_1 [get_bd_ports jtag_tdi_i] [get_bd_pins occamy_chip_0/jtag_tdi_i] + connect_bd_net -net jtag_tdi_i_1 [get_bd_ports jtag_tdi_i] [get_bd_pins occamy_chip/jtag_tdi_i] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets jtag_tdi_i_1] - connect_bd_net -net jtag_tms_i_1 [get_bd_ports jtag_tms_i] [get_bd_pins occamy_chip_0/jtag_tms_i] + connect_bd_net -net jtag_tms_i_1 [get_bd_ports jtag_tms_i] [get_bd_pins occamy_chip/jtag_tms_i] set_property 
HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets jtag_tms_i_1] - connect_bd_net -net occamy_chip_0_gpio_d_o [get_bd_pins occamy_chip_0/gpio_d_o] [get_bd_pins xlslice_1/Din] - connect_bd_net -net occamy_chip_0_jtag_tdo_o [get_bd_pins occamy_chip_0/jtag_tdo_o] [get_bd_ports jtag_tdo_o] + connect_bd_net -net occamy_chip_0_gpio_d_o [get_bd_pins occamy_chip/gpio_d_o] [get_bd_pins xlslice_1/Din] + connect_bd_net -net occamy_chip_0_jtag_tdo_o [get_bd_pins occamy_chip/jtag_tdo_o] [get_bd_ports jtag_tdo_o] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets occamy_chip_0_jtag_tdo_o] - connect_bd_net -net occamy_chip_0_spim_csb_o [get_bd_pins occamy_chip_0/spim_csb_o] [get_bd_ports spim_csb_o] - connect_bd_net -net occamy_chip_0_spim_sck_o [get_bd_pins occamy_chip_0/spim_sck_o] [get_bd_ports spim_sck_o] - connect_bd_net -net occamy_chip_0_uart_rts_no [get_bd_pins occamy_chip_0/uart_rts_no] [get_bd_ports uart_rts_no_0] + connect_bd_net -net occamy_chip_0_spim_csb_o [get_bd_pins occamy_chip/spim_csb_o] [get_bd_ports spim_csb_o] + connect_bd_net -net occamy_chip_0_spim_sck_o [get_bd_pins occamy_chip/spim_sck_o] [get_bd_ports spim_sck_o] + connect_bd_net -net occamy_chip_0_uart_rts_no [get_bd_pins occamy_chip/uart_rts_no] [get_bd_ports uart_rts_no_0] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets occamy_chip_0_uart_rts_no] - connect_bd_net -net occamy_chip_0_uart_tx_o [get_bd_pins occamy_chip_0/uart_tx_o] [get_bd_ports uart_tx_o_0] + connect_bd_net -net occamy_chip_0_uart_tx_o [get_bd_pins occamy_chip/uart_tx_o] [get_bd_ports uart_tx_o_0] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets occamy_chip_0_uart_tx_o] connect_bd_net -net occamy_rst [get_bd_pins rst_or_core/Res] [get_bd_pins rst_core_inv/Op1] connect_bd_net -net occamy_rst_vio [get_bd_pins vio_sys/probe_out0] [get_bd_pins concat_rst_core/In1] - connect_bd_net -net occamy_rstn [get_bd_pins rst_core_inv/Res] [get_bd_pins occamy_chip_0/rst_ni] [get_bd_pins occamy_chip_0/rst_periph_ni] + connect_bd_net -net occamy_rstn 
[get_bd_pins rst_core_inv/Res] [get_bd_pins occamy_chip/rst_ni] [get_bd_pins occamy_chip/rst_periph_ni] connect_bd_net -net reset_1 [get_bd_ports reset] [get_bd_pins concat_rst_core/In0] - connect_bd_net -net uart_cts_ni_0_1 [get_bd_ports uart_cts_ni_0] [get_bd_pins occamy_chip_0/uart_cts_ni] + connect_bd_net -net uart_cts_ni_0_1 [get_bd_ports uart_cts_ni_0] [get_bd_pins occamy_chip/uart_cts_ni] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets uart_cts_ni_0_1] - connect_bd_net -net uart_rx_i_0_1 [get_bd_ports uart_rx_i_0] [get_bd_pins occamy_chip_0/uart_rx_i] + connect_bd_net -net uart_rx_i_0_1 [get_bd_ports uart_rx_i_0] [get_bd_pins occamy_chip/uart_rx_i] set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_nets uart_rx_i_0_1] - connect_bd_net -net vio_sys_probe_out1 [get_bd_pins vio_sys/probe_out1] [get_bd_pins occamy_chip_0/boot_mode_i] + connect_bd_net -net vio_sys_probe_out1 [get_bd_pins vio_sys/probe_out1] [get_bd_pins occamy_chip/boot_mode_i] connect_bd_net -net xlconcat_2_dout [get_bd_pins concat_rst_core/dout] [get_bd_pins rst_or_core/Op1] connect_bd_net -net xlslice_1_Dout [get_bd_pins xlslice_1/Dout] [get_bd_ports gpio_d_o] diff --git a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl.xdc b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl.xdc index 90dfe7b70..f25fd7626 100644 --- a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl.xdc +++ b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl.xdc @@ -74,7 +74,7 @@ set_property PACKAGE_PIN BM29 [get_ports reset] set_property IOSTANDARD LVCMOS12 [get_ports reset] # Set RTC as false path -set_false_path -to [get_pins occamy_vcu128_i/occamy/inst/i_occamy/i_clint/i_sync_edge/i_sync/reg_q_reg[0]/D] +set_false_path -to [get_pins hemaia_system_i/occamy_chip/inst/i_occamy/i_clint/i_sync_edge/i_sync/reg_q_reg[0]/D] ################################################################################ # JTAG diff --git a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc 
b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc index 6bdd55171..081724104 100644 --- a/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc +++ b/target/fpga_chip/hemaia_system/hemaia_system_vcu128_impl_ext_jtag.xdc @@ -7,7 +7,7 @@ # This constraint file is written for VCU128 + FMC XM105 Debug Card and is included only when EXT_JTAG = 1 # 5 MHz max JTAG -create_clock -period 200 -name jtag_tck_i [get_pins occamy_vcu128_i/jtag_tck_i] +create_clock -period 200 -name jtag_tck_i [get_pins hemaia_system_i/jtag_tck_i] set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets -of [get_pins jtag_tck_i_IBUF_inst/O]] set_property CLOCK_BUFFER_TYPE NONE [get_nets -of [get_pins jtag_tck_i_IBUF_inst/O]] set_input_jitter jtag_tck_i 1.000 diff --git a/target/rtl/Makefile b/target/rtl/Makefile index 13daf4fb8..6bad56217 100644 --- a/target/rtl/Makefile +++ b/target/rtl/Makefile @@ -283,7 +283,7 @@ $(TARGET_CLINT_DIR)/clint.%: $(SOURCE_CLINT_DIR)/data/clint.%.tpl $(CFG) | $(TAR ########################################### # TODO: This SNAX_CFGS should not be fixed! It should be created dynamically instead. 
SNAX_CFGS += cfg/cluster_cfg/snax_KUL_cluster.hjson -SNAX_CFGS += cfg/cluster_cfg/snax_streamer_gemmX_xdma_cluster.hjson +SNAX_CFGS += cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson ####################### # Step 1: Wrapper Gen # @@ -302,20 +302,16 @@ SNAX_CFGS += cfg/cluster_cfg/snax_streamer_gemmX_xdma_cluster.hjson ############################# SNAX_ACC_GEN: -ifeq ($(findstring snax_streamer_gemm_add_c_cluster,$(SNAX_CFGS)),snax_streamer_gemm_add_c_cluster) - $(eval BENDER_TARGETS += -t snax_streamer_gemm_add_c -t snax_streamer_gemm_add_c_cluster) -endif - -ifeq ($(findstring snax_streamer_gemmX_xdma_cluster,$(SNAX_CFGS)),snax_streamer_gemmX_xdma_cluster) - $(eval BENDER_TARGETS += -t snax_streamer_gemmX_xdma -t snax_streamer_gemmX_xdma_cluster) -endif - ifeq ($(findstring snax_xdma_cluster,$(SNAX_CFGS)),snax_xdma_cluster) $(eval BENDER_TARGETS += -t snax_xdma -t snax_xdma_cluster) endif ifeq ($(findstring snax_KUL_cluster,$(SNAX_CFGS)),snax_KUL_cluster) - $(eval BENDER_TARGETS += -t snax_KUL_cluster) + $(eval BENDER_TARGETS += -t snax_gemmX -t snax_data_reshuffler -t snax_KUL_cluster) +endif + +ifeq ($(findstring snax_KUL_xdma_cluster,$(SNAX_CFGS)),snax_KUL_xdma_cluster) + $(eval BENDER_TARGETS += -t snax_gemmX -t snax_KUL_xdma_cluster_xdma -t snax_KUL_xdma_cluster) endif # Create the dependency in SNAX: when generating snax, the bender target should be stored into file, which will be used by other targets (e.g. 
tapeout, fpga, simulation) diff --git a/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson index 990926699..d63895b1b 100644 --- a/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson @@ -12,14 +12,14 @@ cluster: { name: "snax_KUL_cluster", boot_addr: 4096, // 0x1000 - cluster_base_addr: 268435456, // 0x1000_0000 - cluster_base_offset: 262144, // 256KB + cluster_base_addr: 268435456, // 0x1000_0000 + cluster_base_offset: 1048576, // 256KB -> 1MB cluster_base_hartid: 1, addr_width: 48, data_width: 64, user_width: 5, tcdm: { - size: 128, + size: 512, // 128K -> 512K banks: 32, }, cluster_periph_size: 64, // kB @@ -27,6 +27,13 @@ dma_data_width: 512, dma_axi_req_fifo_depth: 16, dma_req_fifo_depth: 8, + snax_custom_tcdm_assign: { + snax_enable_assign_tcdm_idx: true, + snax_narrow_assign_start_idx: [0,56], + snax_narrow_assign_end_idx: [7,71], + snax_wide_assign_start_idx: [8], + snax_wide_assign_end_idx: [55], + }, // AXI bandwidth switcher use_ax_bw_converter: false, converted_axi_bandwidth: 256, @@ -111,9 +118,8 @@ xfvec: false, snax_acc_cfg: { snax_acc_name: "snax_streamer_gemmX", - // add a checker here? - // some of the tcdm ports specificed here? - snax_narrow_tcdm_ports: 56, + snax_narrow_tcdm_ports: 8, + snax_wide_tcdm_ports: 48, snax_num_rw_csr: 10, snax_num_ro_csr: 2, snax_streamer_cfg: {$ref: "#/snax_streamer_gemmX_streamer_template" } @@ -125,9 +131,7 @@ num_fp_outstanding_mem: 4, num_sequencer_instructions: 16, num_dtlb_entries: 1, - num_itlb_entries: 1, - // Enable division/square root unit - // Xdiv_sqrt: true, + num_itlb_entries: 1 }, // Templates. 
snax_data_reshuffler_core_template: { @@ -155,14 +159,10 @@ num_fp_outstanding_mem: 4, num_sequencer_instructions: 16, num_dtlb_entries: 1, - num_itlb_entries: 1, - // Enable division/square root unit - // Xdiv_sqrt: true, + num_itlb_entries: 1 }, dma_core_template: { isa: "rv32ima", - // Xdiv_sqrt: true, - # isa: "rv32ema", xdma: true xssr: false xfrep: false @@ -178,7 +178,7 @@ num_fp_outstanding_mem: 4, num_sequencer_instructions: 16, num_dtlb_entries: 1, - num_itlb_entries: 1, + num_itlb_entries: 1 }, // SNAX Streamer Templates snax_streamer_gemmX_streamer_template :{ diff --git a/target/rtl/cfg/cluster_cfg/snax_streamer_gemmX_xdma_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson old mode 100755 new mode 100644 similarity index 72% rename from target/rtl/cfg/cluster_cfg/snax_streamer_gemmX_xdma_cluster.hjson rename to target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson index 14ae59f36..7d4bd8471 --- a/target/rtl/cfg/cluster_cfg/snax_streamer_gemmX_xdma_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson @@ -1,195 +1,244 @@ -// Copyright 2023 ETH Zurich and University of Bologna. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 - -// Cluster configuration for a simple testbench system. 
-{ - cluster: { - name: "snax_streamer_gemmX_xdma_cluster", - boot_addr: 4096, // 0x1000 - cluster_base_addr: 268435456, // 0x1000_0000 - cluster_base_offset: 262144, // 256KB - cluster_base_hartid: 1, - addr_width: 48, - data_width: 64, - user_width: 5, - tcdm: { - size: 128, - banks: 32, - }, - cluster_periph_size: 64, // kB - zero_mem_size: 64, // kB - dma_data_width: 512, - dma_axi_req_fifo_depth: 16, - dma_req_fifo_depth: 8, - // Additional parameters for Occamy Integration - narrow_trans: 4, - wide_trans: 32, - dma_user_width: 1, - // We don't need Snitch debugging in Occamy - enable_debug: false, - // We don't need Snitch (core-internal) virtual memory support - vm_support: false, - // Memory configuration inputs - sram_cfg_expose: true, - sram_cfg_fields: { - ema: 3, - emaw: 2, - emas: 1 - }, - // Timing parameters - timing: { - lat_comp_fp32: 3, - lat_comp_fp64: 3, - lat_comp_fp16: 2, - lat_comp_fp16_alt: 2, - lat_comp_fp8: 1, - lat_comp_fp8_alt: 1, - lat_noncomp: 1, - lat_conv: 1, - lat_sdotp: 2, - fpu_pipe_config: "BEFORE" - narrow_xbar_latency: "CUT_ALL_PORTS", - wide_xbar_latency: "CUT_ALL_PORTS", - // Isolate the core. - register_core_req: true, - register_core_rsp: true, - register_offload_req: true, - register_offload_rsp: true - }, - hives: [ - // Hive 0 - { - icache: { - size: 8, // total instruction cache size in kByte - sets: 2, // number of ways - cacheline: 256 // word size in bits - }, - cores: [ - { $ref: "#/snax_streamer_gemmX_core_template" }, - { $ref: "#/dma_core_template" }, - ] - } - ] - }, - dram: { - // 0x8000_0000 - address: 2147483648, - // 0x8000_0000 - length: 2147483648 - }, - peripherals: { - clint: { - // 0xffff_0000 - address: 4294901760, - // 0x0000_1000 - length: 4096 - }, - }, - // Templates. 
- snax_streamer_gemmX_core_template: { - isa: "rv32ima", - xssr: false, - xfrep: false, - xdma: false, - xf16: false, - xf16alt: false, - xf8: false, - xf8alt: false, - xfdotp: false, - xfvec: false, - snax_acc_cfg: { - snax_acc_name: "snax_streamer_gemmX", - // add a checker here? - // some of the tcdm ports specificed here? - snax_wide_tcdm_ports: 56, - snax_num_rw_csr: 10, - snax_num_ro_csr: 2, - snax_streamer_cfg: {$ref: "#/snax_streamer_gemmX_streamer_template" } - }, - snax_use_custom_ports: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_fp_outstanding_loads: 4, - num_fp_outstanding_mem: 4, - num_sequencer_instructions: 16, - num_dtlb_entries: 1, - num_itlb_entries: 1, - // Enable division/square root unit - // Xdiv_sqrt: true, - }, - dma_core_template: { - isa: "rv32ima", - snax_xdma_cfg: { - reader_buffer: 8, - writer_buffer: 8, - reader_agu_dimension: 3, - writer_agu_dimension: 3, - HasTransposer: 3, - HasVerilogMemset: 1, - HasMaxPool: 2 - } - xdma: true - xssr: false - xfrep: false - xf16: false, - xf16alt: false, - xf8: false, - xf8alt: false, - xfdotp: false, - xfvec: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_fp_outstanding_loads: 4, - num_fp_outstanding_mem: 4, - num_sequencer_instructions: 16, - num_dtlb_entries: 1, - num_itlb_entries: 1, - }, - // SNAX Streamer Templates - snax_streamer_gemmX_streamer_template :{ - - temporal_addrgen_unit_params: { - loop_dim: [6, 3, 2, 2, 2], - share_temp_addr_gen_loop_bounds: false, - } - - fifo_reader_params: { - fifo_width: [512, 512], - fifo_depth: [2, 2], - } - - fifo_writer_params: { - fifo_width: [512], - fifo_depth: [2], - } - - fifo_reader_writer_params: { - fifo_width: [2048], - fifo_depth: [2], - } - - data_reader_params:{ - tcdm_ports_num: [8, 8], - spatial_bounds: [[8, 8], [8, 8]], - spatial_dim: [2, 2], - element_width: [8, 8], - } - - data_writer_params:{ - tcdm_ports_num: [8], - spatial_bounds: [[8, 8]], - spatial_dim: [2], - 
element_width: [8], - } - - data_reader_writer_params:{ - tcdm_ports_num: [32], - spatial_bounds: [[8, 8]], - spatial_dim: [2], - element_width: [32], - } - - stationarity: [0,0,0,0,0] - } -} +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cluster configuration for a simple testbench system. +{ + nr_s1_quadrant: 1, + s1_quadrant: { + nr_clusters: 1, + }, + + cluster: { + name: "snax_KUL_xdma_cluster", + boot_addr: 4096, // 0x1000 + cluster_base_addr: 268435456, // 0x1000_0000 + cluster_base_offset: 1048576, // 256KB -> 1MB + cluster_base_hartid: 1, + addr_width: 48, + data_width: 64, + user_width: 5, + tcdm: { + size: 512, // 128K -> 512K + banks: 32, + }, + cluster_periph_size: 64, // kB + zero_mem_size: 64, // kB + dma_data_width: 512, + dma_axi_req_fifo_depth: 16, + dma_req_fifo_depth: 8, + snax_custom_tcdm_assign: { + snax_enable_assign_tcdm_idx: true, + snax_narrow_assign_start_idx: [0,56], + snax_narrow_assign_end_idx: [7,71], + snax_wide_assign_start_idx: [8], + snax_wide_assign_end_idx: [55], + }, + // AXI bandwidth switcher + use_ax_bw_converter: false, + converted_axi_bandwidth: 256, + + // Additional parameters for Occamy Integration + narrow_trans: 4, + wide_trans: 32, + dma_user_width: 1, + // We don't need Snitch debugging in Occamy + enable_debug: false, + // We don't need Snitch (core-internal) virtual memory support + vm_support: false, + // Memory configuration inputs + sram_cfg_expose: true, + sram_cfg_fields: { + ema: 3, + emaw: 2, + emas: 1 + }, + + // Timing parameters + timing: { + lat_comp_fp32: 3, + lat_comp_fp64: 3, + lat_comp_fp16: 2, + lat_comp_fp16_alt: 2, + lat_comp_fp8: 1, + lat_comp_fp8_alt: 1, + lat_noncomp: 1, + lat_conv: 1, + lat_sdotp: 2, + fpu_pipe_config: "BEFORE" + narrow_xbar_latency: "CUT_ALL_PORTS", + wide_xbar_latency: "CUT_ALL_PORTS", + // Isolate the core. 
+ register_core_req: true, + register_core_rsp: true, + register_offload_req: true, + register_offload_rsp: true + }, + hives: [ + // Hive 0 + { + icache: { + size: 8, // total instruction cache size in kByte + sets: 2, // number of ways + cacheline: 256 // word size in bits + }, + cores: [ + { $ref: "#/snax_streamer_gemmX_core_template" }, + { $ref: "#/dma_core_template" }, + ] + } + ] + }, + dram: { + // 0x8000_0000 + address: 2147483648, + // 0x8000_0000 + length: 2147483648 + }, + peripherals: { + clint: { + // 0xffff_0000 + address: 4294901760, + // 0x0000_1000 + length: 4096 + }, + }, + // GeMM Core Templates. + snax_streamer_gemmX_core_template: { + isa: "rv32ima", + xssr: false, + xfrep: false, + xdma: false, + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + snax_acc_cfg: { + snax_acc_name: "snax_streamer_gemmX", + snax_narrow_tcdm_ports: 8, + snax_wide_tcdm_ports: 48, + snax_num_rw_csr: 10, + snax_num_ro_csr: 2, + snax_streamer_cfg: {$ref: "#/snax_streamer_gemmX_streamer_template" } + }, + snax_use_custom_ports: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1 + }, + // DMA + XDMA Core Template. 
+ dma_core_template: { + isa: "rv32ima", + snax_xdma_cfg: { + reader_buffer: 4, + writer_buffer: 4, + reader_agu_dimension: 7, + writer_agu_dimension: 7, + HasTransposer: 3, + HasVerilogMemset: 1, + HasMaxPool: 2 + } + xdma: true + xssr: false + xfrep: false + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1 + }, + // SNAX Streamer Templates + snax_streamer_gemmX_streamer_template :{ + + temporal_addrgen_unit_params: { + loop_dim: [6, 3, 3, 3, 3], + share_temp_addr_gen_loop_bounds: false, + } + + fifo_reader_params: { + fifo_width: [512, 512], + fifo_depth: [2, 2], + } + + fifo_writer_params: { + fifo_width: [512], + fifo_depth: [2], + } + + fifo_reader_writer_params: { + fifo_width: [2048], + fifo_depth: [2], + } + + data_reader_params:{ + tcdm_ports_num: [8, 8], + spatial_bounds: [[8, 8], [8, 8]], + spatial_dim: [2, 2], + element_width: [8, 8], + } + + data_writer_params:{ + tcdm_ports_num: [8], + spatial_bounds: [[8, 8]], + spatial_dim: [2], + element_width: [8], + } + + data_reader_writer_params:{ + tcdm_ports_num: [32], + spatial_bounds: [[8, 8]], + spatial_dim: [2], + element_width: [32], + } + + stationarity: [0,0,0,0,0] + }, + // SNAX Streamer Templates + snax_data_reshuffler_streamer_template :{ + + temporal_addrgen_unit_params: { + loop_dim: [5, 3], + share_temp_addr_gen_loop_bounds: false, + } + + fifo_reader_params: { + fifo_width: [512], + fifo_depth: [2], + } + + fifo_writer_params: { + fifo_width: [512], + fifo_depth: [2], + } + + data_reader_params:{ + tcdm_ports_num: [8], + spatial_bounds: [[8]], + spatial_dim: [1], + element_width: [64], + } + + data_writer_params:{ + tcdm_ports_num: [8], + spatial_bounds: [[8]], + spatial_dim: [1], + element_width: [64], + } + + stationarity: [0,0] + } +} diff --git 
a/target/rtl/cfg/cluster_cfg/snax_streamer_gemm_add_c_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_streamer_gemm_add_c_cluster.hjson deleted file mode 100644 index ccf632aa3..000000000 --- a/target/rtl/cfg/cluster_cfg/snax_streamer_gemm_add_c_cluster.hjson +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright 2024 KU Leuven. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Author: Xiaoling Yi (xiaoling.yi@kuleuven.be) - -{ - cluster: { - name: "snax_streamer_gemm_add_c_cluster", - boot_addr: 4096, // 0x1000 - cluster_base_addr: 268435456, // 0x1000_0000 - cluster_base_offset: 262144, // 256KB - cluster_base_hartid: 1, - addr_width: 48, - data_width: 64, - user_width: 5, - tcdm: { - size: 128, - banks: 32, - }, - cluster_periph_size: 64, // kB - zero_mem_size: 64, // kB - dma_data_width: 512, - dma_axi_req_fifo_depth: 16, - dma_req_fifo_depth: 8, - // Additional parameters for Occamy Integration - narrow_trans: 4, - wide_trans: 32, - dma_user_width: 1, - // We don't need Snitch debugging in Occamy - enable_debug: false, - // We don't need Snitch (core-internal) virtual memory support - vm_support: false, - // Memory configuration inputs - sram_cfg_expose: true, - sram_cfg_fields: { - ema: 3, - emaw: 2, - emas: 1 - }, - // Timing parameters - timing: { - lat_comp_fp32: 3, - lat_comp_fp64: 3, - lat_comp_fp16: 2, - lat_comp_fp16_alt: 2, - lat_comp_fp8: 1, - lat_comp_fp8_alt: 1, - lat_noncomp: 1, - lat_conv: 1, - lat_sdotp: 2, - fpu_pipe_config: "BEFORE" - narrow_xbar_latency: "CUT_ALL_PORTS", - wide_xbar_latency: "CUT_ALL_PORTS", - // Isolate the core. 
- register_core_req: true, - register_core_rsp: true, - register_offload_req: true, - register_offload_rsp: true - }, - hives: [ - // Hive 0 - { - icache: { - size: 8, // total instruction cache size in kByte - sets: 2, // number of ways - cacheline: 256 // word size in bits - }, - cores: [ - { $ref: "#/snax_streamer_gemm_add_c_core_template" }, - { $ref: "#/dma_core_template" }, - ] - } - ] - }, - dram: { - // 0x8000_0000 - address: 2147483648, - // 0x8000_0000 - length: 2147483648 - }, - peripherals: { - clint: { - // 0xffff_0000 - address: 4294901760, - // 0x0000_1000 - length: 4096 - }, - }, - // Templates. - snax_streamer_gemm_add_c_core_template: { - isa: "rv32ima", - xssr: false, - xfrep: false, - xdma: false, - xf16: false, - xf16alt: false, - xf8: false, - xf8alt: false, - xfdotp: false, - xfvec: false, - snax_acc_cfg: { - snax_acc_name: "snax_streamer_gemm_add_c", - snax_narrow_tcdm_ports: 48, - snax_num_rw_csr: 5, - snax_num_ro_csr: 2, - snax_streamer_cfg: {$ref: "#/snax_streamer_gemm_add_c_streamer_template" } - }, - snax_use_custom_ports: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_fp_outstanding_loads: 4, - num_fp_outstanding_mem: 4, - num_sequencer_instructions: 16, - num_dtlb_entries: 1, - num_itlb_entries: 1, - // Enable division/square root unit - // Xdiv_sqrt: true, - }, - dma_core_template: { - isa: "rv32ima", - // Xdiv_sqrt: true, - # isa: "rv32ema", - xdma: true - xssr: false - xfrep: false - xf16: false, - xf16alt: false, - xf8: false, - xf8alt: false, - xfdotp: false, - xfvec: false, - num_int_outstanding_loads: 1, - num_int_outstanding_mem: 4, - num_fp_outstanding_loads: 4, - num_fp_outstanding_mem: 4, - num_sequencer_instructions: 16, - num_dtlb_entries: 1, - num_itlb_entries: 1, - }, - // SNAX Streamer Templates - snax_streamer_gemm_add_c_streamer_template :{ - - temporal_addrgen_unit_params: { - loop_dim: [3], - share_temp_addr_gen_loop_bounds: true, - } - - fifo_reader_params: { - fifo_width: [512, 512], - 
fifo_depth: [2, 2], - } - - fifo_reader_writer_params: { - fifo_width: [2048], - fifo_depth: [2], - } - - data_reader_params:{ - tcdm_ports_num: [8, 8], - spatial_bounds: [[8, 8], [8, 8]], - spatial_dim: [2, 2], - element_width: [8, 8], - } - - data_reader_writer_params:{ - tcdm_ports_num: [32], - spatial_bounds: [[8, 8]], - spatial_dim: [2], - element_width: [32], - } - - stationarity: [0,0,1,1] - } -} \ No newline at end of file diff --git a/target/rtl/cfg/occamy_cfg/snax_two_clusters.hjson b/target/rtl/cfg/occamy_cfg/snax_two_clusters.hjson index c6e6a89be..7cd0b92fa 100755 --- a/target/rtl/cfg/occamy_cfg/snax_two_clusters.hjson +++ b/target/rtl/cfg/occamy_cfg/snax_two_clusters.hjson @@ -116,7 +116,7 @@ }, clusters:[ "snax_KUL_cluster", - "snax_streamer_gemmX_xdma_cluster", + "snax_KUL_xdma_cluster", ], // peripherals diff --git a/target/rtl/snax_minimal.hjson b/target/rtl/snax_minimal.hjson deleted file mode 100755 index 2d5fc63da..000000000 --- a/target/rtl/snax_minimal.hjson +++ /dev/null @@ -1,198 +0,0 @@ -{ - // Remote CFG - is_remote_quadrant: false, - remote_quadrants: [], - addr_width: 48, - data_width: 64, - // XBARs - wide_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - quadrant_inter_xbar_slv_id_width_no_rocache: 3, - quadrant_inter_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - narrow_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - cuts: { - narrow_to_quad: 3, - quad_to_narrow: 3, - quad_to_pre: 1, - pre_to_inter: 1, - quad_to_inter: 1, - inter_to_quad: 3, - narrow_to_cva6: 2, - narrow_conv_to_spm_narrow_pre: 2, - narrow_conv_to_spm_narrow: 1, - narrow_and_pcie: 3, - narrow_and_wide: 1, - wide_conv_to_spm_wide: 3, - wide_to_wide_zero_mem: 0, - wide_to_hbm: 3, - wide_and_inter: 3, - wide_and_hbi: 3, - narrow_and_hbi: 3, - pre_to_hbmx: 3, - hbmx_to_hbm: 3, - atomic_adapter_narrow: 1, - atomic_adapter_narrow_wide: 1, - // Give some flexibility in peripheral 
xbar placement - periph_axi_lite_narrow: 2, - periph_axi_lite: 2, - periph_axi_lite_narrow_hbm_xbar_cfg: 2, - // Non-right-side chip peripherals - periph_axi_lite_narrow_hbm_cfg: 3, - periph_axi_lite_narrow_pcie_cfg: 3, - periph_axi_lite_narrow_chip_ctrl_cfg: 3, - periph_axi_lite_narrow_hbi_narrow_cfg: 3, - periph_axi_lite_narrow_hbi_wide_cfg: 3, - periph_axi_lite_narrow_bootrom_cfg: 3, - periph_axi_lite_narrow_fll_system_cfg: 3, - periph_axi_lite_narrow_fll_periph_cfg: 3, - periph_axi_lite_narrow_fll_hbm2e_cfg: 3, - // Right-side or latency-invariant chip peripherals - periph_axi_lite_narrow_soc_ctrl_cfg: 1, - periph_axi_lite_narrow_uart_cfg: 1, - periph_axi_lite_narrow_i2c_cfg: 1, - periph_axi_lite_narrow_gpio_cfg: 1, - periph_axi_lite_narrow_clint_cfg: 1, - periph_axi_lite_narrow_plic_cfg: 1, - periph_axi_lite_narrow_spim_cfg: 1, - periph_axi_lite_narrow_timer_cfg: 1, - }, - txns: { - wide_and_inter: 128, - wide_to_hbm: 128, - narrow_and_wide: 16, - rmq: 4, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 5, // clog2(total number of clusters) - nr_s1_quadrant: 1, - s1_quadrant: { - // number of pending transactions on the narrow/wide network - narrow_trans: 32, - wide_trans: 32, - // Disable for easier flow trials. 
- ro_cache_cfg: { - width: 1024, - count: 128, - sets: 2, - max_trans: 32, - address_regions: 4, - } - narrow_tlb_cfg: { - max_trans: 32, - l1_num_entries: 8, - l1_cut_ax: true, - } - wide_tlb_cfg: { - max_trans: 32, - l1_num_entries: 8, - l1_cut_ax: true, - } - wide_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - wide_xbar_slv_id_width: 3 - narrow_xbar: { - max_slv_trans: 8, - max_mst_trans: 8, - fall_through: false, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 5, // clog2(total number of clusters) - cfg_base_addr: 184549376, // 0x0b000000 - cfg_base_offset: 65536 // 0x10000 - }, - clusters:[ - "snax_minimal", - ], - - // peripherals - peripherals: { - rom: { - address: 16777216, // 0x0100_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - clint: { - address: 67108864, // 0x0400_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - axi_lite_peripherals: [ - { - name: "debug", - address: 0, // 0x0000_0000 - length: 4096, // 4 kiB 0x1000 - } - ], - axi_lite_narrow_peripherals: [ - { - name: "soc_ctrl", - address: 33554432, // 0x0200_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "uart", - address: 33562624, // 0x0200_2000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "gpio", - address: 33566720, // 0x0200_3000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "i2c", - address: 33570816, // 0x0200_4000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "chip_ctrl", - address: 33574912, // 0x0200_5000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "timer", - address: 33579008, // 0x0200_6000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spim", - address: 50331648, // 0x0300_0000 - length: 131072, // 4 kiB 0x2_0000 - }, - { - name: "plic", - address: 201326592, // 0x0C00_0000 - length: 67108864, // 64 MiB 0x400_0000 - }, - ], - }, - // non-peripheral IPs - spm_narrow: { - address: 1879048192, // 0x7000_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - spm_wide: { - address: 2147483648, // 0x8000_0000 - length: 
1048576, // 1 MiB 0x10_0000 - }, - wide_zero_mem: { - address: 68719476736, // 0x10_0000_0000 - length: 8589934592, // 8 GiB 0x11_0000_0000 - }, - sys_idma_cfg: { - address: 285212672, // 0x1100_0000 - length: 65536, // 64 kiB 0x1_0000 - }, -} diff --git a/target/rtl/snax_two_clusters.hjson b/target/rtl/snax_two_clusters.hjson deleted file mode 100755 index f03e41d2a..000000000 --- a/target/rtl/snax_two_clusters.hjson +++ /dev/null @@ -1,200 +0,0 @@ -{ - // Remote CFG - is_remote_quadrant: false, - remote_quadrants: [], - addr_width: 48, - data_width: 64, - // XBARs - wide_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - quadrant_inter_xbar_slv_id_width_no_rocache: 3, - quadrant_inter_xbar: { - max_slv_trans: 64, - max_mst_trans: 64, - fall_through: false, - }, - narrow_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - cuts: { - narrow_to_quad: 3, - quad_to_narrow: 3, - quad_to_pre: 1, - pre_to_inter: 1, - quad_to_inter: 1, - inter_to_quad: 3, - narrow_to_cva6: 2, - narrow_conv_to_spm_narrow_pre: 2, - narrow_conv_to_spm_narrow: 1, - narrow_and_pcie: 3, - narrow_and_wide: 1, - wide_conv_to_spm_wide: 3, - wide_to_wide_zero_mem: 0, - wide_to_hbm: 3, - wide_and_inter: 3, - wide_and_hbi: 3, - narrow_and_hbi: 3, - pre_to_hbmx: 3, - hbmx_to_hbm: 3, - atomic_adapter_narrow: 1, - atomic_adapter_narrow_wide: 1, - // Give some flexibility in peripheral xbar placement - periph_axi_lite_narrow: 2, - periph_axi_lite: 2, - periph_axi_lite_narrow_hbm_xbar_cfg: 2, - // Non-right-side chip peripherals - periph_axi_lite_narrow_hbm_cfg: 3, - periph_axi_lite_narrow_pcie_cfg: 3, - periph_axi_lite_narrow_chip_ctrl_cfg: 3, - periph_axi_lite_narrow_hbi_narrow_cfg: 3, - periph_axi_lite_narrow_hbi_wide_cfg: 3, - periph_axi_lite_narrow_bootrom_cfg: 3, - periph_axi_lite_narrow_fll_system_cfg: 3, - periph_axi_lite_narrow_fll_periph_cfg: 3, - periph_axi_lite_narrow_fll_hbm2e_cfg: 3, - // Right-side or latency-invariant chip peripherals 
- periph_axi_lite_narrow_soc_ctrl_cfg: 1, - periph_axi_lite_narrow_uart_cfg: 1, - periph_axi_lite_narrow_i2c_cfg: 1, - periph_axi_lite_narrow_gpio_cfg: 1, - periph_axi_lite_narrow_clint_cfg: 1, - periph_axi_lite_narrow_plic_cfg: 1, - periph_axi_lite_narrow_spim_cfg: 1, - periph_axi_lite_narrow_timer_cfg: 1, - }, - txns: { - wide_and_inter: 128, - wide_to_hbm: 128, - narrow_and_wide: 16, - rmq: 4, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 5, // clog2(total number of clusters) - nr_s1_quadrant: 1, - s1_quadrant: { - // number of pending transactions on the narrow/wide network - narrow_trans: 32, - wide_trans: 32, - // Disable for easier flow trials. - ro_cache_cfg: { - width: 1024, - count: 128, - sets: 2, - max_trans: 32, - address_regions: 4, - } - narrow_tlb_cfg: { - max_trans: 32, - l1_num_entries: 8, - l1_cut_ax: true, - } - wide_tlb_cfg: { - max_trans: 32, - l1_num_entries: 8, - l1_cut_ax: true, - } - wide_xbar: { - max_slv_trans: 32, - max_mst_trans: 32, - fall_through: false, - }, - wide_xbar_slv_id_width: 3 - narrow_xbar: { - max_slv_trans: 8, - max_mst_trans: 8, - fall_through: false, - }, - narrow_xbar_slv_id_width: 4, - narrow_xbar_user_width: 5, // clog2(total number of clusters) - cfg_base_addr: 184549376, // 0x0b000000 - cfg_base_offset: 65536 // 0x10000 - }, - clusters:[ - "snax_xiaoling", - "snax_gemmX_xdma", - ], - - // peripherals - peripherals: { - rom: { - address: 16777216, // 0x0100_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - clint: { - address: 67108864, // 0x0400_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - axi_lite_peripherals: [ - { - name: "debug", - address: 0, // 0x0000_0000 - length: 4096, // 4 kiB 0x1000 - } - ], - axi_lite_narrow_peripherals: [ - { - name: "soc_ctrl", - address: 33554432, // 0x0200_0000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "uart", - address: 33562624, // 0x0200_2000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "gpio", - address: 33566720, // 0x0200_3000 - length: 4096, // 
4 kiB 0x1000 - }, - { - name: "i2c", - address: 33570816, // 0x0200_4000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "chip_ctrl", - address: 33574912, // 0x0200_5000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "timer", - address: 33579008, // 0x0200_6000 - length: 4096, // 4 kiB 0x1000 - }, - { - name: "spim", - address: 50331648, // 0x0300_0000 - length: 131072, // 4 kiB 0x2_0000 - }, - { - name: "plic", - address: 201326592, // 0x0C00_0000 - length: 67108864, // 64 MiB 0x400_0000 - }, - ], - }, - // non-peripheral IPs - spm_narrow: { - address: 1879048192, // 0x7000_0000 - length: 131072, // 128 kiB 0x2_0000 - }, - spm_wide: { - address: 2147483648, // 0x8000_0000 - length: 1048576, // 1 MiB 0x10_0000 - }, - wide_zero_mem: { - address: 68719476736, // 0x10_0000_0000 - length: 8589934592, // 8 GiB 0x11_0000_0000 - }, - sys_idma_cfg: { - address: 285212672, // 0x1100_0000 - length: 65536, // 64 kiB 0x1_0000 - }, - -} \ No newline at end of file