From c56b308f54c47c96f711ff40d59f429945f3de3b Mon Sep 17 00:00:00 2001 From: Yunhao Deng Date: Sat, 2 Nov 2024 16:04:49 +0100 Subject: [PATCH] Add DIMC and CGRA blocks (#52) (#72) * cfg: update hemaia configs * cfg: add snax cgra * cfg: add snax dimc * cfg: add hemaia tapeout version * cfg: replace unverilatable cgra * cfg: typo * cfg: update order of clusters * cfg: update order for hemaia_tapeout * cfg: change mem size to 128kb * cfg: update register count for gemmXdma * cfg: fix user width Co-authored-by: Ryan Antonio --- .../cfg/cluster_cfg/snax_KUL_cluster.hjson | 2 +- .../cluster_cfg/snax_KUL_xdma_cluster.hjson | 4 +- .../cfg/cluster_cfg/snax_alu_cluster.hjson | 2 +- .../cfg/cluster_cfg/snax_cgra_cluster.hjson | 170 +++++++++++++++ .../cfg/cluster_cfg/snax_dimc_cluster.hjson | 171 +++++++++++++++ .../cluster_cfg/snax_hypercorex_cluster.hjson | 2 +- target/rtl/cfg/occamy_cfg/hemaia.hjson | 2 +- .../rtl/cfg/occamy_cfg/hemaia_tapeout.hjson | 197 ++++++++++++++++++ 8 files changed, 544 insertions(+), 6 deletions(-) create mode 100644 target/rtl/cfg/cluster_cfg/snax_cgra_cluster.hjson create mode 100644 target/rtl/cfg/cluster_cfg/snax_dimc_cluster.hjson create mode 100755 target/rtl/cfg/occamy_cfg/hemaia_tapeout.hjson diff --git a/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson index 05011da4e..3f7036ad0 100644 --- a/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_KUL_cluster.hjson @@ -20,7 +20,7 @@ data_width: 64, user_width: 3, tcdm: { - size: 512, // 128K -> 512K + size: 128, // 512K -> 128K banks: 32, }, cluster_periph_size: 64, // kB diff --git a/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson index c6332fc81..f4f67a9dd 100644 --- a/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_KUL_xdma_cluster.hjson @@ -20,7 +20,7 @@ data_width: 64, user_width: 3, tcdm: { - 
size: 512, // 128K -> 512K + size: 128, // 512K -> 128K banks: 32, }, cluster_periph_size: 64, // kB @@ -121,7 +121,7 @@ bender_target: ["snax_gemmX"], snax_narrow_tcdm_ports: 8, snax_wide_tcdm_ports: 48, - snax_num_rw_csr: 10, + snax_num_rw_csr: 19, snax_num_ro_csr: 2, snax_gemmx_mesh_row: 8, snax_gemmx_tile_size: 8, diff --git a/target/rtl/cfg/cluster_cfg/snax_alu_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_alu_cluster.hjson index c9074da40..26fa414c4 100644 --- a/target/rtl/cfg/cluster_cfg/snax_alu_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_alu_cluster.hjson @@ -20,7 +20,7 @@ data_width: 64, user_width: 3, tcdm: { - size: 512, // 128K -> 512K + size: 128, // 512K -> 128K banks: 32, }, cluster_periph_size: 64, // kB diff --git a/target/rtl/cfg/cluster_cfg/snax_cgra_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_cgra_cluster.hjson new file mode 100644 index 000000000..ca3653f76 --- /dev/null +++ b/target/rtl/cfg/cluster_cfg/snax_cgra_cluster.hjson @@ -0,0 +1,170 @@ +// Copyright 2024 KU Leuven. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cluster configuration for a simple testbench system. 
+{ + nr_s1_quadrant: 1, + s1_quadrant: { + nr_clusters: 1, + }, + + cluster: { + name: "snax_cgra_cluster", + bender_target: ["snax_cgra_cluster"], + boot_addr: 4096, // 0x1000 + cluster_base_addr: 268435456, // 0x1000_0000 + cluster_base_offset: 1048576, // 256KB -> 1MB + cluster_base_hartid: 1, + addr_width: 48, + data_width: 64, + user_width: 3, + tcdm: { + size: 128, + banks: 32, + }, + cluster_periph_size: 64, // kB + zero_mem_size: 64, // kB + dma_data_width: 512, + dma_axi_req_fifo_depth: 3, + dma_req_fifo_depth: 3, + + // Additional parameters for Hemaia integration + narrow_trans: 4, + wide_trans: 32, + dma_user_width: 1, + // We don't need Snitch debugging in Hemaia + enable_debug: false, + // We don't need Snitch (core-internal) virtual memory support + vm_support: false, + // Memory configuration inputs + sram_cfg_expose: true, + sram_cfg_fields: { + ema: 3, + emaw: 2, + emas: 1 + }, + + // Timing parameters + timing: { + lat_comp_fp32: 3, + lat_comp_fp64: 3, + lat_comp_fp16: 2, + lat_comp_fp16_alt: 2, + lat_comp_fp8: 1, + lat_comp_fp8_alt: 1, + lat_noncomp: 1, + lat_conv: 1, + lat_sdotp: 2, + fpu_pipe_config: "BEFORE" + narrow_xbar_latency: "CUT_ALL_PORTS", + wide_xbar_latency: "CUT_ALL_PORTS", + // Isolate the core. 
+ register_core_req: true, + register_core_rsp: true, + register_offload_req: true, + register_offload_rsp: true, + register_ext_narrow: true, + register_ext_wide: true, + }, + hives: [ + // Hive 0 + { + icache: { + size: 8, // total instruction cache size in kByte + sets: 2, // number of ways + cacheline: 256 // word size in bits + }, + cores: [ + { $ref: "#/snax_cgra_core_template" }, + { $ref: "#/dma_core_template" }, + ] + } + ] + }, + dram: { + // 0x8000_0000 + address: 2147483648, + // 0x8000_0000 + length: 2147483648 + }, + peripherals: { + clint: { + // 0xffff_0000 + address: 4294901760, + // 0x0000_1000 + length: 4096 + }, + }, + // SNAX Accelerator Core Templates + snax_cgra_core_template: { + isa: "rv32ima", + xssr: false, + xfrep: false, + xdma: false, + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + snax_acc_cfg: { + snax_acc_name: "snax_cgra", + bender_target: ["cgra"], + snax_narrow_tcdm_ports: 16, + snax_num_rw_csr: 1, + snax_num_ro_csr: 4, + snax_streamer_cfg: {$ref: "#/snax_cgra_streamer_template" } + }, + snax_use_custom_ports: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + // Enable division/square root unit + // Xdiv_sqrt: true, + }, + dma_core_template: { + isa: "rv32ima", + // Xdiv_sqrt: true, + # isa: "rv32ema", + xdma: true + xssr: false + xfrep: false + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + } + // SNAX Streamer Templates + snax_cgra_streamer_template :{ + + data_reader_params: { + spatial_bounds: [[1], [1], [1], [1], [1], [1], [1], [1]], + temporal_dim: [2, 2, 2, 2, 2, 2, 
2, 2], + num_channel: [1, 1, 1, 1, 1, 1, 1, 1], + fifo_depth: [8, 8, 8, 8, 8, 8, 8, 8], + }, + + data_writer_params: { + spatial_bounds: [[1], [1], [1], [1], [1], [1], [1], [1]], + temporal_dim: [2, 2, 2, 2, 2, 2, 2, 2], + num_channel: [1, 1, 1, 1, 1, 1, 1, 1], + fifo_depth: [8, 8, 8, 8, 8, 8, 8, 8], + }, + + snax_library_name: "snax-cgra", + } +} diff --git a/target/rtl/cfg/cluster_cfg/snax_dimc_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_dimc_cluster.hjson new file mode 100644 index 000000000..90517b6e9 --- /dev/null +++ b/target/rtl/cfg/cluster_cfg/snax_dimc_cluster.hjson @@ -0,0 +1,171 @@ +// Copyright 2024 KU Leuven. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cluster configuration for a simple testbench system. +{ + nr_s1_quadrant: 1, + s1_quadrant: { + nr_clusters: 1, + }, + + cluster: { + name: "snax_dimc_cluster", + bender_target: ["snax_dimc_cluster"], + boot_addr: 4096, // 0x1000 + cluster_base_addr: 268435456, // 0x1000_0000 + cluster_base_offset: 1048576, // 256KB -> 1MB + cluster_base_hartid: 1, + addr_width: 48, + data_width: 64, + user_width: 3, + tcdm: { + size: 128, + banks: 32, + }, + cluster_periph_size: 64, // kB + zero_mem_size: 64, // kB + dma_data_width: 512, + dma_axi_req_fifo_depth: 16, + dma_req_fifo_depth: 8, + + // Additional parameters for Hemaia integration + narrow_trans: 4, + wide_trans: 32, + dma_user_width: 1, + // We don't need Snitch debugging in Hemaia + enable_debug: false, + // We don't need Snitch (core-internal) virtual memory support + vm_support: false, + // Memory configuration inputs + sram_cfg_expose: true, + sram_cfg_fields: { + ema: 3, + emaw: 2, + emas: 1 + }, + // Timing parameters + timing: { + lat_comp_fp32: 3, + lat_comp_fp64: 3, + lat_comp_fp16: 2, + lat_comp_fp16_alt: 2, + lat_comp_fp8: 1, + lat_comp_fp8_alt: 1, + lat_noncomp: 1, + lat_conv: 1, + lat_sdotp: 2, + fpu_pipe_config: "BEFORE" + narrow_xbar_latency: "CUT_ALL_PORTS", + 
wide_xbar_latency: "CUT_ALL_PORTS", + // Isolate the core. + register_core_req: true, + register_core_rsp: true, + register_offload_req: true, + register_offload_rsp: true, + register_ext_narrow: true, + register_ext_wide: true, + }, + hives: [ + // Hive 0 + { + icache: { + size: 8, // total instruction cache size in kByte + sets: 2, // number of ways + cacheline: 256 // word size in bits + }, + cores: [ + { $ref: "#/snax_dimc_core_template" }, + { $ref: "#/dma_core_template" }, + ] + } + ] + }, + dram: { + // 0x8000_0000 + address: 2147483648, + // 0x8000_0000 + length: 2147483648 + }, + peripherals: { + clint: { + // 0xffff_0000 + address: 4294901760, + // 0x0000_1000 + length: 4096 + }, + }, + // SNAX Accelerator Core Templates + snax_dimc_core_template: { + isa: "rv32ima", + xssr: false, + xfrep: false, + xdma: false, + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + snax_acc_cfg: { + snax_acc_name: "snax_dimc", + bender_target: ["dimc"], + snax_wide_tcdm_ports: 40, + snax_num_rw_csr: 8, + snax_num_ro_csr: 0, + snax_disable_csr_manager: true, + snax_streamer_cfg: {$ref: "#/snax_dimc_streamer_template" } + }, + snax_use_custom_ports: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + // Enable division/square root unit + // Xdiv_sqrt: true, + }, + dma_core_template: { + isa: "rv32ima", + // Xdiv_sqrt: true, + # isa: "rv32ema", + xdma: true + xssr: false + xfrep: false + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + } + // SNAX Streamer Templates + snax_dimc_streamer_template :{ + + data_reader_params: { + 
spatial_bounds: [[8], [8], [8], [8]], + temporal_dim: [2, 2, 2, 2], + num_channel: [8, 8, 8, 8], + fifo_depth: [4, 4, 4, 4], + }, + + data_writer_params:{ + spatial_bounds: [[8]], + temporal_dim: [2], + num_channel: [8], + fifo_depth: [4], + }, + + snax_library_name: "dimc", + + } +} diff --git a/target/rtl/cfg/cluster_cfg/snax_hypercorex_cluster.hjson b/target/rtl/cfg/cluster_cfg/snax_hypercorex_cluster.hjson index a40653bb6..fd7cca817 100644 --- a/target/rtl/cfg/cluster_cfg/snax_hypercorex_cluster.hjson +++ b/target/rtl/cfg/cluster_cfg/snax_hypercorex_cluster.hjson @@ -20,7 +20,7 @@ data_width: 64, user_width: 3, tcdm: { - size: 512, // 128K -> 512K + size: 128, // 512K -> 128K banks: 32, }, cluster_periph_size: 64, // kB diff --git a/target/rtl/cfg/occamy_cfg/hemaia.hjson b/target/rtl/cfg/occamy_cfg/hemaia.hjson index f6aa203e9..6be54a0be 100755 --- a/target/rtl/cfg/occamy_cfg/hemaia.hjson +++ b/target/rtl/cfg/occamy_cfg/hemaia.hjson @@ -118,7 +118,7 @@ "snax_KUL_cluster", "snax_KUL_xdma_cluster", "snax_hypercorex_cluster", - "snax_hypercorex_cluster" + "snax_dimc_cluster" ], // peripherals diff --git a/target/rtl/cfg/occamy_cfg/hemaia_tapeout.hjson b/target/rtl/cfg/occamy_cfg/hemaia_tapeout.hjson new file mode 100755 index 000000000..3b4cb303e --- /dev/null +++ b/target/rtl/cfg/occamy_cfg/hemaia_tapeout.hjson @@ -0,0 +1,197 @@ +{ + bender_target: ["cv64a6_imafdc_sv39", "occamy"], + // Remote CFG, about to be removed + is_remote_quadrant: false, + remote_quadrants: [], + // Multi-chip configuration + hemaia_multichip: { + chip_id_width: 8 + } + addr_width: 48, + data_width: 64, + // XBARs + wide_xbar: { + max_slv_trans: 64, + max_mst_trans: 64, + fall_through: false, + }, + quadrant_inter_xbar_slv_id_width_no_rocache: 3, + quadrant_inter_xbar: { + max_slv_trans: 64, + max_mst_trans: 64, + fall_through: false, + }, + narrow_xbar: { + max_slv_trans: 32, + max_mst_trans: 32, + fall_through: false, + }, + cuts: { + narrow_to_quad: 3, + quad_to_narrow: 3, + 
wide_to_quad: 3, + quad_to_wide: 3, + narrow_to_cva6: 2, + narrow_conv_to_spm_narrow_pre: 2, + narrow_conv_to_spm_narrow: 1, + narrow_and_pcie: 3, + narrow_and_wide: 1, + wide_conv_to_spm_wide: 3, + wide_to_wide_zero_mem: 0, + wide_to_hbm: 3, + wide_and_inter: 3, + wide_and_hbi: 3, + narrow_and_hbi: 3, + pre_to_hbmx: 3, + hbmx_to_hbm: 3, + atomic_adapter_narrow: 1, + atomic_adapter_narrow_wide: 1, + // Give some flexibility in peripheral xbar placement + periph_axi_lite_narrow: 2, + periph_axi_lite: 2, + periph_axi_lite_narrow_hbm_xbar_cfg: 2, + // Non-right-side chip peripherals + periph_axi_lite_narrow_hbm_cfg: 3, + periph_axi_lite_narrow_pcie_cfg: 3, + periph_axi_lite_narrow_chip_ctrl_cfg: 3, + periph_axi_lite_narrow_hbi_narrow_cfg: 3, + periph_axi_lite_narrow_hbi_wide_cfg: 3, + periph_axi_lite_narrow_bootrom_cfg: 3, + periph_axi_lite_narrow_fll_system_cfg: 3, + periph_axi_lite_narrow_fll_periph_cfg: 3, + periph_axi_lite_narrow_fll_hbm2e_cfg: 3, + // Right-side or latency-invariant chip peripherals + periph_axi_lite_narrow_soc_ctrl_cfg: 1, + periph_axi_lite_narrow_uart_cfg: 1, + periph_axi_lite_narrow_i2c_cfg: 1, + periph_axi_lite_narrow_gpio_cfg: 1, + periph_axi_lite_narrow_clint_cfg: 1, + periph_axi_lite_narrow_plic_cfg: 1, + periph_axi_lite_narrow_spim_cfg: 1, + periph_axi_lite_narrow_timer_cfg: 1, + }, + txns: { + wide_and_inter: 128, + wide_to_hbm: 128, + narrow_and_wide: 16, + rmq: 4, + }, + narrow_xbar_slv_id_width: 4, + narrow_xbar_user_width: 3, // clog2(total number of clusters) + nr_s1_quadrant: 1, + s1_quadrant: { + // number of pending transactions on the narrow/wide network + narrow_trans: 32, + wide_trans: 32, + // Disable for easier flow trials. 
+ ro_cache_cfg: { + width: 1024, + count: 128, + sets: 2, + max_trans: 32, + address_regions: 4, + } + wide_xbar: { + max_slv_trans: 32, + max_mst_trans: 32, + fall_through: false, + }, + wide_xbar_slv_id_width: 3 + narrow_xbar: { + max_slv_trans: 8, + max_mst_trans: 8, + fall_through: false, + }, + narrow_xbar_slv_id_width: 4, + narrow_xbar_user_width: 3, // clog2(total number of clusters) + cfg_base_addr: 184549376, // 0x0b000000 + cfg_base_offset: 65536 // 0x10000 + }, + clusters:[ + "snax_cgra_cluster", + "snax_KUL_xdma_cluster", + "snax_hypercorex_cluster", + "snax_dimc_cluster" + ], + + // peripherals + peripherals: { + rom: { + address: 16777216, // 0x0100_0000 + length: 131072, // 128 kiB 0x2_0000 + }, + clint: { + address: 67108864, // 0x0400_0000 + length: 1048576, // 1 MiB 0x10_0000 + }, + axi_lite_peripherals: [ + { + name: "debug", + address: 0, // 0x0000_0000 + length: 4096, // 4 kiB 0x1000 + } + ], + axi_lite_narrow_peripherals: [ + { + name: "soc_ctrl", + address: 33554432, // 0x0200_0000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "uart", + address: 33562624, // 0x0200_2000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "gpio", + address: 33566720, // 0x0200_3000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "i2c", + address: 33570816, // 0x0200_4000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "chip_ctrl", + address: 33574912, // 0x0200_5000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "timer", + address: 33579008, // 0x0200_6000 + length: 4096, // 4 kiB 0x1000 + }, + { + name: "spim", + address: 50331648, // 0x0300_0000 + length: 131072, // 128 kiB 0x2_0000 + }, + { + name: "plic", + address: 201326592, // 0x0C00_0000 + length: 67108864, // 64 MiB 0x400_0000 + }, + ], + }, + // non-peripheral IPs + spm_narrow: { + address: 1879048192, // 0x7000_0000 + length: 131072, // 128 kiB 0x2_0000 + }, + spm_wide: { + address: 2147483648, // 0x8000_0000 + length: 1048576, // 1 MiB 0x10_0000 + }, + wide_zero_mem: { + address: 68719476736, // 
0x10_0000_0000 + length: 8589934592, // 8 GiB 0x2_0000_0000 + }, + sys_idma_cfg: { + address: 285212672, // 0x1100_0000 + length: 65536, // 64 kiB 0x1_0000 + }, + // backup boot address + backup_boot_addr: 2147483648 // 0x8000_0000 + +}