diff --git a/Bender.local b/Bender.local index 121e0ef294..9f9aee311a 100644 --- a/Bender.local +++ b/Bender.local @@ -4,4 +4,5 @@ overrides: # Some of our dependencies have false conflicts with our new AXI version; force our version. - axi: {git: https://github.com/pulp-platform/axi.git, version: 0.39.0} + axi: {git: https://github.com/pulp-platform/axi.git, rev: feature/multicast-xbar} + common_cells: {git: https://github.com/pulp-platform/common_cells.git, rev: multicast-xbar} diff --git a/hw/future/src/dma/axi_dma_backend.sv b/hw/future/src/dma/axi_dma_backend.sv index 09a27f0a97..e16f9138aa 100644 --- a/hw/future/src/dma/axi_dma_backend.sv +++ b/hw/future/src/dma/axi_dma_backend.sv @@ -16,6 +16,8 @@ module axi_dma_backend #( parameter int unsigned DataWidth = -1, /// Address width of the AXI bus parameter int unsigned AddrWidth = -1, + /// User width of the AXI bus + parameter int unsigned UserWidth = -1, /// ID width of the AXI bus parameter int unsigned IdWidth = -1, /// Number of AX beats that can be in-flight @@ -81,12 +83,15 @@ module axi_dma_backend #( typedef logic [OffsetWidth-1:0] offset_t; /// Address Type typedef logic [AddrWidth-1:0] addr_t; + /// User Type + typedef logic [UserWidth-1:0] user_t; /// AXI ID Type typedef logic [IdWidth-1:0] axi_id_t; /// id: AXI id /// last: last transaction in burst /// address: address of burst + /// user: user-defined signal /// length: burst length /// size: bytes in each burst /// burst: burst type; only INC supported @@ -95,6 +100,7 @@ module axi_dma_backend #( axi_id_t id; logic last; addr_t addr; + user_t user; axi_pkg::len_t len; axi_pkg::size_t size; axi_pkg::burst_t burst; @@ -207,6 +213,7 @@ module axi_dma_backend #( axi_dma_burst_reshaper #( .DataWidth (DataWidth), .AddrWidth (AddrWidth), + .UserWidth (UserWidth), .IdWidth (IdWidth), .burst_req_t(burst_req_t), .read_req_t (read_req_t), diff --git a/hw/future/src/dma/axi_dma_burst_reshaper.sv b/hw/future/src/dma/axi_dma_burst_reshaper.sv index 
fbea238192..e7b9b852dd 100644 --- a/hw/future/src/dma/axi_dma_burst_reshaper.sv +++ b/hw/future/src/dma/axi_dma_burst_reshaper.sv @@ -16,6 +16,8 @@ module axi_dma_burst_reshaper #( parameter int unsigned DataWidth = -1, /// Address width of the AXI bus parameter int unsigned AddrWidth = -1, + /// User width of the AXI bus + parameter int unsigned UserWidth = -1, /// ID width of the AXI bus parameter int unsigned IdWidth = -1, /// Arbitrary 1D burst request definition: @@ -97,6 +99,8 @@ module axi_dma_burst_reshaper #( typedef logic [OffsetWidth-1:0] offset_t; /// Address Type typedef logic [AddrWidth-1:0] addr_t; + /// User Type + typedef logic [UserWidth-1:0] user_t; /// AXI ID Type typedef logic [IdWidth-1:0] axi_id_t; @@ -105,6 +109,7 @@ module axi_dma_burst_reshaper #( axi_id_t id; addr_t addr; addr_t num_bytes; + user_t user; axi_pkg::cache_t cache; axi_pkg::burst_t burst; logic valid; @@ -264,6 +269,7 @@ module axi_dma_burst_reshaper #( write_req_o.aw.len = ((w_num_bytes + w_addr_offset - 1) >> OffsetWidth); write_req_o.aw.size = axi_pkg::size_t'(OffsetWidth); write_req_o.aw.id = burst_q.dst.id; + write_req_o.aw.user = burst_q.dst.user; // hand over internal transaction id write_req_o.aw.last = w_finish; write_req_o.aw.burst = burst_q.dst.burst; @@ -301,6 +307,7 @@ module axi_dma_burst_reshaper #( burst_d.dst.num_bytes = burst_req_i.num_bytes; burst_d.dst.cache = burst_req_i.cache_dst; burst_d.dst.burst = burst_req_i.burst_dst; + burst_d.dst.user = burst_req_i.user_dst; // check if transfer is possible -> num_bytes has to be larger than 0 burst_d.dst.valid = (burst_req_i.num_bytes == '0) ? 
1'b0 : valid_i; diff --git a/hw/future/src/dma/axi_dma_data_mover.sv b/hw/future/src/dma/axi_dma_data_mover.sv index 9c11f72f9f..5d4b81bd31 100644 --- a/hw/future/src/dma/axi_dma_data_mover.sv +++ b/hw/future/src/dma/axi_dma_data_mover.sv @@ -321,6 +321,7 @@ module axi_dma_data_mover #( // AW signals axi_dma_req_o.aw.id = current_aw_req.id; axi_dma_req_o.aw.addr = current_aw_req.addr; + axi_dma_req_o.aw.user = current_aw_req.user; axi_dma_req_o.aw.len = current_aw_req.len; axi_dma_req_o.aw.size = current_aw_req.size; axi_dma_req_o.aw.burst = current_aw_req.burst; diff --git a/hw/snitch/src/riscv_instr.sv b/hw/snitch/src/riscv_instr.sv index b415628afb..6315f464e6 100644 --- a/hw/snitch/src/riscv_instr.sv +++ b/hw/snitch/src/riscv_instr.sv @@ -327,6 +327,7 @@ package riscv_instr; localparam logic [31:0] DMSTAT = 32'b0000101?????00000000?????0101011; localparam logic [31:0] DMSTR = 32'b0000110??????????000000000101011; localparam logic [31:0] DMREP = 32'b000011100000?????000000000101011; + localparam logic [31:0] DMMCAST = 32'b000100000000?????000000000101011; localparam logic [31:0] FREP_O = 32'b????????????????????????10001011; localparam logic [31:0] FREP_I = 32'b????????????????????????00001011; localparam logic [31:0] IREP = 32'b?????????????????????????0111111; diff --git a/hw/snitch/src/snitch.sv b/hw/snitch/src/snitch.sv index 58f582cc9e..dd832eca87 100644 --- a/hw/snitch/src/snitch.sv +++ b/hw/snitch/src/snitch.sv @@ -2167,6 +2167,16 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( illegal_inst = 1'b1; end end + DMMCAST: begin + if (Xdma) begin + acc_qreq_o.addr = DMA_SS; + opa_select = Reg; + acc_qvalid_o = valid_instr; + write_rd = 1'b0; + end else begin + illegal_inst = 1'b1; + end + end SCFGRI: begin if (Xssr) begin write_rd = 1'b0; diff --git a/hw/snitch_cluster/src/snitch_cc.sv b/hw/snitch_cluster/src/snitch_cc.sv index 2d38c63b3d..bf19c6e7ec 100644 --- a/hw/snitch_cluster/src/snitch_cc.sv +++ b/hw/snitch_cluster/src/snitch_cc.sv @@ 
-17,6 +17,8 @@ module snitch_cc #( parameter int unsigned DataWidth = 0, /// Data width of the AXI DMA buses. parameter int unsigned DMADataWidth = 0, + /// User width of the AXI DMA buses. + parameter int unsigned DMAUserWidth = 0, /// Id width of the AXI DMA bus. parameter int unsigned DMAIdWidth = 0, parameter int unsigned DMAAxiReqFifoDepth = 0, @@ -348,6 +350,7 @@ module snitch_cc #( axi_dma_tc_snitch_fe #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (DMAUserWidth), .DMADataWidth (DMADataWidth), .IdWidth (DMAIdWidth), .DMAAxiReqFifoDepth (DMAAxiReqFifoDepth), diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index 4f666075ed..b69bbbcac2 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -65,6 +65,8 @@ module snitch_cluster parameter int unsigned ICacheSets [NrHives] = '{default: 0}, /// Enable virtual memory support. parameter bit VMSupport = 1, + /// Enable multicast on DMA XBAR. + parameter bit EnableDMAMulticast = 0, /// Per-core enabling of the standard `E` ISA reduced-register extension. parameter bit [NrCores-1:0] RVE = '0, /// Per-core enabling of the standard `F` ISA extensions. 
@@ -283,10 +285,29 @@ module snitch_cluster UniqueIds: 1'b0, AxiAddrWidth: PhysicalAddrWidth, AxiDataWidth: NarrowDataWidth, - NoAddrRules: NrRules + NoAddrRules: NrRules, + default: '0 }; // DMA configuration struct + localparam axi_pkg::xbar_cfg_t DmaMcastXbarCfg = '{ + NoSlvPorts: NrWideMasters, + NoMstPorts: NrWideSlaves, + MaxMstTrans: WideMaxMstTrans, + MaxSlvTrans: WideMaxSlvTrans, + FallThrough: 1'b0, + LatencyMode: WideXbarLatency, + PipelineStages: 0, + AxiIdWidthSlvPorts: WideIdWidthIn, + AxiIdUsedSlvPorts: WideIdWidthIn, + UniqueIds: 1'b0, + AxiAddrWidth: PhysicalAddrWidth, + AxiDataWidth: WideDataWidth, + NoAddrRules: 2, + NoMulticastRules: 1, + NoMulticastPorts: 2, + default: '0 + }; localparam axi_pkg::xbar_cfg_t DmaXbarCfg = '{ NoSlvPorts: NrWideMasters, NoMstPorts: NrWideSlaves, @@ -300,7 +321,8 @@ module snitch_cluster UniqueIds: 1'b0, AxiAddrWidth: PhysicalAddrWidth, AxiDataWidth: WideDataWidth, - NoAddrRules: 2 + NoAddrRules: 2, + default: '0 }; function automatic int unsigned get_hive_size(int unsigned current_hive); @@ -331,7 +353,9 @@ module snitch_cluster typedef logic [WideIdWidthIn-1:0] id_dma_mst_t; typedef logic [WideIdWidthOut-1:0] id_dma_slv_t; typedef logic [NarrowUserWidth-1:0] user_t; - typedef logic [WideUserWidth-1:0] user_dma_t; + typedef struct packed { + logic [WideUserWidth-1:0] mcast; + } user_dma_t; typedef logic [TCDMMemAddrWidth-1:0] tcdm_mem_addr_t; typedef logic [TCDMAddrWidth-1:0] tcdm_addr_t; @@ -547,52 +571,89 @@ module snitch_cluster .mst_resp_i (wide_axi_mst_rsp[SoCDMAIn]) ); - logic [DmaXbarCfg.NoSlvPorts-1:0][$clog2(DmaXbarCfg.NoMstPorts)-1:0] dma_xbar_default_port; + xbar_rule_t dma_xbar_default_port; xbar_rule_t [DmaXbarCfg.NoAddrRules-1:0] dma_xbar_rule; - assign dma_xbar_default_port = '{default: SoCDMAOut}; + assign dma_xbar_default_port = '{ + idx: SoCDMAOut, + start_addr: tcdm_start_address, + end_addr: zero_mem_end_address + }; assign dma_xbar_rule = '{ - '{ - idx: TCDMDMA, - start_addr: 
tcdm_start_address, - end_addr: tcdm_end_address - }, '{ idx: ZeroMemory, start_addr: zero_mem_start_address, end_addr: zero_mem_end_address + }, + '{ + idx: TCDMDMA, + start_addr: tcdm_start_address, + end_addr: tcdm_end_address } }; localparam bit [DmaXbarCfg.NoSlvPorts-1:0] DMAEnableDefaultMstPort = '1; - axi_xbar #( - .Cfg (DmaXbarCfg), - .ATOPs (0), - .slv_aw_chan_t (axi_mst_dma_aw_chan_t), - .mst_aw_chan_t (axi_slv_dma_aw_chan_t), - .w_chan_t (axi_mst_dma_w_chan_t), - .slv_b_chan_t (axi_mst_dma_b_chan_t), - .mst_b_chan_t (axi_slv_dma_b_chan_t), - .slv_ar_chan_t (axi_mst_dma_ar_chan_t), - .mst_ar_chan_t (axi_slv_dma_ar_chan_t), - .slv_r_chan_t (axi_mst_dma_r_chan_t), - .mst_r_chan_t (axi_slv_dma_r_chan_t), - .slv_req_t (axi_mst_dma_req_t), - .slv_resp_t (axi_mst_dma_resp_t), - .mst_req_t (axi_slv_dma_req_t), - .mst_resp_t (axi_slv_dma_resp_t), - .rule_t (xbar_rule_t) - ) i_axi_dma_xbar ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .test_i (1'b0), - .slv_ports_req_i (wide_axi_mst_req), - .slv_ports_resp_o (wide_axi_mst_rsp), - .mst_ports_req_o (wide_axi_slv_req), - .mst_ports_resp_i (wide_axi_slv_rsp), - .addr_map_i (dma_xbar_rule), - .en_default_mst_port_i (DMAEnableDefaultMstPort), - .default_mst_port_i (dma_xbar_default_port) - ); + + if (EnableDMAMulticast) begin : gen_mcast_dma_xbar + axi_mcast_xbar #( + .Cfg (DmaMcastXbarCfg), + .ATOPs (0), + .slv_aw_chan_t (axi_mst_dma_aw_chan_t), + .mst_aw_chan_t (axi_slv_dma_aw_chan_t), + .w_chan_t (axi_mst_dma_w_chan_t), + .slv_b_chan_t (axi_mst_dma_b_chan_t), + .mst_b_chan_t (axi_slv_dma_b_chan_t), + .slv_ar_chan_t (axi_mst_dma_ar_chan_t), + .mst_ar_chan_t (axi_slv_dma_ar_chan_t), + .slv_r_chan_t (axi_mst_dma_r_chan_t), + .mst_r_chan_t (axi_slv_dma_r_chan_t), + .slv_req_t (axi_mst_dma_req_t), + .slv_resp_t (axi_mst_dma_resp_t), + .mst_req_t (axi_slv_dma_req_t), + .mst_resp_t (axi_slv_dma_resp_t), + .rule_t (xbar_rule_t) + ) i_axi_dma_xbar ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .test_i (1'b0), + .slv_ports_req_i 
(wide_axi_mst_req), + .slv_ports_resp_o (wide_axi_mst_rsp), + .mst_ports_req_o (wide_axi_slv_req), + .mst_ports_resp_i (wide_axi_slv_rsp), + .addr_map_i (dma_xbar_rule), + .en_default_mst_port_i (DMAEnableDefaultMstPort), + .default_mst_port_i ({DmaXbarCfg.NoSlvPorts{dma_xbar_default_port}}) + ); + end else begin : gen_dma_xbar + axi_xbar #( + .Cfg (DmaXbarCfg), + .ATOPs (0), + .slv_aw_chan_t (axi_mst_dma_aw_chan_t), + .mst_aw_chan_t (axi_slv_dma_aw_chan_t), + .w_chan_t (axi_mst_dma_w_chan_t), + .slv_b_chan_t (axi_mst_dma_b_chan_t), + .mst_b_chan_t (axi_slv_dma_b_chan_t), + .slv_ar_chan_t (axi_mst_dma_ar_chan_t), + .mst_ar_chan_t (axi_slv_dma_ar_chan_t), + .slv_r_chan_t (axi_mst_dma_r_chan_t), + .mst_r_chan_t (axi_slv_dma_r_chan_t), + .slv_req_t (axi_mst_dma_req_t), + .slv_resp_t (axi_mst_dma_resp_t), + .mst_req_t (axi_slv_dma_req_t), + .mst_resp_t (axi_slv_dma_resp_t), + .rule_t (xbar_rule_t) + ) i_axi_dma_xbar ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .test_i (1'b0), + .slv_ports_req_i (wide_axi_mst_req), + .slv_ports_resp_o (wide_axi_mst_rsp), + .mst_ports_req_o (wide_axi_slv_req), + .mst_ports_resp_i (wide_axi_slv_rsp), + .addr_map_i (dma_xbar_rule), + .en_default_mst_port_i (DMAEnableDefaultMstPort), + .default_mst_port_i ('{default: dma_xbar_default_port.idx}) + ); + end axi_zero_mem #( .axi_req_t (axi_slv_dma_req_t), @@ -819,6 +880,7 @@ module snitch_cluster .AddrWidth (PhysicalAddrWidth), .DataWidth (NarrowDataWidth), .DMADataWidth (WideDataWidth), + .DMAUserWidth (WideUserWidth), .DMAIdWidth (WideIdWidthIn), .SnitchPMACfg (SnitchPMACfg), .DMAAxiReqFifoDepth (DMAAxiReqFifoDepth), diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 6c1df0dc1f..f75e812a75 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -282,6 +282,7 @@ module ${cfg['name']}_wrapper ( .ICacheLineCount (${cfg['pkg_name']}::ICacheLineCount), 
.ICacheSets (${cfg['pkg_name']}::ICacheSets), .VMSupport (${int(cfg['vm_support'])}), + .EnableDMAMulticast (${int(cfg['enable_multicast'])}), .RVE (${core_isa('e')}), .RVF (${core_isa('f')}), .RVD (${core_isa('d')}), diff --git a/hw/snitch_dma/src/axi_dma_tc_snitch_fe.sv b/hw/snitch_dma/src/axi_dma_tc_snitch_fe.sv index 7fcb9d56f1..aeedfe545a 100644 --- a/hw/snitch_dma/src/axi_dma_tc_snitch_fe.sv +++ b/hw/snitch_dma/src/axi_dma_tc_snitch_fe.sv @@ -12,6 +12,7 @@ module axi_dma_tc_snitch_fe #( parameter int unsigned AddrWidth = 0, parameter int unsigned DataWidth = 0, + parameter int unsigned UserWidth = 0, parameter int unsigned DMADataWidth = 0, parameter int unsigned IdWidth = 0, parameter int unsigned DMAAxiReqFifoDepth = 3, @@ -22,7 +23,8 @@ module axi_dma_tc_snitch_fe #( parameter type dma_events_t = logic, /// Derived parameter *Do not override* parameter type addr_t = logic [AddrWidth-1:0], - parameter type data_t = logic [DataWidth-1:0] + parameter type data_t = logic [DataWidth-1:0], + parameter type user_t = logic [UserWidth-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -60,6 +62,7 @@ module axi_dma_tc_snitch_fe #( typedef struct packed { id_t id; addr_t src, dst, num_bytes; + user_t user_src, user_dst; axi_pkg::cache_t cache_src, cache_dst; axi_pkg::burst_t burst_src, burst_dst; logic decouple_rw; @@ -70,6 +73,7 @@ module axi_dma_tc_snitch_fe #( typedef struct packed { id_t id; addr_t src, dst, num_bytes; + user_t user_src, user_dst; axi_pkg::cache_t cache_src, cache_dst; addr_t stride_src, stride_dst, num_repetitions; axi_pkg::burst_t burst_src, burst_dst; @@ -90,6 +94,7 @@ module axi_dma_tc_snitch_fe #( axi_dma_backend #( .DataWidth ( DMADataWidth ), .AddrWidth ( AddrWidth ), + .UserWidth ( UserWidth ), .IdWidth ( IdWidth ), .AxReqFifoDepth ( DMAAxiReqFifoDepth ), .TransFifoDepth ( DMAReqFifoDepth ), @@ -350,6 +355,14 @@ module axi_dma_tc_snitch_fe #( dma_op_name = "DMREP"; end + // write the multicast mask in the destination user signal + 
riscv_instr::DMMCAST : begin + twod_req_d.user_dst = acc_qdata_arga_i; + acc_qready_o = 1'b1; + is_dma_op = 1'b1; + dma_op_name = "DMMCAST"; + end + default:; endcase end diff --git a/hw/snitch_dma/src/axi_dma_twod_ext.sv b/hw/snitch_dma/src/axi_dma_twod_ext.sv index 8ca8b29d76..53a17c756c 100644 --- a/hw/snitch_dma/src/axi_dma_twod_ext.sv +++ b/hw/snitch_dma/src/axi_dma_twod_ext.sv @@ -91,6 +91,8 @@ module axi_dma_twod_ext #( burst_req_o.src = twod_req_current.src; burst_req_o.dst = twod_req_current.dst; burst_req_o.num_bytes = twod_req_current.num_bytes; + burst_req_o.user_src = twod_req_current.user_src; + burst_req_o.user_dst = twod_req_current.user_dst; burst_req_o.cache_src = twod_req_current.cache_src; burst_req_o.cache_dst = twod_req_current.cache_dst; burst_req_o.burst_src = twod_req_current.burst_src; diff --git a/sw/deps/riscv-opcodes b/sw/deps/riscv-opcodes index 94caf0e0fe..98d929cc88 160000 --- a/sw/deps/riscv-opcodes +++ b/sw/deps/riscv-opcodes @@ -1 +1 @@ -Subproject commit 94caf0e0fefff1009ba144bccb6d8f7d425ea2f5 +Subproject commit 98d929cc88cd8442976eb3c378287ccc5c0d00fb diff --git a/sw/snRuntime/src/dma.h b/sw/snRuntime/src/dma.h index 169e54d7be..9fed6b3af1 100644 --- a/sw/snRuntime/src/dma.h +++ b/sw/snRuntime/src/dma.h @@ -53,12 +53,79 @@ inline snrt_dma_txid_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src, } } +/// Initiate an asynchronous 1D multicast DMA transfer with wide 64-bit pointers.
+inline snrt_dma_txid_t snrt_dma_start_1d_mcast_wideptr(uint64_t dst, + uint64_t src, + uint32_t mcast, + size_t size) { + // Current DMA does not allow transfers with size == 0 (blocks) + // TODO(colluca) remove this check once new DMA is integrated + if (size > 0) { + register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10 + register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11 + register uint32_t reg_src_low asm("a2") = src >> 0; // 12 + register uint32_t reg_src_high asm("a3") = src >> 32; // 13 + register uint32_t reg_mcast asm("a4") = mcast; // 14 + register uint32_t reg_size asm("a5") = size; // 15 + + // dmmcast a4 (sets the multicast mask) + asm volatile( + ".word (0b0001000 << 25) | \ + ( (14) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_mcast)); + + // dmsrc a2, a3 + asm volatile( + ".word (0b0000000 << 25) | \ + ( (13) << 20) | \ + ( (12) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_src_high), + "r"(reg_src_low)); + + // dmdst a0, a1 + asm volatile( + ".word (0b0000001 << 25) | \ + ( (11) << 20) | \ + ( (10) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_dst_high), + "r"(reg_dst_low)); + + // dmcpyi a0, a5, 0b00 + register uint32_t reg_txid asm("a0"); // 10 + asm volatile( + ".word (0b0000010 << 25) | \ + ( 0b00000 << 20) | \ + ( (15) << 15) | \ + ( 0b000 << 12) | \ + ( (10) << 7) | \ + (0b0101011 << 0) \n" + : "=r"(reg_txid) + : "r"(reg_size)); + + return reg_txid; + } else { + return -1; + } +} + /// Initiate an asynchronous 1D DMA transfer. inline snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src, size_t size) { return snrt_dma_start_1d_wideptr((size_t)dst, (size_t)src, size); } +/// Initiate an asynchronous 1D multicast DMA transfer. +inline snrt_dma_txid_t snrt_dma_start_1d_mcast(void *dst, const void *src, + uint32_t mcast, size_t size) { + return snrt_dma_start_1d_mcast_wideptr((size_t)dst, (size_t)src, mcast, + size); +} + /// Initiate an asynchronous 2D DMA transfer with wide 64-bit pointers.
inline snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, diff --git a/target/common/common.mk b/target/common/common.mk index 143f9b9a81..73ad1152c6 100644 --- a/target/common/common.mk +++ b/target/common/common.mk @@ -78,6 +78,7 @@ VLT_FLAGS += -Wno-UNOPTFLAT VLT_FLAGS += -Wno-fatal VLT_FLAGS += --unroll-count 1024 VLT_FLAGS += --timescale 1ns/1ps +VLT_FLAGS += -DCOMMON_CELLS_ASSERTS_OFF # Disable unsupported assertions in Verilator 4.110 VLT_CFLAGS += -std=c++14 -pthread VLT_CFLAGS +=-I ${VLT_BUILDDIR} -I $(VLT_ROOT)/include -I $(VLT_ROOT)/include/vltstd -I $(VLT_FESVR)/include -I $(TB_DIR) -I ${MKFILE_DIR}/test diff --git a/target/snitch_cluster/cfg/default.hjson b/target/snitch_cluster/cfg/default.hjson index c312a8a49e..e5a4fbe2d3 100644 --- a/target/snitch_cluster/cfg/default.hjson +++ b/target/snitch_cluster/cfg/default.hjson @@ -23,8 +23,10 @@ cluster_periph_size: 64, // kB zero_mem_size: 64, // kB dma_data_width: 512, + dma_user_width: 48, // same as addr_width dma_axi_req_fifo_depth: 3, dma_req_fifo_depth: 3, + enable_multicast: true, // Timing parameters timing: { lat_comp_fp32: 3,