From a85b0ac1699b8e3f6d7c894f309aa58eb10dda50 Mon Sep 17 00:00:00 2001
From: Thomas Benz
Date: Wed, 6 Sep 2023 11:28:44 +0200
Subject: [PATCH] iDMA: Add 2D support in hardware, tune interconnect

---------

Co-authored-by: Alessandro Ottaviano
---
Review note: sw/include/dif/dma.h was revised during review (include guard,
restored <stdint.h> include, extern declaration for *_dma_get_status, macro
documentation). The blob id on its index line predates these edits.

 Bender.lock            |   8 +--
 Bender.yml             |   2 +-
 cheshire.mk            |   1 +
 hw/cheshire_pkg.sv     |  18 ++++--
 hw/cheshire_soc.sv     |  23 +++++---
 sw/include/dif/dma.h   | 144 +++++++++++++++++++++++++++++++++++++++++
 sw/include/regs/idma.h |  57 ++++++++++++++++++
 sw/sw.mk               |   1 +
 8 files changed, 236 insertions(+), 18 deletions(-)
 create mode 100644 sw/include/dif/dma.h
 create mode 100644 sw/include/regs/idma.h

diff --git a/Bender.lock b/Bender.lock
index eca72d583..b08779bb9 100644
--- a/Bender.lock
+++ b/Bender.lock
@@ -15,8 +15,8 @@ packages:
     - apb
     - register_interface
   axi:
-    revision: bfee21757bf090ec8e358456314b0b0fd3c90809
-    version: 0.39.0
+    revision: fccffb5953ec8564218ba05e20adbedec845e014
+    version: 0.39.1
     source:
       Git: https://github.com/pulp-platform/axi.git
     dependencies:
@@ -108,8 +108,8 @@ packages:
     dependencies:
     - common_cells
   idma:
-    revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1
-    version: null
+    revision: 2c64e0773fab5a54757646715485fcdf3432c7c1
+    version: 0.5.0
     source:
       Git: https://github.com/pulp-platform/iDMA.git
     dependencies:
diff --git a/Bender.yml b/Bender.yml
index b977a6b7f..c9a5c7a48 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -21,7 +21,7 @@ dependencies:
   common_cells:             { git: "https://github.com/pulp-platform/common_cells.git",             version: 1.29.0  }
   common_verification:      { git: "https://github.com/pulp-platform/common_verification.git",      version: 0.2.0   }
   cva6:                     { git: "https://github.com/pulp-platform/cva6.git",                     rev: pulp-v0.4.3 }
-  iDMA:                     { git: "https://github.com/pulp-platform/iDMA.git",                     rev: 437ffa9     } # TODO: master commit; use next release once out
+  iDMA:                     { git: "https://github.com/pulp-platform/iDMA.git",                     version: 0.5.0   }
   opentitan_peripherals:    { git: "https://github.com/pulp-platform/opentitan_peripherals.git",    version: 0.4.0   }
   register_interface:       { git: "https://github.com/pulp-platform/register_interface.git",       version: 0.4.1   }
   riscv-dbg:                { git: "https://github.com/pulp-platform/riscv-dbg.git",                version: 0.8.0   }
diff --git a/cheshire.mk b/cheshire.mk
index 4f3dffa73..afbfcc482 100644
--- a/cheshire.mk
+++ b/cheshire.mk
@@ -21,6 +21,7 @@ CHS_LLC_DIR := $(shell $(BENDER) path axi_llc)
 OTPROOT      := $(shell $(BENDER) path opentitan_peripherals)
 CLINTROOT    := $(shell $(BENDER) path clint)
 AXI_VGA_ROOT := $(shell $(BENDER) path axi_vga)
+IDMA_ROOT    := $(shell $(BENDER) path idma)
 
 REGTOOL ?= $(CHS_REG_DIR)/vendor/lowrisc_opentitan/util/regtool.py
 
diff --git a/hw/cheshire_pkg.sv b/hw/cheshire_pkg.sv
index 1d3b09df0..530cecdab 100644
--- a/hw/cheshire_pkg.sv
+++ b/hw/cheshire_pkg.sv
@@ -171,7 +171,12 @@ package cheshire_pkg;
     dw_bt   DmaConfMaxReadTxns;
     dw_bt   DmaConfMaxWriteTxns;
     aw_bt   DmaConfAmoNumCuts;
+    dw_bt   DmaNumAxInFlight;
+    dw_bt   DmaMemSysDepth;
+    aw_bt   DmaJobFifoDepth;
+    bit     DmaRAWCouplingAvail;
     bit     DmaConfAmoPostCut;
+    bit     DmaConfEnableTwoD;
     // Parameters for GPIO
     bit     GpioInputSyncs;
     // Parameters for AXI RT
@@ -514,8 +519,8 @@ package cheshire_pkg;
     AxiDataWidth      : 64,
     AxiUserWidth      : 2,  // AMO(2)
     AxiMstIdWidth     : 2,
-    AxiMaxMstTrans    : 8,
-    AxiMaxSlvTrans    : 8,
+    AxiMaxMstTrans    : 24,
+    AxiMaxSlvTrans    : 24,
     AxiUserAmoMsb     : 1,  // Convention: lower AMO bits for cores, MSB for serial link
     AxiUserAmoLsb     : 0,  // Convention: lower AMO bits for cores, MSB for serial link
     AxiUserDefault    : 0,
@@ -550,8 +555,8 @@ package cheshire_pkg;
     LlcSetAssoc       : 8,
     LlcNumLines       : 256,
     LlcNumBlocks      : 8,
-    LlcMaxReadTxns    : 8,
-    LlcMaxWriteTxns   : 8,
+    LlcMaxReadTxns    : 16,
+    LlcMaxWriteTxns   : 16,
     LlcAmoNumCuts     : 1,
     LlcAmoPostCut     : 1,
     LlcOutConnect     : 1,
@@ -576,7 +581,12 @@ package cheshire_pkg;
     DmaConfMaxReadTxns  : 4,
     DmaConfMaxWriteTxns : 4,
     DmaConfAmoNumCuts   : 1,
+    DmaNumAxInFlight    : 16,
+    DmaMemSysDepth      : 8,
+    DmaJobFifoDepth     : 2,
+    DmaRAWCouplingAvail : 1,
     DmaConfAmoPostCut   : 1,
+    DmaConfEnableTwoD   : 1,
     // GPIOs
     GpioInputSyncs    : 1,
     // AXI RT
diff --git a/hw/cheshire_soc.sv b/hw/cheshire_soc.sv
index 31812a530..b631db66a 100644
--- a/hw/cheshire_soc.sv
+++ b/hw/cheshire_soc.sv
@@ -1460,15 +1460,20 @@ module cheshire_soc import cheshire_pkg::*; #(
     end
 
     dma_core_wrap #(
-      .AxiAddrWidth   ( Cfg.AddrWidth     ),
-      .AxiDataWidth   ( Cfg.AxiDataWidth  ),
-      .AxiIdWidth     ( Cfg.AxiMstIdWidth ),
-      .AxiUserWidth   ( Cfg.AxiUserWidth  ),
-      .AxiSlvIdWidth  ( AxiSlvIdWidth     ),
-      .axi_mst_req_t  ( axi_mst_req_t ),
-      .axi_mst_rsp_t  ( axi_mst_rsp_t ),
-      .axi_slv_req_t  ( axi_slv_req_t ),
-      .axi_slv_rsp_t  ( axi_slv_rsp_t )
+      .AxiAddrWidth     ( Cfg.AddrWidth           ),
+      .AxiDataWidth     ( Cfg.AxiDataWidth        ),
+      .AxiIdWidth       ( Cfg.AxiMstIdWidth       ),
+      .AxiUserWidth     ( Cfg.AxiUserWidth        ),
+      .AxiSlvIdWidth    ( AxiSlvIdWidth           ),
+      .NumAxInFlight    ( Cfg.DmaNumAxInFlight    ),
+      .MemSysDepth      ( Cfg.DmaMemSysDepth      ),
+      .JobFifoDepth     ( Cfg.DmaJobFifoDepth     ),
+      .RAWCouplingAvail ( Cfg.DmaRAWCouplingAvail ),
+      .IsTwoD           ( Cfg.DmaConfEnableTwoD   ),
+      .axi_mst_req_t    ( axi_mst_req_t ),
+      .axi_mst_rsp_t    ( axi_mst_rsp_t ),
+      .axi_slv_req_t    ( axi_slv_req_t ),
+      .axi_slv_rsp_t    ( axi_slv_rsp_t )
     ) i_dma (
       .clk_i,
       .rst_ni,
diff --git a/sw/include/dif/dma.h b/sw/include/dif/dma.h
new file mode 100644
index 000000000..1f65162cc
--- /dev/null
+++ b/sw/include/dif/dma.h
@@ -0,0 +1,144 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Alessandro Ottaviano
+// Thomas Benz
+
+#ifndef DIF_DMA_H_
+#define DIF_DMA_H_
+
+#include <stdint.h>
+#include "regs/idma.h"
+#include "params.h"
+
+#define DMA_SRC_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET)
+#define DMA_DST_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET)
+#define DMA_NUMBYTES_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET)
+#define DMA_CONF_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET)
+#define DMA_STATUS_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET)
+#define DMA_NEXTID_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET)
+#define DMA_DONE_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET)
+#define DMA_SRC_STRIDE_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET)
+#define DMA_DST_STRIDE_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET)
+#define DMA_NUM_REPS_ADDR(BASE) (BASE + IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET)
+
+#define DMA_CONF_DECOUPLE 0
+#define DMA_CONF_DEBURST 0
+#define DMA_CONF_SERIALIZE 0
+
+// X(NAME, BASE_ADDR) generates the driver for the DMA frontend at BASE_ADDR:
+//  - NAME_dma_<reg>_ptr(): pointer to one frontend register.
+//  - NAME_dma_memcpy() / NAME_dma_2d_memcpy(): program a 1D / 2D transfer, then read
+//    NEXT_ID (which launches it, see regs/idma.h) and return the ID read.
+//  - NAME_dma_blk_memcpy() / NAME_dma_2d_blk_memcpy(): as above, then poll DONE until
+//    it equals that ID.
+//  - NAME_dma_get_status(): read the STATUS register.
+// Every function is declared `extern` next to its `inline` definition.
+#define X(NAME, BASE_ADDR) \
+    extern volatile uint64_t *NAME##_dma_src_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_dst_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_num_bytes_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_conf_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_status_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_nextid_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_done_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_src_stride_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_dst_stride_ptr(void); \
+    extern volatile uint64_t *NAME##_dma_num_reps_ptr(void); \
+\
+    extern uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
+    extern void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
+    extern uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
+                                         uint64_t dst_stride, uint64_t src_stride, \
+                                         uint64_t num_reps); \
+    extern void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
+                                         uint64_t dst_stride, uint64_t src_stride, \
+                                         uint64_t num_reps); \
+    extern uint64_t NAME##_dma_get_status(void); \
+\
+    inline volatile uint64_t *NAME##_dma_src_ptr(void) { \
+        return (volatile uint64_t *)DMA_SRC_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_dst_ptr(void) { \
+        return (volatile uint64_t *)DMA_DST_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_num_bytes_ptr(void) { \
+        return (volatile uint64_t *)DMA_NUMBYTES_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_conf_ptr(void) { \
+        return (volatile uint64_t *)DMA_CONF_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_status_ptr(void) { \
+        return (volatile uint64_t *)DMA_STATUS_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_nextid_ptr(void) { \
+        return (volatile uint64_t *)DMA_NEXTID_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_done_ptr(void) { \
+        return (volatile uint64_t *)DMA_DONE_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_src_stride_ptr(void) { \
+        return (volatile uint64_t *)DMA_SRC_STRIDE_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_dst_stride_ptr(void) { \
+        return (volatile uint64_t *)DMA_DST_STRIDE_ADDR(BASE_ADDR); \
+    } \
+    inline volatile uint64_t *NAME##_dma_num_reps_ptr(void) { \
+        return (volatile uint64_t *)DMA_NUM_REPS_ADDR(BASE_ADDR); \
+    } \
+\
+    inline uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
+        *(NAME##_dma_src_ptr()) = (uint64_t)src; \
+        *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \
+        *(NAME##_dma_num_bytes_ptr()) = size; \
+        *(NAME##_dma_num_reps_ptr()) = 0; \
+        *(NAME##_dma_conf_ptr()) = \
+            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
+            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
+            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
+        return *(NAME##_dma_nextid_ptr()); \
+    } \
+\
+    inline void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
+        volatile uint64_t tf_id = NAME##_dma_memcpy(dst, src, size); \
+        while (*(NAME##_dma_done_ptr()) != tf_id) { \
+            asm volatile("nop"); \
+        } \
+    } \
+\
+    inline uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
+                                         uint64_t dst_stride, uint64_t src_stride, \
+                                         uint64_t num_reps) { \
+        *(NAME##_dma_src_ptr()) = (uint64_t)src; \
+        *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \
+        *(NAME##_dma_num_bytes_ptr()) = size; \
+        *(NAME##_dma_conf_ptr()) = \
+            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
+            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
+            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
+        *(NAME##_dma_src_stride_ptr()) = src_stride; \
+        *(NAME##_dma_dst_stride_ptr()) = dst_stride; \
+        *(NAME##_dma_num_reps_ptr()) = num_reps; \
+        return *(NAME##_dma_nextid_ptr()); \
+    } \
+\
+    inline void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
+                                         uint64_t dst_stride, uint64_t src_stride, \
+                                         uint64_t num_reps) { \
+        volatile uint64_t tf_id = \
+            NAME##_dma_2d_memcpy(dst, src, size, dst_stride, src_stride, num_reps); \
+        while (*(NAME##_dma_done_ptr()) != tf_id) { \
+            asm volatile("nop"); \
+        } \
+    } \
+\
+    inline uint64_t NAME##_dma_get_status(void) { \
+        return *(NAME##_dma_status_ptr()); \
+    }
+
+X(sys, __base_dma);
+
+#undef X
+
+#endif // DIF_DMA_H_
diff --git a/sw/include/regs/idma.h b/sw/include/regs/idma.h
new file mode 100644
index 000000000..7aa43052a
--- /dev/null
+++ b/sw/include/regs/idma.h
@@ -0,0 +1,57 @@
+// Generated register defines for idma_reg64_2d_frontend
+
+// Copyright information found in source file:
+// Copyright 2022 ETH Zurich and University of Bologna.
+
+// Licensing information found in source file:
+// Licensed under Solderpad Hardware License, Version 0.51
+// SPDX-License-Identifier: SHL-0.51
+
+#ifndef _IDMA_REG64_2D_FRONTEND_REG_DEFS_
+#define _IDMA_REG64_2D_FRONTEND_REG_DEFS_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Register width
+#define IDMA_REG64_2D_FRONTEND_PARAM_REG_WIDTH 64
+
+// Source Address
+#define IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET 0x0
+
+// Destination Address
+#define IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET 0x8
+
+// Number of bytes
+#define IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET 0x10
+
+// Configuration Register for DMA settings
+#define IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET 0x18
+#define IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT 0
+#define IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT 1
+#define IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT 2
+
+// DMA Status
+#define IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET 0x20
+#define IDMA_REG64_2D_FRONTEND_STATUS_BUSY_BIT 0
+
+// Next ID, launches transfer, returns 0 if transfer not set up properly.
+#define IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET 0x28
+
+// Get ID of finished transactions.
+#define IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET 0x30
+
+// Source Stride
+#define IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET 0x38
+
+// Destination Stride
+#define IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET 0x40
+
+// Number of 2D repetitions
+#define IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET 0x48
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // _IDMA_REG64_2D_FRONTEND_REG_DEFS_
+// End generated register defines for idma_reg64_2d_frontend
\ No newline at end of file
diff --git a/sw/sw.mk b/sw/sw.mk
index aa708bb75..07ed3637b 100644
--- a/sw/sw.mk
+++ b/sw/sw.mk
@@ -77,6 +77,7 @@ endef
 $(eval $(call chs_sw_gen_hdr_rule,clint,$(CLINTROOT)/src/clint.hjson $(CLINTROOT)/.generated))
 $(eval $(call chs_sw_gen_hdr_rule,serial_link,$(CHS_ROOT)/hw/serial_link.hjson $(CHS_SLINK_DIR)/.generated))
 $(eval $(call chs_sw_gen_hdr_rule,axi_vga,$(AXI_VGA_ROOT)/data/axi_vga.hjson $(AXI_VGA_ROOT)/.generated))
+$(eval $(call chs_sw_gen_hdr_rule,idma,$(IDMA_ROOT)/src/frontends/register_64bit_2d/idma_reg64_2d_frontend.hjson))
 $(eval $(call chs_sw_gen_hdr_rule,axi_llc,$(CHS_LLC_DIR)/data/axi_llc_regs.hjson))
 $(eval $(call chs_sw_gen_hdr_rule,cheshire,$(CHS_ROOT)/hw/regs/cheshire_regs.hjson))
 $(eval $(call chs_sw_gen_hdr_rule,axi_rt,$(CHS_ROOT)/hw/regs/axi_rt_regs.hjson))