Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hw: Add native iDMA 2D capabilities, tune interconnect #73

Merged
merged 2 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ packages:
- apb
- register_interface
axi:
revision: bfee21757bf090ec8e358456314b0b0fd3c90809
version: 0.39.0
revision: fccffb5953ec8564218ba05e20adbedec845e014
version: 0.39.1
source:
Git: https://github.com/pulp-platform/axi.git
dependencies:
Expand Down Expand Up @@ -108,8 +108,8 @@ packages:
dependencies:
- common_cells
idma:
revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1
version: null
revision: 2c64e0773fab5a54757646715485fcdf3432c7c1
version: 0.5.0
source:
Git: https://github.com/pulp-platform/iDMA.git
dependencies:
Expand Down
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.29.0 }
common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.0 }
cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: pulp-v0.4.3 }
iDMA: { git: "https://github.com/pulp-platform/iDMA.git", rev: 437ffa9 } # TODO: master commit; use next release once out
iDMA: { git: "https://github.com/pulp-platform/iDMA.git", version: 0.5.0 }
opentitan_peripherals: { git: "https://github.com/pulp-platform/opentitan_peripherals.git", version: 0.4.0 }
register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.1 }
riscv-dbg: { git: "https://github.com/pulp-platform/riscv-dbg.git", version: 0.8.0 }
Expand Down
1 change: 1 addition & 0 deletions cheshire.mk
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ CHS_LLC_DIR := $(shell $(BENDER) path axi_llc)
OTPROOT := $(shell $(BENDER) path opentitan_peripherals)
CLINTROOT := $(shell $(BENDER) path clint)
AXI_VGA_ROOT := $(shell $(BENDER) path axi_vga)
IDMA_ROOT := $(shell $(BENDER) path idma)

REGTOOL ?= $(CHS_REG_DIR)/vendor/lowrisc_opentitan/util/regtool.py

Expand Down
7 changes: 6 additions & 1 deletion docs/um/arch.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,18 @@ The [Serial Link](https://github.com/pulp-platform/serial_link) is a fully digit

### DMA engine

The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. It exposes the following parameters:
The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. If enabled, the engine natively supports transfers of up to two dimensions. It exposes the following parameters:

| Parameter | Type / Range | Description |
| ---------------------------- | ------------ | ------------------------------------------------- |
| `DmaConfMax(Read|Write)Txns` | `dw_bt` | Max. number of outstanding requests to DMA config |
| `DmaConfAmoNumCuts` | `aw_bt` | Number of timing cuts inside config AMO filter |
| `DmaConfAmoPostCut` | `bit` | Whether to insert a cut after config AMO filter |
| `DmaConfEnableTwoD` | `bit` | Whether the 2D hardware extension is present |
| `DmaNumAxInFlight` | `dw_bt` | Number of outstanding transfers the DMA launches |
| `DmaMemSysDepth` | `dw_bt` | The *approximate* depth of the memory system |
| `DmaJobFifoDepth` | `aw_bt` | The depth of the job FIFO |
| `DmaRAWCouplingAvail` | `bit` | Whether the R-AW coupling feature is available |

### I2C, SPI, GPIOs

Expand Down
18 changes: 14 additions & 4 deletions hw/cheshire_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ package cheshire_pkg;
dw_bt DmaConfMaxWriteTxns;
aw_bt DmaConfAmoNumCuts;
bit DmaConfAmoPostCut;
bit DmaConfEnableTwoD;
dw_bt DmaNumAxInFlight;
dw_bt DmaMemSysDepth;
aw_bt DmaJobFifoDepth;
bit DmaRAWCouplingAvail;
// Parameters for GPIO
bit GpioInputSyncs;
// Parameters for AXI RT
Expand Down Expand Up @@ -514,8 +519,8 @@ package cheshire_pkg;
AxiDataWidth : 64,
AxiUserWidth : 2, // AMO(2)
AxiMstIdWidth : 2,
AxiMaxMstTrans : 8,
AxiMaxSlvTrans : 8,
AxiMaxMstTrans : 24,
AxiMaxSlvTrans : 24,
AxiUserAmoMsb : 1, // Convention: lower AMO bits for cores, MSB for serial link
AxiUserAmoLsb : 0, // Convention: lower AMO bits for cores, MSB for serial link
AxiUserDefault : 0,
Expand Down Expand Up @@ -550,8 +555,8 @@ package cheshire_pkg;
LlcSetAssoc : 8,
LlcNumLines : 256,
LlcNumBlocks : 8,
LlcMaxReadTxns : 8,
LlcMaxWriteTxns : 8,
LlcMaxReadTxns : 16,
LlcMaxWriteTxns : 16,
LlcAmoNumCuts : 1,
LlcAmoPostCut : 1,
LlcOutConnect : 1,
Expand All @@ -577,6 +582,11 @@ package cheshire_pkg;
DmaConfMaxWriteTxns : 4,
DmaConfAmoNumCuts : 1,
DmaConfAmoPostCut : 1,
DmaConfEnableTwoD : 1,
DmaNumAxInFlight : 16,
DmaMemSysDepth : 8,
DmaJobFifoDepth : 2,
DmaRAWCouplingAvail : 1,
// GPIOs
GpioInputSyncs : 1,
// AXI RT
Expand Down
23 changes: 14 additions & 9 deletions hw/cheshire_soc.sv
Original file line number Diff line number Diff line change
Expand Up @@ -1460,15 +1460,20 @@ module cheshire_soc import cheshire_pkg::*; #(
end

dma_core_wrap #(
.AxiAddrWidth ( Cfg.AddrWidth ),
.AxiDataWidth ( Cfg.AxiDataWidth ),
.AxiIdWidth ( Cfg.AxiMstIdWidth ),
.AxiUserWidth ( Cfg.AxiUserWidth ),
.AxiSlvIdWidth ( AxiSlvIdWidth ),
.axi_mst_req_t ( axi_mst_req_t ),
.axi_mst_rsp_t ( axi_mst_rsp_t ),
.axi_slv_req_t ( axi_slv_req_t ),
.axi_slv_rsp_t ( axi_slv_rsp_t )
.AxiAddrWidth ( Cfg.AddrWidth ),
.AxiDataWidth ( Cfg.AxiDataWidth ),
.AxiIdWidth ( Cfg.AxiMstIdWidth ),
.AxiUserWidth ( Cfg.AxiUserWidth ),
.AxiSlvIdWidth ( AxiSlvIdWidth ),
.NumAxInFlight ( Cfg.DmaNumAxInFlight ),
.MemSysDepth ( Cfg.DmaMemSysDepth ),
.JobFifoDepth ( Cfg.DmaJobFifoDepth ),
.RAWCouplingAvail ( Cfg.DmaRAWCouplingAvail ),
.IsTwoD ( Cfg.DmaConfEnableTwoD ),
.axi_mst_req_t ( axi_mst_req_t ),
.axi_mst_rsp_t ( axi_mst_rsp_t ),
.axi_slv_req_t ( axi_slv_req_t ),
.axi_slv_rsp_t ( axi_slv_rsp_t )
) i_dma (
.clk_i,
.rst_ni,
Expand Down
130 changes: 130 additions & 0 deletions sw/include/dif/dma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Alessandro Ottaviano <[email protected]>
// Thomas Benz <[email protected]>

#include <stdint.h>
#include "regs/idma.h"
#include "params.h"

// Address of each iDMA 2D-frontend register, derived from the frontend's base address.
//
// BASE may be any pointer or integer expression. It is parenthesized before the cast so
// that a compound argument (e.g. `cond ? a : b`) is converted as a whole, and the offset
// is added on uintptr_t instead of on `void *` (arithmetic on `void *` is a GNU
// extension). The resulting expression still has type `void *`.
#define DMA_REG_ADDR(BASE, OFFSET) ((void *)((uintptr_t)(BASE) + (OFFSET)))

#define DMA_SRC_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET)
#define DMA_DST_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET)
#define DMA_NUMBYTES_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET)
#define DMA_CONF_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET)
#define DMA_STATUS_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET)
#define DMA_NEXTID_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET)
#define DMA_DONE_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET)
#define DMA_SRC_STRIDE_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET)
#define DMA_DST_STRIDE_ADDR(BASE) DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET)
#define DMA_NUM_REPS_ADDR(BASE) \
    DMA_REG_ADDR(BASE, IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET)

// Values written to the DECOUPLE, DEBURST and SERIALIZE bits of the CONF register by the
// memcpy helpers defined below; all three bits are left cleared.
#define DMA_CONF_DECOUPLE 0
#define DMA_CONF_DEBURST 0
#define DMA_CONF_SERIALIZE 0

// X-macro generating the register accessors and copy helpers for one iDMA instance.
//
// X(NAME, BASE_ADDR) emits the following functions, all prefixed NAME##_dma_:
//   - <reg>_ptr():   one accessor per frontend register, returning a volatile 64-bit
//                    pointer to DMA_<REG>_ADDR(BASE_ADDR).
//   - memcpy():      writes src, dst, size, a repetition count of 0 and the DMA_CONF_*
//                    bits, then reads the NEXT_ID register and returns the value read.
//   - 2d_memcpy():   writes src, dst, size, the DMA_CONF_* bits, both strides and
//                    num_reps, then reads NEXT_ID and returns the value read.
//   - blk_memcpy() / 2d_blk_memcpy(): start a transfer as above, then busy-wait until
//                    the DONE register reads back the ID that was returned.
//   - get_status():  returns the current value of the STATUS register.
//
// Every `inline` definition is paired with an `extern` declaration so that the including
// translation unit also provides an external definition; without it, a call that the
// compiler chooses not to inline would reference an undefined symbol.
//
// NOTE(review): regs/idma.h describes NEXT_ID as returning 0 if the transfer was not set
// up properly; the blocking variants do not special-case that value -- confirm intended.
#define X(NAME, BASE_ADDR) \
    extern volatile uint64_t *NAME##_dma_src_ptr(void); \
    extern volatile uint64_t *NAME##_dma_dst_ptr(void); \
    extern volatile uint64_t *NAME##_dma_num_bytes_ptr(void); \
    extern volatile uint64_t *NAME##_dma_conf_ptr(void); \
    extern volatile uint64_t *NAME##_dma_status_ptr(void); \
    extern volatile uint64_t *NAME##_dma_nextid_ptr(void); \
    extern volatile uint64_t *NAME##_dma_done_ptr(void); \
    extern volatile uint64_t *NAME##_dma_src_stride_ptr(void); \
    extern volatile uint64_t *NAME##_dma_dst_stride_ptr(void); \
    extern volatile uint64_t *NAME##_dma_num_reps_ptr(void); \
    \
    extern uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
    extern void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size); \
    extern uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps); \
    extern void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps); \
    extern uint64_t NAME##_dma_get_status(void); \
    \
    inline volatile uint64_t *NAME##_dma_src_ptr(void) { \
        return (volatile uint64_t *)DMA_SRC_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_dst_ptr(void) { \
        return (volatile uint64_t *)DMA_DST_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_num_bytes_ptr(void) { \
        return (volatile uint64_t *)DMA_NUMBYTES_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_conf_ptr(void) { \
        return (volatile uint64_t *)DMA_CONF_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_status_ptr(void) { \
        return (volatile uint64_t *)DMA_STATUS_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_nextid_ptr(void) { \
        return (volatile uint64_t *)DMA_NEXTID_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_done_ptr(void) { \
        return (volatile uint64_t *)DMA_DONE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_src_stride_ptr(void) { \
        return (volatile uint64_t *)DMA_SRC_STRIDE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_dst_stride_ptr(void) { \
        return (volatile uint64_t *)DMA_DST_STRIDE_ADDR(BASE_ADDR); \
    } \
    inline volatile uint64_t *NAME##_dma_num_reps_ptr(void) { \
        return (volatile uint64_t *)DMA_NUM_REPS_ADDR(BASE_ADDR); \
    } \
    \
    inline uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
        *(NAME##_dma_src_ptr()) = (uint64_t)src; \
        *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \
        *(NAME##_dma_num_bytes_ptr()) = size; \
        *(NAME##_dma_num_reps_ptr()) = 0; \
        *(NAME##_dma_conf_ptr()) = \
            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
        /* The value read from NEXT_ID is handed back to the caller as the transfer ID. */ \
        return *(NAME##_dma_nextid_ptr()); \
    } \
    \
    inline void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \
        volatile uint64_t tf_id = NAME##_dma_memcpy(dst, src, size); \
        /* Poll DONE until it reports the ID of the transfer just started. */ \
        while (*(NAME##_dma_done_ptr()) != tf_id) { \
            asm volatile("nop"); \
        } \
    } \
    \
    inline uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps) { \
        *(NAME##_dma_src_ptr()) = (uint64_t)src; \
        *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \
        *(NAME##_dma_num_bytes_ptr()) = size; \
        *(NAME##_dma_conf_ptr()) = \
            (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \
            (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \
            (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \
        *(NAME##_dma_src_stride_ptr()) = src_stride; \
        *(NAME##_dma_dst_stride_ptr()) = dst_stride; \
        *(NAME##_dma_num_reps_ptr()) = num_reps; \
        /* The value read from NEXT_ID is handed back to the caller as the transfer ID. */ \
        return *(NAME##_dma_nextid_ptr()); \
    } \
    \
    inline void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \
                                         uint64_t dst_stride, uint64_t src_stride, \
                                         uint64_t num_reps) { \
        volatile uint64_t tf_id = \
            NAME##_dma_2d_memcpy(dst, src, size, dst_stride, src_stride, num_reps); \
        /* Poll DONE until it reports the ID of the transfer just started. */ \
        while (*(NAME##_dma_done_ptr()) != tf_id) { \
            asm volatile("nop"); \
        } \
    } \
    \
    inline uint64_t NAME##_dma_get_status(void) { \
        return *(NAME##_dma_status_ptr()); \
    }

// Instantiate the helpers as sys_dma_*() for the DMA whose frontend registers start at
// &__base_dma (presumably made visible through params.h -- verify).
X(sys, &__base_dma);

// X is only needed for the instantiation above; keep it out of includers' namespace.
#undef X
57 changes: 57 additions & 0 deletions sw/include/regs/idma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Generated register defines for idma_reg64_2d_frontend

// Copyright information found in source file:
// Copyright 2022 ETH Zurich and University of Bologna.

// Licensing information found in source file:
// Licensed under Solderpad Hardware License, Version 0.51
// SPDX-License-Identifier: SHL-0.51

#ifndef _IDMA_REG64_2D_FRONTEND_REG_DEFS_
#define _IDMA_REG64_2D_FRONTEND_REG_DEFS_

#ifdef __cplusplus
extern "C" {
#endif
// Register width
// NOTE(review): the *_REG_OFFSET values below advance in steps of 0x8, which is
// consistent with byte offsets of consecutive 64-bit registers -- confirm against the
// register description. The *_BIT values are presumably bit indices within the
// corresponding register.
#define IDMA_REG64_2D_FRONTEND_PARAM_REG_WIDTH 64

// Source Address
#define IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET 0x0

// Destination Address
#define IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET 0x8

// Number of bytes
#define IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET 0x10

// Configuration Register for DMA settings
#define IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET 0x18
#define IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT 0
#define IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT 1
#define IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT 2

// DMA Status
#define IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET 0x20
#define IDMA_REG64_2D_FRONTEND_STATUS_BUSY_BIT 0

// Next ID, launches transfer, returns 0 if transfer not set up properly.
#define IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET 0x28

// Get ID of finished transactions.
#define IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET 0x30

// Source Stride
#define IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET 0x38

// Destination Stride
#define IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET 0x40

// Number of 2D repetitions
#define IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET 0x48

#ifdef __cplusplus
} // extern "C"
#endif
#endif // _IDMA_REG64_2D_FRONTEND_REG_DEFS_
// End generated register defines for idma_reg64_2d_frontend
1 change: 1 addition & 0 deletions sw/sw.mk
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ endef
$(eval $(call chs_sw_gen_hdr_rule,clint,$(CLINTROOT)/src/clint.hjson $(CLINTROOT)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,serial_link,$(CHS_ROOT)/hw/serial_link.hjson $(CHS_SLINK_DIR)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,axi_vga,$(AXI_VGA_ROOT)/data/axi_vga.hjson $(AXI_VGA_ROOT)/.generated))
$(eval $(call chs_sw_gen_hdr_rule,idma,$(IDMA_ROOT)/src/frontends/register_64bit_2d/idma_reg64_2d_frontend.hjson))
$(eval $(call chs_sw_gen_hdr_rule,axi_llc,$(CHS_LLC_DIR)/data/axi_llc_regs.hjson))
$(eval $(call chs_sw_gen_hdr_rule,cheshire,$(CHS_ROOT)/hw/regs/cheshire_regs.hjson))
$(eval $(call chs_sw_gen_hdr_rule,axi_rt,$(CHS_ROOT)/hw/regs/axi_rt_regs.hjson))
Expand Down
5 changes: 5 additions & 0 deletions target/xilinx/src/cheshire_top_xilinx.sv
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ module cheshire_top_xilinx
DmaConfMaxWriteTxns : 4,
DmaConfAmoNumCuts : 1,
DmaConfAmoPostCut : 1,
DmaConfEnableTwoD : 1,
DmaNumAxInFlight : 16,
DmaMemSysDepth : 8,
DmaJobFifoDepth : 2,
DmaRAWCouplingAvail : 1,
// GPIOs
GpioInputSyncs : 1,
// All non-set values should be zero
Expand Down
Loading