Skip to content

Commit

Permalink
treewide: Bump iDMA and update handling of zero-length transfers (#156)
Browse files Browse the repository at this point in the history
* treewide: Bump iDMA and update handling of zero-length transfers

* snRuntime: Fix synchronization bug

Since only the DMA core initializes the allocator structs, but all
cores make use of them, it is important to place a barrier between
the initialization and their subsequent use.

* Fix GitHub checkout action bug
  • Loading branch information
colluca authored Jul 4, 2024
1 parent 17aa42a commit 2fb4c70
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 100 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

# Required by Github checkout action on Ubuntu 18.04-based container,
# see https://github.com/actions/checkout/issues/1590
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

# Run functional regression checks
name: ci
on: [push, pull_request]
Expand Down
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
tech_cells_generic: { git: https://github.com/pulp-platform/tech_cells_generic, version: 0.2.11 }
riscv-dbg: { git: https://github.com/pulp-platform/riscv-dbg, version: 0.8.0 }
cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, version: 0.1.0 }
idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.0 }
idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.3 }

export_include_dirs:
- hw/reqrsp_interface/include
Expand Down
2 changes: 2 additions & 0 deletions sw/snRuntime/src/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ inline void snrt_alloc_init() {
snrt_l3_allocator()->size = 0;
snrt_l3_allocator()->next = snrt_l3_allocator()->base;
}
// Synchronize with other cores
snrt_cluster_hw_barrier();
}

// TODO colluca: optimize by using DMA
Expand Down
184 changes: 86 additions & 98 deletions sw/snRuntime/src/dma.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,43 @@ typedef uint32_t snrt_dma_txid_t;
/// Initiate an asynchronous 1D DMA transfer with wide 64-bit pointers.
inline snrt_dma_txid_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src,
size_t size) {
// Current DMA does not allow transfers with size == 0 (blocks)
// TODO(colluca) remove this check once new DMA is integrated
if (size > 0) {
register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
register uint32_t reg_src_low asm("a2") = src >> 0; // 12
register uint32_t reg_src_high asm("a3") = src >> 32; // 13
register uint32_t reg_size asm("a4") = size; // 14
register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
register uint32_t reg_src_low asm("a2") = src >> 0; // 12
register uint32_t reg_src_high asm("a3") = src >> 32; // 13
register uint32_t reg_size asm("a4") = size; // 14

// dmsrc a2, a3
asm volatile(
".word (0b0000000 << 25) | \
( (13) << 20) | \
( (12) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_src_high),
"r"(reg_src_low));
// dmsrc a2, a3
asm volatile(
".word (0b0000000 << 25) | \
( (13) << 20) | \
( (12) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_src_high),
"r"(reg_src_low));

// dmdst a0, a1
asm volatile(
".word (0b0000001 << 25) | \
( (11) << 20) | \
( (10) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_dst_high),
"r"(reg_dst_low));
// dmdst a0, a1
asm volatile(
".word (0b0000001 << 25) | \
( (11) << 20) | \
( (10) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_dst_high),
"r"(reg_dst_low));

// dmcpyi a0, a4, 0b00
register uint32_t reg_txid asm("a0"); // 10
asm volatile(
".word (0b0000010 << 25) | \
( 0b00000 << 20) | \
( (14) << 15) | \
( 0b000 << 12) | \
( (10) << 7) | \
(0b0101011 << 0) \n"
: "=r"(reg_txid)
: "r"(reg_size));
// dmcpyi a0, a4, 0b00
register uint32_t reg_txid asm("a0"); // 10
asm volatile(
".word (0b0000010 << 25) | \
( 0b00000 << 20) | \
( (14) << 15) | \
( 0b000 << 12) | \
( (10) << 7) | \
(0b0101011 << 0) \n"
: "=r"(reg_txid)
: "r"(reg_size));

return reg_txid;
} else {
return 0;
}
return reg_txid;
}

/// Initiate an asynchronous 1D DMA transfer.
Expand All @@ -66,71 +60,65 @@ inline snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src,
size_t size, size_t dst_stride,
size_t src_stride,
size_t repeat) {
// Current DMA does not allow transfers with size == 0 (blocks)
// TODO(colluca) remove this check once new DMA is integrated
if (size > 0) {
register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
register uint32_t reg_src_low asm("a2") = src >> 0; // 12
register uint32_t reg_src_high asm("a3") = src >> 32; // 13
register uint32_t reg_size asm("a4") = size; // 14
register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
register uint32_t reg_src_stride asm("a6") = src_stride; // 16
register uint32_t reg_repeat asm("a7") = repeat; // 17
register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
register uint32_t reg_src_low asm("a2") = src >> 0; // 12
register uint32_t reg_src_high asm("a3") = src >> 32; // 13
register uint32_t reg_size asm("a4") = size; // 14
register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
register uint32_t reg_src_stride asm("a6") = src_stride; // 16
register uint32_t reg_repeat asm("a7") = repeat; // 17

// dmsrc a2, a3
asm volatile(
".word (0b0000000 << 25) | \
( (13) << 20) | \
( (12) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_src_high),
"r"(reg_src_low));
// dmsrc a2, a3
asm volatile(
".word (0b0000000 << 25) | \
( (13) << 20) | \
( (12) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_src_high),
"r"(reg_src_low));

// dmdst a0, a1
asm volatile(
".word (0b0000001 << 25) | \
( (11) << 20) | \
( (10) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_dst_high),
"r"(reg_dst_low));
// dmdst a0, a1
asm volatile(
".word (0b0000001 << 25) | \
( (11) << 20) | \
( (10) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n" ::"r"(reg_dst_high),
"r"(reg_dst_low));

// dmstr a5, a6
asm volatile(
".word (0b0000110 << 25) | \
( (15) << 20) | \
( (16) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n"
:
: "r"(reg_dst_stride), "r"(reg_src_stride));
// dmstr a5, a6
asm volatile(
".word (0b0000110 << 25) | \
( (15) << 20) | \
( (16) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n"
:
: "r"(reg_dst_stride), "r"(reg_src_stride));

// dmrep a7
asm volatile(
".word (0b0000111 << 25) | \
( (17) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n"
:
: "r"(reg_repeat));
// dmrep a7
asm volatile(
".word (0b0000111 << 25) | \
( (17) << 15) | \
( 0b000 << 12) | \
(0b0101011 << 0) \n"
:
: "r"(reg_repeat));

// dmcpyi a0, a4, 0b10
register uint32_t reg_txid asm("a0"); // 10
asm volatile(
".word (0b0000010 << 25) | \
( 0b00010 << 20) | \
( (14) << 15) | \
( 0b000 << 12) | \
( (10) << 7) | \
(0b0101011 << 0) \n"
: "=r"(reg_txid)
: "r"(reg_size));
// dmcpyi a0, a4, 0b10
register uint32_t reg_txid asm("a0"); // 10
asm volatile(
".word (0b0000010 << 25) | \
( 0b00010 << 20) | \
( (14) << 15) | \
( 0b000 << 12) | \
( (10) << 7) | \
(0b0101011 << 0) \n"
: "=r"(reg_txid)
: "r"(reg_size));

return reg_txid;
} else {
return 0;
}
return reg_txid;
}

/// Initiate an asynchronous 2D DMA transfer.
Expand Down
4 changes: 3 additions & 1 deletion sw/snRuntime/src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,10 @@ void snrt_main() {
#endif

#if defined(SNRT_INIT_BSS) || defined(SNRT_INIT_CLS)
// Single DMA wait call for both snrt_init_bss() and snrt_init_cls()
// Single DMA wait call and barrier for both snrt_init_bss() and
// snrt_init_cls()
if (snrt_is_dm_core()) snrt_dma_wait_all();
snrt_cluster_hw_barrier();
#endif

#ifdef SNRT_CRT0_CALLBACK3
Expand Down

0 comments on commit 2fb4c70

Please sign in to comment.