diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0175bf9d..2f1ba548d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,11 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 +# Required by the GitHub checkout action on Ubuntu 18.04-based containers, +# see https://github.com/actions/checkout/issues/1590 +env: + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + # Run functional regression checks name: ci on: [push, pull_request] diff --git a/Bender.yml b/Bender.yml index 40a690e25..bfea9e6a4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,7 +27,7 @@ dependencies: tech_cells_generic: { git: https://github.com/pulp-platform/tech_cells_generic, version: 0.2.11 } riscv-dbg: { git: https://github.com/pulp-platform/riscv-dbg, version: 0.8.0 } cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, version: 0.1.0 } - idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.0 } + idma: { git: https://github.com/pulp-platform/iDMA, version: 0.6.3 } export_include_dirs: - hw/reqrsp_interface/include diff --git a/sw/snRuntime/src/alloc.h b/sw/snRuntime/src/alloc.h index ba1dee99e..6d5f250bb 100644 --- a/sw/snRuntime/src/alloc.h +++ b/sw/snRuntime/src/alloc.h @@ -91,6 +91,8 @@ inline void snrt_alloc_init() { snrt_l3_allocator()->size = 0; snrt_l3_allocator()->next = snrt_l3_allocator()->base; } + // Synchronize with other cores + snrt_cluster_hw_barrier(); } // TODO colluca: optimize by using DMA diff --git a/sw/snRuntime/src/dma.h b/sw/snRuntime/src/dma.h index 79b009d33..3e4dc0573 100644 --- a/sw/snRuntime/src/dma.h +++ b/sw/snRuntime/src/dma.h @@ -10,49 +10,43 @@ typedef uint32_t snrt_dma_txid_t; /// Initiate an asynchronous 1D DMA transfer with wide 64-bit pointers. 
inline snrt_dma_txid_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src, size_t size) { - // Current DMA does not allow transfers with size == 0 (blocks) - // TODO(colluca) remove this check once new DMA is integrated - if (size > 0) { - register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10 - register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11 - register uint32_t reg_src_low asm("a2") = src >> 0; // 12 - register uint32_t reg_src_high asm("a3") = src >> 32; // 13 - register uint32_t reg_size asm("a4") = size; // 14 + register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10 + register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11 + register uint32_t reg_src_low asm("a2") = src >> 0; // 12 + register uint32_t reg_src_high asm("a3") = src >> 32; // 13 + register uint32_t reg_size asm("a4") = size; // 14 - // dmsrc a2, a3 - asm volatile( - ".word (0b0000000 << 25) | \ - ( (13) << 20) | \ - ( (12) << 15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" ::"r"(reg_src_high), - "r"(reg_src_low)); + // dmsrc a2, a3 + asm volatile( + ".word (0b0000000 << 25) | \ + ( (13) << 20) | \ + ( (12) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_src_high), + "r"(reg_src_low)); - // dmdst a0, a1 - asm volatile( - ".word (0b0000001 << 25) | \ - ( (11) << 20) | \ - ( (10) << 15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" ::"r"(reg_dst_high), - "r"(reg_dst_low)); + // dmdst a0, a1 + asm volatile( + ".word (0b0000001 << 25) | \ + ( (11) << 20) | \ + ( (10) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_dst_high), + "r"(reg_dst_low)); - // dmcpyi a0, a4, 0b00 - register uint32_t reg_txid asm("a0"); // 10 - asm volatile( - ".word (0b0000010 << 25) | \ - ( 0b00000 << 20) | \ - ( (14) << 15) | \ - ( 0b000 << 12) | \ - ( (10) << 7) | \ - (0b0101011 << 0) \n" - : "=r"(reg_txid) - : "r"(reg_size)); + // dmcpyi a0, a4, 0b00 + register uint32_t reg_txid asm("a0"); // 10 + asm volatile( + ".word (0b0000010 << 25) | \ + ( 0b00000 << 20) | 
\ + ( (14) << 15) | \ + ( 0b000 << 12) | \ + ( (10) << 7) | \ + (0b0101011 << 0) \n" + : "=r"(reg_txid) + : "r"(reg_size)); - return reg_txid; - } else { - return 0; - } + return reg_txid; } /// Initiate an asynchronous 1D DMA transfer. @@ -66,71 +60,65 @@ inline snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat) { - // Current DMA does not allow transfers with size == 0 (blocks) - // TODO(colluca) remove this check once new DMA is integrated - if (size > 0) { - register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10 - register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11 - register uint32_t reg_src_low asm("a2") = src >> 0; // 12 - register uint32_t reg_src_high asm("a3") = src >> 32; // 13 - register uint32_t reg_size asm("a4") = size; // 14 - register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15 - register uint32_t reg_src_stride asm("a6") = src_stride; // 16 - register uint32_t reg_repeat asm("a7") = repeat; // 17 + register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10 + register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11 + register uint32_t reg_src_low asm("a2") = src >> 0; // 12 + register uint32_t reg_src_high asm("a3") = src >> 32; // 13 + register uint32_t reg_size asm("a4") = size; // 14 + register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15 + register uint32_t reg_src_stride asm("a6") = src_stride; // 16 + register uint32_t reg_repeat asm("a7") = repeat; // 17 - // dmsrc a0, a1 - asm volatile( - ".word (0b0000000 << 25) | \ - ( (13) << 20) | \ - ( (12) << 15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" ::"r"(reg_src_high), - "r"(reg_src_low)); + // dmsrc a2, a3 + asm volatile( + ".word (0b0000000 << 25) | \ + ( (13) << 20) | \ + ( (12) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_src_high), + "r"(reg_src_low)); - // dmdst a0, a1 - asm volatile( - ".word (0b0000001 << 25) | \ - ( (11) << 20) | \ - ( (10) << 
15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" ::"r"(reg_dst_high), - "r"(reg_dst_low)); + // dmdst a0, a1 + asm volatile( + ".word (0b0000001 << 25) | \ + ( (11) << 20) | \ + ( (10) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" ::"r"(reg_dst_high), + "r"(reg_dst_low)); - // dmstr a5, a6 - asm volatile( - ".word (0b0000110 << 25) | \ - ( (15) << 20) | \ - ( (16) << 15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" - : - : "r"(reg_dst_stride), "r"(reg_src_stride)); + // dmstr a5, a6 + asm volatile( + ".word (0b0000110 << 25) | \ + ( (15) << 20) | \ + ( (16) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_dst_stride), "r"(reg_src_stride)); - // dmrep a7 - asm volatile( - ".word (0b0000111 << 25) | \ - ( (17) << 15) | \ - ( 0b000 << 12) | \ - (0b0101011 << 0) \n" - : - : "r"(reg_repeat)); + // dmrep a7 + asm volatile( + ".word (0b0000111 << 25) | \ + ( (17) << 15) | \ + ( 0b000 << 12) | \ + (0b0101011 << 0) \n" + : + : "r"(reg_repeat)); - // dmcpyi a0, a4, 0b10 - register uint32_t reg_txid asm("a0"); // 10 - asm volatile( - ".word (0b0000010 << 25) | \ - ( 0b00010 << 20) | \ - ( (14) << 15) | \ - ( 0b000 << 12) | \ - ( (10) << 7) | \ - (0b0101011 << 0) \n" - : "=r"(reg_txid) - : "r"(reg_size)); + // dmcpyi a0, a4, 0b10 + register uint32_t reg_txid asm("a0"); // 10 + asm volatile( + ".word (0b0000010 << 25) | \ + ( 0b00010 << 20) | \ + ( (14) << 15) | \ + ( 0b000 << 12) | \ + ( (10) << 7) | \ + (0b0101011 << 0) \n" + : "=r"(reg_txid) + : "r"(reg_size)); - return reg_txid; - } else { - return 0; - } + return reg_txid; } /// Initiate an asynchronous 2D DMA transfer. 
diff --git a/sw/snRuntime/src/start.c b/sw/snRuntime/src/start.c index 31f6e7c97..779dfcf2f 100644 --- a/sw/snRuntime/src/start.c +++ b/sw/snRuntime/src/start.c @@ -139,8 +139,10 @@ void snrt_main() { #endif #if defined(SNRT_INIT_BSS) || defined(SNRT_INIT_CLS) - // Single DMA wait call for both snrt_init_bss() and snrt_init_cls() + // Single DMA wait call and barrier for both snrt_init_bss() and + // snrt_init_cls() if (snrt_is_dm_core()) snrt_dma_wait_all(); + snrt_cluster_hw_barrier(); #endif #ifdef SNRT_CRT0_CALLBACK3