diff --git a/sw/hero/device/apps/libomptarget_device/link.ld b/sw/hero/device/apps/libomptarget_device/link.ld index 0f54b201..1d213ed0 100644 --- a/sw/hero/device/apps/libomptarget_device/link.ld +++ b/sw/hero/device/apps/libomptarget_device/link.ld @@ -6,16 +6,17 @@ OUTPUT_ARCH( "riscv" ) ENTRY(_start) /* Memory section should be provided in a separate, platform-specific */ -/* file. It should define at least the L1 and L3 memory blocks. */ +/* file. It should define at least the L1 and L2 memory blocks. */ MEMORY { - L3 : ORIGIN = 0xC0000000, LENGTH = 0x800000 + L2 : ORIGIN = 0x78000000, LENGTH = 0x800000 + LOCAL : ORIGIN = 0x51000000, LENGTH = 0x20000 } SECTIONS { - /* Program code goes into L3 */ + /* Program code goes into L2 */ .text : { . = ALIGN(4); @@ -27,19 +28,19 @@ SECTIONS *(.text) . = ALIGN(4); _etext = .; - } >L3 + } >L2 - /* By default, constant data goes into L3, right after code section */ + /* By default, constant data goes into L2, right after code section */ .rodata : { . = ALIGN(4); *(.rodata) *(.rodata*) . = ALIGN(4); - } >L3 + } >L2 /* HTIF section for FESVR */ - .htif : { } >L3 + .htif : { } >L2 /* Thread Local Storage sections */ .tdata : @@ -47,14 +48,14 @@ SECTIONS __tdata_start = .; *(.tdata .tdata.* .gnu.linkonce.td.*) __tdata_end = .; - } >L3 + } >L2 .tbss : { __tbss_start = .; *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) __tbss_end = .; - } >L3 + } >L2 /* Cluster Local Storage sections */ .cdata : @@ -62,13 +63,13 @@ SECTIONS __cdata_start = .; *(.cdata .cdata.*) __cdata_end = .; - } >L3 + } >LOCAL .cbss : { __cbss_start = .; *(.cbss .cbss.*) __cbss_end = .; - } >L3 + } >LOCAL /* used by the startup to initialize data */ _sidata = LOADADDR(.data); @@ -80,15 +81,15 @@ SECTIONS __global_pointer$ = . + 0x7f0; *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*) *(.sdata .sdata.* .gnu.linkonce.s.*) - } >L3 + } >L2 - /* Initialized data sections goes into L3 */ + /* Initialized data sections goes into L2 */ .data : { __DATA_BEGIN__ = .; *(.data .data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) - } >L3 + } >L2 _edata = .; PROVIDE (edata = .); /* small bss section */ @@ -99,7 +100,7 @@ SECTIONS *(.dynsbss) *(.sbss .sbss.* .gnu.linkonce.sb.*) *(.scommon) - } >L3 + } >L2 /* Uninitialized data section */ .bss : @@ -111,7 +112,7 @@ SECTIONS _end. Align after .bss to ensure correct alignment even if the .bss section disappears because there are no input sections. */ . = ALIGN(. != 0 ? 32 / 8 : 1); - } >L3 + } >L2 . = ALIGN(32 / 8); . = SEGMENT_START("ldata-segment", .); . = ALIGN(32 / 8); @@ -119,12 +120,11 @@ SECTIONS __bss_end = .; _end = .; PROVIDE (end = .); - /* Uninitialized data section in L3 */ + /* Uninitialized data section in L2 */ .dram : { *(.dram) _edram = .; - } >L3 + } >L2 - __uart = 0x2002000; } diff --git a/sw/hero/device/apps/libomptarget_device/src/io.h b/sw/hero/device/apps/libomptarget_device/src/io.h new file mode 100644 index 00000000..db3ce619 --- /dev/null +++ b/sw/hero/device/apps/libomptarget_device/src/io.h @@ -0,0 +1,94 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0
+//
+// Robert Balas
+//
+
+/* Description: Memory mapped register I/O access
+ */
+
+#ifndef __IO_H
+#define __IO_H
+
+#include <stdint.h>
+
+
+/* generic I/O write */
+static inline void writeb(uint8_t val, uintptr_t addr)
+{
+    asm volatile("sb %0, 0(%1)"
+                 :
+                 : "r"(val), "r"((volatile uint8_t *)addr)
+                 : "memory");
+}
+
+static inline void writeh(uint16_t val, uintptr_t addr)
+{
+    asm volatile("sh %0, 0(%1)"
+                 :
+                 : "r"(val), "r"((volatile uint16_t *)addr)
+                 : "memory");
+}
+
+static inline void writew(uint32_t val, uintptr_t addr)
+{
+    asm volatile("sw %0, 0(%1)"
+                 :
+                 : "r"(val), "r"((volatile uint32_t *)addr)
+                 : "memory");
+}
+
+static inline void writed(uint64_t val, uintptr_t addr)
+{
+    asm volatile("sd %0, 0(%1)"
+                 :
+                 : "r"(val), "r"((volatile uint64_t *)addr)
+                 : "memory");
+}
+
+/* generic I/O read */
+static inline uint8_t readb(const uintptr_t addr)
+{
+    uint8_t val;
+
+    asm volatile("lb %0, 0(%1)"
+                 : "=r"(val)
+                 : "r"((const volatile uint8_t *)addr)
+                 : "memory");
+    return val;
+}
+
+static inline uint16_t readh(const uintptr_t addr)
+{
+    uint16_t val;
+
+    asm volatile("lh %0, 0(%1)"
+                 : "=r"(val)
+                 : "r"((const volatile uint16_t *)addr)
+                 : "memory");
+    return val;
+}
+
+static inline uint32_t readw(const uintptr_t addr)
+{
+    uint32_t val;
+
+    asm volatile("lw %0, 0(%1)"
+                 : "=r"(val)
+                 : "r"((const volatile uint32_t *)addr)
+                 : "memory");
+    return val;
+}
+
+static inline uint64_t readd(const uintptr_t addr)
+{
+    uint64_t val;
+
+    asm volatile("ld %0, 0(%1)"
+                 : "=r"(val)
+                 : "r"((const volatile uint64_t *)addr)
+                 : "memory");
+    return val;
+}
+#endif
diff --git a/sw/hero/device/apps/libomptarget_device/src/main.c b/sw/hero/device/apps/libomptarget_device/src/main.c
index 15e92c8a..507e8d3a 100644
--- a/sw/hero/device/apps/libomptarget_device/src/main.c
+++ b/sw/hero/device/apps/libomptarget_device/src/main.c
@@ -5,6 +5,35 @@
 #include "sw_mailbox.h"
 #include "snrt.h"
+#include "io.h"
+
+__attribute__((optimize("O0"))) void csleep(uint32_t cycles) {
+    uint32_t start = snrt_mcycle();
+    while ((snrt_mcycle() - start) < cycles) {}
+}
+
+void snrt_putchar(char c) {
+    writew(c, (uintptr_t) 0x3002000);
+    csleep(100000);
+}
+
+void snrt_puthalfbyte(uint8_t halfbyte) {
+    uint32_t ascii[16] = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 65, 66, 67, 68, 69, 70};
+    snrt_putchar(ascii[halfbyte]);
+}
+
+void snrt_putbyte(uint8_t byte) {
+    snrt_puthalfbyte((byte >> 4) & 0xf);
+    snrt_puthalfbyte(byte & 0xf);
+}
+
+void snrt_putword(uint32_t word) {
+    for(int i = 3; i >= 0; i--)
+        snrt_putbyte((word >> 8 * i) & 0xff);
+    snrt_putchar(10);
+}
+
+//void _putchar(char character) {snrt_putchar(character);}
 
 //================================================================================
 // MACROS AND SETTINGS
 //================================================================================
@@ -59,7 +88,6 @@ typedef struct rab_miss_t {
 
 static volatile uint32_t g_printf_mutex = 0;
 static volatile uint32_t *soc_scratch = (uint32_t *)(0x02000014);
-struct l3_layout l3l;
 
 const uint32_t snrt_stack_size __attribute__((weak, section(".rodata"))) = 12;
 
@@ -106,6 +134,7 @@ void _snrt_hier_wakeup(void) {
  * @brief A re-entrant wrapper to printf
  *
  */
+
 void snrt_printf(const char *format, ...)
{ va_list args; @@ -288,20 +317,20 @@ int main(int argc, char *argv[]) { unsigned core_idx = snrt_cluster_core_idx(); unsigned core_num = snrt_cluster_core_num(); + /** * One core initializes the global data structures */ - if (snrt_is_dm_core()) { + if (core_idx == 0) { // read memory layout from scratch2 - memcpy(&l3l, (void *)soc_scratch[2], sizeof(struct l3_layout)); - g_a2h_rb = (struct ring_buf *)l3l.a2h_rb; - g_a2h_mbox = (struct ring_buf *)l3l.a2h_mbox; - g_h2a_mbox = (struct ring_buf *)l3l.h2a_mbox; + g_a2h_rb = NULL; + g_h2a_mbox = (struct ring_buf *)readw(0x3000000); + g_a2h_mbox = (struct ring_buf *)readw(0x3000004); } - snrt_cluster_hw_barrier(); + //snrt_cluster_hw_barrier(); - __snrt_omp_bootstrap(core_idx); + //__snrt_omp_bootstrap(core_idx); //snrt_trace("omp_bootstrap complete, core_idx: %d core_num: %d\n", core_idx, core_num); diff --git a/sw/hero/device/apps/libomptarget_device/src/sw_mailbox.c b/sw/hero/device/apps/libomptarget_device/src/sw_mailbox.c index 4ebfe486..0b194800 100644 --- a/sw/hero/device/apps/libomptarget_device/src/sw_mailbox.c +++ b/sw/hero/device/apps/libomptarget_device/src/sw_mailbox.c @@ -4,6 +4,7 @@ #include "sw_mailbox.h" #include "snrt.h" // snrt_mcycle +#include "io.h" /*********************************************************************************** * MACROS @@ -25,10 +26,6 @@ volatile struct ring_buf *g_h2a_mbox; /*********************************************************************************** * FUNCTIONS ***********************************************************************************/ -__attribute__((optimize("O0"))) void csleep(uint32_t cycles) { - uint32_t start = snrt_mcycle(); - while ((snrt_mcycle() - start) < cycles) {} -} int syscall(uint64_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) { @@ -55,12 +52,6 @@ int syscall(uint64_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2, return retries; } -void snrt_putchar(char c) { - *(volatile uint32_t *)0x2002000 = c; - csleep(10000); - //syscall(SYS_write, 1, c, 1, 0, 0); -} - void snrt_hero_exit(int code) { syscall(SYS_exit, code, 0, 0, 0, 0); } /*********************************************************************************** diff --git a/sw/hero/device/runtime/src/occamy_defs.h b/sw/hero/device/runtime/src/occamy_defs.h index 555ce560..7cbdcf02 100644 --- a/sw/hero/device/runtime/src/occamy_defs.h +++ b/sw/hero/device/runtime/src/occamy_defs.h @@ -7,10 +7,10 @@ #include "spatz_cluster_peripheral.h" // Hardware parameters -#define SNRT_BASE_HARTID 1 +#define SNRT_BASE_HARTID 16 #define SNRT_CLUSTER_CORE_NUM N_CORES_PER_CLUSTER #define SNRT_CLUSTER_NUM (N_QUADS * N_CLUSTERS_PER_QUAD) -#define SNRT_CLUSTER_DM_CORE_NUM 1 +#define SNRT_CLUSTER_DM_CORE_NUM 2 #define SNRT_TCDM_START_ADDR QUADRANT_0_CLUSTER_0_TCDM_BASE_ADDR #define SNRT_TCDM_SIZE \ (QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR - \ diff --git a/sw/hero/device/runtime/src/occamy_start.S b/sw/hero/device/runtime/src/occamy_start.S index 5634cc2e..e6fbec98 100644 --- a/sw/hero/device/runtime/src/occamy_start.S +++ b/sw/hero/device/runtime/src/occamy_start.S @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 #define SNRT_INIT_INT_REGS -#define SNRT_INIT_FP_REGS +//#define SNRT_INIT_FP_REGS #define SNRT_INIT_GP #define SNRT_INIT_CORE_INFO #define SNRT_INIT_CLS diff --git a/sw/hero/device/runtime/src/occamy_start.c b/sw/hero/device/runtime/src/occamy_start.c index 9332a432..e063dcb1 100644 --- a/sw/hero/device/runtime/src/occamy_start.c +++ b/sw/hero/device/runtime/src/occamy_start.c @@ 
-5,6 +5,8 @@ #define SNRT_INIT_TLS #define SNRT_INIT_BSS #define SNRT_INIT_CLS +#define SNRT_CRT0_CALLBACK1 +#define SNRT_CRT0_CALLBACK2 #define SNRT_CRT0_CALLBACK3 #define SNRT_INIT_LIBS #define SNRT_CRT0_PRE_BARRIER @@ -15,8 +17,14 @@ static inline void snrt_exit(int exit_code) { } +static inline void snrt_crt0_callback1() { +} + +static inline void snrt_crt0_callback2() { +} + static inline void snrt_crt0_callback3() { - _snrt_cluster_hw_barrier = cluster_hw_barrier_addr(snrt_cluster_idx()); + //_snrt_cluster_hw_barrier = cluster_hw_barrier_addr(snrt_cluster_idx()); } static inline void snrt_crt0_callback7() { return_to_cva6(SYNC_CLUSTERS); } diff --git a/sw/hero/device/runtime/src/snrt.c b/sw/hero/device/runtime/src/snrt.c index 1fbe277b..ad725a97 100644 --- a/sw/hero/device/runtime/src/snrt.c +++ b/sw/hero/device/runtime/src/snrt.c @@ -19,5 +19,4 @@ #include "printf.c" #include "putchar.c" #include "sync.c" -#include "sys_dma.c" #include "team.c" \ No newline at end of file diff --git a/sw/hero/device/runtime/src/snrt.h b/sw/hero/device/runtime/src/snrt.h index b7903507..66318b66 100644 --- a/sw/hero/device/runtime/src/snrt.h +++ b/sw/hero/device/runtime/src/snrt.h @@ -11,7 +11,6 @@ // Occamy specific definitions #include "occamy_defs.h" #include "occamy_memory_map.h" -#include "sys_dma.h" // Forward declarations #include "alloc_decls.h" diff --git a/sw/hero/device/toolchain.mk b/sw/hero/device/toolchain.mk index 2dd49030..3590340e 100644 --- a/sw/hero/device/toolchain.mk +++ b/sw/hero/device/toolchain.mk @@ -4,6 +4,6 @@ # # Luca Colagrande -RISCV_LDFLAGS += -L$(HERO_INSTALL)/lib/clang/15.0.0/rv32imafdvzfh-ilp32d/lib/ +RISCV_LDFLAGS += -L$(HERO_INSTALL)/lib/clang/15.0.0/rv32imafd-ilp32d/lib/ RISCV_CFLAGS += --sysroot=$(HERO_INSTALL)/rv32imafd-ilp32d/riscv32-unknown-elf include $(dir $(realpath $(lastword $(MAKEFILE_LIST))))/../../../vendor/snitch/target/snitch_cluster/sw/toolchain.mk diff --git a/sw/hero/shared/platform/generated/bitfield.h b/sw/hero/shared/platform/generated/bitfield.h new file mode 100644 index 00000000..193eeb3c --- /dev/null +++ b/sw/hero/shared/platform/generated/bitfield.h @@ -0,0 +1,286 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef OPENTITAN_SW_DEVICE_LIB_BASE_BITFIELD_H_ +#define OPENTITAN_SW_DEVICE_LIB_BASE_BITFIELD_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @file + * @brief Bitfield Manipulation Functions + */ + +/** + * All the bitfield functions are pure (they do not modify their arguments), so + * the result must be used. We enable warnings to ensure this happens. + */ +#define BITFIELD_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) + +/** + * A field of a 32-bit bitfield. + * + * The following field definition: `{ .mask = 0b11, .index = 12 }` + * + * Denotes the X-marked bits in the following 32-bit bitfield: + * + * field: 0b--------'--------'--XX----'-------- + * index: 31 0 + * + * Restrictions: The index plus the width of the mask must not be greater than + * 31. + */ +typedef struct bitfield_field32 { + /** The field mask. Usually all ones. */ + uint32_t mask; + /** The field position in the bitfield, counting from the zero-bit. */ + uint32_t index; +} bitfield_field32_t; + +/** + * Reads a value from `field` in `bitfield`. + * + * This function uses the `field` parameter to read the value from `bitfield`. 
+ * The resulting value will be shifted right and zero-extended so the field's + * zero-bit is the return value's zero-bit. + * + * @param bitfield Bitfield to get the field from. + * @param field Field to read out from. + * @return Zero-extended `field` from `bitfield`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline uint32_t bitfield_field32_read(uint32_t bitfield, + bitfield_field32_t field) { + return (bitfield >> field.index) & field.mask; +} + +/** + * Writes `value` to `field` in `bitfield`. + * + * This function uses the `field` parameter to set specific bits in `bitfield`. + * The relevant portion of `bitfield` is zeroed before the bits are set to + * `value`. + * + * @param bitfield Bitfield to set the field in. + * @param field Field within bitfield to be set. + * @param value Value for the new field. + * @return `bitfield` with `field` set to `value`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline uint32_t bitfield_field32_write(uint32_t bitfield, + bitfield_field32_t field, + uint32_t value) { + bitfield &= ~(field.mask << field.index); + bitfield |= (value & field.mask) << field.index; + return bitfield; +} + +/** + * A single bit in a 32-bit bitfield. + * + * This denotes the position of a single bit, counting from the zero-bit. + * + * For instance, `(bitfield_bit_index_t)4` denotes the X-marked bit in the + * following 32-bit bitfield: + * + * field: 0b--------'--------'--------'---X---- + * index: 31 0 + * + * Restrictions: The value must not be greater than 31. + */ +typedef uint32_t bitfield_bit32_index_t; + +/** + * Turns a `bitfield_bit32_index_t` into a `bitfield_field32_t` (which is more + * general). + * + * @param bit_index The corresponding single bit to turn into a field. + * @return A 1-bit field that corresponds to `bit_index`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline bitfield_field32_t bitfield_bit32_to_field32( + bitfield_bit32_index_t bit_index) { + return (bitfield_field32_t){ + .mask = 0x1, + .index = bit_index, + }; +} + +/** + * Reads the `bit_index`th bit in `bitfield`. + * + * @param bitfield Bitfield to get the bit from. + * @param bit_index Bit to read. + * @return `true` if the bit was one, `false` otherwise. + */ +BITFIELD_WARN_UNUSED_RESULT +inline bool bitfield_bit32_read(uint32_t bitfield, + bitfield_bit32_index_t bit_index) { + return bitfield_field32_read(bitfield, + bitfield_bit32_to_field32(bit_index)) == 0x1u; +} + +/** + * Writes `value` to the `bit_index`th bit in `bitfield`. + * + * @param bitfield Bitfield to update the bit in. + * @param bit_index Bit to update. + * @param value Bit value to write to `bitfield`. + * @return `bitfield` with the `bit_index`th bit set to `value`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline uint32_t bitfield_bit32_write(uint32_t bitfield, + bitfield_bit32_index_t bit_index, + bool value) { + return bitfield_field32_write( + bitfield, bitfield_bit32_to_field32(bit_index), value ? 0x1u : 0x0u); +} + +/** + * Find First Set Bit + * + * Returns one plus the index of the least-significant 1-bit of a 32-bit word. + * + * For instance, `bitfield_find_first_set32(field)` of the below 32-bit value + * returns `5`. + * + * field: 0b00000000'00000000'11111111'00010000 + * index: 31 0 + * + * This is the canonical definition for the GCC/Clang builtin `__builtin_ffs`, + * and hence takes and returns a signed integer. + * + * @param bitfield Bitfield to find the first set bit in. + * @return One plus the index of the least-significant 1-bit of `bitfield`. 
+ */ +BITFIELD_WARN_UNUSED_RESULT +inline int32_t bitfield_find_first_set32(int32_t bitfield) { + return __builtin_ffs(bitfield); +} + +/** + * Count Leading Zeroes + * + * Returns the number of leading 0-bits in `bitfield`, starting at the most + * significant bit position. If `bitfield` is 0, the result is 32, to match the + * RISC-V B Extension. + * + * For instance, `bitfield_count_leading_zeroes32(field)` of the below 32-bit + * value returns `16`. + * + * field: 0b00000000'00000000'11111111'00010000 + * index: 31 0 + * + * This is the canonical definition for the GCC/Clang builtin `__builtin_clz`, + * and hence returns a signed integer. + * + * @param bitfield Bitfield to count leading 0-bits from. + * @return The number of leading 0-bits in `bitfield`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline int32_t bitfield_count_leading_zeroes32(uint32_t bitfield) { + return (bitfield != 0) ? __builtin_clz(bitfield) : 32; +} + +/** + * Count Trailing Zeroes + * + * Returns the number of trailing 0-bits in `bitfield`, starting at the least + * significant bit position. If `bitfield` is 0, the result is 32, to match the + * RISC-V B Extension. + * + * For instance, `bitfield_count_trailing_zeroes32(field)` of the below 32-bit + * value returns `4`. + * + * field: 0b00000000'00000000'11111111'00010000 + * index: 31 0 + * + * This is the canonical definition for the GCC/Clang builtin `__builtin_ctz`, + * and hence returns a signed integer. + * + * @param bitfield Bitfield to count trailing 0-bits from. + * @return The number of trailing 0-bits in `bitfield`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline int32_t bitfield_count_trailing_zeroes32(uint32_t bitfield) { + return (bitfield != 0) ? __builtin_ctz(bitfield) : 32; +} + +/** + * Count Set Bits + * + * Returns the number of 1-bits in `bitfield`. + * + * For instance, `bitfield_popcount32(field)` of the below 32-bit value returns + * `9`. + * + * field: 0b00000000'00000000'11111111'00010000 + * index: 31 0 + * + * This is the canonical definition for the GCC/Clang builtin + * `__builtin_popcount`, and hence returns a signed integer. + * + * @param bitfield Bitfield to count 1-bits from. + * @return The number of 1-bits in `bitfield`. + */ +BITFIELD_WARN_UNUSED_RESULT +inline int32_t bitfield_popcount32(uint32_t bitfield) { + return __builtin_popcount(bitfield); +} + +/** + * Parity + * + * Returns the number of 1-bits in `bitfield`, modulo 2. + * + * For instance, `bitfield_parity32(field)` of the below 32-bit value returns + * `1`. + * + * field: 0b00000000'00000000'11111111'00010000 + * index: 31 0 + * + * This is the canonical definition for the GCC/Clang builtin + * `__builtin_parity`, and hence returns a signed integer. + * + * @param bitfield Bitfield to count 1-bits from. + * @return The number of 1-bits in `bitfield`, modulo 2. + */ +BITFIELD_WARN_UNUSED_RESULT +inline int32_t bitfield_parity32(uint32_t bitfield) { + return __builtin_parity(bitfield); +} + +/** + * Byte Swap + * + * Returns `field` with the order of the bytes reversed. Bytes here always means + * exactly 8 bits. + * + * For instance, `byteswap(field)` of the below 32-bit value returns `1`. + * + * field: 0bAAAAAAAA'BBBBBBBB'CCCCCCCC'DDDDDDDD + * index: 31 0 + * returns: 0bDDDDDDDD'CCCCCCCC'BBBBBBBB'AAAAAAAA + * + * This is the canonical definition for the GCC/Clang builtin + * `__builtin_bswap32`. + * + * @param bitfield Bitfield to reverse bytes of. + * @return `bitfield` with the order of bytes reversed. 
+ */ +BITFIELD_WARN_UNUSED_RESULT +inline uint32_t bitfield_byteswap32(uint32_t bitfield) { + return __builtin_bswap32(bitfield); +} + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // OPENTITAN_SW_DEVICE_LIB_BASE_BITFIELD_H_ diff --git a/sw/hero/shared/platform/generated/occamy_cfg.h b/sw/hero/shared/platform/generated/occamy_cfg.h new file mode 100644 index 00000000..b23cb573 --- /dev/null +++ b/sw/hero/shared/platform/generated/occamy_cfg.h @@ -0,0 +1,7 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#define N_QUADS 1 +#define N_CLUSTERS_PER_QUAD 1 +#define N_CORES_PER_CLUSTER 2 \ No newline at end of file diff --git a/sw/hero/shared/platform/generated/spatz_cluster_peripheral.h b/sw/hero/shared/platform/generated/spatz_cluster_peripheral.h new file mode 100644 index 00000000..981d24c7 --- /dev/null +++ b/sw/hero/shared/platform/generated/spatz_cluster_peripheral.h @@ -0,0 +1,208 @@ +// Generated register defines for spatz_cluster_peripheral + +// Copyright information found in source file: +// Copyright 2020 ETH Zurich and University of Bologna. + +// Licensing information found in source file: +// Licensed under Solderpad Hardware License, Version 0.51, see LICENSE for +// details. SPDX-License-Identifier: SHL-0.51 + +#ifndef _SPATZ_CLUSTER_PERIPHERAL_REG_DEFS_ +#define _SPATZ_CLUSTER_PERIPHERAL_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Number of performance counters +#define SPATZ_CLUSTER_PERIPHERAL_PARAM_NUM_PERF_COUNTERS 2 + +// Register width +#define SPATZ_CLUSTER_PERIPHERAL_PARAM_REG_WIDTH 64 + +// Enable particular performance counter and start tracking. (common +// parameters) +// Enable particular performance counter and start tracking. 
+#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_REG_OFFSET 0x0 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_CYCLE_0_BIT 0 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_TCDM_ACCESSED_0_BIT 1 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_TCDM_CONGESTED_0_BIT 2 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ISSUE_FPU_0_BIT 3 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ISSUE_FPU_SEQ_0_BIT 4 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ISSUE_CORE_TO_FPU_0_BIT 5 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_RETIRED_INSTR_0_BIT 6 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_RETIRED_LOAD_0_BIT 7 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_RETIRED_I_0_BIT 8 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_RETIRED_ACC_0_BIT 9 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AW_STALL_0_BIT 10 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AR_STALL_0_BIT 11 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_R_STALL_0_BIT 12 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_W_STALL_0_BIT 13 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_BUF_W_STALL_0_BIT 14 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_BUF_R_STALL_0_BIT 15 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AW_DONE_0_BIT 16 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AW_BW_0_BIT 17 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AR_DONE_0_BIT 18 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_AR_BW_0_BIT 19 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_R_DONE_0_BIT 20 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_R_BW_0_BIT 21 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_W_DONE_0_BIT 22 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_W_BW_0_BIT 23 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_B_DONE_0_BIT 24 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_DMA_BUSY_0_BIT 25 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ICACHE_MISS_0_BIT 26 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ICACHE_HIT_0_BIT 27 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ICACHE_PREFETCH_0_BIT 28 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ICACHE_DOUBLE_HIT_0_BIT \ + 29 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_ICACHE_STALL_0_BIT 30 + +// Enable particular performance counter and start tracking. 
+#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_REG_OFFSET 0x8 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_CYCLE_1_BIT 0 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_TCDM_ACCESSED_1_BIT 1 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_TCDM_CONGESTED_1_BIT 2 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ISSUE_FPU_1_BIT 3 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ISSUE_FPU_SEQ_1_BIT 4 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ISSUE_CORE_TO_FPU_1_BIT 5 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_RETIRED_INSTR_1_BIT 6 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_RETIRED_LOAD_1_BIT 7 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_RETIRED_I_1_BIT 8 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_RETIRED_ACC_1_BIT 9 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AW_STALL_1_BIT 10 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AR_STALL_1_BIT 11 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_R_STALL_1_BIT 12 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_W_STALL_1_BIT 13 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_BUF_W_STALL_1_BIT 14 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_BUF_R_STALL_1_BIT 15 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AW_DONE_1_BIT 16 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AW_BW_1_BIT 17 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AR_DONE_1_BIT 18 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_AR_BW_1_BIT 19 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_R_DONE_1_BIT 20 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_R_BW_1_BIT 21 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_W_DONE_1_BIT 22 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_W_BW_1_BIT 23 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_B_DONE_1_BIT 24 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_DMA_BUSY_1_BIT 25 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ICACHE_MISS_1_BIT 26 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ICACHE_HIT_1_BIT 27 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ICACHE_PREFETCH_1_BIT 28 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ICACHE_DOUBLE_HIT_1_BIT \ + 29 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1_ICACHE_STALL_1_BIT 30 + +// Select from which hart in the cluster, starting from `0`, +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_HART_SELECT_FIELD_WIDTH 10 +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_HART_SELECT_FIELDS_PER_REG 6 +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_MULTIREG_COUNT 2 + +// Select from which hart in the cluster, starting from `0`, +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_REG_OFFSET 0x10 +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_HART_SELECT_0_MASK 0x3ff +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_HART_SELECT_0_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_HART_SELECT_0_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_HART_SELECT_0_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0_HART_SELECT_0_OFFSET}) + +// Select from which hart in the cluster, starting from `0`, +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_REG_OFFSET 0x18 +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_HART_SELECT_1_MASK 0x3ff +#define SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_HART_SELECT_1_OFFSET 0 +#define 
SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_HART_SELECT_1_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_HART_SELECT_1_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_1_HART_SELECT_1_OFFSET}) + +// Performance counter. Set corresponding PERF_COUNTER_ENABLE bits depending +// on what +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_PERF_COUNTER_FIELD_WIDTH 48 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_PERF_COUNTER_FIELDS_PER_REG 1 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_MULTIREG_COUNT 2 + +// Performance counter. Set corresponding PERF_COUNTER_ENABLE bits depending +// on what +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_REG_OFFSET 0x20 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_PERF_COUNTER_0_MASK \ + 0xffffffffffff +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_PERF_COUNTER_0_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_PERF_COUNTER_0_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_PERF_COUNTER_0_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_PERF_COUNTER_0_OFFSET}) + +// Performance counter. Set corresponding PERF_COUNTER_ENABLE bits depending +// on what +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_REG_OFFSET 0x28 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_PERF_COUNTER_1_MASK \ + 0xffffffffffff +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_PERF_COUNTER_1_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_PERF_COUNTER_1_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_PERF_COUNTER_1_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_1_PERF_COUNTER_1_OFFSET}) + +// Set bits in the cluster-local CLINT. Writing a 1 at location i sets the +// cluster-local interrupt +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_REG_OFFSET 0x30 +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_CL_CLINT_SET_MASK 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_CL_CLINT_SET_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_CL_CLINT_SET_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_CL_CLINT_SET_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_CL_CLINT_SET_OFFSET}) + +// Clear bits in the cluster-local CLINT. Writing a 1 at location i clears +// the cluster-local interrupt +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_REG_OFFSET 0x38 +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_CL_CLINT_CLEAR_MASK 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_CL_CLINT_CLEAR_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_CL_CLINT_CLEAR_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_CL_CLINT_CLEAR_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_CL_CLINT_CLEAR_OFFSET}) + +// Hardware barrier register. Loads to this register will block until all +// cores have +#define SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_REG_OFFSET 0x40 +#define SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_HW_BARRIER_MASK 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_HW_BARRIER_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_HW_BARRIER_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_HW_BARRIER_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_HW_BARRIER_OFFSET}) + +// Controls prefetching of the instruction cache. 
+#define SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE_REG_OFFSET 0x48 +#define SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE_ICACHE_PREFETCH_ENABLE_BIT \ + 0 + +// Sets the status of the Spatz cluster. +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET 0x50 +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_SPATZ_CLUSTER_PROBE_BIT 0 + +// Controls the cluster boot process. +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_REG_OFFSET 0x58 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_MASK \ + 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_MASK, \ + .index = \ + SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_OFFSET}) + +// End of computation and exit status register +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_REG_OFFSET 0x60 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_MASK 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_OFFSET}) + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _SPATZ_CLUSTER_PERIPHERAL_REG_DEFS_ + // End generated register defines for spatz_cluster_peripheral \ No newline at end of file diff --git a/sw/hero/shared/platform/generated/tlb.h b/sw/hero/shared/platform/generated/tlb.h new file mode 100644 index 00000000..16be33ee --- /dev/null +++ b/sw/hero/shared/platform/generated/tlb.h @@ -0,0 +1,54 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <stdint.h>
+
+#include "occamy_addrmap.h"
+#include "snitch_quad_peripheral.h"
+
+static const uintptr_t QUAD_STRIDE = 0x10000;
+static const uintptr_t TLB_ENTRY_STRIDE = 0x20;
+
+// TODO: create and use TLB entry struct type
+
+static inline void write_tlb_entry(uint32_t wide, uint32_t quad_idx,
+                                   uint32_t entry_idx, uint64_t page_start,
+                                   uint64_t page_end, uint64_t page_out,
+                                   uint32_t read_only, uint32_t valid) {
+    // Compute entry base address
+    volatile uint64_t* entry_base;
+    if (wide) {  // wide case
+        const uintptr_t table_offs =
+            OCCAMY_QUADRANT_S1_TLB_WIDE_ENTRY_0_PAGEIN_FIRST_LOW_REG_OFFSET;
+        entry_base = (void*)(QUAD_0_CFG_BASE_ADDR + quad_idx * QUAD_STRIDE +
+                             table_offs + entry_idx * TLB_ENTRY_STRIDE);
+    } else {  // narrow case
+        const uintptr_t table_offs =
+            OCCAMY_QUADRANT_S1_TLB_NARROW_ENTRY_0_PAGEIN_FIRST_LOW_REG_OFFSET;
+        entry_base = (void*)(QUAD_0_CFG_BASE_ADDR + quad_idx * QUAD_STRIDE +
+                             table_offs + entry_idx * TLB_ENTRY_STRIDE);
+    }
+    // Write entry
+    entry_base[0] = page_start;
+    entry_base[1] = page_end;
+    entry_base[2] = page_out;
+    entry_base[3] = ((read_only & 1) << 1) | (valid & 1);
+}
+
+static inline void enable_tlb(uint32_t wide, uint32_t quad_idx,
+                              uint32_t enable) {
+    // Compute entry base address
+    volatile uint32_t* enable_reg;
+    if (wide) {  // wide case
+        enable_reg = (void*)(QUAD_0_CFG_BASE_ADDR + quad_idx * QUAD_STRIDE +
+                             OCCAMY_QUADRANT_S1_TLB_WIDE_ENABLE_REG_OFFSET);
+    } else {  // narrow case
+        enable_reg = (void*)(QUAD_0_CFG_BASE_ADDR + quad_idx * QUAD_STRIDE +
+                             OCCAMY_QUADRANT_S1_TLB_NARROW_ENABLE_REG_OFFSET);
+    }
+    // Write entry
+    *enable_reg = enable;
+}
diff --git a/sw/hero/shared/platform/generated/uart.h b/sw/hero/shared/platform/generated/uart.h
new file mode 100644
index 00000000..d81dcacb
--- /dev/null
+++ b/sw/hero/shared/platform/generated/uart.h
@@ -0,0 +1,107 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "occamy_base_addr.h" + +#define UART_RBR UART_BASE_ADDR + 0 +#define UART_THR UART_BASE_ADDR + 0 +#define UART_INTERRUPT_ENABLE UART_BASE_ADDR + 4 +#define UART_INTERRUPT_IDENT UART_BASE_ADDR + 8 +#define UART_FIFO_CONTROL UART_BASE_ADDR + 8 +#define UART_LINE_CONTROL UART_BASE_ADDR + 12 +#define UART_MODEM_CONTROL UART_BASE_ADDR + 16 +#define UART_LINE_STATUS UART_BASE_ADDR + 20 +#define UART_MODEM_STATUS UART_BASE_ADDR + 24 +#define UART_DLAB_LSB UART_BASE_ADDR + 0 +#define UART_DLAB_MSB UART_BASE_ADDR + 4 + +inline static void write_reg_u8(uintptr_t addr, uint8_t value) { + volatile uint8_t *loc_addr = (volatile uint8_t *)addr; + *loc_addr = value; +} + +inline static uint8_t read_reg_u8(uintptr_t addr) { + return *(volatile uint8_t *)addr; +} + +inline static int is_transmit_empty() { + return read_reg_u8(UART_LINE_STATUS) & 0x20; +} + +inline static int is_transmit_done() { + return read_reg_u8(UART_LINE_STATUS) & 0x40; +} + +inline static void write_serial(char a) { + while (is_transmit_empty() == 0) { + }; + + write_reg_u8(UART_THR, a); +} + +inline static void init_uart(uint32_t freq, uint32_t baud) { + uint32_t divisor = freq / (baud << 4); + + write_reg_u8(UART_INTERRUPT_ENABLE, 0x00); // Disable all interrupts + write_reg_u8(UART_LINE_CONTROL, + 0x80); // Enable DLAB (set baud rate divisor) + write_reg_u8(UART_DLAB_LSB, divisor); // divisor (lo byte) + write_reg_u8(UART_DLAB_MSB, (divisor >> 8) & 0xFF); // divisor (hi byte) + write_reg_u8(UART_LINE_CONTROL, 0x03); // 8 bits, no parity, one stop bit + write_reg_u8(UART_FIFO_CONTROL, + 0xC7); // Enable FIFO, clear them, with 14-byte threshold + write_reg_u8(UART_MODEM_CONTROL, 0x20); // Autoflow mode +} + +inline static void print_uart(const char *str) { + const char *cur = &str[0]; + while (*cur != '\0') { + write_serial((uint8_t)*cur); + ++cur; + } + while (!is_transmit_done()) + ; +} + +static uint8_t bin_to_hex_table[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + +inline static void bin_to_hex(uint8_t inp, uint8_t res[2]) { + res[1] = bin_to_hex_table[inp & 0xf]; + res[0] = bin_to_hex_table[(inp >> 4) & 0xf]; + return; +} + +inline static void print_uart_int(uint32_t addr) { + int i; + for (i = 3; i > -1; i--) { + uint8_t cur = (addr >> (i * 8)) & 0xff; + uint8_t hex[2]; + bin_to_hex(cur, hex); + write_serial(hex[0]); + write_serial(hex[1]); + } +} + +inline static void print_uart_addr(uint64_t addr) { + int i; + for (i = 7; i > -1; i--) { + uint8_t cur = (addr >> (i * 8)) & 0xff; + uint8_t hex[2]; + bin_to_hex(cur, hex); + write_serial(hex[0]); + write_serial(hex[1]); + } +} + +inline static void print_uart_byte(uint8_t byte) { + uint8_t hex[2]; + bin_to_hex(byte, hex); + write_serial(hex[0]); + write_serial(hex[1]); +} diff --git a/sw/hero/shared/platform/heterogeneous_runtime.h b/sw/hero/shared/platform/heterogeneous_runtime.h new file mode 100644 index 00000000..77cfaeeb --- /dev/null +++ b/sw/hero/shared/platform/heterogeneous_runtime.h @@ -0,0 +1,54 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "occamy.h" +#include "occamy_memory_map.h" + +// *Note*: to ensure that the usr_data field is at the same offset +// in the host and device (resp. 
64b and 32b architectures) +// usr_data is an explicitly-sized integer field instead of a pointer +typedef struct { + volatile uint32_t lock; + volatile uint32_t usr_data_ptr; +} comm_buffer_t; + +/**************/ +/* Interrupts */ +/**************/ + +inline void set_host_sw_interrupt() { *clint_msip_ptr(0) = 1; } + +inline void clear_host_sw_interrupt_unsafe() { *clint_msip_ptr(0) = 0; } + +inline void wait_host_sw_interrupt_clear() { + while (*clint_msip_ptr(0)) + ; +} + +inline void clear_host_sw_interrupt() { + clear_host_sw_interrupt_unsafe(); + wait_host_sw_interrupt_clear(); +} + +/**************************/ +/* Quadrant configuration */ +/**************************/ + +// Configure RO cache address range +inline void configure_read_only_cache_addr_rule(uint32_t quad_idx, + uint32_t rule_idx, + uint64_t start_addr, + uint64_t end_addr) { + volatile uint64_t* rule_ptr = + quad_cfg_ro_cache_addr_rule_ptr(quad_idx, rule_idx); + *(rule_ptr) = start_addr; + *(rule_ptr + 1) = end_addr; +} + +// Enable RO cache +inline void enable_read_only_cache(uint32_t quad_idx) { + *(quad_cfg_ro_cache_enable_ptr(quad_idx)) = 1; +} diff --git a/sw/hero/shared/platform/occamy.h b/sw/hero/shared/platform/occamy.h new file mode 100644 index 00000000..7ca437c8 --- /dev/null +++ b/sw/hero/shared/platform/occamy.h @@ -0,0 +1,10 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "occamy_cfg.h" +#include "occamy_memory_map.h" + +#define N_CLUSTERS (N_QUADS * N_CLUSTERS_PER_QUAD) +#define N_SNITCHES (N_CLUSTERS * N_CORES_PER_CLUSTER) +#define N_HARTS (N_SNITCHES + 1) diff --git a/sw/hero/shared/platform/occamy_base_addr.h b/sw/hero/shared/platform/occamy_base_addr.h new file mode 100644 index 00000000..4a85d1ff --- /dev/null +++ b/sw/hero/shared/platform/occamy_base_addr.h @@ -0,0 +1,11 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Generated by occamygen.py + +#pragma once + +#define UART_BASE_ADDR 0x03002000 +#define QUADRANT_0_CLUSTER_0_TCDM_BASE_ADDR 0x51000000 +#define QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR 0x51010000 diff --git a/sw/hero/shared/platform/occamy_memory_map.h b/sw/hero/shared/platform/occamy_memory_map.h new file mode 100644 index 00000000..bfcf6c25 --- /dev/null +++ b/sw/hero/shared/platform/occamy_memory_map.h @@ -0,0 +1,135 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "occamy_base_addr.h" +#include "occamy_cfg.h" + +// Auto-generated headers +#include "spatz_cluster_peripheral.h" + +//=============================================================== +// Reggen +//=============================================================== + +//=============================================================== +// Base addresses +//=============================================================== + +#define cluster_clint_set_base \ + (QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR + \ + SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_SET_REG_OFFSET) + +#define cluster_clint_clr_base \ + (QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR + \ + SPATZ_CLUSTER_PERIPHERAL_CL_CLINT_CLEAR_REG_OFFSET) + +#define cluster_hw_barrier_base \ + (QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR + \ + SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_REG_OFFSET) + +//=============================================================== +// Replicated address spaces +//=============================================================== + +#define cluster_offset 0x40000 + +inline uintptr_t translate_address(uintptr_t address, uint32_t instance, + uint32_t offset) { + return address + instance * offset; +} + +inline uintptr_t translate_cluster_address(uintptr_t address, + uint32_t cluster_idx) { + return translate_address(address, cluster_idx, cluster_offset); +} + +//=============================================================== +// Derived addresses +//=============================================================== + +inline uintptr_t cluster_clint_clr_addr(uint32_t cluster_idx) { + return translate_cluster_address(cluster_clint_clr_base, cluster_idx); +} + +inline uintptr_t cluster_clint_set_addr(uint32_t cluster_idx) { + return translate_cluster_address(cluster_clint_set_base, cluster_idx); +} + +inline uintptr_t cluster_tcdm_start_addr(uint32_t cluster_idx) { + return translate_cluster_address(QUADRANT_0_CLUSTER_0_TCDM_BASE_ADDR, + cluster_idx); +} + +inline uintptr_t cluster_tcdm_end_addr(uint32_t cluster_idx) { + return translate_cluster_address(QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR, + cluster_idx); +} + +inline uintptr_t cluster_hw_barrier_addr(uint32_t cluster_idx) { + return translate_cluster_address(cluster_hw_barrier_base, cluster_idx); +} + +//=============================================================== +// Pointers +//=============================================================== + +// Don't mark as volatile pointer to favour compiler optimizations. +// Despite in our multicore scenario this value could change unexpectedly +// we can make some assumptions which prevent this. +// Namely, we assume that whenever this register is written to, cores +// synchronize (and execute a memory fence) before reading it. This is usually +// the case, as this register would only be written by CVA6 during +// initialization and never changed. 
+inline uint32_t* soc_ctrl_scratch_ptr(uint32_t reg_idx) { + return NULL; +} + +inline volatile uint32_t* cluster_clint_clr_ptr(uint32_t cluster_idx) { + return (volatile uint32_t*)cluster_clint_clr_addr(cluster_idx); +} + +inline volatile uint32_t* cluster_clint_set_ptr(uint32_t cluster_idx) { + return (volatile uint32_t*)cluster_clint_set_addr(cluster_idx); +} + +inline volatile uint32_t* cluster_hw_barrier_ptr(uint32_t cluster_idx) { + return (volatile uint32_t*)cluster_hw_barrier_addr(cluster_idx); +} + +inline volatile uint32_t* cluster_zero_memory_ptr(uint32_t cluster_idx) { + return NULL; +} + +inline volatile uint32_t* clint_msip_ptr(uint32_t hartid) { + return NULL; +} + +inline volatile uint32_t* quad_cfg_reset_n_ptr(uint32_t quad_idx) { + return NULL; +} + +inline volatile uint32_t* quad_cfg_clk_ena_ptr(uint32_t quad_idx) { + return NULL; +} + +inline volatile uint32_t* quad_cfg_isolate_ptr(uint32_t quad_idx) { + return NULL; +} + +inline volatile uint32_t* quad_cfg_isolated_ptr(uint32_t quad_idx) { + return NULL; +} + +inline volatile uint32_t* quad_cfg_ro_cache_enable_ptr(uint32_t quad_idx) { + return NULL; +} + +inline volatile uint64_t* quad_cfg_ro_cache_addr_rule_ptr(uint32_t quad_idx, + uint32_t rule_idx) { + return NULL; +} diff --git a/vendor/snitch/sw/deps/printf/printf.c b/vendor/snitch/sw/deps/printf/printf.c index 8a700add..60003530 100644 --- a/vendor/snitch/sw/deps/printf/printf.c +++ b/vendor/snitch/sw/deps/printf/printf.c @@ -132,6 +132,7 @@ typedef struct { // internal buffer output static inline void _out_buffer(char character, void* buffer, size_t idx, size_t maxlen) { + _putchar(68); //D if (idx < maxlen) { ((char*)buffer)[idx] = character; } @@ -149,6 +150,7 @@ static inline void _out_null(char character, void* buffer, size_t idx, size_t ma static inline void _out_char(char character, void* buffer, size_t idx, size_t maxlen) { (void)buffer; (void)idx; (void)maxlen; + _putchar(69); //E if (character) { _putchar(character); } @@ -572,6 +574,16 @@ static size_t _etoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, d #endif // PRINTF_SUPPORT_EXPONENTIAL #endif // PRINTF_SUPPORT_FLOAT +static inline uint32_t readw(const uintptr_t addr) +{ + uint32_t val; + + asm volatile("lw %0, 0(%1)" + : "=r"(val) + : "r"((const volatile uint32_t *)addr) + : "memory"); + return val; +} // internal vsnprintf static int _vsnprintf(out_fct_type out, char* buffer, const size_t maxlen, const char* format, va_list va) @@ -843,12 +855,17 @@ static int _vsnprintf(out_fct_type out, char* buffer, const size_t maxlen, const break; default : + _putchar(67); //C + _putchar(71); //G out(*format, buffer, idx++, maxlen); format++; break; } } + _putchar(67); //C + _putchar(80); //P + // termination out((char)0, buffer, idx < maxlen ? idx : maxlen - 1U, maxlen); diff --git a/vendor/snitch/sw/snRuntime/base.ld b/vendor/snitch/sw/snRuntime/base.ld index d0979b71..1d213ed0 100644 --- a/vendor/snitch/sw/snRuntime/base.ld +++ b/vendor/snitch/sw/snRuntime/base.ld @@ -6,13 +6,17 @@ OUTPUT_ARCH( "riscv" ) ENTRY(_start) /* Memory section should be provided in a separate, platform-specific */ -/* file. It should define at least the L1 and L3 memory blocks. */ -INCLUDE memory.ld +/* file. It should define at least the L1 and L2 memory blocks. */ +MEMORY +{ + L2 : ORIGIN = 0x78000000, LENGTH = 0x800000 + LOCAL : ORIGIN = 0x51000000, LENGTH = 0x20000 +} SECTIONS { - /* Program code goes into L3 */ + /* Program code goes into L2 */ .text : { . = ALIGN(4); @@ -24,19 +28,19 @@ SECTIONS *(.text) . 
= ALIGN(4); _etext = .; - } >L3 + } >L2 - /* By default, constant data goes into L3, right after code section */ + /* By default, constant data goes into L2, right after code section */ .rodata : { . = ALIGN(4); *(.rodata) *(.rodata*) . = ALIGN(4); - } >L3 + } >L2 /* HTIF section for FESVR */ - .htif : { } >L3 + .htif : { } >L2 /* Thread Local Storage sections */ .tdata : @@ -44,17 +48,14 @@ SECTIONS __tdata_start = .; *(.tdata .tdata.* .gnu.linkonce.td.*) __tdata_end = .; - } >L3 + } >L2 .tbss : { __tbss_start = .; *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) __tbss_end = .; - } >L3 - /* add a section after .tbss to put the __tbss_end symbol into for - the LLD linker */ - .tbssend : { __tbss_end2 = .; } + } >L2 /* Cluster Local Storage sections */ .cdata : @@ -62,13 +63,13 @@ SECTIONS __cdata_start = .; *(.cdata .cdata.*) __cdata_end = .; - } >L3 + } >LOCAL .cbss : { __cbss_start = .; *(.cbss .cbss.*) __cbss_end = .; - } >L3 + } >LOCAL /* used by the startup to initialize data */ _sidata = LOADADDR(.data); @@ -80,15 +81,15 @@ SECTIONS __global_pointer$ = . + 0x7f0; *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*) *(.sdata .sdata.* .gnu.linkonce.s.*) - } >L3 + } >L2 - /* Initialized data sections goes into L3 */ + /* Initialized data sections goes into L2 */ .data : { __DATA_BEGIN__ = .; *(.data .data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) - } >L3 + } >L2 _edata = .; PROVIDE (edata = .); /* small bss section */ @@ -99,7 +100,7 @@ SECTIONS *(.dynsbss) *(.sbss .sbss.* .gnu.linkonce.sb.*) *(.scommon) - } >L3 + } >L2 /* Uninitialized data section */ .bss : @@ -111,7 +112,7 @@ SECTIONS _end. Align after .bss to ensure correct alignment even if the .bss section disappears because there are no input sections. */ . = ALIGN(. != 0 ? 32 / 8 : 1); - } >L3 + } >L2 . = ALIGN(32 / 8); . = SEGMENT_START("ldata-segment", .); . = ALIGN(32 / 8); @@ -119,10 +120,11 @@ SECTIONS __bss_end = .; _end = .; PROVIDE (end = .); - /* Uninitialized data section in L3 */ + /* Uninitialized data section in L2 */ .dram : { *(.dram) _edram = .; - } >L3 + } >L2 + }
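Taken together, the new headers compose into a small device-side MMIO layer: io.h supplies the raw load/store accessors, occamy_base_addr.h and spatz_cluster_peripheral.h supply the base addresses and generated register offsets, bitfield.h packs register fields, and the snrt_putchar/snrt_putword helpers in main.c give a printf-free debug path over the UART at 0x03002000. The sketch below shows how these pieces are meant to be combined, using end-of-computation signalling as an example; the helper name cluster_report_eoc and the ((exit_code << 1) | 1) encoding of the EOC/exit register are illustrative assumptions and are not defined anywhere in this patch.

    #include <stdint.h>

    #include "io.h"
    #include "occamy_base_addr.h"
    #include "spatz_cluster_peripheral.h"

    // Hypothetical helper, not part of the patch: report end of computation
    // to the host through the Spatz cluster peripheral's EOC/exit register
    // (0x51010000 + 0x60 with the generated defines above).
    static inline void cluster_report_eoc(uint32_t exit_code) {
        uintptr_t eoc_addr = QUADRANT_0_CLUSTER_0_PERIPH_BASE_ADDR +
                             SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_REG_OFFSET;
        // Assumed convention: bit 0 flags completion, the upper bits carry the code.
        writew((exit_code << 1) | 1u, eoc_addr);
    }

The same pattern (a base address from occamy_base_addr.h plus a generated *_REG_OFFSET, accessed through io.h) applies to the other registers introduced here, for example the hardware barrier and the CL_CLINT_SET/CL_CLINT_CLEAR registers used by the runtime.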