Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CONVOLVE ETH Cluster ITA Test #14

Draft
wants to merge 3 commits into
base: devel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file removed tests/chimera-convolve/.gitkeep
Empty file.
12 changes: 12 additions & 0 deletions tests/chimera-convolve/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Moritz Scherer <[email protected]>
# Philip Wiese <[email protected]>

# Host-core tests for this suite are currently disabled; the command below is
# kept commented out as a placeholder.
# add_subdirectory(host)

# Descend into snitchCluster/ and process its CMakeLists.txt, which in turn
# adds the Snitch cluster tests of this suite.
add_subdirectory(snitchCluster)
8 changes: 8 additions & 0 deletions tests/chimera-convolve/snitchCluster/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Moritz Scherer <[email protected]>
# Philip Wiese <[email protected]>

# Process simpleITA/CMakeLists.txt, which defines the simpleITA test targets.
add_subdirectory(simpleITA)
49 changes: 49 additions & 0 deletions tests/chimera-convolve/snitchCluster/simpleITA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Moritz Scherer <[email protected]>
# Philip Wiese <[email protected]>

# Base name shared by the test executable and its two object libraries.
set(TEST_NAME test_snitchCluster_simpleITA)

######## HOST Code #############################################################
# Every C file found below src_host/ is compiled into one object library.
file(GLOB_RECURSE TEST_HOST_SRCS "src_host/*.c")

add_library(${TEST_NAME}_host OBJECT ${TEST_HOST_SRCS})
target_include_directories(${TEST_NAME}_host PUBLIC include)

# WIESEP: Set the correct ISA and ABI for the host
# Only the optimization level is selected explicitly here.
# NOTE(review): ISA/ABI flags presumably propagate from the linked runtime targets - confirm.
target_compile_options(${TEST_NAME}_host PRIVATE -O2)
target_link_libraries(${TEST_NAME}_host PUBLIC runtime_host hal_host)

######## CLUSTER Code ##########################################################
# Every C file found below src_cluster/ is compiled into a second object library.
file(GLOB_RECURSE TEST_SNITCH_SRCS "src_cluster/*.c")

add_library(${TEST_NAME}_cluster OBJECT ${TEST_SNITCH_SRCS})
target_include_directories(${TEST_NAME}_cluster PUBLIC include)

# WIESEP: Set the correct ISA and ABI for the cluster
# WIESEP: Use -O1 optimization level for the cluster code otherwise the code currently breaks.
target_compile_options(${TEST_NAME}_cluster PRIVATE -O1)
target_link_libraries(${TEST_NAME}_cluster PUBLIC runtime_cluster_snitch)

######## TEST Executable #######################################################
add_chimera_test(${TEST_NAME})

# WIESEP: Link the host and cluster code to the test executable (chimera-sdk is already linked)
# Host objects are listed first, cluster objects second.
target_link_libraries(${TEST_NAME}
  PUBLIC
  ${TEST_NAME}_host
  ${TEST_NAME}_cluster
)
98 changes: 98 additions & 0 deletions tests/chimera-convolve/snitchCluster/simpleITA/include/ita.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Philip Wiese <[email protected]>

#ifndef _ITA_INCLUDE_GUARD_
#define _ITA_INCLUDE_GUARD_

#include "soc.h"
#include "cluster_4.h"

#include <stdint.h>

// WIESEP: This part should go into a cluster specific SDK

// ITA Accelerator
/**
 * @brief Signed 24-bit integer wrapped in a packed struct.
 *
 * The struct is packed so no padding is added after the 24-bit field (three
 * bytes with GCC/Clang). The bit-field is declared `signed int` explicitly,
 * because the signedness of a plain `int` bit-field is implementation-defined.
 */
typedef struct __attribute__((__packed__)) {
  signed int value : 24;
} ita_int24_t;

/**
 * @brief Layer selector passed as the `layer` argument of ITA_LAYER().
 *
 * ITA_LAYER() OR-s the value in unshifted, so the numeric values are fixed.
 */
typedef enum {
  ATTENTION = 0,   /* encoded as 0b00 */
  FEEDFORWARD = 1, /* encoded as 0b01 */
  LINEAR = 2       /* encoded as 0b10 */
} LayerType;

/**
 * @brief Activation selector passed as the `activation` argument of ITA_LAYER().
 *
 * ITA_LAYER() shifts the value left by two bits, so the numeric values are fixed.
 */
typedef enum {
  IDENTITY = 0, /* encoded as 0b00 */
  GELU = 1,     /* encoded as 0b01 */
  RELU = 2      /* encoded as 0b10 */
} ActivationType;

/*
 * Pack three tile values into one word: `s` is OR-ed in unshifted, `e` is
 * shifted left by 4 and `p` by 8. Every argument is parenthesized so that
 * expressions such as `a | b` expand with the intended precedence.
 * NOTE(review): s/e/p presumably stand for sequence/embedding/projection tile
 * counts - confirm against the ITA documentation.
 */
#define ITA_TILES(s, e, p) ((s) | ((e) << 4) | ((p) << 8))

/* Pack a layer selector (unshifted) and an activation selector (shifted left by 2). */
#define ITA_LAYER(layer, activation) ((layer) | ((activation) << 2))

/*
 * Pack five one-bit flags into one word, from bit 0 upwards:
 * weight_preload, weight_nextload, bias_disable, bias_direction, output_disable.
 */
#define ITA_FLAGS(weight_preload, weight_nextload, bias_disable, bias_direction, output_disable)  \
  ((weight_preload) | ((weight_nextload) << 1) | ((bias_disable) << 2) |                           \
   ((bias_direction) << 3) | ((output_disable) << 4))

/**
 * @brief Write the address, tile, layer and flag registers of the ITA accelerator.
 *
 * All five buffer addresses are written relative to CLUSTER_4_BASE. The
 * registers are written in ascending offset order (0x20 ... 0x58); offset 0x34
 * is skipped (the original code marks it as an unused sequence length).
 *
 * @param input_addr        Address of the input buffer.
 * @param weight_addr       Address of the weight buffer.
 * @param weight_next_addr  Address of the next weight buffer.
 * @param bias_addr         Address of the bias buffer.
 * @param output_addr       Address of the output buffer.
 * @param tiles             Tile word, e.g. built with ITA_TILES().
 * @param layer             Layer word, e.g. built with ITA_LAYER() (ctrl engine).
 * @param flags             Flag word, e.g. built with ITA_FLAGS() (ctrl stream).
 */
static inline void __attribute((always_inline))
ita_write_regs(uint32_t input_addr, uint32_t weight_addr, uint32_t weight_next_addr,
               uint32_t bias_addr, uint32_t output_addr, uint32_t tiles, uint32_t layer,
               uint32_t flags) {
  volatile uint32_t *const reg_input =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x20);
  volatile uint32_t *const reg_weight =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x24);
  volatile uint32_t *const reg_weight_next =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x28);
  volatile uint32_t *const reg_bias =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x2C);
  volatile uint32_t *const reg_output =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x30);
  volatile uint32_t *const reg_tiles =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x38);
  volatile uint32_t *const reg_layer =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x54);
  volatile uint32_t *const reg_flags =
      (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x58);

  /* Buffer addresses, relative to the cluster base. */
  *reg_input = input_addr - CLUSTER_4_BASE;
  *reg_weight = weight_addr - CLUSTER_4_BASE;
  *reg_weight_next = weight_next_addr - CLUSTER_4_BASE;
  *reg_bias = bias_addr - CLUSTER_4_BASE;
  *reg_output = output_addr - CLUSTER_4_BASE;

  /* The sequence-length register is left untouched (unused). */
  *reg_tiles = tiles;
  *reg_layer = layer; /* ctrl engine */
  *reg_flags = flags; /* ctrl stream */
}

/**
 * @brief Write the six requantization parameter registers of the ITA accelerator.
 *
 * The registers are six consecutive 32-bit words starting at offset 0x3C and
 * are written in ascending order: eps1, eps2, right_shift1, right_shift2,
 * add1, add2.
 *
 * @param eps1          Value for the register at offset 0x3C.
 * @param eps2          Value for the register at offset 0x40.
 * @param right_shift1  Value for the register at offset 0x44.
 * @param right_shift2  Value for the register at offset 0x48.
 * @param add1          Value for the register at offset 0x4C.
 * @param add2          Value for the register at offset 0x50.
 */
static inline void __attribute((always_inline))
ita_write_rqs_params(uint32_t eps1, uint32_t eps2, uint32_t right_shift1, uint32_t right_shift2,
                     uint32_t add1, uint32_t add2) {
  /* First requantization register; the remaining five follow word by word. */
  volatile uint32_t *const rqs = (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x3C);

  rqs[0] = eps1;
  rqs[1] = eps2;
  rqs[2] = right_shift1;
  rqs[3] = right_shift2;
  rqs[4] = add1;
  rqs[5] = add2;
}

/**
 * @brief Write 0 to the ITA register at offset 0x14, then busy-wait briefly.
 *
 * The delay is ten iterations of an empty loop; the counter is volatile so the
 * loop is not optimized away.
 * NOTE(review): offset 0x14 is presumably the HWPE soft-clear register and the
 * value 0 a full clear (in contrast to ita_soft_clear_keep_regs(), which
 * writes 1) - confirm against the HWPE register map.
 */
static inline void __attribute((always_inline)) ita_soft_clear(void) {
  *(volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x14) = 0;
  for (volatile uint32_t i = 0; i < 10; i++) {
    /* intentionally empty: short delay after the clear */
  }
}

/**
 * @brief Write 1 to the ITA register at offset 0x14, then busy-wait briefly.
 *
 * The delay is ten iterations of an empty loop; the counter is volatile so the
 * loop is not optimized away.
 * NOTE(review): judging by the function name, the value 1 requests a clear that
 * preserves the programmed registers - confirm against the HWPE register map.
 */
static inline void __attribute((always_inline)) ita_soft_clear_keep_regs(void) {
  *(volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x14) = 1;
  for (volatile uint32_t i = 0; i < 10; i++) {
    /* intentionally empty: short delay after the clear */
  }
}

/**
 * @brief Spin until the ITA register at offset 0x04 reads a value of at least 1.
 *
 * The register is re-read on every iteration through a volatile pointer.
 * NOTE(review): the read is unsigned, so the loop only keeps spinning while the
 * register reads exactly 0. If the hardware reports "no free context" as a
 * negative value, that would be treated as a successful acquire here - confirm
 * against the HWPE acquire-register semantics.
 */
static inline void __attribute((always_inline)) ita_acquire_job(void) {
  volatile uint32_t *const acquire = (volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x04);
  while (*acquire < 1) {
    /* busy-wait */
  }
}

/**
 * @brief Spin until the word at CLUSTER_4_HWPE_BUSY_ADDR reads 0.
 *
 * The location is re-read on every iteration through a volatile pointer.
 */
static inline void __attribute((always_inline)) ita_wait_job(void) {
  volatile uint32_t *const busy = (volatile uint32_t *)(CLUSTER_4_HWPE_BUSY_ADDR);
  while (*busy != 0) {
    /* busy-wait */
  }
}

/**
 * @brief Write 0 to the ITA register at offset 0x00.
 *
 * The cluster test uses this to commit the currently programmed context and
 * start execution (in contrast to ita_commit(), which writes 1).
 */
static inline void __attribute((always_inline)) ita_trigger(void) {
  *(volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x00) = 0;
}

/**
 * @brief Write 1 to the ITA register at offset 0x00.
 *
 * The cluster test uses this to commit the currently programmed context into
 * the job queue without starting execution (in contrast to ita_trigger(),
 * which writes 0).
 */
static inline void __attribute((always_inline)) ita_commit(void) {
  *(volatile uint32_t *)(CLUSTER_4_HWPE_ITA_BASE_ADDR + 0x00) = 1;
}

#endif //_ITA_INCLUDE_GUARD_

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Philip Wiese <[email protected]>

#ifndef _TEST_CLUSTER_INCLUDE_GUARD_
#define _TEST_CLUSTER_INCLUDE_GUARD_

#include <stdint.h>

/**
 * @brief Interrupt handler for the cluster cores; the host installs it via
 *        setup_snitchCluster_interruptHandler().
 *
 * Declared with an explicit `(void)` parameter list so the declaration is a
 * real prototype.
 */
void clusterInterruptHandler(void);

/**
 * @brief Test body that the host offloads to a cluster core.
 *
 * @param args Pointer to an offloadArgs_t.
 * @return -1 if the argument check fails, TESTVAL otherwise.
 */
int32_t testReturn(void *args);

#endif //_TEST_CLUSTER_INCLUDE_GUARD_
16 changes: 16 additions & 0 deletions tests/chimera-convolve/snitchCluster/simpleITA/include/test_host.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Philip Wiese <[email protected]>

#ifndef _TEST_HOST_INCLUDE_GUARD_
#define _TEST_HOST_INCLUDE_GUARD_

// Value returned by testReturn() on success and checked by the host's main().
// Its least-significant bit is already 1, so OR-ing it with 1 (as main() does)
// leaves it unchanged.
#define TESTVAL 0x050CCE55

// Argument block passed by pointer from the host to the offloaded cluster function.
// NOTE(review): the host initializes `value` with 0xdeadbeef, which does not fit
// in a 32-bit signed int, so the stored value relies on an implementation-defined
// conversion - consider an unsigned 32-bit field.
typedef struct {
int value;
} offloadArgs_t;

#endif //_TEST_HOST_INCLUDE_GUARD_
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Moritz Scherer <[email protected]>

#include "test_cluster.h"
#include "test_host.h"
#include "ita.h"

#include "soc.h"
#include "cluster_4.h"

// Pointer initialized to CLINT_CTRL_BASE; clusterInterruptHandler() reads it through
// its asm memory operand and indexes it by hart ID.
static uint32_t *clintPointer = (uint32_t *)CLINT_CTRL_BASE;

/**
 * @brief Interrupt handler for the cluster, which clears the interrupt flag for the current hart.
 *
 * Steps performed by the assembly body, in order:
 *  1. load `gp` with `__global_pointer$` (linker relaxation disabled for that load),
 *  2. set `tp` to zero,
 *  3. read `mhartid`,
 *  4. load the value of `clintPointer`,
 *  5. store 0 to the 32-bit word at `clintPointer + 4 * mhartid`,
 *  6. return with `ret`.
 *
 * The function is `naked`, so the compiler emits no prologue or epilogue.
 *
 * @warning Stack, thread and global pointer might not yet be set up!
 *
 * NOTE(review): GCC documents that only basic asm is supported inside naked functions. This body
 * is an extended asm statement with an "m" input operand, so the compiler may have to emit
 * address-materialization code for `clintPointer` ahead of the asm text - confirm the generated
 * code, in particular that it does not rely on `gp` before `gp` is loaded.
 */
__attribute__((naked)) void clusterInterruptHandler() {
asm volatile(
// Load global pointer
".option push\n"
".option norelax\n" // Disable relaxation to ensure `la` behaves as expected
"la gp, __global_pointer$\n" // Load address of global pointer
".option pop\n"

// Set thread pointer (tp) to zero
"mv tp, zero\n"

// Load mhartid CSR into t0
"csrr t0, mhartid\n"
// Load the value of clintPointer (the CLINT base address) into t1
"lw t1, %0\n"
// Calculate the interrupt target address: t1 = t1 + (t0 * 4)
"slli t0, t0, 2\n"
"add t1, t1, t0\n"
// Store 0 to the interrupt target address
"sw zero, 0(t1)\n"
"ret"
:
: "m"(clintPointer) // Pass clintPointer as input
: "t0", "t1" // Declare clobbered registers
);
}

/*
 * Values passed as eps1/eps2 to ita_write_rqs_params(), one row per programmed
 * job context. Each row packs six 8-bit values, least-significant byte first:
 * four in the first word and two in the second.
 */
const uint32_t requant_eps_mult[3][2] = {
    {0x68676665u, 0x6A69u}, /* bytes 101 102 103 104 | 105 106 */
    {0x7271706Fu, 0x7473u}, /* bytes 111 112 113 114 | 115 116 */
    {0x7C7B7A79u, 0x7E7Du}, /* bytes 121 122 123 124 | 125 126 */
};
/*
 * Values passed as right_shift1/right_shift2 to ita_write_rqs_params(), one row
 * per programmed job context. Each row packs six 8-bit values,
 * least-significant byte first: four in the first word and two in the second.
 */
const uint32_t requant_right_shift[3][2] = {
    {0xD6D5D4D3u, 0xD8D7u}, /* bytes 211 212 213 214 | 215 216 */
    {0xE0DFDEDDu, 0xE2E1u}, /* bytes 221 222 223 224 | 225 226 */
    {0xEAE9E8E7u, 0xECEBu}, /* bytes 231 232 233 234 | 235 236 */
};
/*
 * Values passed as add1/add2 to ita_write_rqs_params(), one row per programmed
 * job context. Each row packs six 8-bit values, least-significant byte first:
 * four in the first word and two in the second.
 */
const uint32_t requant_add[3][2] = {
    {0x04030201u, 0x0605u}, /* bytes  1  2  3  4 |  5  6 */
    {0x0E0D0C0Bu, 0x100Fu}, /* bytes 11 12 13 14 | 15 16 */
    {0x18171615u, 0x1A19u}, /* bytes 21 22 23 24 | 25 26 */
};

/**
 * @brief Main function of the cluster test.
 *
 * Validates the offload arguments, then programs three ITA job contexts, commits/triggers them
 * and waits until the HWPE busy register reads 0. The accelerator's output data is not
 * inspected here.
 *
 * @param args Pointer to an offloadArgs_t whose `value` must compare equal to 0xdeadbeef.
 * @return int32_t TESTVAL if the sequence completed, -1 if the argument check failed.
 */
int32_t testReturn(void *args) {

// Cast to the correct struct
offloadArgs_t *argsStruct = (offloadArgs_t *)args;

// Check if the value is correct
if (argsStruct->value != 0xdeadbeef) {
return -1;
}

// Clear the accelerator, then wait until a job context can be acquired
ita_soft_clear();
ita_acquire_job();

// Program the first context.
// Buffers: input at the TCDM start address, weights at +0x1000, next weights at +0x2000,
// bias at +0x3000, output at +0x4000.
ita_write_rqs_params(requant_eps_mult[0][0], requant_eps_mult[0][1], requant_right_shift[0][0],
requant_right_shift[0][1], requant_add[0][0], requant_add[0][1]);
ita_write_regs((uint32_t)CLUSTER_4_TCDM_START_ADDR, CLUSTER_4_TCDM_START_ADDR + 0x1000,
CLUSTER_4_TCDM_START_ADDR + 0x2000, CLUSTER_4_TCDM_START_ADDR + 0x3000,
CLUSTER_4_TCDM_START_ADDR + 0x4000, (uint32_t)ITA_TILES(1, 1, 1),
(uint32_t)ITA_LAYER(ATTENTION, IDENTITY), (uint32_t)ITA_FLAGS(1, 1, 0, 0, 0));

// Commit the first context into the queue
ita_commit();

// Program the second context (same layout, every buffer offset by a further 0x100)
ita_write_rqs_params(requant_eps_mult[1][0], requant_eps_mult[1][1], requant_right_shift[1][0],
requant_right_shift[1][1], requant_add[1][0], requant_add[1][1]);
ita_write_regs(
(uint32_t)CLUSTER_4_TCDM_START_ADDR + 0x100, CLUSTER_4_TCDM_START_ADDR + 0x1000 + 0x100,
CLUSTER_4_TCDM_START_ADDR + 0x2000 + 0x100, CLUSTER_4_TCDM_START_ADDR + 0x3000 + 0x100,
CLUSTER_4_TCDM_START_ADDR + 0x4000 + 0x100, (uint32_t)ITA_TILES(1, 1, 1),
(uint32_t)ITA_LAYER(ATTENTION, IDENTITY), (uint32_t)ITA_FLAGS(0, 1, 0, 0, 0));

// Commit the second context into the queue and start the execution
ita_trigger();

// Make sure accelerator can get new job into the queue
ita_acquire_job();

// Program the third context (same layout, every buffer offset by 0x200)
ita_write_rqs_params(requant_eps_mult[2][0], requant_eps_mult[2][1], requant_right_shift[2][0],
requant_right_shift[2][1], requant_add[2][0], requant_add[2][1]);
ita_write_regs(
(uint32_t)CLUSTER_4_TCDM_START_ADDR + 0x200, CLUSTER_4_TCDM_START_ADDR + 0x1000 + 0x200,
CLUSTER_4_TCDM_START_ADDR + 0x2000 + 0x200, CLUSTER_4_TCDM_START_ADDR + 0x3000 + 0x200,
CLUSTER_4_TCDM_START_ADDR + 0x4000 + 0x200, (uint32_t)ITA_TILES(1, 1, 1),
(uint32_t)ITA_LAYER(ATTENTION, IDENTITY), (uint32_t)ITA_FLAGS(0, 0, 0, 0, 0));

// Commit the third context into the queue and start the execution if not already running
ita_trigger();

// Wait for final result
ita_wait_job();

// Success marker expected by the host
return TESTVAL;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Philip Wiese <[email protected]>

#include "test_cluster.h"
#include "test_host.h"

#include "soc.h"
#include "driver.h"
#include "cluster_4.h"

// Passed as the third argument of offload_snitchCluster_core() in main(): 8 bytes below the
// cluster-4 TCDM end address. Judging by the name, this is the initial stack address of the
// offloaded core - confirm against the driver API.
#define STACK_ADDRESS (CLUSTER_4_TCDM_END_ADDR - 8)

// NOTE(review): not referenced anywhere in the visible part of this file - confirm whether it
// can be dropped.
static uint32_t *clintPointer = (uint32_t *)CLINT_CTRL_BASE;

// Arguments handed to testReturn(), which returns -1 unless `value` compares equal to 0xdeadbeef.
static offloadArgs_t offloadArgs = {.value = 0xdeadbeef};

/**
 * @brief Host entry point of the simpleITA test.
 *
 * Installs the cluster interrupt handler, offloads testReturn() with
 * `offloadArgs` and STACK_ADDRESS (remaining arguments: 4 and 0), then waits
 * for the return value of cluster 4.
 * NOTE(review): 4 is presumably the cluster index and 0 the core index -
 * confirm against the driver API.
 *
 * @return 0 if the returned value equals TESTVAL with its lowest bit set,
 *         1 otherwise.
 */
int main() {
  const uint32_t expected = (TESTVAL | 0x1);

  setup_snitchCluster_interruptHandler(clusterInterruptHandler);
  offload_snitchCluster_core(testReturn, &offloadArgs, (void *)(STACK_ADDRESS), 4, 0);

  const uint32_t result = wait_snitchCluster_return(4);
  if (result == expected) {
    return 0;
  }
  return 1;
}
Loading