Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cmake #7

Merged
merged 4 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
- isort check
- publication citation
- support 32bit scale
- cmake support
- const qualifier to `<acc>_dev_t` function arguments

### Changed

Expand All @@ -22,6 +24,10 @@
- `k_in_stride`, `w_in_stride`, `k_out_stride`, and `w_out_stride` from `ne16_nnx_dispatch_stride2x2`
- `mode` attribute from `ne16_quant_t` structure

### Fixed

- type conversion compiler warning

## [0.3.0] - 2024-01-14

### Added
Expand Down
52 changes: 52 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Build configuration for the PULP-NNX kernel library.
cmake_minimum_required(VERSION 3.18)

project(
  pulp-nnx
  VERSION 0.3.0
  DESCRIPTION "Kernel library for PULP-based NN accelerators."
  LANGUAGES C
)

# Static library seeded with the accelerator-independent utility sources.
# Accelerator-specific sources are attached below, depending on which
# USE_<acc> options are enabled.
add_library(pulp-nnx STATIC
  util/pulp_nnx_util.c
  util/hwpe.c
)

# PUBLIC: the headers are needed both to build the library and by any target
# that links against it.
target_include_directories(pulp-nnx
  PUBLIC
    inc
    util
)

# Accelerator selection. Both options default to OFF; at least one of them has
# to be switched on (e.g. -DUSE_NE16=ON), otherwise configuration is aborted.
option(USE_NE16 "Use the NE16 accelerator." OFF)
option(USE_NEUREKA "Use the N-EUREKA accelerator." OFF)

# Hand the variable *names* to if() instead of their expanded values: if()
# dereferences them itself, so an empty value cannot yield a malformed
# condition and a value that happens to name another variable is not
# dereferenced a second time.
if(NOT USE_NE16 AND NOT USE_NEUREKA)
  message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_<acc> option.")
endif()

# NE16 support: add the board support package, HAL and PULP-NNX interface
# sources, and expose the matching header directories to consumers.
# The condition uses the variable name (not ${USE_NE16}) so that if() performs
# the single, well-defined dereference itself.
if(USE_NE16)
  message(STATUS "[PULP-NNX] Using the NE16 accelerator.")
  target_sources(pulp-nnx
    PRIVATE
      ne16/bsp/ne16_pulp_bsp.c
      ne16/hal/ne16.c
      ne16/hal/ne16_task.c
      src/pulp_nnx_ne16.c
  )
  target_include_directories(pulp-nnx
    PUBLIC
      ne16/bsp
      ne16/hal
      ne16/gvsoc
  )
endif()

# N-EUREKA support: add the board support package, HAL and PULP-NNX interface
# sources, and expose the matching header directories to consumers.
# The condition uses the variable name (not ${USE_NEUREKA}) so that if()
# performs the single, well-defined dereference itself.
if(USE_NEUREKA)
  message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.")
  target_sources(pulp-nnx
    PRIVATE
      neureka/bsp/neureka_siracusa_bsp.c
      neureka/hal/neureka.c
      neureka/hal/neureka_task.c
      src/pulp_nnx_neureka.c
  )
  target_include_directories(pulp-nnx
    PUBLIC
      neureka/bsp
      neureka/hal
      neureka/gvsoc
  )
endif()
16 changes: 8 additions & 8 deletions inc/pulp_nnx_ne16.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,40 @@

/* PULP-NNX interface */

void ne16_nnx_init(ne16_dev_t *dev, ne16_pulp_conf_t *conf);
void ne16_nnx_term(ne16_dev_t *dev);
void ne16_nnx_init(const ne16_dev_t *dev, ne16_pulp_conf_t *conf);
void ne16_nnx_term(const ne16_dev_t *dev);

/** ne16_nnx_dispatch_check
*
* Check whether you can dispatch to the accelerator.
*/
int ne16_nnx_dispatch_check(ne16_dev_t *dev);
int ne16_nnx_dispatch_check(const ne16_dev_t *dev);

/** ne16_nnx_dispatch_wait
*
* Block until you can dispatch to the accelerator.
*/
void ne16_nnx_dispatch_wait(ne16_dev_t *dev);
void ne16_nnx_dispatch_wait(const ne16_dev_t *dev);

/** ne16_nnx_dispatch
*
* Dispatch a task to the accelerator.
* Fails with return code 1 if the task cannot be dispatched. Otherwise returns
* 0.
*/
int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task);
int ne16_nnx_dispatch(const ne16_dev_t *dev, ne16_task_t *task);

/** ne16_nnx_resolve_check
*
* Check whether the task has been resolved.
*/
int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task);
int ne16_nnx_resolve_check(const ne16_dev_t *dev, ne16_task_t *task);

/** ne16_nnx_resolve_wait
*
* Block until you can resolve the task.
*/
void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task);
void ne16_nnx_resolve_wait(const ne16_dev_t *dev, ne16_task_t *task);

/* Additional helper functions */

Expand All @@ -69,7 +69,7 @@ void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task);
* tile the tile to the subtile's spatial dimensions (in this case 3x3 output).
* Works only if the k_out is divisible by 2.
*/
void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task,
void ne16_nnx_dispatch_stride2x2(const ne16_dev_t *dev, ne16_task_t *task,
const uint32_t w_in, const uint32_t k_in,
const uint32_t h_out, const uint32_t w_out,
const uint32_t k_out, const uint8_t h_ker,
Expand Down
14 changes: 7 additions & 7 deletions inc/pulp_nnx_neureka.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,37 +25,37 @@

/* PULP-NNX interface */

void neureka_nnx_init(neureka_dev_t *dev, neureka_siracusa_conf_t *conf);
void neureka_nnx_term(neureka_dev_t *dev);
void neureka_nnx_init(const neureka_dev_t *dev, neureka_siracusa_conf_t *conf);
void neureka_nnx_term(const neureka_dev_t *dev);

/** neureka_nnx_dispatch_check
*
* Check whether you can dispatch to the accelerator.
*/
int neureka_nnx_dispatch_check(neureka_dev_t *dev);
int neureka_nnx_dispatch_check(const neureka_dev_t *dev);

/** neureka_nnx_dispatch_wait
*
* Block until you can dispatch to the accelerator.
*/
void neureka_nnx_dispatch_wait(neureka_dev_t *dev);
void neureka_nnx_dispatch_wait(const neureka_dev_t *dev);

/** neureka_nnx_dispatch
*
* Dispatch a task to the accelerator.
* Fails with return code 1 if the task cannot be dispatched. Otherwise returns
* 0.
*/
int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task);
int neureka_nnx_dispatch(const neureka_dev_t *dev, neureka_task_t *task);

/** neureka_nnx_resolve_check
*
* Check whether the task has been resolved.
*/
int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task);
int neureka_nnx_resolve_check(const neureka_dev_t *dev, neureka_task_t *task);

/** neureka_nnx_resolve_wait
*
* Block until you can resolve the task.
*/
void neureka_nnx_resolve_wait(neureka_dev_t *dev, neureka_task_t *task);
void neureka_nnx_resolve_wait(const neureka_dev_t *dev, neureka_task_t *task);
4 changes: 2 additions & 2 deletions ne16/gvsoc/ne16_gvsoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ typedef enum ne16_gvsoc_log_level_e {
NE16_GVSOC_LOG_LEVEL_ALL = 3
} ne16_gvsoc_log_level_e;

static void ne16_gvsoc_log_activate(ne16_dev_t *dev,
static void ne16_gvsoc_log_activate(const ne16_dev_t *dev,
ne16_gvsoc_log_level_e log_level,
ne16_gvsoc_log_format_e format) {
hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_LEVEL, log_level);
hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_FORMAT, format);
}

static void ne16_gvsoc_log_deactivate(ne16_dev_t *dev) {
static void ne16_gvsoc_log_deactivate(const ne16_dev_t *dev) {
hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_LEVEL,
NE16_GVSOC_LOG_LEVEL_CONFIG);
}
Expand Down
6 changes: 3 additions & 3 deletions ne16/hal/ne16.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@
#define NE16_STATUS_EMPTY (0x000)
#define NE16_STATUS_FULL (0x101)

inline int ne16_task_queue_tasks_in_flight(ne16_dev_t *dev) {
inline int ne16_task_queue_tasks_in_flight(const ne16_dev_t *dev) {
uint32_t status = hwpe_task_queue_status(&dev->hwpe_dev);
return (status & 0x1) + ((status >> 8) & 0x1);
}

inline int ne16_task_queue_empty(ne16_dev_t *dev) {
inline int ne16_task_queue_empty(const ne16_dev_t *dev) {
return hwpe_task_queue_status(&dev->hwpe_dev) == NE16_STATUS_EMPTY;
}

inline int ne16_task_queue_full(ne16_dev_t *dev) {
inline int ne16_task_queue_full(const ne16_dev_t *dev) {
return hwpe_task_queue_status(&dev->hwpe_dev) == NE16_STATUS_FULL;
}
6 changes: 3 additions & 3 deletions ne16/hal/ne16.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ typedef struct ne16_dev_t {
hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */
} ne16_dev_t;

int ne16_task_queue_tasks_in_flight(ne16_dev_t *dev);
int ne16_task_queue_empty(ne16_dev_t *dev);
int ne16_task_queue_full(ne16_dev_t *dev);
int ne16_task_queue_tasks_in_flight(const ne16_dev_t *dev);
int ne16_task_queue_empty(const ne16_dev_t *dev);
int ne16_task_queue_full(const ne16_dev_t *dev);

#endif // __NE16_H__
4 changes: 2 additions & 2 deletions neureka/gvsoc/neureka_gvsoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ typedef enum neureka_gvsoc_log_level_e {
NEUREKA_GVSOC_LOG_LEVEL_ALL = 3
} neureka_gvsoc_log_level_e;

static void neureka_gvsoc_log_activate(neureka_dev_t *dev,
static void neureka_gvsoc_log_activate(const neureka_dev_t *dev,
neureka_gvsoc_log_level_e log_level,
neureka_gvsoc_log_format_e format) {
hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_LEVEL, log_level);
hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_FORMAT, format);
}

static void neureka_gvsoc_log_deactivate(neureka_dev_t *dev) {
static void neureka_gvsoc_log_deactivate(const neureka_dev_t *dev) {
hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_LEVEL,
NEUREKA_GVSOC_LOG_LEVEL_JOB_START_END);
}
Expand Down
6 changes: 3 additions & 3 deletions neureka/hal/neureka.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@
#define NEUREKA_STATUS_EMPTY (0x000)
#define NEUREKA_STATUS_FULL (0x101)

inline int neureka_task_queue_tasks_in_flight(neureka_dev_t *dev) {
inline int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev) {
uint32_t status = hwpe_task_queue_status(&dev->hwpe_dev);
return (status & 0x1) + ((status >> 8) & 0x1);
}

inline int neureka_task_queue_empty(neureka_dev_t *dev) {
inline int neureka_task_queue_empty(const neureka_dev_t *dev) {
return hwpe_task_queue_status(&dev->hwpe_dev) == NEUREKA_STATUS_EMPTY;
}

inline int neureka_task_queue_full(neureka_dev_t *dev) {
inline int neureka_task_queue_full(const neureka_dev_t *dev) {
return hwpe_task_queue_status(&dev->hwpe_dev) == NEUREKA_STATUS_FULL;
}
6 changes: 3 additions & 3 deletions neureka/hal/neureka.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ typedef struct neureka_dev_t {
hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */
} neureka_dev_t;

int neureka_task_queue_tasks_in_flight(neureka_dev_t *dev);
int neureka_task_queue_empty(neureka_dev_t *dev);
int neureka_task_queue_full(neureka_dev_t *dev);
int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev);
int neureka_task_queue_empty(const neureka_dev_t *dev);
int neureka_task_queue_full(const neureka_dev_t *dev);

#endif // __NEUREKA_H__
21 changes: 10 additions & 11 deletions src/pulp_nnx_ne16.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,28 @@
#include "pulp_nnx_util.h"
#include <pmsis.h>
#include <stdint.h>
#include <sys/types.h>

void ne16_nnx_init(ne16_dev_t *dev, ne16_pulp_conf_t *conf) {
void ne16_nnx_init(const ne16_dev_t *dev, ne16_pulp_conf_t *conf) {
ne16_pulp_open(conf);
hwpe_soft_clear(&dev->hwpe_dev);
}

void ne16_nnx_term(ne16_dev_t *dev) {
void ne16_nnx_term(const ne16_dev_t *dev) {
hwpe_soft_clear(&dev->hwpe_dev);
ne16_pulp_close();
}

int ne16_nnx_dispatch_check(ne16_dev_t *dev) {
int ne16_nnx_dispatch_check(const ne16_dev_t *dev) {
return !ne16_task_queue_full(dev);
}

void ne16_nnx_dispatch_wait(ne16_dev_t *dev) {
void ne16_nnx_dispatch_wait(const ne16_dev_t *dev) {
while (!ne16_nnx_dispatch_check(dev)) {
ne16_pulp_event_wait_and_clear();
}
}

int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task) {
int ne16_nnx_dispatch(const ne16_dev_t *dev, ne16_task_t *task) {
if (hwpe_task_queue_acquire_task(&dev->hwpe_dev, &task->id)) {
return 1;
}
Expand All @@ -56,7 +55,7 @@ int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task) {
return 0;
}

int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task) {
int ne16_nnx_resolve_check(const ne16_dev_t *dev, ne16_task_t *task) {
#if __PLATFORM__ == ARCHI_PLATFORM_GVSOC
// GVSOC model has a broken running_id so resolve_check
// conservatively checks whether the task queue is empty.
Expand All @@ -69,7 +68,7 @@ int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task) {
#endif
}

void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task) {
void ne16_nnx_resolve_wait(const ne16_dev_t *dev, ne16_task_t *task) {
while (!ne16_nnx_resolve_check(dev, task)) {
ne16_pulp_event_wait_and_clear();
}
Expand All @@ -84,7 +83,7 @@ static inline uint32_t _get_tile_ptr(uint32_t ptr, int i, int j, int size_i,
(j * (size_j - overlap_j) - offset_j) * stride_k;
}

void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task,
void ne16_nnx_dispatch_stride2x2(const ne16_dev_t *dev, ne16_task_t *task,
const uint32_t w_in, const uint32_t k_in,
const uint32_t h_out, const uint32_t w_out,
const uint32_t k_out, const uint8_t h_ker,
Expand All @@ -102,8 +101,8 @@ void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task,
const uint32_t output_base = task->data.outfeat_ptr;
const uint32_t tile_padding = task->data.cfg.padding;

for (int i = 0; i < n_h; i++) {
for (int j = 0; j < n_w; j++) {
for (uint32_t i = 0; i < n_h; i++) {
for (uint32_t j = 0; j < n_w; j++) {
task->data.infeat_ptr = _get_tile_ptr(
input_base, i, j, 3 + h_ker - 1, 3 + w_ker - 1, k_in,
task->data.cfg.input_stride.d1, task->data.cfg.input_stride.d0,
Expand Down
14 changes: 7 additions & 7 deletions src/pulp_nnx_neureka.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,27 @@
#include <stdint.h>
#include <sys/types.h>

void neureka_nnx_init(neureka_dev_t *dev, neureka_siracusa_conf_t *conf) {
void neureka_nnx_init(const neureka_dev_t *dev, neureka_siracusa_conf_t *conf) {
neureka_siracusa_open(conf);
hwpe_soft_clear(&dev->hwpe_dev);
}

void neureka_nnx_term(neureka_dev_t *dev) {
void neureka_nnx_term(const neureka_dev_t *dev) {
hwpe_soft_clear(&dev->hwpe_dev);
neureka_siracusa_close();
}

int neureka_nnx_dispatch_check(neureka_dev_t *dev) {
int neureka_nnx_dispatch_check(const neureka_dev_t *dev) {
return !neureka_task_queue_full(dev);
}

void neureka_nnx_dispatch_wait(neureka_dev_t *dev) {
void neureka_nnx_dispatch_wait(const neureka_dev_t *dev) {
while (!neureka_nnx_dispatch_check(dev)) {
neureka_siracusa_event_wait_and_clear();
}
}

int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task) {
int neureka_nnx_dispatch(const neureka_dev_t *dev, neureka_task_t *task) {
if (hwpe_task_queue_acquire_task(&dev->hwpe_dev, &task->id)) {
return 1;
}
Expand All @@ -56,7 +56,7 @@ int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task) {
return 0;
}

int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task) {
int neureka_nnx_resolve_check(const neureka_dev_t *dev, neureka_task_t *task) {
#if __PLATFORM__ == ARCHI_PLATFORM_GVSOC
// GVSOC model has a broken running_id so resolve_check
// conservatively checks whether the task queue is empty.
Expand All @@ -69,7 +69,7 @@ int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task) {
#endif
}

void neureka_nnx_resolve_wait(neureka_dev_t *dev, neureka_task_t *task) {
void neureka_nnx_resolve_wait(const neureka_dev_t *dev, neureka_task_t *task) {
while (!neureka_nnx_resolve_check(dev, task)) {
neureka_siracusa_event_wait_and_clear();
}
Expand Down
Loading
Loading