diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ad1bd..711a051 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ - isort check - publication citation - support 32bit scale +- cmake support +- const qualifier to `_dev_t` function arguments ### Changed @@ -22,6 +24,10 @@ - `k_in_stride`, `w_in_stride`, `k_out_stride`, and `w_out_stride` from `ne16_nnx_dispatch_stride2x2` - `mode` attribute from `ne16_quant_t` structure +### Fixed + +- type conversion compiler warning + ## [0.3.0] - 2024-01-14 ### Added diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..171ef04 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.18) + +project(pulp-nnx + VERSION 0.3.0 + DESCRIPTION "Kernel library for PULP-based NN accelerators." + LANGUAGES C) + +add_library(pulp-nnx STATIC) + +target_sources(pulp-nnx PRIVATE util/pulp_nnx_util.c util/hwpe.c) +target_include_directories(pulp-nnx PUBLIC inc util) + +option(USE_NE16 "Use the NE16 accelerator.") +option(USE_NEUREKA "Use the N-EUREKA accelerator.") + +if (NOT ${USE_NE16} AND NOT ${USE_NEUREKA}) + message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_ option.") +endif() + +if (${USE_NE16}) + message(STATUS "[PULP-NNX] Using the NE16 accelerator.") + target_sources(pulp-nnx + PRIVATE + ne16/bsp/ne16_pulp_bsp.c + ne16/hal/ne16.c + ne16/hal/ne16_task.c + src/pulp_nnx_ne16.c + ) + target_include_directories(pulp-nnx + PUBLIC + ne16/bsp + ne16/hal + ne16/gvsoc + ) +endif() + +if (${USE_NEUREKA}) + message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.") + target_sources(pulp-nnx + PRIVATE + neureka/bsp/neureka_siracusa_bsp.c + neureka/hal/neureka.c + neureka/hal/neureka_task.c + src/pulp_nnx_neureka.c + ) + target_include_directories(pulp-nnx + PUBLIC + neureka/bsp + neureka/hal + neureka/gvsoc + ) +endif() diff --git a/inc/pulp_nnx_ne16.h b/inc/pulp_nnx_ne16.h index 97e6e2e..9bd7dec 100644 --- a/inc/pulp_nnx_ne16.h +++ b/inc/pulp_nnx_ne16.h @@ -25,20 +25,20 @@ /* PULP-NNX interface */ -void ne16_nnx_init(ne16_dev_t *dev, ne16_pulp_conf_t *conf); -void ne16_nnx_term(ne16_dev_t *dev); +void ne16_nnx_init(const ne16_dev_t *dev, ne16_pulp_conf_t *conf); +void ne16_nnx_term(const ne16_dev_t *dev); /** ne16_nnx_dispatch_check * * Check whether you can dispatch to the accelerator. */ -int ne16_nnx_dispatch_check(ne16_dev_t *dev); +int ne16_nnx_dispatch_check(const ne16_dev_t *dev); /** ne16_nnx_dispatch_wait * * Block until you can dispatch to the accelerator. */ -void ne16_nnx_dispatch_wait(ne16_dev_t *dev); +void ne16_nnx_dispatch_wait(const ne16_dev_t *dev); /** ne16_nnx_dispatch * @@ -46,19 +46,19 @@ void ne16_nnx_dispatch_wait(ne16_dev_t *dev); * Fails with return code 1 if the task cannot be dispatched. Otherwise returns * 0. */ -int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task); +int ne16_nnx_dispatch(const ne16_dev_t *dev, ne16_task_t *task); /** ne16_nnx_resolve_check * * Check whether the task has been resolved. */ -int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task); +int ne16_nnx_resolve_check(const ne16_dev_t *dev, ne16_task_t *task); /** ne16_nnx_resolve_wait * * Block until you can resolve the task. */ -void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task); +void ne16_nnx_resolve_wait(const ne16_dev_t *dev, ne16_task_t *task); /* Additional helper functions */ @@ -69,7 +69,7 @@ void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task); * tile the tile to the subtile's spatial dimensions (in this case 3x3 output). * Works only if the k_out is divisible by 2. */ -void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task, +void ne16_nnx_dispatch_stride2x2(const ne16_dev_t *dev, ne16_task_t *task, const uint32_t w_in, const uint32_t k_in, const uint32_t h_out, const uint32_t w_out, const uint32_t k_out, const uint8_t h_ker, diff --git a/inc/pulp_nnx_neureka.h b/inc/pulp_nnx_neureka.h index 25ef4a8..fea4bb4 100644 --- a/inc/pulp_nnx_neureka.h +++ b/inc/pulp_nnx_neureka.h @@ -25,20 +25,20 @@ /* PULP-NNX interface */ -void neureka_nnx_init(neureka_dev_t *dev, neureka_siracusa_conf_t *conf); -void neureka_nnx_term(neureka_dev_t *dev); +void neureka_nnx_init(const neureka_dev_t *dev, neureka_siracusa_conf_t *conf); +void neureka_nnx_term(const neureka_dev_t *dev); /** neureka_nnx_dispatch_check * * Check whether you can dispatch to the accelerator. */ -int neureka_nnx_dispatch_check(neureka_dev_t *dev); +int neureka_nnx_dispatch_check(const neureka_dev_t *dev); /** neureka_nnx_dispatch_wait * * Block until you can dispatch to the accelerator. */ -void neureka_nnx_dispatch_wait(neureka_dev_t *dev); +void neureka_nnx_dispatch_wait(const neureka_dev_t *dev); /** neureka_nnx_dispatch * @@ -46,16 +46,16 @@ void neureka_nnx_dispatch_wait(neureka_dev_t *dev); * Fails with return code 1 if the task cannot be dispatched. Otherwise returns * 0. */ -int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task); +int neureka_nnx_dispatch(const neureka_dev_t *dev, neureka_task_t *task); /** neureka_nnx_resolve_check * * Check whether the task has been resolved. */ -int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task); +int neureka_nnx_resolve_check(const neureka_dev_t *dev, neureka_task_t *task); /** neureka_nnx_resolve_wait * * Block until you can resolve the task. */ -void neureka_nnx_resolve_wait(neureka_dev_t *dev, neureka_task_t *task); +void neureka_nnx_resolve_wait(const neureka_dev_t *dev, neureka_task_t *task); diff --git a/ne16/gvsoc/ne16_gvsoc.h b/ne16/gvsoc/ne16_gvsoc.h index f6626fd..99d249d 100644 --- a/ne16/gvsoc/ne16_gvsoc.h +++ b/ne16/gvsoc/ne16_gvsoc.h @@ -39,14 +39,14 @@ typedef enum ne16_gvsoc_log_level_e { NE16_GVSOC_LOG_LEVEL_ALL = 3 } ne16_gvsoc_log_level_e; -static void ne16_gvsoc_log_activate(ne16_dev_t *dev, +static void ne16_gvsoc_log_activate(const ne16_dev_t *dev, ne16_gvsoc_log_level_e log_level, ne16_gvsoc_log_format_e format) { hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_LEVEL, log_level); hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_FORMAT, format); } -static void ne16_gvsoc_log_deactivate(ne16_dev_t *dev) { +static void ne16_gvsoc_log_deactivate(const ne16_dev_t *dev) { hwpe_task_reg_write(&dev->hwpe_dev, NE16_REG_GVSOC_LOG_LEVEL, NE16_GVSOC_LOG_LEVEL_CONFIG); } diff --git a/ne16/hal/ne16.c b/ne16/hal/ne16.c index d92a7d5..872a2c5 100644 --- a/ne16/hal/ne16.c +++ b/ne16/hal/ne16.c @@ -23,15 +23,15 @@ #define NE16_STATUS_EMPTY (0x000) #define NE16_STATUS_FULL (0x101) -inline int ne16_task_queue_tasks_in_flight(ne16_dev_t *dev) { +inline int ne16_task_queue_tasks_in_flight(const ne16_dev_t *dev) { uint32_t status = hwpe_task_queue_status(&dev->hwpe_dev); return (status & 0x1) + ((status >> 8) & 0x1); } -inline int ne16_task_queue_empty(ne16_dev_t *dev) { +inline int ne16_task_queue_empty(const ne16_dev_t *dev) { return hwpe_task_queue_status(&dev->hwpe_dev) == NE16_STATUS_EMPTY; } -inline int ne16_task_queue_full(ne16_dev_t *dev) { +inline int ne16_task_queue_full(const ne16_dev_t *dev) { return hwpe_task_queue_status(&dev->hwpe_dev) == NE16_STATUS_FULL; } diff --git a/ne16/hal/ne16.h b/ne16/hal/ne16.h index 88ebee7..c0a58ed 100644 --- a/ne16/hal/ne16.h +++ b/ne16/hal/ne16.h @@ -30,8 +30,8 @@ typedef struct ne16_dev_t { hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */ } ne16_dev_t; -int ne16_task_queue_tasks_in_flight(ne16_dev_t *dev); -int ne16_task_queue_empty(ne16_dev_t *dev); -int ne16_task_queue_full(ne16_dev_t *dev); +int ne16_task_queue_tasks_in_flight(const ne16_dev_t *dev); +int ne16_task_queue_empty(const ne16_dev_t *dev); +int ne16_task_queue_full(const ne16_dev_t *dev); #endif // __NE16_H__ diff --git a/neureka/gvsoc/neureka_gvsoc.h b/neureka/gvsoc/neureka_gvsoc.h index 37eeab0..20b45ec 100644 --- a/neureka/gvsoc/neureka_gvsoc.h +++ b/neureka/gvsoc/neureka_gvsoc.h @@ -39,14 +39,14 @@ typedef enum neureka_gvsoc_log_level_e { NEUREKA_GVSOC_LOG_LEVEL_ALL = 3 } neureka_gvsoc_log_level_e; -static void neureka_gvsoc_log_activate(neureka_dev_t *dev, +static void neureka_gvsoc_log_activate(const neureka_dev_t *dev, neureka_gvsoc_log_level_e log_level, neureka_gvsoc_log_format_e format) { hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_LEVEL, log_level); hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_FORMAT, format); } -static void neureka_gvsoc_log_deactivate(neureka_dev_t *dev) { +static void neureka_gvsoc_log_deactivate(const neureka_dev_t *dev) { hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_REG_GVSOC_LOG_LEVEL, NEUREKA_GVSOC_LOG_LEVEL_JOB_START_END); } diff --git a/neureka/hal/neureka.c b/neureka/hal/neureka.c index dc829d9..bee3bb2 100644 --- a/neureka/hal/neureka.c +++ b/neureka/hal/neureka.c @@ -23,15 +23,15 @@ #define NEUREKA_STATUS_EMPTY (0x000) #define NEUREKA_STATUS_FULL (0x101) -inline int neureka_task_queue_tasks_in_flight(neureka_dev_t *dev) { +inline int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev) { uint32_t status = hwpe_task_queue_status(&dev->hwpe_dev); return (status & 0x1) + ((status >> 8) & 0x1); } -inline int neureka_task_queue_empty(neureka_dev_t *dev) { +inline int neureka_task_queue_empty(const neureka_dev_t *dev) { return hwpe_task_queue_status(&dev->hwpe_dev) == NEUREKA_STATUS_EMPTY; } -inline int neureka_task_queue_full(neureka_dev_t *dev) { +inline int neureka_task_queue_full(const neureka_dev_t *dev) { return hwpe_task_queue_status(&dev->hwpe_dev) == NEUREKA_STATUS_FULL; } diff --git a/neureka/hal/neureka.h b/neureka/hal/neureka.h index eae77a1..b17c8b5 100644 --- a/neureka/hal/neureka.h +++ b/neureka/hal/neureka.h @@ -30,8 +30,8 @@ typedef struct neureka_dev_t { hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */ } neureka_dev_t; -int neureka_task_queue_tasks_in_flight(neureka_dev_t *dev); -int neureka_task_queue_empty(neureka_dev_t *dev); -int neureka_task_queue_full(neureka_dev_t *dev); +int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev); +int neureka_task_queue_empty(const neureka_dev_t *dev); +int neureka_task_queue_full(const neureka_dev_t *dev); #endif // __NEUREKA_H__ diff --git a/src/pulp_nnx_ne16.c b/src/pulp_nnx_ne16.c index f9799fc..c286189 100644 --- a/src/pulp_nnx_ne16.c +++ b/src/pulp_nnx_ne16.c @@ -24,29 +24,28 @@ #include "pulp_nnx_util.h" #include #include -#include -void ne16_nnx_init(ne16_dev_t *dev, ne16_pulp_conf_t *conf) { +void ne16_nnx_init(const ne16_dev_t *dev, ne16_pulp_conf_t *conf) { ne16_pulp_open(conf); hwpe_soft_clear(&dev->hwpe_dev); } -void ne16_nnx_term(ne16_dev_t *dev) { +void ne16_nnx_term(const ne16_dev_t *dev) { hwpe_soft_clear(&dev->hwpe_dev); ne16_pulp_close(); } -int ne16_nnx_dispatch_check(ne16_dev_t *dev) { +int ne16_nnx_dispatch_check(const ne16_dev_t *dev) { return !ne16_task_queue_full(dev); } -void ne16_nnx_dispatch_wait(ne16_dev_t *dev) { +void ne16_nnx_dispatch_wait(const ne16_dev_t *dev) { while (!ne16_nnx_dispatch_check(dev)) { ne16_pulp_event_wait_and_clear(); } } -int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task) { +int ne16_nnx_dispatch(const ne16_dev_t *dev, ne16_task_t *task) { if (hwpe_task_queue_acquire_task(&dev->hwpe_dev, &task->id)) { return 1; } @@ -56,7 +55,7 @@ int ne16_nnx_dispatch(ne16_dev_t *dev, ne16_task_t *task) { return 0; } -int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task) { +int ne16_nnx_resolve_check(const ne16_dev_t *dev, ne16_task_t *task) { #if __PLATFORM__ == ARCHI_PLATFORM_GVSOC // GVSOC model has a broken running_id so resolve_check // conservativly looks if the task queue is empty. @@ -69,7 +68,7 @@ int ne16_nnx_resolve_check(ne16_dev_t *dev, ne16_task_t *task) { #endif } -void ne16_nnx_resolve_wait(ne16_dev_t *dev, ne16_task_t *task) { +void ne16_nnx_resolve_wait(const ne16_dev_t *dev, ne16_task_t *task) { while (!ne16_nnx_resolve_check(dev, task)) { ne16_pulp_event_wait_and_clear(); } @@ -84,7 +83,7 @@ static inline uint32_t _get_tile_ptr(uint32_t ptr, int i, int j, int size_i, (j * (size_j - overlap_j) - offset_j) * stride_k; } -void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task, +void ne16_nnx_dispatch_stride2x2(const ne16_dev_t *dev, ne16_task_t *task, const uint32_t w_in, const uint32_t k_in, const uint32_t h_out, const uint32_t w_out, const uint32_t k_out, const uint8_t h_ker, @@ -102,8 +101,8 @@ void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task, const uint32_t output_base = task->data.outfeat_ptr; const uint32_t tile_padding = task->data.cfg.padding; - for (int i = 0; i < n_h; i++) { - for (int j = 0; j < n_w; j++) { + for (uint32_t i = 0; i < n_h; i++) { + for (uint32_t j = 0; j < n_w; j++) { task->data.infeat_ptr = _get_tile_ptr( input_base, i, j, 3 + h_ker - 1, 3 + w_ker - 1, k_in, task->data.cfg.input_stride.d1, task->data.cfg.input_stride.d0, diff --git a/src/pulp_nnx_neureka.c b/src/pulp_nnx_neureka.c index 0abb845..9847fc6 100644 --- a/src/pulp_nnx_neureka.c +++ b/src/pulp_nnx_neureka.c @@ -26,27 +26,27 @@ #include #include -void neureka_nnx_init(neureka_dev_t *dev, neureka_siracusa_conf_t *conf) { +void neureka_nnx_init(const neureka_dev_t *dev, neureka_siracusa_conf_t *conf) { neureka_siracusa_open(conf); hwpe_soft_clear(&dev->hwpe_dev); } -void neureka_nnx_term(neureka_dev_t *dev) { +void neureka_nnx_term(const neureka_dev_t *dev) { hwpe_soft_clear(&dev->hwpe_dev); neureka_siracusa_close(); } -int neureka_nnx_dispatch_check(neureka_dev_t *dev) { +int neureka_nnx_dispatch_check(const neureka_dev_t *dev) { return !neureka_task_queue_full(dev); } -void neureka_nnx_dispatch_wait(neureka_dev_t *dev) { +void neureka_nnx_dispatch_wait(const neureka_dev_t *dev) { while (!neureka_nnx_dispatch_check(dev)) { neureka_siracusa_event_wait_and_clear(); } } -int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task) { +int neureka_nnx_dispatch(const neureka_dev_t *dev, neureka_task_t *task) { if (hwpe_task_queue_acquire_task(&dev->hwpe_dev, &task->id)) { return 1; } @@ -56,7 +56,7 @@ int neureka_nnx_dispatch(neureka_dev_t *dev, neureka_task_t *task) { return 0; } -int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task) { +int neureka_nnx_resolve_check(const neureka_dev_t *dev, neureka_task_t *task) { #if __PLATFORM__ == ARCHI_PLATFORM_GVSOC // GVSOC model has a broken running_id so resolve_check // conservativly looks if the task queue is empty. @@ -69,7 +69,7 @@ int neureka_nnx_resolve_check(neureka_dev_t *dev, neureka_task_t *task) { #endif } -void neureka_nnx_resolve_wait(neureka_dev_t *dev, neureka_task_t *task) { +void neureka_nnx_resolve_wait(const neureka_dev_t *dev, neureka_task_t *task) { while (!neureka_nnx_resolve_check(dev, task)) { neureka_siracusa_event_wait_and_clear(); } diff --git a/test/app/src/nnx_layer.c b/test/app/src/nnx_layer.c index 001029f..a426248 100644 --- a/test/app/src/nnx_layer.c +++ b/test/app/src/nnx_layer.c @@ -185,7 +185,7 @@ static void task_prepare(nnx_task_t *task) { .flag_bias = flag_bias, .flag_shift = nnxTaskFlagFalse}); - nnx_task_set_ptrs_norm_quant(task, (uint32_t)scale, NULL, bias_ptr); + nnx_task_set_ptrs_norm_quant(task, (uint32_t)scale, (uint32_t)NULL, bias_ptr); #endif // HAS_NORM_QUANT }