diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84b516f..78ad1bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,12 +8,14 @@
 - Support for kernels without normalization and quantization for NE16
 - isort check
 - publication citation
+- support 32bit scale
 
 ### Changed
 
 - `ne16_task_init` got split into smaller parts: `ne16_task_init`, `ne16_task_set_op_to_conv`, `ne16_task_set_weight_offset`, `ne16_task_set_bits`, `ne16_task_set_norm_quant`
 - strides in `ne16_task_set_strides`, `ne16_task_set_dims`, and `ne16_task_set_ptrs` are now strides between consecutive elements in that dimension
 - `ne16_task_queue_size` is now `NE16_TASK_QUEUE_SIZE`
+- `ne16_task_set_ptrs` split into `ne16_task_set_ptrs_conv` and `ne16_task_set_ptrs_norm_quant`
 
 ### Removed
 
diff --git a/ne16/README.md b/ne16/README.md
index 9f05956..750ccd5 100644
--- a/ne16/README.md
+++ b/ne16/README.md
@@ -28,7 +28,7 @@
 - [ ] Scale type
   - [x] uint8
   - [ ] uint16
-  - [ ] uint32
+  - [x] uint32
 - [x] Bias type
   - [x] int32
 - [ ] Weight type
diff --git a/ne16/hal/ne16_task.c b/ne16/hal/ne16_task.c
index f8408da..a519ce8 100644
--- a/ne16/hal/ne16_task.c
+++ b/ne16/hal/ne16_task.c
@@ -113,15 +113,18 @@ uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, uint32_t width_stride,
   return ptr - (padding_top * width + padding_left) * width_stride;
 }
 
-void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in,
-                        uint32_t w_in_stride, uint8_t padding_top,
-                        uint8_t padding_left, uint32_t output_ptr,
-                        uint32_t weights_ptr, uint32_t scale_ptr,
-                        uint32_t shift_ptr, uint32_t bias_ptr) {
+void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr,
+                             uint32_t w_in, uint32_t w_in_stride,
+                             uint8_t padding_top, uint8_t padding_left,
+                             uint32_t output_ptr, uint32_t weights_ptr) {
   task->data.infeat_ptr =
       ne16_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left);
   task->data.outfeat_ptr = output_ptr;
   task->data.weights_ptr = weights_ptr;
+}
+
+void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr,
+                                   uint32_t shift_ptr, uint32_t bias_ptr) {
   task->data.scale_ptr = scale_ptr;
   task->data.scale_shift_ptr = shift_ptr;
   task->data.scale_bias_ptr = bias_ptr;
@@ -206,8 +209,8 @@ void ne16_task_set_padding(ne16_task_t *task, const uint8_t top,
 }
 
 void ne16_task_set_mask_filter(ne16_task_t *task, const uint8_t top,
-                               const uint8_t right, const uint8_t bottom,
-                               const uint8_t left) {
+                               const uint8_t bottom, const uint8_t left,
+                               const uint8_t right) {
   task->data.cfg.filter_mask = ((top & 0xff) << 24) | ((right & 0xff) << 16) |
                                ((bottom & 0xff) << 8) | ((left & 0xff) << 0);
 }
@@ -219,8 +222,8 @@ void ne16_task_set_dims(ne16_task_t *task, const uint32_t w_in,
                         const uint32_t h_out_stride,
                         const uint32_t w_out_stride, const uint8_t padding_top,
                         const uint8_t padding_bottom,
-                        const uint8_t padding_right,
-                        const uint8_t padding_left) {
+                        const uint8_t padding_left,
+                        const uint8_t padding_right) {
   ne16_task_set_strides(task, k_in, h_in_stride, w_in_stride, h_out_stride,
                         w_out_stride);
   ne16_task_set_counters(task, k_in, h_out, w_out, k_out, padding_bottom,
@@ -235,8 +238,8 @@ void ne16_task_set_dims_stride2x2(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t h_ker, const uint8_t w_ker, const uint8_t padding_top,
-    const uint8_t padding_bottom, const uint8_t padding_right,
-    const uint8_t padding_left) {
+    const uint8_t padding_bottom, const uint8_t padding_left,
+    const uint8_t padding_right) {
   const uint8_t stride = 2;
   // WARNING: works only for even output channel stride (divisible by 2)
 
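For HAL users, the `ne16_task_set_ptrs` split above turns one call into two. A minimal migration sketch, assuming illustrative buffer names, sizes, and strides (nothing below is taken verbatim from the patch):

```c
#include "ne16_task.h"

// Illustrative buffers; a real application provides its own.
static uint8_t infeat[32 * 32 * 16], outfeat[32 * 32 * 16];
static uint8_t weights[16 * 2 * 3 * 3];
static uint32_t scale[16];
static int32_t bias[16];

static void example_set_ptrs(ne16_task_t *task, uint32_t w_in_stride) {
  // Convolution pointers (input, output, weights); padding is used to
  // pre-offset the input pointer, as before.
  ne16_task_set_ptrs_conv(task, (uint32_t)infeat, 32 /* w_in */, w_in_stride,
                          0 /* padding_top */, 0 /* padding_left */,
                          (uint32_t)outfeat, (uint32_t)weights);
  // Norm/quant pointers are now set separately, and only when the task
  // actually uses normalization/quantization.
  ne16_task_set_ptrs_norm_quant(task, (uint32_t)scale, 0 /* shift_ptr */,
                                (uint32_t)bias);
}
```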
diff --git a/ne16/hal/ne16_task.h b/ne16/hal/ne16_task.h
index 69bc78c..e18c256 100644
--- a/ne16/hal/ne16_task.h
+++ b/ne16/hal/ne16_task.h
@@ -42,8 +42,8 @@ typedef enum {
 
 typedef struct ne16_norm_t {
   ne16_norm_mode_e mode;
-  int flag_bias;
-  int flag_shift;
+  ne16_task_flag_e flag_bias;
+  ne16_task_flag_e flag_shift;
 } ne16_norm_t;
 
 typedef enum ne16_quant_mode_e {
@@ -59,9 +59,9 @@ typedef enum ne16_quant_function_e {
 
 typedef struct ne16_quant_t {
   // Shift amount must be in range 0x00-0x1F
-  unsigned shift_amount;
+  uint8_t shift_amount;
   ne16_quant_function_e function;
-  int flag_rounding;
+  ne16_task_flag_e flag_rounding;
 } ne16_quant_t;
 
 typedef struct ne16_stride_t {
@@ -133,11 +133,12 @@ uint32_t ne16_get_tile_padding(uint32_t padding, uint32_t i_height,
 uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width,
                       const uint32_t width_stride, const uint8_t padding_top,
                       const uint8_t padding_left);
-void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in,
-                        uint32_t w_in_stride, uint8_t padding_top,
-                        uint8_t padding_left, uint32_t output_ptr,
-                        uint32_t weights_ptr, uint32_t scale_ptr,
-                        uint32_t shift_ptr, uint32_t bias_ptr);
+void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr,
+                             uint32_t w_in, uint32_t w_in_stride,
+                             uint8_t padding_top, uint8_t padding_left,
+                             uint32_t output_ptr, uint32_t weights_ptr);
+void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr,
+                                   uint32_t shift_ptr, uint32_t bias_ptr);
 /** ne16_task_set_strides
  *
  * All the strides variables are strides between elements alongside that
@@ -157,8 +158,8 @@ void ne16_task_set_padding(ne16_task_t *task, const uint8_t top,
                            const uint8_t bottom, const uint8_t left,
                            const uint8_t right, const uint8_t value);
 void ne16_task_set_mask_filter(ne16_task_t *task, const uint8_t top,
-                               const uint8_t right, const uint8_t bottom,
-                               const uint8_t left);
+                               const uint8_t bottom, const uint8_t left,
+                               const uint8_t right);
 /** ne16_task_set_dims
  *
  * All the strides variables are strides between elements alongside that
@@ -172,8 +173,8 @@ void ne16_task_set_dims(ne16_task_t *task, const uint32_t w_in,
                         const uint32_t h_out_stride,
                         const uint32_t w_out_stride, const uint8_t padding_top,
                         const uint8_t padding_bottom,
-                        const uint8_t padding_right,
-                        const uint8_t padding_left);
+                        const uint8_t padding_left,
+                        const uint8_t padding_right);
 /** ne16_task_set_dims_stride2x2
  *
  * All the strides variables are strides between elements alongside that
@@ -186,7 +187,7 @@ void ne16_task_set_dims_stride2x2(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t h_ker, const uint8_t w_ker, const uint8_t padding_top,
-    const uint8_t padding_bottom, const uint8_t padding_right,
-    const uint8_t padding_left);
+    const uint8_t padding_bottom, const uint8_t padding_left,
+    const uint8_t padding_right);
 
 #endif // !__NE16_TASK_H__
diff --git a/neureka/README.md b/neureka/README.md
index 9c83f4e..51586c3 100644
--- a/neureka/README.md
+++ b/neureka/README.md
@@ -16,7 +16,6 @@ Github repo [link](https://github.com/siracusa-soc/ne).
 - [x] Bias (w/ and w/o)
 - [ ] Per-channel shift
 - [x] Per-layer shift
-- [ ] Rounding
 - [x] Input type
   - [x] uint8
   - [x] int8
@@ -24,9 +23,9 @@ Github repo [link](https://github.com/siracusa-soc/ne).
   - [x] int8
   - [x] uint8 (only w/ Relu)
   - [x] int32
-- [ ] Scale type
+- [x] Scale type
   - [x] uint8
-  - [ ] uint32
+  - [x] uint32
 - [x] Bias type
   - [x] int32
 - [ ] Weight type
diff --git a/neureka/bsp/siracusa/neureka_siracusa_bsp.h b/neureka/bsp/siracusa/neureka_siracusa_bsp.h
index be75a20..8083d70 100644
--- a/neureka/bsp/siracusa/neureka_siracusa_bsp.h
+++ b/neureka/bsp/siracusa/neureka_siracusa_bsp.h
@@ -18,8 +18,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifndef __NEUREKA_siracusa_BSP_H__
-#define __NEUREKA_siracusa_BSP_H__
+#ifndef __NEUREKA_SIRACUSA_BSP_H__
+#define __NEUREKA_SIRACUSA_BSP_H__
 
 #include "neureka.h"
 #include
@@ -64,4 +64,4 @@ void neureka_siracusa_close();
 void neureka_siracusa_event_wait_and_clear();
 const neureka_dev_t *neureka_siracusa_get_dev();
 
-#endif // !__NEUREKA_siracusa_BSP_H__
+#endif // !__NEUREKA_SIRACUSA_BSP_H__
diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c
index 16f54f8..84c2a15 100644
--- a/neureka/hal/neureka_task.c
+++ b/neureka/hal/neureka_task.c
@@ -47,8 +47,7 @@ void neureka_task_init(neureka_task_t *task) {
 
 void neureka_task_set_op_to_conv(neureka_task_t *task,
                                  const uint8_t kernel_shape,
-                                 const uint8_t depthwise,
-                                 const uint8_t stride) {
+                                 const uint8_t depthwise) {
   task->depthwise = depthwise;
   task->kernel_shape = kernel_shape;
   task->subtile_output_channel = depthwise ? NEUREKA_SUBTILE_INPUT_CHANNEL_3x3
@@ -133,16 +132,18 @@ uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width,
   return ptr - (padding_top * width + padding_left) * width_stride;
 }
 
-void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr,
-                           uint32_t w_in, uint32_t w_in_stride,
-                           uint8_t padding_top, uint8_t padding_left,
-                           uint32_t output_ptr, uint32_t weights_ptr,
-                           uint32_t scale_ptr, uint32_t shift_ptr,
-                           uint32_t bias_ptr) {
+void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr,
+                                uint32_t w_in, uint32_t w_in_stride,
+                                uint8_t padding_top, uint8_t padding_left,
+                                uint32_t output_ptr, uint32_t weights_ptr) {
   task->data.infeat_ptr =
       neureka_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left);
   task->data.outfeat_ptr = output_ptr;
   task->data.weights_ptr = weights_ptr;
+}
+
+void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr,
+                                      uint32_t shift_ptr, uint32_t bias_ptr) {
   task->data.scale_ptr = scale_ptr;
   task->data.scale_shift_ptr = shift_ptr;
   task->data.scale_bias_ptr = bias_ptr;
@@ -223,8 +224,8 @@ void neureka_task_set_padding(neureka_task_t *task, const uint8_t top,
 }
 
 void neureka_task_set_mask_filter(neureka_task_t *task, const uint8_t top,
-                                  const uint8_t right, const uint8_t bottom,
-                                  const uint8_t left) {
+                                  const uint8_t bottom, const uint8_t left,
+                                  const uint8_t right) {
   task->data.cfg.filter_mask = ((top & 0xff) << 24) | ((right & 0xff) << 16) |
                                ((bottom & 0xff) << 8) | ((left & 0xff) << 0);
 }
@@ -235,7 +236,7 @@ void neureka_task_set_dims(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t padding_top, const uint8_t padding_bottom,
-    const uint8_t padding_right, const uint8_t padding_left) {
+    const uint8_t padding_left, const uint8_t padding_right) {
   neureka_task_set_strides(task, k_in, h_in_stride, w_in_stride, h_out_stride,
                            w_out_stride);
   neureka_task_set_counters(task, k_in, h_out, w_out, k_out, padding_bottom,
diff --git a/neureka/hal/neureka_task.h b/neureka/hal/neureka_task.h
index 5217a71..4022fc0 100644
--- a/neureka/hal/neureka_task.h
+++ b/neureka/hal/neureka_task.h
@@ -51,8 +51,8 @@ typedef enum {
 
 typedef struct neureka_norm_t {
   neureka_norm_mode_e mode;
-  int flag_bias;
-  int flag_shift;
+  neureka_task_flag_e flag_bias;
+  neureka_task_flag_e flag_shift;
 } neureka_norm_t;
 
 typedef enum neureka_quant_mode_e {
@@ -67,9 +67,9 @@ typedef enum neureka_quant_function_e {
 
 typedef struct neureka_quant_t {
   // Shift amount must be in range 0x00-0x1F
-  unsigned shift_amount;
+  uint8_t shift_amount;
   neureka_quant_function_e function;
-  int flag_rounding;
+  neureka_task_flag_e flag_rounding;
 } neureka_quant_t;
 
 typedef struct neureka_stride_t {
@@ -128,7 +128,7 @@ typedef struct neureka_task_t {
 void neureka_task_init(neureka_task_t *task);
 void neureka_task_set_op_to_conv(neureka_task_t *task,
                                  const uint8_t kernel_shape,
-                                 const uint8_t depthwise, const uint8_t stride);
+                                 const uint8_t depthwise);
 void neureka_task_set_bits(neureka_task_t *task, const uint8_t input_bits,
                            const uint8_t output_bits,
                            const uint8_t weight_bits);
@@ -147,12 +147,12 @@ uint32_t neureka_get_tile_padding(uint32_t padding, uint32_t i_height,
 uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width,
                          const uint32_t width_stride,
                          const uint8_t padding_top, const uint8_t padding_left);
-void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr,
-                           uint32_t w_in, uint32_t w_in_stride,
-                           uint8_t padding_top, uint8_t padding_left,
-                           uint32_t output_ptr, uint32_t weights_ptr,
-                           uint32_t scale_ptr, uint32_t shift_ptr,
-                           uint32_t bias_ptr);
+void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr,
+                                uint32_t w_in, uint32_t w_in_stride,
+                                uint8_t padding_top, uint8_t padding_left,
+                                uint32_t output_ptr, uint32_t weights_ptr);
+void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr,
+                                      uint32_t shift_ptr, uint32_t bias_ptr);
 /** neureka_task_set_strides
  *
  * All the strides variables are strides between elements alongside that
@@ -173,8 +173,8 @@ void neureka_task_set_padding(neureka_task_t *task, const uint8_t top,
                               const uint8_t bottom, const uint8_t left,
                               const uint8_t right, const uint8_t value);
 void neureka_task_set_mask_filter(neureka_task_t *task, const uint8_t top,
-                                  const uint8_t right, const uint8_t bottom,
-                                  const uint8_t left);
+                                  const uint8_t bottom, const uint8_t left,
+                                  const uint8_t right);
 /** neureka_task_set_dims
  *
  * All the strides variables are strides between elements alongside that
@@ -187,6 +187,6 @@ void neureka_task_set_dims(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t padding_top, const uint8_t padding_bottom,
-    const uint8_t padding_right, const uint8_t padding_left);
+    const uint8_t padding_left, const uint8_t padding_right);
 
 #endif // !__NEUREKA_TASK_H__
diff --git a/test/NeuralEngineFunctionalModel.py b/test/NeuralEngineFunctionalModel.py
index 08b3601..b41702b 100644
--- a/test/NeuralEngineFunctionalModel.py
+++ b/test/NeuralEngineFunctionalModel.py
@@ -28,24 +28,34 @@ def _norm_quant(
         bias_type: Optional[IntegerType],
         has_bias: bool,
         has_relu: bool,
+        verbose: bool,
     ) -> torch.Tensor:
         # Scale accumulators are in 48bit, so keeping the data in 64bit
         tensor = tensor * scale
         assert tensor.dtype == torch.int64
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after scale):")
+            print(tensor)
+
         if has_bias:
             assert bias is not None
             assert bias_type is not None
 
-            # Saturating cast to int32
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=True
+                tensor, bias_type, saturate=False
             ).type(torch.int32)
 
             tensor = tensor + bias
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=False
+                tensor, bias_type, saturate=True
             ).type(torch.int32)
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after bias):")
+            print(tensor)
+
         if has_relu:
             tensor = F.relu(tensor)
@@ -118,6 +128,7 @@ def convolution(
             bias_type,
             has_bias,
             has_relu,
+            verbose,
         )
 
         return output
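The `saturate` swap above moves the saturating int32 cast from before the bias addition to after it, so the scaled accumulator is only clamped once the bias has been applied. A standalone sketch of why the order matters (the helper below is an illustrative stand-in for the model's saturating cast, not code from the repository):

```c
#include <stdint.h>
#include <stdio.h>

// Clamp a 64-bit value into the int32 range (illustrative saturating cast).
static int64_t saturate_to_int32(int64_t x) {
  if (x > INT32_MAX)
    return INT32_MAX;
  if (x < INT32_MIN)
    return INT32_MIN;
  return x;
}

int main(void) {
  const int64_t acc = 1LL << 33;        // scaled accumulator, beyond int32 range
  const int64_t bias = 5 - (1LL << 33); // bias that brings it back into range

  const int64_t clamp_then_bias = saturate_to_int32(acc) + bias; // clamps too early
  const int64_t bias_then_clamp = saturate_to_int32(acc + bias); // 5, as intended

  printf("%lld %lld\n", (long long)clamp_then_bias, (long long)bias_then_clamp);
  return 0;
}
```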
diff --git a/test/NeurekaMemoryLayout.py b/test/NeurekaMemoryLayout.py
index 80a2786..028c7a3 100644
--- a/test/NeurekaMemoryLayout.py
+++ b/test/NeurekaMemoryLayout.py
@@ -20,8 +20,6 @@
 import numpy as np
 import numpy.typing as npt
 
-from TestClasses import IntegerType
-
 
 class NeurekaMemoryLayout:
     _WEIGHT_BANDWIDTH = 256
diff --git a/test/NnxTestClasses.py b/test/NnxTestClasses.py
index a7aaa00..edf227b 100644
--- a/test/NnxTestClasses.py
+++ b/test/NnxTestClasses.py
@@ -254,16 +254,22 @@ def from_conf(
         ).type(torch.int32)
         if global_shift is None:
             global_shift = torch.Tensor([0]).type(torch.int32)
+            conv_kwargs = {
+                **conf.__dict__,
+                "out_type": NeuralEngineFunctionalModel.ACCUMULATOR_TYPE,
+            }
             output = NeuralEngineFunctionalModel().convolution(
                 input,
                 weight,
                 scale,
                 bias,
                 global_shift,
-                verbose=verbose,
-                **conf.__dict__,
+                verbose=False,
+                **conv_kwargs,
+            )
+            global_shift = NnxTestGenerator._calculate_global_shift(
+                output, conf.out_type
             )
-            NnxTestGenerator._calculate_global_shift(output, conf.out_type)
 
         output = NeuralEngineFunctionalModel().convolution(
             input, weight, scale, bias, global_shift, verbose=verbose, **conf.__dict__
diff --git a/test/app/src/nnx_layer.c b/test/app/src/nnx_layer.c
index 004115e..6612619 100644
--- a/test/app/src/nnx_layer.c
+++ b/test/app/src/nnx_layer.c
@@ -31,10 +31,12 @@
 
 typedef ne16_norm_mode_e nnx_norm_mode_e;
 typedef ne16_quant_t nnx_quant_t;
+typedef ne16_quant_function_e nnx_quant_function_e;
 typedef ne16_norm_t nnx_norm_t;
 typedef ne16_task_t nnx_task_t;
 typedef ne16_dev_t nnx_dev_t;
 typedef ne16_pulp_conf_t nnx_bsp_conf_t;
+typedef ne16_task_flag_e nnx_task_flag_e;
 
 #define nnxTaskFlagTrue ne16TaskFlagTrue
 #define nnxTaskFlagFalse ne16TaskFlagFalse
@@ -46,7 +48,8 @@ typedef ne16_pulp_conf_t nnx_bsp_conf_t;
 #define nnx_task_set_weight_offset ne16_task_set_weight_offset
 #define nnx_task_set_dims ne16_task_set_dims
 #define nnx_task_set_dims_stride2x2 ne16_task_set_dims_stride2x2
-#define nnx_task_set_ptrs ne16_task_set_ptrs
+#define nnx_task_set_ptrs_conv ne16_task_set_ptrs_conv
+#define nnx_task_set_ptrs_norm_quant ne16_task_set_ptrs_norm_quant
 
 #define NNX_GVSOC_LOG_LEVEL NE16_GVSOC_LOG_LEVEL_ALL
 #define NNX_GVSOC_LOG_FORMAT NE16_GVSOC_LOG_FORMAT_HEXADECIMAL
@@ -73,10 +76,12 @@ typedef ne16_pulp_conf_t nnx_bsp_conf_t;
 
 typedef neureka_norm_mode_e nnx_norm_mode_e;
 typedef neureka_quant_t nnx_quant_t;
+typedef neureka_quant_function_e nnx_quant_function_e;
 typedef neureka_norm_t nnx_norm_t;
 typedef neureka_task_t nnx_task_t;
 typedef neureka_dev_t nnx_dev_t;
 typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
+typedef neureka_task_flag_e nnx_task_flag_e;
 
 #define nnxTaskFlagTrue neurekaTaskFlagTrue
 #define nnxTaskFlagFalse neurekaTaskFlagFalse
@@ -87,7 +92,8 @@ typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
 #define nnx_task_set_norm_quant neureka_task_set_norm_quant
 #define nnx_task_set_weight_offset neureka_task_set_weight_offset
 #define nnx_task_set_dims neureka_task_set_dims
-#define nnx_task_set_ptrs neureka_task_set_ptrs
+#define nnx_task_set_ptrs_conv neureka_task_set_ptrs_conv
+#define nnx_task_set_ptrs_norm_quant neureka_task_set_ptrs_norm_quant
 
 #define NNX_GVSOC_LOG_LEVEL NEUREKA_GVSOC_LOG_LEVEL_ALL
 #define NNX_GVSOC_LOG_FORMAT NEUREKA_GVSOC_LOG_FORMAT_HEXADECIMAL
@@ -114,26 +120,12 @@ typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
 
 static void task_prepare(nnx_task_t *task) {
   nnx_task_init(task);
+#ifdef NNX_NEUREKA
+  nnx_task_set_op_to_conv(task, WEIGHT_HEIGHT, GROUPS > 1);
+#else
   nnx_task_set_op_to_conv(task, WEIGHT_HEIGHT, GROUPS > 1, STRIDE_HEIGHT);
-  nnx_task_set_bits(task, INPUT_BITS, OUTPUT_BITS, WEIGHT_BITS);
-
-#if HAS_NORM_QUANT == 1
-#if SCALE_BITS == 8
-  const nnx_norm_mode_e normMode = normMode8Bit;
-#elif SCALE_BITS == 32
-  const nnx_norm_mode_e normMode = normMode32Bit;
 #endif
-
-  nnx_task_set_norm_quant(
-      task,
-      (nnx_quant_t){.shift_amount = OUTSHIFT,
-                    .function =
-                        HAS_RELU ? quantFunctionRelu : quantFunctionIdentity,
-                    .flag_rounding = nnxTaskFlagFalse},
-      (nnx_norm_t){.mode = normMode,
-                   .flag_bias = HAS_BIAS ? nnxTaskFlagTrue : nnxTaskFlagFalse,
-                   .flag_shift = nnxTaskFlagFalse});
-#endif // HAS_NORM_QUANT
+  nnx_task_set_bits(task, INPUT_BITS, OUTPUT_BITS, WEIGHT_BITS);
 
   nnx_task_set_weight_offset(task, weightOffsetModeLayerWise, WEIGHT_OFFSET);
 
@@ -159,29 +151,43 @@ static void task_prepare(nnx_task_t *task) {
   nnx_task_set_dims_stride2x2(
       task, INPUT_HEIGHT, INPUT_WIDTH, INPUT_CHANNEL, h_in_stride, w_in_stride,
       OUTPUT_HEIGHT, OUTPUT_WIDTH, OUTPUT_CHANNEL, h_out_stride, w_out_stride,
-      WEIGHT_HEIGHT, WEIGHT_WIDTH, PADDING_TOP, PADDING_BOTTOM, PADDING_RIGHT,
-      PADDING_LEFT);
+      WEIGHT_HEIGHT, WEIGHT_WIDTH, PADDING_TOP, PADDING_BOTTOM, PADDING_LEFT,
+      PADDING_RIGHT);
 #else
   nnx_task_set_dims(task, INPUT_WIDTH, INPUT_CHANNEL, h_in_stride, w_in_stride,
                     OUTPUT_HEIGHT, OUTPUT_WIDTH, OUTPUT_CHANNEL, h_out_stride,
-                    w_out_stride, PADDING_TOP, PADDING_BOTTOM, PADDING_RIGHT,
-                    PADDING_LEFT);
+                    w_out_stride, PADDING_TOP, PADDING_BOTTOM, PADDING_LEFT,
+                    PADDING_RIGHT);
 #endif
 
-  nnx_task_set_ptrs(task, (uint32_t)input, INPUT_WIDTH, w_in_stride,
-                    PADDING_TOP, PADDING_LEFT, (uint32_t)output,
-                    (uint32_t)weight,
+  nnx_task_set_ptrs_conv(task, (uint32_t)input, INPUT_WIDTH, w_in_stride,
+                         PADDING_TOP, PADDING_LEFT, (uint32_t)output,
+                         (uint32_t)weight);
+
 #if HAS_NORM_QUANT == 1
-                    (uint32_t)scale, NULL,
-#if HAS_BIAS == 1
-                    (uint32_t)bias
-#else
-                    NULL
-#endif
-#else
-                    NULL, NULL, NULL
+#if SCALE_BITS == 8
+  const nnx_norm_mode_e normMode = normMode8Bit;
+#elif SCALE_BITS == 32
+  const nnx_norm_mode_e normMode = normMode32Bit;
 #endif
-  );
+
+  const nnx_task_flag_e flag_bias =
+      HAS_BIAS ? nnxTaskFlagTrue : nnxTaskFlagFalse;
+  const uint32_t bias_ptr = (uint32_t)(HAS_BIAS ? bias : NULL);
+
+  nnx_quant_function_e quant_function =
+      HAS_RELU ? quantFunctionRelu : quantFunctionIdentity;
+
+  nnx_task_set_norm_quant(task,
+                          (nnx_quant_t){.shift_amount = OUTSHIFT,
+                                        .function = quant_function,
+                                        .flag_rounding = nnxTaskFlagFalse},
+                          (nnx_norm_t){.mode = normMode,
+                                       .flag_bias = flag_bias,
+                                       .flag_shift = nnxTaskFlagFalse});
+
+  nnx_task_set_ptrs_norm_quant(task, (uint32_t)scale, NULL, bias_ptr);
+#endif // HAS_NORM_QUANT
 }
 
 static void task_execute(nnx_task_t *task) {
diff --git a/test/tests/test_116/conf.json b/test/tests/test_116/conf.json
new file mode 100644
index 0000000..4858679
--- /dev/null
+++ b/test/tests/test_116/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 3,
+  "in_width": 3,
+  "in_channel": 2,
+  "out_channel": 2,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "int8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_117/conf.json b/test/tests/test_117/conf.json
new file mode 100644
index 0000000..79beac9
--- /dev/null
+++ b/test/tests/test_117/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 10,
+  "in_width": 10,
+  "in_channel": 10,
+  "out_channel": 10,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "uint8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_118/conf.json b/test/tests/test_118/conf.json
new file mode 100644
index 0000000..16616eb
--- /dev/null
+++ b/test/tests/test_118/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 10,
+  "in_width": 10,
+  "in_channel": 128,
+  "out_channel": 128,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "uint8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
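The three new test configurations exercise the freshly supported `uint32` scale. On the HAL side, a task would opt into it roughly as follows — a sketch built from the NE16 names in this patch, with an illustrative channel count, shift amount, and buffers:

```c
#include "ne16_task.h"

#define N_CHANNELS 16 // illustrative output-channel count

static uint32_t scale[N_CHANNELS]; // 32-bit per-channel scale
static int32_t bias[N_CHANNELS];

static void example_norm_quant_32bit(ne16_task_t *task) {
  ne16_task_set_norm_quant(
      task,
      (ne16_quant_t){.shift_amount = 12, // must stay within 0x00-0x1F
                     .function = quantFunctionIdentity,
                     .flag_rounding = ne16TaskFlagFalse},
      (ne16_norm_t){.mode = normMode32Bit, // normMode8Bit for 8-bit scales
                    .flag_bias = ne16TaskFlagTrue,
                    .flag_shift = ne16TaskFlagFalse});
  ne16_task_set_ptrs_norm_quant(task, (uint32_t)scale, 0 /* shift_ptr */,
                                (uint32_t)bias);
}
```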