diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b6a7d0..84b516f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ ### Changed - `ne16_task_init` got split into smaller parts: `ne16_task_init`, `ne16_task_set_op_to_conv`, `ne16_task_set_weight_offset`, `ne16_task_set_bits`, `ne16_task_set_norm_quant` -- strides in `ne16_task_set_strides`, and `ne16_task_set_dims` are now strides between consecutive elements in that dimension +- strides in `ne16_task_set_strides`, `ne16_task_set_dims`, and `ne16_task_set_ptrs` are now strides between consecutive elements in that dimension - `ne16_task_queue_size` is now `NE16_TASK_QUEUE_SIZE` ### Removed diff --git a/ne16/hal/ne16_task.c b/ne16/hal/ne16_task.c index 5f856e4..f8408da 100644 --- a/ne16/hal/ne16_task.c +++ b/ne16/hal/ne16_task.c @@ -108,19 +108,18 @@ void ne16_task_set_weight_offset(ne16_task_t *task, * it was the start to the padded data. * Necessary for input pointer when it's padded. */ -uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t channel, const uint8_t bits, +uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, uint32_t width_stride, const uint8_t padding_top, const uint8_t padding_left) { - return ptr - (padding_top * width + padding_left) * channel * bits / 8; + return ptr - (padding_top * width + padding_left) * width_stride; } void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in, - uint32_t k_in, uint8_t bits_in, uint8_t padding_top, + uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, uint32_t output_ptr, uint32_t weights_ptr, uint32_t scale_ptr, uint32_t shift_ptr, uint32_t bias_ptr) { task->data.infeat_ptr = - ne16_pad_ptr(input_ptr, w_in, k_in, bits_in, padding_top, padding_left); + ne16_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left); task->data.outfeat_ptr = output_ptr; task->data.weights_ptr = weights_ptr; task->data.scale_ptr = scale_ptr; diff --git a/ne16/hal/ne16_task.h b/ne16/hal/ne16_task.h index 
dd12c39..69bc78c 100644 --- a/ne16/hal/ne16_task.h +++ b/ne16/hal/ne16_task.h @@ -131,10 +131,10 @@ uint32_t ne16_get_tile_padding(uint32_t padding, uint32_t i_height, uint32_t i_width, uint32_t n_height, uint32_t n_width); uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t channel, const uint8_t bits, - const uint8_t padding_top, const uint8_t padding_left); + const uint32_t width_stride, const uint8_t padding_top, + const uint8_t padding_left); void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in, - uint32_t k_in, uint8_t bits_in, uint8_t padding_top, + uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, uint32_t output_ptr, uint32_t weights_ptr, uint32_t scale_ptr, uint32_t shift_ptr, uint32_t bias_ptr); diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index 4541f9d..c99c649 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -122,20 +122,19 @@ void neureka_task_set_weight_source(neureka_task_t *task, * Necessary for input pointer when it's padded. 
*/ uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t channel, const uint8_t bits, - const uint8_t padding_top, + const uint32_t width_stride, const uint8_t padding_top, const uint8_t padding_left) { - return ptr - (padding_top * width + padding_left) * channel * bits / 8; + return ptr - (padding_top * width + padding_left) * width_stride; } void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr, - uint32_t w_in, uint32_t k_in, uint8_t bits_in, + uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, uint32_t output_ptr, uint32_t weights_ptr, uint32_t scale_ptr, uint32_t shift_ptr, uint32_t bias_ptr) { - task->data.infeat_ptr = neureka_pad_ptr(input_ptr, w_in, k_in, bits_in, - padding_top, padding_left); + task->data.infeat_ptr = + neureka_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left); task->data.outfeat_ptr = output_ptr; task->data.weights_ptr = weights_ptr; task->data.scale_ptr = scale_ptr; diff --git a/neureka/hal/neureka_task.h b/neureka/hal/neureka_task.h index a265223..2d06468 100644 --- a/neureka/hal/neureka_task.h +++ b/neureka/hal/neureka_task.h @@ -140,10 +140,10 @@ uint32_t neureka_get_tile_padding(uint32_t padding, uint32_t i_height, uint32_t i_width, uint32_t n_height, uint32_t n_width); uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t channel, const uint8_t bits, - const uint8_t padding_top, const uint8_t padding_left); + const uint32_t width_stride, const uint8_t padding_top, + const uint8_t padding_left); void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr, - uint32_t w_in, uint32_t k_in, uint8_t bits_in, + uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, uint32_t output_ptr, uint32_t weights_ptr, uint32_t scale_ptr, uint32_t shift_ptr, diff --git a/test/app/src/nnx_layer.c b/test/app/src/nnx_layer.c index 486019d..0d98ff6 100644 --- a/test/app/src/nnx_layer.c +++ b/test/app/src/nnx_layer.c @@ -167,8 +167,8 @@ static void task_prepare(nnx_task_t *task) { 
PADDING_LEFT); #endif - nnx_task_set_ptrs(task, (uint32_t)input, INPUT_WIDTH, INPUT_CHANNEL, - INPUT_BITS, PADDING_TOP, PADDING_LEFT, (uint32_t)output, + nnx_task_set_ptrs(task, (uint32_t)input, INPUT_WIDTH, w_in_stride, + PADDING_TOP, PADDING_LEFT, (uint32_t)output, (uint32_t)weight, #if HAS_NORM_QUANT == 1 (uint32_t)scale, NULL,