diff --git a/ne16/hal/ne16_task.c b/ne16/hal/ne16_task.c index a519ce8..fdf5d29 100644 --- a/ne16/hal/ne16_task.c +++ b/ne16/hal/ne16_task.c @@ -102,32 +102,33 @@ void ne16_task_set_weight_offset(ne16_task_t *task, task->data.cfg.weight_offset_factor = weight_offset; } -/** ne16_pad_ptr +/** ne16_pad_addr * * Calculate the pointer to the start of the ptr as if * it was the start to the padded data. * Necessary for input pointer when it's padded. */ -uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, uint32_t width_stride, - const uint8_t padding_top, const uint8_t padding_left) { +uint32_t ne16_pad_addr(uint32_t ptr, const uint32_t width, + uint32_t width_stride, const uint8_t padding_top, + const uint8_t padding_left) { return ptr - (padding_top * width + padding_left) * width_stride; } -void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr, +void ne16_task_set_addr_conv(ne16_task_t *task, uint32_t input_addr, uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, - uint32_t output_ptr, uint32_t weights_ptr) { - task->data.infeat_ptr = - ne16_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left); - task->data.outfeat_ptr = output_ptr; - task->data.weights_ptr = weights_ptr; + uint32_t output_addr, uint32_t weights_addr) { + task->data.infeat_addr = + ne16_pad_addr(input_addr, w_in, w_in_stride, padding_top, padding_left); + task->data.outfeat_addr = output_addr; + task->data.weights_addr = weights_addr; } -void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr, - uint32_t shift_ptr, uint32_t bias_ptr) { - task->data.scale_ptr = scale_ptr; - task->data.scale_shift_ptr = shift_ptr; - task->data.scale_bias_ptr = bias_ptr; +void ne16_task_set_addr_norm_quant(ne16_task_t *task, uint32_t scale_addr, + uint32_t shift_addr, uint32_t bias_addr) { + task->data.scale_addr = scale_addr; + task->data.scale_shift_addr = shift_addr; + task->data.scale_bias_addr = bias_addr; } void ne16_task_set_strides(ne16_task_t *task, const uint32_t k_in, diff --git a/ne16/hal/ne16_task.h b/ne16/hal/ne16_task.h index e18c256..5582cbf 100644 --- a/ne16/hal/ne16_task.h +++ b/ne16/hal/ne16_task.h @@ -98,12 +98,12 @@ typedef struct ne16_cfg_t { } ne16_cfg_t; typedef struct ne16_task_data_t { - uint32_t weights_ptr; - uint32_t infeat_ptr; - uint32_t outfeat_ptr; - uint32_t scale_ptr; - uint32_t scale_shift_ptr; - uint32_t scale_bias_ptr; + uint32_t weights_addr; + uint32_t infeat_addr; + uint32_t outfeat_addr; + uint32_t scale_addr; + uint32_t scale_shift_addr; + uint32_t scale_bias_addr; ne16_cfg_t cfg; } ne16_task_data_t; @@ -130,15 +130,15 @@ void ne16_task_set_weight_offset(ne16_task_t *task, uint32_t ne16_get_tile_padding(uint32_t padding, uint32_t i_height, uint32_t i_width, uint32_t n_height, uint32_t n_width); -uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t width_stride, const uint8_t padding_top, - const uint8_t padding_left); -void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr, +uint32_t ne16_pad_addr(uint32_t ptr, const uint32_t width, + const uint32_t width_stride, const uint8_t padding_top, + const uint8_t padding_left); +void ne16_task_set_addr_conv(ne16_task_t *task, uint32_t input_addr, uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, - uint32_t output_ptr, uint32_t weights_ptr); -void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr, - uint32_t shift_ptr, uint32_t bias_ptr); + uint32_t output_addr, uint32_t weights_addr); +void ne16_task_set_addr_norm_quant(ne16_task_t *task, uint32_t scale_addr, + uint32_t shift_addr, uint32_t bias_addr); /** ne16_task_set_strides * * All the strides variables are strides between elements alongside that diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index 9a731ad..9a311dc 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -114,37 +114,38 @@ void neureka_task_set_weight_source(neureka_task_t *task, task->data.cfg.conf0 |= weight_source; } -/** neureka_pad_ptr +/** neureka_pad_addr * * Calculate the pointer to the start of the ptr as if * it was the start to the padded data. * Necessary for input pointer when it's padded. */ -uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t width_stride, const uint8_t padding_top, - const uint8_t padding_left) { +uint32_t neureka_pad_addr(uint32_t ptr, const uint32_t width, + const uint32_t width_stride, + const uint8_t padding_top, + const uint8_t padding_left) { return ptr - (padding_top * width + padding_left) * width_stride; } -void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr, +void neureka_task_set_addr_conv(neureka_task_t *task, uint32_t input_addr, uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, - uint32_t output_ptr, uint32_t weights_ptr) { - task->data.infeat_ptr = - neureka_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left); - task->data.outfeat_ptr = output_ptr; + uint32_t output_addr, uint32_t weights_addr) { + task->data.infeat_addr = neureka_pad_addr(input_addr, w_in, w_in_stride, + padding_top, padding_left); + task->data.outfeat_addr = output_addr; if ((task->data.cfg.conf0 & NEUREKA_MASK_FLAG_WEIGHT_SOURCE) == NEUREKA_FLAG_WEIGHT_SOURCE_WMEM) { - weights_ptr -= 0x10400000; + weights_addr -= 0x10400000; } - task->data.weights_ptr = weights_ptr; + task->data.weights_addr = weights_addr; } -void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr, - uint32_t shift_ptr, uint32_t bias_ptr) { - task->data.scale_ptr = scale_ptr; - task->data.scale_shift_ptr = shift_ptr; - task->data.scale_bias_ptr = bias_ptr; +void neureka_task_set_addr_norm_quant(neureka_task_t *task, uint32_t scale_addr, + uint32_t shift_addr, uint32_t bias_addr) { + task->data.scale_addr = scale_addr; + task->data.scale_shift_addr = shift_addr; + task->data.scale_bias_addr = bias_addr; } void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in, diff --git a/neureka/hal/neureka_task.h b/neureka/hal/neureka_task.h index 7af5cb5..9bdae0b 100644 --- a/neureka/hal/neureka_task.h +++ b/neureka/hal/neureka_task.h @@ -101,12 +101,12 @@ typedef struct neureka_cfg_t { } neureka_cfg_t; typedef struct neureka_task_data_t { - uint32_t weights_ptr; - uint32_t infeat_ptr; - uint32_t outfeat_ptr; - uint32_t scale_ptr; - uint32_t scale_shift_ptr; - uint32_t scale_bias_ptr; + uint32_t weights_addr; + uint32_t infeat_addr; + uint32_t outfeat_addr; + uint32_t scale_addr; + uint32_t scale_shift_addr; + uint32_t scale_bias_addr; neureka_cfg_t cfg; } neureka_task_data_t; @@ -139,15 +139,16 @@ void neureka_task_set_weight_source(neureka_task_t *task, uint32_t neureka_get_tile_padding(uint32_t padding, uint32_t i_height, uint32_t i_width, uint32_t n_height, uint32_t n_width); -uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width, - const uint32_t width_stride, const uint8_t padding_top, - const uint8_t padding_left); -void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr, +uint32_t neureka_pad_addr(uint32_t ptr, const uint32_t width, + const uint32_t width_stride, + const uint8_t padding_top, + const uint8_t padding_left); +void neureka_task_set_addr_conv(neureka_task_t *task, uint32_t input_addr, uint32_t w_in, uint32_t w_in_stride, uint8_t padding_top, uint8_t padding_left, - uint32_t output_ptr, uint32_t weights_ptr); -void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr, - uint32_t shift_ptr, uint32_t bias_ptr); + uint32_t output_addr, uint32_t weights_addr); +void neureka_task_set_addr_norm_quant(neureka_task_t *task, uint32_t scale_addr, + uint32_t shift_addr, uint32_t bias_addr); /** neureka_task_set_strides * * All the strides variables are strides between elements alongside that diff --git a/src/pulp_nnx_ne16.c b/src/pulp_nnx_ne16.c index c286189..a24f4bc 100644 --- a/src/pulp_nnx_ne16.c +++ b/src/pulp_nnx_ne16.c @@ -74,11 +74,11 @@ void ne16_nnx_resolve_wait(const ne16_dev_t *dev, ne16_task_t *task) { } } -static inline uint32_t _get_tile_ptr(uint32_t ptr, int i, int j, int size_i, - uint32_t size_j, uint32_t size_k, - uint32_t stride_j, uint32_t stride_k, - uint32_t overlap_i, uint32_t overlap_j, - uint32_t offset_i, uint32_t offset_j) { +static inline uint32_t _get_tile_addr(uint32_t ptr, int i, int j, int size_i, + uint32_t size_j, uint32_t size_k, + uint32_t stride_j, uint32_t stride_k, + uint32_t overlap_i, uint32_t overlap_j, + uint32_t offset_i, uint32_t offset_j) { return ptr + (i * (size_i - overlap_i) - offset_i) * stride_j + (j * (size_j - overlap_j) - offset_j) * stride_k; } @@ -97,18 +97,18 @@ void ne16_nnx_dispatch_stride2x2(const ne16_dev_t *dev, ne16_task_t *task, const uint32_t output_height_offset = h_out % stride == 1 ? 1 : 0; const uint32_t output_width_offset = w_out % stride == 1 ? 1 : 0; - const uint32_t input_base = task->data.infeat_ptr; - const uint32_t output_base = task->data.outfeat_ptr; + const uint32_t input_base = task->data.infeat_addr; + const uint32_t output_base = task->data.outfeat_addr; const uint32_t tile_padding = task->data.cfg.padding; for (uint32_t i = 0; i < n_h; i++) { for (uint32_t j = 0; j < n_w; j++) { - task->data.infeat_ptr = _get_tile_ptr( + task->data.infeat_addr = _get_tile_addr( input_base, i, j, 3 + h_ker - 1, 3 + w_ker - 1, k_in, task->data.cfg.input_stride.d1, task->data.cfg.input_stride.d0, h_ker - stride, w_ker - stride, i == 0 ? 0 : input_height_offset, j == 0 ? 0 : input_width_offset); - task->data.outfeat_ptr = _get_tile_ptr( + task->data.outfeat_addr = _get_tile_addr( output_base, i, j, 2, 2, k_out, task->data.cfg.output_stride.d2 << 1, task->data.cfg.output_stride.d1 << 1, 0, 0, i == 0 ? 0 : output_height_offset, j == 0 ? 0 : output_width_offset); diff --git a/test/app/src/nnx_layer.c b/test/app/src/nnx_layer.c index a471c0f..b6dca81 100644 --- a/test/app/src/nnx_layer.c +++ b/test/app/src/nnx_layer.c @@ -48,8 +48,8 @@ typedef ne16_task_flag_e nnx_task_flag_e; #define nnx_task_set_weight_offset ne16_task_set_weight_offset #define nnx_task_set_dims ne16_task_set_dims #define nnx_task_set_dims_stride2x2 ne16_task_set_dims_stride2x2 -#define nnx_task_set_ptrs_conv ne16_task_set_ptrs_conv -#define nnx_task_set_ptrs_norm_quant ne16_task_set_ptrs_norm_quant +#define nnx_task_set_addr_conv ne16_task_set_addr_conv +#define nnx_task_set_addr_norm_quant ne16_task_set_addr_norm_quant #define NNX_GVSOC_LOG_LEVEL NE16_GVSOC_LOG_LEVEL_ALL #define NNX_GVSOC_LOG_FORMAT NE16_GVSOC_LOG_FORMAT_HEXADECIMAL @@ -91,8 +91,8 @@ typedef neureka_task_flag_e nnx_task_flag_e; #define nnx_task_set_norm_quant neureka_task_set_norm_quant #define nnx_task_set_weight_offset neureka_task_set_weight_offset #define nnx_task_set_dims neureka_task_set_dims -#define nnx_task_set_ptrs_conv neureka_task_set_ptrs_conv -#define nnx_task_set_ptrs_norm_quant neureka_task_set_ptrs_norm_quant +#define nnx_task_set_addr_conv neureka_task_set_addr_conv +#define nnx_task_set_addr_norm_quant neureka_task_set_addr_norm_quant #define NNX_GVSOC_LOG_LEVEL NEUREKA_GVSOC_LOG_LEVEL_ALL #define NNX_GVSOC_LOG_FORMAT NEUREKA_GVSOC_LOG_FORMAT_HEXADECIMAL @@ -159,7 +159,7 @@ static void task_prepare(nnx_task_t *task) { PADDING_RIGHT); #endif - nnx_task_set_ptrs_conv(task, (uint32_t)input, INPUT_WIDTH, w_in_stride, + nnx_task_set_addr_conv(task, (uint32_t)input, INPUT_WIDTH, w_in_stride, PADDING_TOP, PADDING_LEFT, (uint32_t)output, (uint32_t)weight); @@ -172,7 +172,7 @@ static void task_prepare(nnx_task_t *task) { const nnx_task_flag_e flag_bias = HAS_BIAS ? nnxTaskFlagTrue : nnxTaskFlagFalse; - const uint32_t bias_ptr = (uint32_t)(HAS_BIAS ? bias : NULL); + const uint32_t bias_addr = (uint32_t)(HAS_BIAS ? bias : NULL); nnx_quant_function_e quant_function = HAS_RELU ? quantFunctionRelu : quantFunctionIdentity; @@ -185,7 +185,8 @@ static void task_prepare(nnx_task_t *task) { .flag_bias = flag_bias, .flag_shift = nnxTaskFlagFalse}); - nnx_task_set_ptrs_norm_quant(task, (uint32_t)scale, (uint32_t)NULL, bias_ptr); + nnx_task_set_addr_norm_quant(task, (uint32_t)scale, (uint32_t)NULL, + bias_addr); #endif // HAS_NORM_QUANT }