diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84b516f..78ad1bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,12 +8,14 @@
 - Support for kernels without normalization and quantization for NE16
 - isort check
 - publication citation
+- support 32bit scale
 
 ### Changed
 
 - `ne16_task_init` got split into smaller parts: `ne16_task_init`, `ne16_task_set_op_to_conv`, `ne16_task_set_weight_offset`, `ne16_task_set_bits`, `ne16_task_set_norm_quant`
 - strides in `ne16_task_set_strides`, `ne16_task_set_dims`, and `ne16_task_set_ptrs` are now strides between consecutive elements in that dimension
 - `ne16_task_queue_size` is now `NE16_TASK_QUEUE_SIZE`
+- `ne16_task_set_ptrs` split into `ne16_task_set_ptrs_conv` and `ne16_task_set_ptrs_norm_quant`
 
 ### Removed
 
diff --git a/ne16/README.md b/ne16/README.md
index 9f05956..750ccd5 100644
--- a/ne16/README.md
+++ b/ne16/README.md
@@ -28,7 +28,7 @@
 - [ ] Scale type
   - [x] uint8
   - [ ] uint16
-  - [ ] uint32
+  - [x] uint32
 - [x] Bias type
   - [x] int32
 - [ ] Weight type
diff --git a/ne16/hal/ne16_task.c b/ne16/hal/ne16_task.c
index f8408da..a519ce8 100644
--- a/ne16/hal/ne16_task.c
+++ b/ne16/hal/ne16_task.c
@@ -113,15 +113,18 @@ uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width, uint32_t width_stride,
   return ptr - (padding_top * width + padding_left) * width_stride;
 }
 
-void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in,
-                        uint32_t w_in_stride, uint8_t padding_top,
-                        uint8_t padding_left, uint32_t output_ptr,
-                        uint32_t weights_ptr, uint32_t scale_ptr,
-                        uint32_t shift_ptr, uint32_t bias_ptr) {
+void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr,
+                             uint32_t w_in, uint32_t w_in_stride,
+                             uint8_t padding_top, uint8_t padding_left,
+                             uint32_t output_ptr, uint32_t weights_ptr) {
   task->data.infeat_ptr =
       ne16_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left);
   task->data.outfeat_ptr = output_ptr;
   task->data.weights_ptr = weights_ptr;
+}
+
+void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr,
+                                   uint32_t shift_ptr, uint32_t bias_ptr) {
   task->data.scale_ptr = scale_ptr;
   task->data.scale_shift_ptr = shift_ptr;
   task->data.scale_bias_ptr = bias_ptr;
@@ -206,8 +209,8 @@ void ne16_task_set_padding(ne16_task_t *task, const uint8_t top,
 }
 
 void ne16_task_set_mask_filter(ne16_task_t *task, const uint8_t top,
-                               const uint8_t right, const uint8_t bottom,
-                               const uint8_t left) {
+                               const uint8_t bottom, const uint8_t left,
+                               const uint8_t right) {
   task->data.cfg.filter_mask = ((top & 0xff) << 24) | ((right & 0xff) << 16) |
                                ((bottom & 0xff) << 8) | ((left & 0xff) << 0);
 }
@@ -219,8 +222,8 @@ void ne16_task_set_dims(ne16_task_t *task, const uint32_t w_in,
                         const uint32_t h_out_stride,
                         const uint32_t w_out_stride, const uint8_t padding_top,
                         const uint8_t padding_bottom,
-                        const uint8_t padding_right,
-                        const uint8_t padding_left) {
+                        const uint8_t padding_left,
+                        const uint8_t padding_right) {
   ne16_task_set_strides(task, k_in, h_in_stride, w_in_stride, h_out_stride,
                         w_out_stride);
   ne16_task_set_counters(task, k_in, h_out, w_out, k_out, padding_bottom,
@@ -235,8 +238,8 @@ void ne16_task_set_dims_stride2x2(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t h_ker, const uint8_t w_ker, const uint8_t padding_top,
-    const uint8_t padding_bottom, const uint8_t padding_right,
-    const uint8_t padding_left) {
+    const uint8_t padding_bottom, const uint8_t padding_left,
+    const uint8_t padding_right) {
   const uint8_t stride = 2;
   // WARNING: works only for even output channel stride (divisible by 2)
 
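For HAL users, the `ne16_task_set_ptrs` split above turns one call into two. A minimal migration sketch, assuming illustrative buffer names, sizes, and strides (nothing below is taken verbatim from the patch):

```c
#include "ne16_task.h"

// Illustrative buffers; a real application provides its own.
static uint8_t infeat[32 * 32 * 16], outfeat[32 * 32 * 16];
static uint8_t weights[16 * 2 * 3 * 3];
static uint32_t scale[16];
static int32_t bias[16];

static void example_set_ptrs(ne16_task_t *task, uint32_t w_in_stride) {
  // Convolution pointers (input, output, weights); padding is used to
  // pre-offset the input pointer, as before.
  ne16_task_set_ptrs_conv(task, (uint32_t)infeat, 32 /* w_in */, w_in_stride,
                          0 /* padding_top */, 0 /* padding_left */,
                          (uint32_t)outfeat, (uint32_t)weights);
  // Norm/quant pointers are now set separately, and only when the task
  // actually uses normalization/quantization.
  ne16_task_set_ptrs_norm_quant(task, (uint32_t)scale, 0 /* shift_ptr */,
                                (uint32_t)bias);
}
```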
diff --git a/ne16/hal/ne16_task.h b/ne16/hal/ne16_task.h
index 69bc78c..e18c256 100644
--- a/ne16/hal/ne16_task.h
+++ b/ne16/hal/ne16_task.h
@@ -42,8 +42,8 @@ typedef enum {
 
 typedef struct ne16_norm_t {
   ne16_norm_mode_e mode;
-  int flag_bias;
-  int flag_shift;
+  ne16_task_flag_e flag_bias;
+  ne16_task_flag_e flag_shift;
 } ne16_norm_t;
 
 typedef enum ne16_quant_mode_e {
@@ -59,9 +59,9 @@ typedef enum ne16_quant_function_e {
 
 typedef struct ne16_quant_t {
   // Shift amount must be in range 0x00-0x1F
-  unsigned shift_amount;
+  uint8_t shift_amount;
   ne16_quant_function_e function;
-  int flag_rounding;
+  ne16_task_flag_e flag_rounding;
 } ne16_quant_t;
 
 typedef struct ne16_stride_t {
@@ -133,11 +133,12 @@ uint32_t ne16_get_tile_padding(uint32_t padding, uint32_t i_height,
 uint32_t ne16_pad_ptr(uint32_t ptr, const uint32_t width,
                       const uint32_t width_stride, const uint8_t padding_top,
                       const uint8_t padding_left);
-void ne16_task_set_ptrs(ne16_task_t *task, uint32_t input_ptr, uint32_t w_in,
-                        uint32_t w_in_stride, uint8_t padding_top,
-                        uint8_t padding_left, uint32_t output_ptr,
-                        uint32_t weights_ptr, uint32_t scale_ptr,
-                        uint32_t shift_ptr, uint32_t bias_ptr);
+void ne16_task_set_ptrs_conv(ne16_task_t *task, uint32_t input_ptr,
+                             uint32_t w_in, uint32_t w_in_stride,
+                             uint8_t padding_top, uint8_t padding_left,
+                             uint32_t output_ptr, uint32_t weights_ptr);
+void ne16_task_set_ptrs_norm_quant(ne16_task_t *task, uint32_t scale_ptr,
+                                   uint32_t shift_ptr, uint32_t bias_ptr);
 /** ne16_task_set_strides
  *
  * All the strides variables are strides between elements alongside that
@@ -157,8 +158,8 @@ void ne16_task_set_padding(ne16_task_t *task, const uint8_t top,
                            const uint8_t bottom, const uint8_t left,
                            const uint8_t right, const uint8_t value);
 void ne16_task_set_mask_filter(ne16_task_t *task, const uint8_t top,
-                               const uint8_t right, const uint8_t bottom,
-                               const uint8_t left);
+                               const uint8_t bottom, const uint8_t left,
+                               const uint8_t right);
 /** ne16_task_set_dims
  *
  * All the strides variables are strides between elements alongside that
@@ -172,8 +173,8 @@ void ne16_task_set_dims(ne16_task_t *task, const uint32_t w_in,
                         const uint32_t h_out_stride,
                         const uint32_t w_out_stride, const uint8_t padding_top,
                         const uint8_t padding_bottom,
-                        const uint8_t padding_right,
-                        const uint8_t padding_left);
+                        const uint8_t padding_left,
+                        const uint8_t padding_right);
 /** ne16_task_set_dims_stride2x2
  *
  * All the strides variables are strides between elements alongside that
@@ -186,7 +187,7 @@ void ne16_task_set_dims_stride2x2(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t h_ker, const uint8_t w_ker, const uint8_t padding_top,
-    const uint8_t padding_bottom, const uint8_t padding_right,
-    const uint8_t padding_left);
+    const uint8_t padding_bottom, const uint8_t padding_left,
+    const uint8_t padding_right);
 
 #endif // !__NE16_TASK_H__
diff --git a/neureka/README.md b/neureka/README.md
index 9c83f4e..51586c3 100644
--- a/neureka/README.md
+++ b/neureka/README.md
@@ -16,7 +16,6 @@ Github repo [link](https://github.com/siracusa-soc/ne).
 - [x] Bias (w/ and w/o)
 - [ ] Per-channel shift
 - [x] Per-layer shift
-- [ ] Rounding
 - [x] Input type
   - [x] uint8
   - [x] int8
@@ -24,9 +23,9 @@ Github repo [link](https://github.com/siracusa-soc/ne).
   - [x] int8
   - [x] uint8 (only w/ Relu)
   - [x] int32
-- [ ] Scale type
+- [x] Scale type
   - [x] uint8
-  - [ ] uint32
+  - [x] uint32
 - [x] Bias type
   - [x] int32
 - [ ] Weight type
diff --git a/neureka/bsp/siracusa/neureka_siracusa_bsp.h b/neureka/bsp/siracusa/neureka_siracusa_bsp.h
index be75a20..8083d70 100644
--- a/neureka/bsp/siracusa/neureka_siracusa_bsp.h
+++ b/neureka/bsp/siracusa/neureka_siracusa_bsp.h
@@ -18,8 +18,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifndef __NEUREKA_siracusa_BSP_H__
-#define __NEUREKA_siracusa_BSP_H__
+#ifndef __NEUREKA_SIRACUSA_BSP_H__
+#define __NEUREKA_SIRACUSA_BSP_H__
 
 #include "neureka.h"
 #include
@@ -64,4 +64,4 @@ void neureka_siracusa_close();
 void neureka_siracusa_event_wait_and_clear();
 const neureka_dev_t *neureka_siracusa_get_dev();
 
-#endif // !__NEUREKA_siracusa_BSP_H__
+#endif // !__NEUREKA_SIRACUSA_BSP_H__
diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c
index 16f54f8..84c2a15 100644
--- a/neureka/hal/neureka_task.c
+++ b/neureka/hal/neureka_task.c
@@ -47,8 +47,7 @@ void neureka_task_init(neureka_task_t *task) {
 
 void neureka_task_set_op_to_conv(neureka_task_t *task,
                                  const uint8_t kernel_shape,
-                                 const uint8_t depthwise,
-                                 const uint8_t stride) {
+                                 const uint8_t depthwise) {
   task->depthwise = depthwise;
   task->kernel_shape = kernel_shape;
   task->subtile_output_channel = depthwise ? NEUREKA_SUBTILE_INPUT_CHANNEL_3x3
@@ -133,16 +132,18 @@ uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width,
   return ptr - (padding_top * width + padding_left) * width_stride;
 }
 
-void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr,
-                           uint32_t w_in, uint32_t w_in_stride,
-                           uint8_t padding_top, uint8_t padding_left,
-                           uint32_t output_ptr, uint32_t weights_ptr,
-                           uint32_t scale_ptr, uint32_t shift_ptr,
-                           uint32_t bias_ptr) {
+void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr,
+                                uint32_t w_in, uint32_t w_in_stride,
+                                uint8_t padding_top, uint8_t padding_left,
+                                uint32_t output_ptr, uint32_t weights_ptr) {
   task->data.infeat_ptr =
       neureka_pad_ptr(input_ptr, w_in, w_in_stride, padding_top, padding_left);
   task->data.outfeat_ptr = output_ptr;
   task->data.weights_ptr = weights_ptr;
+}
+
+void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr,
+                                      uint32_t shift_ptr, uint32_t bias_ptr) {
   task->data.scale_ptr = scale_ptr;
   task->data.scale_shift_ptr = shift_ptr;
   task->data.scale_bias_ptr = bias_ptr;
@@ -223,8 +224,8 @@ void neureka_task_set_padding(neureka_task_t *task, const uint8_t top,
 }
 
 void neureka_task_set_mask_filter(neureka_task_t *task, const uint8_t top,
-                                  const uint8_t right, const uint8_t bottom,
-                                  const uint8_t left) {
+                                  const uint8_t bottom, const uint8_t left,
+                                  const uint8_t right) {
   task->data.cfg.filter_mask = ((top & 0xff) << 24) | ((right & 0xff) << 16) |
                                ((bottom & 0xff) << 8) | ((left & 0xff) << 0);
 }
@@ -235,7 +236,7 @@ void neureka_task_set_dims(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t padding_top, const uint8_t padding_bottom,
-    const uint8_t padding_right, const uint8_t padding_left) {
+    const uint8_t padding_left, const uint8_t padding_right) {
   neureka_task_set_strides(task, k_in, h_in_stride, w_in_stride, h_out_stride,
                            w_out_stride);
   neureka_task_set_counters(task, k_in, h_out, w_out, k_out, padding_bottom,
diff --git a/neureka/hal/neureka_task.h b/neureka/hal/neureka_task.h
index 5217a71..4022fc0 100644
--- a/neureka/hal/neureka_task.h
+++ b/neureka/hal/neureka_task.h
@@ -51,8 +51,8 @@ typedef enum {
 
 typedef struct neureka_norm_t {
   neureka_norm_mode_e mode;
-  int flag_bias;
-  int flag_shift;
+  neureka_task_flag_e flag_bias;
+  neureka_task_flag_e flag_shift;
 } neureka_norm_t;
 
 typedef enum neureka_quant_mode_e {
@@ -67,9 +67,9 @@ typedef enum neureka_quant_function_e {
 
 typedef struct neureka_quant_t {
   // Shift amount must be in range 0x00-0x1F
-  unsigned shift_amount;
+  uint8_t shift_amount;
   neureka_quant_function_e function;
-  int flag_rounding;
+  neureka_task_flag_e flag_rounding;
 } neureka_quant_t;
 
 typedef struct neureka_stride_t {
@@ -128,7 +128,7 @@ typedef struct neureka_task_t {
 void neureka_task_init(neureka_task_t *task);
 void neureka_task_set_op_to_conv(neureka_task_t *task,
                                  const uint8_t kernel_shape,
-                                 const uint8_t depthwise, const uint8_t stride);
+                                 const uint8_t depthwise);
 void neureka_task_set_bits(neureka_task_t *task, const uint8_t input_bits,
                            const uint8_t output_bits,
                            const uint8_t weight_bits);
@@ -147,12 +147,12 @@ uint32_t neureka_get_tile_padding(uint32_t padding, uint32_t i_height,
 uint32_t neureka_pad_ptr(uint32_t ptr, const uint32_t width,
                          const uint32_t width_stride,
                          const uint8_t padding_top, const uint8_t padding_left);
-void neureka_task_set_ptrs(neureka_task_t *task, uint32_t input_ptr,
-                           uint32_t w_in, uint32_t w_in_stride,
-                           uint8_t padding_top, uint8_t padding_left,
-                           uint32_t output_ptr, uint32_t weights_ptr,
-                           uint32_t scale_ptr, uint32_t shift_ptr,
-                           uint32_t bias_ptr);
+void neureka_task_set_ptrs_conv(neureka_task_t *task, uint32_t input_ptr,
+                                uint32_t w_in, uint32_t w_in_stride,
+                                uint8_t padding_top, uint8_t padding_left,
+                                uint32_t output_ptr, uint32_t weights_ptr);
+void neureka_task_set_ptrs_norm_quant(neureka_task_t *task, uint32_t scale_ptr,
+                                      uint32_t shift_ptr, uint32_t bias_ptr);
 /** neureka_task_set_strides
  *
  * All the strides variables are strides between elements alongside that
@@ -173,8 +173,8 @@ void neureka_task_set_padding(neureka_task_t *task, const uint8_t top,
                               const uint8_t bottom, const uint8_t left,
                               const uint8_t right, const uint8_t value);
 void neureka_task_set_mask_filter(neureka_task_t *task, const uint8_t top,
-                                  const uint8_t right, const uint8_t bottom,
-                                  const uint8_t left);
+                                  const uint8_t bottom, const uint8_t left,
+                                  const uint8_t right);
 /** neureka_task_set_dims
  *
  * All the strides variables are strides between elements alongside that
@@ -187,6 +187,6 @@ void neureka_task_set_dims(
     const uint32_t h_out, const uint32_t w_out, const uint32_t k_out,
     const uint32_t h_out_stride, const uint32_t w_out_stride,
     const uint8_t padding_top, const uint8_t padding_bottom,
-    const uint8_t padding_right, const uint8_t padding_left);
+    const uint8_t padding_left, const uint8_t padding_right);
 
 #endif // !__NEUREKA_TASK_H__
diff --git a/test/NeuralEngineFunctionalModel.py b/test/NeuralEngineFunctionalModel.py
index 08b3601..b41702b 100644
--- a/test/NeuralEngineFunctionalModel.py
+++ b/test/NeuralEngineFunctionalModel.py
@@ -28,24 +28,34 @@ def _norm_quant(
         bias_type: Optional[IntegerType],
         has_bias: bool,
         has_relu: bool,
+        verbose: bool,
     ) -> torch.Tensor:
         # Scale accumulators are in 48bit, so keeping the data in 64bit
         tensor = tensor * scale
         assert tensor.dtype == torch.int64
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after scale):")
+            print(tensor)
+
         if has_bias:
             assert bias is not None
             assert bias_type is not None
 
-            # Saturating cast to int32
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=True
+                tensor, bias_type, saturate=False
             ).type(torch.int32)
 
             tensor = tensor + bias
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=False
+                tensor, bias_type, saturate=True
             ).type(torch.int32)
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after bias):")
+            print(tensor)
+
         if has_relu:
             tensor = F.relu(tensor)
@@ -118,6 +128,7 @@ def convolution(
             bias_type,
             has_bias,
             has_relu,
+            verbose,
         )
 
         return output
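The `saturate` swap above moves the saturating int32 cast from before the bias addition to after it, so the scaled accumulator is only clamped once the bias has been applied. A standalone sketch of why the order matters (the helper below is an illustrative stand-in for the model's saturating cast, not code from the repository):

```c
#include <stdint.h>
#include <stdio.h>

// Clamp a 64-bit value into the int32 range (illustrative saturating cast).
static int64_t saturate_to_int32(int64_t x) {
  if (x > INT32_MAX)
    return INT32_MAX;
  if (x < INT32_MIN)
    return INT32_MIN;
  return x;
}

int main(void) {
  const int64_t acc = 1LL << 33;        // scaled accumulator, beyond int32 range
  const int64_t bias = 5 - (1LL << 33); // bias that brings it back into range

  const int64_t clamp_then_bias = saturate_to_int32(acc) + bias; // clamps too early
  const int64_t bias_then_clamp = saturate_to_int32(acc + bias); // 5, as intended

  printf("%lld %lld\n", (long long)clamp_then_bias, (long long)bias_then_clamp);
  return 0;
}
```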
diff --git a/test/NeurekaMemoryLayout.py b/test/NeurekaMemoryLayout.py
index 80a2786..028c7a3 100644
--- a/test/NeurekaMemoryLayout.py
+++ b/test/NeurekaMemoryLayout.py
@@ -20,8 +20,6 @@
 import numpy as np
 import numpy.typing as npt
 
-from TestClasses import IntegerType
-
 
 class NeurekaMemoryLayout:
     _WEIGHT_BANDWIDTH = 256
diff --git a/test/NnxTestClasses.py b/test/NnxTestClasses.py
index a7aaa00..edf227b 100644
--- a/test/NnxTestClasses.py
+++ b/test/NnxTestClasses.py
@@ -254,16 +254,22 @@ def from_conf(
         ).type(torch.int32)
         if global_shift is None:
             global_shift = torch.Tensor([0]).type(torch.int32)
+            conv_kwargs = {
+                **conf.__dict__,
+                "out_type": NeuralEngineFunctionalModel.ACCUMULATOR_TYPE,
+            }
             output = NeuralEngineFunctionalModel().convolution(
                 input,
                 weight,
                 scale,
                 bias,
                 global_shift,
-                verbose=verbose,
-                **conf.__dict__,
+                verbose=False,
+                **conv_kwargs,
+            )
+            global_shift = NnxTestGenerator._calculate_global_shift(
+                output, conf.out_type
             )
-            NnxTestGenerator._calculate_global_shift(output, conf.out_type)
 
         output = NeuralEngineFunctionalModel().convolution(
             input, weight, scale, bias, global_shift, verbose=verbose, **conf.__dict__
diff --git a/test/app/src/nnx_layer.c b/test/app/src/nnx_layer.c
index 004115e..6612619 100644
--- a/test/app/src/nnx_layer.c
+++ b/test/app/src/nnx_layer.c
@@ -31,10 +31,12 @@
 
 typedef ne16_norm_mode_e nnx_norm_mode_e;
 typedef ne16_quant_t nnx_quant_t;
+typedef ne16_quant_function_e nnx_quant_function_e;
 typedef ne16_norm_t nnx_norm_t;
 typedef ne16_task_t nnx_task_t;
 typedef ne16_dev_t nnx_dev_t;
 typedef ne16_pulp_conf_t nnx_bsp_conf_t;
+typedef ne16_task_flag_e nnx_task_flag_e;
 
 #define nnxTaskFlagTrue ne16TaskFlagTrue
 #define nnxTaskFlagFalse ne16TaskFlagFalse
@@ -46,7 +48,8 @@ typedef ne16_pulp_conf_t nnx_bsp_conf_t;
 #define nnx_task_set_weight_offset ne16_task_set_weight_offset
 #define nnx_task_set_dims ne16_task_set_dims
 #define nnx_task_set_dims_stride2x2 ne16_task_set_dims_stride2x2
-#define nnx_task_set_ptrs ne16_task_set_ptrs
+#define nnx_task_set_ptrs_conv ne16_task_set_ptrs_conv
+#define nnx_task_set_ptrs_norm_quant ne16_task_set_ptrs_norm_quant
 
 #define NNX_GVSOC_LOG_LEVEL NE16_GVSOC_LOG_LEVEL_ALL
 #define NNX_GVSOC_LOG_FORMAT NE16_GVSOC_LOG_FORMAT_HEXADECIMAL
@@ -73,10 +76,12 @@ typedef ne16_pulp_conf_t nnx_bsp_conf_t;
 
 typedef neureka_norm_mode_e nnx_norm_mode_e;
 typedef neureka_quant_t nnx_quant_t;
+typedef neureka_quant_function_e nnx_quant_function_e;
 typedef neureka_norm_t nnx_norm_t;
 typedef neureka_task_t nnx_task_t;
 typedef neureka_dev_t nnx_dev_t;
 typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
+typedef neureka_task_flag_e nnx_task_flag_e;
 
 #define nnxTaskFlagTrue neurekaTaskFlagTrue
 #define nnxTaskFlagFalse neurekaTaskFlagFalse
@@ -87,7 +92,8 @@ typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
 #define nnx_task_set_norm_quant neureka_task_set_norm_quant
 #define nnx_task_set_weight_offset neureka_task_set_weight_offset
 #define nnx_task_set_dims neureka_task_set_dims
-#define nnx_task_set_ptrs neureka_task_set_ptrs
+#define nnx_task_set_ptrs_conv neureka_task_set_ptrs_conv
+#define nnx_task_set_ptrs_norm_quant neureka_task_set_ptrs_norm_quant
 
 #define NNX_GVSOC_LOG_LEVEL NEUREKA_GVSOC_LOG_LEVEL_ALL
 #define NNX_GVSOC_LOG_FORMAT NEUREKA_GVSOC_LOG_FORMAT_HEXADECIMAL
@@ -114,26 +120,12 @@ typedef neureka_siracusa_conf_t nnx_bsp_conf_t;
 
 static void task_prepare(nnx_task_t *task) {
   nnx_task_init(task);
+#ifdef NNX_NEUREKA
+  nnx_task_set_op_to_conv(task, WEIGHT_HEIGHT, GROUPS > 1);
+#else
   nnx_task_set_op_to_conv(task, WEIGHT_HEIGHT, GROUPS > 1, STRIDE_HEIGHT);
-  nnx_task_set_bits(task, INPUT_BITS, OUTPUT_BITS, WEIGHT_BITS);
-
-#if HAS_NORM_QUANT == 1
-#if SCALE_BITS == 8
-  const nnx_norm_mode_e normMode = normMode8Bit;
-#elif SCALE_BITS == 32
-  const nnx_norm_mode_e normMode = normMode32Bit;
 #endif
-
-  nnx_task_set_norm_quant(
-      task,
-      (nnx_quant_t){.shift_amount = OUTSHIFT,
-                    .function =
-                        HAS_RELU ? quantFunctionRelu : quantFunctionIdentity,
-                    .flag_rounding = nnxTaskFlagFalse},
-      (nnx_norm_t){.mode = normMode,
-                   .flag_bias = HAS_BIAS ? nnxTaskFlagTrue : nnxTaskFlagFalse,
-                   .flag_shift = nnxTaskFlagFalse});
-#endif // HAS_NORM_QUANT
+  nnx_task_set_bits(task, INPUT_BITS, OUTPUT_BITS, WEIGHT_BITS);
 
   nnx_task_set_weight_offset(task, weightOffsetModeLayerWise, WEIGHT_OFFSET);
 
@@ -159,29 +151,43 @@ static void task_prepare(nnx_task_t *task) {
   nnx_task_set_dims_stride2x2(
       task, INPUT_HEIGHT, INPUT_WIDTH, INPUT_CHANNEL, h_in_stride, w_in_stride,
       OUTPUT_HEIGHT, OUTPUT_WIDTH, OUTPUT_CHANNEL, h_out_stride, w_out_stride,
-      WEIGHT_HEIGHT, WEIGHT_WIDTH, PADDING_TOP, PADDING_BOTTOM, PADDING_RIGHT,
-      PADDING_LEFT);
+      WEIGHT_HEIGHT, WEIGHT_WIDTH, PADDING_TOP, PADDING_BOTTOM, PADDING_LEFT,
+      PADDING_RIGHT);
 #else
   nnx_task_set_dims(task, INPUT_WIDTH, INPUT_CHANNEL, h_in_stride, w_in_stride,
                     OUTPUT_HEIGHT, OUTPUT_WIDTH, OUTPUT_CHANNEL, h_out_stride,
-                    w_out_stride, PADDING_TOP, PADDING_BOTTOM, PADDING_RIGHT,
-                    PADDING_LEFT);
+                    w_out_stride, PADDING_TOP, PADDING_BOTTOM, PADDING_LEFT,
+                    PADDING_RIGHT);
 #endif
 
-  nnx_task_set_ptrs(task, (uint32_t)input, INPUT_WIDTH, w_in_stride,
-                    PADDING_TOP, PADDING_LEFT, (uint32_t)output,
-                    (uint32_t)weight,
+  nnx_task_set_ptrs_conv(task, (uint32_t)input, INPUT_WIDTH, w_in_stride,
+                         PADDING_TOP, PADDING_LEFT, (uint32_t)output,
+                         (uint32_t)weight);
+
 #if HAS_NORM_QUANT == 1
-                    (uint32_t)scale, NULL,
-#if HAS_BIAS == 1
-                    (uint32_t)bias
-#else
-                    NULL
-#endif
-#else
-                    NULL, NULL, NULL
+#if SCALE_BITS == 8
+  const nnx_norm_mode_e normMode = normMode8Bit;
+#elif SCALE_BITS == 32
+  const nnx_norm_mode_e normMode = normMode32Bit;
 #endif
-  );
+
+  const nnx_task_flag_e flag_bias =
+      HAS_BIAS ? nnxTaskFlagTrue : nnxTaskFlagFalse;
+  const uint32_t bias_ptr = (uint32_t)(HAS_BIAS ? bias : NULL);
+
+  nnx_quant_function_e quant_function =
+      HAS_RELU ? quantFunctionRelu : quantFunctionIdentity;
+
+  nnx_task_set_norm_quant(task,
+                          (nnx_quant_t){.shift_amount = OUTSHIFT,
+                                        .function = quant_function,
+                                        .flag_rounding = nnxTaskFlagFalse},
+                          (nnx_norm_t){.mode = normMode,
+                                       .flag_bias = flag_bias,
+                                       .flag_shift = nnxTaskFlagFalse});
+
+  nnx_task_set_ptrs_norm_quant(task, (uint32_t)scale, NULL, bias_ptr);
+#endif // HAS_NORM_QUANT
 }
 
 static void task_execute(nnx_task_t *task) {
diff --git a/test/tests/test_116/conf.json b/test/tests/test_116/conf.json
new file mode 100644
index 0000000..4858679
--- /dev/null
+++ b/test/tests/test_116/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 3,
+  "in_width": 3,
+  "in_channel": 2,
+  "out_channel": 2,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "int8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_117/conf.json b/test/tests/test_117/conf.json
new file mode 100644
index 0000000..79beac9
--- /dev/null
+++ b/test/tests/test_117/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 10,
+  "in_width": 10,
+  "in_channel": 10,
+  "out_channel": 10,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "uint8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_118/conf.json b/test/tests/test_118/conf.json
new file mode 100644
index 0000000..16616eb
--- /dev/null
+++ b/test/tests/test_118/conf.json
@@ -0,0 +1,29 @@
+{
+  "in_height": 10,
+  "in_width": 10,
+  "in_channel": 128,
+  "out_channel": 128,
+  "padding": {
+    "top": 0,
+    "bottom": 0,
+    "left": 0,
+    "right": 0
+  },
+  "kernel_shape": {
+    "height": 1,
+    "width": 1
+  },
+  "depthwise": false,
+  "stride": {
+    "height": 1,
+    "width": 1
+  },
+  "in_type": "uint8",
+  "out_type": "int8",
+  "weight_type": "int8",
+  "scale_type": "uint32",
+  "bias_type": "int32",
+  "has_norm_quant": true,
+  "has_bias": true,
+  "has_relu": false
+}
\ No newline at end of file
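The three new test configurations exercise the freshly supported `uint32` scale. On the HAL side, a task would opt into it roughly as follows — a sketch built from the NE16 names in this patch, with an illustrative channel count, shift amount, and buffers:

```c
#include "ne16_task.h"

#define N_CHANNELS 16 // illustrative output-channel count

static uint32_t scale[N_CHANNELS]; // 32-bit per-channel scale
static int32_t bias[N_CHANNELS];

static void example_norm_quant_32bit(ne16_task_t *task) {
  ne16_task_set_norm_quant(
      task,
      (ne16_quant_t){.shift_amount = 12, // must stay within 0x00-0x1F
                     .function = quantFunctionIdentity,
                     .flag_rounding = ne16TaskFlagFalse},
      (ne16_norm_t){.mode = normMode32Bit, // normMode8Bit for 8-bit scales
                    .flag_bias = ne16TaskFlagTrue,
                    .flag_shift = ne16TaskFlagFalse});
  ne16_task_set_ptrs_norm_quant(task, (uint32_t)scale, 0 /* shift_ptr */,
                                (uint32_t)bias);
}
```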