From 1d2f45e3105f4f8185abf7550e849b8b901523a9 Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Tue, 13 Feb 2024 14:33:54 +0100
Subject: [PATCH 1/6] Fix functional model saturating casts and add more
 intermediate result prints

---
 test/NeuralEngineFunctionalModel.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/test/NeuralEngineFunctionalModel.py b/test/NeuralEngineFunctionalModel.py
index 08b3601..b41702b 100644
--- a/test/NeuralEngineFunctionalModel.py
+++ b/test/NeuralEngineFunctionalModel.py
@@ -28,24 +28,34 @@ def _norm_quant(
         bias_type: Optional[IntegerType],
         has_bias: bool,
         has_relu: bool,
+        verbose: bool,
     ) -> torch.Tensor:
         # Scale accumulators are in 48bit, so keeping the data in 64bit
         tensor = tensor * scale
         assert tensor.dtype == torch.int64
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after scale):")
+            print(tensor)
+
         if has_bias:
             assert bias is not None
             assert bias_type is not None
-            # Saturating cast to int32
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=True
+                tensor, bias_type, saturate=False
             ).type(torch.int32)
 
             tensor = tensor + bias
+
             tensor = NeuralEngineFunctionalModel._cast(
-                tensor, bias_type, saturate=False
+                tensor, bias_type, saturate=True
             ).type(torch.int32)
 
+            if verbose:
+                print("INTERMEDIATE RESULTS (after bias):")
+                print(tensor)
+
         if has_relu:
             tensor = F.relu(tensor)
 
@@ -118,6 +128,7 @@ def convolution(
                 bias_type,
                 has_bias,
                 has_relu,
+                verbose,
             )
 
         return output

From faadd4ece937305d67caf0b87f522e455e57488f Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Tue, 13 Feb 2024 14:34:28 +0100
Subject: [PATCH 2/6] Fix global_shift calculation

---
 test/NnxTestClasses.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/NnxTestClasses.py b/test/NnxTestClasses.py
index a7aaa00..d94c0fa 100644
--- a/test/NnxTestClasses.py
+++ b/test/NnxTestClasses.py
@@ -254,16 +254,17 @@ def from_conf(
                 ).type(torch.int32)
             if global_shift is None:
                 global_shift = torch.Tensor([0]).type(torch.int32)
+                conv_kwargs = {**conf.__dict__, "out_type": NeuralEngineFunctionalModel.ACCUMULATOR_TYPE}
                 output = NeuralEngineFunctionalModel().convolution(
                     input,
                     weight,
                     scale,
                     bias,
                     global_shift,
-                    verbose=verbose,
-                    **conf.__dict__,
+                    verbose=False,
+                    **conv_kwargs,
                 )
-                NnxTestGenerator._calculate_global_shift(output, conf.out_type)
+                global_shift = NnxTestGenerator._calculate_global_shift(output, conf.out_type)
 
         output = NeuralEngineFunctionalModel().convolution(
             input, weight, scale, bias, global_shift, verbose=verbose, **conf.__dict__

From dfe0c4fb6cf4b2f50f5d136a6b5531cab9f89b94 Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Tue, 13 Feb 2024 14:47:44 +0100
Subject: [PATCH 3/6] Add tests with 32bit scale

---
 test/tests/test_116/conf.json | 29 +++++++++++++++++++++++++++++
 test/tests/test_117/conf.json | 29 +++++++++++++++++++++++++++++
 test/tests/test_118/conf.json | 29 +++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 test/tests/test_116/conf.json
 create mode 100644 test/tests/test_117/conf.json
 create mode 100644 test/tests/test_118/conf.json

diff --git a/test/tests/test_116/conf.json b/test/tests/test_116/conf.json
new file mode 100644
index 0000000..4858679
--- /dev/null
+++ b/test/tests/test_116/conf.json
@@ -0,0 +1,29 @@
+{
+    "in_height": 3,
+    "in_width": 3,
+    "in_channel": 2,
+    "out_channel": 2,
+    "padding": {
+        "top": 0,
+        "bottom": 0,
+        "left": 0,
+        "right": 0
+    },
+    "kernel_shape": {
+        "height": 1,
+        "width": 1
+    },
+    "depthwise": false,
+    "stride": {
+        "height": 1,
+        "width": 1
+    },
+    "in_type": "int8",
+    "out_type": "int8",
+    "weight_type": "int8",
+    "scale_type": "uint32",
+    "bias_type": "int32",
+    "has_norm_quant": true,
+    "has_bias": true,
+    "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_117/conf.json b/test/tests/test_117/conf.json
new file mode 100644
index 0000000..79beac9
--- /dev/null
+++ b/test/tests/test_117/conf.json
@@ -0,0 +1,29 @@
+{
+    "in_height": 10,
+    "in_width": 10,
+    "in_channel": 10,
+    "out_channel": 10,
+    "padding": {
+        "top": 0,
+        "bottom": 0,
+        "left": 0,
+        "right": 0
+    },
+    "kernel_shape": {
+        "height": 1,
+        "width": 1
+    },
+    "depthwise": false,
+    "stride": {
+        "height": 1,
+        "width": 1
+    },
+    "in_type": "uint8",
+    "out_type": "int8",
+    "weight_type": "int8",
+    "scale_type": "uint32",
+    "bias_type": "int32",
+    "has_norm_quant": true,
+    "has_bias": true,
+    "has_relu": false
+}
\ No newline at end of file
diff --git a/test/tests/test_118/conf.json b/test/tests/test_118/conf.json
new file mode 100644
index 0000000..16616eb
--- /dev/null
+++ b/test/tests/test_118/conf.json
@@ -0,0 +1,29 @@
+{
+    "in_height": 10,
+    "in_width": 10,
+    "in_channel": 128,
+    "out_channel": 128,
+    "padding": {
+        "top": 0,
+        "bottom": 0,
+        "left": 0,
+        "right": 0
+    },
+    "kernel_shape": {
+        "height": 1,
+        "width": 1
+    },
+    "depthwise": false,
+    "stride": {
+        "height": 1,
+        "width": 1
+    },
+    "in_type": "uint8",
+    "out_type": "int8",
+    "weight_type": "int8",
+    "scale_type": "uint32",
+    "bias_type": "int32",
+    "has_norm_quant": true,
+    "has_bias": true,
+    "has_relu": false
+}
\ No newline at end of file

From 6b0c166d08d2bb82ee0ec357cd136cc6181be893 Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Tue, 13 Feb 2024 14:52:10 +0100
Subject: [PATCH 4/6] Fix formatting

---
 test/NnxTestClasses.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/test/NnxTestClasses.py b/test/NnxTestClasses.py
index d94c0fa..edf227b 100644
--- a/test/NnxTestClasses.py
+++ b/test/NnxTestClasses.py
@@ -254,7 +254,10 @@ def from_conf(
                 ).type(torch.int32)
             if global_shift is None:
                 global_shift = torch.Tensor([0]).type(torch.int32)
-                conv_kwargs = {**conf.__dict__, "out_type": NeuralEngineFunctionalModel.ACCUMULATOR_TYPE}
+                conv_kwargs = {
+                    **conf.__dict__,
+                    "out_type": NeuralEngineFunctionalModel.ACCUMULATOR_TYPE,
+                }
                 output = NeuralEngineFunctionalModel().convolution(
                     input,
                     weight,
@@ -264,7 +267,9 @@ def from_conf(
                     verbose=False,
                     **conv_kwargs,
                 )
-                global_shift = NnxTestGenerator._calculate_global_shift(output, conf.out_type)
+                global_shift = NnxTestGenerator._calculate_global_shift(
+                    output, conf.out_type
+                )
 
         output = NeuralEngineFunctionalModel().convolution(
             input, weight, scale, bias, global_shift, verbose=verbose, **conf.__dict__

From bb875a36bbac3a0d9647597fa2819a575641420f Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Wed, 14 Feb 2024 06:58:05 +0100
Subject: [PATCH 5/6] Updated changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84b516f..78ad1bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,12 +8,14 @@
 - Support for kernels without normalization and quantization for NE16
 - isort check
 - publication citation
+- support for 32bit scale
 
 ### Changed
 
 - `ne16_task_init` got split into smaller parts: `ne16_task_init`, `ne16_task_set_op_to_conv`, `ne16_task_set_weight_offset`, `ne16_task_set_bits`, `ne16_task_set_norm_quant`
 - strides in `ne16_task_set_strides`, `ne16_task_set_dims`, and `ne16_task_set_ptrs` are now strides between consecutive elements in that dimension
 - `ne16_task_queue_size` is now `NE16_TASK_QUEUE_SIZE`
+- `ne16_task_set_ptrs` split into `ne16_task_set_ptrs_conv` and `ne16_task_set_ptrs_norm_quant`
 
 ### Removed
 

From 1fed889537522e70787161efcc5d8aaab1fe3f86 Mon Sep 17 00:00:00 2001
From: Luka Macan <luka.macan@unibo.it>
Date: Wed, 14 Feb 2024 06:58:23 +0100
Subject: [PATCH 6/6] Updated accelerator features

---
 ne16/README.md    | 2 +-
 neureka/README.md | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/ne16/README.md b/ne16/README.md
index 9f05956..750ccd5 100644
--- a/ne16/README.md
+++ b/ne16/README.md
@@ -28,7 +28,7 @@
 - [ ] Scale type
     - [x] uint8
     - [ ] uint16
-    - [ ] uint32
+    - [x] uint32
 - [x] Bias type
     - [x] int32
 - [ ] Weight type
diff --git a/neureka/README.md b/neureka/README.md
index 9c83f4e..51586c3 100644
--- a/neureka/README.md
+++ b/neureka/README.md
@@ -16,7 +16,6 @@ Github repo [link](https://github.com/siracusa-soc/ne).
     - [x] Bias (w/ and w/o)
     - [ ] Per-channel shift
     - [x] Per-layer shift
-    - [ ] Rounding
 - [x] Input type
     - [x] uint8
     - [x] int8
@@ -24,9 +23,9 @@ Github repo [link](https://github.com/siracusa-soc/ne).
     - [x] int8
     - [x] uint8 (only w/ Relu)
     - [x] int32
-- [ ] Scale type
+- [x] Scale type
     - [x] uint8
-    - [ ] uint32
+    - [x] uint32
 - [x] Bias type
     - [x] int32
 - [ ] Weight type