From e2d895511dacd262b6d85f5d79aef9d62beb6650 Mon Sep 17 00:00:00 2001
From: Matteo Perotti <mperotti@iis.ee.ethz.ch>
Date: Sat, 24 Aug 2024 16:16:46 +0200
Subject: [PATCH] [apps] :art: clang-format pass

---
 apps/dtype-conv3d/kernel/bp-iconv3d.c | 121 ++++++++++----------------
 apps/dtype-conv3d/kernel/bp-iconv3d.h |  20 ++---
 apps/dtype-conv3d/kernel/dp-fconv3d.c |  16 ++--
 apps/dtype-conv3d/kernel/dp-fconv3d.h |  20 ++---
 apps/dtype-conv3d/kernel/dp-iconv3d.c |  81 ++++++++---------
 apps/dtype-conv3d/kernel/dp-iconv3d.h |  20 ++---
 apps/dtype-conv3d/kernel/hp-fconv3d.c |  87 +++++++++---------
 apps/dtype-conv3d/kernel/hp-fconv3d.h |  24 ++---
 apps/dtype-conv3d/kernel/hp-iconv3d.c |  81 ++++++++---------
 apps/dtype-conv3d/kernel/hp-iconv3d.h |  20 ++---
 apps/dtype-conv3d/kernel/sp-fconv3d.c |  98 ++++++++++-----------
 apps/dtype-conv3d/kernel/sp-fconv3d.h |  14 +--
 apps/dtype-conv3d/kernel/sp-iconv3d.c |  81 ++++++++---------
 apps/dtype-conv3d/kernel/sp-iconv3d.h |  20 ++---
 apps/dtype-conv3d/main.c              |   4 +-
 15 files changed, 318 insertions(+), 389 deletions(-)

diff --git a/apps/dtype-conv3d/kernel/bp-iconv3d.c b/apps/dtype-conv3d/kernel/bp-iconv3d.c
index 723b6760c..70ca4e5de 100644
--- a/apps/dtype-conv3d/kernel/bp-iconv3d.c
+++ b/apps/dtype-conv3d/kernel/bp-iconv3d.c
@@ -51,19 +51,19 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for the add and slidedown instructions.
 */
 
 #include "bp-iconv3d.h"
 
 extern int64_t event_trigger;
 
-int bp_iconv3d_verify(int8_t *matrix, int8_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold) {
+int bp_iconv3d_verify(int8_t *matrix, int8_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -76,7 +76,7 @@ int bp_iconv3d_verify(int8_t *matrix, int8_t *golden_matrix, int64_t R, int64_t
 }
 
 void bp_iconv3d_CHx7x7(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -98,8 +98,8 @@ void bp_iconv3d_CHx7x7(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
   }
 }
 
-void bp_iconv3d_CHx7x7_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void bp_iconv3d_CHx7x7_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -121,8 +121,8 @@ void bp_iconv3d_CHx7x7_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
   }
 }
 
-void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N;
@@ -175,15 +175,9 @@ void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
     i_slide_ptr_3 = i__ + n_ + 3 * (N + F - 1);
 
     // Load four input rows belonging to channel ch
-    asm volatile("vle8.v v0, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v4, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v8, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
+    asm volatile("vle8.v v0, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v4, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v8, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
     asm volatile("vle8.v v12, (%0); add %0, %0, %1"
                  : "+&r"(i__)
                  : "r"(ldi_pad));
@@ -275,12 +269,8 @@ void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-    asm volatile("vle8.v v2, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v6, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
+    asm volatile("vle8.v v2, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v6, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
     asm volatile("vle8.v v10, (%0); add %0, %0, %1"
                  : "+&r"(i__)
                  : "r"(ldi_pad));
@@ -710,15 +700,9 @@ void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
     i_slide_ptr_3 = i__ + n_ + 3 * (N + F - 1);
 
     // Load other three input rows (one was already loaded)
-    asm volatile("vle8.v v0, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v4, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v8, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
+    asm volatile("vle8.v v0, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v4, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v8, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
     asm volatile("vle8.v v12, (%0); add %0, %0, %1"
                  : "+&r"(i__)
                  : "r"(ldi_pad));
@@ -864,12 +848,8 @@ void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-    asm volatile("vle8.v v2, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
-    asm volatile("vle8.v v6, (%0); add %0, %0, %1"
-                 : "+&r"(i__)
-                 : "r"(ldi_pad));
+    asm volatile("vle8.v v2, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
+    asm volatile("vle8.v v6, (%0); add %0, %0, %1" : "+&r"(i__) : "r"(ldi_pad));
     asm volatile("vle8.v v10, (%0); add %0, %0, %1"
                  : "+&r"(i__)
                  : "r"(ldi_pad));
@@ -928,9 +908,8 @@ void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t
   asm volatile("vse8.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
 void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N;
@@ -1027,7 +1006,6 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1063,7 +1041,6 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
 
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1080,7 +1057,6 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1145,43 +1121,41 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1217,7 +1191,7 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
       if ((k | ch) == 0)
         asm volatile("vmul.vx v28, v0, %0" ::"r"(f[0 + base_idx_0]));
       else
-      asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
+        asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v14, v12, %0" ::"r"(*i_slide_ptr_3++));
 
@@ -1226,10 +1200,8 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v8, v10, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v12, v14, %0" ::"r"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1272,7 +1244,6 @@ void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/bp-iconv3d.h b/apps/dtype-conv3d/kernel/bp-iconv3d.h
index cc9134303..f7ca39df6 100644
--- a/apps/dtype-conv3d/kernel/bp-iconv3d.h
+++ b/apps/dtype-conv3d/kernel/bp-iconv3d.h
@@ -19,10 +19,10 @@
 #ifndef ICONV3D_H
 #define ICONV3D_H
 
+#include "printf.h"
+#include "util.h"
 #include <stdint.h>
 #include <stdio.h>
-#include "util.h"
-#include "printf.h"
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "int8"
 
 void bp_iconv3d_CHx7x7(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
-void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void bp_iconv3d_CHx7x7_block(int8_t *o, int8_t *i, int8_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void bp_iconv3d_CHx7x7_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void bp_iconv3d_CHx7x7_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
 void bp_iconv3d_warm(int8_t *o, int8_t *i, int8_t *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int bp_iconv3d_verify(int8_t *matrix, int8_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold);
+int bp_iconv3d_verify(int8_t *matrix, int8_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/dp-fconv3d.c b/apps/dtype-conv3d/kernel/dp-fconv3d.c
index c487ea0f4..6c88644f8 100644
--- a/apps/dtype-conv3d/kernel/dp-fconv3d.c
+++ b/apps/dtype-conv3d/kernel/dp-fconv3d.c
@@ -53,8 +53,8 @@
 extern int64_t event_trigger;
 
 // Verify the matrices
-int dp_fconv3d_verify(double *matrix, double *golden_matrix, int64_t R, int64_t C,
-                  double threshold) {
+int dp_fconv3d_verify(double *matrix, double *golden_matrix, int64_t R,
+                      int64_t C, double threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -67,7 +67,7 @@ int dp_fconv3d_verify(double *matrix, double *golden_matrix, int64_t R, int64_t
 }
 
 void dp_fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -89,8 +89,8 @@ void dp_fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
   }
 }
 
-void dp_fconv3d_CHx7x7_warm(double *o, double *i, double *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F) {
+void dp_fconv3d_CHx7x7_warm(double *o, double *i, double *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -112,8 +112,8 @@ void dp_fconv3d_CHx7x7_warm(double *o, double *i, double *f, int64_t M, int64_t
   }
 }
 
-void dp_fconv3d_CHx7x7_block(double *o, double *i, double *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void dp_fconv3d_CHx7x7_block(double *o, double *i, double *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 3;
@@ -921,7 +921,7 @@ void dp_fconv3d_CHx7x7_block(double *o, double *i, double *f, int64_t M, int64_t
 }
 
 void dp_fconv3d_warm(double *o, double *i, double *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 3;
diff --git a/apps/dtype-conv3d/kernel/dp-fconv3d.h b/apps/dtype-conv3d/kernel/dp-fconv3d.h
index 30f0955f8..20fe286d1 100644
--- a/apps/dtype-conv3d/kernel/dp-fconv3d.h
+++ b/apps/dtype-conv3d/kernel/dp-fconv3d.h
@@ -19,10 +19,10 @@
 #ifndef FCONV3D_H
 #define FCONV3D_H
 
+#include "printf.h"
+#include "util.h"
 #include <stdint.h>
 #include <stdio.h>
-#include "util.h"
-#include "printf.h"
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0.000000000001
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "float64"
 
 void dp_fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
-void dp_fconv3d_CHx7x7_block(double *o, double *i, double *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void dp_fconv3d_CHx7x7_block(double *o, double *i, double *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void dp_fconv3d_CHx7x7_warm(double *o, double *i, double *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void dp_fconv3d_CHx7x7_warm(double *o, double *i, double *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
 void dp_fconv3d_warm(double *o, double *i, double *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int dp_fconv3d_verify(double *matrix, double *golden_matrix, int64_t R, int64_t C,
-                  double threshold);
+int dp_fconv3d_verify(double *matrix, double *golden_matrix, int64_t R,
+                      int64_t C, double threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/dp-iconv3d.c b/apps/dtype-conv3d/kernel/dp-iconv3d.c
index 3fe5a5595..2a0441786 100644
--- a/apps/dtype-conv3d/kernel/dp-iconv3d.c
+++ b/apps/dtype-conv3d/kernel/dp-iconv3d.c
@@ -53,11 +53,11 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for the add and slidedown instructions.
 */
 
 #include "dp-iconv3d.h"
@@ -65,8 +65,8 @@
 extern int64_t event_trigger;
 
 // Verify the matrices
-int dp_iconv3d_verify(int64_t *matrix, int64_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold) {
+int dp_iconv3d_verify(int64_t *matrix, int64_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -79,7 +79,7 @@ int dp_iconv3d_verify(int64_t *matrix, int64_t *golden_matrix, int64_t R, int64_
 }
 
 void dp_iconv3d_CHx7x7(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -101,8 +101,8 @@ void dp_iconv3d_CHx7x7(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
   }
 }
 
-void dp_iconv3d_CHx7x7_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void dp_iconv3d_CHx7x7_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -124,8 +124,8 @@ void dp_iconv3d_CHx7x7_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64
   }
 }
 
-void dp_iconv3d_CHx7x7_block(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void dp_iconv3d_CHx7x7_block(int64_t *o, int64_t *i, int64_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 3;
@@ -930,9 +930,8 @@ void dp_iconv3d_CHx7x7_block(int64_t *o, int64_t *i, int64_t *f, int64_t M, int6
   asm volatile("vse64.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
 void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 2;
@@ -1029,7 +1028,6 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1065,7 +1063,6 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
 
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1082,7 +1079,6 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1147,43 +1143,41 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1219,7 +1213,7 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
       if ((k | ch) == 0)
         asm volatile("vmul.vx v28, v0, %0" ::"r"(f[0 + base_idx_0]));
       else
-      asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
+        asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v14, v12, %0" ::"r"(*i_slide_ptr_3++));
 
@@ -1228,10 +1222,8 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v8, v10, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v12, v14, %0" ::"r"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1274,7 +1266,6 @@ void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/dp-iconv3d.h b/apps/dtype-conv3d/kernel/dp-iconv3d.h
index bda3ad761..672b522fa 100644
--- a/apps/dtype-conv3d/kernel/dp-iconv3d.h
+++ b/apps/dtype-conv3d/kernel/dp-iconv3d.h
@@ -19,10 +19,10 @@
 #ifndef ICONV3D_H
 #define ICONV3D_H
 
+#include "printf.h"
+#include "util.h"
 #include <stdint.h>
 #include <stdio.h>
-#include "util.h"
-#include "printf.h"
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "int64"
 
 void dp_iconv3d_CHx7x7(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
-void dp_iconv3d_CHx7x7_block(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void dp_iconv3d_CHx7x7_block(int64_t *o, int64_t *i, int64_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void dp_iconv3d_CHx7x7_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void dp_iconv3d_CHx7x7_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
 void dp_iconv3d_warm(int64_t *o, int64_t *i, int64_t *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int dp_iconv3d_verify(int64_t *matrix, int64_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold);
+int dp_iconv3d_verify(int64_t *matrix, int64_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/hp-fconv3d.c b/apps/dtype-conv3d/kernel/hp-fconv3d.c
index 0219a2f49..c734f7498 100644
--- a/apps/dtype-conv3d/kernel/hp-fconv3d.c
+++ b/apps/dtype-conv3d/kernel/hp-fconv3d.c
@@ -51,11 +51,11 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for the add and slidedown instructions.
 */
 
 #include "hp-fconv3d.h"
@@ -63,21 +63,21 @@
 extern int64_t event_trigger;
 
 // Verify the matrices
-int hp_fconv3d_verify(_Float16 *matrix, _Float16 *golden_matrix, int64_t R, int64_t C,
-                  _Float16 threshold) {
+int hp_fconv3d_verify(_Float16 *matrix, _Float16 *golden_matrix, int64_t R,
+                      int64_t C, _Float16 threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
                             threshold)) {
         printf("Error: o[%d][%d] = %f, instead of %f\n", r, c,
-               (float) matrix[c + C * r], (float) golden_matrix[c + C * r]);
+               (float)matrix[c + C * r], (float)golden_matrix[c + C * r]);
         return 1;
       }
   return 0;
 }
 
-void hp_fconv3d_CHx7x7(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void hp_fconv3d_CHx7x7(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                       int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -99,8 +99,8 @@ void hp_fconv3d_CHx7x7(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t
   }
 }
 
-void hp_fconv3d_CHx7x7_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void hp_fconv3d_CHx7x7_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -122,8 +122,8 @@ void hp_fconv3d_CHx7x7_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, in
   }
 }
 
-void hp_fconv3d_CHx7x7_block(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void hp_fconv3d_CHx7x7_block(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 1;
@@ -929,9 +929,8 @@ void hp_fconv3d_CHx7x7_block(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, i
   asm volatile("vse16.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
-void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                     int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 1;
@@ -1028,7 +1027,6 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1064,7 +1062,6 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
       asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
 
       asm volatile("vfslide1down.vf v10, v8, %0" ::"f"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1081,7 +1078,6 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1146,43 +1142,41 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1218,7 +1212,7 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
       if ((k | ch) == 0)
         asm volatile("vfmul.vf v28, v0, %0" ::"f"(f[0 + base_idx_0]));
       else
-      asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
+        asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
       asm volatile("vfslide1down.vf v10, v8, %0" ::"f"(*i_slide_ptr_2++));
       asm volatile("vfslide1down.vf v14, v12, %0" ::"f"(*i_slide_ptr_3++));
 
@@ -1227,10 +1221,8 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
       asm volatile("vfslide1down.vf v8, v10, %0" ::"f"(*i_slide_ptr_2++));
       asm volatile("vfslide1down.vf v12, v14, %0" ::"f"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1273,7 +1265,6 @@ void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/hp-fconv3d.h b/apps/dtype-conv3d/kernel/hp-fconv3d.h
index 8c3aefb1e..76ed914b0 100644
--- a/apps/dtype-conv3d/kernel/hp-fconv3d.h
+++ b/apps/dtype-conv3d/kernel/hp-fconv3d.h
@@ -19,10 +19,10 @@
 #ifndef FCONV3D_H
 #define FCONV3D_H
 
-#include <stdint.h>
-#include <stdio.h>
 #include "printf.h"
 #include "util.h"
+#include <stdint.h>
+#include <stdio.h>
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 1
@@ -33,20 +33,20 @@
 #define DTYPE_PREFIX "HP"
 #define DATA_WIDTH "float16"
 
-void hp_fconv3d_CHx7x7(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+void hp_fconv3d_CHx7x7(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                       int64_t N, int64_t C, int64_t F);
 
-void hp_fconv3d_CHx7x7_block(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void hp_fconv3d_CHx7x7_block(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void hp_fconv3d_CHx7x7_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void hp_fconv3d_CHx7x7_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
-void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+void hp_fconv3d_warm(_Float16 *o, _Float16 *i, _Float16 *f, int64_t M,
+                     int64_t N, int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int hp_fconv3d_verify(_Float16 *matrix, _Float16 *golden_matrix, int64_t R, int64_t C,
-                  _Float16 threshold);
+int hp_fconv3d_verify(_Float16 *matrix, _Float16 *golden_matrix, int64_t R,
+                      int64_t C, _Float16 threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/hp-iconv3d.c b/apps/dtype-conv3d/kernel/hp-iconv3d.c
index 46b33c73c..9fb180278 100644
--- a/apps/dtype-conv3d/kernel/hp-iconv3d.c
+++ b/apps/dtype-conv3d/kernel/hp-iconv3d.c
@@ -51,19 +51,19 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for add and slidedown instructions.
 */
 
 #include "hp-iconv3d.h"
 
 extern int64_t event_trigger;
 
-int hp_iconv3d_verify(int16_t *matrix, int16_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold) {
+int hp_iconv3d_verify(int16_t *matrix, int16_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -76,7 +76,7 @@ int hp_iconv3d_verify(int16_t *matrix, int16_t *golden_matrix, int64_t R, int64_
 }
 
 void hp_iconv3d_CHx7x7(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -98,8 +98,8 @@ void hp_iconv3d_CHx7x7(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
   }
 }
 
-void hp_iconv3d_CHx7x7_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void hp_iconv3d_CHx7x7_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -121,8 +121,8 @@ void hp_iconv3d_CHx7x7_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64
   }
 }
 
-void hp_iconv3d_CHx7x7_block(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void hp_iconv3d_CHx7x7_block(int16_t *o, int16_t *i, int16_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 1;
@@ -928,9 +928,8 @@ void hp_iconv3d_CHx7x7_block(int16_t *o, int16_t *i, int16_t *f, int64_t M, int6
   asm volatile("vse16.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
 void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 1;
@@ -1027,7 +1026,6 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1063,7 +1061,6 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
 
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1080,7 +1077,6 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1145,43 +1141,41 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1217,7 +1211,7 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
       if ((k | ch) == 0)
         asm volatile("vmul.vx v28, v0, %0" ::"r"(f[0 + base_idx_0]));
       else
-      asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
+        asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v14, v12, %0" ::"r"(*i_slide_ptr_3++));
 
@@ -1226,10 +1220,8 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v8, v10, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v12, v14, %0" ::"r"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1272,7 +1264,6 @@ void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/hp-iconv3d.h b/apps/dtype-conv3d/kernel/hp-iconv3d.h
index f10950489..2727a0057 100644
--- a/apps/dtype-conv3d/kernel/hp-iconv3d.h
+++ b/apps/dtype-conv3d/kernel/hp-iconv3d.h
@@ -19,10 +19,10 @@
 #ifndef ICONV3D_H
 #define ICONV3D_H
 
+#include "printf.h"
+#include "util.h"
 #include <stdint.h>
 #include <stdio.h>
-#include "util.h"
-#include "printf.h"
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "int16"
 
 void hp_iconv3d_CHx7x7(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
-void hp_iconv3d_CHx7x7_block(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void hp_iconv3d_CHx7x7_block(int16_t *o, int16_t *i, int16_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void hp_iconv3d_CHx7x7_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void hp_iconv3d_CHx7x7_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
 void hp_iconv3d_warm(int16_t *o, int16_t *i, int16_t *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int hp_iconv3d_verify(int16_t *matrix, int16_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold);
+int hp_iconv3d_verify(int16_t *matrix, int16_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/sp-fconv3d.c b/apps/dtype-conv3d/kernel/sp-fconv3d.c
index 095331ce8..5625d23a6 100644
--- a/apps/dtype-conv3d/kernel/sp-fconv3d.c
+++ b/apps/dtype-conv3d/kernel/sp-fconv3d.c
@@ -51,11 +51,11 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for add and slidedown instructions.
 */
 
 #include "sp-fconv3d.h"
@@ -65,7 +65,7 @@ extern int64_t event_trigger;
 // Verify the matrices
 
 int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C,
-                  float threshold) {
+                      float threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -78,24 +78,27 @@ int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C,
 }
 
 /*
-int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C, float threshold) {
-  for (int64_t r = 0; r < R; ++r) {
-    for (int64_t c = 0; c < C; ++c) {
+int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C,
+float threshold) { for (int64_t r = 0; r < R; ++r) { for (int64_t c = 0; c < C;
+++c) {
       // if (1) {
-      if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r], threshold)) {
+      if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
+threshold)) {
         // Convert double to integer parts for matrix value
         int32_t mat_integer_part = (int32_t)matrix[c + C * r];
-        int32_t mat_fractional_part = (int32_t)((matrix[c + C * r] - mat_integer_part) * 1000000);
-        if (mat_fractional_part < 0) mat_fractional_part = -mat_fractional_part;
+        int32_t mat_fractional_part = (int32_t)((matrix[c + C * r] -
+mat_integer_part) * 1000000); if (mat_fractional_part < 0) mat_fractional_part =
+-mat_fractional_part;
 
         // Convert double to integer parts for golden matrix value
         int32_t gold_integer_part = (int32_t)golden_matrix[c + C * r];
-        int32_t gold_fractional_part = (int32_t)((golden_matrix[c + C * r] - gold_integer_part) * 1000000);
-        if (gold_fractional_part < 0) gold_fractional_part = -gold_fractional_part;
+        int32_t gold_fractional_part = (int32_t)((golden_matrix[c + C * r] -
+gold_integer_part) * 1000000); if (gold_fractional_part < 0)
+gold_fractional_part = -gold_fractional_part;
 
         printf("Error: o[%lld][%lld] = %lld.%06lld, instead of %lld.%06lld\n",
-               r, c, mat_integer_part, mat_fractional_part, gold_integer_part, gold_fractional_part);
-        return 1;
+               r, c, mat_integer_part, mat_fractional_part, gold_integer_part,
+gold_fractional_part); return 1;
       }
     }
   }
@@ -104,7 +107,7 @@ int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C,
 */
 
 void sp_fconv3d_CHx7x7(float *o, float *i, float *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -127,7 +130,7 @@ void sp_fconv3d_CHx7x7(float *o, float *i, float *f, int64_t M, int64_t N,
 }
 
 void sp_fconv3d_CHx7x7_warm(float *o, float *i, float *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                            int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -150,7 +153,7 @@ void sp_fconv3d_CHx7x7_warm(float *o, float *i, float *f, int64_t M, int64_t N,
 }
 
 void sp_fconv3d_CHx7x7_block(float *o, float *i, float *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                             int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 2;
@@ -956,9 +959,8 @@ void sp_fconv3d_CHx7x7_block(float *o, float *i, float *f, int64_t M, int64_t N,
   asm volatile("vse32.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
 void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 2;
@@ -1055,7 +1057,6 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1091,7 +1092,6 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
       asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
 
       asm volatile("vfslide1down.vf v10, v8, %0" ::"f"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1108,7 +1108,6 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1173,43 +1172,41 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1245,7 +1242,7 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
       if ((k | ch) == 0)
         asm volatile("vfmul.vf v28, v0, %0" ::"f"(f[0 + base_idx_0]));
       else
-      asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
+        asm volatile("vfslide1down.vf v6, v4, %0" ::"f"(*i_slide_ptr_1++));
       asm volatile("vfslide1down.vf v10, v8, %0" ::"f"(*i_slide_ptr_2++));
       asm volatile("vfslide1down.vf v14, v12, %0" ::"f"(*i_slide_ptr_3++));
 
@@ -1254,10 +1251,8 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
       asm volatile("vfslide1down.vf v8, v10, %0" ::"f"(*i_slide_ptr_2++));
       asm volatile("vfslide1down.vf v12, v14, %0" ::"f"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1300,7 +1295,6 @@ void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/sp-fconv3d.h b/apps/dtype-conv3d/kernel/sp-fconv3d.h
index fb5ae3672..9dcfb6b19 100644
--- a/apps/dtype-conv3d/kernel/sp-fconv3d.h
+++ b/apps/dtype-conv3d/kernel/sp-fconv3d.h
@@ -19,10 +19,10 @@
 #ifndef FCONV3D_H
 #define FCONV3D_H
 
-#include <stdint.h>
-#include <stdio.h>
 #include "printf.h"
 #include "util.h"
+#include <stdint.h>
+#include <stdio.h>
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0.0001
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "float32"
 
 void sp_fconv3d_CHx7x7(float *o, float *i, float *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
 void sp_fconv3d_CHx7x7_block(float *o, float *i, float *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+                             int64_t n_, int64_t C, int64_t F);
 
 void sp_fconv3d_CHx7x7_warm(float *o, float *i, float *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+                            int64_t C, int64_t F);
 
 void sp_fconv3d_warm(float *o, float *i, float *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
 int sp_fconv3d_verify(float *matrix, float *golden_matrix, int64_t R, int64_t C,
-                  float threshold);
+                      float threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/kernel/sp-iconv3d.c b/apps/dtype-conv3d/kernel/sp-iconv3d.c
index 9a53ef474..621430645 100644
--- a/apps/dtype-conv3d/kernel/sp-iconv3d.c
+++ b/apps/dtype-conv3d/kernel/sp-iconv3d.c
@@ -51,19 +51,19 @@
   Change vse64.v and store instructions.
   Adjust pointer arithmetic:
   Adjust the stride values for pointer increments (ldo, ldi_pad)
-  ldo (likely stands for "load output"): This is the stride value used to move to the next row in the output matrix.
-  ldi_pad (likely stands for "load input padded"): This is the stride value used to move to the next row in the padded input matrix.
-  Adjust data type in arithmetic instructions:
-  vfmacc.vf for float, vmacc.vx for int
-  Same for add and slidedown instruction
+  ldo (likely stands for "load output"): This is the stride value used to move
+  to the next row in the output matrix. ldi_pad (likely stands for "load input
+  padded"): This is the stride value used to move to the next row in the padded
+  input matrix. Adjust data type in arithmetic instructions: vfmacc.vf for
+  float, vmacc.vx for int. Same for add and slidedown instructions.
 */
 
 #include "sp-iconv3d.h"
 
 extern int64_t event_trigger;
 
-int sp_iconv3d_verify(int32_t *matrix, int32_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold) {
+int sp_iconv3d_verify(int32_t *matrix, int32_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold) {
   for (int r = 0; r < R; ++r)
     for (int c = 0; c < C; ++c)
       if (!similarity_check(matrix[c + C * r], golden_matrix[c + C * r],
@@ -76,7 +76,7 @@ int sp_iconv3d_verify(int32_t *matrix, int32_t *golden_matrix, int64_t R, int64_
 }
 
 void sp_iconv3d_CHx7x7(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+                       int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -98,8 +98,8 @@ void sp_iconv3d_CHx7x7(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
   }
 }
 
-void sp_iconv3d_CHx7x7_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F) {
+void sp_iconv3d_CHx7x7_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F) {
 
   unsigned long int block_size_n;
 
@@ -121,8 +121,8 @@ void sp_iconv3d_CHx7x7_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64
   }
 }
 
-void sp_iconv3d_CHx7x7_block(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+void sp_iconv3d_CHx7x7_block(int32_t *o, int32_t *i, int32_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 2;
@@ -928,9 +928,8 @@ void sp_iconv3d_CHx7x7_block(int32_t *o, int32_t *i, int32_t *f, int64_t M, int6
   asm volatile("vse32.v  v28, (%0); add %0, %0, %1" : "+&r"(o) : "r"(ldo));
 }
 
-
 void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F) {
+                     int64_t n_, int64_t C, int64_t F) {
 
   // Helper variables
   int64_t ldo = N << 2;
@@ -1027,7 +1026,6 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
     i_slide_ptr_1 = i__ + n_ + 1 * (N + F - 1);
     i_slide_ptr_2 = i__ + n_ + 2 * (N + F - 1);
 
-
     // Main kernel, unrolled by 2
     for (int k = 0; k < F / 2; ++k) {
       // Two base indexes because of the unrolling
@@ -1063,7 +1061,6 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
 
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
-
     }
 
     // The very last iterations require mixing the instructions with the store
@@ -1080,7 +1077,6 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
   // Reuse preloaded coefficients
   // Buffer the next coefficients for faster use
 
-
   // Bump the input ptr
   i_ += 3 * (N + F - 1);
 
@@ -1145,43 +1141,41 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
 
       if (ch != C - 1) {
         int64_t base_idx_0 = (ch + 1) * fch_len;
-
       }
     }
-   }
+  }
 
-    // Bump the input ptr
-    i_ += N + F - 1;
+  // Bump the input ptr
+  i_ += N + F - 1;
 
 #ifdef VCD_DUMP
-    // Stop dumping VCD
-    event_trigger = -1;
+  // Stop dumping VCD
+  event_trigger = -1;
 #endif
 
-    //////////////
-    // UNROLL 1 //
-    //////////////
+  //////////////
+  // UNROLL 1 //
+  //////////////
 
-    // Loop on the channels
-    for (int ch = 0; ch < C; ++ch) {
+  // Loop on the channels
+  for (int ch = 0; ch < C; ++ch) {
 
-      // Point to the first element of the channel ch
-      i__ = i_ + ch * ich_len;
+    // Point to the first element of the channel ch
+    i__ = i_ + ch * ich_len;
 
-      // Start calculating the next pointers to the elements to be slided in
-      i_slide_ptr_1 = i__ + n_;
+    // Start calculating the next pointers to the elements to be slided in
+    i_slide_ptr_1 = i__ + n_;
 
-      for (int k = 0; k < F / 2; ++k) {
-        // Two base indexes because of the unrolling
-        // Point to the first element of the current column (k) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
-        // Point to the first element of the current column (k+1) of the current
-        // channel (ch) of the filter (f)
-        int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
+    for (int k = 0; k < F / 2; ++k) {
+      // Two base indexes because of the unrolling
+      // Point to the first element of the current column (k) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_0 = (2 * k + 2) + (ch * fch_len);
+      // Point to the first element of the current column (k+1) of the current
+      // channel (ch) of the filter (f)
+      int64_t base_idx_1 = (2 * k + 1) + (ch * fch_len);
     }
 
-
     // Bump the input ptr
     i_ += N + F - 1;
   }
@@ -1217,7 +1211,7 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
       if ((k | ch) == 0)
         asm volatile("vmul.vx v28, v0, %0" ::"r"(f[0 + base_idx_0]));
       else
-      asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
+        asm volatile("vslide1down.vx v6, v4, %0" ::"r"(*i_slide_ptr_1++));
       asm volatile("vslide1down.vx v10, v8, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v14, v12, %0" ::"r"(*i_slide_ptr_3++));
 
@@ -1226,10 +1220,8 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
       asm volatile("vslide1down.vx v8, v10, %0" ::"r"(*i_slide_ptr_2++));
       asm volatile("vslide1down.vx v12, v14, %0" ::"r"(*i_slide_ptr_3++));
     }
-
   }
 
-
   // Bump the input ptr
   i_ += 4 * (N + F - 1);
 
@@ -1272,7 +1264,6 @@ void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
   }
 }
 
-
 /*
   ////////////////////
   // MAIN ALGOMITHM //
diff --git a/apps/dtype-conv3d/kernel/sp-iconv3d.h b/apps/dtype-conv3d/kernel/sp-iconv3d.h
index c5c6b5066..cbe001401 100644
--- a/apps/dtype-conv3d/kernel/sp-iconv3d.h
+++ b/apps/dtype-conv3d/kernel/sp-iconv3d.h
@@ -19,10 +19,10 @@
 #ifndef ICONV3D_H
 #define ICONV3D_H
 
+#include "printf.h"
+#include "util.h"
 #include <stdint.h>
 #include <stdio.h>
-#include "util.h"
-#include "printf.h"
 
 // Threshold for FP numbers comparison during the final check
 #define THRESHOLD 0
@@ -34,19 +34,19 @@
 #define DATA_WIDTH "int32"
 
 void sp_iconv3d_CHx7x7(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                    int64_t C, int64_t F);
+                       int64_t C, int64_t F);
 
-void sp_iconv3d_CHx7x7_block(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                          int64_t n_, int64_t C, int64_t F);
+void sp_iconv3d_CHx7x7_block(int32_t *o, int32_t *i, int32_t *f, int64_t M,
+                             int64_t N, int64_t n_, int64_t C, int64_t F);
 
-void sp_iconv3d_CHx7x7_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                         int64_t C, int64_t F);
+void sp_iconv3d_CHx7x7_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M,
+                            int64_t N, int64_t C, int64_t F);
 
 void sp_iconv3d_warm(int32_t *o, int32_t *i, int32_t *f, int64_t M, int64_t N,
-                  int64_t n_, int64_t C, int64_t F);
+                     int64_t n_, int64_t C, int64_t F);
 
 // Verify the matrices
-int sp_iconv3d_verify(int32_t *matrix, int32_t *golden_matrix, int64_t R, int64_t C,
-                  int64_t threshold);
+int sp_iconv3d_verify(int32_t *matrix, int32_t *golden_matrix, int64_t R,
+                      int64_t C, int64_t threshold);
 
 #endif
diff --git a/apps/dtype-conv3d/main.c b/apps/dtype-conv3d/main.c
index da6f80b4a..53b1ba640 100644
--- a/apps/dtype-conv3d/main.c
+++ b/apps/dtype-conv3d/main.c
@@ -125,8 +125,8 @@ int main() {
   float utilization = 100 * performance / (2.0 * NR_LANES * DTYPE_FACTOR);
 
   printf("The execution took %d cycles.\n", runtime);
-  printf("The performance is %f %s-OP/cycle (%f%% utilization).\n",
-         performance, DTYPE_PREFIX, utilization);
+  printf("The performance is %f %s-OP/cycle (%f%% utilization).\n", performance,
+         DTYPE_PREFIX, utilization);
 
   // Verify correctness
   printf("Verifying result...\n");