diff --git a/test/Ne16MemoryLayout.py b/test/Ne16MemoryLayout.py
index 30729ab..db76fb1 100644
--- a/test/Ne16MemoryLayout.py
+++ b/test/Ne16MemoryLayout.py
@@ -63,15 +63,14 @@ def weightEncode(
         # (cout, cinMajor, Bits, flattened spatial, cinMinor)
         weight = weight.transpose(0, 1, 4, 3, 2)
 
-        # Prepare for packing
-        # (cout, cinMajor, Bits, flattened spatial, cinMinorBytes, 8)
-        cinMinorBytes = int(np.ceil(cinMinor / 8))
-        weight = np.stack(np.split(weight, cinMinorBytes, axis=-1), axis=-2)
-
-        # Pack
-        # (cout, cinMajor, Bits, flattened spatial, cinMinorBytes)
+        # Pack bits
+        # (-1, 8): regroup the bits into rows of 8, one row per output byte
+        weight = weight.reshape(-1, 8)
+        # (-1, 1): each row of 8 bits is packed into one byte, first bit in the LSB
         weight = np.packbits(weight, axis=-1, bitorder="little")
 
+        # Flatten the weights
+        # (-1, )
         return weight.flatten()
 
     @staticmethod
diff --git a/test/NeurekaMemoryLayout.py b/test/NeurekaMemoryLayout.py
index 028c7a3..a9acb4c 100644
--- a/test/NeurekaMemoryLayout.py
+++ b/test/NeurekaMemoryLayout.py
@@ -105,15 +105,14 @@ def weightEncode(
                 cout * cinMajor, NeurekaMemoryLayout._WEIGHT_BANDWIDTH
             )  # cout*cinMajor, 256b
 
-        # Prepare for packing
-        # (-1, Weight Bandwidth Bytes, 8)
-        weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH / 8))
-        weight = np.stack(np.split(weight, weightBandwidthBytes, axis=-1), axis=-2)
-
         # Pack bits
-        # (-1, Weight Bandwidth Bytes)
+        # (-1, 8): regroup the bits into rows of 8, one row per output byte
+        weight = weight.reshape(-1, 8)
+        # (-1, 1): each row of 8 bits is packed into one byte, first bit in the LSB
         weight = np.packbits(weight, axis=-1, bitorder="little")
 
+        # Flatten the weights
+        # (-1, )
         return weight.flatten()
 
     @staticmethod