Merge the logic of quantize and q_leaky_relu into one
Aba committed Sep 16, 2023
1 parent 3103116 commit 6e96871
Showing 3 changed files with 39 additions and 39 deletions.
16 changes: 8 additions & 8 deletions c/model.h
@@ -1,25 +1,25 @@
#define N_BUNDLES 7
Bundle_t bundles [N_BUNDLES] = {
{.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },
-  {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
-  {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
-  {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
-  {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
-  {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
-  {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=170, .b_val_shift=9, .b_bias_shift=0, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
+  {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
+  {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
+  {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
+  {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
+  {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
+  {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
};

#define X_BITS_L2 2
#define W_BITS_L2 2
#define PE_ROWS 8
#define PE_COLS 24

-#define WB_BYTES 98436
+#define WB_BYTES 98212
#define W_BYTES 98048
#define X_BYTES 2520
#define X_BYTES_ALL 120040
#define Y_BYTES 294920
#define B_TYPE signed short
-#define B_WORDS 194
+#define B_WORDS 82
#define DATA_DIR "D:/dnn-engine/test/vectors"
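A quick consistency check on the regenerated defines (inferred from the .b_offset arithmetic above, not stated in the commit): each bundle with .is_bias=1 appears to contribute .coe * .t bias words, so B_WORDS = 2*8 + 3*6 + 8*3 + 24*1 = 82, and WB_BYTES = W_BYTES + B_WORDS * sizeof(signed short) = 98048 + 82*2 = 98212, matching the new values. Bundles with .is_bias=0 contribute nothing, which is why their .b_val_shift drops to 0 and the offsets repeat.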

38 changes: 18 additions & 20 deletions test/py/bundle.py
@@ -73,10 +73,18 @@ def extract_act(signature):
    frac = d['bits']-int_bit-sign_bit

    if isinstance(ilayer.quantizer, quantized_bits):
-       return { 'layer':ilayer, 'type':'quant', 'bits':d['bits'], 'frac':frac}
+       return { 'layer':ilayer, 'type':'quant', 'bits':d['bits'], 'frac':frac, 'plog_slope': 0, 'non_zero':1}
-   elif 'relu' in str(ilayer.quantizer.__class__) and ilayer.quantizer.negative_slope != 0:
-       return { 'layer':ilayer, 'type':'relu', 'slope':ilayer.quantizer.negative_slope, 'bits':d['bits'], 'frac':frac}
+   elif 'relu' in str(ilayer.quantizer.__class__):
+       slope = ilayer.quantizer.negative_slope
+       if slope == 0:
+           assert ilayer.quantizer.bits != 1, "Error: Cannot use bits=1 with Relu. Use leaky_relu. Reason: Qkeras keeps relu signed"
+           ilayer.quantizer.bits -= 1
+       non_zero = 1*(slope != 0)
+       log_slope = np.log2(slope) if non_zero else 0
+       assert int(log_slope) == log_slope and log_slope <= 0, f"Error: negative_slope:{slope} of leaky_relu has to be a negative power of two, e.g. 0.125"
+       return { 'layer':ilayer, 'type':'relu', 'bits':d['bits'], 'frac':frac, 'slope':ilayer.quantizer.negative_slope, 'plog_slope':-int(log_slope), 'non_zero':non_zero}
    else:
+       # TODO: support relu (slope=0). Qkeras uses different range for relu
        raise Exception("Only leaky_relu (relu with negative_slope > 0) is supported!")

'''
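For intuition, here is a minimal standalone sketch of how the returned plog_slope/non_zero pair encodes both quantizer kinds (it mirrors the logic above; encode_slope is a hypothetical helper, not a repo function):

import numpy as np

def encode_slope(slope):
    # slope = 0     -> plain ReLU: negatives dropped entirely (non_zero = 0)
    # slope = 2**-k -> leaky ReLU: negatives scaled by k right-shifts
    non_zero = 1*(slope != 0)
    log_slope = np.log2(slope) if non_zero else 0
    assert int(log_slope) == log_slope and log_slope <= 0
    return -int(log_slope), non_zero

print(encode_slope(0.125))  # (3, 1): slope 1/8 = 2**-3
print(encode_slope(0))      # (0, 0): pure ReLU
# quantized_bits layers simply get plog_slope=0, non_zero=1, i.e. the identity on negatives.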
@@ -282,25 +290,15 @@ def div_round(n,d):
        return (n + (d//2) - (~(d|n//d) &1)) // d

    def apply_act(act_dict):
        assert act_dict['type'] in ['quant', 'relu'], 'Error: Only quant & relu are supported yet'

        x = self.proc['int'].astype(np.int32)
-       frac, bits = act_dict['frac'], act_dict['bits']

-       if act_dict['type'] == 'quant':
-           shift_bits = self.proc['frac']-frac
-
-           x = shift_round(x, shift_bits) # = np.around(x/2**shift_bits)
-           x = np.clip(x, -2**(bits-1), 2**(bits-1)-1).astype(int)
-
-       elif act_dict['type'] == 'relu':
-           log_act = -int(np.log2(act_dict['slope']))
-           assert log_act == -np.log2(act_dict['slope']), f"Leaky Relu slope: {act_dict['slope']} should be a power of two (eg:0.125)"
-           shift_bits = log_act + self.proc['frac']-frac
-
-           x = (x<0)*x + (((x>0)*x) << log_act)
-           x = shift_round(x, shift_bits) # = np.around(x/2**shift_bits)
-           x = np.clip(x, -2**(bits-log_act-1), 2**(bits-1)-1).astype(int)
-       else:
-           raise Exception('Only relu is supported yet')
+       frac, bits, plog_slope, non_zero = act_dict['frac'], act_dict['bits'], act_dict['plog_slope'], act_dict['non_zero']
+       shift_bits = plog_slope + self.proc['frac']-frac
+
+       x = ((x<0)*x)*non_zero + (((x>0)*x) << plog_slope)
+       x = shift_round(x, shift_bits) # = np.around(x/2**shift_bits)
+       x = np.clip(x, -2**(bits-plog_slope-1), 2**(bits-1)-1).astype(int)
+
        self.proc['int'], self.proc['bits'], self.proc['frac'] = x, bits, frac

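Numerically, the merge works because plain quantization is just the plog_slope=0, non_zero=1 case of the leaky-ReLU path. Below is a self-contained sketch of the unified logic, with hypothetical frac/bits values; it assumes a simple round-half-up shift_round, whereas the repo's shift_round/div_round may treat ties differently:

import numpy as np

def shift_round(n, s):
    # round-half-up shift: approximately round(n / 2**s)
    return (n + (1 << (s - 1))) >> s if s > 0 else n

def apply_act(x, frac_in, frac, bits, plog_slope, non_zero):
    shift_bits = plog_slope + frac_in - frac
    x = ((x < 0) * x) * non_zero + (((x > 0) * x) << plog_slope)
    x = shift_round(x, shift_bits)
    return np.clip(x, -2**(bits-plog_slope-1), 2**(bits-1)-1).astype(int)

x = np.array([-100, -8, 8, 100])
# quantized_bits: plog_slope=0, non_zero=1 -> pure rescale and clip
print(apply_act(x, 6, 3, 8, 0, 1))   # [-12  -1   1  13]
# leaky ReLU with slope 2**-3: positives are pre-shifted by 3, so after the
# shared shift the negatives come out scaled by 0.125 relative to positives
print(apply_act(x, 6, 3, 8, 3, 1))   # [-2  0  1 13]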
24 changes: 13 additions & 11 deletions test/py/param_test.py
@@ -159,6 +159,8 @@ def compile(c):
class Config:
    K : int
    CO: int
+   is_bias: bool
+   act_q: str
    flatten: bool = False
    dense: bool = False

@@ -185,34 +187,34 @@ class Config:
    READY_PROB = [1],
    )))
def test_dnn_engine(COMPILE):
+   c = make_compile_params(COMPILE)

    input_shape = (8,16,8,3) # (XN, XH, XW, CI)
    model_config = [
-       Config(11, 16),
-       Config(1, 16),
-       Config(7, 16),
-       Config(5, 16),
-       Config(3, 24),
-       Config(1, 50, flatten=True),
-       Config(1, 10, dense= True),
+       Config(11, 16, True , f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'),
+       Config(1 , 16, False, f'quantized_bits({c.K_BITS},0,False,True,1)'),
+       Config(7 , 16, True , f'quantized_bits({c.K_BITS},0,False,True,1)'),
+       Config(5 , 16, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'),
+       Config(3 , 24, True , f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'),
+       Config(1 , 50, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)', flatten=True),
+       Config(1 , 10, True , f'quantized_relu({c.X_BITS},0,negative_slope=0.125)', dense= True),
    ]

    '''
    Build Model
    '''
-   c = make_compile_params(COMPILE)
    assert c.X_BITS in [1,2,4,8] and c.K_BITS in [1,2,4,8], "X_BITS and K_BITS should be in [1,2,4,8]"
    assert c.B_BITS in [8,16,32], "B_BITS should be in [8,16,32]"
-   xq, kq, bq, aq = f'quantized_bits({c.X_BITS},0,False,True,1)', f'quantized_bits({c.K_BITS},0,False,True,1)', f'quantized_bits({c.B_BITS},0,False,True,1)', f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'
+   xq, kq, bq = f'quantized_bits({c.X_BITS},0,False,True,1)', f'quantized_bits({c.K_BITS},0,False,True,1)', f'quantized_bits({c.B_BITS},0,False,True,1)'
    inp = {'bits':c.X_BITS, 'frac':c.X_BITS-1}

    x = x_in = Input(input_shape[1:], name='input')
    x = QActivation(xq)(x)
    for i, g in enumerate(model_config):
        if g.dense:
-           d = {'core': {'type':'dense', 'units':g.CO, 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True, 'act_str':aq}}
+           d = {'core': {'type':'dense', 'units':g.CO, 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':g.is_bias, 'act_str':g.act_q}}
        else:
-           d = {'core': {'type':'conv', 'filters':g.CO, 'kernel_size':(g.K,g.K), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':True, 'act_str':aq}, 'flatten':g.flatten,}
+           d = {'core': {'type':'conv', 'filters':g.CO, 'kernel_size':(g.K,g.K), 'strides':(1,1), 'padding':'same', 'kernel_quantizer':kq, 'bias_quantizer':bq, 'use_bias':g.is_bias, 'act_str':g.act_q}, 'flatten':g.flatten,}
        x = Bundle(**d)(x)

    model = Model(inputs=x_in, outputs=x)
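With is_bias and act_q now chosen per layer, the first four layers alone cover all four combinations of bias/no-bias and quantized_bits/quantized_relu, so the test exercises both the new per-bundle b_offset packing seen in model.h and both quantizer paths (plog_slope=0 vs plog_slope=3) through the merged apply_act.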
