diff --git a/c/model.h b/c/model.h
index ce3bffb..47b24d3 100644
--- a/c/model.h
+++ b/c/model.h
@@ -1,23 +1,23 @@
#define N_BUNDLES 7
Bundle_t bundles [N_BUNDLES] = {
- {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },
- {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=3, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
- {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
- {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
- {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
- {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
- {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=6400, .co=10, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
+ {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },
+ {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
+ {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
+ {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
+ {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
+ {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
+ {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=6400, .co=10, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
};
#define X_BITS_L2 2
-#define W_BITS_L2 2
+#define W_BITS_L2 3
#define X_PAD 5
#define KH_MAX 11
#define PE_ROWS 8
#define PE_COLS 24
-#define WB_BYTES 98212
-#define W_BYTES 98048
+#define WB_BYTES 192868
+#define W_BYTES 192704
#define X_BYTES 2520
#define X_BYTES_ALL 120040
#define Y_BYTES 294920
diff --git a/fpga/scripts/vivado_config.tcl b/fpga/scripts/vivado_config.tcl
index 0d23d71..17514c0 100644
--- a/fpga/scripts/vivado_config.tcl
+++ b/fpga/scripts/vivado_config.tcl
@@ -4,7 +4,7 @@
set ROWS 8
set COLS 24
set X_BITS 4
- set K_BITS 4
+ set K_BITS 8
set Y_BITS 24
set DELAY_W_RAM 2
set RAM_EDGES_DEPTH 288
diff --git a/rtl/include/params_input.svh b/rtl/include/params_input.svh
index ec0a092..2a28c98 100644
--- a/rtl/include/params_input.svh
+++ b/rtl/include/params_input.svh
@@ -4,7 +4,7 @@
`define ROWS 8 // PE rows, constrained by resources
`define COLS 24 // PE cols, constrained by resources
`define X_BITS 4 // Bits per word in input
- `define K_BITS 4 // Bits per word in input
+ `define K_BITS 8 // Bits per word in weights (kernel)
`define Y_BITS 24 // Bits per word in output of conv
`define KH_MAX 11 // max of kernel height, across layers
diff --git a/test/py/bundle.py b/test/py/bundle.py
index 198f261..5c118d3 100644
--- a/test/py/bundle.py
+++ b/test/py/bundle.py
@@ -706,4 +706,4 @@ def pack_words_into_bytes (arr, bits):
arr = arr.reshape(arr.size//w_words_per_byte, w_words_per_byte)
for i_word in range(1, w_words_per_byte):
arr[:,0] += arr[:,i_word] << (i_word * bits) # pack multiple words into a byte
- return arr[:,0] # packed byte
\ No newline at end of file
+ return arr[:,0].astype(np.uint8) # packed byte
\ No newline at end of file
diff --git a/test/py/param_test.py b/test/py/param_test.py
index ae9aa3e..de913ed 100644
--- a/test/py/param_test.py
+++ b/test/py/param_test.py
@@ -167,7 +167,7 @@ class Config:
@pytest.mark.parametrize("COMPILE", list(product_dict(
X_BITS = [4 ],
- K_BITS = [4 ],
+ K_BITS = [8 ],
B_BITS = [16 ],
Y_BITS = [24 ],
INT_BITS = [32 ], # size of integer in target CPU
@@ -183,8 +183,8 @@ class Config:
RAM_WEIGHTS_DEPTH = [20 ], # KH*CI + Config beats
RAM_EDGES_DEPTH = [288 ], # max(CI * XW * (XH/ROWS-1))
- VALID_PROB = [100],
- READY_PROB = [1],
+ VALID_PROB = [1],
+ READY_PROB = [100],
)))
def test_dnn_engine(COMPILE):
c = make_compile_params(COMPILE)
@@ -192,8 +192,8 @@ def test_dnn_engine(COMPILE):
input_shape = (8,16,8,3) # (XN, XH, XW, CI)
model_config = [
Config(11, 16, True , f'quantized_relu({c.X_BITS},0,negative_slope=0)'),
- Config(1 , 16, False, f'quantized_bits({c.K_BITS},0,False,False,1)'),
- Config(7 , 16, True , f'quantized_bits({c.K_BITS},0,False,True,1)'),
+ Config(1 , 16, False, f'quantized_bits({c.X_BITS},0,False,False,1)'),
+ Config(7 , 16, True , f'quantized_bits({c.X_BITS},0,False,True,1)'),
Config(5 , 16, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'),
Config(3 , 24, True , f'quantized_relu({c.X_BITS},0,negative_slope=0)'),
Config(1 , 50, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)', flatten=True),
diff --git a/test/wave/dnn_engine_tb_behav.wcfg b/test/wave/dnn_engine_tb_behav.wcfg
index 411347f..b6b6873 100644
--- a/test/wave/dnn_engine_tb_behav.wcfg
+++ b/test/wave/dnn_engine_tb_behav.wcfg
@@ -12,13 +12,13 @@
-
-
-
+
+
+
-
+
@@ -446,6 +446,7 @@
Weight Rot
label
+
aresetn
aresetn
@@ -453,6 +454,7 @@
Slave
label
+
s_axis_tready
s_axis_tready
@@ -466,13 +468,86 @@
s_axis_tlast
- s_axis_weights_tdata[15:0][3:0]
- s_axis_weights_tdata[15:0][3:0]
+ s_axis_weights_tdata[7:0][7:0]
+ s_axis_weights_tdata[7:0][7:0]
SIGNEDDECRADIX
+
+
+ [7][7:0]
+ [7][7:0]
+ SIGNEDDECRADIX
+
+
+ [6][7:0]
+ [6][7:0]
+ SIGNEDDECRADIX
+
+
+ [5][7:0]
+ [5][7:0]
+ SIGNEDDECRADIX
+
+
+ [4][7:0]
+ [4][7:0]
+ SIGNEDDECRADIX
+
+
+ [3][7:0]
+ [3][7:0]
+ SIGNEDDECRADIX
+
+
+ [2][7:0]
+ [2][7:0]
+ SIGNEDDECRADIX
+
+
+ [1][7:0]
+ [1][7:0]
+ SIGNEDDECRADIX
+
+
+ [0][7:0]
+ [0][7:0]
+ SIGNEDDECRADIX
+
+
+ [7][7:0]
+ [7][7:0]
+
+
+ [6][7:0]
+ [6][7:0]
+
+
+ [5][7:0]
+ [5][7:0]
+
+
+ [4][7:0]
+ [4][7:0]
+
+
+ [3][7:0]
+ [3][7:0]
+
+
+ [2][7:0]
+ [2][7:0]
+
+
+ [1][7:0]
+ [1][7:0]
+
+
+ [0][7:0]
+ [0][7:0]
+
- s_axis_tkeep[15:0]
- s_axis_tkeep[15:0]
+ s_axis_tkeep[7:0]
+ s_axis_tkeep[7:0]
@@ -659,8 +734,8 @@
bram_m_valid
- bram_m_data[1:0][95:0]
- bram_m_data[1:0][95:0]
+ bram_m_data[1:0][191:0]
+ bram_m_data[1:0][191:0]
BRAM_1
@@ -686,16 +761,12 @@
r_en
- s_data[95:0]
- s_data[95:0]
+ s_data[191:0]
+ s_data[191:0]
- m_data[95:0]
- m_data[95:0]
-
-
- m_valid
- m_valid
+ m_data[191:0]
+ m_data[191:0]
r_addr_max[4:0]
@@ -779,16 +850,12 @@
r_en
- s_data[95:0]
- s_data[95:0]
+ s_data[191:0]
+ s_data[191:0]
- m_data[95:0]
- m_data[95:0]
-
-
- m_valid
- m_valid
+ m_data[191:0]
+ m_data[191:0]
r_addr_max[4:0]
@@ -832,8 +899,8 @@
s_axis_tready
- m_axis_tdata[95:0]
- m_axis_tdata[95:0]
+ m_axis_tdata[191:0]
+ m_axis_tdata[191:0]
m_axis_tvalid
@@ -888,7 +955,6 @@
Sync
label
-
weights_m_valid
weights_m_valid
@@ -938,11 +1004,269 @@
s_data_pixels[7:0][3:0]
s_data_pixels[7:0][3:0]
SIGNEDDECRADIX
+
+
+ [7][3:0]
+ [7][3:0]
+ SIGNEDDECRADIX
+
+
+ [6][3:0]
+ [6][3:0]
+ SIGNEDDECRADIX
+
+
+ [5][3:0]
+ [5][3:0]
+ SIGNEDDECRADIX
+
+
+ [4][3:0]
+ [4][3:0]
+ SIGNEDDECRADIX
+
+
+ [3][3:0]
+ [3][3:0]
+ SIGNEDDECRADIX
+
+
+ [2][3:0]
+ [2][3:0]
+ SIGNEDDECRADIX
+
+
+ [1][3:0]
+ [1][3:0]
+ SIGNEDDECRADIX
+
+
+ [0][3:0]
+ [0][3:0]
+ SIGNEDDECRADIX
+
- s_data_weights[23:0][3:0]
- s_data_weights[23:0][3:0]
+ s_data_weights[23:0][7:0]
+ s_data_weights[23:0][7:0]
SIGNEDDECRADIX
+
+
+ [23][7:0]
+ [23][7:0]
+ SIGNEDDECRADIX
+
+
+ [22][7:0]
+ [22][7:0]
+ SIGNEDDECRADIX
+
+
+ [21][7:0]
+ [21][7:0]
+ SIGNEDDECRADIX
+
+
+ [20][7:0]
+ [20][7:0]
+ SIGNEDDECRADIX
+
+
+ [19][7:0]
+ [19][7:0]
+ SIGNEDDECRADIX
+
+
+ [18][7:0]
+ [18][7:0]
+ SIGNEDDECRADIX
+
+
+ [17][7:0]
+ [17][7:0]
+ SIGNEDDECRADIX
+
+
+ [16][7:0]
+ [16][7:0]
+ SIGNEDDECRADIX
+
+
+ [15][7:0]
+ [15][7:0]
+ SIGNEDDECRADIX
+
+
+ [14][7:0]
+ [14][7:0]
+ SIGNEDDECRADIX
+
+
+ [13][7:0]
+ [13][7:0]
+ SIGNEDDECRADIX
+
+
+ [12][7:0]
+ [12][7:0]
+ SIGNEDDECRADIX
+
+
+ [11][7:0]
+ [11][7:0]
+ SIGNEDDECRADIX
+
+
+ [10][7:0]
+ [10][7:0]
+ SIGNEDDECRADIX
+
+
+ [9][7:0]
+ [9][7:0]
+ SIGNEDDECRADIX
+
+
+ [8][7:0]
+ [8][7:0]
+ SIGNEDDECRADIX
+
+
+ [7][7:0]
+ [7][7:0]
+ SIGNEDDECRADIX
+
+
+ [6][7:0]
+ [6][7:0]
+ SIGNEDDECRADIX
+
+
+ [5][7:0]
+ [5][7:0]
+ SIGNEDDECRADIX
+
+
+ [4][7:0]
+ [4][7:0]
+ SIGNEDDECRADIX
+
+
+ [3][7:0]
+ [3][7:0]
+ SIGNEDDECRADIX
+
+
+ [2][7:0]
+ [2][7:0]
+ SIGNEDDECRADIX
+
+
+ [1][7:0]
+ [1][7:0]
+ SIGNEDDECRADIX
+
+
+ [0][7:0]
+ [0][7:0]
+ SIGNEDDECRADIX
+
+
+ [23][7:0]
+ [23][7:0]
+
+
+ [22][7:0]
+ [22][7:0]
+
+
+ [21][7:0]
+ [21][7:0]
+
+
+ [20][7:0]
+ [20][7:0]
+
+
+ [19][7:0]
+ [19][7:0]
+
+
+ [18][7:0]
+ [18][7:0]
+
+
+ [17][7:0]
+ [17][7:0]
+
+
+ [16][7:0]
+ [16][7:0]
+
+
+ [15][7:0]
+ [15][7:0]
+
+
+ [14][7:0]
+ [14][7:0]
+
+
+ [13][7:0]
+ [13][7:0]
+
+
+ [12][7:0]
+ [12][7:0]
+
+
+ [11][7:0]
+ [11][7:0]
+
+
+ [10][7:0]
+ [10][7:0]
+
+
+ [9][7:0]
+ [9][7:0]
+
+
+ [8][7:0]
+ [8][7:0]
+
+
+ [7][7:0]
+ [7][7:0]
+
+
+ [6][7:0]
+ [6][7:0]
+
+
+ [5][7:0]
+ [5][7:0]
+
+
+ [4][7:0]
+ [4][7:0]
+
+
+ [3][7:0]
+ [3][7:0]
+
+
+ [2][7:0]
+ [2][7:0]
+
+
+ [1][7:0]
+ [1][7:0]
+
+
+ [0][7:0]
+ [0][7:0]
+
s_valid
@@ -966,8 +1290,8 @@
label
- mul_m_data[23:0][7:0][7:0]
- mul_m_data[23:0][7:0][7:0]
+ mul_m_data[23:0][7:0][11:0]
+ mul_m_data[23:0][7:0][11:0]
SIGNEDDECRADIX
@@ -1123,24 +1447,24 @@
s_data_pixels[3:0]
- s_data_weights[3:0]
- s_data_weights[3:0]
+ s_data_weights[7:0]
+ s_data_weights[7:0]
shift_data[23:0]
shift_data[23:0]
- mul_m_data[7:0]
- mul_m_data[7:0]
+ mul_m_data[11:0]
+ mul_m_data[11:0]
m_data[23:0]
m_data[23:0]
- mul_m_data_d[7:0]
- mul_m_data_d[7:0]
+ mul_m_data_d[11:0]
+ mul_m_data_d[11:0]
add_in_1[23:0]
@@ -1453,7 +1777,6 @@
Master
label
-
s_axis_pixels_tkeep[7:0]
s_axis_pixels_tkeep[7:0]
@@ -1467,8 +1790,8 @@
s_axis_pixels_tkeep_words[15:0]
- s_axis_weights_tkeep_words[15:0]
- s_axis_weights_tkeep_words[15:0]
+ s_axis_weights_tkeep_words[7:0]
+ s_axis_weights_tkeep_words[7:0]