diff --git a/c/model.h b/c/model.h index ce3bffb..47b24d3 100644 --- a/c/model.h +++ b/c/model.h @@ -1,23 +1,23 @@ #define N_BUNDLES 7 Bundle_t bundles [N_BUNDLES] = { - {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=3, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 }, - {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 }, - {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 }, - {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=6, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 }, - {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=6400, .co=10, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 } + {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 }, + {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 }, + {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 }, + {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 }, + {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 }, + {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 }, + {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=6400, .co=10, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 } }; #define X_BITS_L2 2 -#define W_BITS_L2 2 +#define W_BITS_L2 3 #define X_PAD 5 #define KH_MAX 11 #define PE_ROWS 8 #define PE_COLS 24 -#define WB_BYTES 98212 -#define W_BYTES 98048 +#define WB_BYTES 192868 +#define W_BYTES 192704 #define X_BYTES 2520 #define X_BYTES_ALL 120040 #define Y_BYTES 294920 diff --git a/fpga/scripts/vivado_config.tcl b/fpga/scripts/vivado_config.tcl index 0d23d71..17514c0 100644 --- a/fpga/scripts/vivado_config.tcl +++ b/fpga/scripts/vivado_config.tcl @@ -4,7 +4,7 @@ set ROWS 8 set COLS 24 set X_BITS 4 - set K_BITS 4 + set K_BITS 8 set Y_BITS 24 set DELAY_W_RAM 2 set RAM_EDGES_DEPTH 288 diff --git a/rtl/include/params_input.svh b/rtl/include/params_input.svh index ec0a092..2a28c98 100644 --- a/rtl/include/params_input.svh +++ b/rtl/include/params_input.svh @@ -4,7 +4,7 @@ `define ROWS 8 // PE rows, constrained by resources `define COLS 24 // PE cols, constrained by resources `define X_BITS 4 // Bits per word in input - `define K_BITS 4 // Bits per word in input + `define K_BITS 8 // Bits per word in input `define Y_BITS 24 // Bits per word in output of conv `define KH_MAX 11 // max of kernel height, across layers diff --git a/test/py/bundle.py b/test/py/bundle.py index 198f261..5c118d3 100644 --- a/test/py/bundle.py +++ b/test/py/bundle.py @@ -706,4 +706,4 @@ def pack_words_into_bytes (arr, bits): arr = arr.reshape(arr.size//w_words_per_byte, w_words_per_byte) for i_word in range(1, w_words_per_byte): arr[:,0] += arr[:,i_word] << (i_word * bits) # pack multiple words into a byte - return arr[:,0] # packed byte \ No newline at end of file + return arr[:,0].astype(np.uint8) # packed byte \ No newline at end of file diff --git a/test/py/param_test.py b/test/py/param_test.py index ae9aa3e..de913ed 100644 --- a/test/py/param_test.py +++ b/test/py/param_test.py @@ -167,7 +167,7 @@ class Config: @pytest.mark.parametrize("COMPILE", list(product_dict( X_BITS = [4 ], - K_BITS = [4 ], + K_BITS = [8 ], B_BITS = [16 ], Y_BITS = [24 ], INT_BITS = [32 ], # size of integer in target CPU @@ -183,8 +183,8 @@ class Config: RAM_WEIGHTS_DEPTH = [20 ], # KH*CI + Config beats RAM_EDGES_DEPTH = [288 ], # max(CI * XW * (XH/ROWS-1)) - VALID_PROB = [100], - READY_PROB = [1], + VALID_PROB = [1], + READY_PROB = [100], ))) def test_dnn_engine(COMPILE): c = make_compile_params(COMPILE) @@ -192,8 +192,8 @@ def test_dnn_engine(COMPILE): input_shape = (8,16,8,3) # (XN, XH, XW, CI) model_config = [ Config(11, 16, True , f'quantized_relu({c.X_BITS},0,negative_slope=0)'), - Config(1 , 16, False, f'quantized_bits({c.K_BITS},0,False,False,1)'), - Config(7 , 16, True , f'quantized_bits({c.K_BITS},0,False,True,1)'), + Config(1 , 16, False, f'quantized_bits({c.X_BITS},0,False,False,1)'), + Config(7 , 16, True , f'quantized_bits({c.X_BITS},0,False,True,1)'), Config(5 , 16, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)'), Config(3 , 24, True , f'quantized_relu({c.X_BITS},0,negative_slope=0)'), Config(1 , 50, False, f'quantized_relu({c.X_BITS},0,negative_slope=0.125)', flatten=True), diff --git a/test/wave/dnn_engine_tb_behav.wcfg b/test/wave/dnn_engine_tb_behav.wcfg index 411347f..b6b6873 100644 --- a/test/wave/dnn_engine_tb_behav.wcfg +++ b/test/wave/dnn_engine_tb_behav.wcfg @@ -12,13 +12,13 @@ - - - + + + - + @@ -446,6 +446,7 @@ Weight Rot label + aresetn aresetn @@ -453,6 +454,7 @@ Slave label + s_axis_tready s_axis_tready @@ -466,13 +468,86 @@ s_axis_tlast - s_axis_weights_tdata[15:0][3:0] - s_axis_weights_tdata[15:0][3:0] + s_axis_weights_tdata[7:0][7:0] + s_axis_weights_tdata[7:0][7:0] SIGNEDDECRADIX + + + [7][7:0] + [7][7:0] + SIGNEDDECRADIX + + + [6][7:0] + [6][7:0] + SIGNEDDECRADIX + + + [5][7:0] + [5][7:0] + SIGNEDDECRADIX + + + [4][7:0] + [4][7:0] + SIGNEDDECRADIX + + + [3][7:0] + [3][7:0] + SIGNEDDECRADIX + + + [2][7:0] + [2][7:0] + SIGNEDDECRADIX + + + [1][7:0] + [1][7:0] + SIGNEDDECRADIX + + + [0][7:0] + [0][7:0] + SIGNEDDECRADIX + + + [7][7:0] + [7][7:0] + + + [6][7:0] + [6][7:0] + + + [5][7:0] + [5][7:0] + + + [4][7:0] + [4][7:0] + + + [3][7:0] + [3][7:0] + + + [2][7:0] + [2][7:0] + + + [1][7:0] + [1][7:0] + + + [0][7:0] + [0][7:0] + - s_axis_tkeep[15:0] - s_axis_tkeep[15:0] + s_axis_tkeep[7:0] + s_axis_tkeep[7:0] @@ -659,8 +734,8 @@ bram_m_valid - bram_m_data[1:0][95:0] - bram_m_data[1:0][95:0] + bram_m_data[1:0][191:0] + bram_m_data[1:0][191:0] BRAM_1 @@ -686,16 +761,12 @@ r_en - s_data[95:0] - s_data[95:0] + s_data[191:0] + s_data[191:0] - m_data[95:0] - m_data[95:0] - - - m_valid - m_valid + m_data[191:0] + m_data[191:0] r_addr_max[4:0] @@ -779,16 +850,12 @@ r_en - s_data[95:0] - s_data[95:0] + s_data[191:0] + s_data[191:0] - m_data[95:0] - m_data[95:0] - - - m_valid - m_valid + m_data[191:0] + m_data[191:0] r_addr_max[4:0] @@ -832,8 +899,8 @@ s_axis_tready - m_axis_tdata[95:0] - m_axis_tdata[95:0] + m_axis_tdata[191:0] + m_axis_tdata[191:0] m_axis_tvalid @@ -888,7 +955,6 @@ Sync label - weights_m_valid weights_m_valid @@ -938,11 +1004,269 @@ s_data_pixels[7:0][3:0] s_data_pixels[7:0][3:0] SIGNEDDECRADIX + + + [7][3:0] + [7][3:0] + SIGNEDDECRADIX + + + [6][3:0] + [6][3:0] + SIGNEDDECRADIX + + + [5][3:0] + [5][3:0] + SIGNEDDECRADIX + + + [4][3:0] + [4][3:0] + SIGNEDDECRADIX + + + [3][3:0] + [3][3:0] + SIGNEDDECRADIX + + + [2][3:0] + [2][3:0] + SIGNEDDECRADIX + + + [1][3:0] + [1][3:0] + SIGNEDDECRADIX + + + [0][3:0] + [0][3:0] + SIGNEDDECRADIX + - s_data_weights[23:0][3:0] - s_data_weights[23:0][3:0] + s_data_weights[23:0][7:0] + s_data_weights[23:0][7:0] SIGNEDDECRADIX + + + [23][7:0] + [23][7:0] + SIGNEDDECRADIX + + + [22][7:0] + [22][7:0] + SIGNEDDECRADIX + + + [21][7:0] + [21][7:0] + SIGNEDDECRADIX + + + [20][7:0] + [20][7:0] + SIGNEDDECRADIX + + + [19][7:0] + [19][7:0] + SIGNEDDECRADIX + + + [18][7:0] + [18][7:0] + SIGNEDDECRADIX + + + [17][7:0] + [17][7:0] + SIGNEDDECRADIX + + + [16][7:0] + [16][7:0] + SIGNEDDECRADIX + + + [15][7:0] + [15][7:0] + SIGNEDDECRADIX + + + [14][7:0] + [14][7:0] + SIGNEDDECRADIX + + + [13][7:0] + [13][7:0] + SIGNEDDECRADIX + + + [12][7:0] + [12][7:0] + SIGNEDDECRADIX + + + [11][7:0] + [11][7:0] + SIGNEDDECRADIX + + + [10][7:0] + [10][7:0] + SIGNEDDECRADIX + + + [9][7:0] + [9][7:0] + SIGNEDDECRADIX + + + [8][7:0] + [8][7:0] + SIGNEDDECRADIX + + + [7][7:0] + [7][7:0] + SIGNEDDECRADIX + + + [6][7:0] + [6][7:0] + SIGNEDDECRADIX + + + [5][7:0] + [5][7:0] + SIGNEDDECRADIX + + + [4][7:0] + [4][7:0] + SIGNEDDECRADIX + + + [3][7:0] + [3][7:0] + SIGNEDDECRADIX + + + [2][7:0] + [2][7:0] + SIGNEDDECRADIX + + + [1][7:0] + [1][7:0] + SIGNEDDECRADIX + + + [0][7:0] + [0][7:0] + SIGNEDDECRADIX + + + [23][7:0] + [23][7:0] + + + [22][7:0] + [22][7:0] + + + [21][7:0] + [21][7:0] + + + [20][7:0] + [20][7:0] + + + [19][7:0] + [19][7:0] + + + [18][7:0] + [18][7:0] + + + [17][7:0] + [17][7:0] + + + [16][7:0] + [16][7:0] + + + [15][7:0] + [15][7:0] + + + [14][7:0] + [14][7:0] + + + [13][7:0] + [13][7:0] + + + [12][7:0] + [12][7:0] + + + [11][7:0] + [11][7:0] + + + [10][7:0] + [10][7:0] + + + [9][7:0] + [9][7:0] + + + [8][7:0] + [8][7:0] + + + [7][7:0] + [7][7:0] + + + [6][7:0] + [6][7:0] + + + [5][7:0] + [5][7:0] + + + [4][7:0] + [4][7:0] + + + [3][7:0] + [3][7:0] + + + [2][7:0] + [2][7:0] + + + [1][7:0] + [1][7:0] + + + [0][7:0] + [0][7:0] + s_valid @@ -966,8 +1290,8 @@ label - mul_m_data[23:0][7:0][7:0] - mul_m_data[23:0][7:0][7:0] + mul_m_data[23:0][7:0][11:0] + mul_m_data[23:0][7:0][11:0] SIGNEDDECRADIX @@ -1123,24 +1447,24 @@ s_data_pixels[3:0] - s_data_weights[3:0] - s_data_weights[3:0] + s_data_weights[7:0] + s_data_weights[7:0] shift_data[23:0] shift_data[23:0] - mul_m_data[7:0] - mul_m_data[7:0] + mul_m_data[11:0] + mul_m_data[11:0] m_data[23:0] m_data[23:0] - mul_m_data_d[7:0] - mul_m_data_d[7:0] + mul_m_data_d[11:0] + mul_m_data_d[11:0] add_in_1[23:0] @@ -1453,7 +1777,6 @@ Master label - s_axis_pixels_tkeep[7:0] s_axis_pixels_tkeep[7:0] @@ -1467,8 +1790,8 @@ s_axis_pixels_tkeep_words[15:0] - s_axis_weights_tkeep_words[15:0] - s_axis_weights_tkeep_words[15:0] + s_axis_weights_tkeep_words[7:0] + s_axis_weights_tkeep_words[7:0]