From 5f039e936ecc119ce66ccaa63b9dd6a8e6ce74e1 Mon Sep 17 00:00:00 2001 From: Aba Date: Sat, 16 Sep 2023 01:19:29 -0700 Subject: [PATCH] Fix Dense: 1. CONFIG_BEATS=1 causes odd CM, modified w_rot FSM to allow CONFIG_BEATS=0. 2. fix reshape in export to XN->XH --- c/model.h | 23 +++--- rtl/axis_weight_rotator.sv | 22 +++--- rtl/include/params_input.svh | 2 +- rtl/sram/cyclic_bram.sv | 1 - test/py/bundle.py | 7 +- test/py/param_test.py | 4 +- test/wave/dnn_engine_tb_behav.wcfg | 110 ++++++++++++++++------------- 7 files changed, 91 insertions(+), 78 deletions(-) diff --git a/c/model.h b/c/model.h index 391b395..60a6113 100644 --- a/c/model.h +++ b/c/model.h @@ -1,11 +1,12 @@ -#define N_BUNDLES 6 +#define N_BUNDLES 7 Bundle_t bundles [N_BUNDLES] = { - {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=152, .w_bpt_p0=152, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587446416637952, .w_header_p0=414341061322735616 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=19, .cm_p0=16, .w_bpt=212, .w_bpt_p0=212, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210803728023552, .w_header_p0=8700964375684448256 }, - {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=188, .w_bpt_p0=188, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933036414009344, .w_header_p0=846686625550303232 }, - {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=6, .cm=3, .cm_p0=1, .w_bpt=200, .w_bpt_p0=80, .x_bpt=2504, .x_bpt_p0=840, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1351089783815798784, .x_header_p0=198168279208951808, .w_header=1351336203269439488, .w_header_p0=198168279208951808 }, - {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=236, .w_bpt_p0=164, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660891911585792, .w_header_p0=1855492942081294336 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=19, .cm_p0=5, .w_bpt=248, .w_bpt_p0=80, .x_bpt=15816, .x_bpt_p0=4168, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=10430346632594718720, .x_header_p0=2359896100346789888, .w_header=10430593086408097792, .w_header_p0=2359896100346789888 } + {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 }, + {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 }, + {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 }, + {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 }, + {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 }, + {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 }, + {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=170, .b_val_shift=9, .b_bias_shift=0, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 } }; #define X_BITS_L2 2 @@ -13,12 +14,12 @@ Bundle_t bundles [N_BUNDLES] = { #define PE_ROWS 8 #define PE_COLS 24 -#define WB_BYTES 20436 -#define W_BYTES 20096 +#define WB_BYTES 98436 +#define W_BYTES 98048 #define X_BYTES 2520 -#define X_BYTES_ALL 75896 +#define X_BYTES_ALL 120040 #define Y_BYTES 294920 #define B_TYPE signed short -#define B_WORDS 170 +#define B_WORDS 194 #define DATA_DIR "D:/dnn-engine/test/vectors" diff --git a/rtl/axis_weight_rotator.sv b/rtl/axis_weight_rotator.sv index 580e193..120b94f 100644 --- a/rtl/axis_weight_rotator.sv +++ b/rtl/axis_weight_rotator.sv @@ -120,7 +120,7 @@ module axis_weight_rotator #( always_ff @(posedge aclk) if (!aresetn) state_read <= R_IDLE_S; else unique case (state_read) - R_IDLE_S : if (done_write [i_read]) state_read <= R_PASS_CONFIG_S; + R_IDLE_S : if (done_write [i_read]) state_read <= CONFIG_BEATS==0 ? R_READ_S : R_PASS_CONFIG_S; R_PASS_CONFIG_S : if (lc_config) state_read <= R_READ_S; R_READ_S : if (lc_xn ) state_read <= R_SWITCH_S; R_SWITCH_S : state_read <= R_IDLE_S; @@ -201,15 +201,18 @@ module axis_weight_rotator #( W_SWITCH_S : done_write_next [i] = 1; endcase - if (i==i_read) + if (i==i_read) begin + + if (CONFIG_BEATS==0 ? (state_read==R_IDLE_S && done_write [i_read]) : (state_read==R_PASS_CONFIG_S)) begin + done_read_next [i] = 0; + bram_m_ready [i] = 1; + end + case (state_read) - R_PASS_CONFIG_S : begin - done_read_next [i] = 0; - bram_m_ready [i] = m_axis_tready; - end - R_READ_S : bram_m_ready [i] = m_axis_tready; - R_SWITCH_S : done_read_next [i] = 1; + R_PASS_CONFIG_S, R_READ_S : bram_m_ready [i] = m_axis_tready; + R_SWITCH_S : done_read_next [i] = 1; endcase + end end config_st ref_i; @@ -288,7 +291,8 @@ module axis_weight_rotator #( config_st ref_i_read; assign ref_i_read = ref_config[i_read]; - counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in(BITS_CONFIG_BEATS'( CONFIG_BEATS-1 )), .last_clk(lc_config), .last(l_config) ); + wire [BITS_CONFIG_BEATS-1:0] config_beats_const = CONFIG_BEATS-1; + counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in( config_beats_const ), .last_clk(lc_config), .last(l_config) ); counter #(.W(BITS_KW )) C_KW (.clk(aclk), .reset(copy_config), .en(en_kw ), .max_in(BITS_KW '( 2*ref_i_read.kw2 )), .last_clk(lc_kw ), .last(l_kw ), .first(f_kw ) ); counter #(.W(BITS_CI )) C_CI (.clk(aclk), .reset(copy_config), .en(lc_kw ), .max_in(BITS_CI '( ref_i_read.cin_1 )), .last_clk(lc_cin ), .last(l_cin ), .first(f_cin ) ); counter #(.W(BITS_XW )) C_XW (.clk(aclk), .reset(copy_config), .en(lc_cin ), .max_in(BITS_XW '( ref_i_read.cols_1 )), .last_clk(lc_cols ), .last(l_cols ), .first(f_cols ), .count(c_cols)); diff --git a/rtl/include/params_input.svh b/rtl/include/params_input.svh index 8c805ef..ec0a092 100644 --- a/rtl/include/params_input.svh +++ b/rtl/include/params_input.svh @@ -13,7 +13,7 @@ `define XW_MAX 32 // max of input image width, across layers `define XN_MAX 16 // max of input batch size, across layers `define CI_MAX 2048 // max of input channels, across layers - `define CONFIG_BEATS 1 // constant, for now + `define CONFIG_BEATS 0 // constant, for now `define RAM_WEIGHTS_DEPTH 20 // CONFIG_BEATS + max(KW * CI), across layers `define RAM_EDGES_DEPTH 288 // max (KW * CI * XW), across layers when KW != 1 diff --git a/rtl/sram/cyclic_bram.sv b/rtl/sram/cyclic_bram.sv index bee5a41..f7e13bd 100644 --- a/rtl/sram/cyclic_bram.sv +++ b/rtl/sram/cyclic_bram.sv @@ -14,7 +14,6 @@ module cyclic_bram #( input logic w_en, r_en, input logic [W_DATA_WIDTH-1:0] s_data, output logic [R_DATA_WIDTH-1:0] m_data, - output logic m_valid, input logic [R_ADDR_WIDTH-1:0] r_addr_max, r_addr_min ); diff --git a/test/py/bundle.py b/test/py/bundle.py index c662af7..0a42f02 100644 --- a/test/py/bundle.py +++ b/test/py/bundle.py @@ -410,9 +410,10 @@ def export (self, c): print('Conv -> Dense Reshape') CI, CO = self.w['int'].shape XN, _ = self.inp['int'].shape - w_int = self.w['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO) - x_int = self.inp['int'].reshape(XN,1,1,CI) # (XN,CI) -> (XN, XH, XW, CI) - y_int = self.y['int'].reshape(XN,1,1,CO) # (XN,CI) -> (XN, XH, XW, CI) + w_int = self.w ['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO) + x_int = self.inp['int'].reshape(1,XN,1,CI) # (XN,CI) -> (XN, XH, XW, CI) + y_int = self.y ['int'].reshape(1,XN,1,CO) # (XN,CI) -> (XN, XH, XW, CI) + p_int = self.y_int_b. reshape(1,XN,1,CO) else: y_int = self.y['int'] p_int = self.y_int_b diff --git a/test/py/param_test.py b/test/py/param_test.py index 1dbccd7..a0dd1e9 100644 --- a/test/py/param_test.py +++ b/test/py/param_test.py @@ -60,7 +60,7 @@ def clog2(x): c = namedtuple("Compile", c._fields + n._fields)(*(c + n)) d = { - 'CONFIG_BEATS' : 1, + 'CONFIG_BEATS' : 0, 'X_PAD' : int(np.ceil(c.KH_MAX//2)), 'BITS_KW2' : clog2((c.KW_MAX+1)/2), 'BITS_KH2' : clog2((c.KH_MAX+1)/2), @@ -194,7 +194,7 @@ def test_dnn_engine(COMPILE): Config(5, 16), Config(3, 24), Config(1, 50, flatten=True), - # Config(1, 10, dense= True), + Config(1, 10, dense= True), ] ''' diff --git a/test/wave/dnn_engine_tb_behav.wcfg b/test/wave/dnn_engine_tb_behav.wcfg index 2fdbd3f..411347f 100644 --- a/test/wave/dnn_engine_tb_behav.wcfg +++ b/test/wave/dnn_engine_tb_behav.wcfg @@ -12,13 +12,13 @@ - - - + + + - + @@ -446,7 +446,6 @@ Weight Rot label - aresetn aresetn @@ -454,7 +453,6 @@ Slave label - s_axis_tready s_axis_tready @@ -550,7 +548,6 @@ REF label - en_ref[1:0] en_ref[1:0] @@ -562,8 +559,8 @@ - ref_config[1:0][28:0] - ref_config[1:0][28:0] + ref_config[1:0][29:0] + ref_config[1:0][29:0] \genblk1[0].ref_i @@ -581,6 +578,7 @@ READ label + state_read[31:0] state_read[31:0] @@ -642,6 +640,7 @@ BRAM label + bram_wen[1:0] bram_wen[1:0] @@ -662,12 +661,10 @@ bram_m_data[1:0][95:0] bram_m_data[1:0][95:0] - BRAM_1 label - clk clk @@ -701,23 +698,23 @@ m_valid - r_addr_max[3:0] - r_addr_max[3:0] + r_addr_max[4:0] + r_addr_max[4:0] UNSIGNEDDECRADIX - r_addr_min[3:0] - r_addr_min[3:0] + r_addr_min[4:0] + r_addr_min[4:0] UNSIGNEDDECRADIX - w_addr[3:0] - w_addr[3:0] + w_addr[4:0] + w_addr[4:0] UNSIGNEDDECRADIX - r_addr[3:0] - r_addr[3:0] + r_addr[4:0] + r_addr[4:0] UNSIGNEDDECRADIX @@ -760,6 +757,7 @@ BRAM_0 label + clk clk @@ -793,20 +791,20 @@ m_valid - r_addr_max[3:0] - r_addr_max[3:0] + r_addr_max[4:0] + r_addr_max[4:0] - r_addr_min[3:0] - r_addr_min[3:0] + r_addr_min[4:0] + r_addr_min[4:0] - w_addr[3:0] - w_addr[3:0] + w_addr[4:0] + w_addr[4:0] - r_addr[3:0] - r_addr[3:0] + r_addr[4:0] + r_addr[4:0] @@ -887,6 +885,39 @@ + + Sync + label + + + weights_m_valid + weights_m_valid + + + pixels_m_valid + pixels_m_valid + + + m_axis_tready + m_axis_tready + + + weights_m_user + weights_m_user + + + m_axis_tvalid + m_axis_tvalid + + + weights_m_ready + weights_m_ready + + + pixels_m_ready + pixels_m_ready + + PROC label @@ -894,6 +925,7 @@ Proc label + aclk aclk @@ -1166,7 +1198,6 @@ Output pipe label - aclk aclk @@ -1243,7 +1274,6 @@ Out RAM shift label - clk clk @@ -1276,33 +1306,11 @@ clk clk - - bram_addr_a[11:0] - bram_addr_a[11:0] - SIGNEDDECRADIX - ram_r_addr[9:0] ram_r_addr[9:0] UNSIGNEDDECRADIX - - bram_rddata_a[31:0] - bram_rddata_a[31:0] - SIGNEDDECRADIX - - - bram_en_a - bram_en_a - - - done_fill - done_fill - - - t_done_proc - t_done_proc - STATE