Skip to content

Commit

Permalink
Fix Dense: 1. CONFIG_BEATS=1 causes odd CM, modified w_rot FSM to allow CONFIG_BEATS=0. 2. fix reshape in export to XN->XH
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Sep 16, 2023
1 parent 47822e2 commit 5f039e9
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 78 deletions.
23 changes: 12 additions & 11 deletions c/model.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
#define N_BUNDLES 6
#define N_BUNDLES 7
Bundle_t bundles [N_BUNDLES] = {
{.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=152, .w_bpt_p0=152, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587446416637952, .w_header_p0=414341061322735616 },
{.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=19, .cm_p0=16, .w_bpt=212, .w_bpt_p0=212, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210803728023552, .w_header_p0=8700964375684448256 },
{.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=188, .w_bpt_p0=188, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933036414009344, .w_header_p0=846686625550303232 },
{.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=6, .cm=3, .cm_p0=1, .w_bpt=200, .w_bpt_p0=80, .x_bpt=2504, .x_bpt_p0=840, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1351089783815798784, .x_header_p0=198168279208951808, .w_header=1351336203269439488, .w_header_p0=198168279208951808 },
{.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=236, .w_bpt_p0=164, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660891911585792, .w_header_p0=1855492942081294336 },
{.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=19, .cm_p0=5, .w_bpt=248, .w_bpt_p0=80, .x_bpt=15816, .x_bpt_p0=4168, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=10430346632594718720, .x_header_p0=2359896100346789888, .w_header=10430593086408097792, .w_header_p0=2359896100346789888 }
{.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },
{.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
{.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
{.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
{.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
{.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
{.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=170, .b_val_shift=9, .b_bias_shift=0, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
};

#define X_BITS_L2 2
#define W_BITS_L2 2
#define PE_ROWS 8
#define PE_COLS 24

#define WB_BYTES 20436
#define W_BYTES 20096
#define WB_BYTES 98436
#define W_BYTES 98048
#define X_BYTES 2520
#define X_BYTES_ALL 75896
#define X_BYTES_ALL 120040
#define Y_BYTES 294920
#define B_TYPE signed short
#define B_WORDS 170
#define B_WORDS 194
#define DATA_DIR "D:/dnn-engine/test/vectors"

22 changes: 13 additions & 9 deletions rtl/axis_weight_rotator.sv
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ module axis_weight_rotator #(
always_ff @(posedge aclk)
if (!aresetn) state_read <= R_IDLE_S;
else unique case (state_read)
R_IDLE_S : if (done_write [i_read]) state_read <= R_PASS_CONFIG_S;
R_IDLE_S : if (done_write [i_read]) state_read <= CONFIG_BEATS==0 ? R_READ_S : R_PASS_CONFIG_S;
R_PASS_CONFIG_S : if (lc_config) state_read <= R_READ_S;
R_READ_S : if (lc_xn ) state_read <= R_SWITCH_S;
R_SWITCH_S : state_read <= R_IDLE_S;
Expand Down Expand Up @@ -201,15 +201,18 @@ module axis_weight_rotator #(
W_SWITCH_S : done_write_next [i] = 1;
endcase

if (i==i_read)
if (i==i_read) begin

if (CONFIG_BEATS==0 ? (state_read==R_IDLE_S && done_write [i_read]) : (state_read==R_PASS_CONFIG_S)) begin
done_read_next [i] = 0;
bram_m_ready [i] = 1;
end

case (state_read)
R_PASS_CONFIG_S : begin
done_read_next [i] = 0;
bram_m_ready [i] = m_axis_tready;
end
R_READ_S : bram_m_ready [i] = m_axis_tready;
R_SWITCH_S : done_read_next [i] = 1;
R_PASS_CONFIG_S, R_READ_S : bram_m_ready [i] = m_axis_tready;
R_SWITCH_S : done_read_next [i] = 1;
endcase
end
end

config_st ref_i;
Expand Down Expand Up @@ -288,7 +291,8 @@ module axis_weight_rotator #(
config_st ref_i_read;
assign ref_i_read = ref_config[i_read];

counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in(BITS_CONFIG_BEATS'( CONFIG_BEATS-1 )), .last_clk(lc_config), .last(l_config) );
wire [BITS_CONFIG_BEATS-1:0] config_beats_const = CONFIG_BEATS-1;
counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in( config_beats_const ), .last_clk(lc_config), .last(l_config) );
counter #(.W(BITS_KW )) C_KW (.clk(aclk), .reset(copy_config), .en(en_kw ), .max_in(BITS_KW '( 2*ref_i_read.kw2 )), .last_clk(lc_kw ), .last(l_kw ), .first(f_kw ) );
counter #(.W(BITS_CI )) C_CI (.clk(aclk), .reset(copy_config), .en(lc_kw ), .max_in(BITS_CI '( ref_i_read.cin_1 )), .last_clk(lc_cin ), .last(l_cin ), .first(f_cin ) );
counter #(.W(BITS_XW )) C_XW (.clk(aclk), .reset(copy_config), .en(lc_cin ), .max_in(BITS_XW '( ref_i_read.cols_1 )), .last_clk(lc_cols ), .last(l_cols ), .first(f_cols ), .count(c_cols));
Expand Down
2 changes: 1 addition & 1 deletion rtl/include/params_input.svh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
`define XW_MAX 32 // max of input image width, across layers
`define XN_MAX 16 // max of input batch size, across layers
`define CI_MAX 2048 // max of input channels, across layers
`define CONFIG_BEATS 1 // constant, for now
`define CONFIG_BEATS 0 // constant, for now
`define RAM_WEIGHTS_DEPTH 20 // CONFIG_BEATS + max(KW * CI), across layers
`define RAM_EDGES_DEPTH 288 // max (KW * CI * XW), across layers when KW != 1

Expand Down
1 change: 0 additions & 1 deletion rtl/sram/cyclic_bram.sv
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ module cyclic_bram #(
input logic w_en, r_en,
input logic [W_DATA_WIDTH-1:0] s_data,
output logic [R_DATA_WIDTH-1:0] m_data,
output logic m_valid,
input logic [R_ADDR_WIDTH-1:0] r_addr_max, r_addr_min
);

Expand Down
7 changes: 4 additions & 3 deletions test/py/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,10 @@ def export (self, c):
print('Conv -> Dense Reshape')
CI, CO = self.w['int'].shape
XN, _ = self.inp['int'].shape
w_int = self.w['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO)
x_int = self.inp['int'].reshape(XN,1,1,CI) # (XN,CI) -> (XN, XH, XW, CI)
y_int = self.y['int'].reshape(XN,1,1,CO) # (XN,CI) -> (XN, XH, XW, CI)
w_int = self.w ['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO)
x_int = self.inp['int'].reshape(1,XN,1,CI) # (XN,CI) -> (XN, XH, XW, CI)
y_int = self.y ['int'].reshape(1,XN,1,CO) # (XN,CO) -> (XN, XH, XW, CO)
p_int = self.y_int_b. reshape(1,XN,1,CO)
else:
y_int = self.y['int']
p_int = self.y_int_b
Expand Down
4 changes: 2 additions & 2 deletions test/py/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def clog2(x):
c = namedtuple("Compile", c._fields + n._fields)(*(c + n))

d = {
'CONFIG_BEATS' : 1,
'CONFIG_BEATS' : 0,
'X_PAD' : int(np.ceil(c.KH_MAX//2)),
'BITS_KW2' : clog2((c.KW_MAX+1)/2),
'BITS_KH2' : clog2((c.KH_MAX+1)/2),
Expand Down Expand Up @@ -194,7 +194,7 @@ def test_dnn_engine(COMPILE):
Config(5, 16),
Config(3, 24),
Config(1, 50, flatten=True),
# Config(1, 10, dense= True),
Config(1, 10, dense= True),
]

'''
Expand Down
Loading

0 comments on commit 5f039e9

Please sign in to comment.