diff --git a/c/model.h b/c/model.h
index 391b395..60a6113 100644
--- a/c/model.h
+++ b/c/model.h
@@ -1,11 +1,12 @@
-#define N_BUNDLES 6
+#define N_BUNDLES 7
Bundle_t bundles [N_BUNDLES] = {
- {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=152, .w_bpt_p0=152, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587446416637952, .w_header_p0=414341061322735616 },
- {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=19, .cm_p0=16, .w_bpt=212, .w_bpt_p0=212, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210803728023552, .w_header_p0=8700964375684448256 },
- {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=188, .w_bpt_p0=188, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933036414009344, .w_header_p0=846686625550303232 },
- {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=6, .cm=3, .cm_p0=1, .w_bpt=200, .w_bpt_p0=80, .x_bpt=2504, .x_bpt_p0=840, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1351089783815798784, .x_header_p0=198168279208951808, .w_header=1351336203269439488, .w_header_p0=198168279208951808 },
- {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=236, .w_bpt_p0=164, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660891911585792, .w_header_p0=1855492942081294336 },
- {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=19, .cm_p0=5, .w_bpt=248, .w_bpt_p0=80, .x_bpt=15816, .x_bpt_p0=4168, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=10430346632594718720, .x_header_p0=2359896100346789888, .w_header=10430593086408097792, .w_header_p0=2359896100346789888 }
+ {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=140, .w_bpt_p0=140, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=9, .b_bias_shift=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },
+ {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=200, .w_bpt_p0=200, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=1, .b_offset=16, .b_val_shift=9, .b_bias_shift=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },
+ {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=176, .w_bpt_p0=176, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=40, .b_val_shift=9, .b_bias_shift=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },
+ {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=8, .h=16, .w=8, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=248, .w_bpt_p0=248, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=1, .b_offset=58, .b_val_shift=9, .b_bias_shift=0, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },
+ {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=8, .h=16, .w=8, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=224, .w_bpt_p0=152, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=74, .b_val_shift=9, .b_bias_shift=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },
+ {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=8, .h=16, .w=8, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=248, .w_bpt_p0=56, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=1, .b_offset=98, .b_val_shift=9, .b_bias_shift=0, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },
+ {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .w_kw2=1, .t=1, .p=320, .cm=20, .cm_p0=20, .w_bpt=248, .w_bpt_p0=248, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=170, .b_val_shift=9, .b_bias_shift=0, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }
};
#define X_BITS_L2 2
@@ -13,12 +14,12 @@ Bundle_t bundles [N_BUNDLES] = {
#define PE_ROWS 8
#define PE_COLS 24
-#define WB_BYTES 20436
-#define W_BYTES 20096
+#define WB_BYTES 98436
+#define W_BYTES 98048
#define X_BYTES 2520
-#define X_BYTES_ALL 75896
+#define X_BYTES_ALL 120040
#define Y_BYTES 294920
#define B_TYPE signed short
-#define B_WORDS 170
+#define B_WORDS 194
#define DATA_DIR "D:/dnn-engine/test/vectors"
diff --git a/rtl/axis_weight_rotator.sv b/rtl/axis_weight_rotator.sv
index 580e193..120b94f 100644
--- a/rtl/axis_weight_rotator.sv
+++ b/rtl/axis_weight_rotator.sv
@@ -120,7 +120,7 @@ module axis_weight_rotator #(
always_ff @(posedge aclk)
if (!aresetn) state_read <= R_IDLE_S;
else unique case (state_read)
- R_IDLE_S : if (done_write [i_read]) state_read <= R_PASS_CONFIG_S;
+ R_IDLE_S : if (done_write [i_read]) state_read <= CONFIG_BEATS==0 ? R_READ_S : R_PASS_CONFIG_S;
R_PASS_CONFIG_S : if (lc_config) state_read <= R_READ_S;
R_READ_S : if (lc_xn ) state_read <= R_SWITCH_S;
R_SWITCH_S : state_read <= R_IDLE_S;
@@ -201,15 +201,18 @@ module axis_weight_rotator #(
W_SWITCH_S : done_write_next [i] = 1;
endcase
- if (i==i_read)
+ if (i==i_read) begin
+
+ if (CONFIG_BEATS==0 ? (state_read==R_IDLE_S && done_write [i_read]) : (state_read==R_PASS_CONFIG_S)) begin
+ done_read_next [i] = 0;
+ bram_m_ready [i] = 1;
+ end
+
case (state_read)
- R_PASS_CONFIG_S : begin
- done_read_next [i] = 0;
- bram_m_ready [i] = m_axis_tready;
- end
- R_READ_S : bram_m_ready [i] = m_axis_tready;
- R_SWITCH_S : done_read_next [i] = 1;
+ R_PASS_CONFIG_S, R_READ_S : bram_m_ready [i] = m_axis_tready;
+ R_SWITCH_S : done_read_next [i] = 1;
endcase
+ end
end
config_st ref_i;
@@ -288,7 +291,8 @@ module axis_weight_rotator #(
config_st ref_i_read;
assign ref_i_read = ref_config[i_read];
- counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in(BITS_CONFIG_BEATS'( CONFIG_BEATS-1 )), .last_clk(lc_config), .last(l_config) );
+ wire [BITS_CONFIG_BEATS-1:0] config_beats_const = CONFIG_BEATS-1;
+ counter #(.W(BITS_CONFIG_BEATS)) C_CONFIG (.clk(aclk), .reset(copy_config), .en(en_count_config), .max_in( config_beats_const ), .last_clk(lc_config), .last(l_config) );
counter #(.W(BITS_KW )) C_KW (.clk(aclk), .reset(copy_config), .en(en_kw ), .max_in(BITS_KW '( 2*ref_i_read.kw2 )), .last_clk(lc_kw ), .last(l_kw ), .first(f_kw ) );
counter #(.W(BITS_CI )) C_CI (.clk(aclk), .reset(copy_config), .en(lc_kw ), .max_in(BITS_CI '( ref_i_read.cin_1 )), .last_clk(lc_cin ), .last(l_cin ), .first(f_cin ) );
counter #(.W(BITS_XW )) C_XW (.clk(aclk), .reset(copy_config), .en(lc_cin ), .max_in(BITS_XW '( ref_i_read.cols_1 )), .last_clk(lc_cols ), .last(l_cols ), .first(f_cols ), .count(c_cols));
diff --git a/rtl/include/params_input.svh b/rtl/include/params_input.svh
index 8c805ef..ec0a092 100644
--- a/rtl/include/params_input.svh
+++ b/rtl/include/params_input.svh
@@ -13,7 +13,7 @@
`define XW_MAX 32 // max of input image width, across layers
`define XN_MAX 16 // max of input batch size, across layers
`define CI_MAX 2048 // max of input channels, across layers
- `define CONFIG_BEATS 1 // constant, for now
+ `define CONFIG_BEATS 0 // constant, for now
`define RAM_WEIGHTS_DEPTH 20 // CONFIG_BEATS + max(KW * CI), across layers
`define RAM_EDGES_DEPTH 288 // max (KW * CI * XW), across layers when KW != 1
diff --git a/rtl/sram/cyclic_bram.sv b/rtl/sram/cyclic_bram.sv
index bee5a41..f7e13bd 100644
--- a/rtl/sram/cyclic_bram.sv
+++ b/rtl/sram/cyclic_bram.sv
@@ -14,7 +14,6 @@ module cyclic_bram #(
input logic w_en, r_en,
input logic [W_DATA_WIDTH-1:0] s_data,
output logic [R_DATA_WIDTH-1:0] m_data,
- output logic m_valid,
input logic [R_ADDR_WIDTH-1:0] r_addr_max, r_addr_min
);
diff --git a/test/py/bundle.py b/test/py/bundle.py
index c662af7..0a42f02 100644
--- a/test/py/bundle.py
+++ b/test/py/bundle.py
@@ -410,9 +410,10 @@ def export (self, c):
print('Conv -> Dense Reshape')
CI, CO = self.w['int'].shape
XN, _ = self.inp['int'].shape
- w_int = self.w['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO)
- x_int = self.inp['int'].reshape(XN,1,1,CI) # (XN,CI) -> (XN, XH, XW, CI)
- y_int = self.y['int'].reshape(XN,1,1,CO) # (XN,CI) -> (XN, XH, XW, CI)
+ w_int = self.w ['int'].reshape(1,1,CI,CO) # (CI,CO) -> (KH,KW,CI,CO)
+ x_int = self.inp['int'].reshape(1,XN,1,CI) # (XN,CI) -> (XN, XH, XW, CI)
+ y_int = self.y ['int'].reshape(1,XN,1,CO) # (XN,CO) -> (XN, XH, XW, CO)
+ p_int = self.y_int_b. reshape(1,XN,1,CO)
else:
y_int = self.y['int']
p_int = self.y_int_b
diff --git a/test/py/param_test.py b/test/py/param_test.py
index 1dbccd7..a0dd1e9 100644
--- a/test/py/param_test.py
+++ b/test/py/param_test.py
@@ -60,7 +60,7 @@ def clog2(x):
c = namedtuple("Compile", c._fields + n._fields)(*(c + n))
d = {
- 'CONFIG_BEATS' : 1,
+ 'CONFIG_BEATS' : 0,
'X_PAD' : int(np.ceil(c.KH_MAX//2)),
'BITS_KW2' : clog2((c.KW_MAX+1)/2),
'BITS_KH2' : clog2((c.KH_MAX+1)/2),
@@ -194,7 +194,7 @@ def test_dnn_engine(COMPILE):
Config(5, 16),
Config(3, 24),
Config(1, 50, flatten=True),
- # Config(1, 10, dense= True),
+ Config(1, 10, dense= True),
]
'''
diff --git a/test/wave/dnn_engine_tb_behav.wcfg b/test/wave/dnn_engine_tb_behav.wcfg
index 2fdbd3f..411347f 100644
--- a/test/wave/dnn_engine_tb_behav.wcfg
+++ b/test/wave/dnn_engine_tb_behav.wcfg
@@ -12,13 +12,13 @@
-
-
-
+
+
+
-
+
@@ -446,7 +446,6 @@
Weight Rot
label
-
aresetn
aresetn
@@ -454,7 +453,6 @@
Slave
label
-
s_axis_tready
s_axis_tready
@@ -550,7 +548,6 @@
REF
label
-
en_ref[1:0]
en_ref[1:0]
@@ -562,8 +559,8 @@
- ref_config[1:0][28:0]
- ref_config[1:0][28:0]
+ ref_config[1:0][29:0]
+ ref_config[1:0][29:0]
\genblk1[0].ref_i
@@ -581,6 +578,7 @@
READ
label
+
state_read[31:0]
state_read[31:0]
@@ -642,6 +640,7 @@
BRAM
label
+
bram_wen[1:0]
bram_wen[1:0]
@@ -662,12 +661,10 @@
bram_m_data[1:0][95:0]
bram_m_data[1:0][95:0]
-
BRAM_1
label
-
clk
clk
@@ -701,23 +698,23 @@
m_valid
- r_addr_max[3:0]
- r_addr_max[3:0]
+ r_addr_max[4:0]
+ r_addr_max[4:0]
UNSIGNEDDECRADIX
- r_addr_min[3:0]
- r_addr_min[3:0]
+ r_addr_min[4:0]
+ r_addr_min[4:0]
UNSIGNEDDECRADIX
- w_addr[3:0]
- w_addr[3:0]
+ w_addr[4:0]
+ w_addr[4:0]
UNSIGNEDDECRADIX
- r_addr[3:0]
- r_addr[3:0]
+ r_addr[4:0]
+ r_addr[4:0]
UNSIGNEDDECRADIX
@@ -760,6 +757,7 @@
BRAM_0
label
+
clk
clk
@@ -793,20 +791,20 @@
m_valid
- r_addr_max[3:0]
- r_addr_max[3:0]
+ r_addr_max[4:0]
+ r_addr_max[4:0]
- r_addr_min[3:0]
- r_addr_min[3:0]
+ r_addr_min[4:0]
+ r_addr_min[4:0]
- w_addr[3:0]
- w_addr[3:0]
+ w_addr[4:0]
+ w_addr[4:0]
- r_addr[3:0]
- r_addr[3:0]
+ r_addr[4:0]
+ r_addr[4:0]
@@ -887,6 +885,39 @@
+
+ Sync
+ label
+
+
+ weights_m_valid
+ weights_m_valid
+
+
+ pixels_m_valid
+ pixels_m_valid
+
+
+ m_axis_tready
+ m_axis_tready
+
+
+ weights_m_user
+ weights_m_user
+
+
+ m_axis_tvalid
+ m_axis_tvalid
+
+
+ weights_m_ready
+ weights_m_ready
+
+
+ pixels_m_ready
+ pixels_m_ready
+
+
PROC
label
@@ -894,6 +925,7 @@
Proc
label
+
aclk
aclk
@@ -1166,7 +1198,6 @@
Output pipe
label
-
aclk
aclk
@@ -1243,7 +1274,6 @@
Out RAM shift
label
-
clk
clk
@@ -1276,33 +1306,11 @@
clk
clk
-
- bram_addr_a[11:0]
- bram_addr_a[11:0]
- SIGNEDDECRADIX
-
ram_r_addr[9:0]
ram_r_addr[9:0]
UNSIGNEDDECRADIX
-
- bram_rddata_a[31:0]
- bram_rddata_a[31:0]
- SIGNEDDECRADIX
-
-
- bram_en_a
- bram_en_a
-
-
- done_fill
- done_fill
-
-
- t_done_proc
- t_done_proc
-
STATE