Skip to content

Commit

Permalink
Change datatypes using stdint.h
Browse files Browse the repository at this point in the history
  • Loading branch information
Aba committed Oct 31, 2023
1 parent 14ac970 commit 6f69af8
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 80 deletions.
155 changes: 78 additions & 77 deletions c/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <assert.h>
#include <stdlib.h>
#include <limits.h>
#include <stdint.h>

#ifdef VERILATOR
#define EXT_C "C"
Expand All @@ -10,14 +11,14 @@
#endif

// Per-bundle parameter record. Every member is const, so a Bundle_t is read-only once
// initialised; the functions in this file read it through `pb->...` / `bundles[ib]. ...`
// (e.g. n, l, w, p, cm, cm_p0 bound the tile indices checked in write_x; x_bpt/w_bpt and
// their _p0 variants are handed out by load_x/load_w).
// NOTE(review): this view is a rendered commit diff, so each member line appears twice —
// first with the pre-commit types (int / char / signed char / unsigned long long), then with
// the <stdint.h> types the commit introduces. Only the second set belongs to the resulting struct.
typedef struct {
const int n, l, kw, coe, coe_tl, r_ll, h, w, ci, co, w_kw2, t, p, cm, cm_p0;
const int w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_bytes; // bytes per transfer
const char is_bias, is_pool, is_flatten;
const int b_offset, b_val_shift, b_bias_shift;
const signed char ca_nzero, ca_shift, ca_pl_scale;
const int csh, ch, csh_shift, pkh, psh, ph, psh_shift, csw, cw, csw_shift, pkw, psw, pw, psw_shift, pool, on, oh, ow, oc;
const unsigned long long x_header, x_header_p0, w_header, w_header_p0; // 64 bits (at least)
const int debug_nhwc_words;
const int32_t n, l, kw, coe, coe_tl, r_ll, h, w, ci, co, w_kw2, t, p, cm, cm_p0;
const int32_t w_bpt, w_bpt_p0, x_bpt, x_bpt_p0, o_bytes; // bytes per transfer
const int8_t is_bias, is_pool, is_flatten; // used as truth values (e.g. `if (pb->is_flatten)`)
const int32_t b_offset, b_val_shift, b_bias_shift;
const int8_t ca_nzero, ca_shift, ca_pl_scale;
const int32_t csh, ch, csh_shift, pkh, psh, ph, psh_shift, csw, cw, csw_shift, pkw, psw, pw, psw_shift, pool, on, oh, ow, oc; // `pool` is compared against Pool_t values (POOL_MAX)
const uint64_t x_header, x_header_p0, w_header, w_header_p0; // exactly 64 bits (uint64_t)
const int32_t debug_nhwc_words; // number of mem.debug_nhwc words dumped to the *_y_nhwc_sim.txt file
} Bundle_t;

typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
Expand All @@ -26,13 +27,13 @@ typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
#define X_BITS (1<<X_BITS_L2)

// All buffers used by the runtime, grouped in one struct so their relative placement is fixed.
// Array lengths come from configuration macros (W_BYTES, B_WORDS, X_BYTES_ALL, O_BYTES_MAX,
// O_WORDS, Y_BYTES) that are defined outside this view.
// NOTE(review): this view is a rendered commit diff — the `char`/`int` member lines are the
// pre-commit declarations and the `int8_t`/`int32_t` lines are their replacements.
typedef struct {
char w [W_BYTES ];
int8_t w [W_BYTES ];
B_TYPE b [B_WORDS ]; // keep next to w. weights are loaded to w_ptr
char x [X_BYTES_ALL ];
char nx [O_BYTES_MAX ];
int y [O_WORDS ];
int nhwc [Y_BYTES/4 ];
int debug_nhwc [Y_BYTES/4];
int8_t x [X_BYTES_ALL ]; // filled from file by fill_memory; read by get_byte_wx (mode 1)
int8_t nx [O_BYTES_MAX ]; // written by write_x (tiled output)
int32_t y [O_WORDS ]; // written by tile_write for the last bundle (NHWC order)
int32_t nhwc [Y_BYTES/4 ]; // read back inside the pooling window in load_y
int32_t debug_nhwc [Y_BYTES/4]; // every tile_write value is mirrored here and dumped by load_y
} Memory_st;
// The single global instance used by every function in this file.
// NOTE(review): this is a variable definition inside a header; confirm the header is only
// ever included from one translation unit.
Memory_st mem;

Expand All @@ -46,19 +47,19 @@ Memory_st mem;
// Generic arithmetic helpers. These are function-like macros, so an argument may be
// evaluated more than once: pass only side-effect-free expressions.
#define max(x, y) ((x) > (y) ? (x) : (y))
#define min(x, y) ((x) < (y) ? (x) : (y))
#define clip(x, xmin, xmax) (((x) < (xmin)) ? (xmin) : ((x) > (xmax)) ? (xmax) : (x))
// shift_round(n, s) === np.around(n/2**s).astype(int)  -- divide by 2**s, ties go to the even result.
// Requires s >= 1: for s == 0 the term 1<<((s)-1) is a shift by a negative count (undefined behaviour).
// The final shift count is parenthesised so that arguments such as `c ? 2 : 3` or `a & 3` parse correctly.
#define shift_round(n, s) (((n) + (1<<((s)-1)) - (~((n)>>(s))&1) ) >> (s))
// div_round(a, b): a/b rounded to nearest with ties going to the even quotient, for a >= 0 and b > 0.
// NOTE(review): behaviour for negative operands has not been verified here.
#define div_round(a, b) (((a)+((b)/2) - (~((b)|(a)/(b)) &1))/(b))


// Quantised leaky-ReLU step applied to one accumulator value.
//   x        : input value
//   nzero    : when non-zero, negative inputs are kept as-is; otherwise they become 0
//   shift    : right-shift amount passed to shift_round (rounding division by 2**shift)
//   pl_scale : left-shift applied to non-negative inputs; also narrows the lower clip bound
// Returns the rounded value clipped to [-(1<<(X_BITS-pl_scale-1)), (1<<(X_BITS-1))-1].
static inline int32_t quant_lrelu(int32_t x, int8_t nzero, int8_t shift, int8_t pl_scale){
  // Single conditional expression (the original notes this form targets ARM).
  const int32_t activated = (x >= 0) ? (x << pl_scale) : (nzero ? x : 0);

  const int32_t rounded = shift_round(activated, shift);

  const int32_t lower = -(1<<(X_BITS-pl_scale-1));
  const int32_t upper = (1<<(X_BITS-1))-1;
  return clip(rounded, lower, upper);
}


static inline void write_x(signed char val, int ib, int ixp, int ixn, int ixl, int ixw, int ixcm, int ixr, Bundle_t *pb_out, int xcm ){
static inline void write_x(int8_t val, int32_t ib, int32_t ixp, int32_t ixn, int32_t ixl, int32_t ixw, int32_t ixcm, int32_t ixr, Bundle_t *pb_out, int32_t xcm ){

assert_printf (ixr , <, PE_ROWS+X_PAD, "write_x", "--- ib:%d ixp:%d ixn:%d ixl:%d ixw:%d ixcm:%d ixr:%d xcm :%d \n",ib,ixp,ixn,ixl,ixw,ixcm,ixr,xcm);
assert_printf (ixcm, <, xcm , "write_x", "--- ib:%d ixp:%d ixn:%d ixl:%d ixw:%d ixcm:%d ixr:%d xcm :%d \n",ib,ixp,ixn,ixl,ixw,ixcm,ixr,xcm);
Expand All @@ -67,13 +68,13 @@ static inline void write_x(signed char val, int ib, int ixp, int ixn, int ixl, i
assert_printf (ixn , <, pb_out->n , "write_x", "--- ib:%d ixp:%d ixn:%d ixl:%d ixw:%d ixcm:%d ixr:%d xcm :%d \n",ib,ixp,ixn,ixl,ixw,ixcm,ixr,xcm);
assert_printf (ixp , <, pb_out->p , "write_x", "--- ib:%d ixp:%d ixn:%d ixl:%d ixw:%d ixcm:%d ixr:%d xcm :%d \n",ib,ixp,ixn,ixl,ixw,ixcm,ixr,xcm);

int p_offset = (ixp == 0) ? 0 : (pb_out->cm_p0 + (ixp-1)*pb_out->cm) *pb_out->n*pb_out->l*pb_out->w*(PE_ROWS+X_PAD);
int flat_index_n2r = (((ixn*pb_out->l + ixl)*pb_out->w + ixw)*xcm + ixcm)*(PE_ROWS+X_PAD) + ixr; // multidim_index -> flat_index [n,l,w,cm,r]
int32_t p_offset = (ixp == 0) ? 0 : (pb_out->cm_p0 + (ixp-1)*pb_out->cm) *pb_out->n*pb_out->l*pb_out->w*(PE_ROWS+X_PAD);
int32_t flat_index_n2r = (((ixn*pb_out->l + ixl)*pb_out->w + ixw)*xcm + ixcm)*(PE_ROWS+X_PAD) + ixr; // multidim_index -> flat_index [n,l,w,cm,r]
mem.nx[p_offset + flat_index_n2r] = val;
}


static inline void tile_write( int out_val, int ib, Bundle_t *pb, int i_yn, int i_yh, int i_yw, int i_yc, int yn, int yh, int yw, int yc ) {
static inline void tile_write( int32_t out_val, int32_t ib, Bundle_t *pb, int32_t i_yn, int32_t i_yh, int32_t i_yw, int32_t i_yc, int32_t yn, int32_t yh, int32_t yw, int32_t yc ) {

// ------ FLATTEN ------
if (pb->is_flatten) {
Expand All @@ -98,37 +99,37 @@ static inline void tile_write( int out_val, int ib, Bundle_t *pb, int i_yn, int
// y [n,h,w,c] -> x[p, n, l, w,cmp, r+pad]

Bundle_t* pb_out = ib == N_BUNDLES-1 ? &bundles[ib] : &bundles[ib+1];
char yp_first = i_yc < pb_out->cm_p0;
int8_t yp_first = i_yc < pb_out->cm_p0;

div_t div_oh = div(i_yh, PE_ROWS);
int i_yr = div_oh.rem;
int i_yl = div_oh.quot;
int32_t i_yr = div_oh.rem;
int32_t i_yl = div_oh.quot;

div_t div_oc = div(i_yc-pb_out->cm_p0, pb_out->cm);
int i_yp = yp_first ? 0 : div_oc.quot + 1;
int i_ycm = yp_first ? i_yc : div_oc.rem;
int ycm = yp_first ? pb_out->cm_p0 : pb_out->cm ;
int32_t i_yp = yp_first ? 0 : div_oc.quot + 1;
int32_t i_ycm = yp_first ? i_yc : div_oc.rem;
int32_t ycm = yp_first ? pb_out->cm_p0 : pb_out->cm ;


// ------ STORE ------

int iy_nhwc = flatten_nhwc(i_yn,i_yh,i_yw,i_yc, yn,yh,yw,yc,,);
int32_t iy_nhwc = flatten_nhwc(i_yn,i_yh,i_yw,i_yc, yn,yh,yw,yc,,);
mem.debug_nhwc[iy_nhwc] = out_val;

if (ib == N_BUNDLES-1)
mem.y[iy_nhwc] = out_val; // Last bundle: save as NHWC
else {

// Other bundles: pad & save as tiled
int yr_sweep = i_yh==yh-1 ? PE_ROWS : i_yr + 1;
int32_t yr_sweep = i_yh==yh-1 ? PE_ROWS : i_yr + 1;

for (int i_yr_dest = i_yr; i_yr_dest < yr_sweep; i_yr_dest++) {
for (int32_t i_yr_dest = i_yr; i_yr_dest < yr_sweep; i_yr_dest++) {
write_x(out_val, ib, i_yp, i_yn, i_yl, i_yw, i_ycm, i_yr_dest, pb_out, ycm);

// --- PADDING: the [bottom X_PAD rows of previous block (l-1)] with [first X_PAD rows of this block (l)]
if (i_yr_dest < X_PAD) {
int pad_val = (i_yl == 0) ? 0 : out_val;
int dest_yl = (i_yl == 0) ? pb_out->l-1 : i_yl-1;
int32_t pad_val = (i_yl == 0) ? 0 : out_val;
int32_t dest_yl = (i_yl == 0) ? pb_out->l-1 : i_yl-1;
write_x(pad_val, ib, i_yp, i_yn, dest_yl, i_yw, i_ycm, i_yr_dest+PE_ROWS, pb_out, ycm);
}
out_val = 0;
Expand All @@ -137,49 +138,49 @@ static inline void tile_write( int out_val, int ib, Bundle_t *pb, int i_yn, int
}


extern EXT_C void load_y (unsigned char *p_done, unsigned char *pt_done_proc, const unsigned int *p_sram_u32) {
extern EXT_C void load_y (uint8_t *p_done, uint8_t *pt_done_proc, const uint32_t *p_sram_u32) {

static Bundle_t *pb = &bundles[0];
static int it_bias=0;
static int ib=0, ip=0, it=0, in=0, il=0, iw_kw2=0;
const int *p_sram = (const int *)p_sram_u32;
static int32_t it_bias=0;
static int32_t ib=0, ip=0, it=0, in=0, il=0, iw_kw2=0;
const int32_t *p_sram = (const int32_t *)p_sram_u32;

int iy_nhwc;
int32_t iy_nhwc;
div_t div_ch, div_cw, div_ixh, div_ixw;
int ph_end, ph_beg_const, ph_beg, ixh_beg, xh_sweep;
int pw_end, pw_beg_const, pw_beg, ixw_beg, xw_sweep;
int32_t ph_end, ph_beg_const, ph_beg, ixh_beg, xh_sweep;
int32_t pw_end, pw_beg_const, pw_beg, ixw_beg, xw_sweep;

char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
int8_t f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
sprintf(f_path_raw, "%s/%0d_%0d_%0d_y_raw_sim.txt", DATA_DIR, ib, ip, it);
sprintf(f_path_sum, "%s/%0d_y_sum_sim.txt", DATA_DIR, ib);
FILE *fp_raw = fopen(f_path_raw, "a");
FILE *fp_sum = fopen(f_path_sum, "a");

//New iw_kw2:
int w_last = iw_kw2 == pb->w_kw2-1 ? pb->kw/2+1 : 1;
int sram_addr=0;
for (int icoe=0; icoe < pb->coe; icoe++) {
int i_bias = it_bias + icoe;
int32_t w_last = iw_kw2 == pb->w_kw2-1 ? pb->kw/2+1 : 1;
int32_t sram_addr=0;
for (int32_t icoe=0; icoe < pb->coe; icoe++) {
int32_t i_bias = it_bias + icoe;

for (int iw_last=0; iw_last<w_last; iw_last++) {
for (int ir=0; ir<PE_ROWS; ir++) {
for (int32_t iw_last=0; iw_last<w_last; iw_last++) {
for (int32_t ir=0; ir<PE_ROWS; ir++) {
// Indexing: [b, p, t, n, l, w | coe, w_last, r]

#define DEBUG_INFO "--- ib:%d ip:%d it:%d in:%d il:%d iw_kw2:%d icoe:%d iw_last:%d ir:%d \n",ib,ip,it,in,il,iw_kw2,icoe,iw_last,ir

int raw_val=0, out_val=0;
int32_t raw_val=0, out_val=0;

// Calculate y_index
int i_yn = in;
int i_yh = il*PE_ROWS + ir;
int i_yw = iw_kw2 + iw_last;
int i_yc = pb->coe*it + icoe;
int32_t i_yn = in;
int32_t i_yh = il*PE_ROWS + ir;
int32_t i_yw = iw_kw2 + iw_last;
int32_t i_yc = pb->coe*it + icoe;

// Save y_dims
int yn = pb->n;
int yh = pb->h;
int yw = pb->w;
int yc = pb->co;
int32_t yn = pb->n;
int32_t yh = pb->h;
int32_t yw = pb->w;
int32_t yc = pb->co;

// if out of bounds, early return
if (i_yh >= yh || i_yc >= yc) {
Expand Down Expand Up @@ -269,20 +270,20 @@ extern EXT_C void load_y (unsigned char *p_done, unsigned char *pt_done_proc, c
xw_sweep = i_yw == yw-1 ? pb->pw : ixw_beg+1; // But when iy(h,w) is at its edges, need to compute remaining ix(hw) pixels by sweeping

// Sweep the pooling window
for (int ixh = ixh_beg, ph_beg = ph_beg_const; ixh < xh_sweep; ixh++, ph_beg += pb->psh) {
for (int ixw = ixw_beg, pw_beg = pw_beg_const; ixw < xw_sweep; ixw++, pw_beg += pb->psw) {
for (int32_t ixh = ixh_beg, ph_beg = ph_beg_const; ixh < xh_sweep; ixh++, ph_beg += pb->psh) {
for (int32_t ixw = ixw_beg, pw_beg = pw_beg_const; ixw < xw_sweep; ixw++, pw_beg += pb->psw) {

// Traverse each pool window & perform pooling
int result = pb->pool == POOL_MAX ? INT_MIN : 0;
for (int ipyh = ph_end; ipyh > ph_beg; ipyh--){
for (int ipyw = pw_end; ipyw > pw_beg; ipyw--){
int32_t result = pb->pool == POOL_MAX ? INT_MIN : 0;
for (int32_t ipyh = ph_end; ipyh > ph_beg; ipyh--){
for (int32_t ipyw = pw_end; ipyw > pw_beg; ipyw--){

int read_idx = flatten_nhwc(i_yn, ipyh, ipyw, i_yc, yn, yh, yw, yc, "Inside pool window", DEBUG_INFO);
int read_val = mem.nhwc[read_idx];
int32_t read_idx = flatten_nhwc(i_yn, ipyh, ipyw, i_yc, yn, yh, yw, yc, "Inside pool window", DEBUG_INFO);
int32_t read_val = mem.nhwc[read_idx];
result = pb->pool==POOL_MAX ? max(result, read_val) : (result + read_val);
}
}
int count = (ph_end-ph_beg)*(pw_end-pw_beg);
int32_t count = (ph_end-ph_beg)*(pw_end-pw_beg);
result = pb->pool==POOL_MAX ? result : div_round(result, count);

// ------ POOL ACTIVATION ------
Expand Down Expand Up @@ -314,17 +315,17 @@ extern EXT_C void load_y (unsigned char *p_done, unsigned char *pt_done_proc, c

printf("done bundle!! iw_kw2:%d in:%d il:%d it:%d ip:%d ib:%d\n", iw_kw2, in, il, it, ip, ib);

char f_path_tiled [1000];
int8_t f_path_tiled [1000];
sprintf(f_path_tiled, "%s/%0d_y_tiled_sim.txt", DATA_DIR, ib);
FILE *fp_tiled = fopen(f_path_tiled, "w");
for (int i=0; i<pb->o_bytes; i++)
for (int32_t i=0; i<pb->o_bytes; i++)
fprintf(fp_tiled,"%d\n", ib == N_BUNDLES-1 ? mem.y[i] : mem.nx[i]);
fclose(fp_tiled);

char f_path_debug [1000];
int8_t f_path_debug [1000];
sprintf(f_path_debug, "%s/%0d_y_nhwc_sim.txt", DATA_DIR, ib);
FILE *fp_debug = fopen(f_path_debug, "w");
for (int i=0; i<pb->debug_nhwc_words; i++)
for (int32_t i=0; i<pb->debug_nhwc_words; i++)
fprintf(fp_debug,"%d\n", mem.debug_nhwc[i]);
fclose(fp_debug);

Expand All @@ -343,11 +344,11 @@ extern EXT_C void load_y (unsigned char *p_done, unsigned char *pt_done_proc, c
}


extern EXT_C void load_x (unsigned char *p_done, int *p_offset, int *p_bpt) {
extern EXT_C void load_x (uint8_t *p_done, int32_t *p_offset, int32_t *p_bpt) {

static int ib=0, ip=0, it=0, offset_next=0;
int offset = offset_next;
int bpt = ip == 0 ? bundles[ib].x_bpt_p0 : bundles[ib].x_bpt;
static int32_t ib=0, ip=0, it=0, offset_next=0;
int32_t offset = offset_next;
int32_t bpt = ip == 0 ? bundles[ib].x_bpt_p0 : bundles[ib].x_bpt;

*p_offset = offset;
*p_bpt = bpt;
Expand All @@ -364,12 +365,12 @@ extern EXT_C void load_x (unsigned char *p_done, int *p_offset, int *p_bpt) {
}


extern EXT_C void load_w (unsigned char *p_done, int *p_offset, int *p_bpt) {
extern EXT_C void load_w (uint8_t *p_done, int32_t *p_offset, int32_t *p_bpt) {

static int ib=0, ip=0, it=0, offset_next=0;
static int32_t ib=0, ip=0, it=0, offset_next=0;

int offset = offset_next;
int bpt = ip == 0 ? bundles[ib].w_bpt_p0 : bundles[ib].w_bpt;
int32_t offset = offset_next;
int32_t bpt = ip == 0 ? bundles[ib].w_bpt_p0 : bundles[ib].w_bpt;

*p_offset = offset;
*p_bpt = bpt;
Expand All @@ -387,7 +388,7 @@ extern EXT_C void load_w (unsigned char *p_done, int *p_offset, int *p_bpt) {

extern EXT_C void fill_memory (){
FILE *fp;
char f_path [1000];
int8_t f_path [1000];

sprintf(f_path, "%s/w.bin", DATA_DIR);
fp = fopen(f_path, "rb");
Expand All @@ -403,12 +404,12 @@ extern EXT_C void fill_memory (){
fread(mem.x, 1, X_BYTES_ALL, fp);
fclose(fp);

for (int i=0; i<B_WORDS; i++)
for (int32_t i=0; i<B_WORDS; i++)
printf("i:%d, bias:%d\n", i, mem.b[i]);
}


// Returns the byte at index `addr` of mem.w (mode 0) or mem.x (mode 1).
// Any other mode returns 0. Previously control fell off the end of this non-void
// function for such modes, and using that result is undefined behaviour.
// NOTE(review): `addr` is not range-checked; callers presumably stay within
// W_BYTES / X_BYTES_ALL — confirm.
extern EXT_C int8_t get_byte_wx (int32_t addr, int32_t mode){
  if (mode==0) return mem.w[addr];
  if (mode==1) return mem.x[addr];
  return 0; // unknown mode: defined fallback instead of a missing return
}
5 changes: 2 additions & 3 deletions test/py/param_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
print(SOURCES)

type_d = {
'np': {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
'c' : {8: 'signed char', 16: 'signed short', 32: 'signed int', 64: 'signed long long'}
'np': {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
}

'''
Expand Down Expand Up @@ -330,7 +329,7 @@ def test_dnn_engine(COMPILE):
ch.write(f"#define O_BYTES_MAX {o_bytes_max}\n")
ch.write(f"#define X_BYTES_ALL {x_bytes_all}\n")
ch.write(f"#define Y_BYTES {y_bytes_max}\n")
ch.write(f"#define B_TYPE {type_d['c'][c.B_BITS]}\n")
ch.write(f"#define B_TYPE int{c.B_BITS}_t\n")
ch.write(f"#define B_WORDS {b_words}\n")
ch.write(f'#define DATA_DIR "{DATA_DIR}"\n\n')

Expand Down

0 comments on commit 6f69af8

Please sign in to comment.