sw: verification framework for FusedConv (failing)
Viviane Potocnik committed Feb 7, 2024
1 parent 7a9b0a0 commit c1d5c75
Showing 6 changed files with 228 additions and 78 deletions.
50 changes: 26 additions & 24 deletions sw/dnn/conv2d/src/conv2d.h
@@ -144,35 +144,37 @@ typedef struct conv_layer_struct {
*/

typedef struct {
uint32_t ch_in;
uint32_t ch_out;
uint32_t dim_in_x;
uint32_t dim_in_y;
uint32_t dim_kernel_x;
uint32_t dim_kernel_y;
uint32_t dim_out_x;
uint32_t dim_out_y;
uint32_t padding_y_top;
uint32_t padding_y_bottom;
uint32_t padding_x_left;
uint32_t padding_x_right;
uint32_t stride_x;
uint32_t stride_y;
uint32_t flag_relu;
int flag_batch_norm;
int depthwise;
int chw_layer;
int flag_y_accumulate_start;
int flag_y_accumulate_end;
float *pInBuffer;
uint16_t dim_in_x;
uint16_t dim_in_y;
uint16_t ch_in;
float *pWeight;
uint16_t ch_out;
uint16_t dim_kernel_x;
uint16_t dim_kernel_y;
uint16_t padding_y_top;
uint16_t padding_y_bottom;
uint16_t padding_x_left;
uint16_t padding_x_right;
uint16_t stride_x;
uint16_t stride_y;
int8_t *bias;
uint16_t bias_shift;
uint16_t out_shift;
uint16_t out_mult;
float *pOutBuffer;
uint16_t dim_out_x;
uint16_t dim_out_y;
float *kappa;
float *lambda;
float *kappa;
float *pOutBuffer;
int8_t *bias;
uint8_t *pIm2ColBuffer;
int flag_relu;
int flag_batch_norm;
int flag_y_accumulate_start;
int flag_y_accumulate_end;
unsigned int *memory_chan;
uint32_t bias_shift;
uint32_t out_shift;
uint32_t out_mult;
precision_t dtype;
} kernel_fp32;

55 changes: 39 additions & 16 deletions sw/dnn/fusedconv/data/datagen.py
@@ -6,7 +6,6 @@
# Viviane Potocnik <[email protected]>

import argparse
import numpy as np
import pathlib
import hjson
import sys
@@ -17,15 +16,15 @@
import data_utils # noqa: E402
from data_utils import emit_license, \
format_struct_definition, format_array_definition, \
format_scalar_definition, format_array_declaration, \
format_ifdef_wrapper, NUMPY_T # noqa: E402
format_scalar_definition, format_array_declaration # noqa: E402

torch.manual_seed(42)

# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits, the data should be aligned to 4KB
BURST_ALIGNMENT = 4096
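
BURST_ALIGNMENT is handed to the array emitters further down; presumably the generated C declarations carry an alignment attribute so each DRAM buffer starts on a 4 KiB boundary. A minimal sketch of such an emitter follows; emit_aligned_decl() is a hypothetical helper for illustration, not the data_utils API used by this commit.

def emit_aligned_decl(ctype, name, n_elems, alignment=BURST_ALIGNMENT):
    # Hypothetical helper: emit a C array declaration aligned to `alignment` bytes,
    # e.g. "float fusedconv_pInBuffer_dram[N] __attribute__((aligned(4096)));"
    return f'{ctype} {name}[{n_elems}] __attribute__((aligned({alignment})));'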


# FusedConv
def golden_model(ifmap, weights, bn_k, bn_l, padding, stride, bn, relu, accumulate, depthwise):
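
The body of golden_model() is collapsed in this view. As a rough, hedged reference only (not the code of this commit), a fused Conv2d + per-channel batch norm + ReLU golden model could look like the sketch below. The (H, W, C) tensor layouts, the padding/stride dict keys, and the omission of the depthwise and accumulation paths are assumptions made purely for illustration.

import torch
import torch.nn.functional as F

def golden_model_sketch(ifmap, weights, bn_k, bn_l, padding, stride, bn, relu):
    # Assumed layouts: ifmap (H, W, Ci), weights (Co, Kh, Kw, Ci).
    ifmap_chw = ifmap.permute(2, 0, 1).unsqueeze(0)            # (1, Ci, H, W)
    ifmap_padded = F.pad(ifmap_chw,
                         (padding['padding_x_left'], padding['padding_x_right'],
                          padding['padding_y_top'], padding['padding_y_bottom']))
    ofmap = F.conv2d(ifmap_padded, weights.permute(0, 3, 1, 2),
                     stride=(stride['stride_y'], stride['stride_x']))
    ofmap = ofmap.squeeze(0).permute(1, 2, 0)                  # back to (Ho, Wo, Co)
    ofmap_before = ofmap.clone()                               # conv-only result
    if bn:
        ofmap = ofmap * bn_k + bn_l                            # per-channel affine BN
    if relu:
        ofmap = torch.relu(ofmap)
    return ofmap, ofmap_before, ifmap_padded.squeeze(0).permute(1, 2, 0)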

@@ -110,21 +109,20 @@ def emit_header(**kwargs):
kwargs['dim_kernel_x'], kwargs['ch_out'],
requires_grad=False, dtype=torch_type)


bn_k = torch.randn(kwargs['ch_out'], requires_grad=False, dtype=torch_type)
bn_l = torch.randn(kwargs['ch_out'], requires_grad=False, dtype=torch_type)

flag_y_accumulate_start = kwargs['flags']['flag_y_accumulate_start']

ofmap, ofmap_before, ifmap_padded = golden_model(ifmap, kernel,
ofmap, ofmap_before, ifmap_padded = golden_model(ifmap, kernel,
bn_k, bn_l,
kwargs['padding'],
kwargs['stride'],
kwargs['padding'],
kwargs['stride'],
kwargs['flags']['flag_batch_norm'],
kwargs['flags']['flag_relu'],
not flag_y_accumulate_start,
kwargs['depthwise'])

if kwargs['chw_layer']:
ifmap = ifmap.permute(2, 0, 1)
ifmap_padded = ifmap_padded.permute(2, 0, 1)
@@ -166,22 +164,47 @@ def emit_header(**kwargs):
'stride_y': kwargs['stride']['stride_y'],
'flag_relu': kwargs['flags']['flag_relu'],
'flag_batch_norm': kwargs['flags']['flag_batch_norm'],
'depthwise': kwargs['depthwise'],
'chw_layer': kwargs['chw_layer'],
'flag_y_accumulate_start': flag_y_accumulate_start,
'flag_y_accumulate_end': kwargs['flags']['flag_y_accumulate_end'],
'pInBuffer': 'fusedconv_pInBuffer_dram',
'pWeight': 'fusedconv_pWeight_dram',
'lambda': 'fusedconv_lambda_dram',
'kappa': 'fusedconv_kappa_dram',
'pOutBuffer': 'fusedconv_pOutBuffer_dram',
'dtype': 'FP' + prec
}

data_str = [emit_license()]
data_str += [format_struct_definition('kernel_fp32', 'k', layer_cfg)]
data_str += [format_array_declaration(ctype, 'fusedconv_pInBuffer_dram',
ifmap_padded.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_array_declaration(ctype, 'fusedconv_pWeight_dram',
kernel.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_array_declaration(ctype, 'fusedconv_lambda_dram',
bn_l.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_array_declaration(ctype, 'fusedconv_kappa_dram',
bn_k.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_array_declaration(ctype, 'fusedconv_pOutBuffer_dram',
ofmap_before.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_array_declaration(ctype, 'fusedconv_pCheckOutBuffer_dram',
ofmap.numpy().shape, BURST_ALIGNMENT)]
data_str += [format_struct_definition('kernel_fp32', 'layer', layer_cfg)]
data_str += [format_scalar_definition('uint32_t', 'dw', kwargs['depthwise'])]
data_str += [format_scalar_definition('uint32_t', 'chw_layer', kwargs['chw_layer'])]
data_str += [format_array_definition(ctype, f'fusedconv_pInBuffer_dram', ifmap_padded.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, f'fusedconv_pWeight_dram', kernel.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, f'fusedconv_lambda_dram', bn_l.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, f'fusedconv_kappa_dram', bn_k.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, f'fusedconv_pOutBuffer_dram', ofmap_before.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, f'fusedconv_pCheckOutBuffer_dram', ofmap.numpy(), BURST_ALIGNMENT)]

data_str += [format_array_definition(ctype, 'fusedconv_pInBuffer_dram',
ifmap_padded.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, 'fusedconv_pWeight_dram',
kernel.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, 'fusedconv_lambda_dram',
bn_l.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, 'fusedconv_kappa_dram',
bn_k.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, 'fusedconv_pOutBuffer_dram',
ofmap_before.numpy(), BURST_ALIGNMENT)]
data_str += [format_array_definition(ctype, 'fusedconv_pCheckOutBuffer_dram',
ofmap.numpy(), BURST_ALIGNMENT)]

data_str = '\n\n'.join(data_str)

return data_str
2 changes: 1 addition & 1 deletion sw/dnn/fusedconv/data/params.hjson
@@ -30,5 +30,5 @@
}
depthwise: 0,
chw_layer: 0,
prec: '32'
prec: '32',
}
51 changes: 17 additions & 34 deletions sw/dnn/fusedconv/src/main.c
@@ -10,15 +10,16 @@
void *share_ptr;

int main() {
uint32_t ifmap_size = (k.dim_in_x + k.padding_x_left + k.padding_x_right) *
(k.dim_in_y + k.padding_y_top + k.padding_y_bottom) *
k.ch_in;
uint32_t ifmap_size =
(layer.dim_in_x + layer.padding_x_left + layer.padding_x_right) *
(layer.dim_in_y + layer.padding_y_top + layer.padding_y_bottom) *
layer.ch_in;
uint32_t weights_size =
k.dim_kernel_x * k.dim_kernel_y * k.ch_in * k.ch_out;
uint32_t ofmap_size = k.dim_out_x * k.dim_out_y * k.ch_out;
layer.dim_kernel_x * layer.dim_kernel_y * layer.ch_in * layer.ch_out;
uint32_t ofmap_size = layer.dim_out_x * layer.dim_out_y * layer.ch_out;

uint32_t total_size =
ifmap_size + weights_size + k.ch_out + k.ch_out + ofmap_size;
ifmap_size + weights_size + layer.ch_out + layer.ch_out + ofmap_size;

float *ptr;

@@ -36,9 +37,9 @@ int main() {
float *pWeight = ptr;
ptr += weights_size;
float *kappa = ptr;
ptr += k.ch_out;
ptr += layer.ch_out;
float *lambda = ptr;
ptr += k.ch_out;
ptr += layer.ch_out;
float *pOutBuffer = ptr;
ptr += ofmap_size;

@@ -56,28 +57,28 @@ int main() {
snrt_dma_wait_all();
}

k.pInBuffer = pInBuffer;
k.pWeight = pWeight;
k.pOutBuffer = pOutBuffer;
k.kappa = kappa;
k.lambda = lambda;
layer.pInBuffer = pInBuffer;
layer.pWeight = pWeight;
layer.pOutBuffer = pOutBuffer;
layer.kappa = kappa;
layer.lambda = lambda;

snrt_cluster_hw_barrier();

for (int i = 0; i < 1; i++) {
if (snrt_is_compute_core() || (snrt_cluster_core_num() == 1)) {
if (dw) {
snrt_mcycle();
conv2d_dw_fp32(&k);
conv2d_dw_fp32(&layer);
snrt_mcycle();

} else if (chw_layer) {
snrt_mcycle();
conv2d_chw_fp32(&k);
conv2d_chw_fp32(&layer);
snrt_mcycle();
} else {
snrt_mcycle();
conv2d_fp32(&k);
conv2d_fp32(&layer);
snrt_mcycle();
}

@@ -88,23 +89,5 @@ int main() {
}
snrt_cluster_hw_barrier();

uint32_t errors = 0;
if (snrt_is_dm_core()) {
// Output feature map (H x W x Co)
const uint32_t output_w_stride = k.ch_out;
const uint32_t output_h_stride = output_w_stride * k.dim_out_x;
for (uint32_t i = 0; i < ofmap_size; i++) {
if (fabs(pOutBuffer[i] -
((float *)fusedconv_pCheckOutBuffer_dram)[i]) > 0.01) {
errors++;
printf("Error at h %d w %d co %d\n", i / output_h_stride,
(i % output_h_stride) / output_w_stride,
i % output_w_stride);
printf("Expected: %f, Got: %f\n", ((float *)fusedconv_pCheckOutBuffer_dram)[i], pOutBuffer[i]);
}
}
printf("%d/%d Errors\n", errors, ofmap_size);
}

return 0;
}
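
With the in-kernel check loop removed above, the comparison against fusedconv_pCheckOutBuffer_dram presumably moves to a host-side script in the new verification framework. A minimal, hedged sketch of such a check is shown below, reusing the 0.01 absolute tolerance of the removed C loop; the function name and the way the two buffers are obtained are assumptions, not this commit's verify script.

import numpy as np

def verify_fusedconv(actual, golden, atol=0.01):
    # Compare the device output against the golden model within an absolute tolerance.
    actual = np.asarray(actual, dtype=np.float32).ravel()
    golden = np.asarray(golden, dtype=np.float32).ravel()
    mismatches = np.flatnonzero(np.abs(actual - golden) > atol)
    for i in mismatches:
        print(f'Mismatch at flat index {i}: expected {golden[i]}, got {actual[i]}')
    print(f'{mismatches.size}/{golden.size} errors')
    return int(mismatches.size)  # non-zero return signals failure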