sw: add FusedConv to compiled apps w/o verification
Viviane Potocnik committed Feb 5, 2024
1 parent 9b3c458 commit 6023983
Showing 7 changed files with 233 additions and 8 deletions.
1 change: 1 addition & 0 deletions sw/dnn/conv2d/src/conv2d.h
@@ -173,6 +173,7 @@ typedef struct {
int flag_y_accumulate_start;
int flag_y_accumulate_end;
unsigned int *memory_chan;
precision_t dtype;
} kernel_fp32;

/**
220 changes: 220 additions & 0 deletions sw/dnn/fusedconv/data/datagen.py
@@ -0,0 +1,220 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Viviane Potocnik <[email protected]>

import argparse
import numpy as np
import pathlib
import hjson
import sys
import os
import torch

sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
import data_utils # noqa: E402
from data_utils import emit_license, \
format_struct_definition, format_array_definition, \
format_scalar_definition, format_array_declaration, \
format_ifdef_wrapper, NUMPY_T # noqa: E402

torch.manual_seed(42)

# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096

# FusedConv
def golden_model(ifmap, weights, bn_k, bn_l, padding, stride, bn, relu, accumulate, depthwise):
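    """Golden model for the fused Conv2d + BatchNorm + ReLU kernel.

    ifmap is laid out as (H, W, C_in). weights are (C_out, FH, FW, C_in) for the
    regular convolution and (FH, FW, C_out) for the depthwise case. Returns the
    output feature map, the initial output buffer (random when accumulate is set,
    zeros otherwise), and the zero-padded input feature map.
    """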

ih, iw, ci = ifmap.shape
if not depthwise:
co, fh, fw, _ = weights.shape
else:
fh, fw, co = weights.shape
ci = co

ifmap_padded = torch.zeros(ih + padding['padding_y_top'] + padding['padding_y_bottom'], iw +
padding['padding_x_left'] + padding['padding_x_right'],
ci,
requires_grad=False, dtype=ifmap.dtype)
ifmap_padded[padding['padding_y_top']:ih+padding['padding_y_top'],
padding['padding_x_left']:iw+padding['padding_x_left']] = ifmap

    # Partial kernel windows are not modeled; if the padded input does not divide
    # evenly by the stride, the output dimension is rounded down.
if (ifmap_padded.shape[0] - (fh - 1) - 1) % stride['stride_y'] != 0:
print("Warning: rounding h output dimension")
if (ifmap_padded.shape[1] - (fw - 1) - 1) % stride['stride_x'] != 0:
print("Warning: rounding w output dimension")
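
    # Output spatial dims follow floor((padded_dim - (kernel_dim - 1) - 1) / stride) + 1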

ofmap = torch.zeros((ifmap_padded.shape[0] - (fh - 1) - 1) // stride['stride_y'] + 1,
(ifmap_padded.shape[1] - (fw - 1) - 1) // stride['stride_x'] + 1, co)
if accumulate:
ofmap_before = torch.randn_like(ofmap, requires_grad=False)
else:
ofmap_before = torch.zeros_like(ofmap, requires_grad=False)

    if depthwise:
# depthwise Conv2d
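        # Each output channel sees only its matching input channel (ci == co)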
for h in range(0, ifmap_padded.shape[0] - (fh - 1), stride['stride_y']):
for w in range(0, ifmap_padded.shape[1] - (fw - 1), stride['stride_x']):
for c in range(co):
ofmap[h//stride['stride_y'], w//stride['stride_x'],
c] = torch.dot(
ifmap_padded[h:h+fh, w:w+fw, c].flatten(),
weights[:, :, c].flatten())
else:
# Conv2d
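        # Each output channel reduces over the full kernel window and all input channels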
for h in range(0, ifmap_padded.shape[0] - (fh - 1), stride['stride_y']):
for w in range(0, ifmap_padded.shape[1] - (fw - 1), stride['stride_x']):
for c in range(co):
ofmap[h//stride['stride_y'], w//stride['stride_x'],
c] = torch.dot(
ifmap_padded[h:h+fh, w:w+fw].flatten(),
weights[c].flatten())

ofmap += ofmap_before

# BatchNorm
if bn:
ofmap = ofmap * bn_k + bn_l

# ReLU
if relu:
ofmap = torch.nn.functional.relu(ofmap)

return ofmap, ofmap_before, ifmap_padded


def emit_header(**kwargs):
# in_channels = kwargs['channels']['in']
# out_channels = kwargs['channels']['out']
# input_dim = kwargs['input_dim'] # [mini_batch, height, width]
# filter = kwargs['filter'] # [height, width, padding, stride]
prec = kwargs['prec']

torch_type = data_utils.floating_point_torch_type(prec)

ifmap = torch.randn(kwargs['dim_in_y'],
kwargs['dim_in_x'],
kwargs['ch_in'], requires_grad=False, dtype=torch_type)

if not kwargs['depthwise']:
kernel = torch.randn(kwargs['ch_out'], kwargs['dim_kernel_y'],
kwargs['dim_kernel_x'], kwargs['ch_in'],
requires_grad=False, dtype=torch_type)
else:
kernel = torch.randn(kwargs['dim_kernel_y'],
kwargs['dim_kernel_x'], kwargs['ch_out'],
requires_grad=False, dtype=torch_type)


bn_k = torch.randn(kwargs['ch_out'], requires_grad=False, dtype=torch_type)
bn_l = torch.randn(kwargs['ch_out'], requires_grad=False, dtype=torch_type)

flag_y_accumulate_start = kwargs['flags']['flag_y_accumulate_start']
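    # The kernel accumulates onto a pre-existing output buffer unless it starts
    # accumulation from zero, hence accumulate = not flag_y_accumulate_start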

ofmap, ofmap_before, ifmap_padded = golden_model(ifmap, kernel,
bn_k, bn_l,
kwargs['padding'],
kwargs['stride'],
kwargs['flags']['flag_batch_norm'],
kwargs['flags']['flag_relu'],
not flag_y_accumulate_start,
kwargs['depthwise'])

if kwargs['chw_layer']:
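        # Convert the feature maps from HWC to CHW layout and the kernel to (C_out, C_in, FH, FW)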
ifmap = ifmap.permute(2, 0, 1)
ifmap_padded = ifmap_padded.permute(2, 0, 1)
kernel = kernel.permute(0, 3, 1, 2)

ctype = data_utils.floating_point_ctype(prec)

if kwargs['depthwise']:
ih, iw, ci = ifmap.shape
oh, ow, co = ofmap.shape
fh, fw, co = kernel.shape
ci = co
ih_pad, iw_pad, _ = ifmap_padded.shape
elif kwargs['chw_layer']:
ci, ih, iw = ifmap.shape
oh, ow, co = ofmap.shape
co, ci, fh, fw = kernel.shape
_, ih_pad, iw_pad = ifmap_padded.shape
else:
ih, iw, ci = ifmap.shape
oh, ow, co = ofmap.shape
_, fh, fw, _ = kernel.shape
ih_pad, iw_pad, _ = ifmap_padded.shape
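
    # Layer configuration emitted as a kernel_fp32 struct into the generated C header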

layer_cfg = {
'ch_in': ci,
'ch_out': co,
'dim_in_x': iw,
'dim_in_y': ih,
'dim_kernel_x': fw,
'dim_kernel_y': fh,
'dim_out_x': ow,
'dim_out_y': oh,
'padding_y_top': kwargs['padding']['padding_y_top'],
'padding_y_bottom': kwargs['padding']['padding_y_bottom'],
'padding_x_left': kwargs['padding']['padding_x_left'],
'padding_x_right': kwargs['padding']['padding_x_right'],
'stride_x': kwargs['stride']['stride_x'],
'stride_y': kwargs['stride']['stride_y'],
'flag_relu': kwargs['flags']['flag_relu'],
'flag_batch_norm': kwargs['flags']['flag_batch_norm'],
'flag_y_accumulate_start': flag_y_accumulate_start,
'flag_y_accumulate_end': kwargs['flags']['flag_y_accumulate_end'],
'dtype': 'FP' + prec
}

data_str = [emit_license()]
data_str += [format_struct_definition('kernel_fp32', 'k', layer_cfg)]
data_str += [format_scalar_definition('uint32_t', 'dw', kwargs['depthwise'])]
data_str += [format_scalar_definition('uint32_t', 'chw_layer', kwargs['chw_layer'])]
    data_str += [format_array_definition(ctype, 'fusedconv_pInBuffer_dram', ifmap_padded.numpy(), BURST_ALIGNMENT)]
    data_str += [format_array_definition(ctype, 'fusedconv_pWeight_dram', kernel.numpy(), BURST_ALIGNMENT)]
    data_str += [format_array_definition(ctype, 'fusedconv_lambda_dram', bn_l.numpy(), BURST_ALIGNMENT)]
    data_str += [format_array_definition(ctype, 'fusedconv_kappa_dram', bn_k.numpy(), BURST_ALIGNMENT)]
    data_str += [format_array_definition(ctype, 'fusedconv_pOutBuffer_dram', ofmap_before.numpy(), BURST_ALIGNMENT)]
    data_str += [format_array_definition(ctype, 'fusedconv_pCheckOutBuffer_dram', ofmap.numpy(), BURST_ALIGNMENT)]

data_str = '\n\n'.join(data_str)

return data_str


def main():

    parser = argparse.ArgumentParser(description='Generate data for the fusedconv kernel')
parser.add_argument(
"-c", "--cfg",
type=pathlib.Path,
required=True,
        help='Select param config file for the kernel'
)
parser.add_argument(
'--section',
type=str,
help='Section to store matrices in')
parser.add_argument(
'output',
type=pathlib.Path,
help='Path of the output header file')
args = parser.parse_args()

# Load param config file
with args.cfg.open() as f:
param = hjson.loads(f.read())
param['section'] = args.section

# Emit header file
with open(args.output, 'w') as f:
f.write(emit_header(**param))


if __name__ == '__main__':
main()
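
# Example invocation (the config file name here is illustrative; any hjson config
# with the expected fields works):
#   ./datagen.py -c params.hjson data.h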
@@ -30,5 +30,5 @@
}
depthwise: 0,
chw_layer: 0,
prec: 32
}
prec: '32'
}
@@ -100,10 +100,11 @@ int main() {
printf("Error at h %d w %d co %d\n", i / output_h_stride,
(i % output_h_stride) / output_w_stride,
i % output_w_stride);
printf("Expected: %f, Got: %f\n", ((float *)fusedconv_pCheckOutBuffer_dram)[i], pOutBuffer[i]);
}
}
printf("%d/%d Errors\n", errors, ofmap_size);
}

return errors;
}
return 0;
}
1 change: 1 addition & 0 deletions sw/dnn/src/dnn.h
@@ -196,6 +196,7 @@ typedef struct network_single_cluster_t_ {

#include "../batchnorm/src/batchnorm.h"
#include "../concat/src/concat.h"
#include "../conv2d/src/conv2d.h"
#include "../flashattention_2/src/flashattention_2.h"
#include "../fused_concat_linear/src/fused_concat_linear.h"
#include "../gelu/src/gelu.h"
3 changes: 2 additions & 1 deletion target/snitch_cluster/sw/apps/Makefile
@@ -3,14 +3,15 @@
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>
# Viviane Potocnik <[email protected]>

SUBDIRS = lto
SUBDIRS += nop
SUBDIRS += blas/axpy
SUBDIRS += blas/gemm
SUBDIRS += dnn/batchnorm
SUBDIRS += dnn/conv2d
# SUBDIRS += dnn/fusedconv
SUBDIRS += dnn/fusedconv
SUBDIRS += dnn/gelu
SUBDIRS += dnn/gemm
SUBDIRS += dnn/layernorm
7 changes: 4 additions & 3 deletions target/snitch_cluster/sw/apps/dnn/fusedconv/Makefile
@@ -3,10 +3,11 @@
# SPDX-License-Identifier: Apache-2.0
#
# Gianna Paulin <[email protected]>
# Viviane Potocnik <[email protected]>

APP = fusedconv
APP ?= fusedconv

include ../Makefile
include ../../../../../../sw/dnn/common.mk
include ../../common.mk

$(DEP): $(DATA_H)
$(DEP): $(DATA_H)
