Skip to content

Commit

Permalink
dnn: Add FusedConcatLinear layer
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Nov 12, 2023
1 parent 2c57cfa commit 5f052c1
Show file tree
Hide file tree
Showing 10 changed files with 300 additions and 0 deletions.
113 changes: 113 additions & 0 deletions sw/dnn/fused_concat_linear/data/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

import argparse
import numpy as np
import pathlib
import hjson
import sys
import os
import torch

sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
import data_utils # noqa: E402
from data_utils import emit_license, \
format_struct_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper # noqa: E402

torch.manual_seed(42)

# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096

# Maps the `dtype` string of the parameter file to the precision string
# handed to the data_utils type-lookup helpers (see emit_header).
PRECISION = {
    'FP64': '64',
    'FP32': '32',
    'FP16': '16',
    'FP8': '8'
}


def golden_model(inputs, weights):
    """Compute the reference result of the fused concat + linear layer.

    The input tensors are joined along their last dimension (determined
    from the first input) and the joined tensor is then matrix-multiplied
    by `weights`.

    Args:
        inputs: Sequence of tensors to concatenate.
        weights: Tensor the concatenated result is multiplied with.

    Returns:
        A tuple `(linear_output, concat_output)`.
    """
    last_dim = inputs[0].ndim - 1
    concatenated = torch.cat(inputs, dim=last_dim)
    return torch.matmul(concatenated, weights), concatenated


def emit_header(section, params):
    """Render the contents of the C data header for the layer.

    Random inputs and weights are drawn with the configured shapes and
    precision, the golden model is evaluated on them, and declarations,
    the layer struct and array definitions are rendered as text.

    Args:
        section: Not used by this function.
        params: Layer configuration. The keys `num_inputs`, `input_shape`,
            `output_shape` and `dtype` are read here; all entries are also
            forwarded into the emitted layer struct.

    Returns:
        The header contents as a single string, with the individual
        pieces separated by blank lines.
    """
    num_inputs = params['num_inputs']
    input_shape = params['input_shape']
    output_shape = params['output_shape']
    prec = PRECISION[params['dtype']]

    assert input_shape[0] == output_shape[0], 'Inconsistent input and output shapes'

    torch_type = data_utils.floating_point_torch_type(prec)

    # The inputs are drawn before the weights; this order decides which
    # random values each tensor receives and must not be changed.
    inputs = []
    for _ in range(num_inputs):
        inputs.append(torch.rand(*input_shape, requires_grad=False, dtype=torch_type))
    weights_shape = [input_shape[1]*num_inputs, output_shape[1]]
    weights = torch.rand(weights_shape, requires_grad=False, dtype=torch_type)
    linear_output, concat_output = golden_model(inputs, weights)

    ctype = data_utils.floating_point_ctype(prec)
    input_names = [f'input_{i}' for i in range(num_inputs)]

    # Pointer-like struct fields are emitted as the names of the arrays
    layer_cfg = dict(params)
    layer_cfg.update({
        'inputs': 'inputs',
        'weights': 'weights',
        'concat_output': 'concat_output',
        'linear_output': 'linear_output'
    })

    pieces = [emit_license()]

    # Array declarations
    for name in input_names:
        pieces.append(format_array_declaration(ctype, name, input_shape))
    pieces.append(format_array_declaration('void*', 'inputs', [num_inputs]))
    pieces.append(format_array_declaration(ctype, 'concat_output', concat_output.shape))
    pieces.append(format_array_declaration(ctype, 'linear_output', linear_output.shape))
    pieces.append(format_array_declaration(ctype, 'weights', weights.shape))

    # Layer configuration struct
    pieces.append(format_struct_definition('fused_concat_linear_layer_t', 'layer', layer_cfg))

    # Array definitions
    for name, tensor in zip(input_names, inputs):
        pieces.append(format_array_definition(ctype, name, tensor))
    pieces.append(format_array_definition('void*', 'inputs', np.array(input_names)))
    pieces.append(format_array_definition(ctype, 'weights', weights))

    # The golden result is wrapped with format_ifdef_wrapper under 'BIST'
    golden_def = format_array_definition(ctype, 'golden', linear_output)
    pieces.append(format_ifdef_wrapper('BIST', golden_def))

    return '\n\n'.join(pieces)


def main():
    """Generate the data header for the fused concat linear kernel.

    Parses the command line, loads the hjson parameter file given with
    `-c/--cfg` and writes the text returned by `emit_header` to the
    `output` path.
    """
    # The description previously named the layernorm kernel (copy-paste
    # leftover); it now names the kernel this script generates data for.
    parser = argparse.ArgumentParser(
        description='Generate data for fused concat linear kernel')
    parser.add_argument(
        "-c", "--cfg",
        type=pathlib.Path,
        required=True,
        help='Select param config file kernel'
    )
    parser.add_argument(
        '--section',
        type=str,
        help='Section to store matrices in')
    parser.add_argument(
        'output',
        type=pathlib.Path,
        help='Path of the output header file')
    args = parser.parse_args()

    # Load param config file
    with args.cfg.open() as f:
        param = hjson.loads(f.read())

    # Emit header file
    with open(args.output, 'w') as f:
        f.write(emit_header(args.section, param))


if __name__ == '__main__':
main()
10 changes: 10 additions & 0 deletions sw/dnn/fused_concat_linear/data/params.hjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

{
num_inputs: 1
input_shape: [32, 4]
output_shape: [32, 16]
dtype: FP64
}
53 changes: 53 additions & 0 deletions sw/dnn/fused_concat_linear/src/fused_concat_linear.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2020 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Luca Colagrande <[email protected]>

#include "snrt.h"

/**
 * @struct fused_concat_linear_layer_t
 * @brief This structure contains all parameters necessary
 *        for computing a fused Concat + Linear layer.
 * @var fused_concat_linear_layer_t::num_inputs
 * Number of input tensors to concatenate
 * @var fused_concat_linear_layer_t::input_shape
 * Shape of the input tensors
 * @var fused_concat_linear_layer_t::output_shape
 * Shape of the output tensor of the linear layer
 * @var fused_concat_linear_layer_t::inputs
 * Pointer to an array of pointers to the individual tensors to concatenate
 * @var fused_concat_linear_layer_t::weights
 * Pointer to the weights of the linear layer
 * @var fused_concat_linear_layer_t::concat_output
 * Pointer to the concatenated tensor, written by the Concat stage and
 * read by the linear stage
 * @var fused_concat_linear_layer_t::linear_output
 * Pointer to the output tensor of the linear layer
 * @var fused_concat_linear_layer_t::dtype
 * Precision passed on to the Concat and GEMM kernels
 */
typedef struct {
    uint32_t num_inputs;
    uint32_t input_shape[2];
    uint32_t output_shape[2];
    void **inputs;
    void *weights;
    void *concat_output;
    void *linear_output;
    precision_t dtype;
} fused_concat_linear_layer_t;

/**
 * Run the fused layer: first the Concat stage, then a GEMM on the
 * concatenated tensor and the weights, followed by a global barrier.
 *
 * @param l Layer configuration
 * @return The value returned by concat_layer()
 */
static inline int fused_concat_linear_layer(fused_concat_linear_layer_t l) {
    // Stage 1: concatenation, with l.concat_output as its output
    concat_layer_t concat_cfg = {
        .num_inputs = l.num_inputs,
        .input_shape = {l.input_shape[0], l.input_shape[1]},
        .inputs = l.inputs,
        .output = l.concat_output,
        .dtype = l.dtype
    };
    int nerr = concat_layer(concat_cfg);

    // Stage 2: linear layer, dimensions handed to gemm()
    uint32_t rows = l.input_shape[0];
    uint32_t inner = l.input_shape[1] * l.num_inputs;
    uint32_t cols = l.output_shape[1];
    gemm(l.dtype, 0, 0, 0, 0, rows, cols, inner, 1.0,
         l.concat_output, inner,
         l.weights, cols, 0.0,
         l.linear_output, cols);

    snrt_global_barrier();

    return nerr;
}
14 changes: 14 additions & 0 deletions sw/dnn/fused_concat_linear/src/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Luca Colagrande <[email protected]>

#include "dnn.h"

#include "data.h"

// Runs the fused layer on the `layer` instance (not defined in this file)
// and returns the layer's result as the program's return value.
int main() {
    uint32_t num_errors = fused_concat_linear_layer(layer);
    return num_errors;
}
90 changes: 90 additions & 0 deletions sw/dnn/fused_concat_linear/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

import sys
from pathlib import Path
import numpy as np
import torch
from data.datagen import golden_model

sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
import verification # noqa: E402
from elf import Elf # noqa: E402
from data_utils import bytes_to_float, bytes_to_struct # noqa: E402


# Verification fails when any element's relative error exceeds this value
ERR_THRESHOLD = 1E-6

# Maps the value of the layer struct's `dtype` field to a precision string
PRECISION_T = {
    8: '64',
    4: '32',
    2: '16',
    1: '8'
}

# Maps a precision string to the NumPy dtype used for the extracted data.
# NOTE(review): there is no entry for '8', so the NUMPY_T[prec] lookups in
# main() would raise a KeyError for such a layer - confirm whether FP8 is
# meant to be supported here.
NUMPY_T = {
    '64': np.float64,
    '32': np.float32,
    '16': np.float16
}


def main():
    """Simulate the application and check its output against the golden model.

    The layer configuration, the inputs and the weights are read back from
    the ELF file, the golden model is evaluated on them, and the result is
    compared element-wise with the simulated `linear_output`. On failure
    the golden values, actual values and relative errors are dumped to
    `results.csv` in the current working directory.

    Returns:
        1 if any relative error exceeds ERR_THRESHOLD, 0 otherwise.
    """
    # Run simulation and get outputs
    args = verification.parse_args()
    raw_results = verification.simulate(sim_bin=args.sim_bin,
                                        snitch_bin=args.snitch_bin,
                                        symbols_bin=args.symbols_bin,
                                        log=args.log,
                                        output_uids=['linear_output'])

    # Symbols are taken from the symbols binary when one is given,
    # otherwise from the Snitch binary itself
    elf = Elf(args.symbols_bin if args.symbols_bin else args.snitch_bin)

    # Extract the layer configuration from the ELF file
    field_names = [
        'num_inputs',
        'in_height',
        'in_width',
        'out_height',
        'out_width',
        'inputs',
        'weights',
        'concat_output',
        'linear_output',
        'dtype',
    ]
    layer_struct = {name: 'I' for name in field_names}
    layer = bytes_to_struct(elf.get_symbol_contents('layer'), layer_struct)
    num_inputs = layer['num_inputs']
    input_shape = [layer['in_height'], layer['in_width']]
    weights_shape = [layer['in_width']*num_inputs, layer['out_width']]
    prec = PRECISION_T[layer['dtype']]

    def to_numpy(raw):
        # Decode raw bytes into a flat NumPy array of the layer's precision
        return np.array(bytes_to_float(raw, prec), dtype=NUMPY_T[prec])

    # Extract input operands from the ELF file
    flat_inputs = []
    for i in range(num_inputs):
        flat_inputs.append(to_numpy(elf.get_symbol_contents(f'input_{i}')))
    inputs = []
    for flat in flat_inputs:
        inputs.append(torch.from_numpy(flat.reshape(input_shape)))
    flat_weights = to_numpy(elf.get_symbol_contents('weights'))
    weights = torch.from_numpy(flat_weights.reshape(weights_shape))

    # Verify results
    output_actual = to_numpy(raw_results['linear_output'])
    output_golden, _ = golden_model(inputs, weights)
    output_golden = output_golden.detach().numpy().flatten()

    difference = output_golden - output_actual
    relative_err = np.absolute(difference / output_golden)
    fail = np.any(relative_err > ERR_THRESHOLD)
    if fail:
        csv_path = Path.cwd() / 'results.csv'
        verification.dump_results_to_csv([output_golden, output_actual, relative_err],
                                         csv_path)
        print('Maximum relative error:', np.max(relative_err))

    return int(fail)


if __name__ == "__main__":
sys.exit(main())
1 change: 1 addition & 0 deletions sw/dnn/src/dnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,5 @@ typedef struct network_single_cluster_t_ {
#include "../maxpool/src/maxpool.h"
#include "../softmax/src/softmax.h"
#include "../concat/src/concat.h"
#include "../fused_concat_linear/src/fused_concat_linear.h"
// #include "utils.h"
4 changes: 4 additions & 0 deletions sw/snRuntime/src/team.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ inline uint32_t __attribute__((const)) snrt_global_core_num() {
return snrt_cluster_num() * snrt_cluster_core_num();
}

// Returns the product of snrt_cluster_num() and
// snrt_cluster_compute_core_num().
inline uint32_t __attribute__((const)) snrt_global_compute_core_num() {
    uint32_t num_clusters = snrt_cluster_num();
    uint32_t compute_cores = snrt_cluster_compute_core_num();
    return num_clusters * compute_cores;
}

// Returns the calling core's hart ID relative to
// snrt_global_core_base_hartid().
inline uint32_t __attribute__((const)) snrt_global_core_idx() {
    uint32_t hartid = snrt_hartid();
    uint32_t base_hartid = snrt_global_core_base_hartid();
    return hartid - base_hartid;
}
Expand Down
1 change: 1 addition & 0 deletions target/snitch_cluster/sw/apps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ SUBDIRS += dnn/maxpool
SUBDIRS += dnn/softmax
SUBDIRS += dnn/flashattention_2
SUBDIRS += dnn/concat
SUBDIRS += dnn/fused_concat_linear
SUBDIRS += montecarlo/pi_estimation

.PHONY: all clean $(SUBDIRS)
Expand Down
12 changes: 12 additions & 0 deletions target/snitch_cluster/sw/apps/dnn/fused_concat_linear/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

APP ?= fused_concat_linear

include ../../../../../../sw/dnn/common.mk
include ../../common.mk

$(DEP): $(DATA_H)
2 changes: 2 additions & 0 deletions target/snitch_cluster/sw/run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,6 @@ runs:
cmd: ../../sw/dnn/flashattention_2/verify.py {sim_bin} {elf}
- elf: apps/dnn/concat/build/concat.elf
cmd: ../../sw/dnn/concat/verify.py {sim_bin} {elf}
- elf: apps/dnn/fused_concat_linear/build/fused_concat_linear.elf
cmd: ../../sw/dnn/fused_concat_linear/verify.py {sim_bin} {elf}
- elf: apps/montecarlo/pi_estimation/build/pi_estimation.elf

0 comments on commit 5f052c1

Please sign in to comment.