dnn: Add Concat layer
colluca committed Nov 12, 2023
1 parent 074e76b commit 5ee6446
Showing 9 changed files with 286 additions and 0 deletions.
102 changes: 102 additions & 0 deletions sw/dnn/concat/data/datagen.py
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

import argparse
import numpy as np
import pathlib
import hjson
import sys
import os
import torch

sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
import data_utils # noqa: E402
from data_utils import emit_license, \
format_struct_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper # noqa: E402

torch.manual_seed(42)

# AXI splits bursts crossing 4 KiB address boundaries. To minimize
# the occurrence of these splits, the data should be aligned to 4 KiB.
BURST_ALIGNMENT = 4096
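# For example, a 4 KiB-aligned FP64 tensor of shape [32, 4] occupies
# 32 * 4 * 8 B = 1 KiB, so it never crosses a 4 KiB boundary and can move
# as a single unsplit burst.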

PRECISION = {
'FP64': '64',
'FP32': '32',
'FP16': '16',
'FP8': '8'
}


def golden_model(inputs):
innermost_dim = len(inputs[0].shape) - 1
return torch.cat(inputs, dim=innermost_dim)


def emit_header(section, params):
num_inputs = params['num_inputs']
input_shape = params['input_shape']
prec = PRECISION[params['dtype']]

torch_type = data_utils.floating_point_torch_type(prec)

inputs = [torch.rand(*input_shape, requires_grad=False, dtype=torch_type) for _ in range(num_inputs)]
output = golden_model(inputs)

ctype = data_utils.floating_point_ctype(prec)

layer_cfg = {
**params,
'inputs': 'inputs',
'output': 'output'
}

data_str = [emit_license()]
data_str += [format_array_declaration(ctype, f'input_{i}', input_shape) for i in range(num_inputs)]
data_str += [format_array_declaration('void*', 'inputs', [num_inputs])]
data_str += [format_array_declaration(ctype, 'output', output.shape)]
data_str += [format_struct_definition('concat_layer_t', 'layer', layer_cfg)]
data_str += [format_array_definition(ctype, f'input_{i}', t) for i, t in enumerate(inputs)]
data_str += [format_array_definition('void*', 'inputs', np.array([f'input_{i}' for i in range(num_inputs)]))]
result_def = format_array_definition(ctype, 'golden', output)
data_str += [format_ifdef_wrapper('BIST', result_def)]
data_str = '\n\n'.join(data_str)

return data_str


def main():

parser = argparse.ArgumentParser(description='Generate data for concat kernel')
parser.add_argument(
"-c", "--cfg",
type=pathlib.Path,
required=True,
help='Select param config file for the kernel'
)
parser.add_argument(
'--section',
type=str,
help='Section to store matrices in')
parser.add_argument(
'output',
type=pathlib.Path,
help='Path of the output header file')
args = parser.parse_args()

# Load param config file
with args.cfg.open() as f:
param = hjson.loads(f.read())

# Emit header file
with open(args.output, 'w') as f:
f.write(emit_header(args.section, param))


if __name__ == '__main__':
main()
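For reference, a minimal sketch of what the golden model computes, using the import path that verify.py also uses (the two-input case here is hypothetical; the shipped params.hjson uses a single input):

import torch
from data.datagen import golden_model

# Two [32, 4] inputs concatenated along the innermost dim yield a [32, 8] output.
a = torch.rand(32, 4, dtype=torch.float64)
b = torch.rand(32, 4, dtype=torch.float64)
assert golden_model([a, b]).shape == torch.Size([32, 8])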
9 changes: 9 additions & 0 deletions sw/dnn/concat/data/params.hjson
@@ -0,0 +1,9 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

{
num_inputs: 1
input_shape: [32, 4]
dtype: FP64
}
61 changes: 61 additions & 0 deletions sw/dnn/concat/src/concat.h
@@ -0,0 +1,61 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Luca Colagrande <[email protected]>

#include "snrt.h"

/**
 * @struct concat_layer_t
 * @brief This structure contains all parameters necessary
 * for computing a Concat layer.
 * @var concat_layer_t::num_inputs
 * Number of input tensors to concatenate
 * @var concat_layer_t::input_shape
 * Shape of each input tensor
 * @var concat_layer_t::inputs
 * Pointer to an array of pointers to the individual tensors to concatenate
 * @var concat_layer_t::output
 * Pointer to the concatenated output tensor
 * @var concat_layer_t::dtype
 * Precision of the tensor elements
 */
typedef struct {
uint32_t num_inputs;
uint32_t input_shape[2];
void **inputs;
void *output;
precision_t dtype;
} concat_layer_t;

// Concatenates a series of input tensors along the innermost axis.
// Each cluster copies one of the input tensors into the output tensor;
// all clusters operate in parallel.
// Note: currently requires that the number of inputs does not exceed the
// number of clusters in the system.
static inline int concat_layer(concat_layer_t l) {
// Return error if number of input tensors is greater than number of
// clusters
if (l.num_inputs > snrt_cluster_num()) return 1;

// Perform the concatenation
if (snrt_is_dm_core()) {
if (snrt_cluster_idx() < l.num_inputs) {
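// Note: the row size below assumes FP64 elements; l.dtype is not
// yet consulted here.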
size_t row_size = l.input_shape[1] * sizeof(double);
size_t concatenated_row_size = row_size * l.num_inputs;
void *input = l.inputs[snrt_cluster_idx()];
void *output = l.output + snrt_cluster_idx() * row_size;
snrt_dma_start_2d(output, // dst
input, // src
row_size, // size
concatenated_row_size, // dst_stride
row_size, // src_stride
l.input_shape[0] // repeat
);
snrt_dma_wait_all();
}
}

snrt_global_barrier();
return 0;
}
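To illustrate the data movement: each cluster's 2D DMA transfer is equivalent to writing its input into one W-wide column slice of the output, sketched below in NumPy (H, W and num_inputs assumed from the layer struct; a model of the transfer, not the device code):

import numpy as np

H, W, num_inputs = 32, 4, 2
inputs = [np.random.rand(H, W) for _ in range(num_inputs)]
output = np.empty((H, num_inputs * W))
# Cluster i copies each row of its input into output at an offset of i * W
# elements; the destination row stride is the concatenated row size,
# matching the dst_stride programmed into snrt_dma_start_2d.
for i in range(num_inputs):
    output[:, i * W:(i + 1) * W] = inputs[i]
assert np.array_equal(output, np.concatenate(inputs, axis=1))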
14 changes: 14 additions & 0 deletions sw/dnn/concat/src/main.c
@@ -0,0 +1,14 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Luca Colagrande <[email protected]>

#include "dnn.h"

#include "data.h"

int main() {
uint32_t nerr = concat_layer(layer);
return nerr;
}
84 changes: 84 additions & 0 deletions sw/dnn/concat/verify.py
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

import sys
from pathlib import Path
import numpy as np
import torch
from data.datagen import golden_model

sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
import verification # noqa: E402
from elf import Elf # noqa: E402
from data_utils import bytes_to_float, bytes_to_struct # noqa: E402


ERR_THRESHOLD = 1E-6

PRECISION_T = {
8: '64',
4: '32',
2: '16',
1: '8'
}

NUMPY_T = {
'64': np.float64,
'32': np.float32,
'16': np.float16
}


def main():
# Run simulation and get outputs
args = verification.parse_args()
raw_results = verification.simulate(sim_bin=args.sim_bin,
snitch_bin=args.snitch_bin,
symbols_bin=args.symbols_bin,
log=args.log,
output_uids=['output'])

# Extract input operands from ELF file
if args.symbols_bin:
elf = Elf(args.symbols_bin)
else:
elf = Elf(args.snitch_bin)

layer_struct = {
'num_inputs': 'I',
'height': 'I',
'width': 'I',
'inputs': 'I',
'output': 'I',
'dtype': 'I'
}
layer = bytes_to_struct(elf.get_symbol_contents('layer'), layer_struct)
num_inputs = layer['num_inputs']
input_shape = [layer['height'], layer['width']]
inputs = layer['inputs']
output = layer['output']
prec = PRECISION_T[layer['dtype']]

inputs = [np.array(bytes_to_float(elf.get_symbol_contents(f'input_{i}'), prec), dtype=NUMPY_T[prec]) for i in range(num_inputs)]
inputs = [torch.from_numpy(tensor.reshape(input_shape)) for tensor in inputs]

# Verify results
output_actual = np.array(bytes_to_float(raw_results['output'], prec), dtype=NUMPY_T[prec])
output_golden = golden_model(inputs).detach().numpy().flatten()

relative_err = np.absolute((output_golden - output_actual) / output_golden)
fail = np.any(relative_err > ERR_THRESHOLD)
if fail:
verification.dump_results_to_csv([output_golden, output_actual, relative_err],
Path.cwd() / 'concat_results.csv')
print('Maximum relative error:', np.max(relative_err))

return int(fail)


if __name__ == "__main__":
sys.exit(main())
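For orientation, the 'I' entries in layer_struct are standard struct format characters. Assuming bytes_to_struct wraps Python's struct module and the target uses little-endian 32-bit words (an assumption, not verified here), the unpacking is equivalent to:

import struct

# Six little-endian uint32 words mirror concat_layer_t's layout: num_inputs,
# input_shape[0], input_shape[1], inputs (pointer), output (pointer), dtype.
raw = elf.get_symbol_contents('layer')  # 24 bytes, as read in main() above
num_inputs, height, width, inputs_ptr, output_ptr, dtype = struct.unpack('<6I', raw)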
1 change: 1 addition & 0 deletions sw/dnn/src/dnn.h
@@ -204,4 +204,5 @@ typedef struct network_single_cluster_t_ {
#include "../linear/src/linear.h"
#include "../maxpool/src/maxpool.h"
#include "../softmax/src/softmax.h"
#include "../concat/src/concat.h"
// #include "utils.h"
1 change: 1 addition & 0 deletions target/snitch_cluster/sw/apps/Makefile
@@ -19,6 +19,7 @@ SUBDIRS += dnn/linear
SUBDIRS += dnn/maxpool
SUBDIRS += dnn/softmax
SUBDIRS += dnn/flashattention_2
SUBDIRS += dnn/concat
SUBDIRS += montecarlo/pi_estimation

.PHONY: all clean $(SUBDIRS)
12 changes: 12 additions & 0 deletions target/snitch_cluster/sw/apps/dnn/concat/Makefile
@@ -0,0 +1,12 @@
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

APP ?= concat

include ../../../../../../sw/dnn/common.mk
include ../../common.mk

$(DEP): $(DATA_H)
2 changes: 2 additions & 0 deletions target/snitch_cluster/sw/run.yaml
@@ -88,4 +88,6 @@ runs:
# Illegal FDIV without FDIV unit
- elf: apps/dnn/flashattention_2/build/flashattention_2.elf
cmd: ../../sw/dnn/flashattention_2/verify.py {sim_bin} {elf}
- elf: apps/dnn/concat/build/concat.elf
cmd: ../../sw/dnn/concat/verify.py {sim_bin} {elf}
- elf: apps/montecarlo/pi_estimation/build/pi_estimation.elf
