diff --git a/sw/dnn/concat/data/datagen.py b/sw/dnn/concat/data/datagen.py
new file mode 100755
index 0000000000..0cc39bc2e9
--- /dev/null
+++ b/sw/dnn/concat/data/datagen.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Luca Colagrande
+
+import argparse
+import numpy as np
+import pathlib
+import hjson
+import sys
+import os
+import torch
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
+import data_utils  # noqa: E402
+from data_utils import emit_license, \
+                       format_struct_definition, format_array_definition, \
+                       format_array_declaration, format_ifdef_wrapper  # noqa: E402
+
+torch.manual_seed(42)
+
+# AXI splits bursts crossing 4KB address boundaries. To minimize
+# the occurrence of these splits the data should be aligned to 4KB
+BURST_ALIGNMENT = 4096
+
+PRECISION = {
+    'FP64': '64',
+    'FP32': '32',
+    'FP16': '16',
+    'FP8': '8'
+}
+
+
+def golden_model(inputs):
+    """Concatenate the input tensors along their innermost dimension."""
+    return torch.cat(inputs, dim=-1)
+
+
+def emit_header(section, params):
+    """Generate the contents of the C data header for the concat kernel.
+
+    Args:
+        section: Section to store the data in. Currently unused.
+        params: Kernel parameters, with keys 'num_inputs', 'input_shape'
+            and 'dtype' (one of the keys of PRECISION).
+
+    Returns:
+        The contents of the header file as a string.
+    """
+    num_inputs = params['num_inputs']
+    input_shape = params['input_shape']
+    prec = PRECISION[params['dtype']]
+
+    torch_type = data_utils.floating_point_torch_type(prec)
+
+    inputs = [torch.rand(*input_shape, requires_grad=False, dtype=torch_type)
+              for _ in range(num_inputs)]
+    output = golden_model(inputs)
+
+    ctype = data_utils.floating_point_ctype(prec)
+
+    # The 'inputs' and 'output' fields of the layer struct are set to the
+    # names of the C symbols declared in this header
+    layer_cfg = {
+        **params,
+        'inputs': 'inputs',
+        'output': 'output'
+    }
+
+    input_names = [f'input_{i}' for i in range(num_inputs)]
+
+    data_str = [emit_license()]
+    data_str += [format_array_declaration(ctype, name, input_shape) for name in input_names]
+    data_str += [format_array_declaration('void*', 'inputs', [num_inputs])]
+    data_str += [format_array_declaration(ctype, 'output', output.shape)]
+    data_str += [format_struct_definition('concat_layer_t', 'layer', layer_cfg)]
+    data_str += [format_array_definition(ctype, name, t) for name, t in zip(input_names, inputs)]
+    data_str += [format_array_definition('void*', 'inputs', np.array(input_names))]
+    result_def = format_array_definition(ctype, 'golden', output)
+    data_str += [format_ifdef_wrapper('BIST', result_def)]
+    data_str = '\n\n'.join(data_str)
+
+    return data_str
+
+
+def main():
+    """Parse the command line and write the generated header file."""
+    parser = argparse.ArgumentParser(description='Generate data for concat kernel')
+    parser.add_argument(
+        "-c", "--cfg",
+        type=pathlib.Path,
+        required=True,
+        help='Select param config file kernel'
+    )
+    parser.add_argument(
+        '--section',
+        type=str,
+        help='Section to store matrices in')
+    parser.add_argument(
+        'output',
+        type=pathlib.Path,
+        help='Path of the output header file')
+    args = parser.parse_args()
+
+    # Load param config file
+    with args.cfg.open() as f:
+        param = hjson.loads(f.read())
+
+    # Emit header file
+    with open(args.output, 'w') as f:
+        f.write(emit_header(args.section, param))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sw/dnn/concat/data/params.hjson b/sw/dnn/concat/data/params.hjson
new file mode 100644
index 0000000000..8f02b5f479
--- /dev/null
+++ b/sw/dnn/concat/data/params.hjson
@@ -0,0 +1,9 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+{
+    num_inputs: 1
+    input_shape: [32, 4]
+    dtype: FP64
+}
diff --git a/sw/dnn/concat/src/concat.h b/sw/dnn/concat/src/concat.h
new file mode 100644
index 0000000000..98464a1b87
--- /dev/null
+++ b/sw/dnn/concat/src/concat.h
@@ -0,0 +1,61 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Luca Colagrande
+
+#include "snrt.h"
+
+/**
+ * @struct concat_layer_t
+ * @brief This structure contains all parameters necessary
+ *        for computing a Concat layer.
+ * @var concat_layer_t::num_inputs
+ * Number of input tensors to concatenate
+ * @var concat_layer_t::input_shape
+ * Shape of the input tensors
+ * @var concat_layer_t::inputs
+ * Pointer to an array of pointers to the individual tensors to concatenate
+ * @var concat_layer_t::output
+ * Pointer to the concatenated output tensor
+ * @var concat_layer_t::dtype
+ * Precision of the tensor elements
+ */
+typedef struct {
+    uint32_t num_inputs;
+    uint32_t input_shape[2];
+    void **inputs;
+    void *output;
+    precision_t dtype;
+} concat_layer_t;
+
+// Concatenates a series of input tensors along the innermost axis.
+// Every cluster stores one of the input tensors in the output tensor, all
+// clusters operate in parallel.
+// Note: currently requires that the number of inputs does not exceed the
+// number of clusters in the system; returns 1 if it does, 0 on success.
+static inline int concat_layer(concat_layer_t l) {
+    // Fail if there are more input tensors than clusters
+    if (l.num_inputs > snrt_cluster_num()) return 1;
+
+    // Perform the concatenation: the DM core of cluster i copies input i
+    if (snrt_is_dm_core() && snrt_cluster_idx() < l.num_inputs) {
+        // precision_t values are assumed to equal the element size in bytes
+        size_t row_size = l.input_shape[1] * (size_t)l.dtype;
+        size_t concatenated_row_size = row_size * l.num_inputs;
+        void *input = l.inputs[snrt_cluster_idx()];
+        // Offset in bytes; arithmetic on void* is not standard C
+        void *output = (char *)l.output + snrt_cluster_idx() * row_size;
+        snrt_dma_start_2d(output,                 // dst
+                          input,                  // src
+                          row_size,               // size
+                          concatenated_row_size,  // dst_stride
+                          row_size,               // src_stride
+                          l.input_shape[0]        // repeat
+        );
+        snrt_dma_wait_all();
+    }
+
+    snrt_global_barrier();
+    return 0;
+}
diff --git a/sw/dnn/concat/src/main.c b/sw/dnn/concat/src/main.c
new file mode 100644
index 0000000000..8e9e434c4e
--- /dev/null
+++ b/sw/dnn/concat/src/main.c
@@ -0,0 +1,14 @@
+// Copyright 2023 ETH Zurich and
University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Luca Colagrande
+
+#include "dnn.h"
+
+#include "data.h"
+
+int main() {
+    uint32_t nerr = concat_layer(layer);
+    return nerr;
+}
diff --git a/sw/dnn/concat/verify.py b/sw/dnn/concat/verify.py
new file mode 100755
index 0000000000..60196384d6
--- /dev/null
+++ b/sw/dnn/concat/verify.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Luca Colagrande
+
+import sys
+from pathlib import Path
+import numpy as np
+import torch
+from data.datagen import golden_model
+
+sys.path.append(str(Path(__file__).parent / '../../../util/sim/'))
+import verification  # noqa: E402
+from elf import Elf  # noqa: E402
+from data_utils import bytes_to_float, bytes_to_struct  # noqa: E402
+
+
+ERR_THRESHOLD = 1E-6
+
+PRECISION_T = {
+    8: '64',
+    4: '32',
+    2: '16',
+    1: '8'
+}
+
+NUMPY_T = {
+    '64': np.float64,
+    '32': np.float32,
+    '16': np.float16
+}
+
+
+def main():
+    """Run the concat kernel in simulation and check it against the golden model.
+
+    Returns:
+        0 if the simulated output matches the golden output, 1 otherwise.
+    """
+    # Run simulation and get outputs
+    args = verification.parse_args()
+    raw_results = verification.simulate(sim_bin=args.sim_bin,
+                                        snitch_bin=args.snitch_bin,
+                                        symbols_bin=args.symbols_bin,
+                                        log=args.log,
+                                        output_uids=['output'])
+
+    # Extract input operands from ELF file
+    if args.symbols_bin:
+        elf = Elf(args.symbols_bin)
+    else:
+        elf = Elf(args.snitch_bin)
+
+    # Mirrors the fields of concat_layer_t, in declaration order
+    layer_struct = {
+        'num_inputs': 'I',
+        'height': 'I',
+        'width': 'I',
+        'inputs': 'I',
+        'output': 'I',
+        'dtype': 'I'
+    }
+    layer = bytes_to_struct(elf.get_symbol_contents('layer'), layer_struct)
+    num_inputs = layer['num_inputs']
+    input_shape = [layer['height'], layer['width']]
+    prec = PRECISION_T[layer['dtype']]
+
+    inputs = [np.array(bytes_to_float(elf.get_symbol_contents(f'input_{i}'), prec),
+                       dtype=NUMPY_T[prec]) for i in range(num_inputs)]
+    inputs = [torch.from_numpy(tensor.reshape(input_shape)) for tensor in inputs]
+
+    # Verify results
+    output_actual = np.array(bytes_to_float(raw_results['output'], prec), dtype=NUMPY_T[prec])
+    output_golden = golden_model(inputs).detach().numpy().flatten()
+
+    # Fall back to the absolute error where the golden value is zero, to
+    # avoid dividing by zero
+    abs_err = np.absolute(output_golden - output_actual)
+    relative_err = abs_err / np.where(output_golden == 0, 1, np.absolute(output_golden))
+    # Written as a negated "all within threshold" so that NaNs count as failures
+    fail = not np.all(relative_err <= ERR_THRESHOLD)
+    if fail:
+        verification.dump_results_to_csv([output_golden, output_actual, relative_err],
+                                         Path.cwd() / 'concat_results.csv')
+        print('Maximum relative error:', np.max(relative_err))
+
+    return int(fail)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/sw/dnn/src/dnn.h b/sw/dnn/src/dnn.h
index 5c41041c83..313220493a 100644
--- a/sw/dnn/src/dnn.h
+++ b/sw/dnn/src/dnn.h
@@ -204,4 +204,5 @@ typedef struct network_single_cluster_t_ {
 #include "../linear/src/linear.h"
 #include "../maxpool/src/maxpool.h"
 #include "../softmax/src/softmax.h"
+#include "../concat/src/concat.h"
 // #include "utils.h"
diff --git a/target/snitch_cluster/sw/apps/Makefile b/target/snitch_cluster/sw/apps/Makefile
index 596b37e4ff..222f75cd48 100644
--- a/target/snitch_cluster/sw/apps/Makefile
+++ b/target/snitch_cluster/sw/apps/Makefile
@@ -19,6 +19,7 @@ SUBDIRS += dnn/linear
 SUBDIRS += dnn/maxpool
 SUBDIRS += dnn/softmax
 SUBDIRS += dnn/flashattention_2
+SUBDIRS += dnn/concat
 SUBDIRS += montecarlo/pi_estimation
 
 .PHONY: all clean $(SUBDIRS)
diff --git a/target/snitch_cluster/sw/apps/dnn/concat/Makefile b/target/snitch_cluster/sw/apps/dnn/concat/Makefile
new file mode 100644
index 0000000000..088d29d663
--- /dev/null
+++ b/target/snitch_cluster/sw/apps/dnn/concat/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0 +# +# Luca Colagrande + +APP ?= concat + +include ../../../../../../sw/dnn/common.mk +include ../../common.mk + +$(DEP): $(DATA_H) \ No newline at end of file diff --git a/target/snitch_cluster/sw/run.yaml b/target/snitch_cluster/sw/run.yaml index 8e50eea97b..4a8499a636 100644 --- a/target/snitch_cluster/sw/run.yaml +++ b/target/snitch_cluster/sw/run.yaml @@ -88,4 +88,6 @@ runs: # Illegal FDIV without FDIV unit - elf: apps/dnn/flashattention_2/build/flashattention_2.elf cmd: ../../sw/dnn/flashattention_2/verify.py {sim_bin} {elf} + - elf: apps/dnn/concat/build/concat.elf + cmd: ../../sw/dnn/concat/verify.py {sim_bin} {elf} - elf: apps/montecarlo/pi_estimation/build/pi_estimation.elf