diff --git a/sw/blas/intsort/Makefile b/sw/blas/intsort/Makefile
new file mode 100644
index 000000000..2b9d4ade6
--- /dev/null
+++ b/sw/blas/intsort/Makefile
@@ -0,0 +1,35 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande
+# Author: Nico Canzani
+
+# Usage of absolute paths is required to externally include this Makefile
+MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+DATA_DIR := $(realpath $(MK_DIR)/data)
+SRC_DIR := $(realpath $(MK_DIR)/src)
+
+DATA_CFG ?= $(DATA_DIR)/params.json
+SECTION ?=
+
+APP ?= intsort
+SRCS ?= $(realpath $(SRC_DIR)/main.c)
+INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR)
+
+DATAGEN_PY = $(MK_DIR)/scripts/datagen.py
+DATA_H ?= $(DATA_DIR)/data.h
+
+$(dir $(DATA_H)):
+	mkdir -p $@
+
+# Regenerate the data header whenever the generator or its configuration changes
+$(DATA_H): $(DATAGEN_PY) $(DATA_CFG) | $(dir $(DATA_H))
+	$< -c $(DATA_CFG) --section="$(SECTION)" $@
+
+.PHONY: clean-data clean
+
+clean-data:
+	rm -f $(DATA_H)
+
+clean: clean-data
diff --git a/sw/blas/intsort/data/params.json b/sw/blas/intsort/data/params.json
new file mode 100644
index 000000000..0634f44f1
--- /dev/null
+++ b/sw/blas/intsort/data/params.json
@@ -0,0 +1,7 @@
+
+{
+    "n": 512,
+    "min": -256,
+    "max": 256,
+    "syntetic": false
+}
diff --git a/sw/blas/intsort/scripts/datagen.py b/sw/blas/intsort/scripts/datagen.py
new file mode 100644
index 000000000..3011d7268
--- /dev/null
+++ b/sw/blas/intsort/scripts/datagen.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Nico Canzani
+# Author: Luca Colagrande
+
+import random
+import numpy as np
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
+from data_utils import format_scalar_definition, format_array_definition, \
+    format_array_declaration, format_ifdef_wrapper, DataGen  # noqa: E402
+
+
+class IntsortDataGen(DataGen):
+    """Generate the C data header for the integer sorting kernel."""
+
+    # AXI splits bursts crossing 4KB address boundaries. To minimize
+    # the occurrence of these splits the data should be aligned to 4KB
+    BURST_ALIGNMENT = 4096
+
+    def golden_model(self, x):
+        """Return a sorted copy of `x`, used as the expected kernel result."""
+        return np.sort(x)
+
+    def emit_header(self, **kwargs):
+        """Return the contents of the C header holding the kernel's data.
+
+        Reads the `n`, `min`, `max`, `syntetic` and `section` entries of
+        `kwargs`. When `syntetic` is set, the input `x` is a shuffled
+        permutation of every integer in [min, max], which requires exactly
+        `n` such integers; otherwise `n` integers are drawn at random from
+        [min, max]. The header defines `n`, `min`, `max` and `x`, declares
+        the output array `z` and, under `BIST`, defines the sorted array `g`.
+        """
+        header = [super().emit_header()]
+
+        n = kwargs['n']
+        self.MIN = kwargs['min']
+        self.MAX = kwargs['max']
+        # Validate before spending time on data generation
+        assert (n % 8) == 0, "n must be an integer multiple of the number of cores (8)"
+
+        # NOTE: the configuration key is spelled 'syntetic' (see params.json)
+        syntetic_data = kwargs['syntetic']
+        if syntetic_data:
+            num_values = len(range(self.MIN, self.MAX + 1))
+            if num_values != n:
+                # A string argument is printed to stderr and sets exit status 1,
+                # so that the build stops instead of continuing without a header
+                sys.exit(f'Parameter Problem: n is set to {n}, min to max generates '
+                         f'{num_values} numbers.\nExit Generator\n')
+            print('Creating syntetic data')
+            # Every value in [min, max] exactly once, in random order
+            x = np.arange(self.MIN, self.MAX + 1)
+            np.random.shuffle(x)
+        else:
+            x = np.asarray([random.randrange(self.MIN, self.MAX + 1, 1) for i in range(n)])
+        g = self.golden_model(x)
+
+        header += [format_scalar_definition('const uint32_t', 'n', n)]
+        header += [format_scalar_definition('const int32_t', 'min', self.MIN)]
+        header += [format_scalar_definition('const int32_t', 'max', self.MAX)]
+        header += [format_array_definition('int32_t', 'x', x, alignment=self.BURST_ALIGNMENT,
+                                           section=kwargs['section'])]
+        header += [format_array_declaration('int32_t', 'z', [n], alignment=self.BURST_ALIGNMENT,
+                                            section=kwargs['section'])]
+        result_def = format_array_definition('int32_t', 'g', g)
+        header += [format_ifdef_wrapper('BIST', result_def)]
+        header = '\n\n'.join(header)
+
+        return header
+
+
+if __name__ == '__main__':
+
sys.exit(IntsortDataGen().main())
diff --git a/sw/blas/intsort/scripts/verify.py b/sw/blas/intsort/scripts/verify.py
new file mode 100644
index 000000000..12f81030f
--- /dev/null
+++ b/sw/blas/intsort/scripts/verify.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Nico Canzani
+# Author: Luca Colagrande
+
+import sys
+from pathlib import Path
+from datagen import IntsortDataGen
+
+sys.path.append(str(Path(__file__).parent / '../../../../util/sim/'))
+from verif_utils import Verifier  # noqa: E402
+
+
+class IntsortVerifier(Verifier):
+    """Compare output symbol `z` with the golden model applied to input symbol `x`."""
+
+    OUTPUT_UIDS = ['z']
+
+    def get_actual_results(self):
+        return self.get_output_from_symbol('z', 'int32_t')
+
+    def get_expected_results(self):
+        x = self.get_input_from_symbol('x', 'int32_t')
+        return IntsortDataGen().golden_model(x)
+
+    def check_results(self, *args):
+        return super().check_results(*args, rtol=1e-10)
+
+
+if __name__ == "__main__":
+    sys.exit(IntsortVerifier().main())
diff --git a/sw/blas/intsort/src/intsort.h b/sw/blas/intsort/src/intsort.h
new file mode 100644
index 000000000..59d6d3fab
--- /dev/null
+++ b/sw/blas/intsort/src/intsort.h
@@ -0,0 +1,132 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Nico Canzani
+// Author: Luca Colagrande
+
+#include "snrt.h"
+
+// Exchange the values pointed to by a and b
+void swap(int32_t* a, int32_t* b) {
+    int32_t temp = *a;
+    *a = *b;
+    *b = temp;
+}
+
+// Partition the inclusive range [low, high] around the pivot *high and return
+// the pivot's final position
+int32_t* partition(int32_t* low, int32_t* high) {
+    int32_t pivot = *high;  // Pivot element (can be chosen randomly)
+    int32_t* i = low - 1;   // Index of smaller element
+
+    for (int32_t* j = low; j < high; j++) {
+        if (*j <= pivot) {
+            i++;
+            swap(i, j);
+        }
+    }
+    swap(i + 1, high);
+    return (i + 1);
+}
+
+// Sort the inclusive range [low, high] in ascending order
+void quicksort(int32_t* low, int32_t* high) {
+    if (low < high) {
+        int32_t* pi = partition(low, high);  // Partitioning index
+
+        quicksort(low, pi - 1);
+        quicksort(pi + 1, high);
+    }
+}
+
+// Exclusive prefix sum: b[i] is the sum of a[0] to a[i - 1], with b[0] = 0
+void prefixSum(const int* a, int* b, size_t n) {
+    // Initialize the first element of the prefix sum array to 0
+    b[0] = 0;
+
+    // Loop through the array and compute prefix sums
+    for (size_t i = 1; i < n; ++i) {
+        b[i] = b[i - 1] + a[i - 1];
+    }
+}
+
+// Sort the n elements of x, all expected to lie in [minimum, maximum], using
+// numBuckets buckets. Called by all cores of the cluster; the memory behind x
+// is used as scratchpad for the buckets and their element counters.
+void bucketSort(int32_t* x, uint32_t n, uint32_t numBuckets, int32_t maximum,
+                int32_t minimum) {
+    int32_t ttemp = snrt_mcycle();
+
+    int32_t core_idx = snrt_cluster_core_idx();
+    int frac_core = n / snrt_cluster_compute_core_num();
+    int offset_core = core_idx * frac_core;
+
+    // Buckets are shared over all cores in the cluster: numBuckets regions of
+    // n elements each right behind x, followed by one counter per bucket
+    int32_t* bucketscratchpad = x + n;
+    int32_t* buckets[numBuckets];
+    int32_t* bucket_count = (int32_t*)(bucketscratchpad + numBuckets * n);
+
+    // The table of bucket pointers is a local variable, hence private to each
+    // core, so every core has to fill in its own copy
+    for (uint32_t i = 0; i < numBuckets; i++) {
+        buckets[i] = &bucketscratchpad[i * n];
+    }
+
+    // The bucket counters are shared, so a single core resets them
+    if (core_idx == 0) {
+        for (uint32_t i = 0; i < numBuckets; i++) {
+            bucket_count[i] = 0;
+        }
+    }
+    snrt_cluster_hw_barrier();
+    ttemp = snrt_mcycle();
+
+    // Distribute array elements into buckets
+    if (snrt_is_compute_core()) {
+        int32_t range = (maximum - minimum) / numBuckets + 1;
+
+        for (int32_t i = offset_core; i < offset_core + frac_core; i++) {
+            int32_t bucketIndex = (x[i] - minimum) / range;
+            // Atomically reserve the next free slot of the bucket
+            int32_t current_index = __atomic_fetch_add(
+                &bucket_count[bucketIndex], 1, __ATOMIC_SEQ_CST);
+            buckets[bucketIndex][current_index] = x[i];
+        }
+    }
+
+    // Before sorting the buckets, the data needs to be distributed
+    snrt_cluster_hw_barrier();
+    ttemp = snrt_mcycle();
+
+    // Sort each bucket, buckets are assigned to the cores round-robin
+    if (snrt_is_compute_core()) {
+        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
+             next_bucket += snrt_cluster_compute_core_num()) {
+            if (bucket_count[next_bucket] > 0) {
+                quicksort(buckets[next_bucket],
+                          buckets[next_bucket] + bucket_count[next_bucket] - 1);
+            }
+        }
+    }
+
+    // Before merging the buckets, all of them need to be sorted
+    snrt_cluster_hw_barrier();
+    ttemp = snrt_mcycle();
+
+    // Make a cumulative sum array, to know the offset per bucket
+    int idx_offset[numBuckets];
+    prefixSum(bucket_count, idx_offset, numBuckets);
+
+    // Merge buckets and store into x, every core copies the buckets it sorted
+    if (snrt_is_compute_core()) {
+        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
+             next_bucket += snrt_cluster_compute_core_num()) {
+            for (uint32_t j = 0; j < bucket_count[next_bucket]; j++) {
+                x[idx_offset[next_bucket] + j] = buckets[next_bucket][j];
+            }
+        }
+    }
+    ttemp = snrt_mcycle();
+}
diff --git a/sw/blas/intsort/src/main.c b/sw/blas/intsort/src/main.c
new file mode 100644
index 000000000..0ac53efa4
--- /dev/null
+++ b/sw/blas/intsort/src/main.c
@@ -0,0 +1,53 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Nico Canzani
+// Author: Luca Colagrande
+
+#include "snrt.h"
+
+#include "data.h"
+#include "intsort.h"
+
+// Define Number of Buckets, use multiple of 8
+#define N_BUCKETS 8
+
+int main() {
+    int32_t *local_x;
+    int32_t *remote_x, *remote_z;
+
+    // Calculate size and pointers for each cluster
+    uint32_t frac = n / snrt_cluster_num();
+    uint32_t offset = frac * snrt_cluster_idx();
+    remote_x = x + offset;
+    remote_z = z + offset;
+
+    // Allocate space in TCDM
+    local_x = (int32_t *)snrt_l1_next();
+
+    // Copy data in TCDM
+    if (snrt_is_dm_core()) {
+        size_t size = frac * sizeof(int32_t);
+        snrt_dma_start_1d(local_x, remote_x, size);
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    // Compute: every cluster sorts only the fraction of the data it loaded
+    bucketSort(local_x, frac, N_BUCKETS, max, min);
+
+    snrt_cluster_hw_barrier();
+
+    // Copy data out of TCDM
+    if (snrt_is_dm_core()) {
+        size_t size = frac * sizeof(int32_t);
+        snrt_dma_start_1d(remote_z, local_x, size);
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    return 0;
+}
diff --git a/target/snitch_cluster/sw.mk b/target/snitch_cluster/sw.mk
index 6083a8ee1..84ef4c027 100644
--- a/target/snitch_cluster/sw.mk
+++ b/target/snitch_cluster/sw.mk
@@ -53,6 +53,7 @@ APPS += sw/apps/blas/gemm
 APPS += sw/apps/blas/gemv
 APPS += sw/apps/blas/dot
 APPS += sw/apps/blas/syrk
+APPS += sw/apps/blas/intsort
 APPS += sw/apps/dnn/batchnorm
 APPS += sw/apps/dnn/conv2d
 APPS += sw/apps/dnn/fusedconv
diff --git a/target/snitch_cluster/sw/apps/blas/intsort/Makefile b/target/snitch_cluster/sw/apps/blas/intsort/Makefile
new file mode 100644
index 000000000..80f9a96c4
--- /dev/null
+++ b/target/snitch_cluster/sw/apps/blas/intsort/Makefile
@@ -0,0 +1,10 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Luca Colagrande
+
+include ../../../../../../sw/blas/intsort/Makefile
+include ../../common.mk
+
+$(DEP): $(DATA_H)
diff --git a/util/sim/data_utils.py b/util/sim/data_utils.py
index fce39a9b5..fd9e1c60c 100644
--- a/util/sim/data_utils.py
+++ b/util/sim/data_utils.py
@@ -273,6 +273,7 @@ def from_buffer(byte_array, ctype='uint32_t'):
 # Types which have a direct correspondence in Numpy
 NP_DTYPE_FROM_CTYPE = {
     'uint32_t': np.uint32,
+    'int32_t': np.int32,
     'double': np.float64,
     'float': np.float32,
     '__fp16': np.float16