pulp-platform · nicoca20 · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024
@@ -0,0 +1,34 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Luca Colagrande <[email protected]>
+# Author: Nico Canzani <[email protected]>
+
+# Usage of absolute paths is required to externally include this Makefile
+# MK_DIR is the directory of the Makefile being parsed right now (the last
+# entry of MAKEFILE_LIST); $(dir ...) leaves a trailing slash on it.
+MK_DIR   := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+# NOTE: $(realpath ...) expands to the empty string if the path does not exist
+DATA_DIR := $(realpath $(MK_DIR)/data)
+SRC_DIR  := $(realpath $(MK_DIR)/src)
+
+# Data generation inputs, overridable by the including Makefile or from the
+# command line. SECTION is forwarded to the generator's --section option.
+DATA_CFG ?= $(DATA_DIR)/params.json
+SECTION  ?=
+
+# Application name, sources and include directories (all overridable).
+# ?= defines recursively expanded variables, so INCDIRS may reference DATA_H
+# even though DATA_H is only defined further down.
+APP     ?= intsort
+SRCS    ?= $(realpath $(SRC_DIR)/main.c)
+INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR)
+
+# Generator script and the header it produces
+DATAGEN_PY = $(MK_DIR)/scripts/datagen.py
+DATA_H    ?= $(DATA_DIR)/data.h
+
+# Create the directory which holds the generated header
+$(dir $(DATA_H)):
+	mkdir -p $@
+
+# Regenerate the header whenever the generator script or its configuration
+# changes. The script ($<) is invoked directly as a command. The output
+# directory is an order-only prerequisite: it must exist, but its timestamp
+# does not trigger a rebuild.
+$(DATA_H): $(DATAGEN_PY) $(DATA_CFG) | $(dir $(DATA_H))
+	$< -c $(DATA_CFG) --section="$(SECTION)" $@
+
+.PHONY: clean-data clean
+
+# Remove the generated header only
+clean-data:
+	rm -f $(DATA_H)
+
+# Hook clean-data into the clean target
+clean: clean-data
@@ -0,0 +1,7 @@
+
+{
+    "n": 512,
+    "min": -256,
+    "max": 256,
+    "syntetic": false
+}
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Nico Canzani <[email protected]>
+# Author: Luca Colagrande <[email protected]>
+
+import random
+import numpy as np
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
+from data_utils import format_scalar_definition, format_array_definition, \
+                       format_array_declaration, format_ifdef_wrapper, DataGen  # noqa: E402
+
+
+class IntsortDataGen(DataGen):
+    # AXI splits bursts crossing 4KB address boundaries. To minimize
+    # the occurrence of these splits the data should be aligned to 4KB
+    BURST_ALIGNMENT = 4096
+
+    def golden_model(self, x):
+        return np.sort(x)
+
+    def emit_header(self, **kwargs):
+        header = [super().emit_header()]
+
+        n = kwargs['n']
+        self.MIN = kwargs['min']
+        self.MAX = kwargs['max']
+        syntetic_data = kwargs['syntetic']
+        if syntetic_data:
+            if len(range(self.MIN, self.MAX + 1)) == n:
+                print(f'Creating syntetic data')
+                x = np.arange(-n//2, n//2)
+                np.random.shuffle(x)
+            else:
+                print(f'Parameter Problem: n is set to {n}, min to max generates {len(range(self.MIN, self.MAX + 1))} numbers.\nExit Generator\n')
+                sys.exit()
+        else:
+            x = np.asarray([random.randrange(self.MIN, self.MAX + 1, 1) for i in range(n)])
+        g = self.golden_model(x)
+
+        assert (n % 8) == 0, "n must be an integer multiple of the number of cores (8)"
+
+        header += [format_scalar_definition('const uint32_t', 'n', n)]
+        header += [format_scalar_definition('const int32_t', 'min', self.MIN)]
+        header += [format_scalar_definition('const int32_t', 'max', self.MAX)]
+        header += [format_array_definition('int32_t', 'x', x, alignment=self.BURST_ALIGNMENT,
+                                           section=kwargs['section'])]
+        header += [format_array_declaration('int32_t', 'z', [n], alignment=self.BURST_ALIGNMENT,
+                                            section=kwargs['section'])]
+        result_def = format_array_definition('int32_t', 'g', g)
+        header += [format_ifdef_wrapper('BIST', result_def)]
+        header = '\n\n'.join(header)
+
+        return header
+
+
+if __name__ == '__main__':
+    sys.exit(IntsortDataGen().main())
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Author: Nico Canzani <[email protected]>
+# Author: Luca Colagrande <[email protected]>
+
+import sys
+from pathlib import Path
+from datagen import IntsortDataGen
+
+sys.path.append(str(Path(__file__).parent / '../../../../util/sim/'))
+from verif_utils import Verifier  # noqa: E402
+
+
+class IntsortVerifier(Verifier):
+
+    OUTPUT_UIDS = ['z']
+
+    def get_actual_results(self):
+        return self.get_output_from_symbol('z', 'int32_t')
+
+    def get_expected_results(self):
+        x = self.get_input_from_symbol('x', 'int32_t')
+        return IntsortDataGen().golden_model(x)
+
+    def check_results(self, *args):
+        return super().check_results(*args, rtol=1e-10)
+
+
+if __name__ == "__main__":
+    sys.exit(IntsortVerifier().main())
@@ -0,0 +1,121 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Nico Canzani <[email protected]>
+// Author: Luca Colagrande <[email protected]>
+
+#include "snrt.h"
+
+// Exchange the two 32-bit integers pointed to by `a` and `b`.
+// Both values are read before either is written, so `a == b` is harmless.
+void swap(int32_t* a, int32_t* b) {
+    const int32_t first = *a;
+    const int32_t second = *b;
+    *a = second;
+    *b = first;
+}
+
+// Partition the inclusive range [low, high] around the pivot `*high`.
+//
+// On return, the pivot sits at the returned position, every element before
+// it is <= pivot and every element after it is > pivot.
+// Requires low <= high.
+int32_t* partition(int32_t* low, int32_t* high) {
+    int32_t pivot = *high;  // Pivot element (can be chosen randomly)
+    // Next slot to receive an element <= pivot. Starting at `low`, rather
+    // than at `low - 1`, avoids forming a pointer before the start of the
+    // array.
+    int32_t* store = low;
+
+    for (int32_t* j = low; j < high; j++) {
+        if (*j <= pivot) {
+            swap(store, j);
+            store++;
+        }
+    }
+    // Move the pivot to its final position
+    swap(store, high);
+    return store;
+}
+
+// Sort the inclusive range [low, high] in ascending order, in place.
+// A range with low >= high is left untouched.
+//
+// Only the smaller partition is handled recursively, the larger one by the
+// loop, which bounds the recursion depth by log2 of the range length even
+// for sorted or constant input.
+void quicksort(int32_t* low, int32_t* high) {
+    while (low < high) {
+        int32_t* pi = partition(low, high);  // Partitioning index
+
+        if (pi - low < high - pi) {
+            // Left partition is the smaller one. Skip it when it is empty,
+            // so that `pi - 1` is never formed for pi == low.
+            if (pi > low) {
+                quicksort(low, pi - 1);
+            }
+            low = pi + 1;
+        } else {
+            // Right partition is not larger than the left one. This implies
+            // pi > low here, so `pi - 1` stays inside the range.
+            quicksort(pi + 1, high);
+            high = pi - 1;
+        }
+    }
+}
+
+// Compute the exclusive prefix sum of `a` into `b`:
+// b[0] = 0 and b[i] = a[0] + ... + a[i - 1] for 0 < i < n.
+// Reads a[0 .. n-2] and writes b[0 .. n-1]; nothing is accessed if n == 0.
+void prefixSum(const int* a, int* b, size_t n) {
+    // Nothing to do for an empty array; in particular b[0] must not be
+    // written
+    if (n == 0) {
+        return;
+    }
+
+    // Initialize the first element of the prefix sum array to 0
+    b[0] = 0;
+
+    // Loop through the array and compute prefix sums
+    for (size_t i = 1; i < n; ++i) {
+        b[i] = b[i - 1] + a[i - 1];
+    }
+}
+
+// Sort the `n` integers at `x` in place with a bucket sort parallelized
+// over the compute cores of a cluster.
+//
+// - Must be called by every core of the cluster, as it contains cluster
+//   hardware barriers.
+// - The memory directly behind `x` is used as scratchpad: `numBuckets`
+//   buckets of `n` elements each, followed by `numBuckets` counters.
+// - Every value in `x` must lie in [minimum, maximum], otherwise the
+//   computed bucket index falls outside of the bucket table.
+// - The input is split evenly across the compute cores using an integer
+//   division, so `n` should be a multiple of the number of compute cores.
+//
+// NOTE(review): the return value of snrt_mcycle() is not used; the calls
+// are presumably kept as markers delimiting the phases - confirm.
+void bucketSort(int32_t* x, uint32_t n, uint32_t numBuckets, int32_t maximum,
+                int32_t minimum) {
+    snrt_mcycle();
+
+    int32_t core_idx = snrt_cluster_core_idx();
+    uint32_t num_cores = snrt_cluster_compute_core_num();
+    int frac_core = n / num_cores;
+    int offset_core = core_idx * frac_core;
+
+    // Buckets and bucket counters live in the scratchpad behind x
+    int32_t* bucketscratchpad = x + n;
+    int32_t* buckets[numBuckets];
+    int32_t* bucket_count = (int32_t*)(bucketscratchpad + numBuckets * n);
+
+    // The bucket pointer table is a local array, so every core has to fill
+    // in its own copy.
+    for (uint32_t i = 0; i < numBuckets; i++) {
+        buckets[i] = &bucketscratchpad[i * n];
+    }
+
+    // The counters are reached by all cores through the same scratchpad
+    // address, so they need to be initialized only once.
+    if (core_idx == 0) {
+        for (uint32_t i = 0; i < numBuckets; i++) {
+            bucket_count[i] = 0;
+        }
+    }
+    snrt_cluster_hw_barrier();
+    snrt_mcycle();
+
+    // Distribute array elements into buckets
+    if (snrt_is_compute_core()) {
+        int32_t range = (maximum - minimum) / numBuckets + 1;
+
+        for (int32_t i = offset_core; i < offset_core + frac_core; i++) {
+            int32_t bucketIndex = (x[i] - minimum) / range;
+            // Atomically reserve a slot, as several cores may append to the
+            // same bucket
+            int32_t current_index = __atomic_fetch_add(
+                &bucket_count[bucketIndex], 1, __ATOMIC_SEQ_CST);
+            buckets[bucketIndex][current_index] = x[i];
+        }
+    }
+
+    // Before sorting the buckets, the data needs to be distributed
+    snrt_cluster_hw_barrier();
+    snrt_mcycle();
+
+    // Sort each bucket; buckets are assigned to the cores round-robin
+    if (snrt_is_compute_core()) {
+        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
+             next_bucket += num_cores) {
+            if (bucket_count[next_bucket] > 0) {
+                quicksort(buckets[next_bucket],
+                          buckets[next_bucket] + bucket_count[next_bucket] - 1);
+            }
+        }
+    }
+
+    // Before merging the buckets, all of them need to be sorted
+    snrt_cluster_hw_barrier();
+    snrt_mcycle();
+
+    // Make a cumulative sum array, to know the offset per bucket
+    int idx_offset[numBuckets];
+    prefixSum(bucket_count, idx_offset, numBuckets);
+
+    // Merge buckets and store into x. Each core copies the buckets it
+    // sorted, so the bucket index (not the core index) selects source and
+    // destination.
+    if (snrt_is_compute_core()) {
+        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
+             next_bucket += num_cores) {
+            int32_t* src = buckets[next_bucket];
+            int32_t* dst = x + idx_offset[next_bucket];
+            for (int32_t j = 0; j < bucket_count[next_bucket]; j++) {
+                dst[j] = src[j];
+            }
+        }
+    }
+    snrt_mcycle();
+}
@@ -0,0 +1,53 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Nico Canzani <[email protected]>
+// Author: Luca Colagrande <[email protected]>
+
+#include "snrt.h"
+
+#include "data.h"
+#include "intsort.h"
+
+// Define Number of Buckets, use multiple of 8
+#define N_BUCKETS 8
+
+// Every cluster copies its share of `x` to the address returned by
+// snrt_l1_next(), sorts it there and copies the result to the matching
+// share of `z`.
+//
+// With more than one cluster, each share of `z` is sorted on its own; the
+// shares are not merged.
+int main() {
+    int32_t *local_x;
+    int32_t *remote_x, *remote_z;
+
+    // Calculate size and pointers for each cluster
+    uint32_t frac = n / snrt_cluster_num();
+    uint32_t offset = frac * snrt_cluster_idx();
+    remote_x = x + offset;
+    remote_z = z + offset;
+
+    // Allocate space in TCDM
+    local_x = (int32_t *)snrt_l1_next();
+
+    // Copy data in TCDM
+    if (snrt_is_dm_core()) {
+        size_t size = frac * sizeof(int32_t);
+        snrt_dma_start_1d(local_x, remote_x, size);
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    // Compute. Only `frac` elements were copied to local_x, so this is the
+    // number of elements to sort (equal to n when there is one cluster).
+    bucketSort(local_x, frac, N_BUCKETS, max, min);
+
+    snrt_cluster_hw_barrier();
+
+    // Copy data out of TCDM
+    if (snrt_is_dm_core()) {
+        size_t size = frac * sizeof(int32_t);
+        snrt_dma_start_1d(remote_z, local_x, size);
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    return 0;
+}
@@ -53,6 +53,7 @@ APPS += sw/apps/blas/gemm
 APPS += sw/apps/blas/gemv
 APPS += sw/apps/blas/dot
 APPS += sw/apps/blas/syrk
+APPS += sw/apps/blas/intsort
 APPS += sw/apps/dnn/batchnorm
 APPS += sw/apps/dnn/conv2d
 APPS += sw/apps/dnn/fusedconv

@@ -0,0 +1,10 @@
+# Copyright 2023 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Luca Colagrande <[email protected]>
+
+# Application-specific definitions come first, then the build rules shared
+# by all applications of this target.
+# NOTE(review): DATA_H is presumably defined by the intsort Makefile and DEP
+# by common.mk - confirm.
+include ../../../../../../sw/blas/intsort/Makefile
+include ../../common.mk
+
+# Make $(DATA_H) a prerequisite of $(DEP), so that it is brought up to date
+# before $(DEP) is built
+$(DEP): $(DATA_H)
@@ -273,6 +273,7 @@ def from_buffer(byte_array, ctype='uint32_t'):
     # Types which have a direct correspondence in Numpy
     NP_DTYPE_FROM_CTYPE = {
         'uint32_t': np.uint32,
+        'int32_t': np.int32,
         'double': np.float64,
         'float': np.float32,
         '__fp16': np.float16