Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: integer sorting example #199

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions sw/blas/intsort/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Author: Luca Colagrande <[email protected]>
# Author: Nico Canzani <[email protected]>

# Usage of absolute paths is required to externally include this Makefile
MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATA_DIR := $(realpath $(MK_DIR)/data)
SRC_DIR := $(realpath $(MK_DIR)/src)

# JSON configuration handed to the data generator (-c); may be overridden
DATA_CFG ?= $(DATA_DIR)/params.json
# Forwarded to the data generator as --section; empty unless overridden
SECTION ?=

APP ?= intsort
SRCS ?= $(realpath $(SRC_DIR)/main.c)
# DATA_H is only defined further down; this works because ?= creates a
# recursively expanded variable, so DATA_H is resolved when INCDIRS is used
INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR)

DATAGEN_PY = $(MK_DIR)/scripts/datagen.py
DATA_H ?= $(DATA_DIR)/data.h

# Create the directory that will hold the generated header
$(dir $(DATA_H)):
	mkdir -p $@

# Regenerate the header whenever the generator script or its configuration
# changes. The directory is an order-only prerequisite, so its timestamp
# never triggers a rebuild. $< is the generator script, executed directly.
$(DATA_H): $(DATAGEN_PY) $(DATA_CFG) | $(dir $(DATA_H))
	$< -c $(DATA_CFG) --section="$(SECTION)" $@

.PHONY: clean-data clean

# Remove the generated header only
clean-data:
	rm -f $(DATA_H)

# This Makefile's contribution to clean is to remove the generated data
clean: clean-data
7 changes: 7 additions & 0 deletions sw/blas/intsort/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

{
"n": 512,
"min": -256,
"max": 256,
"syntetic": false
}
63 changes: 63 additions & 0 deletions sw/blas/intsort/scripts/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Author: Nico Canzani <[email protected]>
# Author: Luca Colagrande <[email protected]>

import random
import numpy as np
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
from data_utils import format_scalar_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper, DataGen # noqa: E402


class IntsortDataGen(DataGen):
    """Data generator for the integer sorting kernel.

    Emits a C header with the input array ``x``, a declaration of the output
    array ``z`` and, wrapped in an ``#ifdef BIST``, the sorted golden array
    ``g``.
    """

    # AXI splits bursts crossing 4KB address boundaries. To minimize
    # the occurrence of these splits the data should be aligned to 4KB
    BURST_ALIGNMENT = 4096

    def golden_model(self, x):
        """Return a sorted copy of `x` (ascending order)."""
        return np.sort(x)

    def emit_header(self, **kwargs):
        """Build the contents of the data header.

        Keyword arguments read from `kwargs`:
            n: number of elements to sort; must be a multiple of 8.
            min, max: inclusive bounds of the generated values.
            syntetic: if true, emit a shuffled permutation of every integer
                in [min, max] instead of random samples; this requires
                max - min + 1 == n.
            section: forwarded to the array formatting helpers.

        Returns:
            The header contents as a single string.

        Raises:
            ValueError: if n is not a multiple of 8.
            SystemExit: (non-zero status) if synthetic data is requested but
                the [min, max] interval does not contain exactly n integers.
        """
        header = [super().emit_header()]

        n = kwargs['n']
        self.MIN = kwargs['min']
        self.MAX = kwargs['max']
        syntetic_data = kwargs['syntetic']

        # Validate before generating anything. An explicit raise (instead of
        # an assert) keeps the check active under `python -O`.
        if n % 8 != 0:
            raise ValueError("n must be an integer multiple of the number of cores (8)")

        if syntetic_data:
            n_values = len(range(self.MIN, self.MAX + 1))
            if n_values != n:
                # Exit with a non-zero status so that the calling Makefile
                # notices that no header was generated.
                sys.exit(f'Parameter Problem: n is set to {n}, min to max generates '
                         f'{n_values} numbers.\nExit Generator\n')
            print('Creating syntetic data')
            # Use the configured interval so that every value lies within the
            # [min, max] bounds which are also emitted into the header.
            x = np.arange(self.MIN, self.MAX + 1)
            np.random.shuffle(x)
        else:
            x = np.asarray([random.randrange(self.MIN, self.MAX + 1) for _ in range(n)])
        g = self.golden_model(x)

        header += [format_scalar_definition('const uint32_t', 'n', n)]
        header += [format_scalar_definition('const int32_t', 'min', self.MIN)]
        header += [format_scalar_definition('const int32_t', 'max', self.MAX)]
        header += [format_array_definition('int32_t', 'x', x, alignment=self.BURST_ALIGNMENT,
                                           section=kwargs['section'])]
        header += [format_array_declaration('int32_t', 'z', [n], alignment=self.BURST_ALIGNMENT,
                                            section=kwargs['section'])]
        # The golden result is only compiled in when BIST is defined
        result_def = format_array_definition('int32_t', 'g', g)
        header += [format_ifdef_wrapper('BIST', result_def)]
        header = '\n\n'.join(header)

        return header


# Script entry point: call main() (not defined in IntsortDataGen itself, so
# inherited) and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(IntsortDataGen().main())
33 changes: 33 additions & 0 deletions sw/blas/intsort/scripts/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Author: Nico Canzani <[email protected]>
# Author: Luca Colagrande <[email protected]>

import sys
from pathlib import Path
from datagen import IntsortDataGen

sys.path.append(str(Path(__file__).parent / '../../../../util/sim/'))
from verif_utils import Verifier # noqa: E402


class IntsortVerifier(Verifier):
    """Compare the simulated output array `z` against the golden model."""

    # Symbols holding the kernel outputs
    OUTPUT_UIDS = ['z']

    def get_actual_results(self):
        """Return the output array `z`, read as int32_t."""
        actual = self.get_output_from_symbol('z', 'int32_t')
        return actual

    def get_expected_results(self):
        """Return the golden model applied to the input array `x`."""
        generator = IntsortDataGen()
        inputs = self.get_input_from_symbol('x', 'int32_t')
        return generator.golden_model(inputs)

    def check_results(self, *args):
        """Delegate to the base class check with a relative tolerance of 1e-10."""
        tolerance = 1e-10
        return super().check_results(*args, rtol=tolerance)


# Script entry point: call main() (not defined in IntsortVerifier itself, so
# inherited) and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(IntsortVerifier().main())
121 changes: 121 additions & 0 deletions sw/blas/intsort/src/intsort.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Author: Nico Canzani <[email protected]>
// Author: Luca Colagrande <[email protected]>

#include "snrt.h"

// Exchange the values pointed to by a and b.
//
// Declared static inline because this definition lives in a header: with
// external linkage, including the header from more than one translation
// unit would produce duplicate symbol definitions at link time.
static inline void swap(int32_t* a, int32_t* b) {
    int32_t temp = *a;
    *a = *b;
    *b = temp;
}

// Partition the inclusive range [low, high] around the pivot *high.
//
// On return, the pivot sits at the returned position, every element before
// it is <= pivot and every element after it is > pivot.
//
// low:  pointer to the first element of the range
// high: pointer to the last element of the range (used as pivot)
// Returns a pointer to the final position of the pivot.
static inline int32_t* partition(int32_t* low, int32_t* high) {
    int32_t pivot = *high;  // Pivot element (can be chosen randomly)

    // Next slot to receive an element <= pivot. Starting at low (and
    // incrementing after the swap) avoids forming the pointer low - 1,
    // which would point before the start of the array.
    int32_t* store = low;

    for (int32_t* j = low; j < high; j++) {
        if (*j <= pivot) {
            swap(store, j);
            store++;
        }
    }

    // Move the pivot between the two partitions
    swap(store, high);
    return store;
}

// Sort the inclusive range [low, high] in ascending order, in place.
//
// Only the smaller of the two partitions is handled recursively, the larger
// one is processed by the loop. This bounds the recursion depth to
// O(log n), whereas recursing on both sides needs O(n) stack frames on
// already sorted input (the pivot is always the last element).
static inline void quicksort(int32_t* low, int32_t* high) {
    while (low < high) {
        int32_t* pi = partition(low, high);  // Partitioning index

        if (pi - low < high - pi) {
            // Left side is smaller. Skip it when empty, so that the pointer
            // low - 1 is never formed.
            if (pi > low) {
                quicksort(low, pi - 1);
            }
            low = pi + 1;
        } else {
            // Right side is smaller or equal. Here pi > low always holds
            // (pi - low >= high - pi and low < high), so pi - 1 is valid.
            quicksort(pi + 1, high);
            high = pi - 1;
        }
    }
}

// Compute the exclusive prefix sum of a into b:
//   b[0] = 0, b[i] = a[0] + ... + a[i - 1]   for 0 < i < n
//
// a: input array, elements [0, n - 1) are read
// b: output array with n elements
// n: number of elements in b; nothing is written if n is 0
static inline void prefixSum(const int* a, int* b, size_t n) {
    // Guard against writing b[0] of an empty output array
    if (n == 0) {
        return;
    }

    // Initialize the first element of the prefix sum array to 0
    b[0] = 0;

    // Loop through the array and compute prefix sums
    for (size_t i = 1; i < n; ++i) {
        b[i] = b[i - 1] + a[i - 1];
    }
}

// Sort the n elements of x in ascending order, in place, using a bucket sort
// parallelized over the compute cores of a cluster.
//
// Must be called by all cores of the cluster, since it contains cluster
// hardware barriers.
//
// x:          array to sort. The memory following it is used as scratch
//             space: numBuckets * n elements for the buckets starting at
//             x + n, followed by numBuckets bucket counters. The caller must
//             make sure this memory is available and visible to all cores.
// n:          number of elements in x. Elements are split evenly among the
//             compute cores with an integer division, so n should be a
//             multiple of the number of compute cores.
// numBuckets: number of buckets; nothing is done if it is 0
// maximum:    largest value that may occur in x
// minimum:    smallest value that may occur in x
static inline void bucketSort(int32_t* x, uint32_t n, uint32_t numBuckets,
                              int32_t maximum, int32_t minimum) {
    // All cores see the same argument, so they all return here together
    // and the barriers below stay balanced. Avoids a zero-length array and
    // a division by zero further down.
    if (numBuckets == 0) {
        return;
    }

    // The cycle counter is read between phases but the value is never
    // consumed; presumably these reads delimit regions in the trace.
    int32_t ttemp = snrt_mcycle();

    uint32_t core_idx = snrt_cluster_core_idx();
    uint32_t num_cores = snrt_cluster_compute_core_num();
    uint32_t frac_core = n / num_cores;
    uint32_t offset_core = core_idx * frac_core;

    // Buckets and bucket counters live behind x and are shared by all cores
    int32_t* bucketscratchpad = x + n;
    int32_t* bucket_count = bucketscratchpad + numBuckets * n;

    // The table of bucket base pointers is a local array, i.e. every core
    // has its own copy and therefore has to initialize it itself.
    int32_t* buckets[numBuckets];
    for (uint32_t i = 0; i < numBuckets; i++) {
        buckets[i] = &bucketscratchpad[i * n];
    }

    // The bucket counters are shared, so they are initialized only once
    if (core_idx == 0) {
        for (uint32_t i = 0; i < numBuckets; i++) {
            bucket_count[i] = 0;
        }
    }
    snrt_cluster_hw_barrier();
    ttemp = snrt_mcycle();

    // Distribute array elements into buckets
    if (snrt_is_compute_core()) {
        int32_t range = (maximum - minimum) / numBuckets + 1;

        for (uint32_t i = offset_core; i < offset_core + frac_core; i++) {
            int32_t bucketIndex = (x[i] - minimum) / range;
            // Several cores may append to the same bucket concurrently:
            // reserve a slot atomically before writing to it
            int32_t current_index = __atomic_fetch_add(
                &bucket_count[bucketIndex], 1, __ATOMIC_SEQ_CST);
            buckets[bucketIndex][current_index] = x[i];
        }
    }

    // Before sorting the buckets, the data needs to be distributed
    snrt_cluster_hw_barrier();
    ttemp = snrt_mcycle();

    // Sort each bucket, buckets are assigned to the cores round-robin
    if (snrt_is_compute_core()) {
        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
             next_bucket += num_cores) {
            if (bucket_count[next_bucket] > 0) {
                quicksort(buckets[next_bucket],
                          buckets[next_bucket] + bucket_count[next_bucket] - 1);
            }
        }
    }

    // Before merging the buckets, all of them need to be sorted
    snrt_cluster_hw_barrier();
    ttemp = snrt_mcycle();

    // Make a cumulative sum array, to know the offset per bucket
    int idx_offset[numBuckets];
    prefixSum(bucket_count, idx_offset, numBuckets);

    // Merge buckets and store into x. Every bucket assigned to this core is
    // copied, using the bucket's own index for its count, offset and data.
    if (snrt_is_compute_core()) {
        for (uint32_t next_bucket = core_idx; next_bucket < numBuckets;
             next_bucket += num_cores) {
            int32_t* dst = x + idx_offset[next_bucket];
            for (int32_t j = 0; j < bucket_count[next_bucket]; j++) {
                dst[j] = buckets[next_bucket][j];
            }
        }
    }
    ttemp = snrt_mcycle();
    (void)ttemp;
}
53 changes: 53 additions & 0 deletions sw/blas/intsort/src/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Author: Nico Canzani <[email protected]>
// Author: Luca Colagrande <[email protected]>

#include "snrt.h"

#include "data.h"
#include "intsort.h"

// Define Number of Buckets, use multiple of 8
#define N_BUCKETS 8

// Sort the input array x and write the result to z.
//
// The array is split evenly among the clusters. Every cluster copies its
// slice into its L1 memory, sorts it there and copies it back. Note that
// the slices are sorted independently of each other: z is globally sorted
// only when running on a single cluster.
int main() {
    // Calculate size and pointers for each cluster
    uint32_t frac = n / snrt_cluster_num();
    uint32_t offset = frac * snrt_cluster_idx();
    int32_t *remote_x = x + offset;
    int32_t *remote_z = z + offset;
    size_t size = frac * sizeof(int32_t);

    // Allocate space in TCDM
    int32_t *local_x = (int32_t *)snrt_l1_next();

    // Copy data in TCDM
    if (snrt_is_dm_core()) {
        snrt_dma_start_1d(local_x, remote_x, size);
        snrt_dma_wait_all();
    }

    snrt_cluster_hw_barrier();

    // Compute. Only frac elements were copied into local_x, so this is the
    // number of elements to sort (identical to n on a single cluster).
    bucketSort(local_x, frac, N_BUCKETS, max, min);

    snrt_cluster_hw_barrier();

    // Copy data out of TCDM
    if (snrt_is_dm_core()) {
        snrt_dma_start_1d(remote_z, local_x, size);
        snrt_dma_wait_all();
    }

    snrt_cluster_hw_barrier();

    return 0;
}
1 change: 1 addition & 0 deletions target/snitch_cluster/sw.mk
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ APPS += sw/apps/blas/gemm
APPS += sw/apps/blas/gemv
APPS += sw/apps/blas/dot
APPS += sw/apps/blas/syrk
APPS += sw/apps/blas/intsort
APPS += sw/apps/dnn/batchnorm
APPS += sw/apps/dnn/conv2d
APPS += sw/apps/dnn/fusedconv
Expand Down
10 changes: 10 additions & 0 deletions target/snitch_cluster/sw/apps/blas/intsort/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright 2023 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Pull in the target-independent application Makefile, which defines DATA_H
# and the rule to generate it
include ../../../../../../sw/blas/intsort/Makefile
# NOTE(review): common.mk is not visible here; presumably it defines DEP and
# the generic build rules for the application - confirm
include ../../common.mk

# The generated data header must exist before $(DEP) is built
$(DEP): $(DATA_H)
1 change: 1 addition & 0 deletions util/sim/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ def from_buffer(byte_array, ctype='uint32_t'):
# Types which have a direct correspondence in Numpy
NP_DTYPE_FROM_CTYPE = {
'uint32_t': np.uint32,
'int32_t': np.int32,
'double': np.float64,
'float': np.float32,
'__fp16': np.float16
Expand Down