Skip to content

Commit

Permalink
sw: Add K-means app
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Jan 26, 2024
1 parent 100e127 commit eb38f12
Show file tree
Hide file tree
Showing 11 changed files with 458 additions and 0 deletions.
3 changes: 3 additions & 0 deletions python-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ editorconfig-checker==2.3.51
flake8
gitpython
hjson
json5
jsonref
jsonschema
mako
matplotlib
progressbar2
tabulate
yamllint
pyyaml
pytablewriter
scikit-learn
termcolor
pandas
pyelftools
Expand Down
1 change: 1 addition & 0 deletions sw/apps/kmeans/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/data.h
30 changes: 30 additions & 0 deletions sw/apps/kmeans/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Usage of absolute paths is required to externally include this Makefile
MK_DIR   := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATA_DIR := $(realpath $(MK_DIR)/data)
SRC_DIR  := $(realpath $(MK_DIR)/src)

# Knobs that an including Makefile or the command line may override:
#   DATA_CFG  parameter file handed to the data generator
#   SECTION   forwarded to the data generator's --section option
DATA_CFG ?= $(DATA_DIR)/params.json
SECTION  ?=

APP     ?= kmeans
SRCS    ?= $(realpath $(SRC_DIR)/main.c)
INCDIRS ?= $(DATA_DIR) $(SRC_DIR)

DATAGEN_PY = $(DATA_DIR)/datagen.py
DATA_H     = $(DATA_DIR)/data.h

# Generate the data header from the parameter file.
# The generator writes the header to stdout, so the shell creates $@ before
# the generator has even started. If the generator fails, remove the partial
# file (preserving the exit status), otherwise the truncated header would
# carry a fresh timestamp and be considered up to date on the next run.
# This is handled locally rather than with a global .DELETE_ON_ERROR, since
# this Makefile is meant to be included by others.
$(DATA_H): $(DATAGEN_PY) $(DATA_CFG)
	$< -c $(DATA_CFG) --no-gui --section="$(SECTION)" > $@ || { rc=$$?; rm -f $@; exit $$rc; }

.PHONY: clean-data clean

# Remove the generated data header
clean-data:
	rm -f $(DATA_H)

clean: clean-data
128 changes: 128 additions & 0 deletions sw/apps/kmeans/data/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Authors: Luca Colagrande <[email protected]>

import argparse
import hjson
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import sys


sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
from data_utils import emit_license, format_scalar_definition, \
format_vector_definition # noqa: E402


# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits, the data should be aligned to 4KB.
BURST_ALIGNMENT = 4096


def golden_model(samples, n_clusters, initial_centroids, max_iter):
    """Run the scikit-learn K-means reference on the given samples.

    Args:
        samples: Samples to cluster, passed unchanged to `KMeans.fit`.
        n_clusters: Number of clusters (centroids) to fit.
        initial_centroids: Explicit starting centroids, passed as `init`.
        max_iter: Upper bound on the number of iterations.

    Returns:
        A tuple `(cluster_centers_, n_iter_)` taken from the fitted model,
        i.e. the final centroids and the number of iterations actually run.
    """
    # The initial centroids are given explicitly, so a single run is the
    # only meaningful choice. Stating n_init=1 makes this independent of
    # the scikit-learn default, which on some versions is 10 and triggers
    # a warning before falling back to one run.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init=initial_centroids,
        n_init=1,
        max_iter=max_iter
    )
    kmeans.fit(samples)
    return kmeans.cluster_centers_, kmeans.n_iter_


def visualize_clusters(samples, centroids, title=None):
    """Show a scatter plot of the samples with the centroids overlaid.

    Only the first two columns of `samples` and `centroids` are plotted.
    The call blocks in `plt.show()` until the window is closed.

    Args:
        samples: 2D array of samples, one row per sample.
        centroids: 2D array of centroids, one row per centroid.
        title: Plot title; a default is used when empty or None.
    """
    heading = title or "K-means clusters"
    # Samples as regular dots
    plt.scatter(samples[:, 0], samples[:, 1], s=30)
    # Centroids as large red crosses
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='red')
    plt.title(heading)
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()


def emit_header(**kwargs):
    """Generate the contents of the C data header for the K-means app.

    Expected keyword arguments: `n_samples`, `n_features`, `n_clusters`,
    `max_iter`, `seed`, `no_gui` and `section`.

    Returns:
        The header text: the license followed by the scalar and array
        definitions, separated by blank lines.
    """
    n_samples = kwargs['n_samples']
    n_features = kwargs['n_features']
    n_clusters = kwargs['n_clusters']
    max_iter = kwargs['max_iter']
    seed = kwargs['seed']
    show_plots = not kwargs['no_gui']

    # Random samples, drawn around n_clusters blob centers
    samples, _ = make_blobs(n_samples=n_samples, n_features=n_features,
                            centers=n_clusters, random_state=seed)

    # Random initial centroids, drawn uniformly within the per-feature
    # range spanned by the samples
    lower_bound = samples.min(axis=0)
    upper_bound = samples.max(axis=0)
    rng = np.random.default_rng(seed=seed)
    initial_centroids = rng.uniform(low=lower_bound, high=upper_bound,
                                    size=(n_clusters, n_features))

    # Show the starting point
    if show_plots:
        visualize_clusters(samples, initial_centroids)

    # Reference clustering
    centers, n_iter = golden_model(samples, n_clusters, initial_centroids, max_iter)

    # Show the result
    if show_plots:
        visualize_clusters(samples, centers)

    # Assemble the header. Note that the *initial* centroids are emitted,
    # together with the number of iterations the reference model ran.
    definitions = [
        emit_license(),
        format_scalar_definition('uint32_t', 'n_samples', n_samples),
        format_scalar_definition('uint32_t', 'n_features', n_features),
        format_scalar_definition('uint32_t', 'n_clusters', n_clusters),
        format_scalar_definition('uint32_t', 'n_iter', n_iter),
        format_vector_definition('double', 'centroids', initial_centroids.flatten(),
                                 alignment=BURST_ALIGNMENT, section=kwargs['section']),
        format_vector_definition('double', 'samples', samples.flatten(),
                                 alignment=BURST_ALIGNMENT, section=kwargs['section']),
    ]
    return '\n\n'.join(definitions)


def main():
    """Parse the command line, load the parameters and print the header.

    The generated header is written to stdout.
    """
    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument("-c", "--cfg", type=pathlib.Path, required=True,
                        help='Select param config file kernel')
    parser.add_argument('--section', type=str,
                        help='Section to store matrices in')
    parser.add_argument('--no-gui', action='store_true',
                        help='Run without visualization')
    args = parser.parse_args()

    # Read the parameters from the config file
    with args.cfg.open() as cfg_file:
        param = hjson.loads(cfg_file.read())

    # Command-line options are passed along with the file parameters
    param.update(section=args.section, no_gui=args.no_gui)

    # Emit header file
    print(emit_header(**param))


# Only generate data when run as a script, not when imported
if __name__ == '__main__':
    main()
11 changes: 11 additions & 0 deletions sw/apps/kmeans/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
n_clusters: 3,
n_features: 2,
n_samples: 128,
max_iter: 3,
seed: 42
}
192 changes: 192 additions & 0 deletions sw/apps/kmeans/src/kmeans.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Author: Luca Colagrande <[email protected]>

#include <stdint.h>

#include "math.h"
#include "snrt.h"

// Returns the squared Euclidean distance between two points, i.e. the sum
// over the first `n_features` coordinates of the squared differences.
// Returns 0 when `n_features` is 0.
double euclidean_distance_squared(uint32_t n_features, double* point1,
                                  double* point2) {
    double acc = 0;
    uint32_t dim = 0;
    // Accumulate in increasing coordinate order
    while (dim < n_features) {
        double delta = point1[dim] - point2[dim];
        acc += delta * delta;
        dim++;
    }
    return acc;
}

// Parallel implementation of Lloyd's K-means algorithm.
//
// Parameters:
//   n_samples   total number of samples
//   n_features  number of features (coordinates) per sample
//   n_clusters  number of centroids
//   n_iter      number of iterations to run
//   samples     input samples, indexed as [sample * n_features + feature]
//   centroids   initial centroids, indexed as
//               [centroid * n_features + feature]; overwritten with the
//               final centroids at the end
//
// Samples are split evenly across clusters and then across the compute
// cores of each cluster, using integer division: remainder samples are
// not processed.
//
// NOTE(review): a centroid with no assigned samples leads to a division
// by zero in the normalization step.
// NOTE(review): with n_iter == 0 the final DMA transfer copies whatever
// is stored at `final_centroids`, which has never been written.
void kmeans(uint32_t n_samples, uint32_t n_features, uint32_t n_clusters,
            uint32_t n_iter, double* samples, double* centroids) {
    // Distribute work across clusters
    uint32_t n_samples_per_cluster = n_samples / snrt_cluster_num();

    // Dynamically allocate space in TCDM
    double* local_samples = snrt_l1_alloc_cluster_local(
        n_samples_per_cluster * n_features * sizeof(double), sizeof(double));
    double* local_centroids = snrt_l1_alloc_cluster_local(
        n_clusters * n_features * sizeof(double), sizeof(double));
    uint32_t* membership = snrt_l1_alloc_cluster_local(
        n_samples_per_cluster * sizeof(uint32_t), sizeof(uint32_t));
    uint32_t* partial_membership_cnt = snrt_l1_alloc_compute_core_local(
        n_clusters * sizeof(uint32_t), sizeof(uint32_t));
    // First core's partial centroids will store final centroids
    double* final_centroids = snrt_l1_next();
    double* partial_centroids = snrt_l1_alloc_compute_core_local(
        n_clusters * n_features * sizeof(double), sizeof(double));

    // Transfer samples and initial centroids with DMA
    size_t size;
    size_t offset;
    if (snrt_is_dm_core()) {
        // Each cluster fetches its own contiguous slice of the samples
        size = n_samples_per_cluster * n_features * sizeof(double);
        offset = snrt_cluster_idx() * size;
        snrt_dma_start_1d((void*)local_samples, (void*)samples + offset, size);
        // From here on `size` is the size of the centroids array; it is
        // reused for the final transfer at the end of the function
        size = n_clusters * n_features * sizeof(double);
        snrt_dma_start_1d((void*)local_centroids, (void*)centroids, size);
        snrt_dma_wait_all();
    }

    snrt_cluster_hw_barrier();

    // Iterations of Lloyd's K-means algorithm
    for (uint32_t iter_idx = 0; iter_idx < n_iter; iter_idx++) {
        // Distribute work across compute cores in a cluster
        uint32_t n_samples_per_core;
        uint32_t start_sample_idx;
        uint32_t end_sample_idx;
        if (snrt_is_compute_core()) {
            n_samples_per_core =
                n_samples_per_cluster / snrt_cluster_compute_core_num();
            start_sample_idx = snrt_cluster_core_idx() * n_samples_per_core;
            end_sample_idx = start_sample_idx + n_samples_per_core;

            // Assignment step
            for (uint32_t centroid_idx = 0; centroid_idx < n_clusters;
                 centroid_idx++) {
                partial_membership_cnt[centroid_idx] = 0;
            }
            snrt_fpu_fence();
            for (uint32_t sample_idx = start_sample_idx;
                 sample_idx < end_sample_idx; sample_idx++) {
                double min_dist = INFINITY;
                membership[sample_idx] = 0;

                // Find the closest centroid (ties go to the lowest index)
                for (uint32_t centroid_idx = 0; centroid_idx < n_clusters;
                     centroid_idx++) {
                    double dist = euclidean_distance_squared(
                        n_features, &local_samples[sample_idx * n_features],
                        &local_centroids[centroid_idx * n_features]);
                    if (dist < min_dist) {
                        min_dist = dist;
                        membership[sample_idx] = centroid_idx;
                    }
                }
                partial_membership_cnt[membership[sample_idx]]++;
            }
        }

        // All cores must be done reading the current centroids before the
        // update step starts overwriting the partial centroids, since the
        // first core's partial centroids alias the final centroids
        snrt_global_barrier();

        if (snrt_is_compute_core()) {
            // Update step
            for (uint32_t centroid_idx = 0; centroid_idx < n_clusters;
                 centroid_idx++) {
                for (uint32_t feature_idx = 0; feature_idx < n_features;
                     feature_idx++) {
                    partial_centroids[centroid_idx * n_features + feature_idx] =
                        0;
                }
            }
            snrt_fpu_fence();
            // Sum up the samples assigned to each centroid
            for (uint32_t sample_idx = start_sample_idx;
                 sample_idx < end_sample_idx; sample_idx++) {
                for (uint32_t feature_idx = 0; feature_idx < n_features;
                     feature_idx++) {
                    partial_centroids[membership[sample_idx] * n_features +
                                      feature_idx] +=
                        local_samples[sample_idx * n_features + feature_idx];
                }
            }
            // NOTE(review): presumably required for the floating-point
            // accumulations above to have completed before the barrier
            // below is reached - confirm against the runtime documentation
            snrt_fpu_fence();
        }

        // Every compute core must have finished accumulating its partial
        // centroids before the first core reads them in the reduction
        // below. Without this barrier the first core could read partial
        // sums that the other cores are still updating.
        snrt_cluster_hw_barrier();

        if (snrt_is_compute_core() && snrt_cluster_core_idx() == 0) {
            // Intra-cluster reduction
            for (uint32_t core_idx = 1;
                 core_idx < snrt_cluster_compute_core_num(); core_idx++) {
                // Pointers to variables of the other core
                // NOTE(review): assumes the compute-core-local allocations
                // of consecutive cores are laid out back to back - confirm
                uint32_t* remote_partial_membership_cnt =
                    partial_membership_cnt + core_idx * n_clusters;
                double* remote_partial_centroids =
                    partial_centroids + core_idx * n_clusters * n_features;
                for (uint32_t centroid_idx = 0; centroid_idx < n_clusters;
                     centroid_idx++) {
                    // Accumulate membership counters
                    partial_membership_cnt[centroid_idx] +=
                        remote_partial_membership_cnt[centroid_idx];
                    // Accumulate centroid features
                    for (uint32_t feature_idx = 0; feature_idx < n_features;
                         feature_idx++) {
                        partial_centroids[centroid_idx * n_features +
                                          feature_idx] +=
                            remote_partial_centroids[centroid_idx *
                                                         n_features +
                                                     feature_idx];
                    }
                }
            }
            // Only the first core of every cluster takes part in this barrier
            snrt_inter_cluster_barrier();
            if (snrt_cluster_idx() == 0) {
                // Inter-cluster reduction
                for (uint32_t cluster_idx = 1;
                     cluster_idx < snrt_cluster_num(); cluster_idx++) {
                    // Pointers to variables of remote clusters
                    uint32_t* remote_partial_membership_cnt =
                        (uint32_t*)snrt_remote_l1_ptr(
                            partial_membership_cnt, 0, cluster_idx);
                    double* remote_partial_centroids =
                        (double*)snrt_remote_l1_ptr(partial_centroids,
                                                    0, cluster_idx);
                    for (uint32_t centroid_idx = 0;
                         centroid_idx < n_clusters; centroid_idx++) {
                        // Accumulate membership counters
                        partial_membership_cnt[centroid_idx] +=
                            remote_partial_membership_cnt[centroid_idx];
                        // Accumulate centroid features
                        for (uint32_t feature_idx = 0;
                             feature_idx < n_features; feature_idx++) {
                            final_centroids[centroid_idx * n_features +
                                            feature_idx] +=
                                remote_partial_centroids[centroid_idx *
                                                             n_features +
                                                         feature_idx];
                        }
                    }
                }
                // Normalize
                for (uint32_t centroid_idx = 0; centroid_idx < n_clusters;
                     centroid_idx++) {
                    for (uint32_t feature_idx = 0; feature_idx < n_features;
                         feature_idx++) {
                        final_centroids[centroid_idx * n_features +
                                        feature_idx] /=
                            partial_membership_cnt[centroid_idx];
                    }
                }
            }
        }

        snrt_global_barrier();
        // NOTE(review): only cluster 0 computes the normalized centroids.
        // Unless `final_centroids` resolves to cluster 0's memory from every
        // cluster, the other clusters continue with their own un-normalized
        // partial sums - confirm multi-cluster behavior.
        local_centroids = final_centroids;
    }

    snrt_cluster_hw_barrier();

    // Transfer final centroids with DMA
    // (`size` still holds the size of the centroids array on the DM core)
    if (snrt_is_dm_core() && snrt_cluster_idx() == 0) {
        snrt_dma_start_1d((void*)centroids, (void*)final_centroids, size);
        snrt_dma_wait_all();
    }
}
Loading

0 comments on commit eb38f12

Please sign in to comment.