Skip to content

Commit

Permalink
sw: Implement K-means kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Jan 25, 2024
1 parent 18554fc commit d22f384
Show file tree
Hide file tree
Showing 13 changed files with 480 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,28 @@ jobs:
run: |
./run.py sw/run.yaml --simulator verilator -j
# Tests requiring hardware FDIV unit
sw-snitch-cluster-fdiv-vlt:
  name: Simulate FDIV SW on Snitch Cluster w/ Verilator
  runs-on: ubuntu-22.04
  container:
    image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma
  steps:
    - uses: actions/checkout@v2
      with:
        submodules: 'recursive'
    # Build the software with the FDIV-enabled cluster configuration
    - name: Build Software
      run: |
        bender vendor init
        make -C target/snitch_cluster CFG_OVERRIDE=target/snitch_cluster/cfg/fdiv.hjson sw
    # Build the Verilator model with the same FDIV configuration
    - name: Build Hardware
      run: |
        make -C target/snitch_cluster CFG_OVERRIDE=target/snitch_cluster/cfg/fdiv.hjson bin/snitch_cluster.vlt
    - name: Run Tests
      working-directory: target/snitch_cluster
      run: |
        ./run.py sw/fdiv.yaml --simulator verilator -j
#########################################
# Build SW on Snitch Cluster w/ Banshee #
#########################################
Expand Down
8 changes: 8 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,11 @@ snitch-cluster-banshee:
- cargo install --debug --path .
- cd ../target/snitch_cluster
- ./run.py sw/run.yaml --simulator banshee -j --run-dir runs/banshee

# Tests requiring hardware FDIV unit
snitch-cluster-fdiv-vsim:
  script:
    - cd target/snitch_cluster
    # The config file is cfg/fdiv.hjson (the .hjson extension was missing,
    # matching the path used by the GitHub Actions FDIV job)
    - make CFG_OVERRIDE=cfg/fdiv.hjson sw
    - make bin/snitch_cluster.vsim
    - ./run.py sw/fdiv.yaml --simulator vsim -j --run-dir runs/vsim
1 change: 1 addition & 0 deletions python-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ tabulate
yamllint
pyyaml
pytablewriter
scikit-learn
termcolor
pandas
pyelftools
Expand Down
1 change: 1 addition & 0 deletions sw/apps/kmeans/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/data.h
30 changes: 30 additions & 0 deletions sw/apps/kmeans/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Usage of absolute paths is required to externally include this Makefile
MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATA_DIR := $(realpath $(MK_DIR)/data)
SRC_DIR := $(realpath $(MK_DIR)/src)

# User-overridable knobs: parameter config and linker section for the data
DATA_CFG ?= $(DATA_DIR)/params.json
SECTION ?=

APP ?= kmeans
SRCS ?= $(realpath $(SRC_DIR)/main.c)
INCDIRS ?= $(DATA_DIR) $(SRC_DIR)

DATAGEN_PY = $(DATA_DIR)/datagen.py
DATA_H = $(DATA_DIR)/data.h

# Remove a half-written data.h if the generator fails, so a corrupt header
# is never considered up to date on the next build.
.DELETE_ON_ERROR:

# Regenerate the data header whenever the generator script or the
# parameter config changes.
$(DATA_H): $(DATAGEN_PY) $(DATA_CFG)
	$< -c $(DATA_CFG) --no-gui --section="$(SECTION)" > $@

.PHONY: clean-data clean

clean-data:
	$(RM) $(DATA_H)

clean: clean-data
127 changes: 127 additions & 0 deletions sw/apps/kmeans/data/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Authors: Luca Colagrande <[email protected]>

import argparse
import json5
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import sys


sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
from data_utils import emit_license, format_scalar_definition, \
format_vector_definition, format_ifdef_wrapper # noqa: E402


# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096


def golden_model(samples, n_clusters, initial_centroids, max_iter):
    """Compute reference cluster centers with scikit-learn's KMeans.

    Args:
        samples: Array of shape (n_samples, n_features) to cluster.
        n_clusters: Number of clusters to form.
        initial_centroids: Array of shape (n_clusters, n_features) used as
            the explicit initial cluster centers.
        max_iter: Maximum number of k-means iterations.

    Returns:
        Tuple of (cluster_centers_, n_iter_): the final centroids and the
        number of iterations actually run.
    """
    # n_init=1: with explicit initial centroids a single initialization is
    # the only meaningful choice; passing it explicitly avoids the sklearn
    # warning emitted when init is array-like and n_init is left at default.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init=initial_centroids,
        n_init=1,
        max_iter=max_iter
    )
    kmeans.fit(samples)
    return kmeans.cluster_centers_, kmeans.n_iter_


def visualize_clusters(samples, centroids, title=None):
    """Scatter-plot the first two features of the samples and overlay the
    centroids as large red crosses. Blocks until the plot window is closed."""
    plt.scatter(samples[:, 0], samples[:, 1], s=30)
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='red')
    plt.title(title if title else "K-means clusters")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()


def emit_header(**kwargs):
    """Generate the contents of the C data header for the k-means kernel.

    Expects kwargs with keys: n_samples, n_features, n_clusters, max_iter,
    seed, no_gui, section. Returns the header text as a string.
    """
    n_samples = kwargs['n_samples']
    n_features = kwargs['n_features']
    n_clusters = kwargs['n_clusters']
    max_iter = kwargs['max_iter']
    seed = kwargs['seed']
    show_plots = not kwargs['no_gui']
    section = kwargs['section']

    # Draw clustered random samples
    samples, _ = make_blobs(
        n_samples=n_samples,
        n_features=n_features,
        centers=n_clusters,
        random_state=seed
    )

    # Draw initial centroids uniformly within the bounding box of the samples
    rng = np.random.default_rng(seed=seed)
    initial_centroids = rng.uniform(low=samples.min(axis=0), high=samples.max(axis=0),
                                    size=(n_clusters, n_features))

    # Visualize the generated samples with the initial centroids
    if show_plots:
        visualize_clusters(samples, initial_centroids)

    # Compute the golden-model clustering
    centers, n_iter = golden_model(samples, n_clusters, initial_centroids, max_iter)

    # Visualize the resulting clusters
    if show_plots:
        visualize_clusters(samples, centers)

    # Assemble the header: license, scalar parameters, then the data vectors
    # aligned to burst boundaries.
    chunks = [emit_license()]
    for name in ('n_samples', 'n_features', 'n_clusters'):
        chunks.append(format_scalar_definition('uint32_t', name, kwargs[name]))
    chunks.append(format_scalar_definition('uint32_t', 'n_iter', n_iter))
    chunks.append(format_vector_definition('double', 'centroids', initial_centroids.flatten(),
                                           alignment=BURST_ALIGNMENT, section=section))
    chunks.append(format_vector_definition('double', 'samples', samples.flatten(),
                                           alignment=BURST_ALIGNMENT, section=section))
    return '\n\n'.join(chunks)


def main():
    """Parse command-line arguments, load the JSON5 parameter file and
    print the generated data header to stdout."""
    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument("-c", "--cfg", type=pathlib.Path, required=True,
                        help='Select param config file kernel')
    parser.add_argument('--section', type=str,
                        help='Section to store matrices in')
    parser.add_argument('--no-gui', action='store_true',
                        help='Run without visualization')
    args = parser.parse_args()

    # Load the param config file and fold the CLI options into it
    with args.cfg.open() as f:
        params = json5.loads(f.read())
    params['section'] = args.section
    params['no_gui'] = args.no_gui

    # Emit the header file on stdout
    print(emit_header(**params))
11 changes: 11 additions & 0 deletions sw/apps/kmeans/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
n_clusters: 3,
n_features: 2,
n_samples: 128,
max_iter: 3,
seed: 42
}
Loading

0 comments on commit d22f384

Please sign in to comment.