Skip to content

Commit

Permalink
sw: Implement K-means kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Jan 25, 2024
1 parent 18554fc commit d22f384
Show file tree
Hide file tree
Showing 13 changed files with 480 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,28 @@ jobs:
run: |
./run.py sw/run.yaml --simulator verilator -j
# Tests requiring hardware FDIV unit
sw-snitch-cluster-fdiv-vlt:
  name: Simulate FDIV SW on Snitch Cluster w/ Verilator
  runs-on: ubuntu-22.04
  container:
    image: ghcr.io/pulp-platform/snitch_cluster:tracer-dma
  steps:
    - uses: actions/checkout@v2
      with:
        submodules: 'recursive'
    # Build the software with the FDIV-enabled cluster configuration
    - name: Build Software
      run: |
        bender vendor init
        make -C target/snitch_cluster CFG_OVERRIDE=target/snitch_cluster/cfg/fdiv.hjson sw
    # Build the Verilator model with the same FDIV configuration
    - name: Build Hardware
      run: |
        make -C target/snitch_cluster CFG_OVERRIDE=target/snitch_cluster/cfg/fdiv.hjson bin/snitch_cluster.vlt
    - name: Run Tests
      working-directory: target/snitch_cluster
      run: |
        ./run.py sw/fdiv.yaml --simulator verilator -j
#########################################
# Build SW on Snitch Cluster w/ Banshee #
#########################################
Expand Down
8 changes: 8 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,11 @@ snitch-cluster-banshee:
- cargo install --debug --path .
- cd ../target/snitch_cluster
- ./run.py sw/run.yaml --simulator banshee -j --run-dir runs/banshee

# Tests requiring hardware FDIV unit
snitch-cluster-fdiv-vsim:
  script:
    - cd target/snitch_cluster
    # The config file is cfg/fdiv.hjson (the .hjson extension was missing,
    # matching the path used by the GitHub Actions FDIV job)
    - make CFG_OVERRIDE=cfg/fdiv.hjson sw
    - make bin/snitch_cluster.vsim
    - ./run.py sw/fdiv.yaml --simulator vsim -j --run-dir runs/vsim
1 change: 1 addition & 0 deletions python-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ tabulate
yamllint
pyyaml
pytablewriter
scikit-learn
termcolor
pandas
pyelftools
Expand Down
1 change: 1 addition & 0 deletions sw/apps/kmeans/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/data.h
30 changes: 30 additions & 0 deletions sw/apps/kmeans/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Usage of absolute paths is required to externally include this Makefile
MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATA_DIR := $(realpath $(MK_DIR)/data)
SRC_DIR := $(realpath $(MK_DIR)/src)

# User-overridable knobs: parameter config and linker section for the data
DATA_CFG ?= $(DATA_DIR)/params.json
SECTION ?=

APP ?= kmeans
SRCS ?= $(realpath $(SRC_DIR)/main.c)
INCDIRS ?= $(DATA_DIR) $(SRC_DIR)

DATAGEN_PY = $(DATA_DIR)/datagen.py
DATA_H = $(DATA_DIR)/data.h

# Remove a half-written data.h if the generator fails, so a corrupt header
# is never considered up to date on the next build.
.DELETE_ON_ERROR:

# Regenerate the data header whenever the generator script or the
# parameter config changes.
$(DATA_H): $(DATAGEN_PY) $(DATA_CFG)
	$< -c $(DATA_CFG) --no-gui --section="$(SECTION)" > $@

.PHONY: clean-data clean

clean-data:
	$(RM) $(DATA_H)

clean: clean-data
127 changes: 127 additions & 0 deletions sw/apps/kmeans/data/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Authors: Luca Colagrande <[email protected]>

import argparse
import json5
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import sys


sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
from data_utils import emit_license, format_scalar_definition, \
format_vector_definition, format_ifdef_wrapper # noqa: E402


# AXI splits bursts crossing 4KB address boundaries. To minimize
# the occurrence of these splits the data should be aligned to 4KB
BURST_ALIGNMENT = 4096


def golden_model(samples, n_clusters, initial_centroids, max_iter):
    """Compute reference cluster centers with scikit-learn's KMeans.

    Args:
        samples: Array of shape (n_samples, n_features) to cluster.
        n_clusters: Number of clusters to form.
        initial_centroids: Array of shape (n_clusters, n_features) used as
            the explicit initial cluster centers.
        max_iter: Maximum number of k-means iterations.

    Returns:
        Tuple of (cluster_centers_, n_iter_): the final centroids and the
        number of iterations actually run.
    """
    # n_init=1: with explicit initial centroids a single initialization is
    # the only meaningful choice; passing it explicitly avoids the sklearn
    # warning emitted when init is array-like and n_init is left at default.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init=initial_centroids,
        n_init=1,
        max_iter=max_iter
    )
    kmeans.fit(samples)
    return kmeans.cluster_centers_, kmeans.n_iter_


def visualize_clusters(samples, centroids, title=None):
    """Scatter-plot the first two features of the samples and overlay the
    centroids as large red crosses. Blocks until the plot window is closed."""
    plt.scatter(samples[:, 0], samples[:, 1], s=30)
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='red')
    plt.title(title if title else "K-means clusters")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()


def emit_header(**kwargs):
    """Generate the contents of the C data header for the k-means kernel.

    Expects kwargs with keys: n_samples, n_features, n_clusters, max_iter,
    seed, no_gui, section. Returns the header text as a string.
    """
    n_samples = kwargs['n_samples']
    n_features = kwargs['n_features']
    n_clusters = kwargs['n_clusters']
    max_iter = kwargs['max_iter']
    seed = kwargs['seed']
    show_plots = not kwargs['no_gui']
    section = kwargs['section']

    # Draw clustered random samples
    samples, _ = make_blobs(
        n_samples=n_samples,
        n_features=n_features,
        centers=n_clusters,
        random_state=seed
    )

    # Draw initial centroids uniformly within the bounding box of the samples
    rng = np.random.default_rng(seed=seed)
    initial_centroids = rng.uniform(low=samples.min(axis=0), high=samples.max(axis=0),
                                    size=(n_clusters, n_features))

    # Visualize the generated samples with the initial centroids
    if show_plots:
        visualize_clusters(samples, initial_centroids)

    # Compute the golden-model clustering
    centers, n_iter = golden_model(samples, n_clusters, initial_centroids, max_iter)

    # Visualize the resulting clusters
    if show_plots:
        visualize_clusters(samples, centers)

    # Assemble the header: license, scalar parameters, then the data vectors
    # aligned to burst boundaries.
    chunks = [emit_license()]
    for name in ('n_samples', 'n_features', 'n_clusters'):
        chunks.append(format_scalar_definition('uint32_t', name, kwargs[name]))
    chunks.append(format_scalar_definition('uint32_t', 'n_iter', n_iter))
    chunks.append(format_vector_definition('double', 'centroids', initial_centroids.flatten(),
                                           alignment=BURST_ALIGNMENT, section=section))
    chunks.append(format_vector_definition('double', 'samples', samples.flatten(),
                                           alignment=BURST_ALIGNMENT, section=section))
    return '\n\n'.join(chunks)


def main():
    """Parse command-line arguments, load the JSON5 parameter file and
    print the generated data header to stdout."""
    parser = argparse.ArgumentParser(description='Generate data for kernels')
    parser.add_argument("-c", "--cfg", type=pathlib.Path, required=True,
                        help='Select param config file kernel')
    parser.add_argument('--section', type=str,
                        help='Section to store matrices in')
    parser.add_argument('--no-gui', action='store_true',
                        help='Run without visualization')
    args = parser.parse_args()

    # Load the param config file and fold the CLI options into it
    with args.cfg.open() as f:
        params = json5.loads(f.read())
    params['section'] = args.section
    params['no_gui'] = args.no_gui

    # Emit the header file on stdout
    print(emit_header(**params))
11 changes: 11 additions & 0 deletions sw/apps/kmeans/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
n_clusters: 3,
n_features: 2,
n_samples: 128,
max_iter: 3,
seed: 42
}
Loading

0 comments on commit d22f384

Please sign in to comment.