From da1f28b159b48070845e96ad37f496e9b6e15981 Mon Sep 17 00:00:00 2001 From: Matteo Perotti Date: Fri, 5 Jul 2024 17:44:31 +0200 Subject: [PATCH] [cheshire] Back-ref sw compilation flow for fmatmul --- cheshire/Makefile | 51 +++++---- cheshire/README.md | 43 ++------ cheshire/sw/Makefile | 43 ++++++-- cheshire/sw/README.md | 25 ++++- cheshire/sw/encoding.h | 1 - cheshire/sw/{ => include}/cheshire_util.h | 4 +- cheshire/sw/include/encoding.h | 1 + cheshire/sw/include/fmatmul.c.h | 1 + cheshire/sw/include/fmatmul.h | 1 + cheshire/sw/{ => include}/vector_util.h | 22 ++++ cheshire/sw/src/fmatmul.c | 126 ++++++++++++++++++++++ cheshire/sw/{ => src}/vector_helloworld.c | 0 12 files changed, 243 insertions(+), 75 deletions(-) delete mode 120000 cheshire/sw/encoding.h rename cheshire/sw/{ => include}/cheshire_util.h (90%) create mode 120000 cheshire/sw/include/encoding.h create mode 120000 cheshire/sw/include/fmatmul.c.h create mode 120000 cheshire/sw/include/fmatmul.h rename cheshire/sw/{ => include}/vector_util.h (54%) create mode 100644 cheshire/sw/src/fmatmul.c rename cheshire/sw/{ => src}/vector_helloworld.c (100%) diff --git a/cheshire/Makefile b/cheshire/Makefile index 5e88fced9..0194ece01 100644 --- a/cheshire/Makefile +++ b/cheshire/Makefile @@ -1,34 +1,39 @@ # Copyright 2024 ETH Zurich and University of Bologna. -# Solderpad Hardware License, Version 0.51, see LICENSE for details. -# SPDX-License-Identifier: SHL-0.51 +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 # -# Author: Moritz Imfeld -# Author: Matteo Perotti +# Matteo Perotti # +# Copy and compile vector software on Cheshire -# Chshire root reposiotry -MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) -ARA_ROOT := $(MAKEFILE_DIR)/.. -BACKREF_CHS_ROOT ?= $(realpath ../../../../..) -BACKREF_CHS_XIL_SCRIPTS := $(BACKREF_CHS_ROOT)/target/xilinx/scripts +CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../.. 
+ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../.. +CHS_SW := $(CHS_ROOT)/sw +ARA_SW := $(ARA_ROOT)/cheshire/sw +ARA_APPS := $(ARA_ROOT)/apps +APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c")) +SW_C := $(wildcard $(ARA_SW)/src/*.c) +DEPS_H := $(wildcard $(ARA_SW)/include/*.h) -# Set up Bender targets and defines -# default configuration for Cheshire + Ara is 2_lanes -ARA_CONFIGURATION ?= 2_lanes +ARA_CONFIGURATION ?= 2_lanes include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk -BOARD := vcu128 -CUSTOM_BENDER_TARGETS := -t fpga -t cv64a6_imafdcv_sv39 -t cva6 -t $(BOARD) --define ARA --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen) -.PHONY: ara-chs-xilinx-$(BOARD) update_xilinx_src clean +# Get the original compiler options and add the support for vector extension +CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/) +# Tweak the compilation to include Cheshire-related headers and files +CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen) -ara-chs-xilinx-$(BOARD): update_xilinx_src - make -C $(BACKREF_CHS_ROOT) chs-xilinx-$(BOARD) +.PHONY: chs-sw-all copy_vector_sw copy-vector-deps -update_xilinx_src: - cd $(BACKREF_CHS_ROOT) && \ - bender script vivado $(CUSTOM_BENDER_TARGETS) > $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl +# Forward build command to the main Cheshire makefile and attach the correct -march +chs-sw-all: copy-vector-sw copy-vector-deps + make -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)" -clean: - rm $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl - rm $(MAKEFILE_DIR)/add_sources.vcu128.tcl +# Copy the dependencies from this folder to Cheshire +copy-vector-deps: $(DEPS_H) + cp $^ $(CHS_SW)/tests + +# Copy the vector programs from the src folder to cheshire +copy-vector-sw: $(SW_C) + cp $^ $(CHS_SW)/tests diff --git a/cheshire/README.md b/cheshire/README.md index d59cae828..3eb04fb86 100644 --- a/cheshire/README.md 
+++ b/cheshire/README.md @@ -1,42 +1,11 @@ -## Introduction +# Build software for Cheshire Ara -Support for FPGA synthesis was added to Ara by integrating it into Cheshire. Since we don't want to directly add our custom compile flow into Cheshire, we use a technique called back-referencing. This method allows us to utilize Cheshire's compile flow from outside the repository. Our entry point is to generate a custom `add_sources.vcu128.tcl` file with specific Ara targets, copy this file into the Cheshire directory, and then use the default Cheshire compile flow, which will use our provided TCL file. +## Compile the vector code for Cheshire -## How to Use +Compile the source files with the vector extension support enabled: -### Generate Bitstream - -1. **Navigate to the Root Directory** - Ensure you are in the root directory where the Makefile is located. - -2. **Set up environment** - Set the `BACKREF_CHS_ROOT` variable to root directory of the Cheshire repository where you want to build the bitstream. - -3. **Run the Makefile Target**: +```bash +make chs-sw-all ``` -make ara-chs-xilinx-all -``` -This command will: -- Generate a custom `add_sources.vcu128.tcl` file with Ara-specific targets. -- Copy this TCL file into the Cheshire directory. -- Start the Cheshire compile flow using the copied TCL file. - -## Back-Referencing Explained - -Here's how we use back-referencing in our setup: - -1. **Generate Custom TCL File**: - - - We generate a custom `add_sources.vcu128.tcl` file using the `bender script vivado` command with our specific targets (`-t fpga -t cv64a6_imafdcv_sv39 -t cva6 -t vcu128 --define ARA`). - - This custom TCL file includes all the necessary sources and configurations required for the FPGA synthesis with Cheshire + Ara. - -2. **Copy Custom TCL File**: - - - The generated custom TCL file is then copied into the Cheshire directory (`$(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl`). - -3. 
**Invoke Cheshire Compile Flow**: - - - With the custom TCL file in place, we invoke the Cheshire compile flow by running `make -C $(BACKREF_CHS_ROOT) chs-xilinx-all`. - - The Cheshire compile flow target depends on the `add_sources.vcu128.tcl` file, and since we have provided our custom version, it will use ours for the synthesis process. -This method ensures that we can extend and customize the compile flow for our specific needs without modifying the Cheshire repository directly. +This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time. diff --git a/cheshire/sw/Makefile b/cheshire/sw/Makefile index 56bd291cc..c76737e5d 100644 --- a/cheshire/sw/Makefile +++ b/cheshire/sw/Makefile @@ -6,22 +6,45 @@ # # Copy and compile vector software on Cheshire -CHS_ROOT ?= $(realpath ../../../../../..) -ARA_SW := $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) +CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../.. +ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../.. 
CHS_SW := $(CHS_ROOT)/sw -SRC := $(wildcard $(ARA_SW)/*.c) $(wildcard $(ARA_SW)/*.h) +ARA_SW := $(ARA_ROOT)/cheshire/sw +ARA_APPS := $(ARA_ROOT)/apps + +APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c")) +SW_C := $(wildcard $(ARA_SW)/*.c) +DEPS_H := $(wildcard $(ARA_SW)/*.h) + +ARA_CONFIGURATION ?= 2_lanes +include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk # Get the original compiler options and add the support for vector extension CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/) +# Tweak the compilation to include Cheshire-related headers and files +CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen) -.PHONY: chs-sw-all copy_vector_sw +.PHONY: chs-sw-all copy_vector_sw copy-vector-deps # Forward build command to the main Cheshire makefile and attach the correct -march -# Rename the .c vector files not to break the cheshire vanilla flow -chs-sw-all: copy-vector-sw +chs-sw-all: copy-vector-deps make -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)" - for f in $(filter %.c, $(SRC)); do mv $(CHS_SW)/tests/$f $(CHS_SW)/tests/$f.bkp; done -# Copy the vector programs to cheshire -copy-vector-sw: - cp $(SRC) $(CHS_SW)/tests +# Copy the dependencies from this folder to Cheshire +copy-vector-deps: $(DEPS_H) + cp $^ $(CHS_SW)/tests + +# Copy the vector programs from this folder to Cheshire +copy-vector-sw: $(SW_C) + cp $^ $(CHS_SW)/tests + +# Copy the apps from the app folder to Cheshire +define app_copy_template +.PHONY: copy-$1 + +# Create the data first and then copy everything to Cheshire +copy-$1: $(shell find $(ARA_APPS)/$(1) -name "*.c" -o -name "*.S" -o -name "*.h") + $(MAKE) -C $(ARA_APPS) $1/data.S def_args_$$1="$(def_args_$1)" + cp $$^ $(CHS_SW)/tests +endef +$(foreach app,$(APPS),$(eval $(call app_copy_template,$(app)))) diff --git a/cheshire/sw/README.md b/cheshire/sw/README.md index e4be744d2..e11e9a934 100644 --- a/cheshire/sw/README.md +++ 
b/cheshire/sw/README.md @@ -1,9 +1,30 @@ # Build software for Cheshire Ara -Compile the `.c` programs in this folder with: +## Copy the source files to Cheshire + +Copy the source files from this folder into Cheshire's `sw/tests` directory. + +```bash +make copy-vector-sw +``` + +## Copy an app to Cheshire + +Copy one of the app files from the `apps` folder into Cheshire's `sw/tests` directory. + +Use the target `copy-$app` to copy the necessary source files to Cheshire's `sw/tests`. +For example, to copy the `fmatmul` app: + +```bash +make copy-fmatmul +``` + +## Compile the vector code for Cheshire + +Compile the source files with the vector extension support enabled: ```bash make chs-sw-all ``` -This command will copy the necessary source files into Cheshire's `sw/tests` directory and compile them with the support for vector extension. \ No newline at end of file +This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time. diff --git a/cheshire/sw/encoding.h b/cheshire/sw/encoding.h deleted file mode 120000 index d2d456631..000000000 --- a/cheshire/sw/encoding.h +++ /dev/null @@ -1 +0,0 @@ -../../apps/common/encoding.h \ No newline at end of file diff --git a/cheshire/sw/cheshire_util.h b/cheshire/sw/include/cheshire_util.h similarity index 90% rename from cheshire/sw/cheshire_util.h rename to cheshire/sw/include/cheshire_util.h index ca1bd5b29..9d57d7cc4 100644 --- a/cheshire/sw/cheshire_util.h +++ b/cheshire/sw/include/cheshire_util.h @@ -11,14 +11,14 @@ #include "printf.h" -inline void cheshire_start() { +void cheshire_start() { // Initialize Cheshire's UART uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE); } -inline void cheshire_finish() { +void cheshire_end() { // Flush teh UART uart_write_flush(&__base_uart); } diff --git a/cheshire/sw/include/encoding.h 
b/cheshire/sw/include/encoding.h new file mode 120000 index 000000000..674da338d --- /dev/null +++ b/cheshire/sw/include/encoding.h @@ -0,0 +1 @@ +../../../apps/common/encoding.h \ No newline at end of file diff --git a/cheshire/sw/include/fmatmul.c.h b/cheshire/sw/include/fmatmul.c.h new file mode 120000 index 000000000..1aa8fb602 --- /dev/null +++ b/cheshire/sw/include/fmatmul.c.h @@ -0,0 +1 @@ +../../../apps/fmatmul/kernel/fmatmul.c \ No newline at end of file diff --git a/cheshire/sw/include/fmatmul.h b/cheshire/sw/include/fmatmul.h new file mode 120000 index 000000000..928a355e6 --- /dev/null +++ b/cheshire/sw/include/fmatmul.h @@ -0,0 +1 @@ +../../../apps/fmatmul/kernel/fmatmul.h \ No newline at end of file diff --git a/cheshire/sw/vector_util.h b/cheshire/sw/include/vector_util.h similarity index 54% rename from cheshire/sw/vector_util.h rename to cheshire/sw/include/vector_util.h index 9526ffb66..77e032ef6 100644 --- a/cheshire/sw/vector_util.h +++ b/cheshire/sw/include/vector_util.h @@ -13,9 +13,31 @@ #include #include "encoding.h" +#define start_timer() +#define stop_timer() +#define get_timer() 0 + +#define FABS(x) ((x < 0) ? -x : x) + inline void enable_rvv() { asm volatile ("li t0, %0" :: "i"(MSTATUS_VS)); asm volatile ("csrs mstatus, t0" ); } +inline int similarity_check(double a, double b, double threshold) { + double diff = a - b; + if (FABS(diff) > threshold) + return 0; + else + return 1; +} + +inline int similarity_check_32b(float a, float b, float threshold) { + float diff = a - b; + if (FABS(diff) > threshold) + return 0; + else + return 1; +} + #endif diff --git a/cheshire/sw/src/fmatmul.c b/cheshire/sw/src/fmatmul.c new file mode 100644 index 000000000..54ec72c02 --- /dev/null +++ b/cheshire/sw/src/fmatmul.c @@ -0,0 +1,126 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Matteo Perotti +// +// fmatmul wrapper for Cheshire + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" + +#include "cheshire_util.h" +#include "vector_util.h" + +#include "fmatmul.c.h" + +#ifndef _MM_SIZE_ +#define _MM_SIZE_ 32 +#endif + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +uint64_t M = _MM_SIZE_; +uint64_t N = _MM_SIZE_; +uint64_t P = _MM_SIZE_; + +// Max matrix size: 256x256 +double a[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +double b[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +double c[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +// Gold results +double g[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); + +#define THRESHOLD 0.001 + +// Verify the matrix +int verify_matrix(double *result, double *gold, size_t R, size_t C, + double threshold) { + for (uint64_t i = 0; i < R; ++i) { + for (uint64_t j = 0; j < C; ++j) { + int idx = i * C + j; + if (!similarity_check(result[idx], gold[idx], threshold)) { + return (i + j) == 0 ? 
-1 : idx; + } + } + } + return 0; +} + +int main() { + printf("\n"); + printf("=============\n"); + printf("= FMATMUL =\n"); + printf("=============\n"); + printf("\n"); + printf("------------------------------------------------------------\n"); + printf("Calculating a (%d x %d) x (%d x %d) matrix multiplication...\n", M, + N, N, P); + printf("------------------------------------------------------------\n"); + printf("\n"); + + cheshire_start(); + enable_rvv(); + + unsigned int s = M; + + // Initialize matrices + for (unsigned int i = 0; i < s; ++i) { + for (unsigned int k = 0; k < s; ++k) { + a[k + i*s] = (double) (i + k); + } + } + for (unsigned int k = 0; k < s; ++k) { + for (unsigned int j = 0; j < s; ++j) { + b[j + k*s] = (double) (k - j); + } + } + + // Run scalar check + printf("Calculating fmatmul on scalar core...\n"); + for (unsigned int i = 0; i < s; ++i) { + for (unsigned int j = 0; j < s; ++j) { + double sum = 0; + for (unsigned int k = 0; k < s; ++k) { + sum += a[k + i * s] * b[j + k * s]; + } + g[i + j*s] = sum; + } + } + + // Run vector kernel + printf("Calculating fmatmul on vector core...\n"); + start_timer(); + fmatmul(c, a, b, s, s, s); + stop_timer(); + + // Metrics + int64_t runtime = get_timer(); + float performance = 2.0 * s * s * s / runtime; + float utilization = 100 * performance / (2.0 * NR_LANES); + + printf("The execution took %d cycles.\n", runtime); + printf("The performance is %f FLOP/cycle (%f%% utilization).\n", + performance, utilization); + + // Verify the result only for s == M (to keep it simple) + if (s == M) { + printf("Verifying result...\n"); + int error = verify_matrix(c, g, s, s, THRESHOLD); + if (error != 0) { + printf("Error code %d\n", error); + printf("c[%d]=%d\n", error, c[error]); + return error; + } else { + printf("Passed.\n"); + } + } + + + cheshire_end(); + + return 0; +} diff --git a/cheshire/sw/vector_helloworld.c b/cheshire/sw/src/vector_helloworld.c similarity index 100% rename from 
cheshire/sw/vector_helloworld.c rename to cheshire/sw/src/vector_helloworld.c