Skip to content

Commit

Permalink
[cheshire] Back-ref sw compilation flow for fmatmul
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Jul 9, 2024
1 parent e91f42c commit da1f28b
Show file tree
Hide file tree
Showing 12 changed files with 243 additions and 75 deletions.
51 changes: 28 additions & 23 deletions cheshire/Makefile
Original file line number Diff line number Diff line change
@@ -1,34 +1,39 @@
# Copyright 2024 ETH Zurich and University of Bologna.
# Solderpad Hardware License, Version 0.51, see LICENSE for details.
# SPDX-License-Identifier: SHL-0.51
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Author: Moritz Imfeld <[email protected]>
# Author: Matteo Perotti <[email protected]>
# Matteo Perotti <[email protected]>
#
# Copy and compile vector software on Cheshire

# Cheshire root repository
MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
ARA_ROOT := $(MAKEFILE_DIR)/..
BACKREF_CHS_ROOT ?= $(realpath ../../../../..)
BACKREF_CHS_XIL_SCRIPTS := $(BACKREF_CHS_ROOT)/target/xilinx/scripts
CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../..
ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../..
CHS_SW := $(CHS_ROOT)/sw
ARA_SW := $(ARA_ROOT)/cheshire/sw
ARA_APPS := $(ARA_ROOT)/apps

APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c"))
SW_C := $(wildcard $(ARA_SW)/src/*.c)
DEPS_H := $(wildcard $(ARA_SW)/include/*.h)

# Set up Bender targets and defines
# default configuration for Cheshire + Ara is 2_lanes
ARA_CONFIGURATION ?= 2_lanes
ARA_CONFIGURATION ?= 2_lanes
include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk
BOARD := vcu128
CUSTOM_BENDER_TARGETS := -t fpga -t cv64a6_imafdcv_sv39 -t cva6 -t $(BOARD) --define ARA --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen)

.PHONY: ara-chs-xilinx-$(BOARD) update_xilinx_src clean
# Get the original compiler options and add the support for vector extension
CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/)
# Tweak the compilation to include Cheshire-related headers and files
CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen)

ara-chs-xilinx-$(BOARD): update_xilinx_src
make -C $(BACKREF_CHS_ROOT) chs-xilinx-$(BOARD)
# All targets below are commands, not files. The names must match the rule
# names exactly (copy-vector-sw, with dashes) or the target is not phony.
.PHONY: chs-sw-all copy-vector-sw copy-vector-deps

update_xilinx_src:
cd $(BACKREF_CHS_ROOT) && \
bender script vivado $(CUSTOM_BENDER_TARGETS) > $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl
# Forward build command to the main Cheshire makefile and attach the correct -march.
# $(MAKE) is used instead of a literal `make` so that -j/-n and the jobserver
# are propagated to the sub-make.
chs-sw-all: copy-vector-sw copy-vector-deps
	$(MAKE) -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)"

clean:
rm $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl
rm $(MAKEFILE_DIR)/add_sources.vcu128.tcl
# Stage every header listed in DEPS_H into Cheshire's sw/tests directory
copy-vector-deps: $(DEPS_H)
	cp $(DEPS_H) $(CHS_SW)/tests

# Stage every vector program listed in SW_C into Cheshire's sw/tests directory
copy-vector-sw: $(SW_C)
	cp $(SW_C) $(CHS_SW)/tests
43 changes: 6 additions & 37 deletions cheshire/README.md
Original file line number Diff line number Diff line change
@@ -1,42 +1,11 @@
## Introduction
# Build software for Cheshire Ara

Support for FPGA synthesis was added to Ara by integrating it into Cheshire. Since we don't want to directly add our custom compile flow into Cheshire, we use a technique called back-referencing. This method allows us to utilize Cheshire's compile flow from outside the repository. Our entry point is to generate a custom `add_sources.vcu128.tcl` file with specific Ara targets, copy this file into the Cheshire directory, and then use the default Cheshire compile flow, which will use our provided TCL file.
## Compile the vector code for Cheshire

## How to Use
Compile the source files with vector extension support enabled:

### Generate Bitstream

1. **Navigate to the Root Directory**
Ensure you are in the root directory where the Makefile is located.

2. **Set up environment**
Set the `BACKREF_CHS_ROOT` variable to root directory of the Cheshire repository where you want to build the bitstream.

3. **Run the Makefile Target**:
```bash
make chs-sw-all
```
make ara-chs-xilinx-all
```
This command will:
- Generate a custom `add_sources.vcu128.tcl` file with Ara-specific targets.
- Copy this TCL file into the Cheshire directory.
- Start the Cheshire compile flow using the copied TCL file.

## Back-Referencing Explained

Here's how we use back-referencing in our setup:

1. **Generate Custom TCL File**:

- We generate a custom `add_sources.vcu128.tcl` file using the `bender script vivado` command with our specific targets (`-t fpga -t cv64a6_imafdcv_sv39 -t cva6 -t vcu128 --define ARA`).
- This custom TCL file includes all the necessary sources and configurations required for the FPGA synthesis with Cheshire + Ara.

2. **Copy Custom TCL File**:

- The generated custom TCL file is then copied into the Cheshire directory (`$(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl`).

3. **Invoke Cheshire Compile Flow**:

- With the custom TCL file in place, we invoke the Cheshire compile flow by running `make -C $(BACKREF_CHS_ROOT) chs-xilinx-all`.
- The Cheshire compile flow target depends on the `add_sources.vcu128.tcl` file, and since we have provided our custom version, it will use ours for the synthesis process.

This method ensures that we can extend and customize the compile flow for our specific needs without modifying the Cheshire repository directly.
This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time.
43 changes: 33 additions & 10 deletions cheshire/sw/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,45 @@
#
# Copy and compile vector software on Cheshire

CHS_ROOT ?= $(realpath ../../../../../..)
ARA_SW := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))
CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../..
ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../..
CHS_SW := $(CHS_ROOT)/sw
SRC := $(wildcard $(ARA_SW)/*.c) $(wildcard $(ARA_SW)/*.h)
ARA_SW := $(ARA_ROOT)/cheshire/sw
ARA_APPS := $(ARA_ROOT)/apps

APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c"))
SW_C := $(wildcard $(ARA_SW)/*.c)
DEPS_H := $(wildcard $(ARA_SW)/*.h)

ARA_CONFIGURATION ?= 2_lanes
include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk

# Get the original compiler options and add the support for vector extension
CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/)
# Tweak the compilation to include Cheshire-related headers and files
CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen)

.PHONY: chs-sw-all copy_vector_sw
# All targets below are commands, not files. The names must match the rule
# names exactly (copy-vector-sw, with dashes) or the target is not phony.
.PHONY: chs-sw-all copy-vector-sw copy-vector-deps

# Forward build command to the main Cheshire makefile and attach the correct -march
# Rename the .c vector files not to break the cheshire vanilla flow
chs-sw-all: copy-vector-sw
# Copy the shared headers first, then delegate the build to Cheshire with the
# vector-enabled flags. $(MAKE) (not a literal `make`) keeps -j/-n and the
# jobserver working across the recursive invocation.
chs-sw-all: copy-vector-deps
	$(MAKE) -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)"
for f in $(filter %.c, $(SRC)); do mv $(CHS_SW)/tests/$f $(CHS_SW)/tests/$f.bkp; done

# Copy the vector programs to cheshire
copy-vector-sw:
cp $(SRC) $(CHS_SW)/tests
# Stage every header listed in DEPS_H into Cheshire's sw/tests directory
copy-vector-deps: $(DEPS_H)
	cp $(DEPS_H) $(CHS_SW)/tests

# Stage every vector program listed in SW_C into Cheshire's sw/tests directory
copy-vector-sw: $(SW_C)
	cp $(SW_C) $(CHS_SW)/tests

# Copy the apps from the app folder to Cheshire.
#
# app_copy_template generates one phony `copy-<app>` target per application.
#   Argument 1: application name, i.e. its directory relative to ARA_APPS.
#
# Notes on the recipe:
#  - MAKE is escaped so it is expanded when the recipe runs; this keeps the
#    recursive-make handling (-j, -n, jobserver) intact.
#  - The per-app default arguments are forwarded only when def_args_<app> is
#    set in this make instance; otherwise the defaults of the apps Makefile
#    are left untouched.
#  - The prerequisite list is computed when the template is evaluated, so a
#    data.S that is first produced by the recipe would be missing from the
#    automatic prerequisite list. It is therefore added explicitly, and the
#    sort call removes the duplicate when the file already existed.
define app_copy_template
.PHONY: copy-$1

# Create the data first and then copy everything to Cheshire
copy-$1: $(shell find $(ARA_APPS)/$1 -name "*.c" -o -name "*.S" -o -name "*.h")
	$$(MAKE) -C $(ARA_APPS) $1/data.S $$(if $$(def_args_$1),def_args_$1="$$(def_args_$1)")
	cp $$(sort $$^ $(ARA_APPS)/$1/data.S) $(CHS_SW)/tests
endef
$(foreach app,$(APPS),$(eval $(call app_copy_template,$(app))))
25 changes: 23 additions & 2 deletions cheshire/sw/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
# Build software for Cheshire Ara

Compile the `.c` programs in this folder with:
## Copy the source files to Cheshire

Copy the source files from this folder into Cheshire's `sw/tests` directory.

```bash
make copy-vector-sw
```

## Copy an app to Cheshire

Copy the files of one of the apps from the `apps` folder into Cheshire's `sw/tests` directory.

Use the target `copy-$app` to move the necessary source files to Cheshire's `sw/tests`.
For example, to move the `fmatmul` app:

```bash
make copy-fmatmul
```

## Compile the vector code for Cheshire

Compile the source files with vector extension support enabled:

```bash
make chs-sw-all
```

This command will copy the necessary source files into Cheshire's `sw/tests` directory and compile them with the support for vector extension.
This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time.
1 change: 0 additions & 1 deletion cheshire/sw/encoding.h

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

#include "printf.h"

inline void cheshire_start() {
void cheshire_start() {
// Initialize Cheshire's UART
uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET);
uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500);
uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE);
}

inline void cheshire_finish() {
void cheshire_end() {
// Flush the UART
uart_write_flush(&__base_uart);
}
Expand Down
1 change: 1 addition & 0 deletions cheshire/sw/include/encoding.h
1 change: 1 addition & 0 deletions cheshire/sw/include/fmatmul.c.h
1 change: 1 addition & 0 deletions cheshire/sw/include/fmatmul.h
22 changes: 22 additions & 0 deletions cheshire/sw/vector_util.h → cheshire/sw/include/vector_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,31 @@
#include <riscv_vector.h>
#include "encoding.h"

#define start_timer()
#define stop_timer()
#define get_timer() 0

// Absolute value of x. Every use of the argument is parenthesized so that
// compound expressions such as FABS(a - b) expand correctly. The argument is
// evaluated more than once: do not pass expressions with side effects.
#define FABS(x) (((x) < 0) ? -(x) : (x))

// Set the MSTATUS_VS bits in the mstatus CSR (presumably what turns the
// vector unit on - see encoding.h for the field definition).
//
// A single asm statement with a register input operand is used: the compiler
// chooses the scratch register itself, so no register is clobbered behind its
// back and nothing can be scheduled between loading the mask and the csrs.
// `static inline` gives every translation unit including this header its own
// definition, avoiding missing-symbol link errors when the call is not inlined.
static inline void enable_rvv() {
  asm volatile("csrs mstatus, %0" ::"r"(MSTATUS_VS));
}

// Compare two doubles with an absolute tolerance.
//   a, b:      values to compare
//   threshold: largest accepted absolute difference
// Returns 1 when |a - b| <= threshold, 0 when the difference is larger.
//
// `static inline` gives every translation unit including this header its own
// definition; a plain C99 `inline` definition provides no external symbol and
// fails to link whenever the compiler decides not to inline the call.
static inline int similarity_check(double a, double b, double threshold) {
  double diff = a - b;
  // Magnitude of the difference, computed locally.
  double magnitude = (diff < 0) ? -diff : diff;
  return (magnitude > threshold) ? 0 : 1;
}

// Single-precision variant of the absolute-tolerance comparison.
//   a, b:      values to compare
//   threshold: largest accepted absolute difference
// Returns 1 when |a - b| <= threshold, 0 when the difference is larger.
//
// `static inline` gives every translation unit including this header its own
// definition; a plain C99 `inline` definition provides no external symbol and
// fails to link whenever the compiler decides not to inline the call.
static inline int similarity_check_32b(float a, float b, float threshold) {
  float diff = a - b;
  // Magnitude of the difference, computed locally.
  float magnitude = (diff < 0) ? -diff : diff;
  return (magnitude > threshold) ? 0 : 1;
}

#endif
126 changes: 126 additions & 0 deletions cheshire/sw/src/fmatmul.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Matteo Perotti <[email protected]>
//
// fmatmul wrapper for Cheshire

#include "regs/cheshire.h"
#include "dif/clint.h"
#include "dif/uart.h"
#include "params.h"
#include "util.h"

#include "cheshire_util.h"
#include "vector_util.h"

#include "fmatmul.c.h"

#ifndef _MM_SIZE_
#define _MM_SIZE_ 32
#endif

// Define Matrix dimensions:
// C = AB with A=[MxN], B=[NxP], C=[MxP]
uint64_t M = _MM_SIZE_;
uint64_t N = _MM_SIZE_;
uint64_t P = _MM_SIZE_;

// Matrix buffers, sized for _MM_SIZE_ x _MM_SIZE_ elements each
double a[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
double b[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
double c[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));
// Gold results
double g[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES)));

#define THRESHOLD 0.001

// Compare an R x C result matrix against the golden one, element by element
// in row-major order, using similarity_check with the given threshold.
// Returns 0 when every element matches. On the first mismatch it returns the
// flat index of the offending element, except for index 0, which is reported
// as -1 so that it cannot be confused with the "all good" return value.
int verify_matrix(double *result, double *gold, size_t R, size_t C,
                  double threshold) {
  uint64_t row = 0;
  while (row < R) {
    uint64_t col = 0;
    while (col < C) {
      int flat = row * C + col;
      int matches = similarity_check(result[flat], gold[flat], threshold);
      if (!matches) {
        if ((row + col) == 0)
          return -1;
        return flat;
      }
      ++col;
    }
    ++row;
  }
  return 0;
}

// Entry point: builds the input matrices, computes a scalar reference result,
// runs the vector fmatmul kernel, prints the timing figures and compares the
// two results.
// Returns 0 on success, or the non-zero code produced by verify_matrix.
int main() {
  // Bring the platform up before the first printf: cheshire_start() configures
  // the UART. NOTE(review): assumes printf is routed through that UART -
  // confirm against printf.h.
  cheshire_start();
  enable_rvv();

  printf("\n");
  printf("=============\n");
  printf("=  FMATMUL  =\n");
  printf("=============\n");
  printf("\n");
  printf("------------------------------------------------------------\n");
  // M, N and P are uint64_t; cast so the arguments match the %d conversions.
  printf("Calculating a (%d x %d) x (%d x %d) matrix multiplication...\n",
         (int)M, (int)N, (int)N, (int)P);
  printf("------------------------------------------------------------\n");
  printf("\n");

  unsigned int s = M;

  // Initialize matrices
  for (unsigned int i = 0; i < s; ++i) {
    for (unsigned int k = 0; k < s; ++k) {
      a[k + i * s] = (double)(i + k);
    }
  }
  for (unsigned int k = 0; k < s; ++k) {
    for (unsigned int j = 0; j < s; ++j) {
      // Subtract in floating point: k - j on unsigned operands would wrap to
      // a huge positive value whenever k < j.
      b[j + k * s] = (double)k - (double)j;
    }
  }

  // Run scalar check
  printf("Calculating fmatmul on scalar core...\n");
  for (unsigned int i = 0; i < s; ++i) {
    for (unsigned int j = 0; j < s; ++j) {
      double sum = 0;
      for (unsigned int k = 0; k < s; ++k) {
        sum += a[k + i * s] * b[j + k * s];
      }
      // Row-major store, same layout as a and b (row i, column j). Storing at
      // i + j*s would write the transposed matrix.
      g[j + i * s] = sum;
    }
  }

  // Run vector kernel
  printf("Calculating fmatmul on vector core...\n");
  start_timer();
  fmatmul(c, a, b, s, s, s);
  stop_timer();

  // Metrics
  int64_t runtime = get_timer();
  // Report zero instead of dividing by zero when no cycle count is available
  // (the timer macros may be stubs that always yield 0).
  float performance = (runtime > 0) ? 2.0 * s * s * s / runtime : 0;
  float utilization = 100 * performance / (2.0 * NR_LANES);

  printf("The execution took %ld cycles.\n", (long)runtime);
  printf("The performance is %f FLOP/cycle (%f%% utilization).\n",
         performance, utilization);

  // Verify the result only for s == M (to keep it simple)
  if (s == M) {
    printf("Verifying result...\n");
    int error = verify_matrix(c, g, s, s, THRESHOLD);
    if (error != 0) {
      // verify_matrix reports a mismatch at index 0 as -1; map it back before
      // using it as an array index.
      int bad_idx = (error < 0) ? 0 : error;
      printf("Error code %d\n", error);
      // c holds doubles, so print the element with %f.
      printf("c[%d]=%f\n", bad_idx, c[bad_idx]);
      // Flush the UART so the diagnostics are not lost on the early return.
      cheshire_end();
      return error;
    } else {
      printf("Passed.\n");
    }
  }

  cheshire_end();

  return 0;
}
File renamed without changes.

0 comments on commit da1f28b

Please sign in to comment.