Skip to content

Commit

Permalink
dot: Various changes
Browse files Browse the repository at this point in the history
  • Loading branch information
colluca committed Jun 12, 2024
1 parent bee30fd commit 55b09e9
Show file tree
Hide file tree
Showing 16 changed files with 204 additions and 220 deletions.
1 change: 1 addition & 0 deletions sw/blas/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/data/data.h
1 change: 0 additions & 1 deletion sw/blas/axpy/.gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion sw/blas/blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
#pragma once

#include "axpy/src/axpy.h"
#include "dot/src/dot.h"
#include "gemm/src/gemm.h"
#include "dotp/src/dotp.h"
4 changes: 2 additions & 2 deletions sw/blas/dotp/Makefile → sw/blas/dot/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 ETH Zurich and University of Bologna.
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,7 +10,7 @@ SRC_DIR := $(realpath $(MK_DIR)/src)
DATA_CFG ?= $(DATA_DIR)/params.json
SECTION ?=

APP ?= dotp
APP ?= dot
SRCS ?= $(realpath $(SRC_DIR)/main.c)
INCDIRS ?= $(dir $(DATA_H)) $(SRC_DIR)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,10 +9,10 @@

sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
from data_utils import format_scalar_definition, format_array_definition, \
format_array_declaration, format_ifdef_wrapper, DataGen # noqa: E402
format_scalar_declaration, format_ifdef_wrapper, DataGen # noqa: E402


class AxpyDataGen(DataGen):
class DotDataGen(DataGen):

MIN = -1000
MAX = +1000
Expand All @@ -38,8 +38,8 @@ def emit_header(self, **kwargs):
section=kwargs['section'])]
header += [format_array_definition('double', 'y', y, alignment=self.BURST_ALIGNMENT,
section=kwargs['section'])]
header += [format_array_declaration('double', 'z', [n], alignment=self.BURST_ALIGNMENT,
section=kwargs['section'])]
header += [format_scalar_declaration('double', 'result', alignment=self.BURST_ALIGNMENT,
section=kwargs['section'])]
result_def = format_scalar_definition('double', 'g', g)
header += [format_ifdef_wrapper('BIST', result_def)]
header = '\n\n'.join(header)
Expand All @@ -48,4 +48,4 @@ def emit_header(self, **kwargs):


if __name__ == '__main__':
sys.exit(AxpyDataGen().main())
sys.exit(DotDataGen().main())
15 changes: 7 additions & 8 deletions sw/blas/dotp/scripts/verify.py → sw/blas/dot/scripts/verify.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
#!/usr/bin/env python3
# Copyright 2023 ETH Zurich and University of Bologna.
# Copyright 2024 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

import sys
from pathlib import Path
from datagen import AxpyDataGen
from datagen import DotDataGen

sys.path.append(str(Path(__file__).parent / '../../../../util/sim/'))
from verif_utils import Verifier # noqa: E402


class AxpyVerifier(Verifier):
class DotVerifier(Verifier):

OUTPUT_UIDS = ['z']
OUTPUT_UIDS = ['result']

def get_actual_results(self):
return self.get_output_from_symbol('z', 'double')
return self.get_output_from_symbol('result', 'double')

def get_expected_results(self):
a = self.get_input_from_symbol('a', 'double')
x = self.get_input_from_symbol('x', 'double')
y = self.get_input_from_symbol('y', 'double')
return AxpyDataGen().golden_model(a, x, y)
return DotDataGen().golden_model(x, y)

def check_results(self, *args):
return super().check_results(*args, rtol=1e-10)


if __name__ == "__main__":
sys.exit(AxpyVerifier().main())
sys.exit(DotVerifier().main())
145 changes: 145 additions & 0 deletions sw/blas/dot/src/dot.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include "snrt.h"

/**
 * Single-core dot product of two double vectors, implemented as one fmadd.d
 * repeated by frep.o over two SSR read streams.
 *
 * Declared `static inline` (like dot() in this header): a plain `inline`
 * definition in a header provides no external definition in C99/C11, so any
 * call the compiler chooses not to inline would fail to link.
 *
 * @param n      Number of elements. Must be >= 1: the frep repetition operand
 *               is computed as n - 1 in uint32_t arithmetic and wraps around
 *               for n == 0.
 * @param x      First input vector (n doubles), read through SNRT_SSR_DM0.
 * @param y      Second input vector (n doubles), read through SNRT_SSR_DM1.
 * @param output output[0] receives the accumulated sum.
 */
static inline void dot_seq(uint32_t n, double *x, double *y, double *output) {
    // Start of SSR region.
    // The empty asm marks ft0/ft1 as written so the compiler treats them as
    // live for the duration of the SSR region.
    register volatile double ft0 asm("ft0");
    register volatile double ft1 asm("ft1");
    asm volatile("" : "=f"(ft0), "=f"(ft1));

    // Both streams walk n contiguous doubles.
    snrt_ssr_loop_1d(SNRT_SSR_DM0, n, sizeof(double));
    snrt_ssr_loop_1d(SNRT_SSR_DM1, n, sizeof(double));

    snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, x);
    snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, y);

    // Accumulator, pinned to fs0 and cleared before streaming starts.
    register volatile double res_ssr asm("fs0") = 0;

    snrt_ssr_enable();

    // The repetition operand passed to frep.o is n - 1
    // (NOTE(review): presumably frep executes its body operand + 1 times).
    const register uint32_t Nm1 asm("t0") = n - 1;
    asm volatile(
        "frep.o %[n_frep], 1, 0, 0 \n"
        "fmadd.d %0, ft0, ft1, %0"
        : "=f"(res_ssr) /* output operands */
        : "f"(ft0), "f"(ft1), "0"(res_ssr),
          [ n_frep ] "r"(Nm1) /* input operands */
        :);

    // End of SSR region.
    // Fence before disabling the SSRs and before reading the accumulator.
    snrt_fpu_fence();
    snrt_ssr_disable();
    asm volatile("" : : "f"(ft0), "f"(ft1));
    output[0] = res_ssr;
}

/**
 * Single-core dot product of two double vectors using four independent
 * accumulators (fs0..fs3) inside one frep.o loop, followed by a pairwise
 * reduction of the four accumulators.
 *
 * Declared `static inline` (like dot() in this header): a plain `inline`
 * definition in a header provides no external definition in C99/C11, so any
 * call the compiler chooses not to inline would fail to link.
 *
 * @param n      Number of elements. The loop body holds four fmadd.d
 *               instructions and is repeated with operand (n >> 2) - 1, so:
 *               - n must be >= 4, otherwise the uint32_t subtraction wraps;
 *               - n should be a multiple of 4, otherwise the trailing n % 4
 *                 elements are not accumulated although the SSR streams are
 *                 configured for n elements.
 * @param x      First input vector (n doubles), read through SNRT_SSR_DM0.
 * @param y      Second input vector (n doubles), read through SNRT_SSR_DM1.
 * @param output output[0] receives the accumulated sum.
 */
static inline void dot_seq_4_acc(uint32_t n, double *x, double *y,
                                 double *output) {
    // Start of SSR region.
    // The empty asm marks ft0/ft1 as written so the compiler treats them as
    // live for the duration of the SSR region.
    register volatile double ft0 asm("ft0");
    register volatile double ft1 asm("ft1");
    asm volatile("" : "=f"(ft0), "=f"(ft1));

    // Both streams walk n contiguous doubles.
    snrt_ssr_loop_1d(SNRT_SSR_DM0, n, sizeof(double));
    snrt_ssr_loop_1d(SNRT_SSR_DM1, n, sizeof(double));

    snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, x);
    snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, y);

    // Four accumulators, each pinned to its own register and cleared.
    register volatile double res_ssr_0 asm("fs0") = 0;
    register volatile double res_ssr_1 asm("fs1") = 0;
    register volatile double res_ssr_2 asm("fs2") = 0;
    register volatile double res_ssr_3 asm("fs3") = 0;

    snrt_ssr_enable();

    // One repetition consumes four element pairs, hence n / 4 repetitions
    // (NOTE(review): presumably frep executes its body operand + 1 times).
    const register uint32_t Nm1 asm("t0") = (n >> 2) - 1;
    asm volatile(
        "frep.o %[n_frep], 4, 0, 0 \n"
        "fmadd.d %0, ft0, ft1, %0 \n"
        "fmadd.d %1, ft0, ft1, %1 \n"
        "fmadd.d %2, ft0, ft1, %2 \n"
        "fmadd.d %3, ft0, ft1, %3"
        : "=f"(res_ssr_0), "=f"(res_ssr_1), "=f"(res_ssr_2),
          "=f"(res_ssr_3) /* output operands */
        : "f"(ft0), "f"(ft1), "0"(res_ssr_0), "1"(res_ssr_1), "2"(res_ssr_2),
          "3"(res_ssr_3), [ n_frep ] "r"(Nm1) /* input operands */
        :);

    // End of SSR region.
    snrt_fpu_fence();
    snrt_ssr_disable();

    // Pairwise reduction: (acc0 + acc1) + (acc2 + acc3).
    // res_ssr_0 and res_ssr_2 are both read and written by these
    // instructions, so they are read-write ("+f") operands; declaring them
    // write-only would hide the reads from the compiler. All four operands
    // are pinned to distinct registers above, so no early-clobber marker is
    // needed.
    asm volatile(
        "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_1] \n"
        "fadd.d %[res_ssr_2], %[res_ssr_2], %[res_ssr_3] \n"
        "fadd.d %[res_ssr_0], %[res_ssr_0], %[res_ssr_2]"
        : [ res_ssr_0 ] "+f"(res_ssr_0),
          [ res_ssr_2 ] "+f"(res_ssr_2) /* read-write operands */
        : [ res_ssr_1 ] "f"(res_ssr_1),
          [ res_ssr_3 ] "f"(res_ssr_3) /* input operands */
        :);

    asm volatile("" : : "f"(ft0), "f"(ft1));
    output[0] = res_ssr_0;
}

/**
 * Cluster-level dot product of two double vectors.
 *
 * The DM core copies x and y into the region starting at snrt_l1_next(); each
 * compute core then runs dot_seq_4_acc() on its own contiguous slice of
 * n / snrt_cluster_compute_core_num() elements and stores a partial sum;
 * core 0 adds the partial sums together and the DM core writes the total to
 * *result.
 *
 * All cores of the cluster must call this function, since it contains
 * cluster-wide hardware barriers.
 *
 * @param n      Number of elements in x and y. The per-core slice length is
 *               an integer division, so any remainder elements are not
 *               processed.
 * @param x      First input vector (n doubles).
 * @param y      Second input vector (n doubles).
 * @param result Destination for the final sum, written by the DM core.
 */
static inline void dot(uint32_t n, double *x, double *y, double *result) {
    // Lay out three back-to-back regions starting at snrt_l1_next():
    // a copy of x, a copy of y, and one partial-sum slot per core index.
    double *x_l1 = (double *)snrt_l1_next();
    double *y_l1 = x_l1 + n;
    double *partials = y_l1 + n;

    // The DM core stages both input vectors and waits for the transfers.
    if (snrt_is_dm_core()) {
        size_t nbytes = n * sizeof(double);
        snrt_dma_start_1d(x_l1, x, nbytes);
        snrt_dma_start_1d(y_l1, y, nbytes);
        snrt_dma_wait_all();
    }

    // Slice of the vectors handled by this core.
    int core_idx = snrt_cluster_core_idx();
    int chunk_len = n / snrt_cluster_compute_core_num();
    int chunk_start = core_idx * chunk_len;
    double *x_chunk = x_l1 + chunk_start;
    double *y_chunk = y_l1 + chunk_start;

    // Inputs must be in place before any core starts computing.
    snrt_cluster_hw_barrier();

    // Return value intentionally unused; the call itself is kept
    // (NOTE(review): presumably marks the start of the measured region).
    (void)snrt_mcycle();

    // Each compute core writes its partial sum into its own slot.
    if (snrt_is_compute_core()) {
        dot_seq_4_acc(chunk_len, x_chunk, y_chunk, &partials[core_idx]);
    }

    // All partial sums must be written before they are reduced.
    snrt_cluster_hw_barrier();

    // Core 0 folds every partial sum into slot 0.
#ifndef _DOTP_EXCLUDE_FINAL_SYNC_
    if (snrt_cluster_core_idx() == 0) {
        uint32_t i = 1;
        while (i < snrt_cluster_compute_core_num()) {
            partials[0] += partials[i];
            i++;
        }
        snrt_fpu_fence();
    }
#endif

    // Return value intentionally unused; see the matching call above.
    (void)snrt_mcycle();

    snrt_cluster_hw_barrier();

    // The DM core hands the final value back to the caller.
    if (snrt_is_dm_core()) {
        *result = partials[0];
    }

    snrt_cluster_hw_barrier();
}
27 changes: 27 additions & 0 deletions sw/blas/dot/src/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include "snrt.h"

#include "data.h"
#include "dot.h"

/**
 * Runs the cluster dot product on the vectors from data.h.
 *
 * When BIST is defined, global core 0 compares `result` against the golden
 * value `g` with an exact == comparison and returns 0 on a match, 1 otherwise.
 * Every other core, and every core when BIST is not defined, returns 0.
 */
int main() {
    dot(n, x, y, &result);

    // TODO: currently only works for single cluster otherwise need to
    //       synchronize all cores here
#ifdef BIST
    // Only global core 0 performs the self-check and reports an error count.
    if (snrt_global_core_idx() == 0) {
        return (result == g) ? 0 : 1;
    }
#endif

    return 0;
}
88 changes: 0 additions & 88 deletions sw/blas/dotp/src/dotp.h

This file was deleted.

Loading

0 comments on commit 55b09e9

Please sign in to comment.