diff --git a/sw/apps/covariance/src/covariance.h b/sw/apps/covariance/src/covariance.h index e1df4b426c..0af8014758 100644 --- a/sw/apps/covariance/src/covariance.h +++ b/sw/apps/covariance/src/covariance.h @@ -6,8 +6,8 @@ // Luca Colagrande #include "args.h" +#include "blas.h" #include "snrt.h" -#include "ata.h" #define DOUBLE_BUFFER 1 @@ -41,7 +41,7 @@ void covariance_naive(uint32_t m, uint32_t n, double inv_n, snrt_cluster_hw_barrier(); // Compute covariance matrix - ata_naive(inv_n_m1, m, n, data, datat, cov); + syrk_naive(m, n, inv_n_m1, data, datat, 0, cov); } void covariance_baseline(uint32_t m, uint32_t n, double inv_n, @@ -74,7 +74,7 @@ void covariance_baseline(uint32_t m, uint32_t n, double inv_n, snrt_cluster_hw_barrier(); // Compute covariance matrix - ata_baseline(inv_n_m1, m, n, data, datat, cov); + syrk_baseline(m, n, inv_n_m1, data, datat, 0, cov); } void covariance_opt(uint32_t m, uint32_t n, double inv_n, diff --git a/sw/blas/blas.h b/sw/blas/blas.h index 33c29e1753..69005ccb75 100644 --- a/sw/blas/blas.h +++ b/sw/blas/blas.h @@ -4,6 +4,20 @@ #pragma once +// Floating-point multiplications by zero cannot be optimized as in some +// edge cases they do not yield zero: +// - 0f * NaN = NaN +// - 0f * INFINITY == NaN +// Thus in order to optimize it, we need to test for zero. You can use this +// function for free when `multiplier` is a constant. 
// Scales `multiplicand` by `multiplier`, returning exactly zero whenever the
// multiplier is zero.
//
// A floating-point multiplication by zero cannot be folded away by the
// compiler, because in some edge cases it does not yield zero:
// - 0.0 * NaN      = NaN
// - 0.0 * INFINITY = NaN
// Testing the multiplier explicitly makes the zero case a plain constant.
// When `multiplier` is a compile-time constant the test itself is free.
//
// multiplicand: value to be scaled
// multiplier:   scale factor, compared against zero
// returns:      multiplicand * multiplier if multiplier is non-zero, else 0
static inline double multiply_opt(double multiplicand, double multiplier) {
    return (multiplier != 0) ? multiplicand * multiplier : 0;
}
def validate(self, **kwargs):
    """Check that the requested problem configuration can be generated.

    Reads 'm', 'n', 'm_tiles' and 'funcptr' from kwargs and asserts that:
      - 'funcptr' names one of the implementations in FUNCPTRS,
      - 'm' splits evenly into 'm_tiles' tiles of m_frac rows each,
      - m_frac is a multiple of n_cores,
      - m_frac is a multiple of the unroll factor 4, for every
        implementation except "syrk_naive".
    It then passes the total tile footprint in bytes to
    data_utils.validate_tcdm_footprint.

    Raises:
        AssertionError: if any of the constraints above is violated.
    """
    # NOTE(review): hard-coded; presumably the number of compute cores in the
    # cluster the kernels are run on -- confirm against the target config.
    n_cores = 8
    unroll = 4
    m = kwargs['m']
    n = kwargs['n']
    m_tiles = kwargs['m_tiles']
    funcptr = kwargs['funcptr']

    # Reject unknown implementations first, before funcptr is used to decide
    # which of the remaining constraints apply.
    assert funcptr in self.FUNCPTRS, f"Function pointer must be among {self.FUNCPTRS}"

    assert (m % m_tiles) == 0, "m must be an integer multiple of m_tiles"
    # Floor division keeps m_frac (and every size derived from it) an
    # integer; the assertion above guarantees the division is exact.
    m_frac = m // m_tiles
    assert (m_frac % n_cores) == 0, "m_frac must be an integer multiple of the number of cores"
    if funcptr != "syrk_naive":
        assert (m_frac % unroll) == 0, "m_frac must be an integer multiple of the unroll factor 4"

    # Calculate total TCDM occupation: two (m_frac x n) input tiles plus one
    # (m_frac x m_frac) output tile, 8 bytes per double element.
    a_tile_size = m_frac * n * 8
    c_tile_size = m_frac * m_frac * 8
    total_size = 2 * a_tile_size + c_tile_size
    if DOUBLE_BUFFER:
        # Every buffer is allocated twice when double buffering.
        total_size *= 2
    data_utils.validate_tcdm_footprint(total_size)
np.random.randint(-200, 100)/100 + A = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['n']))/100 - alpha = np.random.randint(-200, 100)/100 - B = self.golden_model(alpha, A) + C_in = np.random.randint(-200, 100, size=(kwargs['m'], kwargs['m']))/100 + C_out = self.golden_model(alpha, A, beta, C_in) A = A.flatten() - B = B.flatten() + C_in = C_in.flatten() A_uid = 'A' - B_uid = 'B' + C_uid = 'C' cfg = { - 'alpha': alpha, 'm': kwargs['m'], 'n': kwargs['n'], + 'alpha': alpha, + 'beta': beta, 'a': A_uid, - 'b': B_uid, + 'c': C_uid, 'm_tiles': kwargs['m_tiles'], 'funcptr': kwargs['funcptr'] } header += [format_array_definition('double', A_uid, A)] - header += [format_array_declaration('double', B_uid, B.shape)] - header += [format_struct_definition('ata_args_t', 'args', cfg)] + header += [format_array_definition('double', C_uid, C_in)] + header += [format_struct_definition('syrk_args_t', 'args', cfg)] header = '\n\n'.join(header) return header if __name__ == '__main__': - AtaDataGen().main() + SyrkDataGen().main() diff --git a/sw/apps/ata/scripts/verify.py b/sw/blas/syrk/scripts/verify.py similarity index 70% rename from sw/apps/ata/scripts/verify.py rename to sw/blas/syrk/scripts/verify.py index 206af870a3..0624156cb2 100755 --- a/sw/apps/ata/scripts/verify.py +++ b/sw/blas/syrk/scripts/verify.py @@ -7,23 +7,24 @@ import numpy as np import sys -from datagen import AtaDataGen +from datagen import SyrkDataGen from snitch.util.sim.verif_utils import Verifier -class AtaVerifier(Verifier): +class SyrkVerifier(Verifier): - OUTPUT_UIDS = ['B'] + OUTPUT_UIDS = ['C'] def __init__(self): super().__init__() self.func_args = { - 'alpha': 'd', 'm': 'I', 'n': 'I', + 'alpha': 'd', + 'beta': 'd', 'A': 'I', - 'B': 'I', + 'C': 'I', 'm_tiles': 'I', 'funcptr': 'I' } @@ -34,12 +35,17 @@ def get_actual_results(self): def get_expected_results(self): A = self.get_input_from_symbol('A', 'double') + C = self.get_input_from_symbol('C', 'double') A = np.reshape(A, (self.func_args['m'], 
// Common signature of the SYRK kernel implementations that can be selected
// through `syrk_args_t.funcptr`.
//
// m, n:   dimensions of the input tile pointed to by `a`
// alpha:  scale factor applied to the accumulated products
// a, at:  the two input operands whose rows are multiplied together
// beta:   scale factor applied to the previous contents of `c`
// c:      output matrix, updated in place
typedef void (*syrk_fp_t)(uint32_t m, uint32_t n, double alpha, double *a,
                          double *at, double beta, double *c);

// Arguments of a SYRK job.
// The field order matches the `func_args` layout in scripts/verify.py and
// the struct emitted by scripts/datagen.py, so the three must change together.
typedef struct {
    uint32_t m;         // A is generated as an (m x n) matrix
    uint32_t n;
    double alpha;       // golden model: alpha * A * A^T + beta * C
    double beta;
    double *a;          // input matrix A
    double *c;          // (m x m) matrix C, both input and output
    uint32_t m_tiles;   // m is split into m_tiles tiles per dimension
    syrk_fp_t funcptr;  // kernel implementation to run on each tile
} syrk_args_t;
// Straightforward SYRK kernel without unrolling or hand-written assembly.
//
// For every output element handled by the calling core it computes
//   c[i][j] = beta * c[i][j] + alpha * sum_k a[i][k] * at[j][k]
// with `a` and `at` indexed as row-major (m x n) arrays and `c` as a
// row-major (m x m) array.
//
// Output rows are split between cores: the calling core starts at row
// snrt_cluster_core_idx() and advances by snrt_cluster_compute_core_num().
void syrk_naive(uint32_t m, uint32_t n, double alpha, double *a, double *at,
                double beta, double *c) {
    const uint32_t first_row = snrt_cluster_core_idx();
    const uint32_t row_step = snrt_cluster_compute_core_num();

    for (uint32_t row = first_row; row < m; row += row_step) {
        const uint32_t a_base = row * n;
        const uint32_t c_base = row * m;

        for (uint32_t col = 0; col < m; col++) {
            const uint32_t at_base = col * n;
            const uint32_t c_idx = c_base + col;

            // Dot product of row `row` of a with row `col` of at
            double dot = 0;
            for (uint32_t k = 0; k < n; k++) {
                dot += a[a_base + k] * at[at_base + k];
            }

            // multiply_opt returns 0 when beta is zero, so the previous
            // contents of c are ignored in that case.
            c[c_idx] = multiply_opt(c[c_idx], beta);
            c[c_idx] += alpha * dot;
        }
    }
}
ft1, %[acc3] \n" - "fmul.d %[b0], %[acc0], %[alpha] \n" - "fmul.d %[b1], %[acc1], %[alpha] \n" - "fmul.d %[b2], %[acc2], %[alpha] \n" - "fmul.d %[b3], %[acc3], %[alpha] \n" - : [ acc0 ] "+f"(acc[0]), [ acc1 ] "+f"(acc[1]), - [ acc2 ] "+f"(acc[2]), [ acc3 ] "+f"(acc[3]), - [ b0 ] "=f"(b[i * m + j + 0]), [ b1 ] "=f"(b[i * m + j + 1]), - [ b2 ] "=f"(b[i * m + j + 2]), [ b3 ] "=f"(b[i * m + j + 3]) + "fmul.d %[acc0], %[acc0], %[alpha] \n" + "fmul.d %[acc1], %[acc1], %[alpha] \n" + "fmul.d %[acc2], %[acc2], %[alpha] \n" + "fmul.d %[acc3], %[acc3], %[alpha] \n" + "fmadd.d %[c0], %[c0], %[beta], %[acc0] \n" + "fmadd.d %[c1], %[c1], %[beta], %[acc1] \n" + "fmadd.d %[c2], %[c2], %[beta], %[acc2] \n" + "fmadd.d %[c3], %[c3], %[beta], %[acc3] \n" + : [ c0 ] "+f"(c[i * m + j + 0]), [ c1 ] "+f"(c[i * m + j + 1]), + [ c2 ] "+f"(c[i * m + j + 2]), [ c3 ] "+f"(c[i * m + j + 3]), + [ acc0 ] "+f"(acc[0]), [ acc1 ] "+f"(acc[1]), + [ acc2 ] "+f"(acc[2]), [ acc3 ] "+f"(acc[3]) : [ n_frep ] "r"(n - 1), [ unroll ] "i"(unroll), - [ alpha ] "f"(alpha) + [ alpha ] "f"(alpha), [ beta ] "f"(beta) : "ft0", "ft1", "ft2"); } } @@ -166,23 +176,23 @@ void ata_opt(double alpha, uint32_t m, uint32_t n, double *a, double *at, double snrt_fpu_fence(); } -void ata_job(ata_args_t *args) { - uint32_t m_frac, a_tile_size, a_tile_bytes, b_tile_size, b_tile_bytes; - uint64_t local_a0_addr, local_at0_addr, local_b0_addr, - local_a1_addr, local_at1_addr, local_b1_addr; +void syrk_job(syrk_args_t *args) { + uint32_t m_frac, a_tile_size, a_tile_bytes, c_tile_size, c_tile_bytes; + uint64_t local_a0_addr, local_at0_addr, local_c0_addr, + local_a1_addr, local_at1_addr, local_c1_addr; double *local_a[2]; double *local_at[2]; - double *local_b[2]; - uint32_t iterations, sb_iterations; + double *local_c[2]; + uint32_t n_tiles, iterations; uint32_t i, i_dma_in, i_compute, i_dma_out, i_row, i_col, buff_idx; #ifndef JOB_ARGS_PRELOADED // Allocate space for job arguments in TCDM - ata_args_t *local_args = (ata_args_t 
*)snrt_l1_next(); + syrk_args_t *local_args = (syrk_args_t *)snrt_l1_next(); // Copy job arguments to TCDM if (snrt_is_dm_core()) { - snrt_dma_start_1d(local_args, args, sizeof(ata_args_t)); + snrt_dma_start_1d(local_args, args, sizeof(syrk_args_t)); snrt_dma_wait_all(); } snrt_cluster_hw_barrier(); @@ -192,43 +202,66 @@ void ata_job(ata_args_t *args) { // Calculate size of each tile m_frac = args->m / args->m_tiles; a_tile_size = args->n * m_frac; - b_tile_size = m_frac * m_frac; + c_tile_size = m_frac * m_frac; a_tile_bytes = a_tile_size * sizeof(double); - b_tile_bytes = b_tile_size * sizeof(double); + c_tile_bytes = c_tile_size * sizeof(double); // Allocate space for job operands in TCDM // Align X with the 1st bank in TCDM, Y with the 8th and Z with the 16th. - local_a0_addr = (uint64_t)args + sizeof(ata_args_t); + local_a0_addr = (uint64_t)args + sizeof(syrk_args_t); local_at0_addr = local_a0_addr + a_tile_bytes; - local_b0_addr = local_at0_addr + a_tile_bytes; + local_c0_addr = local_at0_addr + a_tile_bytes; local_a[0] = (double *)local_a0_addr; local_at[0] = (double *)local_at0_addr; - local_b[0] = (double *)local_b0_addr; - if (DOUBLE_BUFFER) { - local_a1_addr = local_b0_addr + b_tile_bytes; - local_at1_addr = local_a1_addr + a_tile_bytes; - local_b1_addr = local_at1_addr + a_tile_bytes; - local_a[1] = (double *)local_a1_addr; - local_at[1] = (double *)local_at1_addr; - local_b[1] = (double *)local_b1_addr; - } + local_c[0] = (double *)local_c0_addr; + local_a1_addr = local_c0_addr + c_tile_bytes; + local_at1_addr = local_a1_addr + a_tile_bytes; + local_c1_addr = local_at1_addr + a_tile_bytes; + local_a[1] = (double *)local_a1_addr; + local_at[1] = (double *)local_at1_addr; + local_c[1] = (double *)local_c1_addr; // Calculate number of iterations - sb_iterations = args->m_tiles * args->m_tiles; - if (DOUBLE_BUFFER) iterations = sb_iterations + 2; - else iterations = sb_iterations; + n_tiles = args->m_tiles * args->m_tiles; + iterations = n_tiles + 2; // 
Iterate over all tiles for (i = 0; i < iterations; i++) { if (snrt_is_dm_core()) { + // DMA out + // (out before in to avoid overwriting data) + if (i > 1) { + snrt_mcycle(); + + // Compute tile and buffer indices + i_dma_out = i - 2; + buff_idx = i_dma_out % 2; + i_row = i_dma_out / args->m_tiles; + i_col = i_dma_out % args->m_tiles; + + // Copy job outputs from TCDM + snrt_dma_store_2d_tile( + args->c, + local_c[buff_idx], + i_row, + i_col, + m_frac, + m_frac, + args->m, + sizeof(double)); + snrt_dma_wait_all(); + + snrt_mcycle(); + } + // DMA in - if (!DOUBLE_BUFFER || (i < sb_iterations)) { + if (i < n_tiles) { snrt_mcycle(); // Compute tile and buffer indices i_dma_in = i; - buff_idx = DOUBLE_BUFFER ? i_dma_in % 2 : 0; + buff_idx = i_dma_in % 2; i_row = i_dma_in / args->m_tiles; i_col = i_dma_in % args->m_tiles; @@ -245,35 +278,17 @@ void ata_job(ata_args_t *args) { i_col, a_tile_size, sizeof(double)); - snrt_dma_wait_all(); - - snrt_mcycle(); - } - - // Additional barriers required when not double buffering - if (!DOUBLE_BUFFER) snrt_cluster_hw_barrier(); - if (!DOUBLE_BUFFER) snrt_cluster_hw_barrier(); - - // DMA out - if (!DOUBLE_BUFFER || (i > 1)) { - snrt_mcycle(); - - // Compute tile and buffer indices - i_dma_out = DOUBLE_BUFFER ? i - 2 : i; - buff_idx = DOUBLE_BUFFER ? 
i_dma_out % 2 : 0; - i_row = i_dma_out / args->m_tiles; - i_col = i_dma_out % args->m_tiles; - - // Copy job outputs from TCDM - snrt_dma_store_2d_tile( - args->b, - local_b[buff_idx], - i_row, - i_col, - m_frac, - m_frac, - args->m, - sizeof(double)); + if (args->funcptr == syrk_opt || args->beta != 0) { + snrt_dma_load_2d_tile( + local_c[buff_idx], + args->c, + i_row, + i_col, + m_frac, + m_frac, + args->m, + sizeof(double)); + } snrt_dma_wait_all(); snrt_mcycle(); @@ -282,27 +297,22 @@ void ata_job(ata_args_t *args) { // Compute if (snrt_is_compute_core()) { - // Additional barrier required when not double buffering - if (!DOUBLE_BUFFER) snrt_cluster_hw_barrier(); - - if (!DOUBLE_BUFFER || (i > 0 && i < (sb_iterations + 1))) { + if (i > 0 && i < (n_tiles + 1)) { snrt_mcycle(); // Compute tile and buffer indices - i_compute = DOUBLE_BUFFER ? i - 1 : i; - buff_idx = DOUBLE_BUFFER ? i_compute % 2 : 0; + i_compute = i - 1; + buff_idx = i_compute % 2; // Perform tile computation - ata_fp_t fp = args->funcptr; - fp(args->alpha, m_frac, args->n, local_a[buff_idx], - local_at[buff_idx], local_b[buff_idx]); + syrk_fp_t fp = args->funcptr; + fp(m_frac, args->n, args->alpha, local_a[buff_idx], + local_at[buff_idx], args->beta, local_c[buff_idx]); snrt_mcycle(); } - - // Additional barrier required when not double buffering - if (!DOUBLE_BUFFER) snrt_cluster_hw_barrier(); } + // Synchronize cores after every iteration snrt_cluster_hw_barrier(); } diff --git a/target/snitch_cluster/sw.mk b/target/snitch_cluster/sw.mk index 0a1e4c00c3..674ea2cadb 100644 --- a/target/snitch_cluster/sw.mk +++ b/target/snitch_cluster/sw.mk @@ -51,6 +51,7 @@ APPS = sw/apps/nop APPS += sw/apps/blas/axpy APPS += sw/apps/blas/gemm APPS += sw/apps/blas/dot +APPS += sw/apps/blas/syrk APPS += sw/apps/dnn/batchnorm APPS += sw/apps/dnn/conv2d APPS += sw/apps/dnn/fusedconv @@ -63,7 +64,6 @@ APPS += sw/apps/dnn/concat APPS += sw/apps/dnn/fused_concat_linear APPS += sw/apps/dnn/transpose APPS += 
sw/apps/montecarlo/pi_estimation -APPS += sw/apps/ata APPS += sw/apps/atax APPS += sw/apps/correlation APPS += sw/apps/covariance diff --git a/target/snitch_cluster/sw/apps/blas/gemm/app.mk b/target/snitch_cluster/sw/apps/blas/gemm/app.mk index 5d2b540687..f50f6d21c1 100644 --- a/target/snitch_cluster/sw/apps/blas/gemm/app.mk +++ b/target/snitch_cluster/sw/apps/blas/gemm/app.mk @@ -8,6 +8,7 @@ APP := gemm $(APP)_BUILD_DIR ?= $(ROOT)/target/snitch_cluster/sw/apps/blas/$(APP)/build SRC_DIR := $(ROOT)/sw/blas/$(APP)/src SRCS := $(SRC_DIR)/main.c +$(APP)_INCDIRS := $(ROOT)/sw/blas include $(ROOT)/sw/apps/common.mk include $(ROOT)/target/snitch_cluster/sw/apps/common.mk diff --git a/target/snitch_cluster/sw/apps/ata/app.mk b/target/snitch_cluster/sw/apps/blas/syrk/app.mk similarity index 65% rename from target/snitch_cluster/sw/apps/ata/app.mk rename to target/snitch_cluster/sw/apps/blas/syrk/app.mk index af63400b4a..c0fd050442 100644 --- a/target/snitch_cluster/sw/apps/ata/app.mk +++ b/target/snitch_cluster/sw/apps/blas/syrk/app.mk @@ -4,10 +4,11 @@ # # Luca Colagrande -APP := ata -$(APP)_BUILD_DIR ?= $(ROOT)/target/snitch_cluster/sw/apps/$(APP)/build -SRC_DIR := $(ROOT)/sw/apps/$(APP)/src +APP := syrk +$(APP)_BUILD_DIR ?= $(ROOT)/target/snitch_cluster/sw/apps/blas/$(APP)/build +SRC_DIR := $(ROOT)/sw/blas/$(APP)/src SRCS := $(SRC_DIR)/main.c +$(APP)_INCDIRS := $(ROOT)/sw/blas include $(ROOT)/sw/apps/common.mk include $(ROOT)/target/snitch_cluster/sw/apps/common.mk diff --git a/target/snitch_cluster/sw/apps/covariance/app.mk b/target/snitch_cluster/sw/apps/covariance/app.mk index 005791c791..e985e671e7 100644 --- a/target/snitch_cluster/sw/apps/covariance/app.mk +++ b/target/snitch_cluster/sw/apps/covariance/app.mk @@ -8,7 +8,7 @@ APP := covariance $(APP)_BUILD_DIR ?= $(ROOT)/target/snitch_cluster/sw/apps/$(APP)/build SRC_DIR := $(ROOT)/sw/apps/$(APP)/src SRCS := $(SRC_DIR)/main.c -$(APP)_INCDIRS := $(ROOT)/sw/apps/ata/src/ +$(APP)_INCDIRS := $(ROOT)/sw/blas/ include 
$(ROOT)/sw/apps/common.mk include $(ROOT)/target/snitch_cluster/sw/apps/common.mk