From 693fd7e9f81dae2b9f2b856ffadc2aa49e92218c Mon Sep 17 00:00:00 2001 From: Luca Colagrande Date: Wed, 21 Aug 2024 14:49:49 +0200 Subject: [PATCH] Correct linting --- sw/apps/doitgen/src/args.h | 4 +- sw/apps/doitgen/src/doitgen.h | 83 +++++++++++++++++------------------ sw/apps/doitgen/src/main.c | 2 +- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/sw/apps/doitgen/src/args.h b/sw/apps/doitgen/src/args.h index 8061862995..5d3f56ce45 100644 --- a/sw/apps/doitgen/src/args.h +++ b/sw/apps/doitgen/src/args.h @@ -7,8 +7,8 @@ #pragma once #include -typedef void (*doitgen_fp_t)(uint32_t r, uint32_t q, uint32_t s, - double *A, double *x, double *Aout); +typedef void (*doitgen_fp_t)(uint32_t r, uint32_t q, uint32_t s, double *A, + double *x, double *Aout); typedef struct { uint32_t r; diff --git a/sw/apps/doitgen/src/doitgen.h b/sw/apps/doitgen/src/doitgen.h index 47250cb57c..2f7bc61288 100644 --- a/sw/apps/doitgen/src/doitgen.h +++ b/sw/apps/doitgen/src/doitgen.h @@ -11,7 +11,8 @@ __thread int setup_ssr = 1; -void doitgen_naive(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, double *Aout) { +void doitgen_naive(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, + double *Aout) { uint32_t offset = snrt_cluster_core_idx(); uint32_t stride = snrt_cluster_compute_core_num(); @@ -30,7 +31,8 @@ void doitgen_naive(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, dou snrt_fpu_fence(); } -void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, double *Aout) { +void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, + double *Aout) { uint32_t offset = snrt_cluster_core_idx(); uint32_t stride = snrt_cluster_compute_core_num(); @@ -43,7 +45,6 @@ void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, for (uint32_t i = offset; i < r; i += stride) { for (uint32_t j = 0; j < q; j++) { for (uint32_t k = 0; k < s; k += unroll1) { - double acc[4]; acc[0] = 0; acc[1] = 0; @@ -51,7 +52,6 @@ void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, acc[3] = 0; for (uint32_t l = 0; l < s; l += unroll0) { - asm volatile( "fmadd.d %[acc0], %[a0], %[x0], %[acc0] \n" "fmadd.d %[acc1], %[a0], %[x1], %[acc1] \n" @@ -70,28 +70,27 @@ void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, "fmadd.d %[acc2], %[a3], %[x14], %[acc2] \n" "fmadd.d %[acc3], %[a3], %[x15], %[acc3] \n" : [ acc0 ] "+f"(acc[0]), [ acc1 ] "+f"(acc[1]), - [ acc2 ] "+f"(acc[2]), [ acc3 ] "+f"(acc[3]) - : - [ a0 ] "f"(A[i * q * s + j * s + l + 0]), - [ a1 ] "f"(A[i * q * s + j * s + l + 1]), - [ a2 ] "f"(A[i * q * s + j * s + l + 2]), - [ a3 ] "f"(A[i * q * s + j * s + l + 3]), - [ x0 ] "f"(x[(k + 0) * s + l + 0]), - [ x1 ] "f"(x[(k + 1) * s + l + 0]), - [ x2 ] "f"(x[(k + 2) * s + l + 0]), - [ x3 ] "f"(x[(k + 3) * s + l + 0]), - [ x4 ] "f"(x[(k + 0) * s + l + 1]), - [ x5 ] "f"(x[(k + 1) * s + l + 1]), - [ x6 ] "f"(x[(k + 2) * s + l + 1]), - [ x7 ] "f"(x[(k + 3) * s + l + 1]), - [ x8 ] "f"(x[(k + 0) * s + l + 2]), - [ x9 ] "f"(x[(k + 1) * s + l + 2]), - [ x10 ] "f"(x[(k + 2) * s + l + 2]), - [ x11 ] "f"(x[(k + 3) * s + l + 2]), - [ x12 ] "f"(x[(k + 0) * s + l + 3]), - [ x13 ] "f"(x[(k + 1) * s + l + 3]), - [ x14 ] "f"(x[(k + 2) * s + l + 3]), - [ x15 ] "f"(x[(k + 3) * s + l + 3]) + [ acc2 ] "+f"(acc[2]), [ acc3 ] "+f"(acc[3]) + : [ a0 ] "f"(A[i * q * s + j * s + l + 0]), + [ a1 ] "f"(A[i * q * s + j * s + l + 1]), + [ a2 ] "f"(A[i * q * s + j * s + l + 2]), + [ a3 ] "f"(A[i * q * s + j * s + l + 3]), + [ x0 ] "f"(x[(k + 0) * s + l + 0]), + [ x1 ] "f"(x[(k + 1) * s + l + 0]), + [ x2 ] "f"(x[(k + 2) * s + l + 0]), + [ x3 ] "f"(x[(k + 3) * s + l + 0]), + [ x4 ] "f"(x[(k + 0) * s + l + 1]), + [ x5 ] "f"(x[(k + 1) * s + l + 1]), + [ x6 ] "f"(x[(k + 2) * s + l + 1]), + [ x7 ] "f"(x[(k + 3) * s + l + 1]), + [ x8 ] "f"(x[(k + 0) * s + l + 2]), + [ x9 ] "f"(x[(k + 1) * s + l + 2]), + [ x10 ] "f"(x[(k + 2) * s + l + 2]), + [ x11 ] "f"(x[(k + 3) * s + l + 2]), + [ x12 ] "f"(x[(k + 0) * s + l + 3]), + [ x13 ] "f"(x[(k + 1) * s + l + 3]), + [ x14 ] "f"(x[(k + 2) * s + l + 3]), + [ x15 ] "f"(x[(k + 3) * s + l + 3]) :); } @@ -106,7 +105,8 @@ void doitgen_baseline(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, snrt_fpu_fence(); } -void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, double *Aout) { +void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, + double *Aout) { uint32_t bound = r / snrt_cluster_compute_core_num(); uint32_t offset = bound * snrt_cluster_core_idx(); @@ -127,8 +127,8 @@ void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, doubl // ft1.push(x[k * s + l]) const uint32_t ssr0_b[4] = {unroll, s, s / unroll, q * bound}; const uint32_t ssr0_i[4] = {0, sizeof(double), 0, s * sizeof(double)}; - snrt_ssr_loop_3d(SNRT_SSR_DM0, ssr0_b[1], ssr0_b[2], - ssr0_b[3], ssr0_i[1], ssr0_i[2], ssr0_i[3]); + snrt_ssr_loop_3d(SNRT_SSR_DM0, ssr0_b[1], ssr0_b[2], ssr0_b[3], + ssr0_i[1], ssr0_i[2], ssr0_i[3]); snrt_ssr_repeat(SNRT_SSR_DM0, unroll); const uint32_t ssr1_b[4] = {unroll, s, s / unroll, q * bound}; const uint32_t ssr1_i[4] = {s * sizeof(double), sizeof(double), @@ -146,14 +146,13 @@ void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, doubl for (uint32_t i = offset; i < (offset + bound); i++) { for (uint32_t j = 0; j < q; j++) { for (uint32_t k = 0; k < s; k += unroll) { - double acc[unroll]; acc[0] = 0; acc[1] = 0; acc[2] = 0; acc[3] = 0; - asm volatile ( + asm volatile( "frep.o %[n_frep], %[unroll], 0, 0 \n" "fmadd.d %[acc0], ft0, ft1, %[acc0] \n" "fmadd.d %[acc1], ft0, ft1, %[acc1] \n" @@ -162,8 +161,7 @@ void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, doubl : [ acc0 ] "+f"(acc[0]), [ acc1 ] "+f"(acc[1]), [ acc2 ] "+f"(acc[2]), [ acc3 ] "+f"(acc[3]) : [ n_frep ] "r"(s - 1), [ unroll ] "i"(unroll) - : "ft0", "ft1", "ft2" - ); + : "ft0", "ft1", "ft2"); Aout[i * q * s + j * s + k + 0] = acc[0]; Aout[i * q * s + j * s + k + 1] = acc[1]; @@ -179,8 +177,8 @@ void doitgen_opt(uint32_t r, uint32_t q, uint32_t s, double *A, double *x, doubl void doitgen_job(doitgen_args_t *args) { uint32_t r_frac, q_frac, a_tile_size, a_tile_bytes, x_size, x_bytes; - uint64_t local_a0_addr, local_aout0_addr, local_x0_addr, - local_a1_addr, local_aout1_addr; + uint64_t local_a0_addr, local_aout0_addr, local_x0_addr, local_a1_addr, + local_aout1_addr; double *local_a[2]; double *local_aout[2]; double *local_x; @@ -244,10 +242,9 @@ void doitgen_job(doitgen_args_t *args) { // Copy job operands in TCDM snrt_dma_load_2d_tile(local_a[buff_idx], args->A, i_r, i_q, - r_frac, q_frac * args->s, args->q * args->s, - sizeof(double)); - if (i_dma_in == 0) - snrt_dma_start_1d(local_x, args->x, x_bytes); + r_frac, q_frac * args->s, + args->q * args->s, sizeof(double)); + if (i_dma_in == 0) snrt_dma_start_1d(local_x, args->x, x_bytes); snrt_dma_wait_all(); snrt_mcycle(); @@ -269,8 +266,8 @@ void doitgen_job(doitgen_args_t *args) { // Copy job outputs from TCDM snrt_dma_store_2d_tile(args->A, local_aout[buff_idx], i_r, i_q, - r_frac, q_frac * args->s, args->q * args->s, - sizeof(double)); + r_frac, q_frac * args->s, + args->q * args->s, sizeof(double)); snrt_dma_wait_all(); snrt_mcycle(); @@ -291,8 +288,8 @@ void doitgen_job(doitgen_args_t *args) { // Perform tile computation doitgen_fp_t fp = args->funcptr; - fp(r_frac, q_frac, args->s, local_a[buff_idx], - local_x, local_aout[buff_idx]); + fp(r_frac, q_frac, args->s, local_a[buff_idx], local_x, + local_aout[buff_idx]); snrt_mcycle(); } diff --git a/sw/apps/doitgen/src/main.c b/sw/apps/doitgen/src/main.c index 049e2297f2..60ee10c4ca 100644 --- a/sw/apps/doitgen/src/main.c +++ b/sw/apps/doitgen/src/main.c @@ -6,8 +6,8 @@ #include "snrt.h" -#include "doitgen.h" #include "data.h" +#include "doitgen.h" int main() { doitgen_job(&args);