-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sw: Add GEMV, K-Means and update ATAX, Correlation, Covariance (#190)
- Loading branch information
Showing
36 changed files
with
1,139 additions
and
123 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ pyflexfloat | |
pytablewriter | ||
pytest | ||
pyyaml | ||
scikit-learn | ||
tabulate | ||
termcolor | ||
yamllint | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Author: Jose Pedro Castro Fonseca <jose.pc.fonseca@gmail, [email protected]> | ||
# Author: Jose Pedro Castro Fonseca <[email protected]> | ||
# Luca Colagrande <[email protected]> | ||
|
||
import numpy as np | ||
|
@@ -21,17 +21,31 @@ class AtaxDataGen(du.DataGen): | |
# Reference model for ATAX: returns A^T (A x), evaluated with NumPy matmul
# on the inner product A x first and then on the transposed matrix.
def golden_model(self, A, x): | ||
return np.matmul(A.transpose(), np.matmul(A, x)) | ||
|
||
# Check that the problem size (M, N) is acceptable before data generation.
# Raises AssertionError when N is not a multiple of 8, then hands the summed
# buffer footprint to du.validate_tcdm_footprint. Extra keyword arguments are
# accepted and ignored.
def validate(self, M, N, **kwargs): | ||
assert (N % 8) == 0, "N must be an integer multiple of the number of cores" | ||
|
||
# Calculate total TCDM occupation | ||
# NOTE(review): the factor 8 is presumably the size in bytes of one
# double-precision element (the C kernel sizes the same buffers with
# sizeof(double)) - confirm.
a_size = M * N * 8 | ||
x_size = N * 8 | ||
y_size = N * 8 | ||
tmp_size = M * 8 | ||
total_size = a_size | ||
total_size += x_size | ||
total_size += y_size | ||
total_size += tmp_size | ||
du.validate_tcdm_footprint(total_size) | ||
|
||
def emit_header(self, **kwargs): | ||
header = [super().emit_header()] | ||
|
||
# Validate parameters | ||
self.validate(**kwargs) | ||
|
||
M, N = kwargs['M'], kwargs['N'] | ||
A = du.generate_random_array((M, N)) | ||
x = du.generate_random_array((N, 1)) | ||
y = self.golden_model(A, x) | ||
|
||
assert (M % 8) == 0, "M must be an integer multiple of the number of cores" | ||
assert (N % 8) == 0, "N must be an integer multiple of the number of cores" | ||
|
||
A = A.flatten() | ||
x = x.flatten() | ||
y = y.flatten() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright 2024 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
// Author: Luca Colagrande <[email protected]> | ||
|
||
#pragma once | ||
#include <stdint.h> | ||
|
||
// Job arguments for the ATAX kernel, as consumed by atax_job().
typedef struct { | ||
// Problem dimensions: atax_job() sizes A as M * N doubles, x and y as N
// doubles each.
uint32_t M; | ||
uint32_t N; | ||
// Addresses of A, x and y stored as 64-bit integers; atax_job() casts each
// of them to a double pointer.
uint64_t A_addr; | ||
uint64_t x_addr; | ||
uint64_t y_addr; | ||
} atax_args_t; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,43 +6,114 @@ | |
// Luca Colagrande <[email protected]> | ||
|
||
#include <stdint.h> | ||
#include "args.h" | ||
#include "blas.h" | ||
#include "snrt.h" | ||
|
||
void kernel_atax(uint32_t M, uint32_t N, double *A, double *x, double *y, | ||
double *tmp) { | ||
static inline void atax(uint32_t M, uint32_t N, double *A, double *x, double *y, | ||
double *tmp) { | ||
double tmp_fs; | ||
int core_range, core_offset; | ||
int core_range, core_offset, cluster_core_offset; | ||
|
||
// tmp = A * x | ||
if (snrt_is_compute_core()) { | ||
core_range = M / snrt_cluster_compute_core_num(); | ||
core_offset = snrt_cluster_core_idx() * core_range; | ||
for (int i1 = 0; i1 < core_range; i1++) { | ||
int i = core_offset + i1; | ||
tmp_fs = 0.0; | ||
for (int j = 0; j < N; j++) { | ||
tmp_fs += A[i * N + j] * x[j]; | ||
} | ||
tmp[i] = tmp_fs; | ||
} | ||
snrt_mcycle(); | ||
gemv(0, M, N, 1, A, x, 1, tmp); | ||
snrt_mcycle(); | ||
} | ||
|
||
snrt_cluster_hw_barrier(); | ||
|
||
// y = At * tmp | ||
if (snrt_is_compute_core()) { | ||
core_range = N / snrt_cluster_compute_core_num(); | ||
core_offset = snrt_cluster_core_idx() * core_range; | ||
snrt_mcycle(); | ||
core_range = N / snrt_global_compute_core_num(); | ||
core_offset = snrt_global_compute_core_idx() * core_range; | ||
cluster_core_offset = snrt_cluster_core_idx() * core_range; | ||
for (int j1 = 0; j1 < core_range; j1++) { | ||
int j = core_offset + j1; | ||
int cluster_j = cluster_core_offset + j1; | ||
tmp_fs = 0.0; | ||
for (int i = 0; i < M; i++) { | ||
// The order of the for loops was exchanged, so that each loop | ||
// reduces in y at position j, iterating through the i | ||
// positions. | ||
tmp_fs += A[i * N + j] * tmp[i]; | ||
} | ||
y[j] = tmp_fs; | ||
y[cluster_j] = tmp_fs; | ||
} | ||
snrt_fpu_fence(); | ||
snrt_mcycle(); | ||
} | ||
} | ||
|
||
// Runs one ATAX job from the argument structure pointed to by args.
//
// Steps, in order: obtain the job arguments (copied by DMA into a freshly
// allocated buffer unless JOB_ARGS_PRELOADED is defined), allocate local
// buffers for A, x, a tile of y and the temporary vector, let the DM core copy
// A and x into the local buffers, call atax() on the local buffers, and let
// the DM core store this cluster's tile of y back to the destination address.
// Every phase is separated by snrt_cluster_hw_barrier().
// NOTE(review): the snrt_mcycle() calls presumably delimit measurement
// regions - confirm against the runtime documentation.
void atax_job(void *args) { | ||
double *local_A; | ||
double *local_x; | ||
double *local_y; | ||
double *local_tmp; | ||
atax_args_t *local_args; | ||
|
||
#ifndef JOB_ARGS_PRELOADED | ||
// Allocate space for job arguments in TCDM | ||
local_args = (atax_args_t *)snrt_l1_alloc_cluster_local(sizeof(atax_args_t), | ||
sizeof(double)); | ||
|
||
// Copy job arguments to TCDM | ||
if (snrt_is_dm_core()) { | ||
snrt_dma_start_1d(local_args, args, sizeof(atax_args_t)); | ||
snrt_dma_wait_all(); | ||
} | ||
// Barrier before any core dereferences local_args below.
snrt_cluster_hw_barrier(); | ||
#else | ||
// Arguments are used in place, without a copy.
local_args = (atax_args_t *)args; | ||
#endif | ||
|
||
// Aliases | ||
uint32_t M = local_args->M; | ||
uint32_t N = local_args->N; | ||
double *A = (double *)(local_args->A_addr); | ||
double *x = (double *)(local_args->x_addr); | ||
double *y = (double *)(local_args->y_addr); | ||
|
||
// Allocate local variables | ||
size_t size_A = M * N * sizeof(double); | ||
size_t size_x = N * sizeof(double); | ||
size_t size_y = N * sizeof(double); | ||
size_t size_tmp = M * sizeof(double); | ||
// Only a 1/snrt_cluster_num() share of y is allocated locally; the same
// share is what gets written back below.
size_t size_y_tile = size_y / snrt_cluster_num(); | ||
local_A = snrt_l1_alloc_cluster_local(size_A, sizeof(double)); | ||
local_x = snrt_l1_alloc_cluster_local(size_x, sizeof(double)); | ||
local_y = snrt_l1_alloc_cluster_local(size_y_tile, sizeof(double)); | ||
local_tmp = snrt_l1_alloc_cluster_local(size_tmp, sizeof(double)); | ||
|
||
// Initialize input matrices | ||
if (snrt_is_dm_core()) { | ||
snrt_dma_start_1d(local_A, A, size_A); | ||
snrt_dma_start_1d(local_x, x, size_x); | ||
snrt_dma_wait_all(); | ||
} | ||
snrt_mcycle(); | ||
// Barrier so the compute step starts only after the input copies above.
snrt_cluster_hw_barrier(); | ||
|
||
// Compute | ||
atax(M, N, local_A, local_x, local_y, local_tmp); | ||
snrt_cluster_hw_barrier(); | ||
snrt_mcycle(); | ||
|
||
// Writeback results | ||
if (snrt_is_dm_core()) { | ||
// Tile index is the cluster index; tile length is N / snrt_cluster_num()
// elements of sizeof(double) bytes.
snrt_dma_store_1d_tile(y, local_y, snrt_cluster_idx(), | ||
N / snrt_cluster_num(), sizeof(double)); | ||
snrt_dma_wait_all(); | ||
snrt_mcycle(); | ||
} | ||
snrt_cluster_hw_barrier(); | ||
|
||
// Free memory | ||
// The pointer passed is the first allocation made by this function in each
// configuration (local_args, or local_A when the arguments were preloaded).
// NOTE(review): presumably this rewinds the L1 allocator to that address -
// confirm the semantics of snrt_l1_update_next_v2.
#ifndef JOB_ARGS_PRELOADED | ||
snrt_l1_update_next_v2(local_args); | ||
#else | ||
snrt_l1_update_next_v2(local_A); | ||
#endif | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# Author: Jose Pedro Castro Fonseca <jose.pc.fonseca@gmail, [email protected]> | ||
# Author: Jose Pedro Castro Fonseca <[email protected]> | ||
# Luca Colagrande <[email protected]> | ||
|
||
import numpy as np | ||
|
@@ -21,9 +21,24 @@ class CorrelationDataGen(du.DataGen): | |
# Reference model for the correlation kernel: NumPy correlation coefficients
# of data. With rowvar=False each column of data is treated as a variable and
# each row as an observation.
def golden_model(self, data): | ||
return np.corrcoef(data, rowvar=False) | ||
|
||
# Check that the problem size (M, N) is acceptable before data generation.
# Raises AssertionError when M is not a multiple of 8, then hands the summed
# buffer footprint to du.validate_tcdm_footprint. Extra keyword arguments are
# accepted and ignored.
def validate(self, M, N, **kwargs): | ||
assert (M % 8) == 0, "M must be an integer multiple of the number of cores" | ||
|
||
# Calculate total TCDM occupation | ||
# NOTE(review): the factor 8 is presumably the size in bytes of one
# double-precision element - confirm against the C kernel.
data_size = N * M * 8 | ||
corr_size = M * M * 8 | ||
stddev_size = M * 8 | ||
total_size = data_size | ||
total_size += corr_size | ||
total_size += stddev_size | ||
du.validate_tcdm_footprint(total_size) | ||
|
||
def emit_header(self, **kwargs): | ||
header = [super().emit_header()] | ||
|
||
# Validate parameters | ||
self.validate(**kwargs) | ||
|
||
M, N = kwargs['M'], kwargs['N'] | ||
data = du.generate_random_array((N, M)) | ||
corr = self.golden_model(data) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright 2024 ETH Zurich and University of Bologna. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
// Author: Luca Colagrande <[email protected]> | ||
|
||
#pragma once | ||
#include <stdint.h> | ||
|
||
// Job arguments for the correlation kernel.
// NOTE(review): the consumer of this struct is not visible here. Presumably
// data is an N x M array and corr an M x M array, matching the Python data
// generator (data of shape (N, M), footprint M * M elements for corr) -
// confirm against the kernel source.
typedef struct { | ||
uint32_t N; | ||
uint32_t M; | ||
// Addresses stored as 64-bit integers.
uint64_t data_addr; | ||
uint64_t corr_addr; | ||
} correlation_args_t; |
Oops, something went wrong.