Scalar-build fixes for the benchmark applications.

What the hunks below change:
  * apps/benchmarks/benchmark/*.bmark: rewrite the calls in the `#else`
    (SCALAR) branches. fconv3d and roi_align now call the same kernel as the
    non-SCALAR branch; pathfinder's run() and softmax() receive one extra
    buffer argument (src / buf).
  * apps/exp/main.c: print a banner before the scalar benchmark is timed.
  * apps/fconv3d: rename conv3d_CHxFxF_scalar to fconv3d_CHxFxF_scalar, rename
    the row counter from `i` to `r` (it collided with the pointer parameter
    `i`), and index through the parameters o / i / f instead of the
    undeclared names result / a / w.
  * apps/fmatmul: fmatmul_scalar indexes its flat `double *` arguments as
    row-major arrays; main.c, under SCALAR, clears g and recomputes it with
    fmatmul_scalar before the verification step.
  * apps/roi_align/main.c: run the scalar kernel after the vector one and
    compare both outputs with verify_result.

Review fix folded into this patch: fconv3d_CHxFxF_scalar is declared to return
`double*`, so the deleted `return result;` is replaced by `return o;` rather
than dropped.

diff --git a/apps/benchmarks/benchmark/fconv3d.bmark b/apps/benchmarks/benchmark/fconv3d.bmark
index 9e3186a03..d61df8602 100644
--- a/apps/benchmarks/benchmark/fconv3d.bmark
+++ b/apps/benchmarks/benchmark/fconv3d.bmark
@@ -46,7 +46,7 @@ void warm_caches(uint64_t heat) {
 #ifndef SCALAR
   fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #else
-  fconv3d_CVxFxF(o, i, f, M, N, CH, F);
+  fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #endif
   // The following artificial warming ensures, with a larger cache,
   // not to experience any cache misses
@@ -71,7 +71,7 @@ int main() {
 #ifndef SCALAR
     fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #else
-    fconv3d_CVxFxF(o, i, f, M, N, CH, F);
+    fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #endif
   else {
     printf("Error: the filter size is different from 7.\n");
diff --git a/apps/benchmarks/benchmark/pathfinder.bmark b/apps/benchmarks/benchmark/pathfinder.bmark
index 6dcc6a242..822ea5bfd 100644
--- a/apps/benchmarks/benchmark/pathfinder.bmark
+++ b/apps/benchmarks/benchmark/pathfinder.bmark
@@ -27,6 +27,7 @@ extern int32_t num_runs;
 extern int32_t rows;
 extern int32_t cols;
 
+extern int src[] __attribute__((aligned(4 * NR_LANES), section(".l2")));
 extern int wall[] __attribute__((aligned(4 * NR_LANES), section(".l2")));
 extern int result_v[] __attribute__((aligned(4 * NR_LANES), section(".l2")));
 
@@ -35,7 +36,7 @@ void warm_caches(uint64_t heat) {
 #ifndef SCALAR
   run_vector(wall, result_v, cols, rows, num_runs);
 #else
-  run(wall, result_v, cols, rows, num_runs);
+  run(wall, result_v, src, cols, rows, num_runs);
 #endif
 }
 
@@ -65,7 +66,7 @@ int main() {
 #ifndef SCALAR
     run_vector(wall, result_v, cols, rows, num_runs);
 #else
-    run(wall, result_v, cols, rows, num_runs);
+    run(wall, result_v, src, cols, rows, num_runs);
 #endif
     stop_timer();
   } else {
@@ -73,7 +74,7 @@ int main() {
 #ifndef SCALAR
     run_vector_short_m4(wall, result_v, cols, rows, num_runs, neutral_value);
 #else
-    run(wall, result_v, cols, rows, num_runs, neutral_value);
+    run(wall, result_v, src, cols, rows, num_runs);
 #endif
     stop_timer();
   }
diff --git a/apps/benchmarks/benchmark/roi_align.bmark b/apps/benchmarks/benchmark/roi_align.bmark
index 29e35c938..208f99efe 100644
--- a/apps/benchmarks/benchmark/roi_align.bmark
+++ b/apps/benchmarks/benchmark/roi_align.bmark
@@ -64,8 +64,8 @@ int main() {
 #ifndef SCALAR
   roi_align_fake_kernel_asm(image_data, crops_data_vec, 0, 0, 0, 0, DEPTH);
 #else
+  roi_align_fake_kernel_asm(image_data, crops_data_vec, 0, 0, 0, 0, DEPTH);
 #endif
-  roi_align_fake_kernel_scalar(image_data, crops_data_vec, 0, 0, 0, 0, DEPTH);
   stop_timer();
   runtime = get_timer();
 
diff --git a/apps/benchmarks/benchmark/softmax.bmark b/apps/benchmarks/benchmark/softmax.bmark
index f6850d266..67bfdf3f9 100644
--- a/apps/benchmarks/benchmark/softmax.bmark
+++ b/apps/benchmarks/benchmark/softmax.bmark
@@ -45,7 +45,7 @@ void warm_caches(uint64_t heat) {
 #ifndef SCALAR
   softmax_vec(i, o_v, channels, innerSize);
 #else
-  softmax(i, o_v, channels, innerSize);
+  softmax(i, o_v, buf, channels, innerSize);
 #endif
 }
 
@@ -62,7 +62,7 @@ int main() {
 #ifndef SCALAR
   softmax_vec(i, o_v, channels, innerSize);
 #else
-  softmax(i, o_v, channels, innerSize);
+  softmax(i, o_v, buf, channels, innerSize);
 #endif
   stop_timer();
 
diff --git a/apps/exp/main.c b/apps/exp/main.c
index f84aa62dd..26e6cb298 100644
--- a/apps/exp/main.c
+++ b/apps/exp/main.c
@@ -72,6 +72,7 @@ int main() {
   runtime = get_timer();
   printf("The execution took %d cycles.\n", runtime);
 #else
+  printf("SCALAR CODE! \n");
   start_timer();
   exp_1xf64_scalar_bmark(exponents_f64, results_f64, N_f64);
   stop_timer();
diff --git a/apps/fconv3d/fconv3d.h b/apps/fconv3d/fconv3d.h
index 15a2d4986..a51b3f143 100644
--- a/apps/fconv3d/fconv3d.h
+++ b/apps/fconv3d/fconv3d.h
@@ -22,7 +22,7 @@
 #include
 #include
 
-double* conv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M, int64_t N,
+double* fconv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M, int64_t N,
                         int64_t C, int64_t F);
 
 void fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
diff --git a/apps/fconv3d/fconv3d_3x7x7.c b/apps/fconv3d/fconv3d_3x7x7.c
index c418a2f14..261072b40 100644
--- a/apps/fconv3d/fconv3d_3x7x7.c
+++ b/apps/fconv3d/fconv3d_3x7x7.c
@@ -53,19 +53,19 @@
 extern int64_t event_trigger;
 
 // a - 2D matrix (as a 1D array), w - kernel
-double* conv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M, int64_t N,
+double* fconv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M, int64_t N,
                         int64_t C, int64_t F) {
   double acc;
-  int i;
+  int r;
   int j;
   int k1, k2;
   int l1, l2;
   int t1, t2;
   int ch;
 
-  for(i = 0; i < (M + 2*F - 1); i++)
+  for(r = 0; r < (M + 2*F - 1); r++)
   {
-    t1 = i * N; // loop invariants
+    t1 = r * N; // loop invariants
     for(j = 0; j < (N + 2*F - 1); j++)
     {
       acc = 0.0;
@@ -76,15 +76,15 @@ double* conv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M, int64_t
           t2 = k1 * F; // loop invariants
           for(l1 = F - 1, l2 = 0; l1 >= 0; l1--, l2++)
           {
-            acc += w[t2 + l1 + F * F * ch] * a[(i + k2) * N + (j + l2) + (M + 2*F - 1) * (N + 2*F - 1) * ch];
+            acc += f[t2 + l1 + F * F * ch] * i[(r + k2) * (N + 2*F - 1) + (j + l2) + (M + 2*F - 1) * (N + 2*F - 1) * ch];
           }
         }
       }
-      result[t1 + j] = acc;
+      o[t1 + j] = acc;
     }
   }
 
-  return result;
+  return o; // declared double*: hand back the output buffer instead of falling off the end
 }
 
 void fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
diff --git a/apps/fconv3d/main.c b/apps/fconv3d/main.c
index 7f94db061..827c85e31 100644
--- a/apps/fconv3d/main.c
+++ b/apps/fconv3d/main.c
@@ -88,7 +88,7 @@ int main() {
 #ifndef SCALAR
     fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #else
-    fconv3d_CHxFxF_scalar(golden_o, i, f, M, N, CH, F);
+    fconv3d_CHx7x7(o, i, f, M, N, CH, F);
 #endif
   else
     printf("Error: the filter size is different from 7.\n");
diff --git a/apps/fmatmul/kernel/fmatmul.c b/apps/fmatmul/kernel/fmatmul.c
index 39753e52a..9602b4ae6 100644
--- a/apps/fmatmul/kernel/fmatmul.c
+++ b/apps/fmatmul/kernel/fmatmul.c
@@ -26,9 +26,9 @@ void fmatmul_scalar(double *c, const double *a, const double *b,
                     const unsigned long int P) {
   for (unsigned int i = 0; i < M; i++) {
     for (unsigned int j = 0; j < P; j++) {
-      c[i][j] = 0;
+      c[i * P + j] = 0;
       for (unsigned int k = 0; k < N; k++) {
-        c[i][j] += a[i][k] * b[k][j];
+        c[i * P + j] += a[i * N + k] * b[k * P + j];
       }
     }
   }
diff --git a/apps/fmatmul/main.c b/apps/fmatmul/main.c
index 7f97d09ec..ee6671e0b 100644
--- a/apps/fmatmul/main.c
+++ b/apps/fmatmul/main.c
@@ -93,6 +93,19 @@ int main() {
     printf("The performance is %f FLOP/cycle (%f%% utilization).\n",
            performance, utilization);
 
+#ifdef SCALAR
+    printf("Scalar code!\n");
+    // Verify scalar code
+    // Clear golden matrix
+    for (int r = 0; r < s; ++r) {
+      for (int c = 0; c < s; ++c) {
+        g[r * s + c] = 0;
+      }
+    }
+    // Run scalar on the ex-golden matrix
+    fmatmul_scalar(g, a, b, s, s, s);
+#endif
+
     // Verify the result only for s == M (to keep it simple)
     if (s == M) {
       printf("Verifying result...\n");
diff --git a/apps/roi_align/main.c b/apps/roi_align/main.c
index 2605b10c8..79b664220 100644
--- a/apps/roi_align/main.c
+++ b/apps/roi_align/main.c
@@ -126,6 +126,23 @@ int main() {
   stop_timer();
   runtime_v = get_timer();
   printf("Vector benchmark complete.\n");
+  printf("Scalar benchmark running...\n");
+  roi_align_fake_kernel_scalar(image_data, crops_data, left_x_index,
+                               right_x_index, b, y, DEPTH);
+  printf("Scalar benchmark complete...\n");
+
+  // Check for errors
+  err = verify_result(crops_data, crops_data_vec, result_size, DELTA);
+
+  if (err != 0) {
+    // Fix return code to match the index of the faulty element
+    err = (err == -1) ? 0 : err;
+    printf("Failed. Index %d: %x != %x\n", err, *((uint32_t *)&crops_data[err]),
+           *((uint32_t *)&crops_data_vec[err]));
+    return err + 1;
+  } else {
+    printf("Passed.\n");
+  }
 #endif