Add a scalar fconv3d reference kernel, selected at build time with -DSCALAR.

Review notes on this revision of the patch:
  * A single name, fconv3d_CHxFxF_scalar, is used by the declaration, the
    definition and every call site (the patch used three different spellings).
  * The scalar kernel reads and writes its own parameters o, i and f; it used
    the undeclared names result, a and w, and redeclared the parameter i as a
    loop index.
  * Kernel bounds and strides assume that o is M x N and that i holds C planes
    of (M+F-1) x (N+F-1) elements -- TODO confirm against fconv3d_CHx7x7.
  * NOTE(review): the filter is still applied flipped, as in the original
    patch; confirm that this matches the vector kernel.
  * NOTE(review): main.c passes golden_o as the output buffer while the
    benchmark passes o; confirm that this is intended.
  * NOTE(review): blank context lines, context indentation and the two
    #include targets in fconv3d.h could not be recovered from the flattened
    patch and are assumptions; regenerate the diff against the tree or apply
    it with whitespace tolerance.

diff --git a/apps/benchmarks/benchmark/fconv3d.bmark b/apps/benchmarks/benchmark/fconv3d.bmark
index b2af37e3e..9e3186a03 100644
--- a/apps/benchmarks/benchmark/fconv3d.bmark
+++ b/apps/benchmarks/benchmark/fconv3d.bmark
@@ -43,7 +43,11 @@ void warm_caches(uint64_t heat) {
   volatile double buf;
 
   for (uint64_t k = 0; k < heat; ++k)
+#ifndef SCALAR
     fconv3d_CHx7x7(o, i, f, M, N, CH, F);
+#else
+    fconv3d_CHxFxF_scalar(o, i, f, M, N, CH, F);
+#endif
   // The following artificial warming ensures, with a larger cache,
   // not to experience any cache misses
 #ifdef AD_HOC_WARMING
@@ -64,7 +68,11 @@ int main() {
   HW_CNT_READY;
   start_timer();
   if (F == 7)
+#ifndef SCALAR
     fconv3d_CHx7x7(o, i, f, M, N, CH, F);
+#else
+    fconv3d_CHxFxF_scalar(o, i, f, M, N, CH, F);
+#endif
   else {
     printf("Error: the filter size is different from 7.\n");
     return -1;
diff --git a/apps/fconv3d/fconv3d.h b/apps/fconv3d/fconv3d.h
index 26b24d659..15a2d4986 100644
--- a/apps/fconv3d/fconv3d.h
+++ b/apps/fconv3d/fconv3d.h
@@ -22,6 +22,9 @@
 #include <stdint.h>
 #include <stdio.h>
 
+double *fconv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M,
+                              int64_t N, int64_t C, int64_t F);
+
 void fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
                     int64_t C, int64_t F);
 
diff --git a/apps/fconv3d/fconv3d_3x7x7.c b/apps/fconv3d/fconv3d_3x7x7.c
index d4da1b9e9..c418a2f14 100644
--- a/apps/fconv3d/fconv3d_3x7x7.c
+++ b/apps/fconv3d/fconv3d_3x7x7.c
@@ -52,6 +52,37 @@
 
 extern int64_t event_trigger;
 
+// Scalar reference convolution: accumulates, over all C channels, an FxF
+// window of the input weighted by the flipped filter, for each output element.
+// Layout assumed, TODO confirm against fconv3d_CHx7x7: o is M x N, i holds C
+// planes of (M+F-1) x (N+F-1), f holds C planes of F x F. Returns o.
+double *fconv3d_CHxFxF_scalar(double *o, double *i, double *f, int64_t M,
+                              int64_t N, int64_t C, int64_t F) {
+  const int64_t i_cols = N + F - 1;             // padded input row length
+  const int64_t i_plane = (M + F - 1) * i_cols; // elements per input channel
+  const int64_t f_plane = F * F;                // elements per filter channel
+
+  for (int64_t r = 0; r < M; r++) {
+    for (int64_t c = 0; c < N; c++) {
+      double acc = 0.0;
+      for (int64_t ch = 0; ch < C; ch++) {
+        const double *i_ch = i + ch * i_plane;
+        const double *f_ch = f + ch * f_plane;
+        // Filter indices run backwards while input indices run forwards
+        for (int64_t k = 0; k < F; k++) {
+          for (int64_t l = 0; l < F; l++) {
+            acc += f_ch[(F - 1 - k) * F + (F - 1 - l)] *
+                   i_ch[(r + k) * i_cols + (c + l)];
+          }
+        }
+      }
+      o[r * N + c] = acc;
+    }
+  }
+
+  return o;
+}
+
 void fconv3d_CHx7x7(double *o, double *i, double *f, int64_t M, int64_t N,
                     int64_t C, int64_t F) {
 
diff --git a/apps/fconv3d/main.c b/apps/fconv3d/main.c
index 991691e29..7f94db061 100644
--- a/apps/fconv3d/main.c
+++ b/apps/fconv3d/main.c
@@ -85,7 +85,11 @@ int main() {
   // Call the main kernel, and measure cycles
   start_timer();
   if (F == 7)
+#ifndef SCALAR
     fconv3d_CHx7x7(o, i, f, M, N, CH, F);
+#else
+    fconv3d_CHxFxF_scalar(golden_o, i, f, M, N, CH, F);
+#endif
   else
     printf("Error: the filter size is different from 7.\n");
   stop_timer();