Skip to content

Commit

Permalink
[apps, scripts] Adjust kernel performance
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Nov 28, 2023
1 parent dd26ba3 commit 90588c3
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion apps/fft/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ int main() {
stop_timer();
runtime = get_timer();

- float perf = (float)10.0 * NFFT * (31 - __builtin_clz(NFFT)) / runtime;
+ float perf = (float)5.0 * NFFT * (31 - __builtin_clz(NFFT)) / runtime;
float max_perf = 6.0 / 5.0 * NR_LANES * 8.0 / sizeof(float);

printf("Performance: %f. Max perf: %f. Actual performance is %f%% of max.\n",
Expand Down
1 change: 1 addition & 0 deletions apps/jacobi2d/kernel/jacobi2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ WITH ACCESS OR USE OF THE SOFTWARE.
// Author: Matteo Perotti, ETH Zurich, <[email protected]>

#include "jacobi2d.h"
+ #define DOUBLE_BUFFERING

void j2d_s(uint64_t r, uint64_t c, DATA_TYPE *A, DATA_TYPE *B,
uint64_t tsteps) {
Expand Down
2 changes: 1 addition & 1 deletion apps/roi_align/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

// Execute only the central kernel with fake data
// Execute only on the channels
- //#define V_KERNEL_ONLY 1
+ #define V_KERNEL_ONLY 1

#define EXTRAPOLATION_VALUE 0

Expand Down
4 changes: 2 additions & 2 deletions scripts/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def fconv3d(args, cycles):
def jacobi2d(args, cycles):
size = int(args[0])
trash_0 = args[1]
- performance = 5 * (size-1) * (size-1) / cycles
+ performance = 2 * 5 * (size-1) * (size-1) / cycles
return [size, performance]
def dropout(args, cycles):
size = int(args[0])
Expand All @@ -71,7 +71,7 @@ def dropout(args, cycles):
def fft(args, cycles):
size = int(args[0])
dtype = args[1]
- performance = 10 * size * np.log2(size) / cycles
+ performance = 5 * size * np.log2(size) / cycles
return [size, performance]
def dwt(args, cycles):
size = int(args[0])
Expand Down

0 comments on commit 90588c3

Please sign in to comment.