-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add benchmark data and plots * Add benchmark page to docs * Update benchmarks/README.md * Add alt text
- Loading branch information
Showing
27 changed files
with
7,941 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[deps] | ||
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" | ||
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" | ||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" | ||
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" | ||
FINUFFT = "d8beea63-0952-562e-9c6a-8e8ef7364055" | ||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | ||
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | ||
NonuniformFFTs = "cd96f58b-6017-4a02-bb9e-f4d81626177f" | ||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" | ||
ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" | ||
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" | ||
|
||
[extras] | ||
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Benchmarks | ||
|
||
This directory contains scripts for executing benchmarks (`run_benchmarks.jl`) | ||
and for plotting the generated results (`plots/plot_benchmarks.jl`). | ||
|
||
It also contains raw benchmark results (in `results`) and their associated | ||
plots (`plots/*.svg`) which are discussed in the Benchmarks section of the docs. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[deps] | ||
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" | ||
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" | ||
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
using GLMakie | ||
using CairoMakie | ||
using DelimitedFiles | ||
|
||
# Make GLMakie the active Makie backend for figures created by this script.
# SVG files are written separately by passing `backend = CairoMakie` to `save`
# inside `plot_benchmark`.
GLMakie.activate!() | ||
|
||
"""
    read_timings(io::IO; nufft_type::Int)

Read benchmark timings from a whitespace-delimited results stream.

Leading lines that start with `#` are skipped as header comments. The remaining
lines are parsed with `readdlm` as a `Float64` table.

# Keywords
- `nufft_type`: which NUFFT type to extract, `1` or `2`. Timings are taken from
  column `1 + nufft_type` of the table.

# Returns
A named tuple `(; Np, times)` of views into the parsed table: `Np` is column 1
and `times` is column `1 + nufft_type`.

# Throws
- `ArgumentError` if `nufft_type` is not `1` or `2`, or if the stream contains
  no data lines after the header.
"""
function read_timings(io::IO; nufft_type::Int)
    # Columns beyond the third hold relative errors, not timings; reject anything
    # that would silently select one of those.
    nufft_type ∈ (1, 2) ||
        throw(ArgumentError("nufft_type must be 1 or 2, got $nufft_type"))

    # Skip the commented header. `eof` is checked first because `peek` throws an
    # `EOFError` on an exhausted stream (e.g. a file with only header lines).
    while !eof(io) && peek(io, Char) == '#'
        readline(io)
    end
    eof(io) && throw(ArgumentError("no benchmark data found after the header"))

    data = readdlm(io, Float64)
    return (; Np = @view(data[:, 1]), times = @view(data[:, 1 + nufft_type]))
end
|
||
"""
    read_timings(fname::AbstractString; kws...)

Open the file `fname` and read its timings through the `IO` method of
`read_timings`, forwarding all keyword arguments. The file is closed when the
read finishes.
"""
function read_timings(fname::AbstractString; kws...)
    return open(fname) do io
        read_timings(io; kws...)
    end
end
|
||
"""
    plot_from_file!(ax::Axis, filename; nufft_type, zorder = nothing, kws...)

Read the timings stored in `filename` for the given `nufft_type` and draw them
on `ax` with `scatterlines!` (number of points on x, time on y).

The marker stroke colour is set to the colour of the line. When `zorder` is
given, the plot is translated along z by that amount. Any remaining keyword
arguments are forwarded to `scatterlines!`.

Returns the plot object created by `scatterlines!`.
"""
function plot_from_file!(ax::Axis, filename; nufft_type, zorder = nothing, kws...)
    timings = read_timings(filename; nufft_type)
    curve = scatterlines!(ax, timings.Np, timings.times; kws...)
    # Outline the markers in the same colour as the line itself.
    curve.strokecolor[] = curve.color[]
    isnothing(zorder) || translate!(curve, 0, 0, zorder)
    return curve
end
|
||
"""
    plot_benchmark(::Type{T}; nufft_type = 1, save_svg = false,
                   results_dir = joinpath(@__DIR__, "..", "results")) where {T <: Number}

Plot benchmark timings (time versus number of nonuniform points) for data of
element type `T` on a hard-coded 256³ grid, comparing NonuniformFFTs and
(Cu)FINUFFT on CPU and GPU.

The FINUFFT and CuFINUFFT curves always use the results for `complex(T)`.

# Keywords
- `nufft_type`: NUFFT type (1 or 2) whose timings are plotted.
- `save_svg`: if `true`, also write the figure to
  `benchmark_<T>_type<nufft_type>.svg` (relative to the current working
  directory) using CairoMakie.
- `results_dir`: directory containing the `.dat` result files. It defaults to
  the `results` directory next to this script's directory, so the script does
  not depend on the current working directory.

# Returns
The generated `Figure`.
"""
function plot_benchmark(
        ::Type{T};
        nufft_type = 1, save_svg = false,
        results_dir = joinpath(@__DIR__, "..", "results"),
    ) where {T <: Number}
    Ns = (256, 256, 256)
    N = first(Ns)
    Ngrid = prod(Ns)
    Z = complex(T)  # for FINUFFT (complex data only)

    fig = Figure(size = (750, 440))
    ax = Axis(
        fig[1, 1];
        xscale = log10, yscale = log10,
        xlabel = L"Number of nonuniform points $N$", xlabelsize = 16,
        ylabel = "Time (s)",
        xticks = LogTicks(0:20), xminorticks = IntervalsBetween(9), xminorticksvisible = true,
        yticks = LogTicks(-8:3), yminorticks = IntervalsBetween(9), yminorticksvisible = true,
        xgridvisible = false, ygridvisible = false,
        limits = (nothing, (1e-3, 1e1)),
    )

    # Overlaid top axis showing the same x range as a point density: its limits
    # follow the main axis limits, with x divided by the total grid size.
    limits_top = lift(ax.finallimits) do lims
        xlims = Makie.xlimits(lims) ./ Ngrid
        ylims = Makie.ylimits(lims)
        xlims, ylims
    end
    ax_top = Axis(
        fig[1, 1];
        xscale = ax.xscale, yscale = ax.yscale,
        xaxisposition = :top,
        xticks = LogTicks(-8:2), xminorticks = IntervalsBetween(9), xminorticksvisible = true,
        xlabel = L"Point density $ρ = N / M^3$", xlabelsize = 16,
        limits = limits_top,
    )
    hidespines!(ax_top)
    hideydecorations!(ax_top; grid = false)

    colours = Makie.wong_colors()

    kws_all = (; nufft_type,)
    kws_cpu = (; marker = :x, markersize = 16, strokewidth = 0,)
    kws_gpu = (; marker = :circle, markersize = 10, strokewidth = 2,)
    kws_gpu_sm = (; kws_gpu..., markercolor = :transparent,)  # open symbols
    kws_nonuniform = (; linestyle = :solid, color = colours[1], zorder = 10,)
    kws_finufft = (; linestyle = :dash, color = colours[2],)

    # Leftmost point of every CPU/GPU curve and rightmost point of every GPU
    # curve (used for the annotations and the scaling guide below).
    first_points_cpu = Point2{Float64}[]
    first_points_gpu = Point2{Float64}[]
    last_points_gpu = Point2{Float64}[]

    # (file name, legend label, per-library style, per-device style, is GPU?)
    # The order here is the plotting order and therefore the legend order.
    curves = (
        ("NonuniformFFTs_$(N)_$(T)_CPU.dat", "NonuniformFFTs CPU", kws_nonuniform, kws_cpu, false),
        ("NonuniformFFTs_$(N)_$(T)_CUDABackend_global_memory.dat", "NonuniformFFTs GPU", kws_nonuniform, kws_gpu, true),
        ("NonuniformFFTs_$(N)_$(T)_CUDABackend_shared_memory.dat", "NonuniformFFTs GPU (SM)", kws_nonuniform, kws_gpu_sm, true),
        ("FINUFFT_$(N)_$(Z)_CPU.dat", "FINUFFT CPU", kws_finufft, kws_cpu, false),
        ("CuFINUFFT_$(N)_$(Z)_global_memory.dat", "CuFINUFFT GPU", kws_finufft, kws_gpu, true),
        ("CuFINUFFT_$(N)_$(Z)_shared_memory.dat", "CuFINUFFT GPU (SM)", kws_finufft, kws_gpu_sm, true),
    )

    for (fname, label, kws_library, kws_device, on_gpu) in curves
        l = plot_from_file!(
            ax, joinpath(results_dir, fname);
            label, kws_library..., kws_device..., kws_all...,
        )
        points = l[1][]  # data points of the plotted curve
        if on_gpu
            push!(first_points_gpu, first(points))
            push!(last_points_gpu, last(points))
        else
            push!(first_points_cpu, first(points))
        end
    end

    # Label each group of curves just above its highest leftmost point.
    for (points, device) in ((first_points_cpu, "CPU"), (first_points_gpu, "GPU"))
        x = minimum(p -> p[1], points)  # generally all points x are the same
        y = maximum(p -> p[2], points)
        text!(ax, x, y; text = rich(device; font = :bold), align = (:left, :bottom), offset = (0, 8), fontsize = 16)
    end

    # Linear-scaling guide (∼N), drawn in density units on the top axis and
    # placed slightly (factor 0.7) below the fastest rightmost GPU point.
    let xs = logrange(0.5, 10.0; length = 3)
        ymin, n = findmin(p -> p[2], last_points_gpu)
        xmin = last_points_gpu[n][1] / Ngrid  # as a density ρ
        scale = ymin / xmin * 0.7
        ys = @. xs * scale
        lines!(ax_top, xs, ys; linestyle = :dash, color = :black, linewidth = 3)
        text!(ax_top, xs[2], ys[2]; text = L"∼N", align = (:left, :top), fontsize = 18)
    end

    Label(
        fig[1, 2][1, 1],
        """
        Type-$(nufft_type) NUFFT
        $(N)³ Fourier modes
        $T data
        6-digit accuracy
        """;
        justification = :left, fontsize = 16, lineheight = 1.2,
    )
    Legend(fig[1, 2][2, 1], ax; framevisible = false, rowgap = 8, labelsize = 14)

    if save_svg
        save("benchmark_$(T)_type$(nufft_type).svg", fig; backend = CairoMakie)
    end

    return fig
end
|
||
# Regenerate the SVG plot for every combination of element type and NUFFT type.
for T in (Float64, ComplexF64)
    for nufft_type in (1, 2)
        plot_benchmark(T; nufft_type = nufft_type, save_svg = true)
    end
end

# Keep one figure (ComplexF64, type 1) bound to `fig` without saving it again.
fig = plot_benchmark(ComplexF64; nufft_type = 1)
22 changes: 22 additions & 0 deletions
22
benchmarks/results/CuFINUFFT_256_ComplexF64_global_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# CuFINUFFT | ||
# Benchmark: NUFFT of scalar data | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Relative tolerance: 1.0e-6 | ||
# - GPU sort: 1 | ||
# - GPU kernel evaluation method: 1 | ||
# - Order of Fourier modes (modeord): 1 | ||
# - GPU method: 1 (global_memory) | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.010049466 0.010189318 1.3870120840117604e-6 3.1010784379441936e-7 | ||
5305 0.010149413 0.0102016 1.3863655070435886e-6 3.1806259113920807e-7 | ||
16777 0.010432813 0.0102544095 1.3870558057308985e-6 3.4374717946022066e-7 | ||
53054 0.01117762 0.010611818 1.3888666894696818e-6 4.022040070391591e-7 | ||
167772 0.01320493 0.011400021 1.3864687098220945e-6 5.396416394710086e-7 | ||
530542 0.017729944 0.013321508 1.3879307982260866e-6 8.077188843675696e-7 | ||
1677722 0.030144937 0.0174889445 1.3883252357972235e-6 1.1943118827017768e-6 | ||
5305422 0.071517286 0.031759459 1.3881328727539812e-6 1.57036542866348e-6 | ||
16777216 0.2098746265 0.070962529 1.3868072524375616e-6 1.803777670735224e-6 | ||
53054215 0.741148733 0.182403937 1.388143599891377e-6 1.9030418314879922e-6 | ||
167772160 2.606663643 0.5031452625 1.387107956477105e-6 1.9385382768826457e-6 |
22 changes: 22 additions & 0 deletions
22
benchmarks/results/CuFINUFFT_256_ComplexF64_shared_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# CuFINUFFT | ||
# Benchmark: NUFFT of scalar data | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Relative tolerance: 1.0e-6 | ||
# - GPU sort: 1 | ||
# - GPU kernel evaluation method: 1 | ||
# - Order of Fourier modes (modeord): 1 | ||
# - GPU method: 2 (shared_memory) | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.011692011 0.010875831 1.387012084011762e-6 3.101078437955394e-7 | ||
5305 0.014491329 0.011954708 1.3863655070436594e-6 3.180625911384002e-7 | ||
16777 0.022250966 0.014949265 1.3870558057312356e-6 3.437471794564674e-7 | ||
53054 0.0403717795 0.021438011 1.3888666894692987e-6 4.0220400703767747e-7 | ||
167772 0.076147689 0.032283476 1.3864687098221654e-6 5.39641639470162e-7 | ||
530542 0.1424077785 0.046160006 1.3879307982261694e-6 8.07718884367156e-7 | ||
1677722 0.24217535 0.061705163 1.3883252357980463e-6 1.1943118827025802e-6 | ||
5305422 0.3729179835 0.077908472 1.3881328727557852e-6 1.5703654286660455e-6 | ||
16777216 0.59926713 0.099986922 1.3868072524389908e-6 1.8037776707365977e-6 | ||
53054215 1.140496802 0.152142785 1.3881435998922012e-6 1.9030418314885585e-6 | ||
167772160 2.7159741345 0.309835595 1.3871079564877375e-6 1.9385382768972833e-6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# FINUFFT (CPU) | ||
# Benchmark: NUFFT of scalar data | ||
# - Device: AMD EPYC 7302 16-Core Processor (znver2, 32 threads | ||
# - Number of threads: 32 | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Relative tolerance: 1.0e-6 | ||
# - Sort (spread_sort): 1 | ||
# - Kernel evaluation method (spread_kerevalmeth): 1 | ||
# - Order of Fourier modes (modeord): 1 | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.5005381625 0.253770516 3.731950120546327e-6 6.646216249772271e-7 | ||
5305 0.552043342 0.239666509 3.7290631645038357e-6 6.72066525127654e-7 | ||
16777 0.577708378 0.261397829 3.734294780588139e-6 7.464022115781271e-7 | ||
53054 0.673337338 0.274950167 3.7239928569652815e-6 9.119287514989875e-7 | ||
167772 0.729330451 0.283593921 3.729391822696644e-6 1.3275103721977031e-6 | ||
530542 0.738622538 0.32117297 3.7359429392970702e-6 2.0583360832214665e-6 | ||
1677722 0.800928979 0.368688791 3.7286290440589845e-6 3.078078920315979e-6 | ||
5305422 0.9500394215 0.5086966405 3.7276066153558594e-6 4.075858583640244e-6 | ||
16777216 1.219703752 0.916775313 3.7307501803740638e-6 4.693500207626813e-6 | ||
53054215 2.09705826 1.437311869 3.724478960801549e-6 4.947480478356764e-6 | ||
167772160 4.7336734635 3.0110193405 3.7328363128680543e-6 5.056647405745642e-6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# NonuniformFFTs.jl using CPU | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CPU | ||
# - Device: AMD EPYC 7302 16-Core Processor (znver2, 32 threads | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - Number of threads: 32 | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.401702684 0.217824823 1.281824057186365e-6 3.011939154273146e-7 | ||
5305 0.478089718 0.226900624 1.277407211115232e-6 3.060539615508211e-7 | ||
16777 0.556644054 0.226762416 1.2776077397706963e-6 3.201846369029206e-7 | ||
53054 0.6612915435 0.24357653 1.2839845138440086e-6 3.7866796040564127e-7 | ||
167772 0.705163086 0.264036265 1.286857150143559e-6 5.066732411096722e-7 | ||
530542 0.791394353 0.282972265 1.2874042722043801e-6 7.458672638907114e-7 | ||
1677722 0.803317388 0.3235546355 1.2875233459096014e-6 1.0983844251011532e-6 | ||
5305422 0.890094409 0.4209180075 1.285417860634058e-6 1.4413882842227143e-6 | ||
16777216 1.108990892 0.875500309 1.2870935078768847e-6 1.65653108658738e-6 | ||
53054215 1.807307983 1.752785424 1.2862609599745603e-6 1.745912493773992e-6 | ||
167772160 5.076364579 4.7441131055 1.2854795078661256e-6 1.7777658759107162e-6 |
21 changes: 21 additions & 0 deletions
21
benchmarks/results/NonuniformFFTs_256_ComplexF64_CUDABackend_global_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# NonuniformFFTs.jl using CUDABackend | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CUDABackend | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - GPU method: global_memory | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.007174481 0.0069437445 1.7461299210416866e-6 3.51735849884217e-7 | ||
5305 0.007340271 0.006967824 1.7448299030724668e-6 3.5935428480849213e-7 | ||
16777 0.007364155 0.006959589 1.7441440240783446e-6 3.8526407361623966e-7 | ||
53054 0.008016956 0.0071371565 1.7459043209583552e-6 4.681070024635566e-7 | ||
167772 0.01129348 0.0080823985 1.744026447406056e-6 6.545796093721018e-7 | ||
530542 0.018176627 0.0098429595 1.7433317941794872e-6 9.95894287098444e-7 | ||
1677722 0.036135638 0.013180793 1.7455342688942634e-6 1.485682919911522e-6 | ||
5305422 0.095645657 0.023485831 1.743499377366084e-6 1.961321173351077e-6 | ||
16777216 0.308876142 0.057238519 1.7457314354108132e-6 2.2579240564636625e-6 | ||
53054215 1.102950989 0.166080969 1.742616495241337e-6 2.3789078434566558e-6 | ||
167772160 3.9515899275 0.51076222 1.743980969237664e-6 2.4261231727166925e-6 |
21 changes: 21 additions & 0 deletions
21
benchmarks/results/NonuniformFFTs_256_ComplexF64_CUDABackend_shared_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# NonuniformFFTs.jl using CUDABackend | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CUDABackend | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: ComplexF64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - GPU method: shared_memory | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.017566898 0.013264335 1.7461299210424742e-6 3.517358498891171e-7 | ||
5305 0.018137655 0.013418704 1.7448299030716066e-6 3.59354284813164e-7 | ||
16777 0.019053637 0.013943315 1.7441440240776934e-6 3.852640736163668e-7 | ||
53054 0.020030834 0.014643423 1.7459043209587112e-6 4.681070024594046e-7 | ||
167772 0.021095564 0.015789195 1.7440264474057985e-6 6.545796093692288e-7 | ||
530542 0.022440168 0.0175126215 1.7433317941788803e-6 9.958942870956675e-7 | ||
1677722 0.026032583 0.020145207 1.7455342688943773e-6 1.4856829199119722e-6 | ||
5305422 0.0378931535 0.0263968385 1.7434993773628165e-6 1.9613211733482794e-6 | ||
16777216 0.078095254 0.046994287 1.7457314354124795e-6 2.2579240564680298e-6 | ||
53054215 0.2090714075 0.114592533 1.7426164952268956e-6 2.3789078434342945e-6 | ||
167772160 0.6257227505 0.3285903345 1.743980969245702e-6 2.426123172726822e-6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# NonuniformFFTs.jl using CPU | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CPU | ||
# - Device: AMD EPYC 7302 16-Core Processor (znver2, 32 threads | ||
# - Element type: Float64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - Number of threads: 32 | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.157921409 0.057483433 1.2992421887135856e-6 3.0117054596563075e-7 | ||
5305 0.185425857 0.054794236 1.2840271425855802e-6 2.9623411061857803e-7 | ||
16777 0.2107738795 0.060259554 1.2883263838044824e-6 3.169193033522629e-7 | ||
53054 0.259005555 0.067185219 1.2977247862698859e-6 3.801684137736015e-7 | ||
167772 0.299163513 0.082973939 1.2934667124620822e-6 5.032368971604971e-7 | ||
530542 0.295613095 0.093088267 1.2963805116600738e-6 7.476721873945614e-7 | ||
1677722 0.338260935 0.122719703 1.296796876868053e-6 1.1055503659183577e-6 | ||
5305422 0.4419285785 0.217629578 1.2973073932578797e-6 1.4521190807869027e-6 | ||
16777216 0.650143119 0.586211104 1.2963264136956878e-6 1.6655878400956973e-6 | ||
53054215 1.364857875 1.474264205 1.297942847667679e-6 1.7599064232383652e-6 |
21 changes: 21 additions & 0 deletions
21
benchmarks/results/NonuniformFFTs_256_Float64_CUDABackend_global_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# NonuniformFFTs.jl using CUDABackend | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CUDABackend | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: Float64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - GPU method: global_memory | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.003320917 0.003230889 1.7468019419364787e-6 3.5090636814544796e-7 | ||
5305 0.003324334 0.003240768 1.7436883516306644e-6 3.569900784629132e-7 | ||
16777 0.003340074 0.003251238 1.7428919558873212e-6 3.933815099546998e-7 | ||
53054 0.003735293 0.003365211 1.74393734550516e-6 4.7633118362671775e-7 | ||
167772 0.00541354 0.00402824 1.7443022952469846e-6 6.543784470139837e-7 | ||
530542 0.008519906 0.006309474 1.7447103632569267e-6 9.96697648602348e-7 | ||
1677722 0.01911457 0.008761147 1.7430259600594823e-6 1.4888903328036116e-6 | ||
5305422 0.051812251 0.015686412 1.7458544701717223e-6 1.9669560205696403e-6 | ||
16777216 0.176896208 0.039465031 1.7465137652756605e-6 2.2609508058937542e-6 | ||
53054215 0.6754469325 0.1147949335 1.7441674213003973e-6 2.3841887129635894e-6 | ||
167772160 2.503391267 0.351748967 1.7445912696046888e-6 2.428869915799653e-6 |
21 changes: 21 additions & 0 deletions
21
benchmarks/results/NonuniformFFTs_256_Float64_CUDABackend_shared_memory.dat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# NonuniformFFTs.jl using CUDABackend | ||
# Benchmark: NUFFT of scalar data | ||
# - Backend: CUDABackend | ||
# - Device: NVIDIA A100 80GB PCIe | ||
# - Element type: Float64 | ||
# - Grid size: (256, 256, 256) | ||
# - Oversampling factor: 1.5 | ||
# - Half support: HalfSupport{4}() | ||
# - GPU method: shared_memory | ||
# (1) Number of points (2) Type 1 (median, s) (3) Type 2 (median, s) (4) Relative error type 1 (5) Relative error type 2 | ||
1678 0.008353976 0.0070949475 1.7468019419382034e-6 3.5090636817904113e-7 | ||
5305 0.008659046 0.0072636425 1.7436883516277096e-6 3.5699007845828123e-7 | ||
16777 0.009093549 0.007541187 1.7428919558873898e-6 3.9338150996385125e-7 | ||
53054 0.009523572 0.007862112 1.7439373455033794e-6 4.7633118363122153e-7 | ||
167772 0.0100324315 0.008284588 1.7443022952456069e-6 6.543784470131989e-7 | ||
530542 0.011138013 0.008922181 1.744710363257602e-6 9.966976486010409e-7 | ||
1677722 0.014576812 0.010465275 1.743025960053548e-6 1.4888903327977372e-6 | ||
5305422 0.025735163 0.0149117685 1.7458544701617326e-6 1.9669560205599723e-6 | ||
16777216 0.063339967 0.030928607 1.7465137652730675e-6 2.26095080589055e-6 | ||
53054215 0.1827624215 0.08283842 1.7441674213202952e-6 2.3841887129839585e-6 | ||
167772160 0.560682647 0.246603554 1.7445912696151422e-6 2.4288699158223415e-6 |
Oops, something went wrong.