Skip to content

Commit

Permalink
don't use CUDA as default
Browse files Browse the repository at this point in the history
  • Loading branch information
korbinian90 committed Dec 15, 2023
1 parent 916cda8 commit c57cb19
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 51 deletions.
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
name = "QuantitativeSusceptibilityMappingTGV"
uuid = "bd393529-335a-4aed-902f-5de61cc7ff49"
authors = ["Korbinian Eckstein [email protected]"]
version = "0.2.1"
version = "0.3.0"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
ImageMorphology = "787d08f9-d448-5407-9aad-5290dd7ab264"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
Expand All @@ -18,7 +17,6 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
CUDA = "4,5"
ImageFiltering = "0.7"
ImageMorphology = "0.4"
Interpolations = "0.14"
Expand Down
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ Under Linux: Make the file executable with `chmod +x tgv_qsm.jl` and run via
TE = 0.004 # in [s]
fieldstrength = 3 # in [T]
# Automatically runs on GPU, if a CUDA device is detected
chi = qsm_tgv(phase, mask, res; TE=TE, fieldstrength=fieldstrength);
savenii(chi, "chi", "<folder-to-save>")
Expand All @@ -97,11 +96,6 @@ The first execution might take some time to compile the kernels (~1min).

The default settings were optimized for brain QSM and should lead to good results independently of the acquired resolution.

```julia
# Run on CPU in parallel
chi = qsm_tgv(phase, mask, res; TE, fieldstrength, gpu=false);
```

It uses the number of CPU threads julia was started with. You can use `julia --threads=auto` or set it to a specific number of threads.

### Other optional keyword arguments
Expand Down Expand Up @@ -146,10 +140,15 @@ qsm = qsm_tgv(phase, mask, res; TE, fieldstrength, laplacian=get_laplace_phase3,

The parallel CPU version is about twice as fast as the Cython version, the GPU version is about 10x faster than the Cython version (on a RTX 3060 Laptop GPU 6GB)

## Run with other GPU types
## Run on GPU

Other GPU types don't work with the command line script. They have to be accessed via Julia (or the command line script modified).

```julia
using CUDA
chi = qsm_tgv(phase, mask, res; TE, fieldstrength, gpu=CUDA);
```

```julia
using AMDGPU
chi = qsm_tgv(phase, mask, res; TE, fieldstrength, gpu=AMDGPU);
Expand Down
2 changes: 1 addition & 1 deletion src/QuantitativeSusceptibilityMappingTGV.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module QuantitativeSusceptibilityMappingTGV

using CUDA, KernelAbstractions, PaddedViews, ImageMorphology, Interpolations, Rotations, OffsetArrays, StaticArrays, ProgressMeter, Statistics, ImageFiltering, ROMEO
using KernelAbstractions, PaddedViews, ImageMorphology, Interpolations, Rotations, OffsetArrays, StaticArrays, ProgressMeter, Statistics, ImageFiltering, ROMEO

include("tgv.jl")
include("tgv_helper.jl")
Expand Down
22 changes: 2 additions & 20 deletions src/tgv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Example:
chi = qsm_tgv(randn(20,20,20), trues(20,20,20), [1,1,2]; TE=0.025, fieldstrength=3)
"""
function qsm_tgv(phase, mask, res; TE, B0_dir=[0, 0, 1], fieldstrength=3, regularization=2, alpha=get_default_alpha(regularization), step_size=3, iterations=get_default_iterations(res, step_size), erosions=3, dedimensionalize=false, correct_laplacian=true, laplacian=get_laplace_phase_del, type=Float32, gpu=CUDA.functional(), nblocks=32)
function qsm_tgv(phase, mask, res; TE, B0_dir=[0, 0, 1], fieldstrength=3, regularization=2, alpha=get_default_alpha(regularization), step_size=3, iterations=get_default_iterations(res, step_size), erosions=3, dedimensionalize=false, correct_laplacian=true, laplacian=get_laplace_phase_del, type=Float32, gpu=nothing, nblocks=32)
device, cu = select_device(gpu)
phase, res, alpha, fieldstrength, mask = adjust_types(type, phase, res, alpha, fieldstrength, mask)

Expand Down Expand Up @@ -163,15 +163,7 @@ function reduce_to_mask_box(laplace_phi0, mask)
return laplace_phi0, mask, box_indices, original_size
end

function default_backend()
if CUDA.functional()
return CUDA.CUDAKernels.CUDABackend()
else
return CPU()
end
end

function select_device(library::Module)
function select_device(library)
if Symbol(library) == :CUDA
println("Using the GPU via CUDA")
return library.CUDAKernels.CUDABackend(), library.cu
Expand All @@ -190,16 +182,6 @@ function select_device(library::Module)
end
end

function select_device(gpu)
if gpu
println("Using the GPU")
return CUDA.CUDAKernels.CUDABackend(), CUDA.cu
else
println("Using $(Threads.nthreads()) CPU threads")
return CPU(), identity
end
end

function erode_mask(mask)
SE = strel(CartesianIndex, strel_diamond(mask))
erode_vox(I) = minimum(mask[I+J] for J in SE if checkbounds(Bool, mask, I + J))
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
22 changes: 3 additions & 19 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,9 @@ using TestItemRunner
@test size(chi) == sz
end

@testitem "GPU" begin
if QuantitativeSusceptibilityMappingTGV.CUDA.functional()
sz = (20, 20, 20)
phase = randn(sz)
mask = trues(sz)
res = [1, 1, 1]
TE = 1

iterations = 10
chi_cpu = qsm_tgv(phase, mask, res; TE, iterations, gpu=false)
chi_gpu = qsm_tgv(phase, mask, res; TE, iterations, gpu=true)

relative_diff(A, B) = sum(abs.(A .- B)) / sum(abs.(B))
@test relative_diff(Array(chi_gpu), chi_cpu) < 1e-7
end
end

@testitem "CUDA" begin
if QuantitativeSusceptibilityMappingTGV.CUDA.functional()
using CUDA
if CUDA.functional()
sz = (20, 20, 20)
phase = randn(sz)
mask = trues(sz)
Expand All @@ -44,7 +28,7 @@ end

iterations = 10
chi_cpu = qsm_tgv(phase, mask, res; TE, iterations, gpu=false)
chi_gpu = qsm_tgv(phase, mask, res; TE, iterations, gpu=QuantitativeSusceptibilityMappingTGV.CUDA)
chi_gpu = qsm_tgv(phase, mask, res; TE, iterations, gpu=CUDA)

relative_diff(A, B) = sum(abs.(A .- B)) / sum(abs.(B))
@test relative_diff(Array(chi_gpu), chi_cpu) < 1e-7
Expand Down
2 changes: 1 addition & 1 deletion tgv_qsm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ end
version = Comonicon.get_version(QuantitativeSusceptibilityMappingTGV)
Comonicon.get_version(::Module) = version

@main function tgv_qsm(fn_phase, fn_mask; TE::Float64, output::String="output.nii.gz", fieldstrength::Float64=3.0, regularization::Float64=2.0, erosions::Int=3, B0_dir::Array{Int}=[0,0,1], dedimensionalize::Bool=false, no_laplacian_correction::Bool=false, step_size::Float64=3.0, no_gpu::Bool=false, type::DataType=Float32, nblocks::Int=32)
@main function tgv_qsm(fn_phase, fn_mask; TE::Float64, output::String="output.nii.gz", fieldstrength::Float64=3.0, regularization::Float64=2.0, erosions::Int=3, B0_dir::Array{Int}=[0,0,1], dedimensionalize::Bool=false, no_laplacian_correction::Bool=false, step_size::Float64=3.0, type::DataType=Float32, nblocks::Int=32)
println("Starting calculation...")
phase = readphase(fn_phase)
mask = niread(fn_mask) .!= 0
Expand Down
37 changes: 37 additions & 0 deletions tgv_qsm_cuda.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env -S julia --color=yes --startup-file=no --threads=auto

## Usage

# Call with: `<path-to-file>/tgv_qsm.jl ARGS`
# On windows use: `julia --threads=auto <path-to-file>/tgv_qsm.jl ARGS`

# Example call:
# `./tgv_qsm.jl phase.nii.gz mask.nii.gz --TE 0.025 --output output.nii.gz --no-gpu

import Pkg

## Uncomment to use a local julia package directory instead of the global one
# package_dir = joinpath(@__DIR__, ".tgv_cmd_packages")
# mkpath(package_dir)
# Pkg.activate(package_dir)

try
using CUDA, QuantitativeSusceptibilityMappingTGV, MriResearchTools, Comonicon
catch
Pkg.add(["CUDA", "QuantitativeSusceptibilityMappingTGV", "MriResearchTools", "Comonicon"])
using CUDA, QuantitativeSusceptibilityMappingTGV, MriResearchTools, Comonicon
end

version = Comonicon.get_version(QuantitativeSusceptibilityMappingTGV)
Comonicon.get_version(::Module) = version

@main function tgv_qsm(fn_phase, fn_mask; TE::Float64, output::String="output.nii.gz", fieldstrength::Float64=3.0, regularization::Float64=2.0, erosions::Int=3, B0_dir::Array{Int}=[0,0,1], dedimensionalize::Bool=false, no_laplacian_correction::Bool=false, step_size::Float64=3.0, type::DataType=Float32, nblocks::Int=32)
println("Starting calculation...")
phase = readphase(fn_phase)
mask = niread(fn_mask) .!= 0
res = header(phase).pixdim[2:4]
println("Resolution from NIfTI header [mm]: $(round.(Float64.(res); digits=2))")
chi = qsm_tgv(phase, mask, res; TE, B0_dir, fieldstrength, regularization, erosions, dedimensionalize, correct_laplacian=!no_laplacian_correction, gpu=!no_gpu, step_size, type, nblocks, gpu=CUDA)
println("Writing output")
savenii(chi, output; header=header(phase))
end

0 comments on commit c57cb19

Please sign in to comment.