From b9c9beca65c9c7f6862e1c1ca50c70eaafd1fbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Nov 2023 14:58:05 +0000 Subject: [PATCH] Use float gam instead of double in CudaOptimiser Even though tests show otherwise, using float gets better results in real world scenarios. --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaOptimiser.cu | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 67bf40fe..ba300673 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -371 +372 diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu index 587b4f7d..fb662d23 100644 --- a/reg-lib/cuda/CudaOptimiser.cu +++ b/reg-lib/cuda/CudaOptimiser.cu @@ -237,7 +237,7 @@ void GetConjugateGradient(float4 *gradientCuda, return make_double2(dgg, gg); }; - double gam; + float gam; thrust::counting_iterator it(0); const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) { return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index); @@ -247,8 +247,8 @@ void GetConjugateGradient(float4 *gradientCuda, const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) { return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index); }, make_double2(0, 0), thrust::plus()); - gam = (gg.x + ggBw.x) / (gg.y + ggBw.y); - } else gam = gg.x / gg.y; + gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y)); + } else gam = static_cast<float>(gg.x / gg.y); // Conjugate gradient auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda,