Skip to content

Commit

Permalink
Use real index numbers returned from maskCuda in deformationFieldCuda…
Browse files Browse the repository at this point in the history
… and warpedGradientCuda #92
  • Loading branch information
onurulgen committed Jan 30, 2024
1 parent f7d5fc5 commit 0a4ba26
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 58 deletions.
2 changes: 1 addition & 1 deletion niftyreg_build_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
397
398
3 changes: 2 additions & 1 deletion reg-lib/cuda/CudaCompute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
con.GetFloatingCuda(),
con.Content::GetWarped(),
con.GetWarpedCuda(),
con.Content::GetDeformationField(),
con.GetDeformationFieldCuda(),
con.GetReferenceMaskCuda(),
con.GetActiveVoxelNumber(),
Expand Down Expand Up @@ -186,8 +187,8 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
getImageGradient(floating,
con.GetFloatingCuda(),
con.GetDeformationFieldCuda(),
con.DefContent::GetWarpedGradient(),
con.GetWarpedGradientCuda(),
con.GetActiveVoxelNumber(),
interpolation,
paddingValue,
activeTimePoint);
Expand Down
10 changes: 4 additions & 6 deletions reg-lib/cuda/CudaLocalTransformation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ void GetDeformationField(const nifti_image *controlPointImage,
controlPointImage->dz / referenceImage->dz);

auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
auto controlPointTexture = *controlPointTexturePtr;
auto maskTexture = *maskTexturePtr;

// Get the reference matrix if composition is required
thrust::device_vector<mat44> realToVoxelCudaVec;
Expand All @@ -46,13 +44,13 @@ void GetDeformationField(const nifti_image *controlPointImage,
const auto realToVoxelCuda = composition ? realToVoxelCudaVec.data().get() : nullptr;

if (referenceImage->nz > 1) {
thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
});
} else {
thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
});
}
Expand Down
8 changes: 2 additions & 6 deletions reg-lib/cuda/CudaLocalTransformationKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
template<bool composition, bool bspline>
__device__ void GetDeformationField3d(float4 *deformationField,
cudaTextureObject_t controlPointTexture,
cudaTextureObject_t maskTexture,
const mat44 *realToVoxel,
const int3 referenceImageDim,
const int3 controlPointImageDim,
Expand Down Expand Up @@ -207,8 +206,7 @@ __device__ void GetDeformationField3d(float4 *deformationField,
nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
} else { // starting deformation field is blank - !composition
const int voxel = tex1Dfetch<int>(maskTexture, index);
const auto [x, y, z] = reg_indexToDims_cuda<true>(voxel, referenceImageDim);
const auto [x, y, z] = reg_indexToDims_cuda<true>(index, referenceImageDim);
// The "nearest previous" node is determined [0,0,0]
const float xVoxel = float(x) / controlPointVoxelSpacing.x;
const float yVoxel = float(y) / controlPointVoxelSpacing.y;
Expand Down Expand Up @@ -245,7 +243,6 @@ __device__ void GetDeformationField3d(float4 *deformationField,
template<bool composition, bool bspline>
__device__ void GetDeformationField2d(float4 *deformationField,
cudaTextureObject_t controlPointTexture,
cudaTextureObject_t maskTexture,
const mat44 *realToVoxel,
const int3 referenceImageDim,
const int3 controlPointImageDim,
Expand All @@ -272,8 +269,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
nodePre = { Floor(xVoxel), Floor(yVoxel) };
basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
} else { // starting deformation field is blank - !composition
const int voxel = tex1Dfetch<int>(maskTexture, index);
const auto [x, y, z] = reg_indexToDims_cuda<false>(voxel, referenceImageDim);
const auto [x, y, z] = reg_indexToDims_cuda<false>(index, referenceImageDim);
// The "nearest previous" node is determined [0,0,0]
const float xVoxel = float(x) / controlPointVoxelSpacing.x;
const float yVoxel = float(y) / controlPointVoxelSpacing.y;
Expand Down
56 changes: 27 additions & 29 deletions reg-lib/cuda/CudaResampling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void ResampleImage(const nifti_image *floatingImage,
const float *floatingImageCuda,
const nifti_image *warpedImage,
float *warpedImageCuda,
const nifti_image *deformationField,
const float4 *deformationFieldCuda,
const int *maskCuda,
const size_t activeVoxelNumber,
Expand All @@ -73,25 +74,23 @@ void ResampleImage(const nifti_image *floatingImage,
if (interpolation != 1)
NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");

const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4);
auto deformationFieldTexture = *deformationFieldTexturePtr;
auto maskTexture = *maskTexturePtr;
// Get the real to voxel matrix
const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;

for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t);
auto curWarpedCuda = warpedImageCuda + t * voxelNumber;
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
auto curWarpedCuda = warpedImageCuda + t * floVoxelNumber;
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1);
auto floatingTexture = *floatingTexturePtr;
thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
curWarpedCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDim, paddingValue
thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [
curWarpedCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue
]__device__(const int index) {
// Get the real world deformation in the floating space
const int voxel = tex1Dfetch<int>(maskTexture, index);
const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);

// Get the voxel-based deformation in the floating space and compute the linear interpolation
Expand Down Expand Up @@ -141,36 +140,37 @@ void ResampleImage(const nifti_image *floatingImage,
}
}

curWarpedCuda[voxel] = intensity;
curWarpedCuda[index] = intensity;
});
}
}
template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
/* *************************************************************** */
template<bool is3d>
void GetImageGradient(const nifti_image *floatingImage,
const float *floatingImageCuda,
const float4 *deformationFieldCuda,
const nifti_image *warpedGradient,
float4 *warpedGradientCuda,
const size_t activeVoxelNumber,
const int interpolation,
float paddingValue,
const int activeTimePoint) {
if (interpolation != 1)
NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");

const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const size_t refVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3);
const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
if (paddingValue != paddingValue) paddingValue = 0;
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, refVoxelNumber, cudaChannelFormatKindFloat, 4);
auto floatingTexture = *floatingTexturePtr;
auto deformationFieldTexture = *deformationFieldTexturePtr;
// Get the real to voxel matrix
const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;

thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), refVoxelNumber, [
warpedGradientCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue
]__device__(const int index) {
// Get the real world deformation in the floating space
Expand Down Expand Up @@ -230,8 +230,8 @@ void GetImageGradient(const nifti_image *floatingImage,
warpedGradientCuda[index] = gradientValue;
});
}
template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int);
template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int);
/* *************************************************************** */
template<bool is3d>
static float3 GetRealImageSpacing(const nifti_image *image) {
Expand Down Expand Up @@ -273,15 +273,14 @@ void ResampleGradient(const nifti_image *floatingImage,
if (interpolation != 1)
NR_FATAL_ERROR("Only linear interpolation is supported");

const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
const int3 floatingDims = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
const int3 defFieldDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, floVoxelNumber, cudaChannelFormatKindFloat, 4);
auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4);
auto floatingTexture = *floatingTexturePtr;
auto deformationFieldTexture = *deformationFieldTexturePtr;
auto maskTexture = *maskTexturePtr;

// Get the real to voxel matrix
const mat44& floatingMatrix = floatingImage->sform_code != 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
Expand All @@ -293,11 +292,10 @@ void ResampleGradient(const nifti_image *floatingImage,
// Reorientation matrix is assessed in order to remove the rigid component
const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));

thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
warpedImageCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [
warpedImageCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
]__device__(const int index) {
// Get the real world deformation in the floating space
const int voxel = tex1Dfetch<int>(maskTexture, index);
const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);

// Get the voxel-based deformation in the floating space and compute the linear interpolation
Expand Down Expand Up @@ -346,7 +344,7 @@ void ResampleGradient(const nifti_image *floatingImage,
// Compute the Jacobian matrix
constexpr float basis[] = { 1.f, 0.f };
constexpr float deriv[] = { -1.f, 1.f };
auto [x, y, z] = reg_indexToDims_cuda<is3d>(voxel, defFieldDims);
auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, defFieldDims);
mat33 jacMat{};
for (char c = 0; c < (is3d ? 2 : 1); c++) {
if constexpr (is3d) {
Expand Down Expand Up @@ -432,7 +430,7 @@ void ResampleGradient(const nifti_image *floatingImage,
warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y;
warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y;
}
warpedImageCuda[voxel] = warpedValue;
warpedImageCuda[index] = warpedValue;
});
}
template void ResampleGradient<false>(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
Expand Down
3 changes: 2 additions & 1 deletion reg-lib/cuda/CudaResampling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ void ResampleImage(const nifti_image *floatingImage,
const float *floatingImageCuda,
const nifti_image *warpedImage,
float *warpedImageCuda,
const nifti_image *deformationField,
const float4 *deformationFieldCuda,
const int *maskCuda,
const size_t activeVoxelNumber,
Expand All @@ -32,8 +33,8 @@ template<bool is3d>
void GetImageGradient(const nifti_image *floatingImage,
const float *floatingImageCuda,
const float4 *deformationFieldCuda,
const nifti_image *warpedGradient,
float4 *warpedGradientCuda,
const size_t activeVoxelNumber,
const int interpolation,
float paddingValue,
const int activeTimePoint);
Expand Down
11 changes: 5 additions & 6 deletions reg-lib/cuda/_reg_nmi_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,10 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
auto warpedImageTexture = *warpedImageTexturePtr;
auto warpedGradientTexture = *warpedGradientTexturePtr;

thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
const int voxel = maskCuda[index];
const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
const float refValue = tex1Dfetch<float>(referenceImageTexture, index);
if (refValue != refValue) return;
const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
const float warValue = tex1Dfetch<float>(warpedImageTexture, index);
if (warValue != warValue) return;
const float4 warGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);

Expand Down Expand Up @@ -367,12 +366,12 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
}

// (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
float4 gradValue = voxelBasedGradientCuda[voxel];
float4 gradValue = voxelBasedGradientCuda[index];
gradValue.x += static_cast<float>(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE);
gradValue.y += static_cast<float>(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE);
if constexpr (is3d)
gradValue.z += static_cast<float>(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE);
voxelBasedGradientCuda[voxel] = gradValue;
voxelBasedGradientCuda[index] = gradValue;
});
}
/* *************************************************************** */
Expand Down
Loading

0 comments on commit 0a4ba26

Please sign in to comment.