diff --git a/.gitignore b/.gitignore
index de49771d..9accdc5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@
 *.app
 
 # IDE
+.devcontainer
 .idea
 .vscode
 .vs
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8c0a1869..9c6f0c3e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-363
+364
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 68dfcceb..b03f5837 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -2021,7 +2021,7 @@ class NiftiImage
      * @param dimCount Number of dimensions to consider
      * @return The number of voxels in the image
      */
-    static size_t calcVoxelNumber(const nifti_image *image, const int& dimCount) {
+    static size_t calcVoxelNumber(const nifti_image *image, const int dimCount) {
         if (image == nullptr)
             return 0;
         size_t voxelNumber = 1;
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index bd71257a..9757f5fe 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -36,6 +36,6 @@ class AladinContent: public Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetCaptureRange(const int& captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; }
+    virtual void SetCaptureRange(const int captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; }
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 };
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
index ca1001f9..4d9ddddc 100644
--- a/reg-lib/ContentCreatorFactory.h
+++ b/reg-lib/ContentCreatorFactory.h
@@ -10,7 +10,7 @@ enum class ContentType { Base, Aladin, Def, F3d, F3d2 };
 
 class ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) {
+    virtual ContentCreator* Produce(const ContentType conType) {
         switch (conType) {
         case ContentType::Base:
             return new ContentCreator();
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index e61a7ce1..bd586b8b 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -7,7 +7,7 @@
 #include "_reg_mind.h"
 
 /* *************************************************************** */
-reg_measure* Measure::Create(const MeasureType& measureType) {
+reg_measure* Measure::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi();
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
index f8527631..c20989d7 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/Measure.h
@@ -7,6 +7,6 @@ enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc };
 
 class Measure {
 public:
-    virtual reg_measure* Create(const MeasureType& measureType);
+    virtual reg_measure* Create(const MeasureType measureType);
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr);
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 19826418..8e609ffe 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -17,7 +17,7 @@
 #endif
 
 /* *************************************************************** */
-Platform::Platform(const PlatformType& platformTypeIn) {
+Platform::Platform(const PlatformType platformTypeIn) {
     platformType = platformTypeIn;
     if (platformType == PlatformType::Cpu) {
         platformName = "CPU";
@@ -102,7 +102,7 @@ Compute* Platform::CreateCompute(Content& con) const {
     return computeFactory->Produce(con);
 }
 /* *************************************************************** */
-ContentCreator* Platform::CreateContentCreator(const ContentType& conType) const {
+ContentCreator* Platform::CreateContentCreator(const ContentType conType) const {
     return contentCreatorFactory->Produce(conType);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 42a0a823..b049732a 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -20,7 +20,7 @@ constexpr PlatformType PlatformTypes[] = {
 
 class Platform {
 public:
-    Platform(const PlatformType& platformTypeIn);
+    Platform(const PlatformType platformTypeIn);
     ~Platform();
 
     std::string GetName() const;
@@ -29,7 +29,7 @@ class Platform {
     void SetGpuIdx(unsigned gpuIdxIn);
 
     Compute* CreateCompute(Content& con) const;
-    ContentCreator* CreateContentCreator(const ContentType& conType = ContentType::Base) const;
+    ContentCreator* CreateContentCreator(const ContentType conType = ContentType::Base) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
     Measure* CreateMeasure() const;
     template<typename Type>
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 8f47979b..59c99fa2 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -169,7 +169,7 @@ class reg_aladin {
     }
     NiftiImage GetFinalWarpedImage();
 
-    void SetPlatformType(const PlatformType& platformTypeIn) {
+    void SetPlatformType(const PlatformType platformTypeIn) {
         this->platformType = platformTypeIn;
     }
     void SetGpuIdx(unsigned gpuIdxIn) {
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 007f26ec..c589afe7 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -141,11 +141,11 @@ class reg_base: public InterfaceOptimiser {
     virtual bool GetSymmetricStatus() { return false; }
 
     // Platform
-    virtual void SetPlatformType(const PlatformType& platformType) {
+    virtual void SetPlatformType(const PlatformType platformType) {
         platform.reset(new Platform(platformType));
         measure.reset(platform->CreateMeasure());
     }
-    virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); }
+    virtual void SetGpuIdx(const unsigned gpuIdx) { platform->SetGpuIdx(gpuIdx); }
 
     // Optimisation-related functions
     virtual void SetMaximalIterationNumber(unsigned);
diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h
index cc6f8620..ecba7ae0 100644
--- a/reg-lib/cl/ClContentCreatorFactory.h
+++ b/reg-lib/cl/ClContentCreatorFactory.h
@@ -5,7 +5,7 @@
 
 class ClContentCreatorFactory: public ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) override {
+    virtual ContentCreator* Produce(const ContentType conType) override {
         switch (conType) {
         case ContentType::Aladin:
             return new ClAladinContentCreator();
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index 591ec0ca..4b1917a8 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -37,7 +37,7 @@ struct _reg_sorted_point3D
         warped[2] = r[2];
     }
 
-    bool operator <(const _reg_sorted_point3D &sp) const
+    bool operator <(const _reg_sorted_point3D& sp) const
     {
         return (sp.distance < distance);
     }
@@ -61,7 +61,7 @@ struct _reg_sorted_point2D
         warped[0] = r[0];
         warped[1] = r[1];
     }
-    bool operator <(const _reg_sorted_point2D &sp) const
+    bool operator <(const _reg_sorted_point2D& sp) const
     {
         return (sp.distance < distance);
     }
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 68de1aa8..eefab0bc 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -232,13 +232,13 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                     tempValue *= jacPtr[voxel];
 
                 // Ensure that gradient of the warpedImage image along x-axis is not NaN
-                const double& tempGradX = currentGradPtrX[voxel];
+                const double tempGradX = currentGradPtrX[voxel];
                 if (tempGradX == tempGradX)
                     // Update the gradient along the x-axis
                     measureGradPtrX[voxel] -= static_cast<DataType>(tempValue * tempGradX);
 
                 // Ensure that gradient of the warpedImage image along y-axis is not NaN
-                const double& tempGradY = currentGradPtrY[voxel];
+                const double tempGradY = currentGradPtrY[voxel];
                 if (tempGradY == tempGradY)
                     // Update the gradient along the y-axis
                     measureGradPtrY[voxel] -= static_cast<DataType>(tempValue * tempGradY);
@@ -246,7 +246,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                 // Check if the current images are 3D
                 if (referenceImage->nz > 1) {
                     // Ensure that gradient of the warpedImage image along z-axis is not NaN
-                    const double& tempGradZ = currentGradPtrZ[voxel];
+                    const double tempGradZ = currentGradPtrZ[voxel];
                     if (tempGradZ == tempGradZ)
                         // Update the gradient along the z-axis
                         measureGradPtrZ[voxel] -= static_cast<DataType>(tempValue * tempGradZ);
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 9b823da1..6ce58b3f 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -441,10 +441,10 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            const double& refMeanValue = meanImgPtr[voxel];
-            const double& warMeanValue = warMeanPtr[voxel];
-            const double& refSdevValue = sdevImgPtr[voxel];
-            const double& warSdevValue = warSdevPtr[voxel];
+            const double refMeanValue = meanImgPtr[voxel];
+            const double warMeanValue = warMeanPtr[voxel];
+            const double refSdevValue = sdevImgPtr[voxel];
+            const double warSdevValue = warSdevPtr[voxel];
             const double correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
             double temp1 = 1.0 / (refSdevValue * warSdevValue);
             double temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
@@ -511,7 +511,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
     shared(voxelNumber, measureGradPtrX)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
-        const DataType& val = measureGradPtrX[voxel];
+        const DataType val = measureGradPtrX[voxel];
         if (val != val || isinf(val))
             measureGradPtrX[voxel] = 0;
     }
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 46a3928c..44feb651 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -1152,8 +1152,8 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
             for (int b = -1; b < 2; b++) {
                 for (int a = -1; a < 2; a++) {
                     const int index = (y + b) * splineControlPoint->nx + x + a;
-                    const DataType& splineCoeffX = splinePtrX[index];
-                    const DataType& splineCoeffY = splinePtrY[index];
+                    const DataType splineCoeffX = splinePtrX[index];
+                    const DataType splineCoeffY = splinePtrY[index];
 
                     matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
                     matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
@@ -1221,9 +1221,9 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
                     for (int b = -1; b < 2; b++) {
                         for (int a = -1; a < 2; a++) {
                             const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            const DataType& splineCoeffX = splinePtrX[index];
-                            const DataType& splineCoeffY = splinePtrY[index];
-                            const DataType& splineCoeffZ = splinePtrZ[index];
+                            const DataType splineCoeffX = splinePtrX[index];
+                            const DataType splineCoeffY = splinePtrY[index];
+                            const DataType splineCoeffZ = splinePtrZ[index];
 
                             matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
                             matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 30e15cff..ff5ae86d 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -17,9 +17,9 @@ template <class DataType>
 void ShiftImage(const nifti_image *inputImage,
                 nifti_image *shiftedImage,
                 const int *mask,
-                const int& tx,
-                const int& ty,
-                const int& tz) {
+                const int tx,
+                const int ty,
+                const int tz) {
     const DataType* inputData = static_cast<DataType*>(inputImage->data);
     DataType* shiftImageData = static_cast<DataType*>(shiftedImage->data);
 #ifdef _OPENMP
@@ -57,8 +57,8 @@ template <class DataType>
 void GetMindImageDescriptorCore(const nifti_image *inputImage,
                                 nifti_image *mindImage,
                                 const int *mask,
-                                const int& descriptorOffset,
-                                const int& currentTimePoint) {
+                                const int descriptorOffset,
+                                const int currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -131,7 +131,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                const DataType& descValue = mindImgDataPtr[mindIndex];
+                const DataType descValue = mindImgDataPtr[mindIndex];
                 mindImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
@@ -147,8 +147,8 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
 void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
-                            const int& descriptorOffset,
-                            const int& currentTimePoint) {
+                            const int descriptorOffset,
+                            const int currentTimePoint) {
     if (inputImage->datatype != mindImage->datatype)
         NR_FATAL_ERROR("The input image and the MIND image must have the same datatype");
     std::visit([&](auto&& imgType) {
@@ -162,8 +162,8 @@ template <class DataType>
 void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
                                    nifti_image *mindSscImage,
                                    const int *mask,
-                                   const int& descriptorOffset,
-                                   const int& currentTimePoint) {
+                                   const int descriptorOffset,
+                                   const int currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -253,7 +253,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                const DataType& descValue = mindSscImgDataPtr[mindIndex];
+                const DataType descValue = mindSscImgDataPtr[mindIndex];
                 mindSscImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
@@ -271,8 +271,8 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
-                               const int& descriptorOffset,
-                               const int& currentTimePoint) {
+                               const int descriptorOffset,
+                               const int currentTimePoint) {
     if (inputImage->datatype != mindSscImage->datatype)
         NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!");
     std::visit([&](auto&& imgType) {
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index b32dee3e..35c21203 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -74,12 +74,12 @@ class reg_mindssc: public reg_mind {
 void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
-                            const int& descriptorOffset,
-                            const int& currentTimePoint);
+                            const int descriptorOffset,
+                            const int currentTimePoint);
 /* *************************************************************** */
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
-                               const int& descriptorOffset,
-                               const int& currentTimePoint);
+                               const int descriptorOffset,
+                               const int currentTimePoint);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 9e3801c1..9918c5e7 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -198,8 +198,8 @@ void reg_getNmiValue(const nifti_image *referenceImage,
                 // No approximation is used for the Parzen windowing
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                     if (referenceMask[voxel] > -1) {
-                        const DataType& refValue = refPtr[voxel];
-                        const DataType& warValue = warPtr[voxel];
+                        const DataType refValue = refPtr[voxel];
+                        const DataType warValue = warPtr[voxel];
                         if (refValue == refValue && warValue == warValue) {
                             for (int r = int(refValue - 1); r < int(refValue + 3); ++r) {
                                 if (0 <= r && r < referenceBinNumber[t]) {
@@ -220,8 +220,8 @@ void reg_getNmiValue(const nifti_image *referenceImage,
                 // the histogram is convolved with a spine kernel function.
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                     if (referenceMask[voxel] > -1) {
-                        const DataType& refValue = refPtr[voxel];
-                        const DataType& warValue = warPtr[voxel];
+                        const DataType refValue = refPtr[voxel];
+                        const DataType warValue = warPtr[voxel];
                         if (refValue == refValue && warValue == warValue &&
                             0 <= refValue && refValue < referenceBinNumber[t] &&
                             0 <= warValue && warValue < floatingBinNumber[t]) {
@@ -295,9 +295,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the entropy of the reference image
             double referenceEntropy = 0;
             for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
+                const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     referenceEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog;
                 }
@@ -306,9 +306,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the entropy of the warped floating image
             double warpedEntropy = 0;
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f];
+                const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     warpedEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog;
                 }
@@ -317,9 +317,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the joint entropy
             double jointEntropy = 0;
             for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) {
-                const double& valPro = jointHistoProPtr[i];
+                const double valPro = jointHistoProPtr[i];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     jointEntropy -= valPro * valLog;
                     jointHistoLogPtr[i] = valLog;
                 }
@@ -455,9 +455,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                             if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
                                 const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
-                                const double& refLog = logHistoPtr[r + referenceOffset];
-                                const double& warLog = logHistoPtr[w + floatingOffset];
+                                const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
+                                const double refLog = logHistoPtr[r + referenceOffset];
+                                const double warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     jointDeriv[0] += common * gradX * jointLog;
                                     refDeriv[0] += common * gradX * refLog;
@@ -542,9 +542,9 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                             if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
                                 const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
-                                const double& refLog = logHistoPtr[r + referenceOffset];
-                                const double& warLog = logHistoPtr[w + floatingOffset];
+                                const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
+                                const double refLog = logHistoPtr[r + referenceOffset];
+                                const double warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     refDeriv[0] += common * gradX * refLog;
                                     warDeriv[0] += common * gradX * warLog;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 1c01ba91..16fbda9f 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -108,8 +108,8 @@ class SafeArray {
     }
 
 private:
-    void operator=(const SafeArray &) {};
-    SafeArray(const SafeArray &) {};
+    void operator=(const SafeArray&) {};
+    SafeArray(const SafeArray&) {};
 
     DataTYPE *data;
 };
@@ -141,7 +141,7 @@ class Multi_Loop {
     }
 
     /// Gets the index or iterator for the specified loop.
-    const T &operator [](int index) const {
+    const T& operator [](int index) const {
         return (current[index]);
     }
 
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 1f41f389..2a130c4d 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -139,7 +139,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
                         const double diff = std::pow(refValue - warValue, 2.0);
 #endif
                         // Jacobian determinant modulation of the ssd if required
-                        const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
+                        const DataType val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
                         ssdLocal += diff * val;
                         n += val;
                     }
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 91a85e3a..f363d8ee 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -446,7 +446,7 @@ template void reg_tools_changeDatatype<double>(nifti_image*, int);
 struct Operation {
     enum class Type { Add, Subtract, Multiply, Divide } type;
     Operation(Type type) : type(type) {}
-    double operator()(const double& lhs, const double& rhs) const {
+    double operator()(const double lhs, const double rhs) const {
         switch (type) {
         case Type::Add:
             return lhs + rhs;
@@ -2564,7 +2564,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
     return newImage;
 }
 /* *************************************************************** */
-void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose) {
+void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) {
 #ifdef NDEBUG
     if (!verbose) return;
 #endif
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index c014e6d1..5064d800 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -426,5 +426,5 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
 nifti_image* nifti_dup(const nifti_image& image, const bool copyData = true);
 /* *************************************************************** */
 /// @brief Prints the command line
-void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose);
+void PrintCmdLine(const int argc, const char *const *argv, const bool verbose);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index ad6ff06d..9b32dd4d 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -37,7 +37,7 @@ namespace NiftyReg::Cuda {
 /* *************************************************************** */
 namespace Internal {
 /* *************************************************************** */
-inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) {
+inline void SafeCall(const std::string& file, const int line, const std::string& funcName) {
 #if CUDART_VERSION >= 3200
 	const cudaError_t err = cudaPeekAtLastError();
 #else
@@ -47,7 +47,7 @@ inline void SafeCall(const std::string& file, const int& line, const std::string
         NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err));
 }
 /* *************************************************************** */
-inline void CheckKernel(const std::string& file, const int& line, const std::string& funcName, const dim3& grid, const dim3& block) {
+inline void CheckKernel(const std::string& file, const int line, const std::string& funcName, const dim3& grid, const dim3& block) {
 #if CUDART_VERSION >= 3200
 	cudaDeviceSynchronize();
 	const cudaError_t err = cudaPeekAtLastError();
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index f569f1bc..02c83dc8 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -261,7 +261,7 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
 /* *************************************************************** */
 void CudaCompute::DefFieldCompose(const nifti_image *defField) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
-    const size_t& voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
     thrust::device_vector<float4> defFieldCuda(voxelNumber);
     Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
     reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h
index a42360a3..72e42885 100644
--- a/reg-lib/cuda/CudaContentCreatorFactory.h
+++ b/reg-lib/cuda/CudaContentCreatorFactory.h
@@ -9,7 +9,7 @@
 
 class CudaContentCreatorFactory: public ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) override {
+    virtual ContentCreator* Produce(const ContentType conType) override {
         switch (conType) {
         case ContentType::Base:
             return new CudaContentCreator();
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index a9b9ece2..ff2037ff 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -50,7 +50,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
         if (!activeTimePoints[t]) continue;
 
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
+            const float intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
             float& densityVal = densityCudaPtr[index];
             bool& nanImageVal = nanImageCudaPtr[index];
             densityVal = intensityVal == intensityVal ? 1.f : 0;
@@ -185,7 +185,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                         // Increment the current value by performing the weighted sum
                         double intensitySum = 0, densitySum = 0;
                         for (int k = shiftPre; k < shiftPst; k++, kernelIndex++) {
-                            const float& kernelValue = tex1Dfetch<float>(kernelTexture, kernelIndex);
+                            const float kernelValue = tex1Dfetch<float>(kernelTexture, kernelIndex);
                             intensitySum += kernelValue * bufferIntensityPtr[k];
                             densitySum += kernelValue * bufferDensityPtr[k];
                         }
@@ -228,12 +228,12 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
 
         // Normalise per time point
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            const bool& nanImageVal = tex1Dfetch<char>(nanImageTexture, index);
+            const bool nanImageVal = tex1Dfetch<char>(nanImageTexture, index);
             if (nanImageVal) {
                 reinterpret_cast<float*>(&imageCuda[index])[t] = std::numeric_limits<float>::quiet_NaN();
             } else {
-                const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
-                const float& densityVal = tex1Dfetch<float>(densityTexture, index);
+                const float intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
+                const float densityVal = tex1Dfetch<float>(densityTexture, index);
                 reinterpret_cast<float*>(&imageCuda[index])[t] = intensityVal / densityVal;
             }
         });
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 4cdfbdc8..793aa61a 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -4,7 +4,7 @@
 #include "_reg_ssd_gpu.h"
 
 /* *************************************************************** */
-reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
+reg_measure* CudaMeasure::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi_gpu();
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
index 928f4fc4..76f73900 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -4,6 +4,6 @@
 
 class CudaMeasure: public Measure {
 public:
-    virtual reg_measure* Create(const MeasureType& measureType) override;
+    virtual reg_measure* Create(const MeasureType measureType) override;
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override;
 };
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index c61ecb13..85a250a5 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -9,7 +9,7 @@ float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) {
     auto imageTexture = *imageTexturePtr;
     thrust::counting_iterator<unsigned> index(0);
     return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) {
-        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        const float4 val = tex1Dfetch<float4>(imageTexture, index);
         return sqrtf((optimiseX ? Square(val.x) : 0) +
                      (optimiseY ? Square(val.y) : 0) +
                      (optimiseZ ? Square(val.z) : 0));
@@ -51,7 +51,7 @@ void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double max
                                                      nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto imageTexture = *imageTexturePtr;
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), nVoxels, [=]__device__(const unsigned index) {
-        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        const float4 val = tex1Dfetch<float4>(imageTexture, index);
         imageCuda[index] = make_float4(optimiseX ? val.x * maxGradLengthInv : 0,
                                        optimiseY ? val.y * maxGradLengthInv : 0,
                                        optimiseZ ? val.z * maxGradLengthInv : 0,
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 43783b4d..ee0e4bcf 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -152,7 +152,7 @@ __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dim
     else rem = index;
     const int z = quot;
     reg_div_cuda(rem, dims.x, quot, rem);
-    const int& y = quot, &x = rem;
+    const int y = quot, x = rem;
     return { x, y, z };
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 9328aff8..569136b1 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -121,7 +121,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index
                 int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1;
                 for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) {
                     if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
-                    const float3& controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
+                    const float3 controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
                     secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                     secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
                     secondDerivative.zz = secondDerivative.zz + basis.zz[basInd] * controlPointValue;
@@ -137,7 +137,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index
             int indexXY = b * controlPointImageDim.x + x - 1;
             for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) {
                 if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
-                const float2& controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
+                const float2 controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
                 secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                 secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
                 secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue;
@@ -243,17 +243,17 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
                     int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6;
                     for (int a = x - 1; a < x + 2; a++, basInd++) {
                         if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; }
-                        const float3& secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
-                        const float3& secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
-                        const float3& secondDerivativeZZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeZZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeZZ * basis.zz[basInd];
-                        const float3& secondDerivativeXY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
-                        const float3& secondDerivativeYZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeYZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeYZ * basis.yz[basInd];
-                        const float3& secondDerivativeXZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXZ * basis.xz[basInd];
                     }
                 }
@@ -264,11 +264,11 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
                 int indexXY = (b * controlPointImageDim.x + x - 1) * 3;
                 for (int a = x - 1; a < x + 2; a++, basInd++) {
                     if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; }
-                    const float2& secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
-                    const float2& secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
-                    const float2& secondDerivativeXY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeXY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
                 }
             }
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 43708ec5..342864aa 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -128,9 +128,9 @@ __device__ float4 GetSlidedValues(int x, int y,
 
     x -= newX;
     y -= newY;
-    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
-                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
-                                             0.f, 0.f);
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
+                                            0.f, 0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
@@ -159,10 +159,10 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
     x -= newX;
     y -= newY;
     z -= newZ;
-    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
-                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
-                                             x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
-                                             0.f);
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
+                                            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
+                                            0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
@@ -207,7 +207,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto&& [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -243,7 +243,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
             int indexXYZ = indexYZ + nodePre.x;
             const float basisY = yBasis[sharedMemIndex + b];
             for (char a = 0; a < 4; a++, indexXYZ++) {
-                const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
+                const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
                 const float xyzBasis = xBasis[a] * basisY * basisZ;
                 displacement.x += xyzBasis * nodeCoeff.x;
                 displacement.y += xyzBasis * nodeCoeff.y;
@@ -288,7 +288,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto&& [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -313,7 +313,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
         const float basis = yBasis[sharedMemIndex + b];
         for (char a = 0; a < 4; a++, index++) {
-            const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
+            const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
             const float xyBasis = xBasis[a] * basis;
             displacement.x += xyBasis * nodeCoeff.x;
             displacement.y += xyBasis * nodeCoeff.y;
@@ -1248,7 +1248,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                                                  const int3& cppDims,
                                                  const Basis1st<is3d>& basis,
                                                  const mat33& reorientation) {
-    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
         (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
@@ -1260,7 +1260,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                 const int yInd = (zInd + y + b) * cppDims.x;
                 for (int a = -1; a < 2; a++, basInd++) {
                     const int index = yInd + x + a;
-                    const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                    const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                     matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                     matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1282,7 +1282,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
             const int yInd = (y + b) * cppDims.x;
             for (int a = -1; a < 2; a++, basInd++) {
                 const int index = yInd + x + a;
-                const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                 matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                 matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1325,7 +1325,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
                                                              const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (index >= voxelNumber) return;
-    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     auto gradVal = transGradient[index];
 
     if constexpr (is3d) {
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index d0c3056d..45a6616d 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -53,7 +53,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
         this->jointHistogramLogBwCudaVecs.resize(this->referenceTimePoints);
         this->jointHistogramProBwCudaVecs.resize(this->referenceTimePoints);
     }
-    for (int i = 0; i < this->referenceTimePoints; ++i) {
+    for (int i = 0; i < this->referenceTimePoints; i++) {
         if (this->timePointWeights[i] > 0) {
             this->jointHistogramLogCudaVecs[i].resize(this->totalBinNumber[i]);
             this->jointHistogramProCudaVecs[i].resize(this->totalBinNumber[i]);
@@ -92,9 +92,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
     for (int t = 0; t < referenceTimePoints; t++) {
         if (timePointWeights[t] <= 0) continue;
         NR_DEBUG("Computing NMI for time point " << t);
-        const auto& curTotalBinNumber = totalBinNumber[t];
-        const auto& curRefBinNumber = referenceBinNumber[t];
-        const auto& curFloBinNumber = floatingBinNumber[t];
+        const auto curTotalBinNumber = totalBinNumber[t];
+        const auto curRefBinNumber = referenceBinNumber[t];
+        const auto curFloBinNumber = floatingBinNumber[t];
         // Define the current histograms
         thrust::fill(thrust::device, jointHistogramLogCudaVecs[t].begin(), jointHistogramLogCudaVecs[t].end(), 0.0);
         thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0);
@@ -116,10 +116,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
                 if (refValue != refValue) return;
                 for (int r = int(refValue - 1); r < int(refValue + 3); r++) {
                     if (0 <= r && r < curRefBinNumber) {
-                        const double& refBasis = GetBasisSplineValue<double>(refValue - r);
-                        for (int w = int(warValue - 1); w < int(warValue + 3); w++) {
+                        const double refBasis = GetBasisSplineValue<double>(refValue - r);
+                        for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) {
                             if (0 <= w && w < curFloBinNumber) {
-                                const double& warBasis = GetBasisSplineValue<double>(warValue - w);
+                                const double warBasis = GetBasisSplineValue<double>(warValue - w);
                                 atomicAdd(&jointHistogramProCuda[r + w * curRefBinNumber], refBasis * warBasis);
                             }
                         }
@@ -170,7 +170,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
             });
         }
         // Normalise the histogram
-        const double& activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus<double>());
+        const double activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus<double>());
         entropyValues[t][3] = activeVoxel;
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), curTotalBinNumber, [=]__device__(const unsigned index) {
             jointHistogramProCuda[index] /= activeVoxel;
@@ -194,9 +194,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the entropy of the reference image
         thrust::counting_iterator<unsigned short> it(0);
         entropyValues[t][0] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber, [=]__device__(const unsigned short r) {
-            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r];
+            const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + r] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
@@ -204,9 +204,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the entropy of the warped floating image
         it = thrust::counting_iterator<unsigned short>(0);
         entropyValues[t][1] = thrust::transform_reduce(thrust::device, it, it + curFloBinNumber, [=]__device__(const unsigned short f) {
-            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f];
+            const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
@@ -214,9 +214,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the joint entropy
         it = thrust::counting_iterator<unsigned short>(0);
         entropyValues[t][2] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber * curFloBinNumber, [=]__device__(const unsigned short index) {
-            const double& valPro = jointHistogramProCuda[index];
+            const double valPro = jointHistogramProCuda[index];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[index] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index a97a2455..45b9f2a0 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -62,8 +62,8 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 value = controlPointImageCuda[tid];
-        const float4& bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
-        const float4& gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
+        const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
+        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
         if (optimiseX)
             value.x = bestValue.x + scale * gradValue.x;
         if (optimiseY)
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 8a04ce12..0782a984 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -31,7 +31,7 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
     const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+    const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
     // Get the voxel-based deformation in the floating space
     double2 voxelDeformation;
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 6c2e6c69..bf414396 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -117,8 +117,8 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       const float *localWeightSimCuda,
                                       float4 *ssdGradientCuda,
                                       const int *maskCuda,
-                                      const size_t& activeVoxelNumber,
-                                      const float& timepointWeight) {
+                                      const size_t activeVoxelNumber,
+                                      const float timepointWeight) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 794c3a23..3b0255e7 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -73,7 +73,12 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient,
 
         const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
         const float common = -2.f * (refValue - warValue) * adjustedWeight * val;
-        ssdGradient[index] = ssdGradient[index] + make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f);
+
+        float4 ssdGradientValue = ssdGradient[index];
+        ssdGradientValue.x += common * spaGradientValue.x;
+        ssdGradientValue.y += common * spaGradientValue.y;
+        ssdGradientValue.z += common * spaGradientValue.z;
+        ssdGradient[index] = ssdGradientValue;
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index aa8f8c38..2a4bb2bb 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -92,7 +92,7 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
 /* *************************************************************** */
 void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
-                               const float& sigma,
+                               const float sigma,
                                const bool smoothXYZ[8]) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
@@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float&
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -290,17 +290,17 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_sumReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_maxReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_minReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
@@ -328,7 +328,7 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4
 }
 /* *************************************************************** */
 template<bool isMin>
-DEVICE static inline float MinMax(const float& lhs, const float& rhs) {
+DEVICE static inline float MinMax(const float lhs, const float rhs) {
     if constexpr (isMin) return lhs < rhs ? lhs : rhs;
     else return lhs > rhs ? lhs : rhs;
 }
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 6d60ea4d..7cbb1e8a 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -29,26 +29,26 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
 /* *************************************************************** */
 void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
-                               const float& sigma,
+                               const float sigma,
                                const bool axisToSmooth[8]);
 /* *************************************************************** */
 void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *smoothingRadius);
 /* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
+void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
-void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
+void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
+void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
+void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_sumReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_maxReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_minReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
 void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 8782ded1..2dcf468a 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -51,7 +51,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
                             const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX;
                             float linearWeight = basisX[a] * basisY[b];
                             if constexpr (is3d) linearWeight *= basisZ[c];
-                            const float4& voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
+                            const float4 voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
                             interpolatedValue[0] += linearWeight * voxelValue.x;
                             interpolatedValue[1] += linearWeight * voxelValue.y;
                             if constexpr (is3d)
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 57555e12..0a97bd01 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -131,10 +131,10 @@ class ConjugateGradientTest: public InterfaceOptimiser {
     void UpdateControlPointPosition(NiftiImage& currentDof,
                                     const NiftiImage& bestDof,
                                     const NiftiImage& gradient,
-                                    const float& scale,
-                                    const bool& optimiseX,
-                                    const bool& optimiseY,
-                                    const bool& optimiseZ) {
+                                    const float scale,
+                                    const bool optimiseX,
+                                    const bool optimiseY,
+                                    const bool optimiseZ) {
         // Update the values for the x-axis displacement
         if (optimiseX) {
             auto currentDofPtr = currentDof.data(0);
@@ -161,7 +161,7 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         }
     }
 
-    void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall, const bool& isSymmetric, NiftiImage *gradientBw) {
+    void UpdateGradientValues(NiftiImage& gradient, const bool firstCall, const bool isSymmetric, NiftiImage *gradientBw) {
         // Create array1 and array2
         static NiftiImage array1, array1Bw;
         static NiftiImage array2, array2Bw;
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index e98dd2e2..528a1642 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -237,7 +237,7 @@ class LnccTest {
                         for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) {
                             int xx = x + i;
                             if (0 <= xx && xx < ref->nx) {
-                                const double& kernelValue = *kernelPtr;
+                                const double kernelValue = *kernelPtr;
                                 const int index = (zz * ref->ny + yy) * ref->nx + xx;
                                 meanRef += kernelValue * static_cast<double>(refPtr[index]);
                                 meanFlo += kernelValue * static_cast<double>(floPtr[index]);
@@ -257,7 +257,7 @@ class LnccTest {
         const float *kernelPtr = kernel.ptr.get();
         const auto refPtr = ref.data();
         const auto floPtr = flo.data();
-        const auto& [meanRef, meanFlo] = means;
+        const auto [meanRef, meanFlo] = means;
         double varRef = 0, varFlo = 0, wdiff = 0, kernelSum = 0;
         for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) {
             int zz = z + k;