Core: variadic templates for backend. #559

Status: Draft. Wants to merge 5 commits into base: develop.
149 changes: 66 additions & 83 deletions core/include/engine/Backend_par.hpp
@@ -4,6 +4,8 @@
 
 #include <engine/Vectormath_Defines.hpp>
 
+#include <tuple>
+
 #ifdef SPIRIT_USE_CUDA
 #include <cub/cub.cuh>
 #define SPIRIT_LAMBDA __device__
@@ -20,28 +22,46 @@ namespace par
 
 #ifdef SPIRIT_USE_CUDA
 
-template<typename F>
+// f(i) for all i
+template<typename Lambda>
 __global__
-void cu_apply(int N, F f)
+void cu_apply(int N, Lambda f)
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
     if(idx < N)
         f(idx);
 }
 
-// vf1[i] = f( vf2[i] )
-template<typename F>
-void apply(int N, F f)
+// f(i) for all i
+template<typename Lambda>
+void apply(int N, Lambda f)
 {
     cu_apply<<<(N+1023)/1024, 1024>>>(N, f);
     CU_CHECK_AND_SYNC();
 }
 
-template<typename F>
-scalar reduce(int N, const F f)
+// vf_to[i] = f(args[i]...) for all i
+template<typename A, typename Lambda, typename... args>
+__global__
+void cu_assign_lambda(int N, A * vf_to, Lambda lambda, const args *... arg_fields)
+{
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx < N)
+        vf_to[idx] = lambda(arg_fields[idx]...);
+}
+// field[i] = f(args[i]...) for all i
+template<typename A, typename Lambda, typename... args>
+void assign(field<A> & vf_to, Lambda lambda, const field<args> &... vf_args)
+{
+    int N = vf_to.size();
+    cu_assign_lambda<<<(N+1023)/1024, 1024>>>(N, vf_to.data(), lambda, vf_args.data()...);
+    CU_CHECK_AND_SYNC();
+}
+
+
+template<typename Lambda>
+scalar reduce(int N, const Lambda f)
 {
     static scalarfield sf(N, 0);
-    // Vectormath::fill(sf, 0);
 
     if(sf.size() != N)
         sf.resize(N);
@@ -63,15 +83,14 @@ namespace par
     return ret[0];
 }
 
-template<typename A, typename F>
-scalar reduce(const field<A> & vf1, const F f)
+template<typename A, typename Lambda>
+scalar reduce(int N, const Lambda f, const field<A> & vf1)
 {
     // TODO: remove the reliance on a temporary scalar field (maybe thrust::dot with generalized operations)
     // We also use this workaround for a single field as argument, because cub does not support non-commutative reduction operations
 
     int n = vf1.size();
     static scalarfield sf(n, 0);
-    // Vectormath::fill(sf, 0);
 
     if(sf.size() != vf1.size())
         sf.resize(vf1.size());
@@ -81,7 +100,6 @@ namespace par
     apply( n, [f, s, v1] SPIRIT_LAMBDA (int idx) { s[idx] = f(v1[idx]); } );
 
     static scalarfield ret(1, 0);
-    // Vectormath::fill(ret, 0);
 
     // Determine temporary storage size and allocate
     void * d_temp_storage = NULL;
@@ -96,12 +114,11 @@ namespace par
 }
 
 template<typename A, typename B, typename F>
-scalar reduce(const field<A> & vf1, const field<B> & vf2, const F f)
+scalar reduce(int N, const F f, const field<A> & vf1, const field<B> & vf2)
 {
     // TODO: remove the reliance on a temporary scalar field (maybe thrust::dot with generalized operations)
     int n = vf1.size();
     static scalarfield sf(n, 0);
-    // Vectormath::fill(sf, 0);
 
     if(sf.size() != vf1.size())
         sf.resize(vf1.size());
@@ -112,7 +129,6 @@ namespace par
     apply( n, [f, s, v1, v2] SPIRIT_LAMBDA (int idx) { s[idx] = f(v1[idx], v2[idx]); } );
 
     static scalarfield ret(1, 0);
-    // Vectormath::fill(ret, 0);
     // Determine temporary storage size and allocate
     void * d_temp_storage = NULL;
     size_t temp_storage_bytes = 0;
@@ -125,92 +141,59 @@ namespace par
     return ret[0];
 }
 
-
-// vf1[i] = f( vf2[i] )
-template<typename A, typename B, typename F>
-__global__
-void cu_set_lambda(A * vf1, const B * vf2, F f, int N)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    if(idx < N)
-    {
-        vf1[idx] = f(vf2[idx]);
-    }
-}
-
-// vf1[i] = f( vf2[i] )
-template<typename A, typename B, typename F>
-void set(field<A> & vf1, const field<B> & vf2, F f)
-{
-    int N = vf1.size();
-    cu_set_lambda<<<(N+1023)/1024, 1024>>>(vf1.data(), vf2.data(), f, N);
-    CU_CHECK_AND_SYNC();
-}
 
 #else
 
+// f(i) for all i
 template<typename F>
-scalar reduce(int N, const F f)
+void apply(int N, const F & f)
 {
-    scalar res = 0;
-    #pragma omp parallel for reduction(+:res)
+    #pragma omp parallel for
     for(unsigned int idx = 0; idx < N; ++idx)
-    {
-        res += f(idx);
-    }
-    return res;
+        f(idx);
 }
 
-template<typename A, typename F>
-scalar reduce(const field<A> & vf1, const F f)
+// vf_to[i] = f(args[i]...) for all i
+template<typename A, typename Lambda, typename... args> inline
+void pointer_assign(int N, A * vf_to, Lambda lambda, const args *... vf_args)
 {
-    scalar res=0;
-    #pragma omp parallel for reduction(+:res)
-    for(unsigned int idx = 0; idx < vf1.size(); ++idx)
-    {
-        res += f(vf1[idx]);
-    }
-    return res;
+    #pragma omp parallel for
+    for(unsigned int idx = 0; idx < N; ++idx)
+        vf_to[idx] = lambda(vf_args[idx]...);
 }
 
-// result = sum_i f( vf1[i], vf2[i] )
-template<typename A, typename B, typename F>
-scalar reduce(const field<A> & vf1, const field<B> & vf2, const F & f)
+// vf_to[i] = f(args[i]...) for all i
+template<typename A, typename Lambda, typename... args> inline
+void assign(field<A> & vf_to, Lambda lambda, const field<args> &... vf_args)
 {
-    scalar res=0;
-    #pragma omp parallel for reduction(+:res)
-    for(unsigned int idx = 0; idx < vf1.size(); ++idx)
-    {
-        res += f(vf1[idx], vf2[idx]);
-    }
-    return res;
+    auto N = vf_to.size();
+    // We take this detour so that `.data()` won't be called for every element of each field
+    pointer_assign(N, vf_to.data(), lambda, vf_args.data()...);
 }
 
-// vf1[i] = f( vf2[i] )
-template<typename A, typename B, typename F>
-void set(field<A> & vf1, const field<B> & vf2, const F & f)
-{
-    #pragma omp parallel for
-    for(unsigned int idx = 0; idx < vf1.size(); ++idx)
-    {
-        vf1[idx] = f(vf2[idx]);
-    }
-}
-
-// f( vf1[idx], idx ) for all i
-template<typename F>
-void apply(int N, const F & f)
+// result = sum_i f(args[i]...)
+template<typename Lambda, typename... args> inline
+scalar pointer_reduce(int N, const Lambda & lambda, const args *... vf_args)
 {
-    #pragma omp parallel for
+    scalar res = 0;
+
+    #pragma omp parallel for reduction(+:res)
     for(unsigned int idx = 0; idx < N; ++idx)
-    {
-        f(idx);
-    }
-}
+        res += lambda(vf_args[idx]...);
+
+    return res;
+}
+// result = sum_i f(args[i]...)
+template<typename Lambda, typename... args> inline
+scalar reduce(int N, const Lambda & lambda, const field<args> &... vf_args)
+{
+    // We take this detour so that `.data()` won't be called for every element of each field
+    return pointer_reduce(N, lambda, vf_args.data()...);
+}
 
 #endif
 
 } // namespace par
 } // namespace Backend
 } // namespace Engine
 
 #endif
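
For illustration, a minimal usage sketch of the new variadic interface (hypothetical code, not part of this diff: vf_out, vf_a, vf_b and the constant c are invented names, while field, scalar, Vector3 and SPIRIT_LAMBDA come from the existing Spirit headers):

    // vf_out[i] = vf_a[i] + c * vf_b[i], same call on the OpenMP and CUDA backends
    Backend::par::assign( vf_out,
        [c] SPIRIT_LAMBDA ( const Vector3 & a, const Vector3 & b ) -> Vector3 { return a + c * b; },
        vf_a, vf_b );

    // result = sum_i vf_a[i].dot( vf_b[i] )
    scalar result = Backend::par::reduce( vf_a.size(),
        [] SPIRIT_LAMBDA ( const Vector3 & a, const Vector3 & b ) { return a.dot( b ); },
        vf_a, vf_b );

Because only the element types appear in the lambda signature, the same functor compiles for both the pointer-based CUDA kernels and the OpenMP loops.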
5 changes: 3 additions & 2 deletions core/include/engine/Hamiltonian_Heisenberg.hpp
@@ -152,11 +152,12 @@ namespace Engine
         // Calculate the Quadruplet energy
         void E_Quadruplet(const vectorfield & spins, scalarfield & Energy);
 
-    private:
-        int idx_zeeman, idx_anisotropy, idx_exchange, idx_dmi, idx_ddi, idx_quadruplet;
         void Gradient_DDI_Cutoff(const vectorfield& spins, vectorfield & gradient);
         void Gradient_DDI_Direct(const vectorfield& spins, vectorfield & gradient);
         void Gradient_DDI_FFT(const vectorfield& spins, vectorfield & gradient);
 
+    private:
+        int idx_zeeman, idx_anisotropy, idx_exchange, idx_dmi, idx_ddi, idx_quadruplet;
         void E_DDI_Direct(const vectorfield& spins, scalarfield & Energy);
         void E_DDI_Cutoff(const vectorfield& spins, scalarfield & Energy);
         void E_DDI_FFT(const vectorfield& spins, scalarfield & Energy);
34 changes: 20 additions & 14 deletions core/include/engine/Solver_Depondt.hpp
@@ -41,12 +41,15 @@ void Method_Solver<Solver::Depondt>::Iteration ()
     {
         auto& conf = *this->configurations[i];
         auto& conf_predictor = *this->configurations_predictor[i];
+        auto anglep = angle.data();
+        auto axisp = rotationaxis[i].data();
+        auto f_virtual = forces_virtual[i].data();
 
         // For Rotation matrix R := R( H_normed, angle )
-        Vectormath::norm( forces_virtual[i], angle ); // angle = |forces_virtual|
-
-        Vectormath::set_c_a( 1, forces_virtual[i], rotationaxis[i] ); // rotationaxis = forces_virtual
-        Vectormath::normalize_vectors( rotationaxis[i] ); // normalize rotation axis
+        Backend::par::apply(nos, [anglep, axisp, f_virtual] SPIRIT_LAMBDA (int idx) {
+            anglep[idx] = f_virtual[idx].norm();      // angle = |forces_virtual|
+            axisp[idx] = f_virtual[idx].normalized(); // rotationaxis = forces_virtual/|forces_virtual|
+        });
 
         // Get spin predictor n' = R(H) * n
         Vectormath::rotate( conf, rotationaxis[i], angle, conf_predictor );
@@ -60,16 +63,19 @@ void Method_Solver<Solver::Depondt>::Iteration ()
     for (int i=0; i < this->noi; i++)
     {
         auto& conf = *this->configurations[i];
-
-        // Calculate the linear combination of the two forces_virtuals
-        Vectormath::set_c_a( 0.5, forces_virtual[i], temp1 ); // H = H/2
-        Vectormath::add_c_a( 0.5, forces_virtual_predictor[i], temp1 ); // H = (H + H')/2
-
-        // Get the rotation angle as norm of temp1 ...For Rotation matrix R' := R( H'_normed, angle' )
-        Vectormath::norm( temp1, angle ); // angle' = |forces_virtual lin combination|
-
-        // Normalize temp1 to get rotation axes
-        Vectormath::normalize_vectors( temp1 );
+        auto temp1p = temp1.data();
+        auto anglep = angle.data();
+        auto f_virtual = forces_virtual[i].data();
+        auto f_virtual_predictor = forces_virtual_predictor[i].data();
+
+        Backend::par::apply(nos, [temp1p, anglep, f_virtual, f_virtual_predictor] SPIRIT_LAMBDA (int idx) {
+            // Calculate the linear combination of the two virtual forces
+            temp1p[idx] = 0.5 * (f_virtual[idx] + f_virtual_predictor[idx]);
+            // Get the rotation angle as the norm of temp1, for the rotation matrix R' := R( H'_normed, angle' )
+            anglep[idx] = temp1p[idx].norm();
+            // Normalize temp1 to get the rotation axis
+            temp1p[idx].normalize();
+        });
 
         // Get new spin conf n_new = R( (H+H')/2 ) * n
         Vectormath::rotate( conf, temp1, angle, conf );
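
A possible follow-up, sketched here only and not part of this diff: the remaining Vectormath::rotate pass could be fused into the same kernel. This assumes Vectormath::rotate applies the standard axis-angle (Rodrigues) rotation, and that conf and conf_predictor are taken as .data() pointers like the other captures:

    // Rodrigues: n' = n*cos(a) + (k x n)*sin(a) + k*(k . n)*(1 - cos(a)), with k = axis, a = angle
    Backend::par::apply( nos, [conf, conf_predictor, anglep, axisp, f_virtual] SPIRIT_LAMBDA (int idx) {
        anglep[idx] = f_virtual[idx].norm();
        axisp[idx]  = f_virtual[idx].normalized();
        const scalar c = cos( anglep[idx] );
        const scalar s = sin( anglep[idx] );
        const Vector3 & k = axisp[idx];
        const Vector3 & n = conf[idx];
        conf_predictor[idx] = n * c + k.cross( n ) * s + k * k.dot( n ) * ( 1 - c );
    } );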
37 changes: 15 additions & 22 deletions core/include/engine/Solver_Heun.hpp
@@ -40,17 +40,14 @@ void Method_Solver<Solver::Heun>::Iteration ()
     // Predictor for each image
     for (int i = 0; i < this->noi; ++i)
     {
-        auto& conf = *this->configurations[i];
-        auto& conf_temp = *this->configurations_temp[i];
-        auto& conf_predictor = *this->configurations_predictor[i];
+        auto conf = this->configurations[i]->data();
+        auto conf_predictor = this->configurations_predictor[i]->data();
+        auto f_virtual = this->forces_virtual[i].data();
 
         // First step - Predictor
-        Vectormath::set_c_cross( -1, conf, forces_virtual[i], conf_temp ); // temp1 = -( conf x A )
-        Vectormath::set_c_a( 1, conf, conf_predictor ); // configurations_predictor = conf
-        Vectormath::add_c_a( 1, conf_temp, conf_predictor ); // configurations_predictor = conf + dt*temp1
-
-        // Normalize spins
-        Vectormath::normalize_vectors( conf_predictor );
+        Backend::par::apply( nos, [conf, conf_predictor, f_virtual] SPIRIT_LAMBDA (int idx) {
+            conf_predictor[idx] = ( conf[idx] - conf[idx].cross( f_virtual[idx] ) ).normalized();
+        } );
     }
 
     // Calculate_Force for the Corrector
@@ -60,21 +57,17 @@ void Method_Solver<Solver::Heun>::Iteration ()
     // Corrector step for each image
     for (int i=0; i < this->noi; i++)
     {
-        auto& conf = *this->configurations[i];
-        auto& conf_temp = *this->configurations_temp[i];
-        auto& conf_predictor = *this->configurations_predictor[i];
+        auto conf = this->configurations[i]->data();
+        auto conf_predictor = this->configurations_predictor[i]->data();
+        auto f_virtual = this->forces_virtual[i].data();
+        auto f_virtual_predictor = this->forces_virtual_predictor[i].data();
 
         // Second step - Corrector
-        Vectormath::scale( conf_temp, 0.5 ); // configurations_temp = 0.5 * configurations_temp
-        Vectormath::add_c_a( 1, conf, conf_temp ); // configurations_temp = conf + 0.5 * configurations_temp
-        Vectormath::set_c_cross( -1, conf_predictor, forces_virtual_predictor[i], temp1 ); // temp1 = - ( conf' x A' )
-        Vectormath::add_c_a( 0.5, temp1, conf_temp ); // configurations_temp = conf + 0.5 * configurations_temp + 0.5 * temp1
-
-        // Normalize spins
-        Vectormath::normalize_vectors( conf_temp );
-
-        // Copy out
-        conf = conf_temp;
+        Backend::par::apply( nos, [conf, conf_predictor, f_virtual, f_virtual_predictor] SPIRIT_LAMBDA (int idx) {
+            conf[idx] = (
+                conf[idx] - 0.5 * ( conf[idx].cross( f_virtual[idx] ) + conf_predictor[idx].cross( f_virtual_predictor[idx] ) )
+            ).normalized();
+        } );
     }
 };
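
Written out per spin, the two fused lambdas above implement the usual Heun predictor-corrector scheme: the predictor is n' = normalize( n - n x A(n) ) and the corrector is n_new = normalize( n - 1/2 * ( n x A(n) + n' x A(n') ) ), where A denotes the virtual force and x the vector cross product.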
