diff --git a/core/include/engine/Backend_par.hpp b/core/include/engine/Backend_par.hpp index 8099aa5ec..b2312e1b5 100644 --- a/core/include/engine/Backend_par.hpp +++ b/core/include/engine/Backend_par.hpp @@ -4,6 +4,8 @@ #include +#include + #ifdef SPIRIT_USE_CUDA #include #define SPIRIT_LAMBDA __device__ @@ -20,28 +22,46 @@ namespace par #ifdef SPIRIT_USE_CUDA - template + // f(i) for all i + template __global__ - void cu_apply(int N, F f) + void cu_apply(int N, Lambda f) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if(idx < N) f(idx); } - - // vf1[i] = f( vf2[i] ) - template - void apply(int N, F f) + // f(i) for all i + template + void apply(int N, Lambda f) { cu_apply<<<(N+1023)/1024, 1024>>>(N, f); CU_CHECK_AND_SYNC(); } - template - scalar reduce(int N, const F f) + // vf_to[i] = f(args[i]...) for all i + template + __global__ + void cu_assign_lambda(int N, A * vf_to, Lambda lambda, const args *... arg_fields) + { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if(idx < N) + vf_to[idx] = lambda(arg_fields[idx]...); + } + // field[i] = f(args[i]...) for all i + template + void assign(field & vf_to, Lambda lambda, const field &... vf_args) + { + int N = vf_to.size(); + cu_assign_lambda<<<(N+1023)/1024, 1024>>>(N, vf_to.data(), lambda, vf_args.data()...); + CU_CHECK_AND_SYNC(); + } + + + template + scalar reduce(int N, const Lambda f) { static scalarfield sf(N, 0); - // Vectormath::fill(sf, 0); if(sf.size() != N) sf.resize(N); @@ -63,15 +83,14 @@ namespace par return ret[0]; } - template - scalar reduce(const field & vf1, const F f) + template + scalar reduce(int N, const Lambda f, const field & vf1) { // TODO: remove the reliance on a temporary scalar field (maybe thrust::dot with generalized operations) // We also use this workaround for a single field as argument, because cub does not support non-commutative reduction operations int n = vf1.size(); static scalarfield sf(n, 0); - // Vectormath::fill(sf, 0); if(sf.size() != vf1.size()) sf.resize(vf1.size()); @@ -81,7 +100,6 @@ namespace par apply( n, [f, s, v1] SPIRIT_LAMBDA (int idx) { s[idx] = f(v1[idx]); } ); static scalarfield ret(1, 0); - // Vectormath::fill(ret, 0); // Determine temporary storage size and allocate void * d_temp_storage = NULL; @@ -96,12 +114,11 @@ namespace par } template - scalar reduce(const field & vf1, const field & vf2, const F f) + scalar reduce(int N, const F f, const field & vf1, const field & vf2) { // TODO: remove the reliance on a temporary scalar field (maybe thrust::dot with generalized operations) int n = vf1.size(); static scalarfield sf(n, 0); - // Vectormath::fill(sf, 0); if(sf.size() != vf1.size()) sf.resize(vf1.size()); @@ -112,7 +129,6 @@ namespace par apply( n, [f, s, v1, v2] SPIRIT_LAMBDA (int idx) { s[idx] = f(v1[idx], v2[idx]); } ); static scalarfield ret(1, 0); - // Vectormath::fill(ret, 0); // Determine temporary storage size and allocate void * d_temp_storage = NULL; size_t temp_storage_bytes = 0; @@ -125,92 +141,59 @@ namespace par return ret[0]; } - - // vf1[i] = f( vf2[i] ) - template - __global__ - void cu_set_lambda(A * vf1, const B * vf2, F f, int N) - { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < N) - { - vf1[idx] = f(vf2[idx]); - } - } - - // vf1[i] = f( vf2[i] ) - template - void set(field & vf1, const field & vf2, F f) - { - int N = vf1.size(); - cu_set_lambda<<<(N+1023)/1024, 1024>>>(vf1.data(), vf2.data(), f, N); - CU_CHECK_AND_SYNC(); - } - #else + // f(i) for all i template - scalar reduce(int N, const F f) + void apply(int N, const F & f) { - scalar res = 0; - #pragma omp parallel for reduction(+:res) + #pragma omp parallel for for(unsigned int idx = 0; idx < N; ++idx) - { - res += f(idx); - } - return res; + f(idx); } - template - scalar reduce(const field & vf1, const F f) + // vf_to[i] = f(args[i]...) for all i + template inline + void pointer_assign(int N, A * vf_to, Lambda lambda, const args *... vf_args) { - scalar res=0; - #pragma omp parallel for reduction(+:res) - for(unsigned int idx = 0; idx < vf1.size(); ++idx) - { - res += f(vf1[idx]); - } - return res; + #pragma omp parallel for + for(unsigned int idx = 0; idx < N; ++idx) + vf_to[idx] = lambda(vf_args[idx]...); } - - // result = sum_i f( vf1[i], vf2[i] ) - template - scalar reduce(const field & vf1, const field & vf2, const F & f) + // vf_to[i] = f(args[i]...) for all i + template inline + void assign(field & vf_to, Lambda lambda, const field &... vf_args) { - scalar res=0; - #pragma omp parallel for reduction(+:res) - for(unsigned int idx = 0; idx < vf1.size(); ++idx) - { - res += f(vf1[idx], vf2[idx]); - } - return res; + auto N = vf_to.size(); + // We take this umweg so that `.data()` won't be called for every element of each field + pointer_assign(N, vf_to.data(), lambda, vf_args.data()...); } - // vf1[i] = f( vf2[i] ) - template - void set(field & vf1, const field & vf2, const F & f) - { - #pragma omp parallel for - for(unsigned int idx = 0; idx < vf1.size(); ++idx) - { - vf1[idx] = f(vf2[idx]); - } - } - // f( vf1[idx], idx ) for all i - template - void apply(int N, const F & f) + // result = sum_i f(args[i]...) + template inline + scalar pointer_reduce(int N, const Lambda & lambda, const args *... vf_args) { - #pragma omp parallel for + scalar res = 0; + + #pragma omp parallel for reduction(+:res) for(unsigned int idx = 0; idx < N; ++idx) - { - f(idx); - } - } + res += lambda(vf_args[idx]...); + return res; + } + // result = sum_i f(args[i]...) + template inline + scalar reduce(int N, const Lambda & lambda, const field &... vf_args) + { + // We take this umweg so that `.data()` won't be called for every element of each field + return pointer_reduce(N, lambda, vf_args.data()...); + } #endif + } // namespace par } // namespace Backend } // namespace Engine + #endif \ No newline at end of file diff --git a/core/include/engine/Hamiltonian_Heisenberg.hpp b/core/include/engine/Hamiltonian_Heisenberg.hpp index e0cfa4008..918a84634 100644 --- a/core/include/engine/Hamiltonian_Heisenberg.hpp +++ b/core/include/engine/Hamiltonian_Heisenberg.hpp @@ -152,11 +152,12 @@ namespace Engine // Calculate the Quadruplet energy void E_Quadruplet(const vectorfield & spins, scalarfield & Energy); - private: - int idx_zeeman, idx_anisotropy, idx_exchange, idx_dmi, idx_ddi, idx_quadruplet; void Gradient_DDI_Cutoff(const vectorfield& spins, vectorfield & gradient); void Gradient_DDI_Direct(const vectorfield& spins, vectorfield & gradient); void Gradient_DDI_FFT(const vectorfield& spins, vectorfield & gradient); + + private: + int idx_zeeman, idx_anisotropy, idx_exchange, idx_dmi, idx_ddi, idx_quadruplet; void E_DDI_Direct(const vectorfield& spins, scalarfield & Energy); void E_DDI_Cutoff(const vectorfield& spins, scalarfield & Energy); void E_DDI_FFT(const vectorfield& spins, scalarfield & Energy); diff --git a/core/include/engine/Solver_Depondt.hpp b/core/include/engine/Solver_Depondt.hpp index 88435a53b..cef342cfd 100644 --- a/core/include/engine/Solver_Depondt.hpp +++ b/core/include/engine/Solver_Depondt.hpp @@ -41,12 +41,15 @@ void Method_Solver::Iteration () { auto& conf = *this->configurations[i]; auto& conf_predictor = *this->configurations_predictor[i]; + auto anglep = angle.data(); + auto axisp = rotationaxis[i].data(); + auto f_virtual = forces_virtual[i].data(); // For Rotation matrix R := R( H_normed, angle ) - Vectormath::norm( forces_virtual[i], angle ); // angle = |forces_virtual| - - Vectormath::set_c_a( 1, forces_virtual[i], rotationaxis[i] ); // rotationaxis = |forces_virtual| - Vectormath::normalize_vectors( rotationaxis[i] ); // normalize rotation axis + Backend::par::apply(nos, [anglep, axisp, f_virtual] SPIRIT_LAMBDA (int idx) { + anglep[idx] = f_virtual[idx].norm(); // angle = |forces_virtual| + axisp[idx] = f_virtual[idx].normalized(); // rotationaxis = forces_virtual/|forces_virtual| + }); // Get spin predictor n' = R(H) * n Vectormath::rotate( conf, rotationaxis[i], angle, conf_predictor ); @@ -60,16 +63,19 @@ void Method_Solver::Iteration () for (int i=0; i < this->noi; i++) { auto& conf = *this->configurations[i]; - - // Calculate the linear combination of the two forces_virtuals - Vectormath::set_c_a( 0.5, forces_virtual[i], temp1); // H = H/2 - Vectormath::add_c_a( 0.5, forces_virtual_predictor[i], temp1 ); // H = (H + H')/2 - - // Get the rotation angle as norm of temp1 ...For Rotation matrix R' := R( H'_normed, angle' ) - Vectormath::norm( temp1, angle ); // angle' = |forces_virtual lin combination| - - // Normalize temp1 to get rotation axes - Vectormath::normalize_vectors( temp1 ); + auto temp1p = temp1.data(); + auto anglep = angle.data(); + auto f_virtual = forces_virtual[i].data(); + auto f_virtual_predictor = forces_virtual_predictor[i].data(); + + Backend::par::apply(nos, [temp1p, anglep, f_virtual, f_virtual_predictor] SPIRIT_LAMBDA (int idx) { + // Calculate the linear combination of the two forces_virtuals + temp1p[idx] = 0.5 * (f_virtual[idx] + f_virtual_predictor[idx]); + // Get the rotation angle as norm of temp1 ...For Rotation matrix R' := R( H'_normed, angle' ) + anglep[idx] = temp1p[idx].norm(); + // Normalize temp1 to get rotation axes + temp1p[idx].normalize(); + }); // Get new spin conf n_new = R( (H+H')/2 ) * n Vectormath::rotate( conf, temp1, angle, conf ); diff --git a/core/include/engine/Solver_Heun.hpp b/core/include/engine/Solver_Heun.hpp index 8a403b759..d72820c60 100644 --- a/core/include/engine/Solver_Heun.hpp +++ b/core/include/engine/Solver_Heun.hpp @@ -40,17 +40,14 @@ void Method_Solver::Iteration () // Predictor for each image for (int i = 0; i < this->noi; ++i) { - auto& conf = *this->configurations[i]; - auto& conf_temp = *this->configurations_temp[i]; - auto& conf_predictor = *this->configurations_predictor[i]; + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto f_virtual = this->forces_virtual[i].data(); // First step - Predictor - Vectormath::set_c_cross( -1, conf, forces_virtual[i], conf_temp ); // temp1 = -( conf x A ) - Vectormath::set_c_a( 1, conf, conf_predictor ); // configurations_predictor = conf - Vectormath::add_c_a( 1, conf_temp, conf_predictor ); // configurations_predictor = conf + dt*temp1 - - // Normalize spins - Vectormath::normalize_vectors( conf_predictor ); + Backend::par::apply( nos, [conf, conf_predictor, f_virtual] SPIRIT_LAMBDA (int idx) { + conf_predictor[idx] = ( conf[idx] - conf[idx].cross( f_virtual[idx] ) ).normalized(); + } ); } // Calculate_Force for the Corrector @@ -60,21 +57,17 @@ void Method_Solver::Iteration () // Corrector step for each image for (int i=0; i < this->noi; i++) { - auto& conf = *this->configurations[i]; - auto& conf_temp = *this->configurations_temp[i]; - auto& conf_predictor = *this->configurations_predictor[i]; + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto f_virtual = this->forces_virtual[i].data(); + auto f_virtual_predictor = this->forces_virtual_predictor[i].data(); // Second step - Corrector - Vectormath::scale( conf_temp, 0.5 ); // configurations_temp = 0.5 * configurations_temp - Vectormath::add_c_a( 1, conf, conf_temp ); // configurations_temp = conf + 0.5 * configurations_temp - Vectormath::set_c_cross( -1, conf_predictor, forces_virtual_predictor[i], temp1 ); // temp1 = - ( conf' x A' ) - Vectormath::add_c_a( 0.5, temp1, conf_temp ); // configurations_temp = conf + 0.5 * configurations_temp + 0.5 * temp1 - - // Normalize spins - Vectormath::normalize_vectors( conf_temp ); - - // Copy out - conf = conf_temp; + Backend::par::apply( nos, [conf, conf_predictor, f_virtual, f_virtual_predictor] SPIRIT_LAMBDA (int idx) { + conf[idx] = ( + conf[idx] - 0.5 * ( conf[idx].cross( f_virtual[idx] ) + conf_predictor[idx].cross(f_virtual_predictor[idx]) ) + ).normalized(); + } ); } }; diff --git a/core/include/engine/Solver_Kernels.hpp b/core/include/engine/Solver_Kernels.hpp index 7a1cb343b..a703ddb16 100644 --- a/core/include/engine/Solver_Kernels.hpp +++ b/core/include/engine/Solver_Kernels.hpp @@ -38,8 +38,8 @@ namespace Solver_Kernels std::vector>> & delta_a, std::vector>> & delta_grad, const std::vector> & grad, std::vector> & grad_pr, const int num_mem, const scalar maxmove) { // std::cerr << "lbfgs searchdir \n"; - static auto dot = [] SPIRIT_LAMBDA (const Vec & v1, const Vec &v2) {return v1.dot(v2);}; - static auto set = [] SPIRIT_LAMBDA (const Vec & x) {return x;}; + static auto set = [] SPIRIT_LAMBDA (const Vec & x) -> Vec {return x;}; + static auto dot = [] SPIRIT_LAMBDA (const Vec & v1, const Vec &v2) -> scalar {return v1.dot(v2);}; scalar epsilon = sizeof(scalar) == sizeof(float) ? 1e-30 : 1e-300; @@ -50,11 +50,15 @@ namespace Solver_Kernels if (local_iter == 0) // gradient descent { - for(int img=0; img epsilon) rho[m_index] = 1.0 / rinv_temp; @@ -96,14 +102,14 @@ namespace Solver_Kernels } for (int img=0; img -1; k--) { c_ind = (k + m_index + 1) % num_mem; scalar temp=0; for (int img=0; img::Iteration() // Scale by averaging for(int img=0; imgatlas_directions[img], [] SPIRIT_LAMBDA (const Vector2 & v){ return v.squaredNorm(); }) / nos ))); + a_norm_rms = std::max( a_norm_rms, scalar( sqrt( + Backend::par::reduce(nos, [] SPIRIT_LAMBDA (const Vector2 & v){ return v.squaredNorm(); }, this->atlas_directions[img]) / nos + ))); } scalar scaling = (a_norm_rms > maxmove) ? maxmove/a_norm_rms : 1.0; diff --git a/core/include/engine/Solver_RK4.hpp b/core/include/engine/Solver_RK4.hpp index 19a8bcfa9..a16ecd826 100644 --- a/core/include/engine/Solver_RK4.hpp +++ b/core/include/engine/Solver_RK4.hpp @@ -51,19 +51,15 @@ void Method_Solver::Iteration () // Predictor for each image for (int i = 0; i < this->noi; ++i) { - auto& conf = *this->configurations[i]; - auto& k1 = *this->configurations_k1[i]; - auto& conf_predictor = *this->configurations_predictor[i]; - auto& force = this->forces_virtual[i]; - - // k1 - Vectormath::set_c_cross( -1, conf, force, k1 ); - - // Predictor for k2 - Vectormath::set_c_a( 1, conf, conf_predictor ); - Vectormath::add_c_a( 0.5, k1, conf_predictor ); - // Normalize - Vectormath::normalize_vectors( conf_predictor ); + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto force = this->forces_virtual[i].data(); + auto k1 = this->configurations_k1[i]->data(); + + Backend::par::apply( nos, [conf, conf_predictor, force, k1] SPIRIT_LAMBDA (int idx) { + k1[idx] = - conf[idx].cross(force[idx]); + conf_predictor[idx] = (conf[idx] + 0.5*k1[idx]).normalized(); + } ); } // Calculate_Force for the predictor @@ -73,19 +69,15 @@ void Method_Solver::Iteration () // Predictor for each image for (int i = 0; i < this->noi; ++i) { - auto& conf = *this->configurations[i]; - auto& k2 = *this->configurations_k2[i]; - auto& conf_predictor = *this->configurations_predictor[i]; - auto& force = this->forces_virtual_predictor[i]; - - // k2 - Vectormath::set_c_cross( -1, conf_predictor, force, k2 ); - - // Predictor for k3 - Vectormath::set_c_a( 1, conf, conf_predictor ); - Vectormath::add_c_a( 0.5, k2, conf_predictor ); - // Normalize - Vectormath::normalize_vectors( conf_predictor ); + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto force = this->forces_virtual_predictor[i].data(); + auto k2 = this->configurations_k2[i]->data(); + + Backend::par::apply( nos, [conf, conf_predictor, force, k2] SPIRIT_LAMBDA (int idx) { + k2[idx] = - conf_predictor[idx].cross(force[idx]); + conf_predictor[idx] = (conf[idx] + 0.5*k2[idx]).normalized(); + } ); } // Calculate_Force for the predictor (k3) @@ -95,19 +87,15 @@ void Method_Solver::Iteration () // Predictor for each image for (int i = 0; i < this->noi; ++i) { - auto& conf = *this->configurations[i]; - auto& k3 = *this->configurations_k3[i]; - auto& conf_predictor = *this->configurations_predictor[i]; - auto& force = this->forces_virtual_predictor[i]; - - // k3 - Vectormath::set_c_cross( -1, conf_predictor, force, k3 ); - - // Predictor for k4 - Vectormath::set_c_a( 1, conf, conf_predictor ); - Vectormath::add_c_a( 1, k3, conf_predictor ); - // Normalize - Vectormath::normalize_vectors( conf_predictor ); + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto force = this->forces_virtual_predictor[i].data(); + auto k3 = this->configurations_k3[i]->data(); + + Backend::par::apply( nos, [conf, conf_predictor, force, k3] SPIRIT_LAMBDA (int idx) { + k3[idx] = - conf_predictor[idx].cross(force[idx]); + conf_predictor[idx] = (conf[idx] + k3[idx]).normalized(); + } ); } // Calculate_Force for the predictor (k4) @@ -117,30 +105,18 @@ void Method_Solver::Iteration () // Corrector step for each image for (int i=0; i < this->noi; i++) { - auto& conf = *this->configurations[i]; - auto& k1 = *this->configurations_k1[i]; - auto& k2 = *this->configurations_k2[i]; - auto& k3 = *this->configurations_k3[i]; - auto& k4 = *this->configurations_k4[i]; - auto& conf_predictor = *this->configurations_predictor[i]; - auto& conf_temp = *this->configurations_temp[i]; - auto& force = this->forces_virtual_predictor[i]; - - // k4 - Vectormath::set_c_cross( -1, conf_predictor, force, k4 ); - - // 4th order Runge Kutta step - Vectormath::set_c_a( 1, conf, conf_temp ); - Vectormath::add_c_a( 1.0/6.0, k1, conf_temp ); - Vectormath::add_c_a( 1.0/3.0, k2, conf_temp ); - Vectormath::add_c_a( 1.0/3.0, k3, conf_temp ); - Vectormath::add_c_a( 1.0/6.0, k4, conf_temp ); - - // Normalize spins - Vectormath::normalize_vectors( conf_temp ); - - // Copy out - conf = conf_temp; + auto conf = this->configurations[i]->data(); + auto conf_predictor = this->configurations_predictor[i]->data(); + auto force = this->forces_virtual_predictor[i].data(); + auto k1 = this->configurations_k1[i]->data(); + auto k2 = this->configurations_k2[i]->data(); + auto k3 = this->configurations_k3[i]->data(); + auto k4 = this->configurations_k4[i]->data(); + + Backend::par::apply( nos, [conf, conf_predictor, force, k1, k2, k3, k4] SPIRIT_LAMBDA (int idx) { + k4[idx] = - conf_predictor[idx].cross(force[idx]); + conf[idx] = (conf[idx] + 1.0/6.0*k1[idx] + 1.0/3.0*k2[idx] + 1.0/3.0*k3[idx] + 1.0/6.0*k4[idx]).normalized(); + } ); } }; diff --git a/core/include/engine/Solver_SIB.hpp b/core/include/engine/Solver_SIB.hpp index 6b361502f..e79be815d 100644 --- a/core/include/engine/Solver_SIB.hpp +++ b/core/include/engine/Solver_SIB.hpp @@ -32,10 +32,13 @@ void Method_Solver::Iteration () { auto& image = *this->systems[i]->spins; auto& predictor = *this->configurations_predictor[i]; - Solver_Kernels::sib_transform(image, forces_virtual[i], predictor); - Vectormath::add_c_a(1, image, predictor); - Vectormath::scale(predictor, 0.5); + + auto imagep = this->systems[i]->spins->data(); + auto predictorp = this->configurations_predictor[i]->data(); + Backend::par::apply( nos, [imagep, predictorp] SPIRIT_LAMBDA (int idx) { + predictorp[idx] = 0.5 * (imagep[idx] + predictorp[idx]); + } ); } // Second part of the step @@ -43,8 +46,7 @@ void Method_Solver::Iteration () this->Calculate_Force_Virtual(this->configurations_predictor, this->forces_predictor, this->forces_virtual_predictor); for (int i = 0; i < this->noi; ++i) { - auto& image = *this->systems[i]->spins; - + auto& image = *this->systems[i]->spins; Solver_Kernels::sib_transform(image, forces_virtual_predictor[i], image); } }; diff --git a/core/include/engine/Solver_VP.hpp b/core/include/engine/Solver_VP.hpp index 8be52fa76..8c3c1556a 100644 --- a/core/include/engine/Solver_VP.hpp +++ b/core/include/engine/Solver_VP.hpp @@ -20,11 +20,11 @@ void Method_Solver::Initialize () /* Template instantiation of the Simulation class for use with the VP Solver. - The velocity projection method is often efficient for direct minimization, - but deals poorly with quickly varying fields or stochastic noise. - Paper: P. F. Bessarab et al., Method for finding mechanism and activation energy - of magnetic transitions, applied to skyrmion and antivortex annihilation, - Comp. Phys. Comm. 196, 335 (2015). + The velocity projection method is often efficient for direct minimization, + but deals poorly with quickly varying fields or stochastic noise. + Paper: P. F. Bessarab et al., Method for finding mechanism and activation energy + of magnetic transitions, applied to skyrmion and antivortex annihilation, + Comp. Phys. Comm. 196, 335 (2015). */ template <> inline void Method_Solver::Iteration () @@ -39,7 +39,6 @@ void Method_Solver::Iteration () auto f_pr = forces_previous[i].data(); auto v = velocities[i].data(); auto v_pr = velocities_previous[i].data(); - Backend::par::apply( forces[i].size(), [f, f_pr, v, v_pr] SPIRIT_LAMBDA (int idx) { f_pr[idx] = f[idx]; v_pr[idx] = v[idx]; @@ -52,21 +51,17 @@ void Method_Solver::Iteration () for (int i = 0; i < noi; ++i) { - auto& velocity = velocities[i]; - auto& force = forces[i]; - auto& force_prev = forces_previous[i]; - auto f = forces[i].data(); auto f_pr = forces_previous[i].data(); auto v = velocities[i].data(); auto m_temp = this->m; - - // Calculate the new velocity - Backend::par::apply(force.size(), [f,f_pr,v,m_temp] SPIRIT_LAMBDA (int idx) { + Backend::par::apply(nos, [f,f_pr,v,m_temp] SPIRIT_LAMBDA (int idx) { v[idx] += 0.5/m_temp * (f_pr[idx] + f[idx]); }); // Get the projection of the velocity on the force + auto& velocity = velocities[i]; + auto& force = forces[i]; projection[i] = Vectormath::dot(velocity, force); force_norm2[i] = Vectormath::dot(force, force); } @@ -77,11 +72,6 @@ void Method_Solver::Iteration () } for (int i = 0; i < noi; ++i) { - auto& velocity = velocities[i]; - auto& force = forces[i]; - auto& configuration = *(configurations[i]); - auto& configuration_temp = *(configurations_temp[i]); - auto f = forces[i].data(); auto v = velocities[i].data(); auto conf = (configurations[i])->data(); @@ -93,18 +83,18 @@ void Method_Solver::Iteration () // Calculate the projected velocity if (projection_full <= 0) + Backend::par::assign(velocities[i], [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); + else { - Vectormath::fill(velocity, { 0,0,0 }); - } else { - Backend::par::apply(force.size(), [f,v,ratio] SPIRIT_LAMBDA (int idx) { + Backend::par::apply(nos, [f,v,ratio] SPIRIT_LAMBDA (int idx) { v[idx] = f[idx] * ratio; }); } - Backend::par::apply( force.size(), [conf, conf_temp, dt, m_temp, v, f] SPIRIT_LAMBDA (int idx) { + Backend::par::apply( nos, [conf, conf_temp, dt, m_temp, v, f] SPIRIT_LAMBDA (int idx) { conf_temp[idx] = conf[idx] + dt * v[idx] + 0.5/m_temp * dt * f[idx]; conf[idx] = conf_temp[idx].normalized(); - }); + }); } }; diff --git a/core/include/engine/Solver_VP_OSO.hpp b/core/include/engine/Solver_VP_OSO.hpp index de8034b2c..f500b763f 100644 --- a/core/include/engine/Solver_VP_OSO.hpp +++ b/core/include/engine/Solver_VP_OSO.hpp @@ -21,11 +21,11 @@ void Method_Solver::Initialize () /* Template instantiation of the Simulation class for use with the VP Solver. - The velocity projection method is often efficient for direct minimization, - but deals poorly with quickly varying fields or stochastic noise. - Paper: P. F. Bessarab et al., Method for finding mechanism and activation energy - of magnetic transitions, applied to skyrmion and antivortex annihilation, - Comp. Phys. Comm. 196, 335 (2015). + The velocity projection method is often efficient for direct minimization, + but deals poorly with quickly varying fields or stochastic noise. + Paper: P. F. Bessarab et al., Method for finding mechanism and activation energy + of magnetic transitions, applied to skyrmion and antivortex annihilation, + Comp. Phys. Comm. 196, 335 (2015). Instead of the cartesian update scheme with re-normalization, this implementation uses the orthogonal spin optimization scheme, described by A. Ivanov in https://arxiv.org/abs/1904.02669. @@ -97,9 +97,9 @@ void Method_Solver::Iteration () // Calculate the projected velocity if (projection_full <= 0) + Backend::par::assign(velocities[img], [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); + else { - Vectormath::fill(velocities[img], { 0,0,0 }); - } else { Backend::par::apply(nos, [g,v,ratio] SPIRIT_LAMBDA (int idx) { v[idx] = g[idx] * ratio; }); @@ -107,7 +107,7 @@ void Method_Solver::Iteration () Backend::par::apply( nos, [sd, dt, m_temp, v, g] SPIRIT_LAMBDA (int idx) { sd[idx] = dt * v[idx] + 0.5/m_temp * dt * g[idx]; - }); + }); } Solver_Kernels::oso_rotate( this->configurations, this->searchdir); } diff --git a/core/include/engine/Vectormath.hpp b/core/include/engine/Vectormath.hpp index 077f02bf9..2eb94d476 100644 --- a/core/include/engine/Vectormath.hpp +++ b/core/include/engine/Vectormath.hpp @@ -550,14 +550,6 @@ namespace Engine ///////////////////////////////////////////////////////////////// //////// Vectormath-like operations - // sets sf := s - // sf is a scalarfield - // s is a scalar - void fill(scalarfield & sf, scalar s); - - // TODO: Add the test - void fill(scalarfield & sf, scalar s, const intfield & mask); - // Scale a scalarfield by a given value void scale(scalarfield & sf, scalar s); @@ -573,14 +565,6 @@ namespace Engine // Cut off all values to remain in a certain range void set_range(scalarfield & sf, scalar sf_min, scalar sf_max); - // sets vf := v - // vf is a vectorfield - // v is a vector - void fill(vectorfield & vf, const Vector3 & v); - void fill(vectorfield & vf, const Vector3 & v, const intfield & mask); - - // Normalize the vectors of a vectorfield - void normalize_vectors(vectorfield & vf); // Get the norm of a vectorfield void norm( const vectorfield & vf, scalarfield & norm ); diff --git a/core/src/engine/Hamiltonian_Gaussian.cpp b/core/src/engine/Hamiltonian_Gaussian.cpp index 55a22129d..616ca7bda 100644 --- a/core/src/engine/Hamiltonian_Gaussian.cpp +++ b/core/src/engine/Hamiltonian_Gaussian.cpp @@ -1,5 +1,6 @@ #include #include +#include #include using namespace Data; @@ -82,7 +83,8 @@ namespace Engine if (this->energy_contributions_per_spin[0].second.size() != nos) this->energy_contributions_per_spin = { { "Gaussian", scalarfield(nos,0) } }; // Set to zero - for (auto& pair : energy_contributions_per_spin) Vectormath::fill(pair.second, 0); + for (auto& pair : energy_contributions_per_spin) + Backend::par::assign(pair.second, [] SPIRIT_LAMBDA () {return 0;}); for (int i = 0; i < this->n_gaussians; ++i) { diff --git a/core/src/engine/Hamiltonian_Heisenberg.cpp b/core/src/engine/Hamiltonian_Heisenberg.cpp index ce0969bd0..981ccd7c1 100644 --- a/core/src/engine/Hamiltonian_Heisenberg.cpp +++ b/core/src/engine/Hamiltonian_Heisenberg.cpp @@ -227,9 +227,11 @@ namespace Engine for( auto& contrib : contributions ) { // Allocate if not already allocated - if (contrib.second.size() != nos) contrib.second = scalarfield(nos, 0); + if (contrib.second.size() != nos) + contrib.second = scalarfield(nos, 0); // Otherwise set to zero - else Vectormath::fill(contrib.second, 0); + else + Backend::par::assign(contrib.second, [] SPIRIT_LAMBDA () {return 0;}); } // External field @@ -339,7 +341,7 @@ namespace Engine { vectorfield gradients_temp; gradients_temp.resize(geometry->nos); - Vectormath::fill(gradients_temp, {0,0,0}); + Backend::par::assign(gradients_temp, [] SPIRIT_LAMBDA () -> Vector3 {return {0,0,0};}); this->Gradient_DDI_Direct(spins, gradients_temp); #pragma omp parallel for @@ -388,13 +390,13 @@ namespace Engine scalar Energy_DDI = 0; vectorfield gradients_temp; gradients_temp.resize(geometry->nos); - Vectormath::fill(gradients_temp, {0,0,0}); + Backend::par::assign(gradients_temp, [] SPIRIT_LAMBDA () -> Vector3 {return {0,0,0};}); this->Gradient_DDI_FFT(spins, gradients_temp); // === DEBUG: begin gradient comparison === // vectorfield gradients_temp_dir; // gradients_temp_dir.resize(this->geometry->nos); - // Vectormath::fill(gradients_temp_dir, {0,0,0}); + // Backend::par::assign(gradients_temp_dir, [] SPIRIT_LAMBDA (int idx) {return {0,0,0};}); // Gradient_DDI_Direct(spins, gradients_temp_dir); // //get deviation @@ -568,7 +570,7 @@ namespace Engine void Hamiltonian_Heisenberg::Gradient(const vectorfield & spins, vectorfield & gradient) { // Set to zero - Vectormath::fill(gradient, {0,0,0}); + Backend::par::assign(gradient, [] SPIRIT_LAMBDA () -> Vector3 {return {0,0,0};}); // External field if(idx_zeeman >= 0) @@ -597,9 +599,8 @@ namespace Engine void Hamiltonian_Heisenberg::Gradient_and_Energy(const vectorfield & spins, vectorfield & gradient, scalar & energy) { - // Set to zero - Vectormath::fill(gradient, {0,0,0}); + Backend::par::assign(gradient, [] SPIRIT_LAMBDA () { return Vector3{0,0,0}; }); energy = 0; auto N = spins.size(); @@ -623,14 +624,18 @@ namespace Engine if(idx_ddi >= 0) this->Gradient_DDI(spins, gradient); - energy += Backend::par::reduce( N, [s,g] SPIRIT_LAMBDA ( int idx ) { return 0.5 * g[idx].dot(s[idx]) ;} ); + energy += Backend::par::reduce( spins.size(), [] SPIRIT_LAMBDA ( const Vector3 & s, const Vector3 & g ) { + return 0.5 * g.dot(s); + }, spins, gradient ); // External field if(idx_zeeman >= 0) { Vector3 ext_field = external_field_normal * external_field_magnitude; this->Gradient_Zeeman(gradient); - energy += Backend::par::reduce( N, [s, ext_field, mu_s] SPIRIT_LAMBDA ( int idx ) { return -mu_s[idx] * ext_field.dot(s[idx]) ;} ); + energy += Backend::par::reduce( spins.size(), [ext_field] SPIRIT_LAMBDA ( const Vector3 & s, const scalar & mu_s ) { + return -mu_s * ext_field.dot(s); + }, spins, geometry->mu_s ); } // Quadruplets diff --git a/core/src/engine/Hamiltonian_Heisenberg.cu b/core/src/engine/Hamiltonian_Heisenberg.cu index b35a92cff..7efbcef83 100644 --- a/core/src/engine/Hamiltonian_Heisenberg.cu +++ b/core/src/engine/Hamiltonian_Heisenberg.cu @@ -225,7 +225,11 @@ namespace Engine // Allocate if not already allocated if (pair.second.size() != nos) pair.second = scalarfield(nos, 0); // Otherwise set to zero - else for (auto& pair : contributions) Vectormath::fill(pair.second, 0); + else + { + for (auto& pair : contributions) + Backend::par::assign(pair.second, [] SPIRIT_LAMBDA () {return 0;}); + } } // External field @@ -366,13 +370,11 @@ namespace Engine void Hamiltonian_Heisenberg::E_DDI_Direct(const vectorfield & spins, scalarfield & Energy) { - vectorfield gradients_temp; - gradients_temp.resize(geometry->nos); - Vectormath::fill(gradients_temp, {0,0,0}); + vectorfield gradients_temp(spins.size(), {0, 0, 0}); this->Gradient_DDI_Direct(spins, gradients_temp); #pragma omp parallel for - for (int ispin = 0; ispin < geometry->nos; ispin++) + for (int ispin = 0; ispin < spins.size(); ispin++) { Energy[ispin] += 0.5 * spins[ispin].dot(gradients_temp[ispin]); } @@ -420,17 +422,17 @@ namespace Engine } void Hamiltonian_Heisenberg::E_DDI_FFT(const vectorfield & spins, scalarfield & Energy) { + auto N = spins.size(); //todo maybe the gradient should be cached somehow, it is quite inefficient to calculate it //again just for the energy - vectorfield gradients_temp(geometry->nos); - Vectormath::fill(gradients_temp, {0,0,0}); - this->Gradient_DDI(spins, gradients_temp); - CU_E_DDI_FFT<<<(geometry->nos + 1023)/1024, 1024>>>(Energy.data(), spins.data(), gradients_temp.data(), geometry->nos, geometry->n_cell_atoms, geometry->mu_s.data()); + vectorfield gradients_temp(N, {0, 0, 0}); + this->Gradient_DDI_FFT(spins, gradients_temp); + CU_E_DDI_FFT<<<(N + 1023)/1024, 1024>>>(Energy.data(), spins.data(), gradients_temp.data(), N, geometry->n_cell_atoms, geometry->mu_s.data()); // === DEBUG: begin gradient comparison === // vectorfield gradients_temp_dir; // gradients_temp_dir.resize(this->geometry->nos); - // Vectormath::fill(gradients_temp_dir, {0,0,0}); + // Backend::par::assign(gradients_temp_dir, [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); // Gradient_DDI_Direct(spins, gradients_temp_dir); // //get deviation @@ -622,7 +624,7 @@ namespace Engine void Hamiltonian_Heisenberg::Gradient(const vectorfield & spins, vectorfield & gradient) { // Set to zero - Vectormath::fill(gradient, {0,0,0}); + Backend::par::assign(gradient, [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); // External field if(idx_zeeman >= 0) @@ -652,7 +654,7 @@ namespace Engine void Hamiltonian_Heisenberg::Gradient_and_Energy(const vectorfield & spins, vectorfield & gradient, scalar & energy) { // Set to zero - Vectormath::fill(gradient, {0,0,0}); + Backend::par::assign(gradient, [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); energy = 0; auto N = spins.size(); diff --git a/core/src/engine/Method_GNEB.cpp b/core/src/engine/Method_GNEB.cpp index 37c9d1bce..94d9b7819 100644 --- a/core/src/engine/Method_GNEB.cpp +++ b/core/src/engine/Method_GNEB.cpp @@ -97,10 +97,10 @@ namespace Engine // while the gradient force is manipulated (e.g. projected) auto eff_field = this->chain->images[img]->effective_field.data(); auto f_grad = F_gradient[img].data(); - Backend::par::apply( image.size(), + Backend::par::apply( image.size(), [eff_field, f_grad] SPIRIT_LAMBDA (int idx) { - eff_field[idx] *= -1; + eff_field[idx] *= -1; f_grad[idx] = eff_field[idx]; } ); @@ -231,7 +231,7 @@ namespace Engine } else { - Vectormath::fill(F_total[img], { 0,0,0 }); + Backend::par::assign(F_total[img], [] SPIRIT_LAMBDA () -> Vector3 {return { 0,0,0 };}); } // Apply pinning mask #ifdef SPIRIT_ENABLE_PINNING @@ -373,20 +373,22 @@ namespace Engine // TODO: Find a better way to do this without so much code duplication. Probably requires extension of the Hamiltonian in a way that allows to request information about the different contributions. for(int img=0; imgconfigurations[img]; if(ham.Idx_Exchange() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_Exchange(image, temp_energy); - temp_energies[ham.Idx_Exchange()][img] = Vectormath::sum(temp_energy); ham.Gradient_Exchange(image, temp_field); + temp_energies[ham.Idx_Exchange()][img] = Vectormath::sum(temp_energy); temp_dE_dRx[ham.Idx_Exchange()][img] = -Vectormath::dot(temp_field, this->tangents[img]); } if(ham.Idx_Zeeman() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_Zeeman(image, temp_energy); temp_energies[ham.Idx_Zeeman()][img] = Vectormath::sum(temp_energy); ham.Gradient_Zeeman(temp_field); @@ -394,8 +396,6 @@ namespace Engine } if(ham.Idx_Anisotropy() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_Anisotropy(image, temp_energy); temp_energies[ham.Idx_Anisotropy()][img] = Vectormath::sum(temp_energy); ham.Gradient_Anisotropy(image, temp_field); @@ -403,8 +403,6 @@ namespace Engine } if(ham.Idx_DMI() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_DMI(image, temp_energy); temp_energies[ham.Idx_DMI()][img] = Vectormath::sum(temp_energy); ham.Gradient_DMI(image, temp_field); @@ -412,8 +410,6 @@ namespace Engine } if(ham.Idx_DDI() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_DDI(image, temp_energy); temp_energies[ham.Idx_DDI()][img] = Vectormath::sum(temp_energy); ham.Gradient_DDI(image, temp_field); @@ -421,8 +417,6 @@ namespace Engine } if(ham.Idx_Quadruplet() >= 0) { - Vectormath::fill(temp_field, Vector3::Zero()); - Vectormath::fill(temp_energy, 0); ham.E_Quadruplet(image, temp_energy); temp_energies[ham.Idx_Quadruplet()][img] = Vectormath::sum(temp_energy); ham.Gradient_Quadruplet(image, temp_field); diff --git a/core/src/engine/Method_MMF.cpp b/core/src/engine/Method_MMF.cpp index bc91b435d..623374050 100644 --- a/core/src/engine/Method_MMF.cpp +++ b/core/src/engine/Method_MMF.cpp @@ -368,7 +368,7 @@ namespace Engine // Spectra was not successful in calculating an eigenvector Log(Log_Level::Error, Log_Sender::MMF, "Failed to calculate eigenvectors of the Hessian!"); Log(Log_Level::Info, Log_Sender::MMF, "Zeroing the MMF force..."); - Vectormath::fill(force, Vector3{0,0,0}); + Backend::par::assign(force, [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 0};}); } } diff --git a/core/src/engine/Solver_Kernels.cpp b/core/src/engine/Solver_Kernels.cpp index a2ff7296c..131031c22 100644 --- a/core/src/engine/Solver_Kernels.cpp +++ b/core/src/engine/Solver_Kernels.cpp @@ -61,11 +61,11 @@ namespace Solver_Kernels auto s = configurations[img]->data(); auto sd = searchdir[img].data(); - - Backend::par::apply( nos, [s, sd] SPIRIT_LAMBDA (int idx) + + Backend::par::apply( nos, [s, sd] SPIRIT_LAMBDA (int idx) { scalar theta = (sd[idx]).norm(); - scalar q = cos(theta), w = 1-q, + scalar q = cos(theta), w = 1-q, x = -sd[idx][0]/theta, y = -sd[idx][1]/theta, z = -sd[idx][2]/theta, s1 = -y*z*w, s2 = x*z*w, s3 = -x*y*w, p1 = x*sin(theta), p2 = y*sin(theta), p3 = z*sin(theta); @@ -89,7 +89,7 @@ namespace Solver_Kernels { int nos = searchdir.size(); scalar theta_rms = 0; - theta_rms = sqrt( Backend::par::reduce(searchdir, [] SPIRIT_LAMBDA (const Vector3 & v){ return v.squaredNorm(); }) / nos ); + theta_rms = sqrt( Backend::par::reduce( nos, [] SPIRIT_LAMBDA (const Vector3 & v){ return v.squaredNorm(); }, searchdir) / nos ); scalar scaling = (theta_rms > maxmove) ? maxmove/theta_rms : 1.0; return scaling; } @@ -167,7 +167,7 @@ namespace Solver_Kernels { rho[n] = 1/rho[n]; } - + for(int img=0; img>>(sf.data(), s, n); - CU_CHECK_AND_SYNC(); - } - __global__ void cu_fill_mask(scalar *sf, scalar s, const int * mask, size_t N) - { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < N) - { - sf[idx] = mask[idx]*s; - } - } - void fill(scalarfield & sf, scalar s, const intfield & mask) - { - int n = sf.size(); - cu_fill_mask<<<(n+1023)/1024, 1024>>>(sf.data(), s, mask.data(), n); - CU_CHECK_AND_SYNC(); - } __global__ void cu_scale(scalar *sf, scalar s, size_t N) { @@ -179,7 +150,7 @@ namespace Engine scalar sum(const scalarfield & sf) { static scalarfield ret(1, 0); - Vectormath::fill(ret, 0); + Backend::par::assign(ret, [] SPIRIT_LAMBDA (int idx) {return 0;}); // Determine temporary storage size and allocate void * d_temp_storage = NULL; size_t temp_storage_bytes = 0; @@ -219,50 +190,6 @@ namespace Engine sf[i] = std::min( std::max( sf_min, sf[i] ), sf_max ); } - __global__ void cu_fill(Vector3 *vf1, Vector3 v2, size_t N) - { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < N) - { - vf1[idx] = v2; - } - } - void fill(vectorfield & vf, const Vector3 & v) - { - int n = vf.size(); - cu_fill<<<(n+1023)/1024, 1024>>>(vf.data(), v, n); - CU_CHECK_AND_SYNC(); - } - __global__ void cu_fill_mask(Vector3 *vf1, Vector3 v2, const int * mask, size_t N) - { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < N) - { - vf1[idx] = v2; - } - } - void fill(vectorfield & vf, const Vector3 & v, const intfield & mask) - { - int n = vf.size(); - cu_fill_mask<<<(n+1023)/1024, 1024>>>(vf.data(), v, mask.data(), n); - CU_CHECK_AND_SYNC(); - } - - __global__ void cu_normalize_vectors(Vector3 *vf, size_t N) - { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx < N) - { - vf[idx].normalize(); - } - } - void normalize_vectors(vectorfield & vf) - { - int n = vf.size(); - cu_normalize_vectors<<<(n+1023)/1024, 1024>>>(vf.data(), n); - CU_CHECK_AND_SYNC(); - } - __global__ void cu_norm(const Vector3 * vf, scalar * norm, size_t N) { int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -387,7 +314,7 @@ namespace Engine Vector3 sum(const vectorfield & vf) { static vectorfield ret(1, {0,0,0}); - Vectormath::fill(ret, {0,0,0}); + Backend::par::assign(ret, [] SPIRIT_LAMBDA (int idx) -> Vector3 {return {0, 0, 0};}); // Declare, allocate, and initialize device-accessible pointers for input and output CustomAdd add_op; static const Vector3 init{0,0,0}; @@ -423,11 +350,11 @@ namespace Engine { int n = vf1.size(); static scalarfield sf(n, 0); - + if(sf.size() != vf1.size()) sf.resize(vf1.size()); - Vectormath::fill(sf, 0); + Backend::par::assign(sf, [] SPIRIT_LAMBDA (int idx) {return 0;}); scalar ret; // Dot product diff --git a/core/src/io/Dataparser.cpp b/core/src/io/Dataparser.cpp index 3ae70baaf..850be0664 100644 --- a/core/src/io/Dataparser.cpp +++ b/core/src/io/Dataparser.cpp @@ -46,10 +46,9 @@ namespace IO geometry.atom_types[i] = -1; #endif } - } - // normalize read in spins - Vectormath::normalize_vectors( spins ); + spins[i].normalize(); + } } diff --git a/core/src/utility/Configurations.cpp b/core/src/utility/Configurations.cpp index 24533e0a9..4fbcbae53 100644 --- a/core/src/utility/Configurations.cpp +++ b/core/src/utility/Configurations.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -137,9 +138,15 @@ namespace Utility auto distribution = std::uniform_real_distribution(-1, 1); Engine::Vectormath::get_random_vectorfield_unitsphere(*prng, xi); - Engine::Vectormath::scale(xi, epsilon); - Engine::Vectormath::add_c_a(1, xi, *s.spins, mask); - Engine::Vectormath::normalize_vectors(*s.spins); + auto xip = xi.data(); + auto maskp = mask.data(); + auto spinsp = spins.data(); + Engine::Backend::par::apply(spins.size(), [epsilon, spinsp, maskp, xip] SPIRIT_LAMBDA (int idx) { + xip[idx] *= epsilon; + if( maskp[idx] ) + spinsp[idx] += xip[idx]; + spinsp[idx].normalize(); + }); } void Hopfion(Data::Spin_System & s, Vector3 pos, scalar r, int order, filterfunction filter) diff --git a/core/test/test_manifoldmath.cpp b/core/test/test_manifoldmath.cpp index e7cef2de7..ac61f3156 100644 --- a/core/test/test_manifoldmath.cpp +++ b/core/test/test_manifoldmath.cpp @@ -2,6 +2,7 @@ #include #include #include +#include TEST_CASE( "Manifold operations", "[manifold]" ) @@ -53,56 +54,68 @@ TEST_CASE( "Manifold operations", "[manifold]" ) Engine::Manifoldmath::invert_orthogonal(v1,v2); REQUIRE( Engine::Vectormath::dot(v1, v3) == Approx(-proj_prev) ); } - + SECTION( "Projection: tangetial") { REQUIRE_FALSE( Engine::Vectormath::dot(v1,v2) == Approx(0) ); // Assert they are not orthogonal - Engine::Vectormath::normalize_vectors( v2 ); // Normalize all vector3 of v2 for projection - Engine::Manifoldmath::project_tangential(v1,v2); + // Normalize all vector3 of v2 for projection + auto v2p = v2.data(); + Engine::Backend::par::apply(v2.size(), [v2p] SPIRIT_LAMBDA (int idx) { + v2p[idx].normalize(); + }); + Engine::Manifoldmath::project_tangential(v1,v2); for (int i=0; i #include #include +#include TEST_CASE( "Vectormath operations", "[vectormath]" ) { int N = 10000; - int N_check = std::min(100, N); + int N_check = std::min(100, N); scalarfield sf(N, 1); vectorfield vf1(N, Vector3{ 1.0, 1.0, 1.0 }); vectorfield vf2(N, Vector3{ -1.0, 1.0, 1.0 }); SECTION("Magnetization") { - Engine::Vectormath::fill( vf1, { 0, 0, 1 } ); + Engine::Backend::par::assign(vf1, [] SPIRIT_LAMBDA () -> Vector3 {return {0, 0, 1};}); auto m = Engine::Vectormath::Magnetization( vf1 ); REQUIRE( m[0] == Approx( 0 ) ); REQUIRE( m[1] == Approx( 0 ) ); @@ -50,8 +51,8 @@ TEST_CASE( "Vectormath operations", "[vectormath]" ) { scalar stest = 333; Vector3 vtest{ 0, stest, 0 }; - Engine::Vectormath::fill(sf, stest); - Engine::Vectormath::fill(vf1, vtest); + Engine::Backend::par::assign(sf, [stest] SPIRIT_LAMBDA () -> scalar {return stest;}); + Engine::Backend::par::assign(vf1, [vtest] SPIRIT_LAMBDA () -> Vector3 {return vtest;}); for (int i = 0; i < N_check; ++i) { REQUIRE(sf[i] == stest); @@ -114,8 +115,12 @@ TEST_CASE( "Vectormath operations", "[vectormath]" ) Vector3 vtest1 = vf1[0].normalized(); Vector3 vtest2 = vf2[0].normalized(); - Engine::Vectormath::normalize_vectors(vf1); - Engine::Vectormath::normalize_vectors(vf2); + auto vf1p = vf1.data(); + auto vf2p = vf2.data(); + Engine::Backend::par::apply(vf1.size(), [vf1p, vf2p] SPIRIT_LAMBDA (int idx) { + vf1p[idx].normalize(); + vf2p[idx].normalize(); + }); for (int i = 0; i < N_check; ++i) { REQUIRE(vf1[i] == vtest1); @@ -130,8 +135,12 @@ TEST_CASE( "Vectormath operations", "[vectormath]" ) scalar vmc1 = std::max( vtest1[0], std::max( vtest1[1], vtest1[2] ) ); scalar vmc2 = std::max( vtest2[0], std::max( vtest2[1], vtest2[2] ) ); - Engine::Vectormath::normalize_vectors(vf1); - Engine::Vectormath::normalize_vectors(vf2); + auto vf1p = vf1.data(); + auto vf2p = vf2.data(); + Engine::Backend::par::apply(vf1.size(), [vf1p, vf2p] SPIRIT_LAMBDA (int idx) { + vf1p[idx].normalize(); + vf2p[idx].normalize(); + }); scalar vfmc1 = Engine::Vectormath::max_abs_component( vf1 ); scalar vfmc2 = Engine::Vectormath::max_abs_component( vf2 );