From 631f2ad1f9bb7d43208f765e57c3a2fafce26f52 Mon Sep 17 00:00:00 2001 From: Ricardo Fabbri Date: Fri, 3 Nov 2023 02:55:12 -0300 Subject: [PATCH] [Trifocal+P2Pt] optimized Eigen includes --- minus/Eigen/LU | 10 +++++----- minus/lsolve.hxx | 2 ++ minus/minus.hxx | 3 +-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/minus/Eigen/LU b/minus/Eigen/LU index e468b0f..6e4222a 100644 --- a/minus/Eigen/LU +++ b/minus/Eigen/LU @@ -26,7 +26,7 @@ //#include "src/misc/Kernel.h" //#include "src/misc/Image.h" //#include "src/LU/FullPivLU.h" -#include "src/LU/PartialPivLU.h" +//#include "src/LU/PartialPivLU.h" /* #ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_MKL @@ -38,13 +38,13 @@ #endif #include "src/LU/Determinant.h" */ -#include "src/LU/InverseImpl.h" +//#include "src/LU/InverseImpl.h" // Use the SSE optimized version whenever possible. At the moment the // SSE version doesn't compile when AVX is enabled -#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX - #include "src/LU/arch/Inverse_SSE.h" -#endif +//#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +// #include "src/LU/arch/Inverse_SSE.h" +//#endif /* #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/minus/lsolve.hxx b/minus/lsolve.hxx index 074d994..a19da37 100644 --- a/minus/lsolve.hxx +++ b/minus/lsolve.hxx @@ -1,3 +1,4 @@ + // Specific to Chicago template __attribute__((always_inline)) inline void @@ -71,5 +72,6 @@ lsolve( x(2) -= (m(2,3)*x(3)+m(2,4)*x(4)+m(2,5)*x(5)+m(2,6)*x(6)+m(2,7)*x(7)+m(2,8)*x(8)+m(2,9)*x(9)+m(2,10)*x(10)+m(2,11)*x(11)+m(2,12)*x(12)+m(2,13)*x(13)); x(2) /= m(2,2); x(1) -= (m(1,2)*x(2)+m(1,3)*x(3)+m(1,4)*x(4)+m(1,5)*x(5)+m(1,6)*x(6)+m(1,7)*x(7)+m(1,8)*x(8)+m(1,9)*x(9)+m(1,10)*x(10)+m(1,11)*x(11)+m(1,12)*x(12)+m(1,13)*x(13)); x(1) /= m(1,1); x(0) -= (m(0,1)*x(1)+m(0,2)*x(2)+m(0,3)*x(3)+m(0,4)*x(4)+m(0,5)*x(5)+m(0,6)*x(6)+m(0,7)*x(7)+m(0,8)*x(8)+m(0,9)*x(9)+m(0,10)*x(10)+m(0,11)*x(11)+m(0,12)*x(12)+m(0,13)*x(13)); x(0) /= m(0,0); + asm("#------ END 
Lsolve itself"); // it is not being inlined, and there are also too many vmovsd instructions moving data. It is sub-vectorized, using only xmm registers, no ymm or zmm } diff --git a/minus/minus.hxx b/minus/minus.hxx index 9a2ba1d..82056cb 100644 --- a/minus/minus.hxx +++ b/minus/minus.hxx @@ -12,7 +12,7 @@ #include "minus.h" #include "internal-util.hxx" -#include "Eigen/LU" +#include "Eigen/Core" namespace MiNuS { @@ -53,7 +53,6 @@ track(const track_settings &s, const C s_sols[f::nve*f::nsols], const C pa Map, f::nve, 1>, Aligned > bb(RHS); static constexpr F the_smallest_number = 1e-13; // XXX BENCHMARK THIS typedef minus_array v; typedef minus_array vp; - PartialPivLU, f::nve, f::nve> > lu; solution *t_s = raw_solutions + sol_min; // current target solution const C* __restrict s_s = s_sols + sol_min*f::nve; // current start solution