diff --git a/CMakeLists.txt b/CMakeLists.txt
index 58794058a9..6c2e21353c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -410,6 +410,7 @@ endif()
 # Elemental's mod's of Parallel Multiple Relatively Robust Representations
 # ------------------------------------------------------------------------
 add_subdirectory(external/pmrrr)
+include_directories(external/pmrrr/include)
 if(EL_BUILT_SCALAPACK)
   add_dependencies(pmrrr project_scalapack)
 else()
diff --git a/examples/lapack_like/HermitianEig.cpp b/examples/lapack_like/HermitianEig.cpp
index 07c7010714..fd42a8882b 100644
--- a/examples/lapack_like/HermitianEig.cpp
+++ b/examples/lapack_like/HermitianEig.cpp
@@ -7,12 +7,88 @@
    http://opensource.org/licenses/BSD-2-Clause
 */
 #include <El.hpp>
+
 using namespace std;
 using namespace El;
 
-// Typedef our real and complex types to 'Real' and 'C' for convenience
-typedef double Real;
-typedef Complex<Real> C;
+template<typename Real>
+void run_example(Int n, bool print)
+{
+    typedef Complex<Real> C;
+
+    // Create a 2d process grid from a communicator. In our case, it is
+    // mpi::COMM_WORLD. There is another constructor that allows you to 
+    // specify the grid dimensions, Grid g( comm, r ), which creates a
+    // grid of height r.
+    Grid g( mpi::COMM_WORLD );
+
+    // Create an n x n complex distributed matrix, 
+    // We distribute the matrix using grid 'g'.
+    //
+    // There are quite a few available constructors, including ones that 
+    // allow you to pass in your own local buffer and to specify the 
+    // distribution alignments (i.e., which process row and column owns the
+    // top-left element)
+    DistMatrix<C> H( n, n, g );
+
+    // Fill the matrix since we did not pass in a buffer. 
+    //
+    // We will fill entry (i,j) with the complex value (i+j,i-j) so that 
+    // the global matrix is Hermitian. However, only one triangle of the 
+    // matrix actually needs to be filled, the symmetry can be implicit.
+    //
+    const Int localHeight = H.LocalHeight();
+    const Int localWidth = H.LocalWidth();
+    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
+    {
+        // Our process owns the rows colShift:colStride:n,
+        //           and the columns rowShift:rowStride:n
+        const Int j = H.GlobalCol(jLoc);
+        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
+        {
+            const Int i = H.GlobalRow(iLoc);
+            H.SetLocal( iLoc, jLoc, C(i+j,i-j) );
+        }
+    }
+
+    // Make a backup of H before we overwrite it within the eigensolver
+    auto HCopy( H );
+
+    // Call the eigensolver. We first create an empty complex eigenvector 
+    // matrix, Q[MC,MR], and an eigenvalue column vector, w[VR,* ]
+    //
+    // Optional: set blocksizes and algorithmic choices here. See the 
+    //           'Tuning' section of the README for details.
+    DistMatrix<Real,VR,STAR> w( g );
+    DistMatrix<C> Q( g );
+    HermitianEig( LOWER, H, w, Q ); 
+
+    if( print )
+    {
+        Print( HCopy, "H" );
+        Print( Q, "Q" );
+        Print( w, "w" );
+    }
+
+    // Check the residual, || H Q - Omega Q ||_F
+    const Real frobH = HermitianFrobeniusNorm( LOWER, HCopy );
+    auto E( Q );
+    DiagonalScale( RIGHT, NORMAL, w, E );
+    Hemm( LEFT, LOWER, C(-1), HCopy, Q, C(1), E );
+    const Real frobResid = FrobeniusNorm( E );
+
+    // Check the orthogonality of Q
+    Identity( E, n, n );
+    Herk( LOWER, ADJOINT, Real(-1), Q, Real(1), E );
+    const Real frobOrthog = HermitianFrobeniusNorm( LOWER, E );
+
+    if( mpi::Rank() == 0 )
+        Output
+        ("|| H ||_F = ",frobH,"\n",
+         "|| H Q - Q Omega ||_F / || A ||_F = ",frobResid/frobH,"\n",
+         "|| Q' Q - I ||_F = ",frobOrthog,"\n");
+}
+
 
 int
 main( int argc, char* argv[] )
@@ -27,80 +103,14 @@ main( int argc, char* argv[] )
     {
         const Int n = Input("--size","size of matrix",100);
         const bool print = Input("--print","print matrices?",false);
+        const bool single_precision = Input("--single", "single precision?", false);
         ProcessInput();
         PrintInputReport();
-
-        // Create a 2d process grid from a communicator. In our case, it is
-        // mpi::COMM_WORLD. There is another constructor that allows you to 
-        // specify the grid dimensions, Grid g( comm, r ), which creates a
-        // grid of height r.
-        Grid g( mpi::COMM_WORLD );
-    
-        // Create an n x n complex distributed matrix, 
-        // We distribute the matrix using grid 'g'.
-        //
-        // There are quite a few available constructors, including ones that 
-        // allow you to pass in your own local buffer and to specify the 
-        // distribution alignments (i.e., which process row and column owns the
-        // top-left element)
-        DistMatrix<C> H( n, n, g );
-
-        // Fill the matrix since we did not pass in a buffer. 
-        //
-        // We will fill entry (i,j) with the complex value (i+j,i-j) so that 
-        // the global matrix is Hermitian. However, only one triangle of the 
-        // matrix actually needs to be filled, the symmetry can be implicit.
-        //
-        const Int localHeight = H.LocalHeight();
-        const Int localWidth = H.LocalWidth();
-        for( Int jLoc=0; jLoc<localWidth; ++jLoc )
-        {
-            // Our process owns the rows colShift:colStride:n,
-            //           and the columns rowShift:rowStride:n
-            const Int j = H.GlobalCol(jLoc);
-            for( Int iLoc=0; iLoc<localHeight; ++iLoc )
-            {
-                const Int i = H.GlobalRow(iLoc);
-                H.SetLocal( iLoc, jLoc, C(i+j,i-j) );
-            }
+        if(single_precision) {
+            run_example<float>(n, print);
+        } else {
+            run_example<double>(n, print);
         }
-
-        // Make a backup of H before we overwrite it within the eigensolver
-        auto HCopy( H );
-
-        // Call the eigensolver. We first create an empty complex eigenvector 
-        // matrix, Q[MC,MR], and an eigenvalue column vector, w[VR,* ]
-        //
-        // Optional: set blocksizes and algorithmic choices here. See the 
-        //           'Tuning' section of the README for details.
-        DistMatrix<Real,VR,STAR> w( g );
-        DistMatrix<C> Q( g );
-        HermitianEig( LOWER, H, w, Q ); 
-
-        if( print )
-        {
-            Print( HCopy, "H" );
-            Print( Q, "Q" );
-            Print( w, "w" );
-        }
-
-        // Check the residual, || H Q - Omega Q ||_F
-        const Real frobH = HermitianFrobeniusNorm( LOWER, HCopy );
-        auto E( Q );
-        DiagonalScale( RIGHT, NORMAL, w, E );
-        Hemm( LEFT, LOWER, C(-1), HCopy, Q, C(1), E );
-        const Real frobResid = FrobeniusNorm( E );
-
-        // Check the orthogonality of Q
-        Identity( E, n, n );
-        Herk( LOWER, ADJOINT, Real(-1), Q, Real(1), E );
-        const Real frobOrthog = HermitianFrobeniusNorm( LOWER, E );
-
-        if( mpi::Rank() == 0 )
-            Output
-            ("|| H ||_F = ",frobH,"\n",
-             "|| H Q - Q Omega ||_F / || A ||_F = ",frobResid/frobH,"\n",
-             "|| Q' Q - I ||_F = ",frobOrthog,"\n");
     }
     catch( exception& e ) { ReportException(e); }
 
diff --git a/external/pmrrr/CMakeLists.txt b/external/pmrrr/CMakeLists.txt
index e6e7347502..e8265f9242 100644
--- a/external/pmrrr/CMakeLists.txt
+++ b/external/pmrrr/CMakeLists.txt
@@ -10,10 +10,13 @@
 #  http://opensource.org/licenses/BSD-2-Clause
 #
 
+# Create a set of defines for including in templated code
+set(pmrrr_defines "\n")
+
 option(HAVE_SPINLOCKS "Enable if pthread lib supports spinlocks" OFF)
 MARK_AS_ADVANCED(HAVE_SPINLOCKS)
 if(NOT HAVE_SPINLOCKS)
-  add_definitions(-DNOSPINLOCKS)
+  set(pmrrr_defines "${pmrrr_defines}#define NOSPINLOCKS\n")
 endif()
 
 # Include the header directory
@@ -24,9 +27,6 @@ include_directories(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/include")
 
 # Define the header files installation rules
 # ------------------------------------------
-install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
-	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 
-  FILES_MATCHING PATTERN "*.h")
 if(WIN32 OR APPLE OR NOT EL_HYBRID)
   # Disable for Windows because of lack of POSIX support;
   # disable for Mac OS X because of deprecation of unnamed semaphores
@@ -36,9 +36,21 @@ else()
   option(DISABLE_PTHREADS "Disable pthreads?" OFF)
 endif()
 if(DISABLE_PTHREADS)
-  add_definitions(-DDISABLE_PTHREADS)
+  set(pmrrr_defines "${pmrrr_defines}#define DISABLE_PTHREADS\n")
 endif()
 
+#Build directory
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/global.h.in"
+               "${PROJECT_BINARY_DIR}/include/pmrrr/definitions/global.h"
+               @ONLY)
+#Install directory
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/global.h.in"
+               "${CMAKE_INSTALL_INCLUDEDIR}/pmrrr/definitions/lobal.h"
+               @ONLY)
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+  FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
+
 # Ensure that an MPI C compiler was found
 if(NOT MPI_C_FOUND)
   message(FATAL_ERROR "No MPI C compiler was found, so PMRRR cannot be built")
@@ -74,7 +86,7 @@ set(CMAKE_THREAD_LIBS_INIT ${CMAKE_THREAD_LIBS_INIT} PARENT_SCOPE)
 
 # Define the main library and its link libraries
 # ----------------------------------------------
-file(GLOB_RECURSE PMRRR_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.c" "*.h")
+file(GLOB_RECURSE PMRRR_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp" "*.h")
 add_library(pmrrr ${LIBRARY_TYPE} ${PMRRR_SRC})
 if(DISABLE_PTHREADS)
   target_link_libraries(pmrrr ${MPI_C_LIBRARIES} ${MATH_LIBS})
diff --git a/external/pmrrr/LICENSE b/external/pmrrr/LICENSE
deleted file mode 100644
index 669f274118..0000000000
--- a/external/pmrrr/LICENSE
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2010, RWTH Aachen University
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or 
-without modification, are permitted provided that the following
-conditions are met:
-  * Redistributions of source code must retain the above 
-    copyright notice, this list of conditions and the following
-    disclaimer.
-  * Redistributions in binary form must reproduce the above 
-    copyright notice, this list of conditions and the following 
-    disclaimer in the documentation and/or other materials 
-    provided with the distribution.
-  * Neither the name of the RWTH Aachen University nor the
-    names of its contributors may be used to endorse or promote 
-    products derived from this software without specific prior 
-    written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
-AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
-USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
-SUCH DAMAGE.
diff --git a/external/pmrrr/README b/external/pmrrr/README
deleted file mode 100644
index f27c2a620d..0000000000
--- a/external/pmrrr/README
+++ /dev/null
@@ -1,194 +0,0 @@
-These files provide the routine 'pmrrr'. Its purpose is to compute all 
-or a subset of eigenvalues and optionally eigenvectors of a symmetric 
-tridiagonal matrix. 'pmrrr' is based on the algorithm of Multiple 
-Relatively Robust Representations (MRRR). The routine thereby targets 
-hybrid architectures consisting of multiple SMP nodes. It also runs in 
-fully distributed mode, with each node having only one processor, and 
-fully SMP mode, in which case no message passing is required. The 
-implementation is based on LAPACK's routine 'dstemr'.
-
-
-Using libmrrr.a:
-----------------
-The folder EXAMPLES contains examples of how to use the routine 
-'pmrrr' in C and Fortran code. Edit the 'Makefile' in these folders if 
-you do not use the GNU compilers and run 'make' to compile the 
-examples.
-
-In general, the code that calls 'pmrrr' needs to be linked to the 
-library 'libpmrrr.a', which is created in the LIB folder. 
-Additionally, it has to be linked to the libraries on which it 
-depends (see example files).
-
-Below are given the C and Fortran prototypes of the function 'pmrrr'.
-For more information please see 'INCLUDE/pmrrr.h'.
-
-
-######################################################################
-# C function prototype:                                              #
-###################################################################### 
-# int pmrrr(char *jobz, char *range, int *n, double  *D,             #
-#           double *E, double *vl, double *vu, int *il, int *iu,     #
-#           int *tryrac, MPI_Comm comm, int *nz, int *offset,        #
-#           double *W, double *Z, int *ldz, int *Zsupp);             #
-#                                                                    #
-# Arguments:                                                         #
-# ----------                                                         #
-#                                                                    #
-# INPUTS:                                                            #
-# -------                                                            #
-# jobz              "N" or "n" - compute only eigenvalues            #
-#                   "V" or "v" - compute also eigenvectors           #
-#                   "C" or "c" - count the maximal number of         #
-#                                locally computed eigenvectors       #
-# range             "A" or "a" - all                                 #
-#                   "V" or "v" - by interval: (VL,VU]                #
-#                   "I" or "i" - by index:     IL-IU                 #
-# n                 matrix size                                      #
-# ldz               must be set on input to the leading dimension    #
-#                   of of eigenvector matrix Z; this is often equal  #
-#                   to matrix size n (not changed on output)         #
-#                                                                    #
-# INPUT + OUTPUT:                                                    #
-# ---------------                                                    #
-# D (double[n])     Diagonal elements of tridiagonal T.              #
-#                   (On output the array will be overwritten).       #
-# E (double[n])     Off-diagonal elements of tridiagonal T.          #
-#                   First n-1 elements contain off-diagonals,        #
-#                   the last element can have an abitrary value.     #
-#                   (On output the array will be overwritten.)       #
-# vl                If range="V", lower bound of interval            #
-#                   (vl,vu], on output refined.                      #
-#                   If range="A" or "I" not referenced as input.     #
-#                   On output the interval (vl,vu] contains ALL      #
-#                   the computed eigenvalues.                        #
-# vu                If range="V", upper bound of interval            #
-#                   (vl,vu], on output refined.                      #
-#                   If range="A" or "I" not referenced as input.     #
-#                   On output the interval (vl,vu] contains ALL      #
-#                   the computed eigenvalues.                        #
-# il                If range="I", lower index (1-based indexing) of  #
-#                   the subset 'il' to 'iu'.                         #
-#                   If range="A" or "V" not referenced as input.     #
-#                   On output the eigenvalues with index il to iu    #
-#                   are computed by ALL processes.                   #
-# iu                If range="I", upper index (1-based indexing) of  #
-#                   the subset 'il' to 'iu'.                         #
-#                   If range="A" or "V" not referenced as input.     #
-#                   On output the eigenvalues with index il to iu    # 
-#                   are computed by ALL processes.                   #
-# tryrac            0 - do not try to achieve high relative accuracy.#
-#                   1 - relative accuracy will be attempted;         #
-#                       on output it is set to zero if high relative #
-#                       accuracy is not achieved.                    #
-# comm              MPI communicator; commonly: MPI_COMM_WORLD.      #
-#                                                                    #
-# OUTPUT:                                                            #
-# -------                                                            #
-# nz                Number of eigenvalues and eigenvectors computed  #
-#                   locally.                                         #
-#                   If jobz="C", 'nz' will be set to the maximal     #
-#                   number of locally computed eigenvectors such     #
-#                   that double[n*nz] will provide enough memory     #
-#                   for the local eigenvectors;  this is only        #
-#                   important in case of range="V" since             #
-#                   '#eigenpairs' are not known in advance           #
-# offset            Index, relative to the computed eigenvalues, of  #
-#                   the smallest eigenvalue computed locally         #
-#                   (0-based indexing).                              #
-# W (double[n])     Locally computed eigenvalues;                    #
-#                   The first nz entries contain the eigenvalues     #
-#                   computed locally; the first entry contains the   #
-#                   'offset + 1'-th eigenvalue computed and the      #
-#                   'offset + il'-th eigenvalue of the input matrix  #
-#                   (1-based indexing in both cases).                #
-#                   In some situations it is desirable to have all   #
-#                   computed eigenvalues in W, instead of only       #
-#                   those computed locally. In this case the         #
-#                   routine 'PMR_comm_eigvals' can be called right   #
-#                   after 'pmrrr' returns (see below).               #
-# Z                 Locally computed eigenvectors.                   #
-# (double[n*nz])    Enough space must be provided to store the       #
-#                   vectors. 'nz' should be bigger or equal          #
-#                   to ceil('#eigenpairs'/'#processes'), where       #
-#                   '#eigenpairs' is 'n' in case of range="A" and    #
-#                  'iu-il+1' in case of range="I". Alternatively,    #
-#                   and for range="V" 'nz' can be obtained           #
-#                   by running the routine with jobz="C".            #
-# Zsupp             Support of eigenvectors, which is given by       #
-# (double[2*n])     Z[2*i] to Z[2*i+1] for the i-th eigenvector      #
-#                   stored locally (1-based indexing in both         #
-#                   cases).                                          #
-#                                                                    #
-# RETURN VALUE:                                                      #
-# -------------                                                      #
-#                 0 - success                                        #
-#                 1 - wrong input parameter                          #
-#                 2 - misc errors                                    #
-#                                                                    #
-######################################################################
-
-######################################################################
-# Fortran prototype:                                                 #
-######################################################################
-# PMRRR(JOBZ, RANGE, N, D, E, VL, VU, IL, IU, TRYRAC,                #
-#       MPI_COMM, NZ, OFFSET, W, Z, LDZ, ZSUPP, INFO)                #
-#                                                                    #
-# CHARACTER        JOBZ, RANGE                                       #
-# INTEGER          N, IL, IU, TRYRAC, MPI_COMM, NZ, OFFSET, LDZ,     # 
-#                  ZSUPP(*), INFO                                    #
-# DOUBLE PRECISION D(*), D(*), VL, VU, W(*), Z(*,*)                  #
-######################################################################
-
-
-The function is called by every process of the MPI communicator 
-specified as an argument. Each process is assumed to have the diagonal 
-and sub-diagonal of the symmetric tridiagonal input matrix. It is 
-thereby important that MPI is always initialized by 
-'MPI_Init_thread' (as opposed to 'MPI_Init'!). If multithreading is 
-desired, the thread level support MPI_THREAD_MULTIPLE has to be 
-requested in 'MPI_Init_thread'. If no multithreading is used, the 
-thread support 'MPI_THREAD_SINGLE' can be requested. Notice that 
-multithreading is disabled if the implementation of the MPI library 
-does not support MPI_THREAD_MULTIPLE.
-The number of threads created in each process can be specified via the 
-environment variable PMR_NUM_THREADS. In case this variable is not 
-defined, then the routine will create as many threads as specified by 
-the variable DEFAULT_NUM_THREADS (which is set in 'pmrrr.h'). 
-Note: Do not forget to tell mpiexec/mpirun about the environment 
-variable PMR_NUM_THREADS if you use it. This is typically done via the 
-option '-env' or '-x'.
-
-
-Some additinal comments:
-------------------------
-
-COMMUNICATING COMPUTED EIGENVALUES:
-Upon completion of routine 'pmrrr', each process has a subset of the 
-eigenvalues, stored in the array W (see 'pmrrr.h'). In some situations
-it is desirable to let all processes have all the computed eigenvalues. 
-To this end, an additional routine 'PMR_comm_eigvals' is provided. It 
-can be called right after 'pmrrr' returned. Upon completion, each 
-process has all the computed eigenvalues, stored in W.
-The C and Fortran prototypes of the function 'PMR_comm_eigvals' are 
-given below. For more information please see 'pmrrr.h'.
-
-######################################################################
-# C function prototype:                                              #
-###################################################################### 
-# int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *offset,          # 
-#                      double *W);                                   #
-######################################################################
-
-######################################################################
-# Fortran prototype:                                                 #
-######################################################################
-# PMR_COMM_EIGVALS(MPI_COMM, NZ, OFFSET, W, INFO)                    #
-#                                                                    #
-# INTEGER          MPI_COMM, NZ, OFFSET, INFO                        #
-# DOUBLE PRECISION W(*)                                              #
-######################################################################
-
-
-COMMENTS AND BUGS:
-petschow@aices.rwth-aachen.de
diff --git a/external/pmrrr/include/pmrrr/global.h b/external/pmrrr/cmake/global.h.in
similarity index 60%
rename from external/pmrrr/include/pmrrr/global.h
rename to external/pmrrr/cmake/global.h.in
index 5668e25ff2..b9d0f2d3d6 100644
--- a/external/pmrrr/include/pmrrr/global.h
+++ b/external/pmrrr/cmake/global.h.in
@@ -41,56 +41,91 @@
 #ifndef GGLOBAL_H
 #define GGLOBAL_H
 
+#include <mpi.h>
+
 #ifdef __STDC__
- /* Some version of Standard C */
-#if defined (__STDC_VERSION__) && __STDC_VERSION__>=199901L
- /* C99 */
+ 	/* Some version of Standard C */
+	#if defined (__STDC_VERSION__) && __STDC_VERSION__>=199901L
+	 /* C99 */
 
-#include <stdbool.h>
-/* #pragma STDC FP_CONTRACT ON|OFF|DEFAULT */
+		#include <stdbool.h>
+		/* #pragma STDC FP_CONTRACT ON|OFF|DEFAULT */
 
-#else
- /* C89 with or without Amendment 1, see H&S p.53 */
+	#else
+		 /* C89 with or without Amendment 1, see H&S p.53 */
 
-#define restrict /*nothing*/
-#define inline   /*nothing*/
+		#define restrict /*nothing*/
+		//#define inline   /*nothing*/
 
-#define fmax(a,b) ( (a) > (b) ? (a) : (b) )
-#define fmin(a,b) ( (a) < (b) ? (a) : (b) )
+		/*
+		#define fmax(a,b) ( (a) > (b) ? (a) : (b) )
+		#define fmin(a,b) ( (a) < (b) ? (a) : (b) )
+		*/
 
-typedef int    bool;
-#define false  0
-#define true   1
+		/* C++ provides a full support for booleans, but LAPACK->C interface accepts integers.
+		   For compatibility with previous code, provide an integer_boolean
+		*/
+		#ifndef __cplusplus
+		typedef int    bool;
+		#define false  0
+		#define true   1
+		#endif
 
-#endif
+	#endif
 #else  /* __STDC__ not defined */
- /* Not Standard C */
-
-#define inline /*nothing*/
+	 /* Not Standard C */
 
- /* in case compiler does not support type qualifiers
-  * see Harbison and Steele p. 89*/
-#define const /*nothing*/
-#define volatile /*nothing*/
-#define restrict /*nothing*/
+	//#define inline /*nothing*/
 
-#define fmax(a,b) ( (a) > (b) ? (a) : (b) )
-#define fmin(a,b) ( (a) < (b) ? (a) : (b) )
-
-typedef int    bool;
-#define false  0
-#define true   1
- 
- /* need also remove all // style comments */
- /* inline ... */
+	 /* in case compiler does not support type qualifiers
+	  * see Harbison and Steele p. 89*/
+	#define const /*nothing*/
+	#define volatile /*nothing*/
+	#define restrict /*nothing*/
+	/*
+	#define fmax(a,b) ( (a) > (b) ? (a) : (b) )
+	#define fmin(a,b) ( (a) < (b) ? (a) : (b) )
+	*/
+	typedef int    bool_;
+	#define false  0
+	#define true   1
 
+	 /* need also remove all // style comments */
+	 /* inline ... */
 #endif
 /* see H&S p.70 to include C++ in the list above */
 
+//#define restrict /**/
 /* max/min/ceil for integers */
 #define imax(a,b) ( (a) > (b) ? (a) : (b) )
 #define imin(a,b) ( (a) < (b) ? (a) : (b) )
 #define iceil(a,b) ( (((a)%(b))!=0) ? (((a)/(b))+1) : ((a)/(b)) )
 
+namespace pmrrr {
+
+	template<typename FloatingType>
+	struct float_traits;
+
+	template<>
+	struct float_traits<float> {
+		static MPI_Datatype mpi_type()
+		{
+			return MPI_FLOAT;
+		}
+	};
+
+	template<>
+	struct float_traits<double> {
+		static MPI_Datatype mpi_type()
+		{
+			return MPI_DOUBLE;
+		}
+	};
+
+}
+
+@pmrrr_defines@
+
 /* End of header file */
 #endif
+
diff --git a/external/pmrrr/include/pmrrr.h b/external/pmrrr/include/pmrrr.h
deleted file mode 100644
index 1b83d813d1..0000000000
--- a/external/pmrrr/include/pmrrr.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015 Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-
-#ifndef PPMRRR_H
-#define PPMRRR_H
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-#include <ctype.h>
-#include "mpi.h"
-#include "pmrrr/global.h"
-
-/* Parallel computation of all or a subset of eigenvalues and 
- * optionally eigenvectors of a symmetric tridiagonal matrix based on 
- * the algorithm of Multiple Relatively Robust Representations (MRRR). 
- * The routine targets hybrid architectures consisting of multiple SMP 
- * nodes. It also runs in fully distributed mode, with each node 
- * having only one processor, and fully SMP mode, in which case no 
- * message passing is required. The implementation is based on 
- * LAPACK's routine 'dstemr'.
- *
- * Function prototype: */
-
-int pmrrr(char *jobz, char *range, int *n, double  *D,
-	  double *E, double *vl, double *vu, int *il, int *iu,
-	  int *tryrac, MPI_Comm comm, int *nz, int *offset,
-	  double *W, double *Z, int *ldz, int *Zsupp);
-
-/* Arguments:
- * ----------
- *
- * INPUTS: 
- * -------
- * jobz              "N" or "n" - compute only eigenvalues
- *                   "V" or "v" - compute also eigenvectors
- *                   "C" or "c" - count the maximal number of 
- *                                locally computed eigenvectors
- * range             "A" or "a" - all
- *                   "V" or "v" - by interval: (VL,VU]
- *                   "I" or "i" - by index:     IL-IU
- * n                 matrix size
- * ldz               must be set on input to the leading dimension 
- *                   of of eigenvector matrix Z; this is often equal 
- *                   to matrix size n (not changed on output)
- *
- * INPUT + OUTPUT: 
- * ---------------
- * D (double[n])     Diagonal elements of tridiagonal T.
- *                   (On output the array will be overwritten).
- * E (double[n])     Off-diagonal elements of tridiagonal T.
- *                   First n-1 elements contain off-diagonals,
- *                   the last element can have an abitrary value. 
- *                   (On output the array will be overwritten.)
- * vl                If range="V", lower bound of interval
- *                   (vl,vu], on output refined.
- *                   If range="A" or "I" not referenced as input.
- *                   On output the interval (vl,vu] contains ALL
- *                   the computed eigenvalues.
- * vu                If range="V", upper bound of interval
- *                   (vl,vu], on output refined.
- *                   If range="A" or "I" not referenced as input.
- *                   On output the interval (vl,vu] contains ALL
- *                   the computed eigenvalues.
- * il                If range="I", lower index (1-based indexing) of 
- *                   the subset 'il' to 'iu'.
- *                   If range="A" or "V" not referenced as input.
- *                   On output the eigenvalues with index il to iu are 
- *                   computed by ALL processes.
- * iu                If range="I", upper index (1-based indexing) of 
- *                   the subset 'il' to 'iu'.
- *                   If range="A" or "V" not referenced as input.
- *                   On output the eigenvalues with index il to iu are 
- *                   computed by ALL processes.
- * tryrac            0 - do not try to achieve high relative accuracy.
- *                   NOTE: this should be the default in context of  
- *                         dense eigenproblems.
- *                   1 - relative accuracy will be attempted; 
- *                       on output it is set to zero if high relative 
- *                       accuracy is not achieved.
- * comm              MPI communicator; commonly: MPI_COMM_WORLD.
- *
- * OUTPUT: 
- * -------
- * nz                Number of eigenvalues and eigenvectors computed 
- *                   locally.
- *                   If jobz="C", 'nz' will be set to the maximal
- *                   number of locally computed eigenvectors such 
- *                   that double[n*nz] will provide enough memory 
- *                   for the local eigenvectors;  this is only 
- *                   important in case of range="V" since 
- *                   '#eigenpairs' are not known in advance
- * offset            Index, relative to the computed eigenvalues, of 
- *                   the smallest eigenvalue computed locally
- *                   (0-based indexing).
- * W (double[n])     Locally computed eigenvalues;
- *                   The first nz entries contain the eigenvalues 
- *                   computed locally; the first entry contains the 
- *                   'offset + 1'-th computed eigenvalue, which is the 
- *                   'offset + il'-th eigenvalue of the input matrix 
- *                   (1-based indexing in both cases).
- *                   In some situations it is desirable to have all 
- *                   computed eigenvalues in W, instead of only 
- *                   those computed locally. In this case, call 
- *                   routine 'PMR_comm_eigvals' after 
- *                   'pmrrr' returns (see example and interface below).
- * Z                 Locally computed eigenvectors.
- * (double[n*nz])    Enough space must be provided to store the
- *                   vectors. 'nz' should be bigger or equal 
- *                   to ceil('#eigenpairs'/'#processes'), where 
- *                   '#eigenpairs' is 'n' in case of range="A" and
- *                   'iu-il+1' in case of range="I". Alternatively, 
- *                   and for range="V" 'nz' can be obtained 
- *                   by running the routine with jobz="C". 
- * Zsupp             Support of eigenvectors, which is given by
- * (double[2*n])     i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector
- *                   (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and
- *                   in Fotran Z(i1:i2) are non-zero).
- *
- * RETURN VALUE: 
- * -------------
- *                 0 - success  
- *                 1 - wrong input parameter
- *                 2 - misc errors  
- *
- * The Fortran interface takes an additinal integer argument INFO
- * to retrieve the return value. 
- * An example call in Fortran looks therefore like
- *
- * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, 
- *            MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO)
- *
- *
- * EXAMPLE CALL: 
- * -------------
- * char    *jobz, *range;
- * int     n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp;
- * double  *D, *E, *W, *Z, vl, vu;
- *
- * // allocate space for D, E, W, Z
- * // initialize D, E
- * // set jobz, range, ldz, and if necessary, il, iu or vl, vu  
- * 
- * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu,
- *              &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W,
- *	        Z, &ldz , Zsupp);
- *
- * // optional: 
- * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W);
- *
- */
-
-
-
-/* Set the number of threads in case PMR_NUM_THREADS is not 
- * specified */
-#define DEFAULT_NUM_THREADS 1
-
-/* Call LAPACK's dstemr in every process to compute all desiered 
- * eigenpairs redundantly (and discard the once that would usually 
- * not be computed by the process) if n < DSTEMR_IF_SMALLER; 
- * default: 4 */ 
-#define DSTEMR_IF_SMALLER   4
-
-/* Make sure that eigenpairs are sorted globally; if set to false
- * they are in most cases sorted, but it is not double checked and 
- * can therefore not be guaranteed; default: true */
-#define ASSERT_SORTED_EIGENPAIRS false
-
-/* Set flag if Rayleigh Quotient Correction should be used, 
- * which is usually faster; default: true */
-#define TRY_RQC          true
-
-/* Maximum numver of iterations of inverse iteration;
- * default: 10 */
-#define MAXITER            10
-
-/* Set the min. relative gap for an eigenvalue to be considered 
- * well separated, that is a singleton; this is a very important 
- * parameter of the computation; default: 10e-3 */
-#define MIN_RELGAP       1e-3
-
-/* Set the maximal allowed element growth for being accepted as 
- * an RRR, that is if max. pivot < MAX_GROWTH * 'spectral diameter'
- * the RRR is accepted; default: 64.0 */
-#define MAX_GROWTH         64.0
-
-/* Set how many iterations should be executed to find the root 
- * representation; default: 6 */
-#define MAX_TRY_RRR       10
-
-
-
-/*
- * Routine to communicate eigenvalues such that every process has
- * all computed eigenvalues (iu-il+1) in W; this routine is designed 
- * to be called right after 'pmrrr'.
- */
-int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *ifirst, double *W);
-/* Arguments:
- * ----------
- *
- * INPUTS: 
- * -------
- * comm              MPI communicator; commonly: MPI_COMM_WORLD.
- * nz                Number of eigenvalues local in W as returned 
- *                   from 'pmrrr'.
- * offset            Index, relative to the computed eigenvalues, of 
- *                   the smallest eigenvalue computed locally
- *                   (0-based indexing).
- *
- * INPUT + OUTPUT: 
- * ---------------
- * W (double[n])     Eigenvalues. 
- *                   On input the first nz elements of W contain 
- *                   the eigenvalues computed locally.
- *                   On output the first 'iu-il+1' of W contain 
- *                   all computed eigenvalues.
- *
- * The Fortran interface takes an additinal integer argument INFO 
- * to retrieve the return value. 
- * An example call in Fortran looks therefore like
- *
- * CALL PMR_COMM_EIGVALS(MPI_COMM_WORLD, NZ, OFFSET, W, INFO)
- *
- */
-
-
-/* LAPACK and BLAS function prototypes
- * Note: type specifier 'extern' does not matter in declaration
- * so here used to mark routines from LAPACK and BLAS libraries */
-extern int    odscal(int*, double*, double*, int*);
-extern double odnst(char*, int*, double*, double*);
-extern void   odrrr(int*, double*, double*, int*);
-extern void   odrra(int*, double*, double*, double*, double*, 
-		    double*, int*, int*, int*);
-extern void   odrrc(char*, int*, double*, double*, double*, double*,
-		    double*, int*, int*, int*, int*);
-extern void   odrrd(char*, char*, int*, double*, double*, int*, 
-		    int*, double*, double*, double*, double*, 
-		    double*, double*, int*, int*, int*, double*, 
-		    double*, double*, double*, int*, int*, double*, 
-		    int*, int*);
-extern void   odrrb(int*, double*, double*, int*, int*, double*,
-		    double*, int*, double*, double*, double*, double*,
-		    int*, double*, double*, int*, int*);
-extern void   odrrk(int*, int*, double*, double*, double*, double*,
-		    double*, double*, double*, double*, int*);
-extern void   odebz(int*, int*, int*, int*, int*, int*, double*, 
-		    double*, double*, double*, double*, double*,
-		    int*, double*, double*, int*, int*, double*,
-		    int*, int*);
-extern void   odrnv(int*, int*, int*, double*);
-extern void   odrrf(int*, double*, double*, double*, int*, int*, 
-		    double*, double*, double*, double*, double*, 
-		    double*, double*, double*, double*, double*, 
-		    double*, int*);
-extern void   odr1v(int*, int*, int*, double*, double*, double*, 
-		    double*, double*, double*, double*, double*, 
-		    bool*, int*, double*, double*, int*, int*, 
-		    double*, double*, double*, double*);
-extern void   odrrj(int*, double*, double*, int*, int*, double*, 
-		    int*, double*, double*, double*, int*, double*, 
-		    double*, int*);
-extern void   odstmr(char*, char*, int*, double*, double*, double*, 
-		     double*, int*, int*, int*, double*, double*, 
-		     int*, int*, int*, int*, double*, int*, int*, 
-		     int*, int*);
-
-#endif /* End of header file */
diff --git a/external/pmrrr/include/pmrrr/blas/odcpy.hpp b/external/pmrrr/include/pmrrr/blas/odcpy.hpp
new file mode 100644
index 0000000000..2473c56776
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/blas/odcpy.hpp
@@ -0,0 +1,115 @@
+/* odcpy.f -- translated by f2c (version 20061008) */
+
+
+#ifndef __ODCPY_HPP__
+#define __ODCPY_HPP__
+
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <cfloat>
+#include <cassert>
+
+namespace pmrrr { namespace blas {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odcpy(int *n, FloatingType *dx, int *incx, 
+		FloatingType *dy, int *incy)
+	{
+		/* System generated locals */
+		int i__1;
+
+		/* Local variables */
+		int i__, m, ix, iy, mp1;
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*     copies a vector, x, to a vector, y. */
+	/*     uses unrolled loops for increments equal to one. */
+	/*     jack dongarra, linpack, 3/11/78. */
+	/*     modified 12/3/93, array(1) declarations changed to array(*) */
+
+
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+		/* Parameter adjustments */
+		--dy;
+		--dx;
+
+		/* Function Body */
+		if (*n <= 0) {
+		return 0;
+		}
+		if (*incx == 1 && *incy == 1) {
+		goto L20;
+		}
+
+	/*        code for unequal increments or equal increments */
+	/*          not equal to 1 */
+
+		ix = 1;
+		iy = 1;
+		if (*incx < 0) {
+		ix = (-(*n) + 1) * *incx + 1;
+		}
+		if (*incy < 0) {
+		iy = (-(*n) + 1) * *incy + 1;
+		}
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		dy[iy] = dx[ix];
+		ix += *incx;
+		iy += *incy;
+	/* L10: */
+		}
+		return 0;
+
+	/*        code for both increments equal to 1 */
+
+
+	/*        clean-up loop */
+
+	L20:
+		m = *n % 7;
+		if (m == 0) {
+		goto L40;
+		}
+		i__1 = m;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		dy[i__] = dx[i__];
+	/* L30: */
+		}
+		if (*n < 7) {
+		return 0;
+		}
+	L40:
+		mp1 = m + 1;
+		i__1 = *n;
+		for (i__ = mp1; i__ <= i__1; i__ += 7) {
+		dy[i__] = dx[i__];
+		dy[i__ + 1] = dx[i__ + 1];
+		dy[i__ + 2] = dx[i__ + 2];
+		dy[i__ + 3] = dx[i__ + 3];
+		dy[i__ + 4] = dx[i__ + 4];
+		dy[i__ + 5] = dx[i__ + 5];
+		dy[i__ + 6] = dx[i__ + 6];
+	/* L50: */
+		}
+		return 0;
+	} /* odcpy_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/blas/odscal.hpp b/external/pmrrr/include/pmrrr/blas/odscal.hpp
new file mode 100644
index 0000000000..6ef6be31ab
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/blas/odscal.hpp
@@ -0,0 +1,107 @@
+/**
+	C++ template version of BLAS routine odscal.
+
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSCAL_HPP__
+#define __ODSCAL_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace blas {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odscal(int *n, FloatingType *da, FloatingType *dx, 
+		int *incx)
+	{
+		/* System generated locals */
+		int i__1, i__2;
+
+		/* Local variables */
+		int i__, m, mp1, nincx;
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+	/* * */
+	/*     scales a vector by a constant. */
+	/*     uses unrolled loops for increment equal to one. */
+	/*     jack dongarra, linpack, 3/11/78. */
+	/*     modified 3/93 to return if incx .le. 0. */
+	/*     modified 12/3/93, array(1) declarations changed to array(*) */
+
+
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+		/* Parameter adjustments */
+		--dx;
+
+		/* Function Body */
+		if (*n <= 0 || *incx <= 0) {
+		return 0;
+		}
+		if (*incx == 1) {
+		goto L20;
+		}
+
+	/*        code for increment not equal to 1 */
+
+		nincx = *n * *incx;
+		i__1 = nincx;
+		i__2 = *incx;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+		dx[i__] = *da * dx[i__];
+	/* L10: */
+		}
+		return 0;
+
+	/*        code for increment equal to 1 */
+
+
+	/*        clean-up loop */
+
+	L20:
+		m = *n % 5;
+		if (m == 0) {
+		goto L40;
+		}
+		i__2 = m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		dx[i__] = *da * dx[i__];
+	/* L30: */
+		}
+		if (*n < 5) {
+		return 0;
+		}
+	L40:
+		mp1 = m + 1;
+		i__2 = *n;
+		for (i__ = mp1; i__ <= i__2; i__ += 5) {
+		dx[i__] = *da * dx[i__];
+		dx[i__ + 1] = *da * dx[i__ + 1];
+		dx[i__ + 2] = *da * dx[i__ + 2];
+		dx[i__ + 3] = *da * dx[i__ + 3];
+		dx[i__ + 4] = *da * dx[i__ + 4];
+	/* L50: */
+		}
+		return 0;
+	} 
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/blas/odswap.hpp b/external/pmrrr/include/pmrrr/blas/odswap.hpp
new file mode 100644
index 0000000000..5c49d3518a
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/blas/odswap.hpp
@@ -0,0 +1,121 @@
+/* dswap.f -- translated by f2c (version 20061008) */
+
+#ifndef __ODSWAP_HPP__
+#define __ODSWAP_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace blas {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odswap(int *n, FloatingType *dx, int *incx, 
+		FloatingType *dy, int *incy)
+	{
+		/* System generated locals */
+		int i__1;
+
+		/* Local variables */
+		int i__, m, ix, iy, mp1;
+		FloatingType dtemp;
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*     interchanges two vectors. */
+	/*     uses unrolled loops for increments equal one. */
+	/*     jack dongarra, linpack, 3/11/78. */
+	/*     modified 12/3/93, array(1) declarations changed to array(*) */
+
+
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+		/* Parameter adjustments */
+		--dy;
+		--dx;
+
+		/* Function Body */
+		if (*n <= 0) {
+		return 0;
+		}
+		if (*incx == 1 && *incy == 1) {
+		goto L20;
+		}
+
+	/*       code for unequal increments or equal increments not equal */
+	/*         to 1 */
+
+		ix = 1;
+		iy = 1;
+		if (*incx < 0) {
+		ix = (-(*n) + 1) * *incx + 1;
+		}
+		if (*incy < 0) {
+		iy = (-(*n) + 1) * *incy + 1;
+		}
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		dtemp = dx[ix];
+		dx[ix] = dy[iy];
+		dy[iy] = dtemp;
+		ix += *incx;
+		iy += *incy;
+	/* L10: */
+		}
+		return 0;
+
+	/*       code for both increments equal to 1 */
+
+
+	/*       clean-up loop */
+
+	L20:
+		m = *n % 3;
+		if (m == 0) {
+		goto L40;
+		}
+		i__1 = m;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		dtemp = dx[i__];
+		dx[i__] = dy[i__];
+		dy[i__] = dtemp;
+	/* L30: */
+		}
+		if (*n < 3) {
+		return 0;
+		}
+	L40:
+		mp1 = m + 1;
+		i__1 = *n;
+		for (i__ = mp1; i__ <= i__1; i__ += 3) {
+		dtemp = dx[i__];
+		dx[i__] = dy[i__];
+		dy[i__] = dtemp;
+		dtemp = dx[i__ + 1];
+		dx[i__ + 1] = dy[i__ + 1];
+		dy[i__ + 1] = dtemp;
+		dtemp = dx[i__ + 2];
+		dx[i__ + 2] = dy[i__ + 2];
+		dy[i__ + 2] = dtemp;
+	/* L50: */
+		}
+		return 0;
+	} /* dswap_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/rrr.h b/external/pmrrr/include/pmrrr/definitions/counter.h
similarity index 66%
rename from external/pmrrr/include/pmrrr/rrr.h
rename to external/pmrrr/include/pmrrr/definitions/counter.h
index df78e4984c..a560b52d14 100644
--- a/external/pmrrr/include/pmrrr/rrr.h
+++ b/external/pmrrr/include/pmrrr/definitions/counter.h
@@ -41,46 +41,40 @@
  *
  */
 
-#ifndef RRR_H
-#define RRR_H
+#ifndef CCOUNTER_H
+#define CCOUNTER_H
 
 #ifndef DISABLE_PTHREADS
  #include <pthread.h>
 #endif
-#include "global.h"
-
-typedef struct {
-  double          *restrict D;
-  double          *restrict L;
-  double          *restrict DL;
-  double          *restrict DLL;
-  int             size;
-  int             depth;
-  bool            parent_processed;
-  bool            copied_parent_rrr;
-  int             ndepend;
-#ifndef DISABLE_PTHREADS
-  pthread_mutex_t mutex;
-#endif
-} rrr_t;
 
+#include <pmrrr/definitions/global.h>
 
-rrr_t *PMR_create_rrr(double *restrict D, double *restrict L,
-		      double *restrict DL, double *restrict DLL,
-		      int size, int depth);
+namespace pmrrr { namespace detail {
 
-rrr_t *PMR_reset_rrr (rrr_t *restrict RRR, double *restrict D,
-		      double *restrict L, double *restrict DL,
-		      double *restrict DLL, int size, int depth);
+	typedef struct {
+	  int             value;
+	#ifndef DISABLE_PTHREADS
+	  #ifdef NOSPINLOCKS
+	    pthread_mutex_t    lock;
+	  #else
+	    pthread_spinlock_t lock;
+	  #endif
+	#endif
+	} counter_t;
 
-int  PMR_increment_rrr_dependencies(rrr_t *RRR);
-int  PMR_set_parent_processed_flag (rrr_t *RRR);
-int  PMR_set_copied_parent_rrr_flag(rrr_t *RRR, bool val);
-int  PMR_try_destroy_rrr(rrr_t *RRR);
+	counter_t *PMR_create_counter(int init_value);
+	int PMR_get_counter_value(counter_t *counter);
+	int PMR_set_counter_value(counter_t *counter, int value);
+	int PMR_decrement_counter(counter_t *counter, int amount);
+	int PMR_increment_counter(counter_t *counter, int amount);
+	void PMR_destroy_counter(counter_t *counter);
 
-int PMR_rrr_init_lock(rrr_t *RRR);
-void PMR_rrr_destroy_lock(rrr_t *RRR);
-int PMR_rrr_lock(rrr_t *RRR);
-int PMR_rrr_unlock(rrr_t *RRR);
+	int PMR_counter_init_lock(counter_t *counter);
+	void PMR_counter_destroy_lock(counter_t *counter);
+	int PMR_counter_lock(counter_t *counter);
+	int PMR_counter_unlock(counter_t *counter);
 
+} }
+	
 #endif
diff --git a/external/pmrrr/include/pmrrr/plarre.h b/external/pmrrr/include/pmrrr/definitions/plarre.h
similarity index 93%
rename from external/pmrrr/include/pmrrr/plarre.h
rename to external/pmrrr/include/pmrrr/definitions/plarre.h
index 66a47070c2..5db7be4fc9 100644
--- a/external/pmrrr/include/pmrrr/plarre.h
+++ b/external/pmrrr/include/pmrrr/definitions/plarre.h
@@ -41,8 +41,8 @@
 #ifndef PPLARRE_H
 #define PPLARRE_H
 
-#include "global.h"
-#include "structs.h"
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/structs.h>
 
 /*
  * Computing the eigenvalues and of a symmetric tridiagonal matrix 
@@ -51,8 +51,6 @@
  * Note: this implementation is here is not really optimized in
  * terms of performance and memory usage.
  */
-int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct, 
-	       val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp);
 
 /* Perturb the initial root representation by "1 + eps*RAND_FACTOR*rand"; 
  * default: 8.0 */ 
diff --git a/external/pmrrr/include/pmrrr/plarrv.h b/external/pmrrr/include/pmrrr/definitions/plarrv.h
similarity index 92%
rename from external/pmrrr/include/pmrrr/plarrv.h
rename to external/pmrrr/include/pmrrr/definitions/plarrv.h
index 8d921aa270..9e0a289a61 100644
--- a/external/pmrrr/include/pmrrr/plarrv.h
+++ b/external/pmrrr/include/pmrrr/definitions/plarrv.h
@@ -41,16 +41,12 @@
 #ifndef PPLARRV_H
 #define PPLARRV_H
 
-#include "global.h"
-#include "structs.h"
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/structs.h>
 
 /*
  * Computation of eigenvectors of a symmetric tridiagonal
  */
-int plarrv(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct,
-	   vec_t *Zstruct, tol_t *tolstruct, int *nzp,
-	   int *myfirstp);
-
 #define COMM_COMPLETE        0
 #define COMM_INCOMPLETE      1
 #define C_TASK_PROCESSED     0
diff --git a/external/pmrrr/include/pmrrr/definitions/pmrrr.h b/external/pmrrr/include/pmrrr/definitions/pmrrr.h
new file mode 100644
index 0000000000..4a514602be
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/definitions/pmrrr.h
@@ -0,0 +1,105 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015 Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef PPMRRR_H
+#define PPMRRR_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+#include <ctype.h>
+#include <mpi.h>
+
+#include <pmrrr/definitions/global.h>
+
+/* Parallel computation of all or a subset of eigenvalues and 
+ * optionally eigenvectors of a symmetric tridiagonal matrix based on 
+ * the algorithm of Multiple Relatively Robust Representations (MRRR). 
+ * The routine targets hybrid architectures consisting of multiple SMP 
+ * nodes. It also runs in fully distributed mode, with each node 
+ * having only one processor, and fully SMP mode, in which case no 
+ * message passing is required. The implementation is based on 
+ * LAPACK's routine 'dstemr'.
+ */
+
+/* Set the number of threads in case PMR_NUM_THREADS is not 
+ * specified */
+#define DEFAULT_NUM_THREADS 1
+
+/* Call LAPACK's dstemr in every process to compute all desiered 
+ * eigenpairs redundantly (and discard the once that would usually 
+ * not be computed by the process) if n < DSTEMR_IF_SMALLER; 
+ * default: 4 */ 
+#define DSTEMR_IF_SMALLER   4
+
+/* Make sure that eigenpairs are sorted globally; if set to false
+ * they are in most cases sorted, but it is not double checked and 
+ * can therefore not be guaranteed; default: true */
+#define ASSERT_SORTED_EIGENPAIRS false
+
+/* Set flag if Rayleigh Quotient Correction should be used, 
+ * which is usually faster; default: true */
+#define TRY_RQC          true
+
+/* Maximum numver of iterations of inverse iteration;
+ * default: 10 */
+#define MAXITER            10
+
+/* Set the min. relative gap for an eigenvalue to be considered 
+ * well separated, that is a singleton; this is a very important 
+ * parameter of the computation; default: 10e-3 */
+#define MIN_RELGAP       1e-3
+
+/* Set the maximal allowed element growth for being accepted as 
+ * an RRR, that is if max. pivot < MAX_GROWTH * 'spectral diameter'
+ * the RRR is accepted; default: 64.0 */
+#define MAX_GROWTH         64.0
+
+/* Set how many iterations should be executed to find the root 
+ * representation; default: 6 */
+#define MAX_TRY_RRR       10
+
+#endif /* End of header file */
diff --git a/external/pmrrr/include/pmrrr/queue.h b/external/pmrrr/include/pmrrr/definitions/queue.h
similarity index 64%
rename from external/pmrrr/include/pmrrr/queue.h
rename to external/pmrrr/include/pmrrr/definitions/queue.h
index 266737caa9..6e6ec4ba7d 100644
--- a/external/pmrrr/include/pmrrr/queue.h
+++ b/external/pmrrr/include/pmrrr/definitions/queue.h
@@ -47,42 +47,41 @@
 #ifndef DISABLE_PTHREADS
  #include <pthread.h>
 #endif
-#include "global.h"
 
-typedef struct task_aux task_t;
-struct task_aux {
-  void       *data;       /* ptr to data, has to be casted */
-  int         flag;       /* flag specifying the task */
-  task_t     *next;       /* ptr to next  task; NULL if non-existing; */
-  task_t     *prev;       /* ptr to prev. task; NULL if non-existing; */
-};
+#include <pmrrr/definitions/global.h>
 
-typedef struct {
-  int                num_tasks;
-  task_t            *head;
-  task_t            *back;
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  pthread_mutex_t    lock;
- #else
-  pthread_spinlock_t lock;
- #endif
-#endif
-} queue_t;
+namespace pmrrr { namespace detail {
+
+	typedef struct task_aux task_t;
+	struct task_aux {
+	  void       *data;       /* ptr to data, has to be casted */
+	  int         flag;       /* flag specifying the task */
+	  task_t     *next;       /* ptr to next  task; NULL if non-existing; */
+	  task_t     *prev;       /* ptr to prev. task; NULL if non-existing; */
+	};
 
+	typedef struct {
+	  int                num_tasks;
+	  task_t            *head;
+	  task_t            *back;
+	#ifndef DISABLE_PTHREADS
+	  #ifdef NOSPINLOCKS
+	    pthread_mutex_t    lock;
+	  #else
+	    pthread_spinlock_t lock;
+  	  #endif
+	#endif
+	} queue_t;
 
-/* functionality of the queue */
-queue_t *PMR_create_empty_queue  (void);
-int     PMR_insert_task_at_front (queue_t *queue, task_t *task);
-int     PMR_insert_task_at_back  (queue_t *queue, task_t *task);
-task_t  *PMR_remove_task_at_front(queue_t *queue);
-task_t  *PMR_remove_task_at_back (queue_t *queue);
-int     PMR_get_num_tasks(queue_t *queue);
-void    PMR_destroy_queue(queue_t *queue);
+	/* functionality of the queue */
+	queue_t *PMR_create_empty_queue  (void);
+	int     PMR_insert_task_at_front (queue_t *queue, task_t *task);
+	int     PMR_insert_task_at_back  (queue_t *queue, task_t *task);
+	task_t  *PMR_remove_task_at_front(queue_t *queue);
+	task_t  *PMR_remove_task_at_back (queue_t *queue);
+	int     PMR_get_num_tasks(queue_t *queue);
+	void    PMR_destroy_queue(queue_t *queue);
 
-int PMR_queue_init_lock(queue_t *queue);
-void PMR_queue_destroy_lock(queue_t *queue);
-int PMR_queue_lock(queue_t *queue);
-int PMR_queue_unlock(queue_t *queue);
+} }
 
 #endif
diff --git a/external/pmrrr/include/pmrrr/counter.h b/external/pmrrr/include/pmrrr/definitions/rrr.h
similarity index 73%
rename from external/pmrrr/include/pmrrr/counter.h
rename to external/pmrrr/include/pmrrr/definitions/rrr.h
index e31d50e45a..d98c77e0d1 100644
--- a/external/pmrrr/include/pmrrr/counter.h
+++ b/external/pmrrr/include/pmrrr/definitions/rrr.h
@@ -41,35 +41,33 @@
  *
  */
 
-#ifndef CCOUNTER_H
-#define CCOUNTER_H
+#ifndef RRR_H
+#define RRR_H
 
 #ifndef DISABLE_PTHREADS
  #include <pthread.h>
 #endif
-#include "global.h"
 
-typedef struct {
-  int             value;
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  pthread_mutex_t    lock;
- #else
-  pthread_spinlock_t lock;
- #endif
-#endif
-} counter_t;
+#include <pmrrr/definitions/global.h>
+
+namespace pmrrr { namespace detail {
 
-counter_t *PMR_create_counter(int init_value);
-int PMR_get_counter_value(counter_t *counter);
-int PMR_set_counter_value(counter_t *counter, int value);
-int PMR_decrement_counter(counter_t *counter, int amount);
-int PMR_increment_counter(counter_t *counter, int amount);
-void PMR_destroy_counter(counter_t *counter);
+	template<typename FloatingType>
+	struct rrr_t {
+	  FloatingType          *restrict D;
+	  FloatingType          *restrict L;
+	  FloatingType          *restrict DL;
+	  FloatingType          *restrict DLL;
+	  int             size;
+	  int             depth;
+	  bool            parent_processed;
+	  bool            copied_parent_rrr;
+	  int             ndepend;
+	#ifndef DISABLE_PTHREADS
+	  pthread_mutex_t mutex;
+	#endif
+	};
 
-int PMR_counter_init_lock(counter_t *counter);
-void PMR_counter_destroy_lock(counter_t *counter);
-int PMR_counter_lock(counter_t *counter);
-int PMR_counter_unlock(counter_t *counter);
+}}
 
 #endif
diff --git a/external/pmrrr/include/pmrrr/definitions/structs.h b/external/pmrrr/include/pmrrr/definitions/structs.h
new file mode 100644
index 0000000000..6cf43f4c72
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/definitions/structs.h
@@ -0,0 +1,176 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef SSTRUCTS_H
+#define SSTRUCTS_H
+
+#include <mpi.h>
+
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/counter.h>
+#include <pmrrr/definitions/queue.h>
+
+namespace pmrrr { namespace detail {
+
+	template<typename FloatingType>
+	struct in_t {
+	  int              n;
+	  FloatingType *restrict D;
+	  FloatingType *restrict E;
+	  int              nsplit;
+	  int    *restrict isplit ;
+	  FloatingType           spdiam;
+	};
+
+	template<typename FloatingType>
+	struct val_t {
+	  int              n;
+	  FloatingType           *vl;
+	  FloatingType           *vu;
+	  int              *il;
+	  int              *iu;
+	  FloatingType *restrict W;
+	  FloatingType *restrict Werr;
+	  FloatingType *restrict Wgap;
+	  int    *restrict Windex;
+	  int    *restrict iblock;
+	  int    *restrict iproc;
+	  FloatingType *restrict Wshifted;
+	  FloatingType *restrict gersch;
+	};
+
+	template<typename FloatingType>
+	struct vec_t {
+	  int              ldz;
+	  int              nz;
+	  FloatingType *restrict Z;
+	  int    *restrict Zsupp;
+	  int    *restrict Zindex;
+	};
+
+	struct proc_t {
+	  int      pid;
+	  int      nproc;
+	  MPI_Comm comm;
+	  int      nthreads;
+	  int      thread_support;
+	};
+
+	template<typename FloatingType>
+	struct tol_t {
+	  FloatingType split;
+	  FloatingType rtol1;
+	  FloatingType rtol2;
+	  FloatingType pivmin;
+	};
+
+	typedef struct {
+	  int         num_messages;
+	  MPI_Request *requests;
+	  MPI_Status  *stats;
+	} comm_t;
+
+	typedef struct {
+	  queue_t *r_queue;
+	  queue_t *s_queue;
+	  queue_t *c_queue;
+	} workQ_t;
+
+	template<typename FloatingType>
+	struct sort_struct_t{
+	  FloatingType lambda;
+	  int    local_ind;
+	  int    block_ind;
+	  int    ind;
+	};
+
+	template<typename FloatingType>
+	struct auxarg1_t {
+	  int    n;
+	  FloatingType *D;
+	  FloatingType *E;
+	  FloatingType *E2;
+	  int    il;
+	  int    iu;
+	  int    my_il;
+	  int    my_iu;
+	  int    nsplit;
+	  int    *isplit;
+	  FloatingType bsrtol;
+	  FloatingType pivmin;
+	  FloatingType *gersch;
+	  FloatingType *W;
+	  FloatingType *Werr;
+	  int    *Windex;
+	  int   	 *iblock;
+	};
+
+	template<typename FloatingType>
+	struct auxarg2_t {
+	  int          bl_size;
+	  FloatingType       *D;
+	  FloatingType       *DE2;
+	  int          rf_begin;
+	  int          rf_end;
+	  FloatingType        *W;
+	  FloatingType        *Werr;
+	  FloatingType        *Wgap;
+	  int            *Windex;
+	  FloatingType       rtol1;
+	  FloatingType       rtol2;
+	  FloatingType       pivmin;
+	  FloatingType       bl_spdiam;
+	};
+
+	template<typename FloatingType>
+	struct auxarg3_t {
+	  int          tid;
+	  proc_t       *procinfo;
+	  val_t<FloatingType>        *Wstruct;
+	  vec_t<FloatingType>        *Zstruct;
+	  tol_t<FloatingType>        *tolstruct;
+	  workQ_t      *workQ;
+	  counter_t    *num_left;
+	};
+
+}	// namespace detail
+}	// namespace pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/tasks.h b/external/pmrrr/include/pmrrr/definitions/tasks.h
similarity index 54%
rename from external/pmrrr/include/pmrrr/tasks.h
rename to external/pmrrr/include/pmrrr/definitions/tasks.h
index 3a6d47a7f1..56e951b284 100644
--- a/external/pmrrr/include/pmrrr/tasks.h
+++ b/external/pmrrr/include/pmrrr/definitions/tasks.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2015, Jack Poulson
  * All rights reserved.
- *
+ * 
  * Redistribution and use in source and binary forms, with or 
  * without modification, are permitted provided that the following
  * conditions are met:
@@ -47,79 +47,65 @@
 #ifndef DISABLE_PTHREADS
  #include <semaphore.h>
 #endif
-#include "global.h"
-#include "queue.h"
-#include "rrr.h"
-#include "structs.h"
+
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/queue.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/definitions/rrr.h>
 
 #define SINGLETON_TASK_FLAG  0
 #define CLUSTER_TASK_FLAG    1
 #define REFINE_TASK_FLAG     2
 
-typedef struct {
-  int        begin;
-  int        end;
-  int        depth;
-  int        bl_begin;
-  int        bl_end;
-  double     bl_spdiam;
-  double     lgap;
-  rrr_t      *RRR;
-} singleton_t;
-
-typedef struct {
-  int        begin;
-  int        end;
-  int        depth;
-  int        bl_begin;   /* In priciple not needed since info */
-  int        bl_end;     /* also contained in iblock+isplit */
-  double     bl_spdiam;
-  double     lgap;
-  int        proc_W_begin;
-  int        proc_W_end;
-  int        left_pid;
-  int        right_pid;
-  rrr_t      *RRR;
-  bool       wait_until_refined;
-  comm_t     *messages;
-} cluster_t;
-
-
-typedef struct {
-  int        begin;
-  int        end;
-  double     *D;
-  double     *DLL;
-  int        p;
-  int        q;
-  int        bl_size;
-  double     bl_spdiam;
-  int        producer_tid; // not longer needed
-#ifndef DISABLE_PTHREADS
-  sem_t      *sem; /* since semt_t is a handle could also store it
-		      instead of pointer to it, but pointer is all
-                      that is needed */
-#endif
-} refine_t;
-
-
-task_t *PMR_create_s_task(int first, int last, int depth,
-			  int bl_begin, int bl_end, double spdiam,
-			  double lgap, rrr_t *RRR);
+namespace pmrrr { namespace detail {
 
-task_t *PMR_create_c_task(int first, int last, int depth,
-			  int bl_begin, int bl_end, double spdiam,
-			  double lgap, int proc_W_begin, 
-			  int proc_W_end, int left_pid, int right_pid, 
-			  rrr_t *RRR);
+	template<typename FloatingType>
+	struct singleton_t {
+	  int        begin;
+	  int        end;
+	  int        depth;
+	  int        bl_begin;
+	  int        bl_end;
+	  FloatingType     bl_spdiam;
+	  FloatingType     lgap;
+	  rrr_t<FloatingType>      *RRR;
+	};
 
-task_t *PMR_create_r_task(int begin, int end, double *D,
-			  double *DLL, int p, int q, int bl_size,
-			  double bl_spdiam, int tid);
+	template<typename FloatingType>
+	struct cluster_t {
+	  int        begin;
+	  int        end;
+	  int        depth;
+	  int        bl_begin;   /* In priciple not needed since info */
+	  int        bl_end;     /* also contained in iblock+isplit */
+	  FloatingType     bl_spdiam;
+	  FloatingType     lgap;
+	  int        proc_W_begin;
+	  int        proc_W_end;
+	  int        left_pid;
+	  int        right_pid;
+	  rrr_t<FloatingType>      *RRR;
+	  bool       wait_until_refined;
+	  comm_t     *messages;
+	};
 
-int PMR_refine_sem_init(refine_t *refine);
-int PMR_refine_sem_destroy(refine_t *refine);
-int PMR_refine_sem_post(refine_t *refine);
-int PMR_refine_sem_wait(refine_t *refine);
+	template<typename FloatingType>
+	struct refine_t{
+	  int        begin;
+	  int        end;
+	  FloatingType     *D;
+	  FloatingType     *DLL;
+	  int        p;
+	  int        q;
+	  int        bl_size;
+	  FloatingType     bl_spdiam;
+	  int        producer_tid; // not longer needed
+	#ifndef DISABLE_PTHREADS
+	  sem_t      *sem; /* since semt_t is a handle could also store it
+				  instead of pointer to it, but pointer is all
+		                  that is needed */
+	#endif
+	};	
 
+} }
 #endif
diff --git a/external/pmrrr/include/pmrrr/process_task.h b/external/pmrrr/include/pmrrr/lapack/lapack.hpp
similarity index 67%
rename from external/pmrrr/include/pmrrr/process_task.h
rename to external/pmrrr/include/pmrrr/lapack/lapack.hpp
index 5add95c779..bfad85b239 100644
--- a/external/pmrrr/include/pmrrr/process_task.h
+++ b/external/pmrrr/include/pmrrr/lapack/lapack.hpp
@@ -1,4 +1,7 @@
 /* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2016, Marcin Copik
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or 
@@ -38,33 +41,17 @@
  *
  */
 
-#ifndef PPROCESS_TASK_H
-#define PPROCESS_TASK_H
-
-#include "global.h"
-#include "structs.h"
-#include "tasks.h"
-#include "counter.h"
-#include "queue.h"
+#ifndef __LAPACK_HPP___
+#define __LAPACK_HPP___
 
-/* Function prototypes */
-int PMR_process_c_task(cluster_t *cl, int tid, proc_t *procinfo,
-		       val_t *Wstruct, vec_t *Zstruct, 
-		       tol_t *tolstruct, workQ_t *workQ, 
-		       counter_t *num_left, double *work, int *iwork);
+namespace pmrrr { namespace lapack {
 
-int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo,
-		       val_t *Wstruct, vec_t *Zstruct, 
-		       tol_t *tolstruct, counter_t *num_left, 
-		       double *work, int *iwork);
+	/* Subroutine */ 
+	int oerbla(const char *srname, int *info);
+	
+	int olsame(const char *ca, const char *cb);
 
-int PMR_process_r_task(refine_t *rf, proc_t *procinfo, val_t *Wstruct,
-		       tol_t *tolstruct, double *work, int *iwork);
+} }
 
-void PMR_process_r_queue(int tid, proc_t *procinfo, val_t *Wstruct,
-			 vec_t *Zstruct, tol_t *tolstruct,
-			 workQ_t *workQ, 
-			 counter_t *num_left, double *work, 
-			 int *iwork);
 
-#endif /* End of header file */
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/ode2.hpp b/external/pmrrr/include/pmrrr/lapack/ode2.hpp
new file mode 100644
index 0000000000..378c0e2964
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/ode2.hpp
@@ -0,0 +1,152 @@
+/**
+	C++ template version of LAPACK routine dlae2.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODE2_HPP__
+#define __ODE2_HPP__
+
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <cfloat>
+#include <cassert>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */
+	template<typename FloatingType>
+	int ode2(FloatingType *a, FloatingType *b, FloatingType *c__, 
+		FloatingType *rt1, FloatingType *rt2)
+	{
+		/* System generated locals */
+		FloatingType d__1;
+
+		/* Builtin functions */
+		// double sqrt(double);
+
+		/* Local variables */
+		FloatingType ab, df, tb, sm, rt, adf, acmn, acmx;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODE2  computes the eigenvalues of a 2-by-2 symmetric matrix */
+	/*     [  A   B  ] */
+	/*     [  B   C  ]. */
+	/*  On return, RT1 is the eigenvalue of larger absolute value, and RT2 */
+	/*  is the eigenvalue of smaller absolute value. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  A       (input) DOUBLE PRECISION */
+	/*          The (1,1) element of the 2-by-2 matrix. */
+
+	/*  B       (input) DOUBLE PRECISION */
+	/*          The (1,2) and (2,1) elements of the 2-by-2 matrix. */
+
+	/*  C       (input) DOUBLE PRECISION */
+	/*          The (2,2) element of the 2-by-2 matrix. */
+
+	/*  RT1     (output) DOUBLE PRECISION */
+	/*          The eigenvalue of larger absolute value. */
+
+	/*  RT2     (output) DOUBLE PRECISION */
+	/*          The eigenvalue of smaller absolute value. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  RT1 is accurate to a few ulps barring over/underflow. */
+
+	/*  RT2 may be inaccurate if there is massive cancellation in the */
+	/*  determinant A*C-B*B; higher precision or correctly rounded or */
+	/*  correctly truncated arithmetic would be needed to compute RT2 */
+	/*  accurately in all cases. */
+
+	/*  Overflow is possible only if RT1 is within a factor of 5 of overflow. */
+	/*  Underflow is harmless if the input data is 0 or exceeds */
+	/*     underflow_threshold / macheps. */
+
+	/* ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Compute the eigenvalues */
+
+		sm = *a + *c__;
+		df = *a - *c__;
+		adf = fabs(df);
+		tb = *b + *b;
+		ab = fabs(tb);
+		if (fabs(*a) > fabs(*c__)) {
+		acmx = *a;
+		acmn = *c__;
+		} else {
+		acmx = *c__;
+		acmn = *a;
+		}
+		if (adf > ab) {
+	/* Computing 2nd power */
+		d__1 = ab / adf;
+		rt = adf * sqrt(d__1 * d__1 + 1.);
+		} else if (adf < ab) {
+	/* Computing 2nd power */
+		d__1 = adf / ab;
+		rt = ab * sqrt(d__1 * d__1 + 1.);
+		} else {
+
+	/*        Includes case AB=ADF=0 */
+
+		rt = ab * sqrt(2.);
+		}
+		if (sm < 0.) {
+		*rt1 = (sm - rt) * .5;
+
+	/*        Order of execution important. */
+	/*        To get fully accurate smaller eigenvalue, */
+	/*        next line needs to be executed in higher precision. */
+
+		*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+		} else if (sm > 0.) {
+		*rt1 = (sm + rt) * .5;
+
+	/*        Order of execution important. */
+	/*        To get fully accurate smaller eigenvalue, */
+	/*        next line needs to be executed in higher precision. */
+
+		*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+		} else {
+
+	/*        Includes case RT1 = RT2 = 0 */
+
+		*rt1 = rt * .5;
+		*rt2 = rt * -.5;
+		}
+		return 0;
+
+	/*     End of ODE2 */
+
+	} /* ode2_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odebz.hpp b/external/pmrrr/include/pmrrr/lapack/odebz.hpp
new file mode 100644
index 0000000000..90335ab79f
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odebz.hpp
@@ -0,0 +1,637 @@
+/**
+	C++ template version of LAPACK routine dlaebz.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODEBZ_HPP__
+#define __ODEBZ_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#define imax(a,b) ( (a) > (b) ? (a) : (b) )
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odebz(int *ijob, int *nitmax, int *n, 
+		int *mmax, int *minp, int *nbmin, FloatingType *abstol, 
+		FloatingType *reltol, FloatingType *pivmin, FloatingType *d__, FloatingType *
+		e, FloatingType *e2, int *nval, FloatingType *ab, FloatingType *c__, 
+		int *mout, int *nab, FloatingType *work, int *iwork, 
+		int *info)
+	{
+		/* System generated locals */
+		int nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, 
+			i__5, i__6;
+		FloatingType d__1, d__2, d__3, d__4;
+
+		/* Local variables */
+		int j, kf, ji, kl, jp, jit;
+		FloatingType tmp1, tmp2;
+		int itmp1, itmp2, kfnew, klnew;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODEBZ contains the iteration loops which compute and use the */
+	/*  function N(w), which is the count of eigenvalues of a symmetric */
+	/*  tridiagonal matrix T less than or equal to its argument  w.  It */
+	/*  performs a choice of two types of loops: */
+
+	/*  IJOB=1, followed by */
+	/*  IJOB=2: It takes as input a list of intervals and returns a list of */
+	/*          sufficiently small intervals whose union contains the same */
+	/*          eigenvalues as the union of the original intervals. */
+	/*          The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */
+	/*          The output interval (AB(j,1),AB(j,2)] will contain */
+	/*          eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */
+
+	/*  IJOB=3: It performs a binary search in each input interval */
+	/*          (AB(j,1),AB(j,2)] for a point  w(j)  such that */
+	/*          N(w(j))=NVAL(j), and uses  C(j)  as the starting point of */
+	/*          the search.  If such a w(j) is found, then on output */
+	/*          AB(j,1)=AB(j,2)=w.  If no such w(j) is found, then on output */
+	/*          (AB(j,1),AB(j,2)] will be a small interval containing the */
+	/*          point where N(w) jumps through NVAL(j), unless that point */
+	/*          lies outside the initial interval. */
+
+	/*  Note that the intervals are in all cases half-open intervals, */
+	/*  i.e., of the form  (a,b] , which includes  b  but not  a . */
+
+	/*  To avoid underflow, the matrix should be scaled so that its largest */
+	/*  element is no greater than  overflow**(1/2) * underflow**(1/4) */
+	/*  in absolute value.  To assure the most accurate computation */
+	/*  of small eigenvalues, the matrix should be scaled to be */
+	/*  not much smaller than that, either. */
+
+	/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
+	/*  Matrix", Report CS41, Computer Science Dept., Stanford */
+	/*  University, July 21, 1966 */
+
+	/*  Note: the arguments are, in general, *not* checked for unreasonable */
+	/*  values. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  IJOB    (input) INT */
+	/*          Specifies what is to be done: */
+	/*          = 1:  Compute NAB for the initial intervals. */
+	/*          = 2:  Perform bisection iteration to find eigenvalues of T. */
+	/*          = 3:  Perform bisection iteration to invert N(w), i.e., */
+	/*                to find a point which has a specified number of */
+	/*                eigenvalues of T to its left. */
+	/*          Other values will cause ODEBZ to return with INFO=-1. */
+
+	/*  NITMAX  (input) INT */
+	/*          The maximum number of "levels" of bisection to be */
+	/*          performed, i.e., an interval of width W will not be made */
+	/*          smaller than 2^(-NITMAX) * W.  If not all intervals */
+	/*          have converged after NITMAX iterations, then INFO is set */
+	/*          to the number of non-converged intervals. */
+
+	/*  N       (input) INT */
+	/*          The dimension n of the tridiagonal matrix T.  It must be at */
+	/*          least 1. */
+
+	/*  MMAX    (input) INT */
+	/*          The maximum number of intervals.  If more than MMAX intervals */
+	/*          are generated, then ODEBZ will quit with INFO=MMAX+1. */
+
+	/*  MINP    (input) INT */
+	/*          The initial number of intervals.  It may not be greater than */
+	/*          MMAX. */
+
+	/*  NBMIN   (input) INT */
+	/*          The smallest number of intervals that should be processed */
+	/*          using a vector loop.  If zero, then only the scalar loop */
+	/*          will be used. */
+
+	/*  ABSTOL  (input) DOUBLE PRECISION */
+	/*          The minimum (absolute) width of an interval.  When an */
+	/*          interval is narrower than ABSTOL, or than RELTOL times the */
+	/*          larger (in magnitude) endpoint, then it is considered to be */
+	/*          sufficiently small, i.e., converged.  This must be at least */
+	/*          zero. */
+
+	/*  RELTOL  (input) DOUBLE PRECISION */
+	/*          The minimum relative width of an interval.  When an interval */
+	/*          is narrower than ABSTOL, or than RELTOL times the larger (in */
+	/*          magnitude) endpoint, then it is considered to be */
+	/*          sufficiently small, i.e., converged.  Note: this should */
+	/*          always be at least radix*machine epsilon. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum absolute value of a "pivot" in the Sturm */
+	/*          sequence loop.  This *must* be at least  max |e(j)**2| * */
+	/*          safe_min  and at least safe_min, where safe_min is at least */
+	/*          the smallest number that can divide one without overflow. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The diagonal elements of the tridiagonal matrix T. */
+
+	/*  E       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The offdiagonal elements of the tridiagonal matrix T in */
+	/*          positions 1 through N-1.  E(N) is arbitrary. */
+
+	/*  E2      (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The squares of the offdiagonal elements of the tridiagonal */
+	/*          matrix T.  E2(N) is ignored. */
+
+	/*  NVAL    (input/output) INT array, dimension (MINP) */
+	/*          If IJOB=1 or 2, not referenced. */
+	/*          If IJOB=3, the desired values of N(w).  The elements of NVAL */
+	/*          will be reordered to correspond with the intervals in AB. */
+	/*          Thus, NVAL(j) on output will not, in general be the same as */
+	/*          NVAL(j) on input, but it will correspond with the interval */
+	/*          (AB(j,1),AB(j,2)] on output. */
+
+	/*  AB      (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */
+	/*          The endpoints of the intervals.  AB(j,1) is  a(j), the left */
+	/*          endpoint of the j-th interval, and AB(j,2) is b(j), the */
+	/*          right endpoint of the j-th interval.  The input intervals */
+	/*          will, in general, be modified, split, and reordered by the */
+	/*          calculation. */
+
+	/*  C       (input/output) DOUBLE PRECISION array, dimension (MMAX) */
+	/*          If IJOB=1, ignored. */
+	/*          If IJOB=2, workspace. */
+	/*          If IJOB=3, then on input C(j) should be initialized to the */
+	/*          first search point in the binary search. */
+
+	/*  MOUT    (output) INT */
+	/*          If IJOB=1, the number of eigenvalues in the intervals. */
+	/*          If IJOB=2 or 3, the number of intervals output. */
+	/*          If IJOB=3, MOUT will equal MINP. */
+
+	/*  NAB     (input/output) INT array, dimension (MMAX,2) */
+	/*          If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */
+	/*          If IJOB=2, then on input, NAB(i,j) should be set.  It must */
+	/*             satisfy the condition: */
+	/*             N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */
+	/*             which means that in interval i only eigenvalues */
+	/*             NAB(i,1)+1,...,NAB(i,2) will be considered.  Usually, */
+	/*             NAB(i,j)=N(AB(i,j)), from a previous call to ODEBZ with */
+	/*             IJOB=1. */
+	/*             On output, NAB(i,j) will contain */
+	/*             max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */
+	/*             the input interval that the output interval */
+	/*             (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */
+	/*             the input values of NAB(k,1) and NAB(k,2). */
+	/*          If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */
+	/*             unless N(w) > NVAL(i) for all search points  w , in which */
+	/*             case NAB(i,1) will not be modified, i.e., the output */
+	/*             value will be the same as the input value (modulo */
+	/*             reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */
+	/*             for all search points  w , in which case NAB(i,2) will */
+	/*             not be modified.  Normally, NAB should be set to some */
+	/*             distinctive value(s) before ODEBZ is called. */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (MMAX) */
+	/*          Workspace. */
+
+	/*  IWORK   (workspace) INT array, dimension (MMAX) */
+	/*          Workspace. */
+
+	/*  INFO    (output) INT */
+	/*          = 0:       All intervals converged. */
+	/*          = 1--MMAX: The last INFO intervals did not converge. */
+	/*          = MMAX+1:  More than MMAX intervals were generated. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*      This routine is intended to be called only by other LAPACK */
+	/*  routines, thus the interface is less user-friendly.  It is intended */
+	/*  for two purposes: */
+
+	/*  (a) finding eigenvalues.  In this case, ODEBZ should have one or */
+	/*      more initial intervals set up in AB, and ODEBZ should be called */
+	/*      with IJOB=1.  This sets up NAB, and also counts the eigenvalues. */
+	/*      Intervals with no eigenvalues would usually be thrown out at */
+	/*      this point.  Also, if not all the eigenvalues in an interval i */
+	/*      are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */
+	/*      For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */
+	/*      eigenvalue.  ODEBZ is then called with IJOB=2 and MMAX */
+	/*      no smaller than the value of MOUT returned by the call with */
+	/*      IJOB=1.  After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */
+	/*      through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */
+	/*      tolerance specified by ABSTOL and RELTOL. */
+
+	/*  (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */
+	/*      In this case, start with a Gershgorin interval  (a,b).  Set up */
+	/*      AB to contain 2 search intervals, both initially (a,b).  One */
+	/*      NVAL element should contain  f-1  and the other should contain  l */
+	/*      , while C should contain a and b, resp.  NAB(i,1) should be -1 */
+	/*      and NAB(i,2) should be N+1, to flag an error if the desired */
+	/*      interval does not lie in (a,b).  ODEBZ is then called with */
+	/*      IJOB=3.  On exit, if w(f-1) < w(f), then one of the intervals -- */
+	/*      j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */
+	/*      if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */
+	/*      >= 0, then the interval will have  N(AB(j,1))=NAB(j,1)=f-k and */
+	/*      N(AB(j,2))=NAB(j,2)=f+r.  The cases w(l) < w(l+1) and */
+	/*      w(l-r)=...=w(l+k) are handled similarly. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Check for Errors */
+
+		/* Parameter adjustments */
+		nab_dim1 = *mmax;
+		nab_offset = 1 + nab_dim1;
+		nab -= nab_offset;
+		ab_dim1 = *mmax;
+		ab_offset = 1 + ab_dim1;
+		ab -= ab_offset;
+		--d__;
+		--e;
+		--e2;
+		--nval;
+		--c__;
+		--work;
+		--iwork;
+
+		/* Function Body */
+		*info = 0;
+		if (*ijob < 1 || *ijob > 3) {
+		*info = -1;
+		return 0;
+		}
+
+	/*     Initialize NAB */
+
+		if (*ijob == 1) {
+
+	/*        Compute the number of eigenvalues in the initial intervals. */
+
+		*mout = 0;
+		i__1 = *minp;
+		for (ji = 1; ji <= i__1; ++ji) {
+			for (jp = 1; jp <= 2; ++jp) {
+			tmp1 = d__[1] - ab[ji + jp * ab_dim1];
+			if (fabs(tmp1) < *pivmin) {
+				tmp1 = -(*pivmin);
+			}
+			nab[ji + jp * nab_dim1] = 0;
+			if (tmp1 <= 0.) {
+				nab[ji + jp * nab_dim1] = 1;
+			}
+
+			i__2 = *n;
+			for (j = 2; j <= i__2; ++j) {
+				tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1];
+				if (fabs(tmp1) < *pivmin) {
+				tmp1 = -(*pivmin);
+				}
+				if (tmp1 <= 0.) {
+				++nab[ji + jp * nab_dim1];
+				}
+	/* L10: */
+			}
+	/* L20: */
+			}
+			*mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1];
+	/* L30: */
+		}
+		return 0;
+		}
+
+	/*     Initialize for loop */
+
+	/*     KF and KL have the following meaning: */
+	/*        Intervals 1,...,KF-1 have converged. */
+	/*        Intervals KF,...,KL  still need to be refined. */
+
+		kf = 1;
+		kl = *minp;
+
+	/*     If IJOB=2, initialize C. */
+	/*     If IJOB=3, use the user-supplied starting point. */
+
+		if (*ijob == 2) {
+		i__1 = *minp;
+		for (ji = 1; ji <= i__1; ++ji) {
+			c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5;
+	/* L40: */
+		}
+		}
+
+	/*     Iteration loop */
+
+		i__1 = *nitmax;
+		for (jit = 1; jit <= i__1; ++jit) {
+
+	/*        Loop over intervals */
+
+		if (kl - kf + 1 >= *nbmin && *nbmin > 0) {
+
+	/*           Begin of Parallel Version of the loop */
+
+			i__2 = kl;
+			for (ji = kf; ji <= i__2; ++ji) {
+
+	/*              Compute N(c), the number of eigenvalues less than c */
+
+			work[ji] = d__[1] - c__[ji];
+			iwork[ji] = 0;
+			if (work[ji] <= *pivmin) {
+				iwork[ji] = 1;
+	/* Computing MIN */
+				d__1 = work[ji], d__2 = -(*pivmin);
+				work[ji] = fmin(d__1,d__2);
+			}
+
+			i__3 = *n;
+			for (j = 2; j <= i__3; ++j) {
+				work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji];
+				if (work[ji] <= *pivmin) {
+				++iwork[ji];
+	/* Computing MIN */
+				d__1 = work[ji], d__2 = -(*pivmin);
+				work[ji] = fmin(d__1,d__2);
+				}
+	/* L50: */
+			}
+	/* L60: */
+			}
+
+			if (*ijob <= 2) {
+
+	/*              IJOB=2: Choose all intervals containing eigenvalues. */
+
+			klnew = kl;
+			i__2 = kl;
+			for (ji = kf; ji <= i__2; ++ji) {
+
+	/*                 Insure that N(w) is monotone */
+
+	/* Computing MIN */
+	/* Computing MAX */
+				i__5 = nab[ji + nab_dim1], i__6 = iwork[ji];
+				i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,i__6);
+				iwork[ji] = imin(i__3,i__4);
+
+	/*                 Update the Queue -- add intervals if both halves */
+	/*                 contain eigenvalues. */
+
+				if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) {
+
+	/*                    No eigenvalue in the upper interval: */
+	/*                    just use the lower interval. */
+
+				ab[ji + (ab_dim1 << 1)] = c__[ji];
+
+				} else if (iwork[ji] == nab[ji + nab_dim1]) {
+
+	/*                    No eigenvalue in the lower interval: */
+	/*                    just use the upper interval. */
+
+				ab[ji + ab_dim1] = c__[ji];
+				} else {
+				++klnew;
+				if (klnew <= *mmax) {
+
+	/*                       Eigenvalue in both intervals -- add upper to */
+	/*                       queue. */
+
+					ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 
+						1)];
+					nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 
+						<< 1)];
+					ab[klnew + ab_dim1] = c__[ji];
+					nab[klnew + nab_dim1] = iwork[ji];
+					ab[ji + (ab_dim1 << 1)] = c__[ji];
+					nab[ji + (nab_dim1 << 1)] = iwork[ji];
+				} else {
+					*info = *mmax + 1;
+				}
+				}
+	/* L70: */
+			}
+			if (*info != 0) {
+				return 0;
+			}
+			kl = klnew;
+			} else {
+
+	/*              IJOB=3: Binary search.  Keep only the interval containing */
+	/*                      w   s.t. N(w) = NVAL */
+
+			i__2 = kl;
+			for (ji = kf; ji <= i__2; ++ji) {
+				if (iwork[ji] <= nval[ji]) {
+				ab[ji + ab_dim1] = c__[ji];
+				nab[ji + nab_dim1] = iwork[ji];
+				}
+				if (iwork[ji] >= nval[ji]) {
+				ab[ji + (ab_dim1 << 1)] = c__[ji];
+				nab[ji + (nab_dim1 << 1)] = iwork[ji];
+				}
+	/* L80: */
+			}
+			}
+
+		} else {
+
+	/*           End of Parallel Version of the loop */
+
+	/*           Begin of Serial Version of the loop */
+
+			klnew = kl;
+			i__2 = kl;
+			for (ji = kf; ji <= i__2; ++ji) {
+
+	/*              Compute N(w), the number of eigenvalues less than w */
+
+			tmp1 = c__[ji];
+			tmp2 = d__[1] - tmp1;
+			itmp1 = 0;
+			if (tmp2 <= *pivmin) {
+				itmp1 = 1;
+	/* Computing MIN */
+				d__1 = tmp2, d__2 = -(*pivmin);
+				tmp2 = fmin(d__1,d__2);
+			}
+
+			i__3 = *n;
+			for (j = 2; j <= i__3; ++j) {
+				tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1;
+				if (tmp2 <= *pivmin) {
+				++itmp1;
+	/* Computing MIN */
+				d__1 = tmp2, d__2 = -(*pivmin);
+				tmp2 = fmin(d__1,d__2);
+				}
+	/* L90: */
+			}
+
+			if (*ijob <= 2) {
+
+	/*                 IJOB=2: Choose all intervals containing eigenvalues. */
+
+	/*                 Insure that N(w) is monotone */
+
+	/* Computing MIN */
+	/* Computing MAX */
+				i__5 = nab[ji + nab_dim1];
+				i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,itmp1);
+				itmp1 = imin(i__3,i__4);
+
+	/*                 Update the Queue -- add intervals if both halves */
+	/*                 contain eigenvalues. */
+
+				if (itmp1 == nab[ji + (nab_dim1 << 1)]) {
+
+	/*                    No eigenvalue in the upper interval: */
+	/*                    just use the lower interval. */
+
+				ab[ji + (ab_dim1 << 1)] = tmp1;
+
+				} else if (itmp1 == nab[ji + nab_dim1]) {
+
+	/*                    No eigenvalue in the lower interval: */
+	/*                    just use the upper interval. */
+
+				ab[ji + ab_dim1] = tmp1;
+				} else if (klnew < *mmax) {
+
+	/*                    Eigenvalue in both intervals -- add upper to queue. */
+
+				++klnew;
+				ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)];
+				nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << 
+					1)];
+				ab[klnew + ab_dim1] = tmp1;
+				nab[klnew + nab_dim1] = itmp1;
+				ab[ji + (ab_dim1 << 1)] = tmp1;
+				nab[ji + (nab_dim1 << 1)] = itmp1;
+				} else {
+				*info = *mmax + 1;
+				return 0;
+				}
+			} else {
+
+	/*                 IJOB=3: Binary search.  Keep only the interval */
+	/*                         containing  w  s.t. N(w) = NVAL */
+
+				if (itmp1 <= nval[ji]) {
+				ab[ji + ab_dim1] = tmp1;
+				nab[ji + nab_dim1] = itmp1;
+				}
+				if (itmp1 >= nval[ji]) {
+				ab[ji + (ab_dim1 << 1)] = tmp1;
+				nab[ji + (nab_dim1 << 1)] = itmp1;
+				}
+			}
+	/* L100: */
+			}
+			kl = klnew;
+
+	/*           End of Serial Version of the loop */
+
+		}
+
+	/*        Check for convergence */
+
+		kfnew = kf;
+		i__2 = kl;
+		for (ji = kf; ji <= i__2; ++ji) {
+			tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], fabs(
+				d__1));
+	/* Computing MAX */
+			d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], fabs(d__1)), d__4 = (d__2 =
+				 ab[ji + ab_dim1], fabs(d__2));
+			tmp2 = fmax(d__3,d__4);
+	/* Computing MAX */
+			d__1 = fmax(*abstol,*pivmin), d__2 = *reltol * tmp2;
+			if (tmp1 < fmax(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + (
+				nab_dim1 << 1)]) {
+
+	/*              Converged -- Swap with position KFNEW, */
+	/*                           then increment KFNEW */
+
+			if (ji > kfnew) {
+				tmp1 = ab[ji + ab_dim1];
+				tmp2 = ab[ji + (ab_dim1 << 1)];
+				itmp1 = nab[ji + nab_dim1];
+				itmp2 = nab[ji + (nab_dim1 << 1)];
+				ab[ji + ab_dim1] = ab[kfnew + ab_dim1];
+				ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)];
+				nab[ji + nab_dim1] = nab[kfnew + nab_dim1];
+				nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)];
+				ab[kfnew + ab_dim1] = tmp1;
+				ab[kfnew + (ab_dim1 << 1)] = tmp2;
+				nab[kfnew + nab_dim1] = itmp1;
+				nab[kfnew + (nab_dim1 << 1)] = itmp2;
+				if (*ijob == 3) {
+				itmp1 = nval[ji];
+				nval[ji] = nval[kfnew];
+				nval[kfnew] = itmp1;
+				}
+			}
+			++kfnew;
+			}
+	/* L110: */
+		}
+		kf = kfnew;
+
+	/*        Choose Midpoints */
+
+		i__2 = kl;
+		for (ji = kf; ji <= i__2; ++ji) {
+			c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5;
+	/* L120: */
+		}
+
+	/*        If no more intervals to refine, quit. */
+
+		if (kf > kl) {
+			goto L140;
+		}
+	/* L130: */
+		}
+
+	/*     Converged */
+
+	L140:
+	/* Computing MAX */
+		i__1 = kl + 1 - kf;
+		*info = imax(i__1,0);
+		*mout = kl;
+
+		return 0;
+
+	/*     End of ODEBZ */
+
+	} /* odebz_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odev2.hpp b/external/pmrrr/include/pmrrr/lapack/odev2.hpp
new file mode 100644
index 0000000000..79078a0aa9
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odev2.hpp
@@ -0,0 +1,198 @@
+/**
+	C++ template version of LAPACK routine dlaev2.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODEV2_HPP__
+#define __ODEV2_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odev2(FloatingType *a, FloatingType *b, FloatingType *c__, 
+		FloatingType *rt1, FloatingType *rt2, FloatingType *cs1, FloatingType *sn1)
+	{
+		/* System generated locals */
+		FloatingType d__1;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		FloatingType ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
+		int sgn1, sgn2;
+		FloatingType acmn, acmx;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */
+	/*     [  A   B  ] */
+	/*     [  B   C  ]. */
+	/*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */
+	/*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */
+	/*  eigenvector for RT1, giving the decomposition */
+
+	/*     [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ] */
+	/*     [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ]. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  A       (input) DOUBLE PRECISION */
+	/*          The (1,1) element of the 2-by-2 matrix. */
+
+	/*  B       (input) DOUBLE PRECISION */
+	/*          The (1,2) element and the conjugate of the (2,1) element of */
+	/*          the 2-by-2 matrix. */
+
+	/*  C       (input) DOUBLE PRECISION */
+	/*          The (2,2) element of the 2-by-2 matrix. */
+
+	/*  RT1     (output) DOUBLE PRECISION */
+	/*          The eigenvalue of larger absolute value. */
+
+	/*  RT2     (output) DOUBLE PRECISION */
+	/*          The eigenvalue of smaller absolute value. */
+
+	/*  CS1     (output) DOUBLE PRECISION */
+	/*  SN1     (output) DOUBLE PRECISION */
+	/*          The vector (CS1, SN1) is a unit right eigenvector for RT1. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  RT1 is accurate to a few ulps barring over/underflow. */
+
+	/*  RT2 may be inaccurate if there is massive cancellation in the */
+	/*  determinant A*C-B*B; higher precision or correctly rounded or */
+	/*  correctly truncated arithmetic would be needed to compute RT2 */
+	/*  accurately in all cases. */
+
+	/*  CS1 and SN1 are accurate to a few ulps barring over/underflow. */
+
+	/*  Overflow is possible only if RT1 is within a factor of 5 of overflow. */
+	/*  Underflow is harmless if the input data is 0 or exceeds */
+	/*     underflow_threshold / macheps. */
+
+	/* ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Compute the eigenvalues */
+
+		sm = *a + *c__;
+		df = *a - *c__;
+		adf = fabs(df);
+		tb = *b + *b;
+		ab = fabs(tb);
+		if (fabs(*a) > fabs(*c__)) {
+		acmx = *a;
+		acmn = *c__;
+		} else {
+		acmx = *c__;
+		acmn = *a;
+		}
+		if (adf > ab) {
+	/* Computing 2nd power */
+		d__1 = ab / adf;
+		rt = adf * sqrt(d__1 * d__1 + 1.);
+		} else if (adf < ab) {
+	/* Computing 2nd power */
+		d__1 = adf / ab;
+		rt = ab * sqrt(d__1 * d__1 + 1.);
+		} else {
+
+	/*        Includes case AB=ADF=0 */
+
+		rt = ab * sqrt(2.);
+		}
+		if (sm < 0.) {
+		*rt1 = (sm - rt) * .5;
+		sgn1 = -1;
+
+	/*        Order of execution important. */
+	/*        To get fully accurate smaller eigenvalue, */
+	/*        next line needs to be executed in higher precision. */
+
+		*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+		} else if (sm > 0.) {
+		*rt1 = (sm + rt) * .5;
+		sgn1 = 1;
+
+	/*        Order of execution important. */
+	/*        To get fully accurate smaller eigenvalue, */
+	/*        next line needs to be executed in higher precision. */
+
+		*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+		} else {
+
+	/*        Includes case RT1 = RT2 = 0 */
+
+		*rt1 = rt * .5;
+		*rt2 = rt * -.5;
+		sgn1 = 1;
+		}
+
+	/*     Compute the eigenvector */
+
+		if (df >= 0.) {
+		cs = df + rt;
+		sgn2 = 1;
+		} else {
+		cs = df - rt;
+		sgn2 = -1;
+		}
+		acs = fabs(cs);
+		if (acs > ab) {
+		ct = -tb / cs;
+		*sn1 = 1. / sqrt(ct * ct + 1.);
+		*cs1 = ct * *sn1;
+		} else {
+		if (ab == 0.) {
+			*cs1 = 1.;
+			*sn1 = 0.;
+		} else {
+			tn = -cs / tb;
+			*cs1 = 1. / sqrt(tn * tn + 1.);
+			*sn1 = tn * *cs1;
+		}
+		}
+		if (sgn1 == sgn2) {
+		tn = *cs1;
+		*cs1 = -(*sn1);
+		*sn1 = tn;
+		}
+		return 0;
+
+	/*     End of ODEV2 */
+
+	} /* odev2_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odnan.hpp b/external/pmrrr/include/pmrrr/lapack/odnan.hpp
new file mode 100644
index 0000000000..2b3cb50990
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odnan.hpp
@@ -0,0 +1,63 @@
+/**
+	C++ template version of LAPACK routine disnan.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODNAN_HPP__
+#define __ODNAN_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odsnan.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	template<typename FloatingType>
+	int odnan(FloatingType *din)
+	{
+		/* System generated locals */
+		int ret_val;
+
+		/* Local variables */
+		//extern int odsnan_(FloatingType *, FloatingType *);
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */
+	/*  otherwise.  To be replaced by the Fortran 2003 intrinsic in the */
+	/*  future. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  DIN      (input) DOUBLE PRECISION */
+	/*          Input to test for NaN. */
+
+	/*  ===================================================================== */
+
+	/*  .. External Functions .. */
+	/*  .. */
+	/*  .. Executable Statements .. */
+		ret_val = odsnan(din, din);
+		return ret_val;
+	} /* odnan_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odneg.hpp b/external/pmrrr/include/pmrrr/lapack/odneg.hpp
new file mode 100644
index 0000000000..ae5dc62b49
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odneg.hpp
@@ -0,0 +1,232 @@
+/**
+	C++ template version of LAPACK routine dlaneg.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODNEG_HPP__
+#define __ODNEG_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odnan.hpp>
+
+#define imax(a,b) ( (a) > (b) ? (a) : (b) )
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+
+namespace pmrrr { namespace lapack {
+
+	template<typename FloatingType>
+	int odneg(int *n, FloatingType *d__, FloatingType *lld, FloatingType *
+		sigma, FloatingType *pivmin, int *r__)
+	{
+		/* System generated locals */
+		int ret_val, i__1, i__2, i__3, i__4;
+
+		/* Local variables */
+		int j;
+		FloatingType p, t;
+		int bj;
+		FloatingType tmp;
+		int neg1, neg2;
+		FloatingType bsav, gamma, dplus;
+		//extern int odnan_(FloatingType *);
+		int negcnt;
+		int sawnan;
+		FloatingType dminus;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODNEG computes the Sturm count, the number of negative pivots */
+	/*  encountered while factoring tridiagonal T - sigma I = L D L^T. */
+	/*  This implementation works directly on the factors without forming */
+	/*  the tridiagonal matrix T.  The Sturm count is also the number of */
+	/*  eigenvalues of T less than sigma. */
+
+	/*  This routine is called from DLARRB. */
+
+	/*  The current routine does not use the PIVMIN parameter but rather */
+	/*  requires IEEE-754 propagation of Infinities and NaNs.  This */
+	/*  routine also has no input range restrictions but does require */
+	/*  default exception handling such that x/0 produces Inf when x is */
+	/*  non-zero, and Inf/Inf produces NaN.  For more information, see: */
+
+	/*    Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
+	/*    Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
+	/*    Scientific Computing, v28, n5, 2006.  DOI 10.1137/050641624 */
+	/*    (Tech report version in LAWN 172 with the same title.) */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of the diagonal matrix D. */
+
+	/*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (N-1) elements L(i)*L(i)*D(i). */
+
+	/*  SIGMA   (input) DOUBLE PRECISION */
+	/*          Shift amount in T - sigma I = L D L^T. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot in the Sturm sequence.  May be used */
+	/*          when zero pivots are encountered on non-IEEE-754 */
+	/*          architectures. */
+
+	/*  R       (input) INT */
+	/*          The twist index for the twisted factorization that is used */
+	/*          for the negcount. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+	/*     Jason Riedy, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     Some architectures propagate Infinities and NaNs very slowly, so */
+	/*     the code computes counts in BLKLEN chunks.  Then a NaN can */
+	/*     propagate at most BLKLEN columns before being detected.  This is */
+	/*     not a general tuning parameter; it needs only to be just large */
+	/*     enough that the overhead is tiny in common cases. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+		/* Parameter adjustments */
+		--lld;
+		--d__;
+
+		/* Function Body */
+		negcnt = 0;
+	/*     I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
+		t = -(*sigma);
+		i__1 = *r__ - 1;
+		for (bj = 1; bj <= i__1; bj += 128) {
+		neg1 = 0;
+		bsav = t;
+	/* Computing MIN */
+		i__3 = bj + 127, i__4 = *r__ - 1;
+		i__2 = imin(i__3,i__4);
+		for (j = bj; j <= i__2; ++j) {
+			dplus = d__[j] + t;
+			if (dplus < 0.) {
+			++neg1;
+			}
+			tmp = t / dplus;
+			t = tmp * lld[j] - *sigma;
+	/* L21: */
+		}
+		sawnan = odnan(&t);
+	/*     Run a slower version of the above loop if a NaN is detected. */
+	/*     A NaN should occur only with a zero pivot after an infinite */
+	/*     pivot.  In that case, substituting 1 for T/DPLUS is the */
+	/*     correct limit. */
+		if (sawnan) {
+			neg1 = 0;
+			t = bsav;
+	/* Computing MIN */
+			i__3 = bj + 127, i__4 = *r__ - 1;
+			i__2 = imin(i__3,i__4);
+			for (j = bj; j <= i__2; ++j) {
+			dplus = d__[j] + t;
+			if (dplus < 0.) {
+				++neg1;
+			}
+			tmp = t / dplus;
+			if (odnan(&tmp)) {
+				tmp = 1.;
+			}
+			t = tmp * lld[j] - *sigma;
+	/* L22: */
+			}
+		}
+		negcnt += neg1;
+	/* L210: */
+		}
+
+	/*     II) lower part: L D L^T - SIGMA I = U- D- U-^T */
+		p = d__[*n] - *sigma;
+		i__1 = *r__;
+		for (bj = *n - 1; bj >= i__1; bj += -128) {
+		neg2 = 0;
+		bsav = p;
+	/* Computing MAX */
+		i__3 = bj - 127;
+		i__2 = imax(i__3,*r__);
+		for (j = bj; j >= i__2; --j) {
+			dminus = lld[j] + p;
+			if (dminus < 0.) {
+			++neg2;
+			}
+			tmp = p / dminus;
+			p = tmp * d__[j] - *sigma;
+	/* L23: */
+		}
+		sawnan = odnan(&p);
+	/*     As above, run a slower version that substitutes 1 for Inf/Inf. */
+
+		if (sawnan) {
+			neg2 = 0;
+			p = bsav;
+	/* Computing MAX */
+			i__3 = bj - 127;
+			i__2 = imax(i__3,*r__);
+			for (j = bj; j >= i__2; --j) {
+			dminus = lld[j] + p;
+			if (dminus < 0.) {
+				++neg2;
+			}
+			tmp = p / dminus;
+			if (odnan(&tmp)) {
+				tmp = 1.;
+			}
+			p = tmp * d__[j] - *sigma;
+	/* L24: */
+			}
+		}
+		negcnt += neg2;
+	/* L230: */
+		}
+
+	/*     III) Twist index */
+	/*       T was shifted by SIGMA initially. */
+		gamma = t + *sigma + p;
+		if (gamma < 0.) {
+		++negcnt;
+		}
+		ret_val = negcnt;
+		return ret_val;
+	} /* odneg_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odnst.hpp b/external/pmrrr/include/pmrrr/lapack/odnst.hpp
new file mode 100644
index 0000000000..a1d8cc1817
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odnst.hpp
@@ -0,0 +1,177 @@
+/**
+	C++ template version of LAPACK routine dlanst.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODNST_HPP__
+#define __ODNST_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odssq.hpp>
+#include <pmrrr/lapack/lapack.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	template<typename FloatingType>
+	FloatingType odnst(const char *norm, int *n, FloatingType *d__, FloatingType *e)
+	{
+		/* Table of constant values */
+		static int c__1 = 1;
+
+		/* System generated locals */
+		int i__1;
+		FloatingType ret_val, d__1, d__2, d__3, d__4, d__5;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType sum, scale;
+		//extern int olsame(char *, char *);
+		FloatingType anorm;
+		//extern /* Subroutine */ int odssq_(int *, FloatingType *, int *, 
+			//FloatingType *, FloatingType *);
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODNST  returns the value of the one norm,  or the Frobenius norm, or */
+	/*  the  infinity norm,  or the  element of  largest absolute value  of a */
+	/*  real symmetric tridiagonal matrix A. */
+
+	/*  Description */
+	/*  =========== */
+
+	/*  ODNST returns the value */
+
+	/*     ODNST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */
+	/*              ( */
+	/*              ( norm1(A),         NORM = '1', 'O' or 'o' */
+	/*              ( */
+	/*              ( normI(A),         NORM = 'I' or 'i' */
+	/*              ( */
+	/*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e' */
+
+	/*  where  norm1  denotes the  one norm of a matrix (maximum column sum), */
+	/*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and */
+	/*  normF  denotes the  Frobenius norm of a matrix (square root of sum of */
+	/*  squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  NORM    (input) CHARACTER*1 */
+	/*          Specifies the value to be returned in ODNST as described */
+	/*          above. */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix A.  N >= 0.  When N = 0, ODNST is */
+	/*          set to zero. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The diagonal elements of A. */
+
+	/*  E       (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (n-1) sub-diagonal or super-diagonal elements of A. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--e;
+		--d__;
+
+		/* Function Body */
+		if (*n <= 0) {
+		anorm = 0.;
+		} else if (olsame(norm, "M")) {
+
+	/*        Find max(abs(A(i,j))). */
+
+		anorm = (d__1 = d__[*n], fabs(d__1));
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+	/* Computing MAX */
+			d__2 = anorm, d__3 = (d__1 = d__[i__], fabs(d__1));
+			anorm = fmax(d__2,d__3);
+	/* Computing MAX */
+			d__2 = anorm, d__3 = (d__1 = e[i__], fabs(d__1));
+			anorm = fmax(d__2,d__3);
+	/* L10: */
+		}
+		} else if (olsame(norm, "O") || *(unsigned char *)
+			norm == '1' || olsame(norm, "I")) {
+
+	/*        Find norm1(A). */
+
+		if (*n == 1) {
+			anorm = fabs(d__[1]);
+		} else {
+	/* Computing MAX */
+			d__3 = fabs(d__[1]) + fabs(e[1]), d__4 = (d__1 = e[*n - 1], fabs(
+				d__1)) + (d__2 = d__[*n], fabs(d__2));
+			anorm = fmax(d__3,d__4);
+			i__1 = *n - 1;
+			for (i__ = 2; i__ <= i__1; ++i__) {
+	/* Computing MAX */
+			d__4 = anorm, d__5 = (d__1 = d__[i__], fabs(d__1)) + (d__2 = e[
+				i__], fabs(d__2)) + (d__3 = e[i__ - 1], fabs(d__3));
+			anorm = fmax(d__4,d__5);
+	/* L20: */
+			}
+		}
+		} else if (olsame(norm, "F") || olsame(norm, "E")) {
+
+	/*        Find normF(A). */
+
+		scale = 0.;
+		sum = 1.;
+		if (*n > 1) {
+			i__1 = *n - 1;
+			odssq(&i__1, &e[1], &c__1, &scale, &sum);
+			sum *= 2;
+		}
+		odssq(n, &d__[1], &c__1, &scale, &sum);
+		anorm = scale * sqrt(sum);
+		}
+
+		ret_val = anorm;
+		return ret_val;
+
+	/*     End of ODNST */
+
+	} /* odnst_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odr1v.hpp b/external/pmrrr/include/pmrrr/lapack/odr1v.hpp
new file mode 100644
index 0000000000..462702c6e4
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odr1v.hpp
@@ -0,0 +1,457 @@
+/**
+	C++ template version of LAPACK routine odr1v.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODR1V_HPP__
+#define __ODR1V_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odnan.hpp>
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odr1v(int *n, int *b1, int *bn, FloatingType 
+		*lambda, FloatingType *d__, FloatingType *l, FloatingType *ld, FloatingType *
+		lld, FloatingType *pivmin, FloatingType *gaptol, FloatingType *z__, int 
+		*wantnc, int *negcnt, FloatingType *ztz, FloatingType *mingma, 
+		int *r__, int *isuppz, FloatingType *nrminv, FloatingType *resid, 
+		FloatingType *rqcorr, FloatingType *work)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2, d__3;
+
+		/* Builtin functions */
+		//    FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType s;
+		int r1, r2;
+		FloatingType eps, tmp;
+		int neg1, neg2, indp, inds;
+		FloatingType dplus;
+		// extern FloatingType odmch_(char *);
+		//extern int odnan_(FloatingType *);
+		int indlpl, indumn;
+		FloatingType dminus;
+		int sawnan1, sawnan2;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODR1V computes the (scaled) r-th column of the inverse of */
+	/*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
+	/*  L D L^T - sigma I. When sigma is close to an eigenvalue, the */
+	/*  computed vector is an accurate eigenvector. Usually, r corresponds */
+	/*  to the index where the eigenvector is largest in magnitude. */
+	/*  The following steps accomplish this computation : */
+	/*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T, */
+	/*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
+	/*  (c) Computation of the diagonal elements of the inverse of */
+	/*      L D L^T - sigma I by combining the above transforms, and choosing */
+	/*      r as the index where the diagonal of the inverse is (one of the) */
+	/*      largest in magnitude. */
+	/*  (d) Computation of the (scaled) r-th column of the inverse using the */
+	/*      twisted factorization obtained by combining the top part of the */
+	/*      the stationary and the bottom part of the progressive transform. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N        (input) INT */
+	/*           The order of the matrix L D L^T. */
+
+	/*  B1       (input) INT */
+	/*           First index of the submatrix of L D L^T. */
+
+	/*  BN       (input) INT */
+	/*           Last index of the submatrix of L D L^T. */
+
+	/*  LAMBDA    (input) DOUBLE PRECISION */
+	/*           The shift. In order to compute an accurate eigenvector, */
+	/*           LAMBDA should be a good approximation to an eigenvalue */
+	/*           of L D L^T. */
+
+	/*  L        (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*           The (n-1) subdiagonal elements of the unit bidiagonal matrix */
+	/*           L, in elements 1 to N-1. */
+
+	/*  D        (input) DOUBLE PRECISION array, dimension (N) */
+	/*           The n diagonal elements of the diagonal matrix D. */
+
+	/*  LD       (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*           The n-1 elements L(i)*D(i). */
+
+	/*  LLD      (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*           The n-1 elements L(i)*L(i)*D(i). */
+
+	/*  PIVMIN   (input) DOUBLE PRECISION */
+	/*           The minimum pivot in the Sturm sequence. */
+
+	/*  GAPTOL   (input) DOUBLE PRECISION */
+	/*           Tolerance that indicates when eigenvector entries are negligible */
+	/*           w.r.t. their contribution to the residual. */
+
+	/*  Z        (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*           On input, all entries of Z must be set to 0. */
+	/*           On output, Z contains the (scaled) r-th column of the */
+	/*           inverse. The scaling is such that Z(R) equals 1. */
+
+	/*  WANTNC   (input) INT */
+	/*           Specifies whether NEGCNT has to be computed. */
+
+	/*  NEGCNT   (output) INT */
+	/*           If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
+	/*           in the  matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
+
+	/*  ZTZ      (output) DOUBLE PRECISION */
+	/*           The square of the 2-norm of Z. */
+
+	/*  MINGMA   (output) DOUBLE PRECISION */
+	/*           The reciprocal of the largest (in magnitude) diagonal */
+	/*           element of the inverse of L D L^T - sigma I. */
+
+	/*  R        (input/output) INT */
+	/*           The twist index for the twisted factorization used to */
+	/*           compute Z. */
+	/*           On input, 0 <= R <= N. If R is input as 0, R is set to */
+	/*           the index where (L D L^T - sigma I)^{-1} is largest */
+	/*           in magnitude. If 1 <= R <= N, R is unchanged. */
+	/*           On output, R contains the twist index used to compute Z. */
+	/*           Ideally, R designates the position of the maximum entry in the */
+	/*           eigenvector. */
+
+	/*  ISUPPZ   (output) INT array, dimension (2) */
+	/*           The support of the vector in Z, i.e., the vector Z is */
+	/*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
+
+	/*  NRMINV   (output) DOUBLE PRECISION */
+	/*           NRMINV = 1/SQRT( ZTZ ) */
+
+	/*  RESID    (output) DOUBLE PRECISION */
+	/*           The residual of the FP vector. */
+	/*           RESID = ABS( MINGMA )/SQRT( ZTZ ) */
+
+	/*  RQCORR   (output) DOUBLE PRECISION */
+	/*           The Rayleigh Quotient correction to LAMBDA. */
+	/*           RQCORR = MINGMA*TMP */
+
+	/*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N) */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--work;
+		--isuppz;
+		--z__;
+		--lld;
+		--ld;
+		--l;
+		--d__;
+
+		/* Function Body */
+		eps = std::numeric_limits<FloatingType>::epsilon(); // eps = odmch_("Precision");
+		if (*r__ == 0) {
+		r1 = *b1;
+		r2 = *bn;
+		} else {
+		r1 = *r__;
+		r2 = *r__;
+		}
+	/*     Storage for LPLUS */
+		indlpl = 0;
+	/*     Storage for UMINUS */
+		indumn = *n;
+		inds = (*n << 1) + 1;
+		indp = *n * 3 + 1;
+		if (*b1 == 1) {
+		work[inds] = 0.;
+		} else {
+		work[inds + *b1 - 1] = lld[*b1 - 1];
+		}
+
+	/*     Compute the stationary transform (using the differential form) */
+	/*     until the index R2. */
+
+		sawnan1 = FALSE_;
+		neg1 = 0;
+		s = work[inds + *b1 - 1] - *lambda;
+		i__1 = r1 - 1;
+		for (i__ = *b1; i__ <= i__1; ++i__) {
+		dplus = d__[i__] + s;
+		work[indlpl + i__] = ld[i__] / dplus;
+		if (dplus < 0.) {
+			++neg1;
+		}
+		work[inds + i__] = s * work[indlpl + i__] * l[i__];
+		s = work[inds + i__] - *lambda;
+	/* L50: */
+		}
+		sawnan1 = odnan(&s);
+		if (sawnan1) {
+		goto L60;
+		}
+		i__1 = r2 - 1;
+		for (i__ = r1; i__ <= i__1; ++i__) {
+		dplus = d__[i__] + s;
+		work[indlpl + i__] = ld[i__] / dplus;
+		work[inds + i__] = s * work[indlpl + i__] * l[i__];
+		s = work[inds + i__] - *lambda;
+	/* L51: */
+		}
+		sawnan1 = odnan(&s);
+
+	L60:
+		if (sawnan1) {
+	/*        Runs a slower version of the above loop if a NaN is detected */
+		neg1 = 0;
+		s = work[inds + *b1 - 1] - *lambda;
+		i__1 = r1 - 1;
+		for (i__ = *b1; i__ <= i__1; ++i__) {
+			dplus = d__[i__] + s;
+			if (fabs(dplus) < *pivmin) {
+			dplus = -(*pivmin);
+			}
+			work[indlpl + i__] = ld[i__] / dplus;
+			if (dplus < 0.) {
+			++neg1;
+			}
+			work[inds + i__] = s * work[indlpl + i__] * l[i__];
+			if (work[indlpl + i__] == 0.) {
+			work[inds + i__] = lld[i__];
+			}
+			s = work[inds + i__] - *lambda;
+	/* L70: */
+		}
+		i__1 = r2 - 1;
+		for (i__ = r1; i__ <= i__1; ++i__) {
+			dplus = d__[i__] + s;
+			if (fabs(dplus) < *pivmin) {
+			dplus = -(*pivmin);
+			}
+			work[indlpl + i__] = ld[i__] / dplus;
+			work[inds + i__] = s * work[indlpl + i__] * l[i__];
+			if (work[indlpl + i__] == 0.) {
+			work[inds + i__] = lld[i__];
+			}
+			s = work[inds + i__] - *lambda;
+	/* L71: */
+		}
+		}
+
+	/*     Compute the progressive transform (using the differential form) */
+	/*     until the index R1 */
+
+		sawnan2 = FALSE_;
+		neg2 = 0;
+		work[indp + *bn - 1] = d__[*bn] - *lambda;
+		i__1 = r1;
+		for (i__ = *bn - 1; i__ >= i__1; --i__) {
+		dminus = lld[i__] + work[indp + i__];
+		tmp = d__[i__] / dminus;
+		if (dminus < 0.) {
+			++neg2;
+		}
+		work[indumn + i__] = l[i__] * tmp;
+		work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
+	/* L80: */
+		}
+		tmp = work[indp + r1 - 1];
+		sawnan2 = odnan(&tmp);
+		if (sawnan2) {
+	/*        Runs a slower version of the above loop if a NaN is detected */
+		neg2 = 0;
+		i__1 = r1;
+		for (i__ = *bn - 1; i__ >= i__1; --i__) {
+			dminus = lld[i__] + work[indp + i__];
+			if (fabs(dminus) < *pivmin) {
+			dminus = -(*pivmin);
+			}
+			tmp = d__[i__] / dminus;
+			if (dminus < 0.) {
+			++neg2;
+			}
+			work[indumn + i__] = l[i__] * tmp;
+			work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
+			if (tmp == 0.) {
+			work[indp + i__ - 1] = d__[i__] - *lambda;
+			}
+	/* L100: */
+		}
+		}
+
+	/*     Find the index (from R1 to R2) of the largest (in magnitude) */
+	/*     diagonal element of the inverse */
+
+		*mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
+		if (*mingma < 0.) {
+		++neg1;
+		}
+		if (*wantnc) {
+		*negcnt = neg1 + neg2;
+		} else {
+		*negcnt = -1;
+		}
+		if (fabs(*mingma) == 0.) {
+		*mingma = eps * work[inds + r1 - 1];
+		}
+		*r__ = r1;
+		i__1 = r2 - 1;
+		for (i__ = r1; i__ <= i__1; ++i__) {
+		tmp = work[inds + i__] + work[indp + i__];
+		if (tmp == 0.) {
+			tmp = eps * work[inds + i__];
+		}
+		if (fabs(tmp) <= fabs(*mingma)) {
+			*mingma = tmp;
+			*r__ = i__ + 1;
+		}
+	/* L110: */
+		}
+
+	/*     Compute the FP vector: solve N^T v = e_r */
+
+		isuppz[1] = *b1;
+		isuppz[2] = *bn;
+		z__[*r__] = 1.;
+		*ztz = 1.;
+
+	/*     Compute the FP vector upwards from R */
+
+		if (! sawnan1 && ! sawnan2) {
+		i__1 = *b1;
+		for (i__ = *r__ - 1; i__ >= i__1; --i__) {
+			z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
+			if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
+				d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
+			z__[i__] = 0.;
+			isuppz[1] = i__ + 1;
+			goto L220;
+			}
+			*ztz += z__[i__] * z__[i__];
+	/* L210: */
+		}
+	L220:
+		;
+		} else {
+	/*        Run slower loop if NaN occurred. */
+		i__1 = *b1;
+		for (i__ = *r__ - 1; i__ >= i__1; --i__) {
+			if (z__[i__ + 1] == 0.) {
+			z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
+			} else {
+			z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
+			}
+			if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
+				d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
+			z__[i__] = 0.;
+			isuppz[1] = i__ + 1;
+			goto L240;
+			}
+			*ztz += z__[i__] * z__[i__];
+	/* L230: */
+		}
+	L240:
+		;
+		}
+	/*     Compute the FP vector downwards from R in blocks of size BLKSIZ */
+		if (! sawnan1 && ! sawnan2) {
+		i__1 = *bn - 1;
+		for (i__ = *r__; i__ <= i__1; ++i__) {
+			z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
+			if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
+				d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
+			z__[i__ + 1] = 0.;
+			isuppz[2] = i__;
+			goto L260;
+			}
+			*ztz += z__[i__ + 1] * z__[i__ + 1];
+	/* L250: */
+		}
+	L260:
+		;
+		} else {
+	/*        Run slower loop if NaN occurred. */
+		i__1 = *bn - 1;
+		for (i__ = *r__; i__ <= i__1; ++i__) {
+			if (z__[i__] == 0.) {
+			z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
+			} else {
+			z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
+			}
+			if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
+				d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
+			z__[i__ + 1] = 0.;
+			isuppz[2] = i__;
+			goto L280;
+			}
+			*ztz += z__[i__ + 1] * z__[i__ + 1];
+	/* L270: */
+		}
+	L280:
+		;
+		}
+
+	/*     Compute quantities for convergence test */
+
+		tmp = 1. / *ztz;
+		*nrminv = sqrt(tmp);
+		*resid = fabs(*mingma) * *nrminv;
+		*rqcorr = *mingma * tmp;
+
+
+		return 0;
+
+	/*     End of ODR1V */
+
+	} /* odr1v_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrnv.hpp b/external/pmrrr/include/pmrrr/lapack/odrnv.hpp
new file mode 100644
index 0000000000..b271e06553
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrnv.hpp
@@ -0,0 +1,160 @@
+/**
+	C++ template version of LAPACK routine dlarnv.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRNV_HPP__
+#define __ODRNV_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odruv.hpp>
+
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrnv(int *idist, int *iseed, int *n, 
+		FloatingType *x)
+	{
+		/* System generated locals */
+		int i__1, i__2, i__3;
+
+		/* Builtin functions */
+		// FloatingType log(FloatingType), sqrt(FloatingType), cos(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType u[128];
+		int il, iv, il2;
+		//extern int odruv_(int *, int *, FloatingType *);
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODRNV returns a vector of n random real numbers from a uniform or */
+	/*  normal distribution. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  IDIST   (input) INT */
+	/*          Specifies the distribution of the random numbers: */
+	/*          = 1:  uniform (0,1) */
+	/*          = 2:  uniform (-1,1) */
+	/*          = 3:  normal (0,1) */
+
+	/*  ISEED   (input/output) INT array, dimension (4) */
+	/*          On entry, the seed of the random number generator; the array */
+	/*          elements must be between 0 and 4095, and ISEED(4) must be */
+	/*          odd. */
+	/*          On exit, the seed is updated. */
+
+	/*  N       (input) INT */
+	/*          The number of random numbers to be generated. */
+
+	/*  X       (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The generated random numbers. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  This routine calls the auxiliary routine ODRUV to generate random */
+	/*  real numbers from a uniform (0,1) distribution, in batches of up to */
+	/*  128 using vectorisable code. The Box-Muller method is used to */
+	/*  transform numbers from a uniform to a normal distribution. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Local Arrays .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--x;
+		--iseed;
+
+		/* Function Body */
+		i__1 = *n;
+		for (iv = 1; iv <= i__1; iv += 64) {
+	/* Computing MIN */
+		i__2 = 64, i__3 = *n - iv + 1;
+		il = imin(i__2,i__3);
+		if (*idist == 3) {
+			il2 = il << 1;
+		} else {
+			il2 = il;
+		}
+
+	/*        Call ODRUV to generate IL2 numbers from a uniform (0,1) */
+	/*        distribution (IL2 <= LV) */
+
+		odruv(&iseed[1], &il2, u);
+
+		if (*idist == 1) {
+
+	/*           Copy generated numbers */
+
+			i__2 = il;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			x[iv + i__ - 1] = u[i__ - 1];
+	/* L10: */
+			}
+		} else if (*idist == 2) {
+
+	/*           Convert generated numbers to uniform (-1,1) distribution */
+
+			i__2 = il;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.;
+	/* L20: */
+			}
+		} else if (*idist == 3) {
+
+	/*           Convert generated numbers to normal (0,1) distribution */
+
+			i__2 = il;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[(
+				i__ << 1) - 1] * 6.2831853071795864769252867663);
+	/* L30: */
+			}
+		}
+	/* L40: */
+		}
+		return 0;
+
+	/*     End of ODRNV */
+
+	} /* odrnv_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrra.hpp b/external/pmrrr/include/pmrrr/lapack/odrra.hpp
new file mode 100644
index 0000000000..5028cc3a04
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrra.hpp
@@ -0,0 +1,165 @@
+/**
+	C++ template version of LAPACK routine dlarra.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRA_HPP__
+#define __ODRRA_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrra(int *n, FloatingType *d__, FloatingType *e, 
+		FloatingType *e2, FloatingType *spltol, FloatingType *tnrm, int *nsplit, 
+		 int *isplit, int *info)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType tmp1, eabs;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Compute the splitting points with threshold SPLTOL. */
+	/*  ODRRA sets any "small" off-diagonal elements to zero. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. N > 0. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the N diagonal elements of the tridiagonal */
+	/*          matrix T. */
+
+	/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the first (N-1) entries contain the subdiagonal */
+	/*          elements of the tridiagonal matrix T; E(N) need not be set. */
+	/*          On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */
+	/*          are set to zero, the other entries of E are untouched. */
+
+	/*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the first (N-1) entries contain the SQUARES of the */
+	/*          subdiagonal elements of the tridiagonal matrix T; */
+	/*          E2(N) need not be set. */
+	/*          On exit, the entries E2( ISPLIT( I ) ), */
+	/*          1 <= I <= NSPLIT, have been set to zero */
+
+	/*  SPLTOL (input) DOUBLE PRECISION */
+	/*          The threshold for splitting. Two criteria can be used: */
+	/*          SPLTOL<0 : criterion based on absolute off-diagonal value */
+	/*          SPLTOL>0 : criterion that preserves relative accuracy */
+
+	/*  TNRM (input) DOUBLE PRECISION */
+	/*          The norm of the matrix. */
+
+	/*  NSPLIT  (output) INT */
+	/*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
+
+	/*  ISPLIT  (output) INT array, dimension (N) */
+	/*          The splitting points, at which T breaks up into blocks. */
+	/*          The first block consists of rows/columns 1 to ISPLIT(1), */
+	/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
+	/*          etc., and the NSPLIT-th consists of rows/columns */
+	/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
+
+
+	/*  INFO    (output) INT */
+	/*          = 0:  successful exit */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--isplit;
+		--e2;
+		--e;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+	/*     Compute splitting points */
+		*nsplit = 1;
+		if (*spltol < 0.) {
+	/*        Criterion based on absolute off-diagonal value */
+		tmp1 = fabs(*spltol) * *tnrm;
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+			eabs = (d__1 = e[i__], fabs(d__1));
+			if (eabs <= tmp1) {
+			e[i__] = 0.;
+			e2[i__] = 0.;
+			isplit[*nsplit] = i__;
+			++(*nsplit);
+			}
+	/* L9: */
+		}
+		} else {
+	/*        Criterion that guarantees relative accuracy */
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+			eabs = (d__1 = e[i__], fabs(d__1));
+			if (eabs <= *spltol * sqrt((d__1 = d__[i__], fabs(d__1))) * sqrt((
+				d__2 = d__[i__ + 1], fabs(d__2)))) {
+			e[i__] = 0.;
+			e2[i__] = 0.;
+			isplit[*nsplit] = i__;
+			++(*nsplit);
+			}
+	/* L10: */
+		}
+		}
+		isplit[*nsplit] = *n;
+		return 0;
+
+	/*     End of ODRRA */
+
+	} /* odrra_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrb.hpp b/external/pmrrr/include/pmrrr/lapack/odrrb.hpp
new file mode 100644
index 0000000000..f01ff3d1d2
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrb.hpp
@@ -0,0 +1,361 @@
+/**
+	C++ template version of LAPACK routine dlarrb.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRB_HPP__
+#define __ODRRB_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odneg.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrb(int *n, FloatingType *d__, FloatingType *lld, 
+		int *ifirst, int *ilast, FloatingType *rtol1, FloatingType *rtol2, 
+		 int *offset, FloatingType *w, FloatingType *wgap, FloatingType *werr, 
+		FloatingType *work, int *iwork, FloatingType *pivmin, FloatingType *
+		spdiam, int *twist, int *info)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		//    FloatingType log(FloatingType);
+
+		/* Local variables */
+		int i__, k, r__, i1, ii, ip;
+		FloatingType gap, mid, tmp, back, lgap, rgap, left;
+		int iter, nint, prev, next;
+		FloatingType cvrgd, right, width;
+		//extern int odneg_(int *, FloatingType *, FloatingType *, FloatingType *
+	//, FloatingType *, int *);
+		int negcnt;
+		FloatingType mnwdth;
+		int olnint, maxitr;
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Given the relatively robust representation(RRR) L D L^T, ODRRB */
+	/*  does "limited" bisection to refine the eigenvalues of L D L^T, */
+	/*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
+	/*  guesses for these eigenvalues are input in W, the corresponding estimate */
+	/*  of the error in these guesses and their gaps are input in WERR */
+	/*  and WGAP, respectively. During bisection, intervals */
+	/*  [left, right] are maintained by storing their mid-points and */
+	/*  semi-widths in the arrays W and WERR respectively. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of the diagonal matrix D. */
+
+	/*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (N-1) elements L(i)*L(i)*D(i). */
+
+	/*  IFIRST  (input) INT */
+	/*          The index of the first eigenvalue to be computed. */
+
+	/*  ILAST   (input) INT */
+	/*          The index of the last eigenvalue to be computed. */
+
+	/*  RTOL1   (input) DOUBLE PRECISION */
+	/*  RTOL2   (input) DOUBLE PRECISION */
+	/*          Tolerance for the convergence of the bisection intervals. */
+	/*          An interval [LEFT,RIGHT] has converged if */
+	/*          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
+	/*          where GAP is the (estimated) distance to the nearest */
+	/*          eigenvalue. */
+
+	/*  OFFSET  (input) INT */
+	/*          Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */
+	/*          through ILAST-OFFSET elements of these arrays are to be used. */
+
+	/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
+	/*          estimates of the eigenvalues of L D L^T indexed IFIRST throug */
+	/*          ILAST. */
+	/*          On output, these estimates are refined. */
+
+	/*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N-1) */
+	/*          On input, the (estimated) gaps between consecutive */
+	/*          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */
+	/*          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */
+	/*          then WGAP(IFIRST-OFFSET) must be set to ZERO. */
+	/*          On output, these gaps are refined. */
+
+	/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
+	/*          the errors in the estimates of the corresponding elements in W. */
+	/*          On output, these errors are refined. */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
+	/*          Workspace. */
+
+	/*  IWORK   (workspace) INT array, dimension (2*N) */
+	/*          Workspace. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot in the Sturm sequence. */
+
+	/*  SPDIAM  (input) DOUBLE PRECISION */
+	/*          The spectral diameter of the matrix. */
+
+	/*  TWIST   (input) INT */
+	/*          The twist index for the twisted factorization that is used */
+	/*          for the negcount. */
+	/*          TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */
+	/*          TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */
+	/*          TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */
+
+	/*  INFO    (output) INT */
+	/*          Error flag. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--iwork;
+		--work;
+		--werr;
+		--wgap;
+		--w;
+		--lld;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+
+		maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 
+			2;
+		mnwdth = *pivmin * 2.;
+
+		r__ = *twist;
+		if (r__ < 1 || r__ > *n) {
+		r__ = *n;
+		}
+
+	/*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
+	/*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
+	/*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */
+	/*     for an unconverged interval is set to the index of the next unconverged */
+	/*     interval, and is -1 or 0 for a converged interval. Thus a linked */
+	/*     list of unconverged intervals is set up. */
+
+		i1 = *ifirst;
+	/*     The number of unconverged intervals */
+		nint = 0;
+	/*     The last unconverged interval found */
+		prev = 0;
+		rgap = wgap[i1 - *offset];
+		i__1 = *ilast;
+		for (i__ = i1; i__ <= i__1; ++i__) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+		left = w[ii] - werr[ii];
+		right = w[ii] + werr[ii];
+		lgap = rgap;
+		rgap = wgap[ii];
+		gap = fmin(lgap,rgap);
+	/*        Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
+	/*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */
+
+	/*        Do while( NEGCNT(LEFT).GT.I-1 ) */
+
+		back = werr[ii];
+	L20:
+		negcnt = odneg(n, &d__[1], &lld[1], &left, pivmin, &r__);
+		if (negcnt > i__ - 1) {
+			left -= back;
+			back *= 2.;
+			goto L20;
+		}
+
+	/*        Do while( NEGCNT(RIGHT).LT.I ) */
+	/*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */
+
+		back = werr[ii];
+	L50:
+		negcnt = odneg(n, &d__[1], &lld[1], &right, pivmin, &r__);
+		if (negcnt < i__) {
+			right += back;
+			back *= 2.;
+			goto L50;
+		}
+		width = (d__1 = left - right, fabs(d__1)) * .5;
+	/* Computing MAX */
+		d__1 = fabs(left), d__2 = fabs(right);
+		tmp = fmax(d__1,d__2);
+	/* Computing MAX */
+		d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
+		cvrgd = fmax(d__1,d__2);
+		if (width <= cvrgd || width <= mnwdth) {
+	/*           This interval has already converged and does not need refinement. */
+	/*           (Note that the gaps might change through refining the */
+	/*            eigenvalues, however, they can only get bigger.) */
+	/*           Remove it from the list. */
+			iwork[k - 1] = -1;
+	/*           Make sure that I1 always points to the first unconverged interval */
+			if (i__ == i1 && i__ < *ilast) {
+			i1 = i__ + 1;
+			}
+			if (prev >= i1 && i__ <= *ilast) {
+			iwork[(prev << 1) - 1] = i__ + 1;
+			}
+		} else {
+	/*           unconverged interval found */
+			prev = i__;
+			++nint;
+			iwork[k - 1] = i__ + 1;
+			iwork[k] = negcnt;
+		}
+		work[k - 1] = left;
+		work[k] = right;
+	/* L75: */
+		}
+
+	/*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
+	/*     and while (ITER.LT.MAXITR) */
+
+		iter = 0;
+	L80:
+		prev = i1 - 1;
+		i__ = i1;
+		olnint = nint;
+		i__1 = olnint;
+		for (ip = 1; ip <= i__1; ++ip) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+		rgap = wgap[ii];
+		lgap = rgap;
+		if (ii > 1) {
+			lgap = wgap[ii - 1];
+		}
+		gap = fmin(lgap,rgap);
+		next = iwork[k - 1];
+		left = work[k - 1];
+		right = work[k];
+		mid = (left + right) * .5;
+	/*        semiwidth of interval */
+		width = right - mid;
+	/* Computing MAX */
+		d__1 = fabs(left), d__2 = fabs(right);
+		tmp = fmax(d__1,d__2);
+	/* Computing MAX */
+		d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
+		cvrgd = fmax(d__1,d__2);
+		if (width <= cvrgd || width <= mnwdth || iter == maxitr) {
+	/*           reduce number of unconverged intervals */
+			--nint;
+	/*           Mark interval as converged. */
+			iwork[k - 1] = 0;
+			if (i1 == i__) {
+			i1 = next;
+			} else {
+	/*              Prev holds the last unconverged interval previously examined */
+			if (prev >= i1) {
+				iwork[(prev << 1) - 1] = next;
+			}
+			}
+			i__ = next;
+			goto L100;
+		}
+		prev = i__;
+
+	/*        Perform one bisection step */
+
+		negcnt = odneg(n, &d__[1], &lld[1], &mid, pivmin, &r__);
+		if (negcnt <= i__ - 1) {
+			work[k - 1] = mid;
+		} else {
+			work[k] = mid;
+		}
+		i__ = next;
+	L100:
+		;
+		}
+		++iter;
+	/*     do another loop if there are still unconverged intervals */
+	/*     However, in the last iteration, all intervals are accepted */
+	/*     since this is the best we can do. */
+		if (nint > 0 && iter <= maxitr) {
+		goto L80;
+		}
+
+
+	/*     At this point, all the intervals have converged */
+		i__1 = *ilast;
+		for (i__ = *ifirst; i__ <= i__1; ++i__) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+	/*        All intervals marked by '0' have been refined. */
+		if (iwork[k - 1] == 0) {
+			w[ii] = (work[k - 1] + work[k]) * .5;
+			werr[ii] = work[k] - w[ii];
+		}
+	/* L110: */
+		}
+
+		i__1 = *ilast;
+		for (i__ = *ifirst + 1; i__ <= i__1; ++i__) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+	/* Computing MAX */
+		d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1];
+		wgap[ii - 1] = fmax(d__1,d__2);
+	/* L111: */
+		}
+		return 0;
+
+	/*     End of ODRRB */
+
+	} /* odrrb_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrc.hpp b/external/pmrrr/include/pmrrr/lapack/odrrc.hpp
new file mode 100644
index 0000000000..26bafcde38
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrc.hpp
@@ -0,0 +1,194 @@
+/**
+	C++ template version of LAPACK routine dlarrc.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRC_HPP__
+#define __ODRRC_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/lapack.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrc(const char *jobt, int *n, FloatingType *vl, 
+		FloatingType *vu, FloatingType *d__, FloatingType *e, FloatingType *pivmin, 
+		int *eigcnt, int *lcnt, int *rcnt, int *info)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1;
+
+		/* Local variables */
+		int i__;
+		FloatingType sl, su, tmp, tmp2;
+		int matt;
+		FloatingType lpivot, rpivot;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Find the number of eigenvalues of the symmetric tridiagonal matrix T */
+	/*  that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */
+	/*  if JOBT = 'L'. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  JOBT    (input) CHARACTER*1 */
+	/*          = 'T':  Compute Sturm count for matrix T. */
+	/*          = 'L':  Compute Sturm count for matrix L D L^T. */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. N > 0. */
+
+	/*  VL      (input) DOUBLE PRECISION */
+	/*  VU      (input) DOUBLE PRECISION */
+	/*          The lower and upper bounds for the eigenvalues. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */
+	/*          JOBT = 'L': The N diagonal elements of the diagonal matrix D. */
+
+	/*  E       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */
+	/*          JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot in the Sturm sequence for T. */
+
+	/*  EIGCNT  (output) INT */
+	/*          The number of eigenvalues of the symmetric tridiagonal matrix T */
+	/*          that are in the interval (VL,VU] */
+
+	/*  LCNT    (output) INT */
+	/*  RCNT    (output) INT */
+	/*          The left and right negcounts of the interval. */
+
+	/*  INFO    (output) INT */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--e;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+		*lcnt = 0;
+		*rcnt = 0;
+		*eigcnt = 0;
+		matt = olsame(jobt, "T");
+		if (matt) {
+	/*        Sturm sequence count on T */
+		lpivot = d__[1] - *vl;
+		rpivot = d__[1] - *vu;
+		if (lpivot <= 0.) {
+			++(*lcnt);
+		}
+		if (rpivot <= 0.) {
+			++(*rcnt);
+		}
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+	/* Computing 2nd power */
+			d__1 = e[i__];
+			tmp = d__1 * d__1;
+			lpivot = d__[i__ + 1] - *vl - tmp / lpivot;
+			rpivot = d__[i__ + 1] - *vu - tmp / rpivot;
+			if (lpivot <= 0.) {
+			++(*lcnt);
+			}
+			if (rpivot <= 0.) {
+			++(*rcnt);
+			}
+	/* L10: */
+		}
+		} else {
+	/*        Sturm sequence count on L D L^T */
+		sl = -(*vl);
+		su = -(*vu);
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+			lpivot = d__[i__] + sl;
+			rpivot = d__[i__] + su;
+			if (lpivot <= 0.) {
+			++(*lcnt);
+			}
+			if (rpivot <= 0.) {
+			++(*rcnt);
+			}
+			tmp = e[i__] * d__[i__] * e[i__];
+
+			tmp2 = tmp / lpivot;
+			if (tmp2 == 0.) {
+			sl = tmp - *vl;
+			} else {
+			sl = sl * tmp2 - *vl;
+			}
+
+			tmp2 = tmp / rpivot;
+			if (tmp2 == 0.) {
+			su = tmp - *vu;
+			} else {
+			su = su * tmp2 - *vu;
+			}
+	/* L20: */
+		}
+		lpivot = d__[*n] + sl;
+		rpivot = d__[*n] + su;
+		if (lpivot <= 0.) {
+			++(*lcnt);
+		}
+		if (rpivot <= 0.) {
+			++(*rcnt);
+		}
+		}
+		*eigcnt = *rcnt - *lcnt;
+		return 0;
+
+	/*     end of ODRRC */
+
+	} /* odrrc_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrd.hpp b/external/pmrrr/include/pmrrr/lapack/odrrd.hpp
new file mode 100644
index 0000000000..a803eb6c67
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrd.hpp
@@ -0,0 +1,804 @@
+/**
+	C++ template version of LAPACK routine dlarrd.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRD_HPP__
+#define __ODRRD_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odebz.hpp>
+#include <pmrrr/lapack/lapack.hpp>
+
+#define imax(a,b) ( (a) > (b) ? (a) : (b) )
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrd(const char *range, const char *order, int *n, FloatingType *vl,  
+		FloatingType *vu, int *il, int *iu, FloatingType *gers, 
+		FloatingType *reltol, FloatingType *d__, FloatingType *e, FloatingType *e2, 
+		FloatingType *pivmin, int *nsplit, int *isplit, int *m, 
+		FloatingType *w, FloatingType *werr, FloatingType *wl, FloatingType *wu, 
+		int *iblock, int *indexw, FloatingType *work, int *iwork, 
+		int *info)
+	{
+		/* Table of constant values */
+		static int c__1 = 1;
+		static int c_n1 = -1;
+		static int c__3 = 3;
+		static int c__2 = 2;
+		static int c__0 = 0;
+		/* System generated locals */
+		int i__1, i__2, i__3;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType log(FloatingType);
+
+		/* Local variables */
+		int i__, j, ib, ie, je, nb;
+		FloatingType gl;
+		int im, in;
+		FloatingType gu;
+		int iw, jee;
+		FloatingType eps;
+		int nwl;
+		FloatingType wlu, wul;
+		int nwu;
+		FloatingType tmp1, tmp2;
+		int iend, jblk, ioff, iout, itmp1, itmp2, jdisc;
+		int iinfo;
+		FloatingType atoli;
+		int iwoff, itmax;
+		FloatingType wkill, rtoli, uflow, tnorm;
+		// extern FloatingType dlamch_(char *);
+		int ibegin;
+		int irange, idiscl, idumma[1];
+		/* extern int ilaenv_(int *, char *, char *, int *, int *,  */
+		/* 	    int *, int *); */
+		int idiscu;
+		int ncnvrg, toofew;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+	/*  -- April 2009                                                      -- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODRRD computes the eigenvalues of a symmetric tridiagonal */
+	/*  matrix T to suitable accuracy. This is an auxiliary code to be */
+	/*  called from DSTEMR. */
+	/*  The user may ask for all eigenvalues, all eigenvalues */
+	/*  in the half-open interval (VL, VU], or the IL-th through IU-th */
+	/*  eigenvalues. */
+
+	/*  To avoid overflow, the matrix must be scaled so that its */
+	/*  largest element is no greater than overflow**(1/2) * */
+	/*  underflow**(1/4) in absolute value, and for greatest */
+	/*  accuracy, it should not be much smaller than that. */
+
+	/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
+	/*  Matrix", Report CS41, Computer Science Dept., Stanford */
+	/*  University, July 21, 1966. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  RANGE   (input) CHARACTER */
+	/*          = 'A': ("All")   all eigenvalues will be found. */
+	/*          = 'V': ("Value") all eigenvalues in the half-open interval */
+	/*                           (VL, VU] will be found. */
+	/*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
+	/*                           entire matrix) will be found. */
+
+	/*  ORDER   (input) CHARACTER */
+	/*          = 'B': ("By Block") the eigenvalues will be grouped by */
+	/*                              split-off block (see IBLOCK, ISPLIT) and */
+	/*                              ordered from smallest to largest within */
+	/*                              the block. */
+	/*          = 'E': ("Entire matrix") */
+	/*                              the eigenvalues for the entire matrix */
+	/*                              will be ordered from smallest to */
+	/*                              largest. */
+
+	/*  N       (input) INT */
+	/*          The order of the tridiagonal matrix T.  N >= 0. */
+
+	/*  VL      (input) DOUBLE PRECISION */
+	/*  VU      (input) DOUBLE PRECISION */
+	/*          If RANGE='V', the lower and upper bounds of the interval to */
+	/*          be searched for eigenvalues.  Eigenvalues less than or equal */
+	/*          to VL, or greater than VU, will not be returned.  VL < VU. */
+	/*          Not referenced if RANGE = 'A' or 'I'. */
+
+	/*  IL      (input) INT */
+	/*  IU      (input) INT */
+	/*          If RANGE='I', the indices (in ascending order) of the */
+	/*          smallest and largest eigenvalues to be returned. */
+	/*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
+	/*          Not referenced if RANGE = 'A' or 'V'. */
+
+	/*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
+	/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
+	/*          is (GERS(2*i-1), GERS(2*i)). */
+
+	/*  RELTOL  (input) DOUBLE PRECISION */
+	/*          The minimum relative width of an interval.  When an interval */
+	/*          is narrower than RELTOL times the larger (in */
+	/*          magnitude) endpoint, then it is considered to be */
+	/*          sufficiently small, i.e., converged.  Note: this should */
+	/*          always be at least radix*machine epsilon. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The n diagonal elements of the tridiagonal matrix T. */
+
+	/*  E       (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (n-1) off-diagonal elements of the tridiagonal matrix T. */
+
+	/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot allowed in the Sturm sequence for T. */
+
+	/*  NSPLIT  (input) INT */
+	/*          The number of diagonal blocks in the matrix T. */
+	/*          1 <= NSPLIT <= N. */
+
+	/*  ISPLIT  (input) INT array, dimension (N) */
+	/*          The splitting points, at which T breaks up into submatrices. */
+	/*          The first submatrix consists of rows/columns 1 to ISPLIT(1), */
+	/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
+	/*          etc., and the NSPLIT-th consists of rows/columns */
+	/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
+	/*          (Only the first NSPLIT elements will actually be used, but */
+	/*          since the user cannot know a priori what value NSPLIT will */
+	/*          have, N words must be reserved for ISPLIT.) */
+
+	/*  M       (output) INT */
+	/*          The actual number of eigenvalues found. 0 <= M <= N. */
+	/*          (See also the description of INFO=2,3.) */
+
+	/*  W       (output) DOUBLE PRECISION array, dimension (N) */
+	/*          On exit, the first M elements of W will contain the */
+	/*          eigenvalue approximations. ODRRD computes an interval */
+	/*          I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */
+	/*          approximation is given as the interval midpoint */
+	/*          W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */
+	/*          WERR(j) = abs( a_j - b_j)/2 */
+
+	/*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The error bound on the corresponding eigenvalue approximation */
+	/*          in W. */
+
+	/*  WL      (output) DOUBLE PRECISION */
+	/*  WU      (output) DOUBLE PRECISION */
+	/*          The interval (WL, WU] contains all the wanted eigenvalues. */
+	/*          If RANGE='V', then WL=VL and WU=VU. */
+	/*          If RANGE='A', then WL and WU are the global Gerschgorin bounds */
+	/*                        on the spectrum. */
+	/*          If RANGE='I', then WL and WU are computed by ODEBZ from the */
+	/*                        index range specified. */
+
+	/*  IBLOCK  (output) INT array, dimension (N) */
+	/*          At each row/column j where E(j) is zero or small, the */
+	/*          matrix T is considered to split into a block diagonal */
+	/*          matrix.  On exit, if INFO = 0, IBLOCK(i) specifies to which */
+	/*          block (from 1 to the number of blocks) the eigenvalue W(i) */
+	/*          belongs.  (ODRRD may use the remaining N-M elements as */
+	/*          workspace.) */
+
+	/*  INDEXW  (output) INT array, dimension (N) */
+	/*          The indices of the eigenvalues within each block (submatrix); */
+	/*          for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */
+	/*          i-th eigenvalue W(i) is the j-th eigenvalue in block k. */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N) */
+
+	/*  IWORK   (workspace) INT array, dimension (3*N) */
+
+	/*  INFO    (output) INT */
+	/*          = 0:  successful exit */
+	/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
+	/*          > 0:  some or all of the eigenvalues failed to converge or */
+	/*                were not computed: */
+	/*                =1 or 3: Bisection failed to converge for some */
+	/*                        eigenvalues; these eigenvalues are flagged by a */
+	/*                        negative block number.  The effect is that the */
+	/*                        eigenvalues may not be as accurate as the */
+	/*                        absolute and relative tolerances.  This is */
+	/*                        generally caused by unexpectedly inaccurate */
+	/*                        arithmetic. */
+	/*                =2 or 3: RANGE='I' only: Not all of the eigenvalues */
+	/*                        IL:IU were found. */
+	/*                        Effect: M < IU+1-IL */
+	/*                        Cause:  non-monotonic arithmetic, causing the */
+	/*                                Sturm sequence to be non-monotonic. */
+	/*                        Cure:   recalculate, using RANGE='A', and pick */
+	/*                                out eigenvalues IL:IU.  In some cases, */
+	/*                                increasing the PARAMETER "FUDGE" may */
+	/*                                make things work. */
+	/*                = 4:    RANGE='I', and the Gershgorin interval */
+	/*                        initially used was too small.  No eigenvalues */
+	/*                        were computed. */
+	/*                        Probable cause: your machine has sloppy */
+	/*                                        floating-point arithmetic. */
+	/*                        Cure: Increase the PARAMETER "FUDGE", */
+	/*                              recompile, and try again. */
+
+	/*  Internal Parameters */
+	/*  =================== */
+
+	/*  FUDGE   DOUBLE PRECISION, default = 2 */
+	/*          A "fudge factor" to widen the Gershgorin intervals.  Ideally, */
+	/*          a value of 1 should work, but on machines with sloppy */
+	/*          arithmetic, this needs to be larger.  The default for */
+	/*          publicly released versions should be large enough to handle */
+	/*          the worst machine around.  Note that this has no effect */
+	/*          on accuracy of the solution. */
+
+	/*  Based on contributions by */
+	/*     W. Kahan, University of California, Berkeley, USA */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Local Arrays .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--iwork;
+		--work;
+		--indexw;
+		--iblock;
+		--werr;
+		--w;
+		--isplit;
+		--e2;
+		--e;
+		--d__;
+		--gers;
+
+		/* Function Body */
+		*info = 0;
+
+	/*     Decode RANGE */
+
+		if (olsame(range, "A")) {
+		irange = 1;
+		} else if (olsame(range, "V")) {
+		irange = 2;
+		} else if (olsame(range, "I")) {
+		irange = 3;
+		} else {
+		irange = 0;
+		}
+
+	/*     Check for Errors */
+
+		if (irange <= 0) {
+		*info = -1;
+		} else if (! (olsame(order, "B") || olsame(order, 
+			"E"))) {
+		*info = -2;
+		} else if (*n < 0) {
+		*info = -3;
+		} else if (irange == 2) {
+		if (*vl >= *vu) {
+			*info = -5;
+		}
+		} else if (irange == 3 && (*il < 1 || *il > imax(1,*n))) {
+		*info = -6;
+		} else if (irange == 3 && (*iu < imin(*n,*il) || *iu > *n)) {
+		*info = -7;
+		}
+
+		if (*info != 0) {
+		return 0;
+		}
+	/*     Initialize error flags */
+		*info = 0;
+		ncnvrg = FALSE_;
+		toofew = FALSE_;
+	/*     Quick return if possible */
+		*m = 0;
+		if (*n == 0) {
+		return 0;
+		}
+	/*     Simplification: */
+		if (irange == 3 && *il == 1 && *iu == *n) {
+		irange = 1;
+		}
+	/*     Get machine constants */
+		eps = std::numeric_limits<FloatingType>::epsilon(); // odmch_("P");
+		uflow = std::numeric_limits<FloatingType>::min();  // odmch_("U");
+	/*     Special Case when N=1 */
+	/*     Treat case of 1x1 matrix for quick return */
+		if (*n == 1) {
+		if (irange == 1 || (irange == 2 && d__[1] > *vl && d__[1] <= *vu) || 
+			(irange == 3 && *il == 1 && *iu == 1)) {
+			*m = 1;
+			w[1] = d__[1];
+	/*           The computation error of the eigenvalue is zero */
+			werr[1] = 0.;
+			iblock[1] = 1;
+			indexw[1] = 1;
+		}
+		return 0;
+		}
+	/*     NB is the minimum vector length for vector bisection, or 0 */
+	/*     if only scalar is to be done. */
+		nb = 1; // ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1);
+		if (nb <= 1) {
+		nb = 0;
+		}
+	/*     Find global spectral radius */
+		gl = d__[1];
+		gu = d__[1];
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+	/* Computing MIN */
+		d__1 = gl, d__2 = gers[(i__ << 1) - 1];
+		gl = fmin(d__1,d__2);
+	/* Computing MAX */
+		d__1 = gu, d__2 = gers[i__ * 2];
+		gu = fmax(d__1,d__2);
+	/* L5: */
+		}
+	/*     Compute global Gerschgorin bounds and spectral diameter */
+	/* Computing MAX */
+		d__1 = fabs(gl), d__2 = fabs(gu);
+		tnorm = fmax(d__1,d__2);
+		gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.;
+		gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.;
+	/*     [JAN/28/2009] remove the line below since SPDIAM variable not use */
+	/*     SPDIAM = GU - GL */
+	/*     Input arguments for ODEBZ: */
+	/*     The relative tolerance.  An interval (a,b] lies within */
+	/*     "relative tolerance" if  b-a < RELTOL*max(|a|,|b|), */
+		rtoli = *reltol;
+	/*     Set the absolute tolerance for interval convergence to zero to force */
+	/*     interval convergence based on relative size of the interval. */
+	/*     This is dangerous because intervals might not converge when RELTOL is */
+	/*     small. But at least a very small number should be selected so that for */
+	/*     strongly graded matrices, the code can get relatively accurate */
+	/*     eigenvalues. */
+		atoli = uflow * 4. + *pivmin * 4.;
+		if (irange == 3) {
+	/*        RANGE='I': Compute an interval containing eigenvalues */
+	/*        IL through IU. The initial interval [GL,GU] from the global */
+	/*        Gerschgorin bounds GL and GU is refined by ODEBZ. */
+		itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 
+			2;
+		work[*n + 1] = gl;
+		work[*n + 2] = gl;
+		work[*n + 3] = gu;
+		work[*n + 4] = gu;
+		work[*n + 5] = gl;
+		work[*n + 6] = gu;
+		iwork[1] = -1;
+		iwork[2] = -1;
+		iwork[3] = *n + 1;
+		iwork[4] = *n + 1;
+		iwork[5] = *il - 1;
+		iwork[6] = *iu;
+
+		odebz(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, &
+			d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5]
+	, &iout, &iwork[1], &w[1], &iblock[1], &iinfo);
+		if (iinfo != 0) {
+			*info = iinfo;
+			return 0;
+		}
+	/*        On exit, output intervals may not be ordered by ascending negcount */
+		if (iwork[6] == *iu) {
+			*wl = work[*n + 1];
+			wlu = work[*n + 3];
+			nwl = iwork[1];
+			*wu = work[*n + 4];
+			wul = work[*n + 2];
+			nwu = iwork[4];
+		} else {
+			*wl = work[*n + 2];
+			wlu = work[*n + 4];
+			nwl = iwork[2];
+			*wu = work[*n + 3];
+			wul = work[*n + 1];
+			nwu = iwork[3];
+		}
+	/*        On exit, the interval [WL, WLU] contains a value with negcount NWL, */
+	/*        and [WUL, WU] contains a value with negcount NWU. */
+		if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) {
+			*info = 4;
+			return 0;
+		}
+		} else if (irange == 2) {
+		*wl = *vl;
+		*wu = *vu;
+		} else if (irange == 1) {
+		*wl = gl;
+		*wu = gu;
+		}
+	/*     Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */
+	/*     NWL accumulates the number of eigenvalues .le. WL, */
+	/*     NWU accumulates the number of eigenvalues .le. WU */
+		*m = 0;
+		iend = 0;
+		*info = 0;
+		nwl = 0;
+		nwu = 0;
+
+		i__1 = *nsplit;
+		for (jblk = 1; jblk <= i__1; ++jblk) {
+		ioff = iend;
+		ibegin = ioff + 1;
+		iend = isplit[jblk];
+		in = iend - ioff;
+
+		if (in == 1) {
+	/*           1x1 block */
+			if (*wl >= d__[ibegin] - *pivmin) {
+			++nwl;
+			}
+			if (*wu >= d__[ibegin] - *pivmin) {
+			++nwu;
+			}
+			if (irange == 1 || (*wl < d__[ibegin] - *pivmin && *wu >= d__[
+				ibegin] - *pivmin)) {
+			++(*m);
+			w[*m] = d__[ibegin];
+			werr[*m] = 0.;
+	/*              The gap for a single block doesn't matter for the later */
+	/*              algorithm and is assigned an arbitrary large value */
+			iblock[*m] = jblk;
+			indexw[*m] = 1;
+			}
+	/*        Disabled 2x2 case because of a failure on the following matrix */
+	/*        RANGE = 'I', IL = IU = 4 */
+	/*          Original Tridiagonal, d = [ */
+	/*           -0.150102010615740E+00 */
+	/*           -0.849897989384260E+00 */
+	/*           -0.128208148052635E-15 */
+	/*            0.128257718286320E-15 */
+	/*          ]; */
+	/*          e = [ */
+	/*           -0.357171383266986E+00 */
+	/*           -0.180411241501588E-15 */
+	/*           -0.175152352710251E-15 */
+	/*          ]; */
+
+	/*         ELSE IF( IN.EQ.2 ) THEN */
+	/* *           2x2 block */
+	/*            DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */
+	/*            TMP1 = HALF*(D(IBEGIN)+D(IEND)) */
+	/*            L1 = TMP1 - DISC */
+	/*            IF( WL.GE. L1-PIVMIN ) */
+	/*     $         NWL = NWL + 1 */
+	/*            IF( WU.GE. L1-PIVMIN ) */
+	/*     $         NWU = NWU + 1 */
+	/*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */
+	/*     $          L1-PIVMIN ) ) THEN */
+	/*               M = M + 1 */
+	/*               W( M ) = L1 */
+	/* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
+	/*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
+	/*               IBLOCK( M ) = JBLK */
+	/*               INDEXW( M ) = 1 */
+	/*            ENDIF */
+	/*            L2 = TMP1 + DISC */
+	/*            IF( WL.GE. L2-PIVMIN ) */
+	/*     $         NWL = NWL + 1 */
+	/*            IF( WU.GE. L2-PIVMIN ) */
+	/*     $         NWU = NWU + 1 */
+	/*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */
+	/*     $          L2-PIVMIN ) ) THEN */
+	/*               M = M + 1 */
+	/*               W( M ) = L2 */
+	/* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
+	/*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
+	/*               IBLOCK( M ) = JBLK */
+	/*               INDEXW( M ) = 2 */
+	/*            ENDIF */
+		} else {
+	/*           General Case - block of size IN >= 2 */
+	/*           Compute local Gerschgorin interval and use it as the initial */
+	/*           interval for ODEBZ */
+			gu = d__[ibegin];
+			gl = d__[ibegin];
+			tmp1 = 0.;
+			i__2 = iend;
+			for (j = ibegin; j <= i__2; ++j) {
+	/* Computing MIN */
+			d__1 = gl, d__2 = gers[(j << 1) - 1];
+			gl = fmin(d__1,d__2);
+	/* Computing MAX */
+			d__1 = gu, d__2 = gers[j * 2];
+			gu = fmax(d__1,d__2);
+	/* L40: */
+			}
+	/*           [JAN/28/2009] */
+	/*           change SPDIAM by TNORM in lines 2 and 3 thereafter */
+	/*           line 1: remove computation of SPDIAM (not useful anymore) */
+	/*           SPDIAM = GU - GL */
+	/*           GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */
+	/*           GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */
+			gl = gl - tnorm * 2. * eps * in - *pivmin * 2.;
+			gu = gu + tnorm * 2. * eps * in + *pivmin * 2.;
+
+			if (irange > 1) {
+			if (gu < *wl) {
+	/*                 the local block contains none of the wanted eigenvalues */
+				nwl += in;
+				nwu += in;
+				goto L70;
+			}
+	/*              refine search interval if possible, only range (WL,WU] matters */
+			gl = fmax(gl,*wl);
+			gu = fmin(gu,*wu);
+			if (gl >= gu) {
+				goto L70;
+			}
+			}
+	/*           Find negcount of initial interval boundaries GL and GU */
+			work[*n + 1] = gl;
+			work[*n + in + 1] = gu;
+			odebz(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, 
+				pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
+				work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], &
+				w[*m + 1], &iblock[*m + 1], &iinfo);
+			if (iinfo != 0) {
+			*info = iinfo;
+			return 0;
+			}
+
+			nwl += iwork[1];
+			nwu += iwork[in + 1];
+			iwoff = *m - iwork[1];
+	/*           Compute Eigenvalues */
+			itmax = (int) ((log(gu - gl + *pivmin) - log(*pivmin)) / log(
+				2.)) + 2;
+			odebz(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, 
+				pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
+				work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], 
+				 &w[*m + 1], &iblock[*m + 1], &iinfo);
+			if (iinfo != 0) {
+			*info = iinfo;
+			return 0;
+			}
+
+	/*           Copy eigenvalues into W and IBLOCK */
+	/*           Use -JBLK for block number for unconverged eigenvalues. */
+	/*           Loop over the number of output intervals from ODEBZ */
+			i__2 = iout;
+			for (j = 1; j <= i__2; ++j) {
+	/*              eigenvalue approximation is middle point of interval */
+			tmp1 = (work[j + *n] + work[j + in + *n]) * .5;
+	/*              semi length of error interval */
+			tmp2 = (d__1 = work[j + *n] - work[j + in + *n], fabs(d__1)) * 
+				.5;
+			if (j > iout - iinfo) {
+	/*                 Flag non-convergence. */
+				ncnvrg = TRUE_;
+				ib = -jblk;
+			} else {
+				ib = jblk;
+			}
+			i__3 = iwork[j + in] + iwoff;
+			for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) {
+				w[je] = tmp1;
+				werr[je] = tmp2;
+				indexw[je] = je - iwoff;
+				iblock[je] = ib;
+	/* L50: */
+			}
+	/* L60: */
+			}
+
+			*m += im;
+		}
+	L70:
+		;
+		}
+	/*     If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */
+	/*     If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */
+		if (irange == 3) {
+		idiscl = *il - 1 - nwl;
+		idiscu = nwu - *iu;
+
+		if (idiscl > 0) {
+			im = 0;
+			i__1 = *m;
+			for (je = 1; je <= i__1; ++je) {
+	/*              Remove some of the smallest eigenvalues from the left so that */
+	/*              at the end IDISCL =0. Move all eigenvalues up to the left. */
+			if (w[je] <= wlu && idiscl > 0) {
+				--idiscl;
+			} else {
+				++im;
+				w[im] = w[je];
+				werr[im] = werr[je];
+				indexw[im] = indexw[je];
+				iblock[im] = iblock[je];
+			}
+	/* L80: */
+			}
+			*m = im;
+		}
+		if (idiscu > 0) {
+	/*           Remove some of the largest eigenvalues from the right so that */
+	/*           at the end IDISCU =0. Move all eigenvalues up to the left. */
+			im = *m + 1;
+			for (je = *m; je >= 1; --je) {
+			if (w[je] >= wul && idiscu > 0) {
+				--idiscu;
+			} else {
+				--im;
+				w[im] = w[je];
+				werr[im] = werr[je];
+				indexw[im] = indexw[je];
+				iblock[im] = iblock[je];
+			}
+	/* L81: */
+			}
+			jee = 0;
+			i__1 = *m;
+			for (je = im; je <= i__1; ++je) {
+			++jee;
+			w[jee] = w[je];
+			werr[jee] = werr[je];
+			indexw[jee] = indexw[je];
+			iblock[jee] = iblock[je];
+	/* L82: */
+			}
+			*m = *m - im + 1;
+		}
+		if (idiscl > 0 || idiscu > 0) {
+	/*           Code to deal with effects of bad arithmetic. (If N(w) is */
+	/*           monotone non-decreasing, this should never happen.) */
+	/*           Some low eigenvalues to be discarded are not in (WL,WLU], */
+	/*           or high eigenvalues to be discarded are not in (WUL,WU] */
+	/*           so just kill off the smallest IDISCL/largest IDISCU */
+	/*           eigenvalues, by marking the corresponding IBLOCK = 0 */
+			if (idiscl > 0) {
+			wkill = *wu;
+			i__1 = idiscl;
+			for (jdisc = 1; jdisc <= i__1; ++jdisc) {
+				iw = 0;
+				i__2 = *m;
+				for (je = 1; je <= i__2; ++je) {
+				if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) {
+					iw = je;
+					wkill = w[je];
+				}
+	/* L90: */
+				}
+				iblock[iw] = 0;
+	/* L100: */
+			}
+			}
+			if (idiscu > 0) {
+			wkill = *wl;
+			i__1 = idiscu;
+			for (jdisc = 1; jdisc <= i__1; ++jdisc) {
+				iw = 0;
+				i__2 = *m;
+				for (je = 1; je <= i__2; ++je) {
+				if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) {
+					iw = je;
+					wkill = w[je];
+				}
+	/* L110: */
+				}
+				iblock[iw] = 0;
+	/* L120: */
+			}
+			}
+	/*           Now erase all eigenvalues with IBLOCK set to zero */
+			im = 0;
+			i__1 = *m;
+			for (je = 1; je <= i__1; ++je) {
+			if (iblock[je] != 0) {
+				++im;
+				w[im] = w[je];
+				werr[im] = werr[je];
+				indexw[im] = indexw[je];
+				iblock[im] = iblock[je];
+			}
+	/* L130: */
+			}
+			*m = im;
+		}
+		if (idiscl < 0 || idiscu < 0) {
+			toofew = TRUE_;
+		}
+		}
+
+		if ((irange == 1 && *m != *n) || (irange == 3 && *m != *iu - *il + 1)) {
+		toofew = TRUE_;
+		}
+	/*     If ORDER='B', do nothing the eigenvalues are already sorted by */
+	/*        block. */
+	/*     If ORDER='E', sort the eigenvalues from smallest to largest */
+		if (olsame(order, "E") && *nsplit > 1) {
+		i__1 = *m - 1;
+		for (je = 1; je <= i__1; ++je) {
+			ie = 0;
+			tmp1 = w[je];
+			i__2 = *m;
+			for (j = je + 1; j <= i__2; ++j) {
+			if (w[j] < tmp1) {
+				ie = j;
+				tmp1 = w[j];
+			}
+	/* L140: */
+			}
+			if (ie != 0) {
+			tmp2 = werr[ie];
+			itmp1 = iblock[ie];
+			itmp2 = indexw[ie];
+			w[ie] = w[je];
+			werr[ie] = werr[je];
+			iblock[ie] = iblock[je];
+			indexw[ie] = indexw[je];
+			w[je] = tmp1;
+			werr[je] = tmp2;
+			iblock[je] = itmp1;
+			indexw[je] = itmp2;
+			}
+	/* L150: */
+		}
+		}
+
+		*info = 0;
+		if (ncnvrg) {
+		++(*info);
+		}
+		if (toofew) {
+		*info += 2;
+		}
+		return 0;
+
+	/*     End of ODRRD */
+
+	} /* odrrd_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrre.hpp b/external/pmrrr/include/pmrrr/lapack/odrre.hpp
new file mode 100644
index 0000000000..d48f393975
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrre.hpp
@@ -0,0 +1,869 @@
+/**
+	C++ template version of LAPACK routine dlarre.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRE_HPP__
+#define __ODRRE_HPP__
+
+#include <limits>
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <cfloat>
+#include <cassert>
+
+#include <pmrrr/lapack/lapack.hpp>
+#include <pmrrr/lapack/odrrd.hpp>
+#include <pmrrr/lapack/odrrb.hpp>
+#include <pmrrr/lapack/odrrc.hpp>
+#include <pmrrr/lapack/odrrk.hpp>
+#include <pmrrr/lapack/odrnv.hpp>
+#include <pmrrr/lapack/odsq2.hpp>
+#include <pmrrr/blas/odcpy.hpp>
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrre(const char *range, int *n, FloatingType *vl, 
+		FloatingType *vu, int *il, int *iu, FloatingType *d__, FloatingType *e, 
+		FloatingType *e2, FloatingType *rtol1, FloatingType *rtol2, FloatingType *spltol, 
+		int *nsplit, int *isplit, int *m, FloatingType *w, 
+		FloatingType *werr, FloatingType *wgap, int *iblock, int *indexw, 
+		FloatingType *gers, FloatingType *pivmin, FloatingType *work, int *
+		iwork, int *info)
+	{
+		/* Table of constant values */
+		static int c__1 = 1;
+		static int c__2 = 2;
+
+		/* System generated locals */
+		int i__1, i__2;
+		FloatingType d__1, d__2, d__3;
+
+		/* Builtin functions */
+		//    FloatingType sqrt(FloatingType), log(FloatingType);
+
+		/* Local variables */
+		int i__, j;
+		FloatingType s1, s2;
+		int mb;
+		FloatingType gl;
+		int in, mm;
+		FloatingType gu;
+		int cnt;
+		FloatingType eps, tau, tmp, rtl;
+		int cnt1, cnt2;
+		FloatingType tmp1, eabs;
+		int iend, jblk;
+		FloatingType eold;
+		int indl;
+		FloatingType dmax__, emax;
+		int wend, idum, indu;
+		FloatingType rtol;
+		int iseed[4];
+		FloatingType avgap, sigma;
+		//extern int olsame_(char *, char *);
+		int iinfo;
+	//    extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, 
+	//	    FloatingType *, int *);
+		FloatingType norep;
+		//extern /* Subroutine */ int odsq2_(int *, FloatingType *, int *);
+		// extern FloatingType odmch_(char *);
+		int ibegin;
+		FloatingType forceb;
+		int irange;
+		FloatingType sgndef;
+		
+		int wbegin;
+		
+		FloatingType safmin, spdiam;
+		
+		FloatingType usedqd;
+		FloatingType clwdth, isleft;
+		
+		FloatingType isrght, bsrtol, dpivot;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  To find the desired eigenvalues of a given real symmetric */
+	/*  tridiagonal matrix T, ODRRE sets any "small" off-diagonal */
+	/*  elements to zero, and for each unreduced block T_i, it finds */
+	/*  (a) a suitable shift at one end of the block's spectrum, */
+	/*  (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
+	/*  (c) eigenvalues of each L_i D_i L_i^T. */
+	/*  The representations and eigenvalues found are then used by */
+	/*  DSTEMR to compute the eigenvectors of T. */
+	/*  The accuracy varies depending on whether bisection is used to */
+	/*  find a few eigenvalues or the dqds algorithm (subroutine ODSQ2) to */
+	/*  conpute all and then discard any unwanted one. */
+	/*  As an added benefit, ODRRE also outputs the n */
+	/*  Gerschgorin intervals for the matrices L_i D_i L_i^T. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  RANGE   (input) CHARACTER */
+	/*          = 'A': ("All")   all eigenvalues will be found. */
+	/*          = 'V': ("Value") all eigenvalues in the half-open interval */
+	/*                           (VL, VU] will be found. */
+	/*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
+	/*                           entire matrix) will be found. */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. N > 0. */
+
+	/*  VL      (input/output) DOUBLE PRECISION */
+	/*  VU      (input/output) DOUBLE PRECISION */
+	/*          If RANGE='V', the lower and upper bounds for the eigenvalues. */
+	/*          Eigenvalues less than or equal to VL, or greater than VU, */
+	/*          will not be returned.  VL < VU. */
+	/*          If RANGE='I' or ='A', ODRRE computes bounds on the desired */
+	/*          part of the spectrum. */
+
+	/*  IL      (input) INT */
+	/*  IU      (input) INT */
+	/*          If RANGE='I', the indices (in ascending order) of the */
+	/*          smallest and largest eigenvalues to be returned. */
+	/*          1 <= IL <= IU <= N. */
+
+	/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the N diagonal elements of the tridiagonal */
+	/*          matrix T. */
+	/*          On exit, the N diagonal elements of the diagonal */
+	/*          matrices D_i. */
+
+	/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the first (N-1) entries contain the subdiagonal */
+	/*          elements of the tridiagonal matrix T; E(N) need not be set. */
+	/*          On exit, E contains the subdiagonal elements of the unit */
+	/*          bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
+	/*          1 <= I <= NSPLIT, contain the base points sigma_i on output. */
+
+	/*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the first (N-1) entries contain the SQUARES of the */
+	/*          subdiagonal elements of the tridiagonal matrix T; */
+	/*          E2(N) need not be set. */
+	/*          On exit, the entries E2( ISPLIT( I ) ), */
+	/*          1 <= I <= NSPLIT, have been set to zero */
+
+	/*  RTOL1   (input) DOUBLE PRECISION */
+	/*  RTOL2   (input) DOUBLE PRECISION */
+	/*           Parameters for bisection. */
+	/*           An interval [LEFT,RIGHT] has converged if */
+	/*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
+
+	/*  SPLTOL (input) DOUBLE PRECISION */
+	/*          The threshold for splitting. */
+
+	/*  NSPLIT  (output) INT */
+	/*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
+
+	/*  ISPLIT  (output) INT array, dimension (N) */
+	/*          The splitting points, at which T breaks up into blocks. */
+	/*          The first block consists of rows/columns 1 to ISPLIT(1), */
+	/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
+	/*          etc., and the NSPLIT-th consists of rows/columns */
+	/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
+
+	/*  M       (output) INT */
+	/*          The total number of eigenvalues (of all L_i D_i L_i^T) */
+	/*          found. */
+
+	/*  W       (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The first M elements contain the eigenvalues. The */
+	/*          eigenvalues of each of the blocks, L_i D_i L_i^T, are */
+	/*          sorted in ascending order ( ODRRE may use the */
+	/*          remaining N-M elements as workspace). */
+
+	/*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The error bound on the corresponding eigenvalue in W. */
+
+	/*  WGAP    (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The separation from the right neighbor eigenvalue in W. */
+	/*          The gap is only with respect to the eigenvalues of the same block */
+	/*          as each block has its own representation tree. */
+	/*          Exception: at the right end of a block we store the left gap */
+
+	/*  IBLOCK  (output) INT array, dimension (N) */
+	/*          The indices of the blocks (submatrices) associated with the */
+	/*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
+	/*          W(i) belongs to the first block from the top, =2 if W(i) */
+	/*          belongs to the second block, etc. */
+
+	/*  INDEXW  (output) INT array, dimension (N) */
+	/*          The indices of the eigenvalues within each block (submatrix); */
+	/*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
+	/*          i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
+
+	/*  GERS    (output) DOUBLE PRECISION array, dimension (2*N) */
+	/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
+	/*          is (GERS(2*i-1), GERS(2*i)). */
+
+	/*  PIVMIN  (output) DOUBLE PRECISION */
+	/*          The minimum pivot in the Sturm sequence for T. */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N) */
+	/*          Workspace. */
+
+	/*  IWORK   (workspace) INT array, dimension (5*N) */
+	/*          Workspace. */
+
+	/*  INFO    (output) INT */
+	/*          = 0:  successful exit */
+	/*          > 0:  A problem occured in ODRRE. */
+	/*          < 0:  One of the called subroutines signaled an internal problem. */
+	/*                Needs inspection of the corresponding parameter IINFO */
+	/*                for further information. */
+
+	/*          =-1:  Problem in ODRRD. */
+	/*          = 2:  No base representation could be found in MAXTRY iterations. */
+	/*                Increasing MAXTRY and recompilation might be a remedy. */
+	/*          =-3:  Problem in ODRRB when computing the refined root */
+	/*                representation for ODSQ2. */
+	/*          =-4:  Problem in ODRRB when preforming bisection on the */
+	/*                desired part of the spectrum. */
+	/*          =-5:  Problem in ODSQ2. */
+	/*          =-6:  Problem in ODSQ2. */
+
+	/*  Further Details */
+	/*  The base representations are required to suffer very little */
+	/*  element growth and consequently define all their eigenvalues to */
+	/*  high relative accuracy. */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Local Arrays .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--iwork;
+		--work;
+		--gers;
+		--indexw;
+		--iblock;
+		--wgap;
+		--werr;
+		--w;
+		--isplit;
+		--e2;
+		--e;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+
+	/*     Decode RANGE */
+
+		if (olsame(range, "A")) {
+		irange = 1;
+		} else if (olsame(range, "V")) {
+		irange = 3;
+		} else if (olsame(range, "I")) {
+		irange = 2;
+		}
+		*m = 0;
+	/*     Get machine constants */
+		safmin = std::numeric_limits<FloatingType>::min(); // odmch_("S");
+		eps = std::numeric_limits<FloatingType>::epsilon(); // odmch_("P");
+	/*     Set parameters */
+		rtl = sqrt(eps);
+		bsrtol = sqrt(eps);
+	/*     Treat case of 1x1 matrix for quick return */
+		if (*n == 1) {
+		if (irange == 1 || (irange == 3 && d__[1] > *vl && d__[1] <= *vu) || 
+			(irange == 2 && *il == 1 && *iu == 1) ) {
+			*m = 1;
+			w[1] = d__[1];
+	/*           The computation error of the eigenvalue is zero */
+			werr[1] = 0.;
+			wgap[1] = 0.;
+			iblock[1] = 1;
+			indexw[1] = 1;
+			gers[1] = d__[1];
+			gers[2] = d__[1];
+		}
+	/*        store the shift for the initial RRR, which is zero in this case */
+		e[1] = 0.;
+		return 0;
+		}
+	/*     General case: tridiagonal matrix of order > 1 */
+
+	/*     Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
+	/*     Compute maximum off-diagonal entry and pivmin. */
+		gl = d__[1];
+		gu = d__[1];
+		eold = 0.;
+		emax = 0.;
+		e[*n] = 0.;
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		werr[i__] = 0.;
+		wgap[i__] = 0.;
+		eabs = (d__1 = e[i__], fabs(d__1));
+		if (eabs >= emax) {
+			emax = eabs;
+		}
+		tmp1 = eabs + eold;
+		gers[(i__ << 1) - 1] = d__[i__] - tmp1;
+	/* Computing MIN */
+		d__1 = gl, d__2 = gers[(i__ << 1) - 1];
+		gl = fmin(d__1,d__2);
+		gers[i__ * 2] = d__[i__] + tmp1;
+	/* Computing MAX */
+		d__1 = gu, d__2 = gers[i__ * 2];
+		gu = fmax(d__1,d__2);
+		eold = eabs;
+	/* L5: */
+		}
+	/*     The minimum pivot allowed in the Sturm sequence for T */
+	/* Computing MAX */
+	/* Computing 2nd power */
+		d__3 = emax;
+		d__1 = 1., d__2 = d__3 * d__3;
+		*pivmin = safmin * fmax(d__1,d__2);
+	/*     Compute spectral diameter. The Gerschgorin bounds give an */
+	/*     estimate that is wrong by at most a factor of SQRT(2) */
+		spdiam = gu - gl;
+	/*     Compute splitting points */
+		odrra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
+			iinfo);
+	/*     Can force use of bisection instead of faster DQDS. */
+	/*     Option left in the code for future multisection work. */
+		forceb = FALSE_;
+	/*     Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
+	/*     explicitly wants bisection. */
+		usedqd = irange == 1 && ! forceb;
+		if (irange == 1 && ! forceb) {
+	/*        Set interval [VL,VU] that contains all eigenvalues */
+		*vl = gl;
+		*vu = gu;
+		} else {
+	/*        We call ODRRD to find crude approximations to the eigenvalues */
+	/*        in the desired range. In case IRANGE = INDRNG, we also obtain the */
+	/*        interval (VL,VU] that contains all the wanted eigenvalues. */
+	/*        An interval [LEFT,RIGHT] has converged if */
+	/*        RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
+	/*        ODRRD needs a WORK of size 4*N, IWORK of size 3*N */
+		odrrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
+			1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], 
+			vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
+		if (iinfo != 0) {
+			*info = -1;
+			return 0;
+		}
+	/*        Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
+		i__1 = *n;
+		for (i__ = mm + 1; i__ <= i__1; ++i__) {
+			w[i__] = 0.;
+			werr[i__] = 0.;
+			iblock[i__] = 0;
+			indexw[i__] = 0;
+	/* L14: */
+		}
+		}
+	/* ** */
+	/*     Loop over unreduced blocks */
+		ibegin = 1;
+		wbegin = 1;
+		i__1 = *nsplit;
+		for (jblk = 1; jblk <= i__1; ++jblk) {
+		iend = isplit[jblk];
+		in = iend - ibegin + 1;
+	/*        1 X 1 block */
+		if (in == 1) {
+			if (irange == 1 || (irange == 3 && d__[ibegin] > *vl && d__[ibegin]
+				 <= *vu) || (irange == 2 && iblock[wbegin] == jblk) ) {
+			++(*m);
+			w[*m] = d__[ibegin];
+			werr[*m] = 0.;
+	/*              The gap for a single block doesn't matter for the later */
+	/*              algorithm and is assigned an arbitrary large value */
+			wgap[*m] = 0.;
+			iblock[*m] = jblk;
+			indexw[*m] = 1;
+			++wbegin;
+			}
+	/*           E( IEND ) holds the shift for the initial RRR */
+			e[iend] = 0.;
+			ibegin = iend + 1;
+			goto L170;
+		}
+
+	/*        Blocks of size larger than 1x1 */
+
+	/*        E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
+		e[iend] = 0.;
+
+	/*        Find local outer bounds GL,GU for the block */
+		gl = d__[ibegin];
+		gu = d__[ibegin];
+		i__2 = iend;
+		for (i__ = ibegin; i__ <= i__2; ++i__) {
+	/* Computing MIN */
+			d__1 = gers[(i__ << 1) - 1];
+			gl = fmin(d__1,gl);
+	/* Computing MAX */
+			d__1 = gers[i__ * 2];
+			gu = fmax(d__1,gu);
+	/* L15: */
+		}
+		spdiam = gu - gl;
+		if (! (irange == 1 && ! forceb)) {
+	/*           Count the number of eigenvalues in the current block. */
+			mb = 0;
+			i__2 = mm;
+			for (i__ = wbegin; i__ <= i__2; ++i__) {
+			if (iblock[i__] == jblk) {
+				++mb;
+			} else {
+				goto L21;
+			}
+	/* L20: */
+			}
+	L21:
+			if (mb == 0) {
+	/*              No eigenvalue in the current block lies in the desired range */
+	/*              E( IEND ) holds the shift for the initial RRR */
+			e[iend] = 0.;
+			ibegin = iend + 1;
+			goto L170;
+			} else {
+	/*              Decide whether dqds or bisection is more efficient */
+			usedqd = (FloatingType) mb > in * .5 && ! forceb;
+			wend = wbegin + mb - 1;
+	/*              Calculate gaps for the current block */
+	/*              In later stages, when representations for individual */
+	/*              eigenvalues are different, we use SIGMA = E( IEND ). */
+			sigma = 0.;
+			i__2 = wend - 1;
+			for (i__ = wbegin; i__ <= i__2; ++i__) {
+	/* Computing MAX */
+				d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + 
+					werr[i__]);
+				wgap[i__] = fmax(d__1,d__2);
+	/* L30: */
+			}
+	/* Computing MAX */
+			d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
+			wgap[wend] = fmax(d__1,d__2);
+	/*              Find local index of the first and last desired evalue. */
+			indl = indexw[wbegin];
+			indu = indexw[wend];
+			}
+		}
+		if ((irange == 1 && ! forceb) || usedqd) {
+	/*           Case of DQDS */
+	/*           Find approximations to the extremal eigenvalues of the block */
+			odrrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
+				rtl, &tmp, &tmp1, &iinfo);
+			if (iinfo != 0) {
+			*info = -1;
+			return 0;
+			}
+	/* Computing MAX */
+			d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, 
+				fabs(d__1));
+			isleft = fmax(d__2,d__3);
+			odrrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
+				rtl, &tmp, &tmp1, &iinfo);
+			if (iinfo != 0) {
+			*info = -1;
+			return 0;
+			}
+	/* Computing MIN */
+			d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, 
+				fabs(d__1));
+			isrght = fmin(d__2,d__3);
+	/*           Improve the estimate of the spectral diameter */
+			spdiam = isrght - isleft;
+		} else {
+	/*           Case of bisection */
+	/*           Find approximations to the wanted extremal eigenvalues */
+	/* Computing MAX */
+			d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = 
+				w[wbegin] - werr[wbegin], fabs(d__1));
+			isleft = fmax(d__2,d__3);
+	/* Computing MIN */
+			d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
+				wend] + werr[wend], fabs(d__1));
+			isrght = fmin(d__2,d__3);
+		}
+	/*        Decide whether the base representation for the current block */
+	/*        L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
+	/*        should be on the left or the right end of the current block. */
+	/*        The strategy is to shift to the end which is "more populated" */
+	/*        Furthermore, decide whether to use DQDS for the computation of */
+	/*        the eigenvalue approximations at the end of ODRRE or bisection. */
+	/*        dqds is chosen if all eigenvalues are desired or the number of */
+	/*        eigenvalues to be computed is large compared to the blocksize. */
+		if (irange == 1 && ! forceb) {
+	/*           If all the eigenvalues have to be computed, we use dqd */
+			usedqd = TRUE_;
+	/*           INDL is the local index of the first eigenvalue to compute */
+			indl = 1;
+			indu = in;
+	/*           MB =  number of eigenvalues to compute */
+			mb = in;
+			wend = wbegin + mb - 1;
+	/*           Define 1/4 and 3/4 points of the spectrum */
+			s1 = isleft + spdiam * .25;
+			s2 = isrght - spdiam * .25;
+		} else {
+	/*           ODRRD has computed IBLOCK and INDEXW for each eigenvalue */
+	/*           approximation. */
+	/*           choose sigma */
+			if (usedqd) {
+			s1 = isleft + spdiam * .25;
+			s2 = isrght - spdiam * .25;
+			} else {
+			tmp = fmin(isrght,*vu) - fmax(isleft,*vl);
+			s1 = fmax(isleft,*vl) + tmp * .25;
+			s2 = fmin(isrght,*vu) - tmp * .25;
+			}
+		}
+	/*        Compute the negcount at the 1/4 and 3/4 points */
+		if (mb > 1) {
+			odrrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
+				cnt, &cnt1, &cnt2, &iinfo);
+		}
+		if (mb == 1) {
+			sigma = gl;
+			sgndef = 1.;
+		} else if (cnt1 - indl >= indu - cnt2) {
+			if (irange == 1 && ! forceb) {
+			sigma = fmax(isleft,gl);
+			} else if (usedqd) {
+	/*              use Gerschgorin bound as shift to get pos def matrix */
+	/*              for dqds */
+			sigma = isleft;
+			} else {
+	/*              use approximation of the first desired eigenvalue of the */
+	/*              block as shift */
+			sigma = fmax(isleft,*vl);
+			}
+			sgndef = 1.;
+		} else {
+			if (irange == 1 && ! forceb) {
+			sigma = fmin(isrght,gu);
+			} else if (usedqd) {
+	/*              use Gerschgorin bound as shift to get neg def matrix */
+	/*              for dqds */
+			sigma = isrght;
+			} else {
+	/*              use approximation of the first desired eigenvalue of the */
+	/*              block as shift */
+			sigma = fmin(isrght,*vu);
+			}
+			sgndef = -1.;
+		}
+	/*        An initial SIGMA has been chosen that will be used for computing */
+	/*        T - SIGMA I = L D L^T */
+	/*        Define the increment TAU of the shift in case the initial shift */
+	/*        needs to be refined to obtain a factorization with not too much */
+	/*        element growth. */
+		if (usedqd) {
+	/*           The initial SIGMA was to the outer end of the spectrum */
+	/*           the matrix is definite and we need not retreat. */
+			tau = spdiam * eps * *n + *pivmin * 2.;
+			tau = fmax(tau, 2 * eps * fabs(sigma));
+		} else {
+			if (mb > 1) {
+			clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
+			avgap = (d__1 = clwdth / (FloatingType) (wend - wbegin), fabs(
+				d__1));
+			if (sgndef == 1.) {
+	/* Computing MAX */
+				d__1 = wgap[wbegin];
+				tau = fmax(d__1,avgap) * .5;
+	/* Computing MAX */
+				d__1 = tau, d__2 = werr[wbegin];
+				tau = fmax(d__1,d__2);
+			} else {
+	/* Computing MAX */
+				d__1 = wgap[wend - 1];
+				tau = fmax(d__1,avgap) * .5;
+	/* Computing MAX */
+				d__1 = tau, d__2 = werr[wend];
+				tau = fmax(d__1,d__2);
+			}
+			} else {
+			tau = werr[wbegin];
+			}
+		}
+
+		for (idum = 1; idum <= 6; ++idum) {
+	/*           Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
+	/*           Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
+	/*           pivots in WORK(2*IN+1:3*IN) */
+			dpivot = d__[ibegin] - sigma;
+			work[1] = dpivot;
+			dmax__ = fabs(work[1]);
+			j = ibegin;
+			i__2 = in - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			work[(in << 1) + i__] = 1. / work[i__];
+			tmp = e[j] * work[(in << 1) + i__];
+			work[in + i__] = tmp;
+			dpivot = d__[j + 1] - sigma - tmp * e[j];
+			work[i__ + 1] = dpivot;
+	/* Computing MAX */
+			d__1 = dmax__, d__2 = fabs(dpivot);
+			dmax__ = fmax(d__1,d__2);
+			++j;
+	/* L70: */
+			}
+	/*           check for element growth */
+			if (dmax__ > spdiam * 64.) {
+			norep = TRUE_;
+			} else {
+			norep = FALSE_;
+			}
+			if (usedqd && ! norep) {
+	/*              Ensure the definiteness of the representation */
+	/*              All entries of D (of L D L^T) must have the same sign */
+			i__2 = in;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+				tmp = sgndef * work[i__];
+				if (tmp < 0.) {
+				norep = TRUE_;
+				}
+	/* L71: */
+			}
+			}
+			if (norep) {
+	/*              Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
+	/*              shift which makes the matrix definite. So we should end up */
+	/*              here really only in the case of IRANGE = VALRNG or INDRNG. */
+			if (idum == 5) {
+				if (sgndef == 1.) {
+	/*                    The fudged Gerschgorin shift should succeed */
+				sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
+				} else {
+				sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
+				}
+			} else {
+				sigma -= sgndef * tau;
+				tau *= 2.;
+			}
+			} else {
+	/*              an initial RRR is found */
+			goto L83;
+			}
+	/* L80: */
+		}
+	/*        if the program reaches this point, no base representation could be */
+	/*        found in MAXTRY iterations. */
+		*info = 2;
+		return 0;
+	L83:
+	/*        At this point, we have found an initial base representation */
+	/*        T - SIGMA I = L D L^T with not too much element growth. */
+	/*        Store the shift. */
+		e[iend] = sigma;
+	/*        Store D and L. */
+		blas::odcpy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
+		i__2 = in - 1;
+		blas::odcpy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
+		if (mb > 1) {
+
+	/*           Perturb each entry of the base representation by a small */
+	/*           (but random) relative amount to overcome difficulties with */
+	/*           glued matrices. */
+
+			for (i__ = 1; i__ <= 4; ++i__) {
+			iseed[i__ - 1] = 1;
+	/* L122: */
+			}
+			i__2 = (in << 1) - 1;
+			odrnv(&c__2, iseed, &i__2, &work[1]);
+			i__2 = in - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
+			e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
+	/* L125: */
+			}
+			d__[iend] *= eps * 4. * work[in] + 1.;
+
+		}
+
+	/*        Don't update the Gerschgorin intervals because keeping track */
+	/*        of the updates would be too much work in DLARRV. */
+	/*        We update W instead and use it to locate the proper Gerschgorin */
+	/*        intervals. */
+	/*        Compute the required eigenvalues of L D L' by bisection or dqds */
+		if (! usedqd) {
+	/*           If ODRRD has been used, shift the eigenvalue approximations */
+	/*           according to their representation. This is necessary for */
+	/*           a uniform DLARRV since dqds computes eigenvalues of the */
+	/*           shifted representation. In DLARRV, W will always hold the */
+	/*           UNshifted eigenvalue approximation. */
+			i__2 = wend;
+			for (j = wbegin; j <= i__2; ++j) {
+			w[j] -= sigma;
+			werr[j] += (d__1 = w[j], fabs(d__1)) * eps;
+	/* L134: */
+			}
+	/*           call ODRRB to reduce eigenvalue error of the approximations */
+	/*           from ODRRD */
+			i__2 = iend - 1;
+			for (i__ = ibegin; i__ <= i__2; ++i__) {
+	/* Computing 2nd power */
+			d__1 = e[i__];
+			work[i__] = d__[i__] * (d__1 * d__1);
+	/* L135: */
+			}
+	/*           use bisection to find EV from INDL to INDU */
+			i__2 = indl - 1;
+			odrrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, 
+				rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
+				work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
+				iinfo);
+			if (iinfo != 0) {
+			*info = -4;
+			return 0;
+			}
+	/*           ODRRB computes all gaps correctly except for the last one */
+	/*           Record distance to VU/GU */
+	/* Computing MAX */
+			d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
+			wgap[wend] = fmax(d__1,d__2);
+			i__2 = indu;
+			for (i__ = indl; i__ <= i__2; ++i__) {
+			++(*m);
+			iblock[*m] = jblk;
+			indexw[*m] = i__;
+	/* L138: */
+			}
+		} else {
+	/*           Call dqds to get all eigs (and then possibly delete unwanted */
+	/*           eigenvalues). */
+	/*           Note that dqds finds the eigenvalues of the L D L^T representation */
+	/*           of T to high relative accuracy. High relative accuracy */
+	/*           might be lost when the shift of the RRR is subtracted to obtain */
+	/*           the eigenvalues of T. However, T is not guaranteed to define its */
+	/*           eigenvalues to high relative accuracy anyway. */
+	/*           Set RTOL to the order of the tolerance used in ODSQ2 */
+	/*           This is an ESTIMATED error, the worst case bound is 4*N*EPS */
+	/*           which is usually too large and requires unnecessary work to be */
+	/*           done by bisection when computing the eigenvectors */
+			rtol = log((FloatingType) in) * 4. * eps;
+			j = ibegin;
+			i__2 = in - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			work[(i__ << 1) - 1] = (d__1 = d__[j], fabs(d__1));
+			work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
+			++j;
+	/* L140: */
+			}
+			work[(in << 1) - 1] = (d__1 = d__[iend], fabs(d__1));
+			work[in * 2] = 0.;
+			odsq2(&in, &work[1], &iinfo);
+			if (iinfo != 0) {
+	/*              If IINFO = -5 then an index is part of a tight cluster */
+	/*              and should be changed. The index is in IWORK(1) and the */
+	/*              gap is in WORK(N+1) */
+			*info = -5;
+			return 0;
+			} else {
+	/*              Test that all eigenvalues are positive as expected */
+			i__2 = in;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+				if (work[i__] < 0.) {
+				*info = -6;
+				return 0;
+				}
+	/* L149: */
+			}
+			}
+			if (sgndef > 0.) {
+			i__2 = indu;
+			for (i__ = indl; i__ <= i__2; ++i__) {
+				++(*m);
+				w[*m] = work[in - i__ + 1];
+				iblock[*m] = jblk;
+				indexw[*m] = i__;
+	/* L150: */
+			}
+			} else {
+			i__2 = indu;
+			for (i__ = indl; i__ <= i__2; ++i__) {
+				++(*m);
+				w[*m] = -work[i__];
+				iblock[*m] = jblk;
+				indexw[*m] = i__;
+	/* L160: */
+			}
+			}
+			i__2 = *m;
+			for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
+	/*              the value of RTOL below should be the tolerance in ODSQ2 */
+			werr[i__] = rtol * (d__1 = w[i__], fabs(d__1));
+	/* L165: */
+			}
+			i__2 = *m - 1;
+			for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
+	/*              compute the right gap between the intervals */
+	/* Computing MAX */
+			d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
+				i__]);
+			wgap[i__] = fmax(d__1,d__2);
+	/* L166: */
+			}
+	/* Computing MAX */
+			d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
+			wgap[*m] = fmax(d__1,d__2);
+		}
+	/*        proceed with next block */
+		ibegin = iend + 1;
+		wbegin = wend + 1;
+	L170:
+		;
+		}
+
+		return 0;
+
+	/*     end of ODRRE */
+
+	} /* odrre_ */
+
+} //namespace lapack
+
+} //namespace pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrf.hpp b/external/pmrrr/include/pmrrr/lapack/odrrf.hpp
new file mode 100644
index 0000000000..f32e9860b8
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrf.hpp
@@ -0,0 +1,439 @@
+/**
+	C++ template version of LAPACK routine dlarrf.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRF_HPP__
+#define __ODRRF_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+
+#include <pmrrr/lapack/odnan.hpp>
+#include <pmrrr/blas/odcpy.hpp>
+
+namespace pmrrr { namespace lapack {
+
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrf(int *n, FloatingType *d__, FloatingType *l, 
+		FloatingType *ld, int *clstrt, int *clend, FloatingType *w, 
+		FloatingType *wgap, FloatingType *werr, FloatingType *spdiam, FloatingType *
+		clgapl, FloatingType *clgapr, FloatingType *pivmin, FloatingType *sigma, 
+		FloatingType *dplus, FloatingType *lplus, FloatingType *work, int *info)
+	{
+		/* Table of constant values */
+		#define TRUE_ (1)
+		#define FALSE_ (0)
+		static int c__1 = 1;
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2, d__3;
+
+		/* Builtin functions */
+		//    FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, 
+			znm2, growthbound, fail, fact, oldp;
+		int indx;
+		FloatingType prod;
+		int ktry;
+		FloatingType fail2, avgap, ldmax, rdmax;
+		int shift;
+	//    extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, 
+	//	    FloatingType *, int *);
+		int dorrr1;
+		// extern FloatingType odmch_(char *);
+		FloatingType ldelta;
+		int nofail;
+		FloatingType mingap, lsigma, rdelta;
+		//extern int odnan_(FloatingType *);
+		int forcer;
+		FloatingType rsigma, clwdth;
+		int sawnan1, sawnan2, tryrrr1;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+	/* * */
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Given the initial representation L D L^T and its cluster of close */
+	/*  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
+	/*  W( CLEND ), ODRRF finds a new relatively robust representation */
+	/*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
+	/*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix (subblock, if the matrix splitted). */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of the diagonal matrix D. */
+
+	/*  L       (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (N-1) subdiagonal elements of the unit bidiagonal */
+	/*          matrix L. */
+
+	/*  LD      (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (N-1) elements L(i)*D(i). */
+
+	/*  CLSTRT  (input) INT */
+	/*          The index of the first eigenvalue in the cluster. */
+
+	/*  CLEND   (input) INT */
+	/*          The index of the last eigenvalue in the cluster. */
+
+	/*  W       (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
+	/*          The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
+	/*          W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
+	/*          close eigenalues. */
+
+	/*  WGAP    (input/output) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
+	/*          The separation from the right neighbor eigenvalue in W. */
+
+	/*  WERR    (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
+	/*          WERR contain the semiwidth of the uncertainty */
+	/*          interval of the corresponding eigenvalue APPROXIMATION in W */
+
+	/*  SPDIAM (input) estimate of the spectral diameter obtained from the */
+	/*          Gerschgorin intervals */
+
+	/*  CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
+	/*          Set by the calling routine to protect against shifts too close */
+	/*          to eigenvalues outside the cluster. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot allowed in the Sturm sequence. */
+
+	/*  SIGMA   (output) DOUBLE PRECISION */
+	/*          The shift used to form L(+) D(+) L(+)^T. */
+
+	/*  DPLUS   (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of the diagonal matrix D(+). */
+
+	/*  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The first (N-1) elements of LPLUS contain the subdiagonal */
+	/*          elements of the unit bidiagonal matrix L(+). */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
+	/*          Workspace. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--work;
+		--lplus;
+		--dplus;
+		--werr;
+		--wgap;
+		--w;
+		--ld;
+		--l;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+		fact = 2.;
+		eps = std::numeric_limits<FloatingType>::epsilon(); // eps = odmch_("Precision");
+		shift = 0;
+		forcer = FALSE_;
+	/*     Note that we cannot guarantee that for any of the shifts tried, */
+	/*     the factorization has a small or even moderate element growth. */
+	/*     There could be Ritz values at both ends of the cluster and despite */
+	/*     backing off, there are examples where all factorizations tried */
+	/*     (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
+	/*     element growth. */
+	/*     For this reason, we should use PIVMIN in this subroutine so that at */
+	/*     least the L D L^T factorization exists. It can be checked afterwards */
+	/*     whether the element growth caused bad residuals/orthogonality. */
+	/*     Decide whether the code should accept the best among all */
+	/*     representations despite large element growth or signal INFO=1 */
+		nofail = TRUE_;
+
+	/*     Compute the average gap length of the cluster */
+		clwdth = (d__1 = w[*clend] - w[*clstrt], fabs(d__1)) + werr[*clend] + werr[
+			*clstrt];
+		avgap = clwdth / (FloatingType) (*clend - *clstrt);
+		mingap = fmin(*clgapl,*clgapr);
+	/*     Initial values for shifts to both ends of cluster */
+	/* Computing MIN */
+		d__1 = w[*clstrt], d__2 = w[*clend];
+		lsigma = fmin(d__1,d__2) - werr[*clstrt];
+	/* Computing MAX */
+		d__1 = w[*clstrt], d__2 = w[*clend];
+		rsigma = fmax(d__1,d__2) + werr[*clend];
+	/*     Use a small fudge to make sure that we really shift to the outside */
+		lsigma -= fabs(lsigma) * 4. * eps;
+		rsigma += fabs(rsigma) * 4. * eps;
+	/*     Compute upper bounds for how much to back off the initial shifts */
+		ldmax = mingap * .25 + *pivmin * 2.;
+		rdmax = mingap * .25 + *pivmin * 2.;
+	/* Computing MAX */
+		d__1 = avgap, d__2 = wgap[*clstrt];
+		ldelta = fmax(d__1,d__2) / fact;
+	/* Computing MAX */
+		d__1 = avgap, d__2 = wgap[*clend - 1];
+		rdelta = fmax(d__1,d__2) / fact;
+
+	/*     Initialize the record of the best representation found */
+
+		// s = std::numeric_limits<FloatingType>::min(); // s = odmch_("S");
+		smlgrowth = std::numeric_limits<FloatingType>::max();
+		fail = (FloatingType) (*n - 1) * mingap / (*spdiam * eps);
+		fail2 = (FloatingType) (*n - 1) * mingap / (*spdiam * sqrt(eps));
+		bestshift = lsigma;
+
+	/*     while (KTRY <= KTRYMAX) */
+		ktry = 0;
+		growthbound = *spdiam * 8.;
+	L5:
+		sawnan1 = FALSE_;
+		sawnan2 = FALSE_;
+	/*     Ensure that we do not back off too much of the initial shifts */
+		ldelta = fmin(ldmax,ldelta);
+		rdelta = fmin(rdmax,rdelta);
+	/*     Compute the element growth when shifting to both ends of the cluster */
+	/*     accept the shift if there is no element growth at one of the two ends */
+	/*     Left end */
+		s = -lsigma;
+		dplus[1] = d__[1] + s;
+		if (fabs(dplus[1]) < *pivmin) {
+		dplus[1] = -(*pivmin);
+	/*        Need to set SAWNAN1 because refined RRR test should not be used */
+	/*        in this case */
+		sawnan1 = TRUE_;
+		}
+		max1 = fabs(dplus[1]);
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		lplus[i__] = ld[i__] / dplus[i__];
+		s = s * lplus[i__] * l[i__] - lsigma;
+		dplus[i__ + 1] = d__[i__ + 1] + s;
+		if ((d__1 = dplus[i__ + 1], fabs(d__1)) < *pivmin) {
+			dplus[i__ + 1] = -(*pivmin);
+	/*           Need to set SAWNAN1 because refined RRR test should not be used */
+	/*           in this case */
+			sawnan1 = TRUE_;
+		}
+	/* Computing MAX */
+		d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], fabs(d__1));
+		max1 = fmax(d__2,d__3);
+	/* L6: */
+		}
+		sawnan1 = sawnan1 || odnan(&max1);
+		if (forcer || (max1 <= growthbound && ! sawnan1)) {
+		*sigma = lsigma;
+		shift = 1;
+		goto L100;
+		}
+	/*     Right end */
+		s = -rsigma;
+		work[1] = d__[1] + s;
+		if (fabs(work[1]) < *pivmin) {
+		work[1] = -(*pivmin);
+	/*        Need to set SAWNAN2 because refined RRR test should not be used */
+	/*        in this case */
+		sawnan2 = TRUE_;
+		}
+		max2 = fabs(work[1]);
+		i__1 = *n - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		work[*n + i__] = ld[i__] / work[i__];
+		s = s * work[*n + i__] * l[i__] - rsigma;
+		work[i__ + 1] = d__[i__ + 1] + s;
+		if ((d__1 = work[i__ + 1], fabs(d__1)) < *pivmin) {
+			work[i__ + 1] = -(*pivmin);
+	/*           Need to set SAWNAN2 because refined RRR test should not be used */
+	/*           in this case */
+			sawnan2 = TRUE_;
+		}
+	/* Computing MAX */
+		d__2 = max2, d__3 = (d__1 = work[i__ + 1], fabs(d__1));
+		max2 = fmax(d__2,d__3);
+	/* L7: */
+		}
+		sawnan2 = sawnan2 || odnan(&max2);
+		if (forcer || (max2 <= growthbound && ! sawnan2)) {
+		*sigma = rsigma;
+		shift = 2;
+		goto L100;
+		}
+	/*     If we are at this point, both shifts led to too much element growth */
+	/*     Record the better of the two shifts (provided it didn't lead to NaN) */
+		if (sawnan1 && sawnan2) {
+	/*        both MAX1 and MAX2 are NaN */
+		goto L50;
+		} else {
+		if (! sawnan1) {
+			indx = 1;
+			if (max1 <= smlgrowth) {
+			smlgrowth = max1;
+			bestshift = lsigma;
+			}
+		}
+		if (! sawnan2) {
+			if (sawnan1 || max2 <= max1) {
+			indx = 2;
+			}
+			if (max2 <= smlgrowth) {
+			smlgrowth = max2;
+			bestshift = rsigma;
+			}
+		}
+		}
+	/*     If we are here, both the left and the right shift led to */
+	/*     element growth. If the element growth is moderate, then */
+	/*     we may still accept the representation, if it passes a */
+	/*     refined test for RRR. This test supposes that no NaN occurred. */
+	/*     Moreover, we use the refined RRR test only for isolated clusters. */
+		if (clwdth < mingap / 128. && fmin(max1,max2) < fail2 && ! sawnan1 && ! 
+			sawnan2) {
+		dorrr1 = TRUE_;
+		} else {
+		dorrr1 = FALSE_;
+		}
+		tryrrr1 = TRUE_;
+		if (tryrrr1 && dorrr1) {
+		if (indx == 1) {
+			tmp = (d__1 = dplus[*n], fabs(d__1));
+			znm2 = 1.;
+			prod = 1.;
+			oldp = 1.;
+			for (i__ = *n - 1; i__ >= 1; --i__) {
+			if (prod <= eps) {
+				prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
+					 work[*n + i__]) * oldp;
+			} else {
+				prod *= (d__1 = work[*n + i__], fabs(d__1));
+			}
+			oldp = prod;
+	/* Computing 2nd power */
+			d__1 = prod;
+			znm2 += d__1 * d__1;
+	/* Computing MAX */
+			d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, fabs(d__1));
+			tmp = fmax(d__2,d__3);
+	/* L15: */
+			}
+			rrr1 = tmp / (*spdiam * sqrt(znm2));
+			if (rrr1 <= 8.) {
+			*sigma = lsigma;
+			shift = 1;
+			goto L100;
+			}
+		} else if (indx == 2) {
+			tmp = (d__1 = work[*n], fabs(d__1));
+			znm2 = 1.;
+			prod = 1.;
+			oldp = 1.;
+			for (i__ = *n - 1; i__ >= 1; --i__) {
+			if (prod <= eps) {
+				prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * 
+					lplus[i__]) * oldp;
+			} else {
+				prod *= (d__1 = lplus[i__], fabs(d__1));
+			}
+			oldp = prod;
+	/* Computing 2nd power */
+			d__1 = prod;
+			znm2 += d__1 * d__1;
+	/* Computing MAX */
+			d__2 = tmp, d__3 = (d__1 = work[i__] * prod, fabs(d__1));
+			tmp = fmax(d__2,d__3);
+	/* L16: */
+			}
+			rrr2 = tmp / (*spdiam * sqrt(znm2));
+			if (rrr2 <= 8.) {
+			*sigma = rsigma;
+			shift = 2;
+			goto L100;
+			}
+		}
+		}
+	L50:
+		if (ktry < 1) {
+	/*        If we are here, both shifts failed also the RRR test. */
+	/*        Back off to the outside */
+	/* Computing MAX */
+		d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
+		lsigma = fmax(d__1,d__2);
+	/* Computing MIN */
+		d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
+		rsigma = fmin(d__1,d__2);
+		ldelta *= 2.;
+		rdelta *= 2.;
+		++ktry;
+		goto L5;
+		} else {
+	/*        None of the representations investigated satisfied our */
+	/*        criteria. Take the best one we found. */
+		if (smlgrowth < fail || nofail) {
+			lsigma = bestshift;
+			rsigma = bestshift;
+			forcer = TRUE_;
+			goto L5;
+		} else {
+			*info = 1;
+			return 0;
+		}
+		}
+	L100:
+		if (shift == 1) {
+		} else if (shift == 2) {
+	/*        store new L and D back into DPLUS, LPLUS */
+		blas::odcpy(n, &work[1], &c__1, &dplus[1], &c__1);
+		i__1 = *n - 1;
+		blas::odcpy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
+		}
+		return 0;
+
+	/*     End of ODRRF */
+
+	} /* odrrf_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrj.hpp b/external/pmrrr/include/pmrrr/lapack/odrrj.hpp
new file mode 100644
index 0000000000..f636e85f86
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrj.hpp
@@ -0,0 +1,348 @@
+/**
+	C++ template version of LAPACK routine dlarrj.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRJ_HPP__
+#define __ODRRJ_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrj(int *n, FloatingType *d__, FloatingType *e2, 
+		int *ifirst, int *ilast, FloatingType *rtol, int *offset, 
+		FloatingType *w, FloatingType *werr, FloatingType *work, int *iwork, 
+		FloatingType *pivmin, FloatingType *spdiam, int *info)
+	{
+		/* System generated locals */
+		int i__1, i__2;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType log(FloatingType);
+
+		/* Local variables */
+		int i__, j, k, p;
+		FloatingType s;
+		int i1, i2, ii;
+		FloatingType fac, mid;
+		int cnt;
+		FloatingType tmp, left;
+		int iter, nint, prev, next, savi1;
+		FloatingType right, width, dplus;
+		int olnint, maxitr;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Given the initial eigenvalue approximations of T, ODRRJ */
+	/*  does  bisection to refine the eigenvalues of T, */
+	/*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
+	/*  guesses for these eigenvalues are input in W, the corresponding estimate */
+	/*  of the error in these guesses in WERR. During bisection, intervals */
+	/*  [left, right] are maintained by storing their mid-points and */
+	/*  semi-widths in the arrays W and WERR respectively. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of T. */
+
+	/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The Squares of the (N-1) subdiagonal elements of T. */
+
+	/*  IFIRST  (input) INT */
+	/*          The index of the first eigenvalue to be computed. */
+
+	/*  ILAST   (input) INT */
+	/*          The index of the last eigenvalue to be computed. */
+
+	/*  RTOL   (input) DOUBLE PRECISION */
+	/*          Tolerance for the convergence of the bisection intervals. */
+	/*          An interval [LEFT,RIGHT] has converged if */
+	/*          RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */
+
+	/*  OFFSET  (input) INT */
+	/*          Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */
+	/*          through ILAST-OFFSET elements of these arrays are to be used. */
+
+	/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
+	/*          estimates of the eigenvalues of L D L^T indexed IFIRST through */
+	/*          ILAST. */
+	/*          On output, these estimates are refined. */
+
+	/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
+	/*          the errors in the estimates of the corresponding elements in W. */
+	/*          On output, these errors are refined. */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
+	/*          Workspace. */
+
+	/*  IWORK   (workspace) INT array, dimension (2*N) */
+	/*          Workspace. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot in the Sturm sequence for T. */
+
+	/*  SPDIAM  (input) DOUBLE PRECISION */
+	/*          The spectral diameter of T. */
+
+	/*  INFO    (output) INT */
+	/*          Error flag. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--iwork;
+		--work;
+		--werr;
+		--w;
+		--e2;
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+
+		maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 
+			2;
+
+	/*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
+	/*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
+	/*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */
+	/*     for an unconverged interval is set to the index of the next unconverged */
+	/*     interval, and is -1 or 0 for a converged interval. Thus a linked */
+	/*     list of unconverged intervals is set up. */
+
+		i1 = *ifirst;
+		i2 = *ilast;
+	/*     The number of unconverged intervals */
+		nint = 0;
+	/*     The last unconverged interval found */
+		prev = 0;
+		i__1 = i2;
+		for (i__ = i1; i__ <= i__1; ++i__) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+		left = w[ii] - werr[ii];
+		mid = w[ii];
+		right = w[ii] + werr[ii];
+		width = right - mid;
+	/* Computing MAX */
+		d__1 = fabs(left), d__2 = fabs(right);
+		tmp = fmax(d__1,d__2);
+	/*        The following test prevents the test of converged intervals */
+		if (width < *rtol * tmp) {
+	/*           This interval has already converged and does not need refinement. */
+	/*           (Note that the gaps might change through refining the */
+	/*            eigenvalues, however, they can only get bigger.) */
+	/*           Remove it from the list. */
+			iwork[k - 1] = -1;
+	/*           Make sure that I1 always points to the first unconverged interval */
+			if (i__ == i1 && i__ < i2) {
+			i1 = i__ + 1;
+			}
+			if (prev >= i1 && i__ <= i2) {
+			iwork[(prev << 1) - 1] = i__ + 1;
+			}
+		} else {
+	/*           unconverged interval found */
+			prev = i__;
+	/*           Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
+
+	/*           Do while( CNT(LEFT).GT.I-1 ) */
+
+			fac = 1.;
+	L20:
+			cnt = 0;
+			s = left;
+			dplus = d__[1] - s;
+			if (dplus < 0.) {
+			++cnt;
+			}
+			i__2 = *n;
+			for (j = 2; j <= i__2; ++j) {
+			dplus = d__[j] - s - e2[j - 1] / dplus;
+			if (dplus < 0.) {
+				++cnt;
+			}
+	/* L30: */
+			}
+			if (cnt > i__ - 1) {
+			left -= werr[ii] * fac;
+			fac *= 2.;
+			goto L20;
+			}
+
+	/*           Do while( CNT(RIGHT).LT.I ) */
+
+			fac = 1.;
+	L50:
+			cnt = 0;
+			s = right;
+			dplus = d__[1] - s;
+			if (dplus < 0.) {
+			++cnt;
+			}
+			i__2 = *n;
+			for (j = 2; j <= i__2; ++j) {
+			dplus = d__[j] - s - e2[j - 1] / dplus;
+			if (dplus < 0.) {
+				++cnt;
+			}
+	/* L60: */
+			}
+			if (cnt < i__) {
+			right += werr[ii] * fac;
+			fac *= 2.;
+			goto L50;
+			}
+			++nint;
+			iwork[k - 1] = i__ + 1;
+			iwork[k] = cnt;
+		}
+		work[k - 1] = left;
+		work[k] = right;
+	/* L75: */
+		}
+		savi1 = i1;
+
+	/*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
+	/*     and while (ITER.LT.MAXITR) */
+
+		iter = 0;
+	L80:
+		prev = i1 - 1;
+		i__ = i1;
+		olnint = nint;
+		i__1 = olnint;
+		for (p = 1; p <= i__1; ++p) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+		next = iwork[k - 1];
+		left = work[k - 1];
+		right = work[k];
+		mid = (left + right) * .5;
+	/*        semiwidth of interval */
+		width = right - mid;
+	/* Computing MAX */
+		d__1 = fabs(left), d__2 = fabs(right);
+		tmp = fmax(d__1,d__2);
+		if (width < *rtol * tmp || iter == maxitr) {
+	/*           reduce number of unconverged intervals */
+			--nint;
+	/*           Mark interval as converged. */
+			iwork[k - 1] = 0;
+			if (i1 == i__) {
+			i1 = next;
+			} else {
+	/*              Prev holds the last unconverged interval previously examined */
+			if (prev >= i1) {
+				iwork[(prev << 1) - 1] = next;
+			}
+			}
+			i__ = next;
+			goto L100;
+		}
+		prev = i__;
+
+	/*        Perform one bisection step */
+
+		cnt = 0;
+		s = mid;
+		dplus = d__[1] - s;
+		if (dplus < 0.) {
+			++cnt;
+		}
+		i__2 = *n;
+		for (j = 2; j <= i__2; ++j) {
+			dplus = d__[j] - s - e2[j - 1] / dplus;
+			if (dplus < 0.) {
+			++cnt;
+			}
+	/* L90: */
+		}
+		if (cnt <= i__ - 1) {
+			work[k - 1] = mid;
+		} else {
+			work[k] = mid;
+		}
+		i__ = next;
+	L100:
+		;
+		}
+		++iter;
+	/*     do another loop if there are still unconverged intervals */
+	/*     However, in the last iteration, all intervals are accepted */
+	/*     since this is the best we can do. */
+		if (nint > 0 && iter <= maxitr) {
+		goto L80;
+		}
+
+
+	/*     At this point, all the intervals have converged */
+		i__1 = *ilast;
+		for (i__ = savi1; i__ <= i__1; ++i__) {
+		k = i__ << 1;
+		ii = i__ - *offset;
+	/*        All intervals marked by '0' have been refined. */
+		if (iwork[k - 1] == 0) {
+			w[ii] = (work[k - 1] + work[k]) * .5;
+			werr[ii] = work[k] - w[ii];
+		}
+	/* L110: */
+		}
+
+		return 0;
+
+	/*     End of ODRRJ */
+
+	} /* odrrj_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrk.hpp b/external/pmrrr/include/pmrrr/lapack/odrrk.hpp
new file mode 100644
index 0000000000..11e6f8a01c
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrk.hpp
@@ -0,0 +1,204 @@
+/**
+	C++ template version of LAPACK routine dlarrk.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRK_HPP__
+#define __ODRRK_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrk(int *n, int *iw, FloatingType *gl, 
+		FloatingType *gu, FloatingType *d__, FloatingType *e2, FloatingType *pivmin, 
+		FloatingType *reltol, FloatingType *w, FloatingType *werr, int *info)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType log(FloatingType);
+
+		/* Local variables */
+		int i__, it;
+		FloatingType mid, eps, tmp1, tmp2, left, atoli, right;
+		int itmax;
+		FloatingType rtoli, tnorm;
+		// extern FloatingType odmch_(char *);
+		int negcnt;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODRRK computes one eigenvalue of a symmetric tridiagonal */
+	/*  matrix T to suitable accuracy. This is an auxiliary code to be */
+	/*  called from DSTEMR. */
+
+	/*  To avoid overflow, the matrix must be scaled so that its */
+	/*  largest element is no greater than overflow**(1/2) * */
+	/*  underflow**(1/4) in absolute value, and for greatest */
+	/*  accuracy, it should not be much smaller than that. */
+
+	/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
+	/*  Matrix", Report CS41, Computer Science Dept., Stanford */
+	/*  University, July 21, 1966. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the tridiagonal matrix T.  N >= 0. */
+
+	/*  IW      (input) INT */
+	/*          The index of the eigenvalues to be returned. */
+
+	/*  GL      (input) DOUBLE PRECISION */
+	/*  GU      (input) DOUBLE PRECISION */
+	/*          An upper and a lower bound on the eigenvalue. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The n diagonal elements of the tridiagonal matrix T. */
+
+	/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
+	/*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
+
+	/*  PIVMIN  (input) DOUBLE PRECISION */
+	/*          The minimum pivot allowed in the Sturm sequence for T. */
+
+	/*  RELTOL  (input) DOUBLE PRECISION */
+	/*          The minimum relative width of an interval.  When an interval */
+	/*          is narrower than RELTOL times the larger (in */
+	/*          magnitude) endpoint, then it is considered to be */
+	/*          sufficiently small, i.e., converged.  Note: this should */
+	/*          always be at least radix*machine epsilon. */
+
+	/*  W       (output) DOUBLE PRECISION */
+
+	/*  WERR    (output) DOUBLE PRECISION */
+	/*          The error bound on the corresponding eigenvalue approximation */
+	/*          in W. */
+
+	/*  INFO    (output) INT */
+	/*          = 0:       Eigenvalue converged */
+	/*          = -1:      Eigenvalue did NOT converge */
+
+	/*  Internal Parameters */
+	/*  =================== */
+
+	/*  FUDGE   DOUBLE PRECISION, default = 2 */
+	/*          A "fudge factor" to widen the Gershgorin intervals. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Get machine constants */
+		/* Parameter adjustments */
+		--e2;
+		--d__;
+
+		/* Function Body */
+		eps = std::numeric_limits<FloatingType>::epsilon(); // odmch_("P");
+	/* Computing MAX */
+		d__1 = fabs(*gl), d__2 = fabs(*gu);
+		tnorm = fmax(d__1,d__2);
+		rtoli = *reltol;
+		atoli = *pivmin * 4.;
+		itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2;
+		*info = -1;
+		left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.;
+		right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.;
+		it = 0;
+	L10:
+
+	/*     Check if interval converged or maximum number of iterations reached */
+
+		tmp1 = (d__1 = right - left, fabs(d__1));
+	/* Computing MAX */
+		d__1 = fabs(right), d__2 = fabs(left);
+		tmp2 = fmax(d__1,d__2);
+	/* Computing MAX */
+		d__1 = fmax(atoli,*pivmin), d__2 = rtoli * tmp2;
+		if (tmp1 < fmax(d__1,d__2)) {
+		*info = 0;
+		goto L30;
+		}
+		if (it > itmax) {
+		goto L30;
+		}
+
+	/*     Count number of negative pivots for mid-point */
+
+		++it;
+		mid = (left + right) * .5;
+		negcnt = 0;
+		tmp1 = d__[1] - mid;
+		if (fabs(tmp1) < *pivmin) {
+		tmp1 = -(*pivmin);
+		}
+		if (tmp1 <= 0.) {
+		++negcnt;
+		}
+
+		i__1 = *n;
+		for (i__ = 2; i__ <= i__1; ++i__) {
+		tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid;
+		if (fabs(tmp1) < *pivmin) {
+			tmp1 = -(*pivmin);
+		}
+		if (tmp1 <= 0.) {
+			++negcnt;
+		}
+	/* L20: */
+		}
+		if (negcnt >= *iw) {
+		right = mid;
+		} else {
+		left = mid;
+		}
+		goto L10;
+	L30:
+
+	/*     Converged or maximum number of iterations reached */
+
+		*w = (left + right) * .5;
+		*werr = (d__1 = right - left, fabs(d__1)) * .5;
+		return 0;
+
+	/*     End of ODRRK */
+
+	} /* odrrk_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrr.hpp b/external/pmrrr/include/pmrrr/lapack/odrrr.hpp
new file mode 100644
index 0000000000..ce9f0d287b
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrr.hpp
@@ -0,0 +1,190 @@
+/**
+	C++ template version of LAPACK routine dlarrr.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRR_HPP__
+#define __ODRRR_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrr(int *n, FloatingType *d__, FloatingType *e, 
+		int *info)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__;
+		FloatingType eps, tmp, tmp2, rmin;
+		// extern FloatingType odmch_(char *);
+		FloatingType offdig, safmin;
+		int yesrel;
+		FloatingType smlnum, offdig2;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Perform tests to decide whether the symmetric tridiagonal matrix T */
+	/*  warrants expensive computations which guarantee high relative accuracy */
+	/*  in the eigenvalues. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix. N > 0. */
+
+	/*  D       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The N diagonal elements of the tridiagonal matrix T. */
+
+	/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the first (N-1) entries contain the subdiagonal */
+	/*          elements of the tridiagonal matrix T; E(N) is set to ZERO. */
+
+	/*  INFO    (output) INT */
+	/*          INFO = 0(default) : the matrix warrants computations preserving */
+	/*                              relative accuracy. */
+	/*          INFO = 1          : the matrix warrants computations guaranteeing */
+	/*                              only absolute accuracy. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     As a default, do NOT go for relative-accuracy preserving computations. */
+		/* Parameter adjustments */
+		--e;
+		--d__;
+
+		/* Function Body */
+		*info = 1;
+		safmin = std::numeric_limits<FloatingType>::min(); // safmin = odmch_("Safe minimum");
+		eps = std::numeric_limits<FloatingType>::epsilon(); // eps = odmch_("Precision");
+		smlnum = safmin / eps;
+		rmin = sqrt(smlnum);
+	/*     Tests for relative accuracy */
+
+	/*     Test for scaled diagonal dominance */
+	/*     Scale the diagonal entries to one and check whether the sum of the */
+	/*     off-diagonals is less than one */
+
+	/*     The sdd relative error bounds have a 1/(1- 2*x) factor in them, */
+	/*     x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */
+	/*     accuracy is promised.  In the notation of the code fragment below, */
+	/*     1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */
+	/*     We don't think it is worth going into "sdd mode" unless the relative */
+	/*     condition number is reasonable, not 1/macheps. */
+	/*     The threshold should be compatible with other thresholds used in the */
+	/*     code. We set  OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */
+	/*     to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */
+	/*     instead of the current OFFDIG + OFFDIG2 < 1 */
+
+		yesrel = TRUE_;
+		offdig = 0.;
+		tmp = sqrt((fabs(d__[1])));
+		if (tmp < rmin) {
+		yesrel = FALSE_;
+		}
+		if (! yesrel) {
+		goto L11;
+		}
+		i__1 = *n;
+		for (i__ = 2; i__ <= i__1; ++i__) {
+		tmp2 = sqrt((d__1 = d__[i__], fabs(d__1)));
+		if (tmp2 < rmin) {
+			yesrel = FALSE_;
+		}
+		if (! yesrel) {
+			goto L11;
+		}
+		offdig2 = (d__1 = e[i__ - 1], fabs(d__1)) / (tmp * tmp2);
+		if (offdig + offdig2 >= .999) {
+			yesrel = FALSE_;
+		}
+		if (! yesrel) {
+			goto L11;
+		}
+		tmp = tmp2;
+		offdig = offdig2;
+	/* L10: */
+		}
+	L11:
+		if (yesrel) {
+		*info = 0;
+		return 0;
+		} else {
+		}
+
+
+	/*     *** MORE TO BE IMPLEMENTED *** */
+
+
+	/*     Test if the lower bidiagonal matrix L from T = L D L^T */
+	/*     (zero shift facto) is well conditioned */
+
+
+	/*     Test if the upper bidiagonal matrix U from T = U D U^T */
+	/*     (zero shift facto) is well conditioned. */
+	/*     In this case, the matrix needs to be flipped and, at the end */
+	/*     of the eigenvector computation, the flip needs to be applied */
+	/*     to the computed eigenvectors (and the support) */
+
+
+		return 0;
+
+	/*     END OF ODRRR */
+
+	} /* odrrr_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odrrv.hpp b/external/pmrrr/include/pmrrr/lapack/odrrv.hpp
new file mode 100644
index 0000000000..63695dc78c
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odrrv.hpp
@@ -0,0 +1,1012 @@
+/**
+	C++ template version of LAPACK routine dlarrv.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRRV_HPP__
+#define __ODRRV_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odset.hpp>
+#include <pmrrr/lapack/odr1v.hpp>
+#include <pmrrr/lapack/odrrf.hpp>
+#include <pmrrr/lapack/odrrb.hpp>
+#include <pmrrr/blas/odcpy.hpp>
+#include <pmrrr/blas/odscal.hpp>
+
+#define imax(a,b) ( (a) > (b) ? (a) : (b) )
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odrrv(int *n, FloatingType *vl, FloatingType *vu, 
+		FloatingType *d__, FloatingType *l, FloatingType *pivmin, int *isplit, 
+		int *m, int *dol, int *dou, FloatingType *minrgp, 
+		FloatingType *rtol1, FloatingType *rtol2, FloatingType *w, FloatingType *werr, 
+		 FloatingType *wgap, int *iblock, int *indexw, FloatingType *gers, 
+		 FloatingType *z__, int *ldz, int *isuppz, FloatingType *work, 
+		int *iwork, int *info)
+	{
+
+		/* Table of constant values */
+		//TODO 
+		static FloatingType c_b5 = 0.;
+		static int c__1 = 1;
+		static int c__2 = 2;
+
+		/* System generated locals */
+		int z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
+		FloatingType d__1, d__2;
+		int L__1;
+
+		/* Builtin functions */
+		// FloatingType log(FloatingType);
+
+		/* Local variables */
+		int minwsize, i__, j, k, p, q, miniwsize, ii;
+		FloatingType gl;
+		int im, in;
+		FloatingType gu, gap, eps, tau, tol, tmp;
+		int zto;
+		FloatingType ztz;
+		int iend, jblk;
+		FloatingType lgap;
+		int done;
+		FloatingType rgap, left;
+		int wend, iter;
+		FloatingType bstw;
+		int itmp1;
+	//    extern /* Subroutine */ int odscl_(int *, FloatingType *, FloatingType *, 
+	//	    int *);
+		int indld;
+		FloatingType fudge;
+		int idone;
+		FloatingType sigma;
+		int iinfo, iindr;
+		FloatingType resid;
+		int eskip;
+		FloatingType right;
+	//    extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, 
+	//	    FloatingType *, int *);
+		int nclus, zfrom;
+		FloatingType rqtol;
+		int iindc1, iindc2;
+		//extern /* Subroutine */ int odr1v_(int *, int *, int *, 
+			/*FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, 
+			 int *, FloatingType *, FloatingType *, int *, int *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *);*/
+		int stp2ii;
+		FloatingType lambda;
+		// extern FloatingType odmch_(char *);
+		int ibegin, indeig;
+		int needbs;
+		int indlld;
+		FloatingType sgndef, mingma;
+		//extern /* Subroutine */ int odrrb_(int *, FloatingType *, FloatingType *, 
+			/* int *, int *, FloatingType *, FloatingType *, int *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, 
+			 FloatingType *, FloatingType *, int *, int *);*/
+		int oldien, oldncl, wbegin;
+		FloatingType spdiam;
+		int negcnt;
+		//extern /* Subroutine */ int odrrf_(int *, FloatingType *, FloatingType *, 
+			/* FloatingType *, int *, int *, FloatingType *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *, int *);*/
+		int oldcls;
+		FloatingType savgap;
+		int ndepth;
+		FloatingType ssigma;
+		//extern /* Subroutine */ int odset_(char *, int *, int *, 
+			//FloatingType *, FloatingType *, FloatingType *, int *);
+		int usedbs;
+		int iindwk, offset;
+		FloatingType gaptol;
+		int newcls, oldfst, indwrk, windex, oldlst;
+		int usedrq;
+		int newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
+		FloatingType bstres;
+		int newsiz, zusedu, zusedw;
+		FloatingType nrminv, rqcorr;
+		int tryrqc;
+		int isupmx;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODRRV computes the eigenvectors of the tridiagonal matrix */
+	/*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
+	/*  The input eigenvalues should have been computed by DLARRE. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix.  N >= 0. */
+
+	/*  VL      (input) DOUBLE PRECISION */
+	/*  VU      (input) DOUBLE PRECISION */
+	/*          Lower and upper bounds of the interval that contains the desired */
+	/*          eigenvalues. VL < VU. Needed to compute gaps on the left or right */
+	/*          end of the extremal eigenvalues in the desired RANGE. */
+
+	/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the N diagonal elements of the diagonal matrix D. */
+	/*          On exit, D may be overwritten. */
+
+	/*  L       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the (N-1) subdiagonal elements of the unit */
+	/*          bidiagonal matrix L are in elements 1 to N-1 of L */
+	/*          (if the matrix is not splitted.) At the end of each block */
+	/*          is stored the corresponding shift as given by DLARRE. */
+	/*          On exit, L is overwritten. */
+
+	/*  PIVMIN  (in) DOUBLE PRECISION */
+	/*          The minimum pivot allowed in the Sturm sequence. */
+
+	/*  ISPLIT  (input) INT array, dimension (N) */
+	/*          The splitting points, at which T breaks up into blocks. */
+	/*          The first block consists of rows/columns 1 to */
+	/*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
+	/*          through ISPLIT( 2 ), etc. */
+
+	/*  M       (input) INT */
+	/*          The total number of input eigenvalues.  0 <= M <= N. */
+
+	/*  DOL     (input) INT */
+	/*  DOU     (input) INT */
+	/*          If the user wants to compute only selected eigenvectors from all */
+	/*          the eigenvalues supplied, he can specify an index range DOL:DOU. */
+	/*          Or else the setting DOL=1, DOU=M should be applied. */
+	/*          Note that DOL and DOU refer to the order in which the eigenvalues */
+	/*          are stored in W. */
+	/*          If the user wants to compute only selected eigenpairs, then */
+	/*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
+	/*          computed eigenvectors. All other columns of Z are set to zero. */
+
+	/*  MINRGP  (input) DOUBLE PRECISION */
+
+	/*  RTOL1   (input) DOUBLE PRECISION */
+	/*  RTOL2   (input) DOUBLE PRECISION */
+	/*           Parameters for bisection. */
+	/*           An interval [LEFT,RIGHT] has converged if */
+	/*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
+
+	/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          The first M elements of W contain the APPROXIMATE eigenvalues for */
+	/*          which eigenvectors are to be computed.  The eigenvalues */
+	/*          should be grouped by split-off block and ordered from */
+	/*          smallest to largest within the block ( The output array */
+	/*          W from DLARRE is expected here ). Furthermore, they are with */
+	/*          respect to the shift of the corresponding root representation */
+	/*          for their block. On exit, W holds the eigenvalues of the */
+	/*          UNshifted matrix. */
+
+	/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          The first M elements contain the semiwidth of the uncertainty */
+	/*          interval of the corresponding eigenvalue in W */
+
+	/*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          The separation from the right neighbor eigenvalue in W. */
+
+	/*  IBLOCK  (input) INT array, dimension (N) */
+	/*          The indices of the blocks (submatrices) associated with the */
+	/*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
+	/*          W(i) belongs to the first block from the top, =2 if W(i) */
+	/*          belongs to the second block, etc. */
+
+	/*  INDEXW  (input) INT array, dimension (N) */
+	/*          The indices of the eigenvalues within each block (submatrix); */
+	/*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
+	/*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
+
+	/*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
+	/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
+	/*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
+	/*          be computed from the original UNshifted matrix. */
+
+	/*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
+	/*          If INFO = 0, the first M columns of Z contain the */
+	/*          orthonormal eigenvectors of the matrix T */
+	/*          corresponding to the input eigenvalues, with the i-th */
+	/*          column of Z holding the eigenvector associated with W(i). */
+	/*          Note: the user must ensure that at least max(1,M) columns are */
+	/*          supplied in the array Z. */
+
+	/*  LDZ     (input) INT */
+	/*          The leading dimension of the array Z.  LDZ >= 1, and if */
+	/*          JOBZ = 'V', LDZ >= max(1,N). */
+
+	/*  ISUPPZ  (output) INT array, dimension ( 2*max(1,M) ) */
+	/*          The support of the eigenvectors in Z, i.e., the indices */
+	/*          indicating the nonzero elements in Z. The I-th eigenvector */
+	/*          is nonzero only in elements ISUPPZ( 2*I-1 ) through */
+	/*          ISUPPZ( 2*I ). */
+
+	/*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N) */
+
+	/*  IWORK   (workspace) INT array, dimension (7*N) */
+
+	/*  INFO    (output) INT */
+	/*          = 0:  successful exit */
+
+	/*          > 0:  A problem occured in ODRRV. */
+	/*          < 0:  One of the called subroutines signaled an internal problem. */
+	/*                Needs inspection of the corresponding parameter IINFO */
+	/*                for further information. */
+
+	/*          =-1:  Problem in ODRRB when refining a child's eigenvalues. */
+	/*          =-2:  Problem in ODRRF when computing the RRR of a child. */
+	/*                When a child is inside a tight cluster, it can be difficult */
+	/*                to find an RRR. A partial remedy from the user's point of */
+	/*                view is to make the parameter MINRGP smaller and recompile. */
+	/*                However, as the orthogonality of the computed vectors is */
+	/*                proportional to 1/MINRGP, the user should be aware that */
+	/*                he might be trading in precision when he decreases MINRGP. */
+	/*          =-3:  Problem in ODRRB when refining a single eigenvalue */
+	/*                after the Rayleigh correction was rejected. */
+	/*          = 5:  The Rayleigh Quotient Iteration failed to converge to */
+	/*                full accuracy in MAXITR steps. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+	/*     .. */
+	/*     The first N entries of WORK are reserved for the eigenvalues */
+		/* Parameter adjustments */
+		--d__;
+		--l;
+		--isplit;
+		--w;
+		--werr;
+		--wgap;
+		--iblock;
+		--indexw;
+		--gers;
+		z_dim1 = *ldz;
+		z_offset = 1 + z_dim1;
+		z__ -= z_offset;
+		--isuppz;
+		--work;
+		--iwork;
+
+		/* Function Body */
+		indld = *n + 1;
+		indlld = (*n << 1) + 1;
+		indwrk = *n * 3 + 1;
+		minwsize = *n * 12;
+		i__1 = minwsize;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] = 0.;
+	/* L5: */
+		}
+	/*     IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
+	/*     factorization used to compute the FP vector */
+		iindr = 0;
+	/*     IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
+	/*     layer and the one above. */
+		iindc1 = *n;
+		iindc2 = *n << 1;
+		iindwk = *n * 3 + 1;
+		miniwsize = *n * 7;
+		i__1 = miniwsize;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		iwork[i__] = 0;
+	/* L10: */
+		}
+		zusedl = 1;
+		if (*dol > 1) {
+	/*        Set lower bound for use of Z */
+		zusedl = *dol - 1;
+		}
+		zusedu = *m;
+		if (*dou < *m) {
+	/*        Set lower bound for use of Z */
+		zusedu = *dou + 1;
+		}
+	/*     The width of the part of Z that is used */
+		zusedw = zusedu - zusedl + 1;
+		odset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
+		eps = std::numeric_limits<FloatingType>::epsilon(); // eps = odmch_("Precision");
+		rqtol = eps * 2.;
+
+	/*     Set expert flags for standard code. */
+		tryrqc = TRUE_;
+		if (*dol == 1 && *dou == *m) {
+		} else {
+	/*        Only selected eigenpairs are computed. Since the other evalues */
+	/*        are not refined by RQ iteration, bisection has to compute to full */
+	/*        accuracy. */
+		*rtol1 = eps * 4.;
+		*rtol2 = eps * 4.;
+		}
+	/*     The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
+	/*     desired eigenvalues. The support of the nonzero eigenvector */
+	/*     entries is contained in the interval IBEGIN:IEND. */
+	/*     Remark that if k eigenpairs are desired, then the eigenvectors */
+	/*     are stored in k contiguous columns of Z. */
+	/*     DONE is the number of eigenvectors already computed */
+		done = 0;
+		ibegin = 1;
+		wbegin = 1;
+		i__1 = iblock[*m];
+		for (jblk = 1; jblk <= i__1; ++jblk) {
+		iend = isplit[jblk];
+		sigma = l[iend];
+	/*        Find the eigenvectors of the submatrix indexed IBEGIN */
+	/*        through IEND. */
+		wend = wbegin - 1;
+	L15:
+		if (wend < *m) {
+			if (iblock[wend + 1] == jblk) {
+			++wend;
+			goto L15;
+			}
+		}
+		if (wend < wbegin) {
+			ibegin = iend + 1;
+			goto L170;
+		} else if (wend < *dol || wbegin > *dou) {
+			ibegin = iend + 1;
+			wbegin = wend + 1;
+			goto L170;
+		}
+	/*        Find local spectral diameter of the block */
+		gl = gers[(ibegin << 1) - 1];
+		gu = gers[ibegin * 2];
+		i__2 = iend;
+		for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
+	/* Computing MIN */
+			d__1 = gers[(i__ << 1) - 1];
+			gl = fmin(d__1,gl);
+	/* Computing MAX */
+			d__1 = gers[i__ * 2];
+			gu = fmax(d__1,gu);
+	/* L20: */
+		}
+		spdiam = gu - gl;
+	/*        OLDIEN is the last index of the previous block */
+		oldien = ibegin - 1;
+	/*        Calculate the size of the current block */
+		in = iend - ibegin + 1;
+	/*        The number of eigenvalues in the current block */
+		im = wend - wbegin + 1;
+	/*        This is for a 1x1 block */
+		if (ibegin == iend) {
+			++done;
+			z__[ibegin + wbegin * z_dim1] = 1.;
+			isuppz[(wbegin << 1) - 1] = ibegin;
+			isuppz[wbegin * 2] = ibegin;
+			w[wbegin] += sigma;
+			work[wbegin] = w[wbegin];
+			ibegin = iend + 1;
+			++wbegin;
+			goto L170;
+		}
+	/*        The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
+	/*        Note that these can be approximations, in this case, the corresp. */
+	/*        entries of WERR give the size of the uncertainty interval. */
+	/*        The eigenvalue approximations will be refined when necessary as */
+	/*        high relative accuracy is required for the computation of the */
+	/*        corresponding eigenvectors. */
+		blas::odcpy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
+	/*        We store in W the eigenvalue approximations w.r.t. the original */
+	/*        matrix T. */
+		i__2 = im;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+			w[wbegin + i__ - 1] += sigma;
+	/* L30: */
+		}
+	/*        NDEPTH is the current depth of the representation tree */
+		ndepth = 0;
+	/*        PARITY is either 1 or 0 */
+		parity = 1;
+	/*        NCLUS is the number of clusters for the next level of the */
+	/*        representation tree, we start with NCLUS = 1 for the root */
+		nclus = 1;
+		iwork[iindc1 + 1] = 1;
+		iwork[iindc1 + 2] = im;
+	/*        IDONE is the number of eigenvectors already computed in the current */
+	/*        block */
+		idone = 0;
+	/*        loop while( IDONE.LT.IM ) */
+	/*        generate the representation tree for the current block and */
+	/*        compute the eigenvectors */
+	L40:
+		if (idone < im) {
+	/*           This is a crude protection against infinitely deep trees */
+			if (ndepth > *m) {
+			*info = -2;
+			return 0;
+			}
+	/*           breadth first processing of the current level of the representation */
+	/*           tree: OLDNCL = number of clusters on current level */
+			oldncl = nclus;
+	/*           reset NCLUS to count the number of child clusters */
+			nclus = 0;
+
+			parity = 1 - parity;
+			if (parity == 0) {
+			oldcls = iindc1;
+			newcls = iindc2;
+			} else {
+			oldcls = iindc2;
+			newcls = iindc1;
+			}
+	/*           Process the clusters on the current level */
+			i__2 = oldncl;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			j = oldcls + (i__ << 1);
+	/*              OLDFST, OLDLST = first, last index of current cluster. */
+	/*                               cluster indices start with 1 and are relative */
+	/*                               to WBEGIN when accessing W, WGAP, WERR, Z */
+			oldfst = iwork[j - 1];
+			oldlst = iwork[j];
+			if (ndepth > 0) {
+	/*                 Retrieve relatively robust representation (RRR) of cluster */
+	/*                 that has been computed at the previous level */
+	/*                 The RRR is stored in Z and overwritten once the eigenvectors */
+	/*                 have been computed or when the cluster is refined */
+				if (*dol == 1 && *dou == *m) {
+	/*                    Get representation from location of the leftmost evalue */
+	/*                    of the cluster */
+				j = wbegin + oldfst - 1;
+				} else {
+				if (wbegin + oldfst - 1 < *dol) {
+	/*                       Get representation from the left end of Z array */
+					j = *dol - 1;
+				} else if (wbegin + oldfst - 1 > *dou) {
+	/*                       Get representation from the right end of Z array */
+					j = *dou;
+				} else {
+					j = wbegin + oldfst - 1;
+				}
+				}
+				blas::odcpy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
+	, &c__1);
+				i__3 = in - 1;
+				blas::odcpy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
+					ibegin], &c__1);
+				sigma = z__[iend + (j + 1) * z_dim1];
+	/*                 Set the corresponding entries in Z to zero */
+				odset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 
+					* z_dim1], ldz);
+			}
+	/*              Compute DL and DLL of current RRR */
+			i__3 = iend - 1;
+			for (j = ibegin; j <= i__3; ++j) {
+				tmp = d__[j] * l[j];
+				work[indld - 1 + j] = tmp;
+				work[indlld - 1 + j] = tmp * l[j];
+	/* L50: */
+			}
+			if (ndepth > 0) {
+	/*                 P and Q are index of the first and last eigenvalue to compute */
+	/*                 within the current block */
+				p = indexw[wbegin - 1 + oldfst];
+				q = indexw[wbegin - 1 + oldlst];
+	/*                 Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
+	/*                 thru' Q-OFFSET elements of these arrays are to be used. */
+	/*                  OFFSET = P-OLDFST */
+				offset = indexw[wbegin] - 1;
+	/*                 perform limited bisection (if necessary) to get approximate */
+	/*                 eigenvalues to the precision needed. */
+				odrrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 
+					 &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
+					wbegin], &werr[wbegin], &work[indwrk], &iwork[
+					iindwk], pivmin, &spdiam, &in, &iinfo);
+				if (iinfo != 0) {
+				*info = -1;
+				return 0;
+				}
+	/*                 We also recompute the extremal gaps. W holds all eigenvalues */
+	/*                 of the unshifted matrix and must be used for computation */
+	/*                 of WGAP, the entries of WORK might stem from RRRs with */
+	/*                 different shifts. The gaps from WBEGIN-1+OLDFST to */
+	/*                 WBEGIN-1+OLDLST are correctly computed in ODRRB. */
+	/*                 However, we only allow the gaps to become greater since */
+	/*                 this is what should happen when we decrease WERR */
+				if (oldfst > 1) {
+	/* Computing MAX */
+				d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 
+					oldfst - 1] - werr[wbegin + oldfst - 1] - w[
+					wbegin + oldfst - 2] - werr[wbegin + oldfst - 
+					2];
+				wgap[wbegin + oldfst - 2] = fmax(d__1,d__2);
+				}
+				if (wbegin + oldlst - 1 < wend) {
+	/* Computing MAX */
+				d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 
+					oldlst] - werr[wbegin + oldlst] - w[wbegin + 
+					oldlst - 1] - werr[wbegin + oldlst - 1];
+				wgap[wbegin + oldlst - 1] = fmax(d__1,d__2);
+				}
+	/*                 Each time the eigenvalues in WORK get refined, we store */
+	/*                 the newly found approximation with all shifts applied in W */
+				i__3 = oldlst;
+				for (j = oldfst; j <= i__3; ++j) {
+				w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
+	/* L53: */
+				}
+			}
+	/*              Process the current node. */
+			newfst = oldfst;
+			i__3 = oldlst;
+			for (j = oldfst; j <= i__3; ++j) {
+				if (j == oldlst) {
+	/*                    we are at the right end of the cluster, this is also the */
+	/*                    boundary of the child cluster */
+				newlst = j;
+				} else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
+					wbegin + j - 1], fabs(d__1))) {
+	/*                    the right relative gap is big enough, the child cluster */
+	/*                    (NEWFST,..,NEWLST) is well separated from the following */
+				newlst = j;
+				} else {
+	/*                    inside a child cluster, the relative gap is not */
+	/*                    big enough. */
+				goto L140;
+				}
+	/*                 Compute size of child cluster found */
+				newsiz = newlst - newfst + 1;
+	/*                 NEWFTT is the place in Z where the new RRR or the computed */
+	/*                 eigenvector is to be stored */
+				if (*dol == 1 && *dou == *m) {
+	/*                    Store representation at location of the leftmost evalue */
+	/*                    of the cluster */
+				newftt = wbegin + newfst - 1;
+				} else {
+				if (wbegin + newfst - 1 < *dol) {
+	/*                       Store representation at the left end of Z array */
+					newftt = *dol - 1;
+				} else if (wbegin + newfst - 1 > *dou) {
+	/*                       Store representation at the right end of Z array */
+					newftt = *dou;
+				} else {
+					newftt = wbegin + newfst - 1;
+				}
+				}
+				if (newsiz > 1) {
+
+	/*                    Current child is not a singleton but a cluster. */
+	/*                    Compute and store new representation of child. */
+
+
+	/*                    Compute left and right cluster gap. */
+
+	/*                    LGAP and RGAP are not computed from WORK because */
+	/*                    the eigenvalue approximations may stem from RRRs */
+	/*                    different shifts. However, W hold all eigenvalues */
+	/*                    of the unshifted matrix. Still, the entries in WGAP */
+	/*                    have to be computed from WORK since the entries */
+	/*                    in W might be of the same order so that gaps are not */
+	/*                    exhibited correctly for very close eigenvalues. */
+				if (newfst == 1) {
+	/* Computing MAX */
+					d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
+					lgap = fmax(d__1,d__2);
+				} else {
+					lgap = wgap[wbegin + newfst - 2];
+				}
+				rgap = wgap[wbegin + newlst - 1];
+
+	/*                    Compute left- and rightmost eigenvalue of child */
+	/*                    to high precision in order to shift as close */
+	/*                    as possible and obtain as large relative gaps */
+	/*                    as possible */
+
+				for (k = 1; k <= 2; ++k) {
+					if (k == 1) {
+					p = indexw[wbegin - 1 + newfst];
+					} else {
+					p = indexw[wbegin - 1 + newlst];
+					}
+					offset = indexw[wbegin] - 1;
+					odrrb(&in, &d__[ibegin], &work[indlld + ibegin 
+						- 1], &p, &p, &rqtol, &rqtol, &offset, &
+						work[wbegin], &wgap[wbegin], &werr[wbegin]
+	, &work[indwrk], &iwork[iindwk], pivmin, &
+						spdiam, &in, &iinfo);
+	/* L55: */
+				}
+
+				if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 
+					> *dou) {
+	/*                       if the cluster contains no desired eigenvalues */
+	/*                       skip the computation of that branch of the rep. tree */
+
+	/*                       We could skip before the refinement of the extremal */
+	/*                       eigenvalues of the child, but then the representation */
+	/*                       tree could be different from the one when nothing is */
+	/*                       skipped. For this reason we skip at this place. */
+					idone = idone + newlst - newfst + 1;
+					goto L139;
+				}
+
+	/*                    Compute RRR of child cluster. */
+	/*                    Note that the new RRR is stored in Z */
+
+	/*                    ODRRF needs LWORK = 2*N */
+				odrrf(&in, &d__[ibegin], &l[ibegin], &work[indld + 
+					ibegin - 1], &newfst, &newlst, &work[wbegin], 
+					&wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 
+					&rgap, pivmin, &tau, &z__[ibegin + newftt * 
+					z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 
+					 &work[indwrk], &iinfo);
+				if (iinfo == 0) {
+	/*                       a new RRR for the cluster was found by ODRRF */
+	/*                       update shift and store it */
+					ssigma = sigma + tau;
+					z__[iend + (newftt + 1) * z_dim1] = ssigma;
+	/*                       WORK() are the midpoints and WERR() the semi-width */
+	/*                       Note that the entries in W are unchanged. */
+					i__4 = newlst;
+					for (k = newfst; k <= i__4; ++k) {
+					fudge = eps * 3. * (d__1 = work[wbegin + k - 
+						1], fabs(d__1));
+					work[wbegin + k - 1] -= tau;
+					fudge += eps * 4. * (d__1 = work[wbegin + k - 
+						1], fabs(d__1));
+	/*                          Fudge errors */
+					werr[wbegin + k - 1] += fudge;
+	/*                          Gaps are not fudged. Provided that WERR is small */
+	/*                          when eigenvalues are close, a zero gap indicates */
+	/*                          that a new representation is needed for resolving */
+	/*                          the cluster. A fudge could lead to a wrong decision */
+	/*                          of judging eigenvalues 'separated' which in */
+	/*                          reality are not. This could have a negative impact */
+	/*                          on the orthogonality of the computed eigenvectors. */
+	/* L116: */
+					}
+					++nclus;
+					k = newcls + (nclus << 1);
+					iwork[k - 1] = newfst;
+					iwork[k] = newlst;
+				} else {
+					*info = -2;
+					return 0;
+				}
+				} else {
+
+	/*                    Compute eigenvector of singleton */
+
+				iter = 0;
+
+				tol = log((FloatingType) in) * 4. * eps;
+
+				k = newfst;
+				windex = wbegin + k - 1;
+	/* Computing MAX */
+				i__4 = windex - 1;
+				windmn = imax(i__4,1);
+	/* Computing MIN */
+				i__4 = windex + 1;
+				windpl = imin(i__4,*m);
+				lambda = work[windex];
+				++done;
+	/*                    Check if eigenvector computation is to be skipped */
+				if (windex < *dol || windex > *dou) {
+					eskip = TRUE_;
+					goto L125;
+				} else {
+					eskip = FALSE_;
+				}
+				left = work[windex] - werr[windex];
+				right = work[windex] + werr[windex];
+				indeig = indexw[windex];
+	/*                    Note that since we compute the eigenpairs for a child, */
+	/*                    all eigenvalue approximations are w.r.t the same shift. */
+	/*                    In this case, the entries in WORK should be used for */
+	/*                    computing the gaps since they exhibit even very small */
+	/*                    differences in the eigenvalues, as opposed to the */
+	/*                    entries in W which might "look" the same. */
+				if (k == 1) {
+	/*                       In the case RANGE='I' and with not much initial */
+	/*                       accuracy in LAMBDA and VL, the formula */
+	/*                       LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
+	/*                       can lead to an overestimation of the left gap and */
+	/*                       thus to inadequately early RQI 'convergence'. */
+	/*                       Prevent this by forcing a small left gap. */
+	/* Computing MAX */
+					d__1 = fabs(left), d__2 = fabs(right);
+					lgap = eps * fmax(d__1,d__2);
+				} else {
+					lgap = wgap[windmn];
+				}
+				if (k == im) {
+	/*                       In the case RANGE='I' and with not much initial */
+	/*                       accuracy in LAMBDA and VU, the formula */
+	/*                       can lead to an overestimation of the right gap and */
+	/*                       thus to inadequately early RQI 'convergence'. */
+	/*                       Prevent this by forcing a small right gap. */
+	/* Computing MAX */
+					d__1 = fabs(left), d__2 = fabs(right);
+					rgap = eps * fmax(d__1,d__2);
+				} else {
+					rgap = wgap[windex];
+				}
+				gap = fmin(lgap,rgap);
+				if (k == 1 || k == im) {
+	/*                       The eigenvector support can become wrong */
+	/*                       because significant entries could be cut off due to a */
+	/*                       large GAPTOL parameter in LAR1V. Prevent this. */
+					gaptol = 0.;
+				} else {
+					gaptol = gap * eps;
+				}
+				isupmn = in;
+				isupmx = 1;
+	/*                    Update WGAP so that it holds the minimum gap */
+	/*                    to the left or the right. This is crucial in the */
+	/*                    case where bisection is used to ensure that the */
+	/*                    eigenvalue is refined up to the required precision. */
+	/*                    The correct value is restored afterwards. */
+				savgap = wgap[windex];
+				wgap[windex] = gap;
+	/*                    We want to use the Rayleigh Quotient Correction */
+	/*                    as often as possible since it converges quadratically */
+	/*                    when we are close enough to the desired eigenvalue. */
+	/*                    However, the Rayleigh Quotient can have the wrong sign */
+	/*                    and lead us away from the desired eigenvalue. In this */
+	/*                    case, the best we can do is to use bisection. */
+				usedbs = FALSE_;
+				usedrq = FALSE_;
+	/*                    Bisection is initially turned off unless it is forced */
+				needbs = ! tryrqc;
+	L120:
+	/*                    Check if bisection should be used to refine eigenvalue */
+				if (needbs) {
+	/*                       Take the bisection as new iterate */
+					usedbs = TRUE_;
+					itmp1 = iwork[iindr + windex];
+					offset = indexw[wbegin] - 1;
+					d__1 = eps * 2.;
+					odrrb(&in, &d__[ibegin], &work[indlld + ibegin 
+						- 1], &indeig, &indeig, &c_b5, &d__1, &
+						offset, &work[wbegin], &wgap[wbegin], &
+						werr[wbegin], &work[indwrk], &iwork[
+						iindwk], pivmin, &spdiam, &itmp1, &iinfo);
+					if (iinfo != 0) {
+					*info = -3;
+					return 0;
+					}
+					lambda = work[windex];
+	/*                       Reset twist index from inaccurate LAMBDA to */
+	/*                       force computation of true MINGMA */
+					iwork[iindr + windex] = 0;
+				}
+	/*                    Given LAMBDA, compute the eigenvector. */
+				L__1 = ! usedbs;
+				odr1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
+					ibegin], &work[indld + ibegin - 1], &work[
+					indlld + ibegin - 1], pivmin, &gaptol, &z__[
+					ibegin + windex * z_dim1], &L__1, &negcnt, &
+					ztz, &mingma, &iwork[iindr + windex], &isuppz[
+					(windex << 1) - 1], &nrminv, &resid, &rqcorr, 
+					&work[indwrk]);
+				if (iter == 0) {
+					bstres = resid;
+					bstw = lambda;
+				} else if (resid < bstres) {
+					bstres = resid;
+					bstw = lambda;
+				}
+	/* Computing MIN */
+				i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
+				isupmn = imin(i__4,i__5);
+	/* Computing MAX */
+				i__4 = isupmx, i__5 = isuppz[windex * 2];
+				isupmx = imax(i__4,i__5);
+				++iter;
+	/*                    sin alpha <= |resid|/gap */
+	/*                    Note that both the residual and the gap are */
+	/*                    proportional to the matrix, so ||T|| doesn't play */
+	/*                    a role in the quotient */
+
+	/*                    Convergence test for Rayleigh-Quotient iteration */
+	/*                    (omitted when Bisection has been used) */
+
+				if (resid > tol * gap && fabs(rqcorr) > rqtol * fabs(
+					lambda) && ! usedbs) {
+	/*                       We need to check that the RQCORR update doesn't */
+	/*                       move the eigenvalue away from the desired one and */
+	/*                       towards a neighbor. -> protection with bisection */
+					if (indeig <= negcnt) {
+	/*                          The wanted eigenvalue lies to the left */
+					sgndef = -1.;
+					} else {
+	/*                          The wanted eigenvalue lies to the right */
+					sgndef = 1.;
+					}
+	/*                       We only use the RQCORR if it improves the */
+	/*                       the iterate reasonably. */
+					if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 
+						right && lambda + rqcorr >= left) {
+					usedrq = TRUE_;
+	/*                          Store new midpoint of bisection interval in WORK */
+					if (sgndef == 1.) {
+	/*                             The current LAMBDA is on the left of the true */
+	/*                             eigenvalue */
+						left = lambda;
+	/*                             We prefer to assume that the error estimate */
+	/*                             is correct. We could make the interval not */
+	/*                             as a bracket but to be modified if the RQCORR */
+	/*                             chooses to. In this case, the RIGHT side should */
+	/*                             be modified as follows: */
+	/*                              RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
+					} else {
+	/*                             The current LAMBDA is on the right of the true */
+	/*                             eigenvalue */
+						right = lambda;
+	/*                             See comment about assuming the error estimate is */
+	/*                             correct above. */
+	/*                              LEFT = MIN(LEFT, LAMBDA + RQCORR) */
+					}
+					work[windex] = (right + left) * .5;
+	/*                          Take RQCORR since it has the correct sign and */
+	/*                          improves the iterate reasonably */
+					lambda += rqcorr;
+	/*                          Update width of error interval */
+					werr[windex] = (right - left) * .5;
+					} else {
+					needbs = TRUE_;
+					}
+					if (right - left < rqtol * fabs(lambda)) {
+	/*                             The eigenvalue is computed to bisection accuracy */
+	/*                             compute eigenvector and stop */
+					usedbs = TRUE_;
+					goto L120;
+					} else if (iter < 10) {
+					goto L120;
+					} else if (iter == 10) {
+					needbs = TRUE_;
+					goto L120;
+					} else {
+					*info = 5;
+					return 0;
+					}
+				} else {
+					stp2ii = FALSE_;
+					if (usedrq && usedbs && bstres <= resid) {
+					lambda = bstw;
+					stp2ii = TRUE_;
+					}
+					if (stp2ii) {
+	/*                          improve error angle by second step */
+					L__1 = ! usedbs;
+					odr1v(&in, &c__1, &in, &lambda, &d__[ibegin]
+	, &l[ibegin], &work[indld + ibegin - 
+						1], &work[indlld + ibegin - 1], 
+						pivmin, &gaptol, &z__[ibegin + windex 
+						* z_dim1], &L__1, &negcnt, &ztz, &
+						mingma, &iwork[iindr + windex], &
+						isuppz[(windex << 1) - 1], &nrminv, &
+						resid, &rqcorr, &work[indwrk]);
+					}
+					work[windex] = lambda;
+				}
+
+	/*                    Compute FP-vector support w.r.t. whole matrix */
+
+				isuppz[(windex << 1) - 1] += oldien;
+				isuppz[windex * 2] += oldien;
+				zfrom = isuppz[(windex << 1) - 1];
+				zto = isuppz[windex * 2];
+				isupmn += oldien;
+				isupmx += oldien;
+	/*                    Ensure vector is ok if support in the RQI has changed */
+				if (isupmn < zfrom) {
+					i__4 = zfrom - 1;
+					for (ii = isupmn; ii <= i__4; ++ii) {
+					z__[ii + windex * z_dim1] = 0.;
+	/* L122: */
+					}
+				}
+				if (isupmx > zto) {
+					i__4 = isupmx;
+					for (ii = zto + 1; ii <= i__4; ++ii) {
+					z__[ii + windex * z_dim1] = 0.;
+	/* L123: */
+					}
+				}
+				i__4 = zto - zfrom + 1;
+				blas::odscal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 
+					&c__1);
+	L125:
+	/*                    Update W */
+				w[windex] = lambda + sigma;
+	/*                    Recompute the gaps on the left and right */
+	/*                    But only allow them to become larger and not */
+	/*                    smaller (which can only happen through "bad" */
+	/*                    cancellation and doesn't reflect the theory */
+	/*                    where the initial gaps are underestimated due */
+	/*                    to WERR being too crude.) */
+				if (! eskip) {
+					if (k > 1) {
+	/* Computing MAX */
+					d__1 = wgap[windmn], d__2 = w[windex] - werr[
+						windex] - w[windmn] - werr[windmn];
+					wgap[windmn] = fmax(d__1,d__2);
+					}
+					if (windex < wend) {
+	/* Computing MAX */
+					d__1 = savgap, d__2 = w[windpl] - werr[windpl]
+						 - w[windex] - werr[windex];
+					wgap[windex] = fmax(d__1,d__2);
+					}
+				}
+				++idone;
+				}
+	/*                 here ends the code for the current child */
+
+	L139:
+	/*                 Proceed to any remaining child nodes */
+				newfst = j + 1;
+	L140:
+				;
+			}
+	/* L150: */
+			}
+			++ndepth;
+			goto L40;
+		}
+		ibegin = iend + 1;
+		wbegin = wend + 1;
+	L170:
+		;
+		}
+
+		return 0;
+
+	/*     End of ODRRV */
+
+	} /* odrrv_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odruv.hpp b/external/pmrrr/include/pmrrr/lapack/odruv.hpp
new file mode 100644
index 0000000000..b461dd4ec2
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odruv.hpp
@@ -0,0 +1,205 @@
+/**
+	C++ template version of LAPACK routine dlaruv.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODRUV_HPP__
+#define __ODRUV_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#define imax(a,b) ( (a) > (b) ? (a) : (b) )
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odruv(int *iseed, int *n, FloatingType *x)
+	{
+		/* Initialized data */
+
+		static int mm[512]	/* was [128][4] */ = { 494,2637,255,2008,1253,
+			3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016,
+			154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657,
+			3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797,
+			1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287,
+			2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094,
+			1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119,
+			3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090,
+			3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364,
+			1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573,
+			1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46,
+			3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019,
+			1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640,
+			2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336,
+			1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168,
+			1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270,
+			2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631,
+			1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948,
+			1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716,
+			1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966,
+			758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078,
+			3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125,
+			2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466,
+			4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449,
+			1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922,
+			2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039,
+			1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76,
+			3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888,
+			1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549,
+			1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673,
+			541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157,
+			1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85,
+			3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941,
+			929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997,
+			1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909,
+			2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141,
+			249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825,
+			157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821,
+			3537,517,3017,2141,1537 };
+
+		/* System generated locals */
+		int i__1;
+
+		/* Local variables */
+		int i__, i1, i2, i3, i4, it1, it2, it3, it4;
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODRUV returns a vector of n random real numbers from a uniform (0,1) */
+	/*  distribution (n <= 128). */
+
+	/*  This is an auxiliary routine called by DLARNV and ZLARNV. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  ISEED   (input/output) INT array, dimension (4) */
+	/*          On entry, the seed of the random number generator; the array */
+	/*          elements must be between 0 and 4095, and ISEED(4) must be */
+	/*          odd. */
+	/*          On exit, the seed is updated. */
+
+	/*  N       (input) INT */
+	/*          The number of random numbers to be generated. N <= 128. */
+
+	/*  X       (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The generated random numbers. */
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  This routine uses a multiplicative congruential method with modulus */
+	/*  2**48 and multiplier 33952834046453 (see G.S.Fishman, */
+	/*  'Multiplicative congruential random number generators with modulus */
+	/*  2**b: an exhaustive analysis for b = 32 and a partial analysis for */
+	/*  b = 48', Math. Comp. 189, pp 331-344, 1990). */
+
+	/*  48-bit ints are stored in 4 int array elements with 12 bits */
+	/*  per element. Hence the routine is portable across machines with */
+	/*  ints of 32 bits or more. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Local Arrays .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Data statements .. */
+		/* Parameter adjustments */
+		--iseed;
+		--x;
+
+		/* Function Body */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		i1 = iseed[1];
+		i2 = iseed[2];
+		i3 = iseed[3];
+		i4 = iseed[4];
+
+		i__1 = imin(*n,128);
+		for (i__ = 1; i__ <= i__1; ++i__) {
+
+	L20:
+
+	/*        Multiply the seed by i-th power of the multiplier modulo 2**48 */
+
+		it4 = i4 * mm[i__ + 383];
+		it3 = it4 / 4096;
+		it4 -= it3 << 12;
+		it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255];
+		it2 = it3 / 4096;
+		it3 -= it2 << 12;
+		it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + 
+			127];
+		it1 = it2 / 4096;
+		it2 -= it1 << 12;
+		it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + 
+			127] + i4 * mm[i__ - 1];
+		it1 %= 4096;
+
+	/*        Convert 48-bit int to a real number in the interval (0,1) */
+
+		x[i__] = ((FloatingType) it1 + ((FloatingType) it2 + ((FloatingType) it3 + (
+			FloatingType) it4 * 2.44140625e-4) * 2.44140625e-4) * 
+			2.44140625e-4) * 2.44140625e-4;
+
+		if (x[i__] == 1.) {
+	/*           If a real number has n bits of precision, and the first */
+	/*           n bits of the 48-bit int above happen to be all 1 (which */
+	/*           will occur about once every 2**n calls), then X( I ) will */
+	/*           be rounded to exactly 1.0. */
+	/*           Since X( I ) is not supposed to return exactly 0.0 or 1.0, */
+	/*           the statistically correct thing to do in this situation is */
+	/*           simply to iterate again. */
+	/*           N.B. the case X( I ) = 0.0 should not be possible. */
+			i1 += 2;
+			i2 += 2;
+			i3 += 2;
+			i4 += 2;
+			goto L20;
+		}
+
+	/* L10: */
+		}
+
+	/*     Return final value of seed */
+
+		iseed[1] = it1;
+		iseed[2] = it2;
+		iseed[3] = it3;
+		iseed[4] = it4;
+		return 0;
+
+	/*     End of ODRUV */
+
+	} /* odruv_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odset.hpp b/external/pmrrr/include/pmrrr/lapack/odset.hpp
new file mode 100644
index 0000000000..b81d684649
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odset.hpp
@@ -0,0 +1,166 @@
+/**
+	C++ template version of LAPACK routine dlaset.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSET_HPP__
+#define __ODSET_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/lapack.hpp>
+
+#define imin(a,b) ( (a) < (b) ? (a) : (b) )
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odset(const char *uplo, int *m, int *n, FloatingType *alpha, 
+	FloatingType *beta, FloatingType *a, int *lda)
+	{
+		/* System generated locals */
+		int a_dim1, a_offset, i__1, i__2, i__3;
+
+		/* Local variables */
+		int i__, j;
+		//extern int olsame(char *, char *);
+
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSET initializes an m-by-n matrix A to BETA on the diagonal and */
+	/*  ALPHA on the offdiagonals. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  UPLO    (input) CHARACTER*1 */
+	/*          Specifies the part of the matrix A to be set. */
+	/*          = 'U':      Upper triangular part is set; the strictly lower */
+	/*                      triangular part of A is not changed. */
+	/*          = 'L':      Lower triangular part is set; the strictly upper */
+	/*                      triangular part of A is not changed. */
+	/*          Otherwise:  All of the matrix A is set. */
+
+	/*  M       (input) INT */
+	/*          The number of rows of the matrix A.  M >= 0. */
+
+	/*  N       (input) INT */
+	/*          The number of columns of the matrix A.  N >= 0. */
+
+	/*  ALPHA   (input) DOUBLE PRECISION */
+	/*          The constant to which the offdiagonal elements are to be set. */
+
+	/*  BETA    (input) DOUBLE PRECISION */
+	/*          The constant to which the diagonal elements are to be set. */
+
+	/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
+	/*          On exit, the leading m-by-n submatrix of A is set as follows: */
+
+	/*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */
+	/*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */
+	/*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */
+
+	/*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */
+
+	/*  LDA     (input) INT */
+	/*          The leading dimension of the array A.  LDA >= max(1,M). */
+
+	/* ===================================================================== */
+
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		a_dim1 = *lda;
+		a_offset = 1 + a_dim1;
+		a -= a_offset;
+
+		/* Function Body */
+		if (olsame(uplo, "U")) {
+
+	/*        Set the strictly upper triangular or trapezoidal part of the */
+	/*        array to ALPHA. */
+
+		i__1 = *n;
+		for (j = 2; j <= i__1; ++j) {
+	/* Computing MIN */
+			i__3 = j - 1;
+			i__2 = imin(i__3,*m);
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = *alpha;
+	/* L10: */
+			}
+	/* L20: */
+		}
+
+		} else if (olsame(uplo, "L")) {
+
+	/*        Set the strictly lower triangular or trapezoidal part of the */
+	/*        array to ALPHA. */
+
+		i__1 = imin(*m,*n);
+		for (j = 1; j <= i__1; ++j) {
+			i__2 = *m;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = *alpha;
+	/* L30: */
+			}
+	/* L40: */
+		}
+
+		} else {
+
+	/*        Set the leading m-by-n submatrix to ALPHA. */
+
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = *alpha;
+	/* L50: */
+			}
+	/* L60: */
+		}
+		}
+
+	/*     Set the first imin(M,N) diagonal elements to BETA. */
+
+		i__1 = imin(*m,*n);
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		a[i__ + i__ * a_dim1] = *beta;
+	/* L70: */
+		}
+
+		return 0;
+
+	/*     End of ODSET */
+
+	} /* odset_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsnan.hpp b/external/pmrrr/include/pmrrr/lapack/odsnan.hpp
new file mode 100644
index 0000000000..8a9bc0c22e
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsnan.hpp
@@ -0,0 +1,67 @@
+/**
+	C++ template version of LAPACK routine dlaisnan.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSNAN_HPP__
+#define __ODSNAN_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	template<typename FloatingType>
+	int odsnan(FloatingType *din1, FloatingType *din2)
+	{
+		/* System generated locals */
+		int ret_val;
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  This routine is not for general use.  It exists solely to avoid */
+	/*  over-optimization in DISNAN. */
+
+	/*  ODSNAN checks for NaNs by comparing its two arguments for */
+	/*  inequality.  NaN is the only floating-point value where NaN != NaN */
+	/*  returns .TRUE.  To check for NaNs, pass the same variable as both */
+	/*  arguments. */
+
+	/*  A compiler must assume that the two arguments are */
+	/*  not the same variable, and the test will not be optimized away. */
+	/*  Interprocedural or whole-program optimization may delete this */
+	/*  test.  The ISNAN functions will be replaced by the correct */
+	/*  Fortran 03 intrinsic once the intrinsic is widely available. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  DIN1     (input) DOUBLE PRECISION */
+	/*  DIN2     (input) DOUBLE PRECISION */
+	/*          Two numbers to compare for inequality. */
+
+	/*  ===================================================================== */
+
+	/*  .. Executable Statements .. */
+		ret_val = *din1 != *din2;
+		return ret_val;
+	} /* odsnan_ */
+
+}
+
+}
+
+#endif
+
diff --git a/external/pmrrr/include/pmrrr/lapack/odsq2.hpp b/external/pmrrr/include/pmrrr/lapack/odsq2.hpp
new file mode 100644
index 0000000000..934c5b4efb
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsq2.hpp
@@ -0,0 +1,613 @@
+/**
+	C++ template version of LAPACK routine dlasq2.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSQ2_HPP__
+#define __ODSQ2_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odsrt.hpp>
+#include <pmrrr/lapack/odsq3.hpp>
+#include <pmrrr/lapack/lapack.hpp>
+
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsq2(int *n, FloatingType *z__, int *info)
+	{
+		/* Table of constant values */
+		static int c__1 = 1;
+		static int c__2 = 2;
+		static int c__10 = 10;
+		static int c__3 = 3;
+		static int c__4 = 4;
+		static int c__11 = 11;
+		/* System generated locals */
+		int i__1, i__2, i__3;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		FloatingType d__, e, g;
+		int k;
+		FloatingType s, t;
+		int i0, i4, n0;
+		FloatingType dn;
+		int pp;
+		FloatingType dn1, dn2, dee, eps, tau, tol;
+		int ipn4;
+		FloatingType tol2;
+		int ieee;
+		int nbig;
+		FloatingType dmin__, emin, emax;
+		int kmin, ndiv, iter;
+		FloatingType qmin, temp, qmax, zmax;
+		int splt;
+		FloatingType dmin1, dmin2;
+		int nfail;
+		FloatingType desig, trace, sigma;
+		int iinfo, ttype;
+		/* Subroutine */ /*extern  int odsq3_(int *, int *, FloatingType *, 
+			int *, FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			 int *, int *, int *, int *, int *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *);*/
+		// extern FloatingType odmch_(char *);
+		FloatingType deemin;
+		int iwhila, iwhilb;
+		FloatingType oldemn, safmin;
+		//extern /* Subroutine */ int oerbla_(char *, int *);
+		/* extern int oienv_(int *, char *, char *, int *, int *,  */
+		/* 	    int *, int *); */
+		//extern /* Subroutine */ int odsrt_(char *, int *, FloatingType *, 
+			//int *);
+
+
+	/*  -- LAPACK routine (version 3.2)                                    -- */
+
+	/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
+	/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
+	/*  -- Berkeley                                                        -- */
+	/*  -- November 2008                                                   -- */
+
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSQ2 computes all the eigenvalues of the symmetric positive */
+	/*  definite tridiagonal matrix associated with the qd array Z to high */
+	/*  relative accuracy are computed to high relative accuracy, in the */
+	/*  absence of denormalization, underflow and overflow. */
+
+	/*  To see the relation of Z to the tridiagonal matrix, let L be a */
+	/*  unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
+	/*  let U be an upper bidiagonal matrix with 1's above and diagonal */
+	/*  Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
+	/*  symmetric tridiagonal to which it is similar. */
+
+	/*  Note : ODSQ2 defines a int variable, IEEE, which is true */
+	/*  on machines which follow ieee-754 floating-point standard in their */
+	/*  handling of infinities and NaNs, and false otherwise. This variable */
+	/*  is passed to ODSQ3. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N     (input) INT */
+	/*        The number of rows and columns in the matrix. N >= 0. */
+
+	/*  Z     (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
+	/*        On entry Z holds the qd array. On exit, entries 1 to N hold */
+	/*        the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
+	/*        trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
+	/*        N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
+	/*        holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
+	/*        shifts that failed. */
+
+	/*  INFO  (output) INT */
+	/*        = 0: successful exit */
+	/*        < 0: if the i-th argument is a scalar and had an illegal */
+	/*             value, then INFO = -i, if the i-th argument is an */
+	/*             array and the j-entry had an illegal value, then */
+	/*             INFO = -(i*100+j) */
+	/*        > 0: the algorithm failed */
+	/*              = 1, a split was marked by a positive value in E */
+	/*              = 2, current block of Z not diagonalized after 30*N */
+	/*                   iterations (in inner while loop) */
+	/*              = 3, termination criterion of outer while loop not met */
+	/*                   (program created more than N unreduced blocks) */
+
+	/*  Further Details */
+	/*  =============== */
+	/*  Local Variables: I0:N0 defines a current unreduced segment of Z. */
+	/*  The shifts are accumulated in SIGMA. Iteration count is in ITER. */
+	/*  Ping-pong is controlled by PP (alternates between 0 and 1). */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Test the input arguments. */
+	/*     (in case ODSQ2 is not called by DLASQ1) */
+
+		/* Parameter adjustments */
+		--z__;
+
+		/* Function Body */
+		*info = 0;
+		eps = std::numeric_limits<FloatingType>::epsilon(); // odmch_("Precision");
+		safmin = std::numeric_limits<FloatingType>::min(); // odmch_("Safe minimum");
+		tol = eps * 100.;
+	/* Computing 2nd power */
+		d__1 = tol;
+		tol2 = d__1 * d__1;
+
+		if (*n < 0) {
+		*info = -1;
+		oerbla("ODSQ2", &c__1);
+		return 0;
+		} else if (*n == 0) {
+		return 0;
+		} else if (*n == 1) {
+
+	/*        1-by-1 case. */
+
+		if (z__[1] < 0.) {
+			*info = -201;
+			oerbla("ODSQ2", &c__2);
+		}
+		return 0;
+		} else if (*n == 2) {
+
+	/*        2-by-2 case. */
+
+		if (z__[2] < 0. || z__[3] < 0.) {
+			*info = -2;
+			oerbla("ODSQ2", &c__2);
+			return 0;
+		} else if (z__[3] > z__[1]) {
+			d__ = z__[3];
+			z__[3] = z__[1];
+			z__[1] = d__;
+		}
+		z__[5] = z__[1] + z__[2] + z__[3];
+		if (z__[2] > z__[3] * tol2) {
+			t = (z__[1] - z__[3] + z__[2]) * .5;
+			s = z__[3] * (z__[2] / t);
+			if (s <= t) {
+			s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
+			} else {
+			s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
+			}
+			t = z__[1] + (s + z__[2]);
+			z__[3] *= z__[1] / t;
+			z__[1] = t;
+		}
+		z__[2] = z__[3];
+		z__[6] = z__[2] + z__[1];
+		return 0;
+		}
+
+	/*     Check for negative data and compute sums of q's and e's. */
+
+		z__[*n * 2] = 0.;
+		emin = z__[2];
+		qmax = 0.;
+		zmax = 0.;
+		d__ = 0.;
+		e = 0.;
+
+		i__1 = (*n - 1) << 1;
+		for (k = 1; k <= i__1; k += 2) {
+		if (z__[k] < 0.) {
+			*info = -(k + 200);
+			oerbla("ODSQ2", &c__2);
+			return 0;
+		} else if (z__[k + 1] < 0.) {
+			*info = -(k + 201);
+			oerbla("ODSQ2", &c__2);
+			return 0;
+		}
+		d__ += z__[k];
+		e += z__[k + 1];
+	/* Computing MAX */
+		d__1 = qmax, d__2 = z__[k];
+		qmax = fmax(d__1,d__2);
+	/* Computing MIN */
+		d__1 = emin, d__2 = z__[k + 1];
+		emin = fmin(d__1,d__2);
+	/* Computing MAX */
+		d__1 = fmax(qmax,zmax), d__2 = z__[k + 1];
+		zmax = fmax(d__1,d__2);
+	/* L10: */
+		}
+		if (z__[(*n << 1) - 1] < 0.) {
+		*info = -((*n << 1) + 199);
+		oerbla("ODSQ2", &c__2);
+		return 0;
+		}
+		d__ += z__[(*n << 1) - 1];
+	/* Computing MAX */
+		d__1 = qmax, d__2 = z__[(*n << 1) - 1];
+		qmax = fmax(d__1,d__2);
+		zmax = fmax(qmax,zmax);
+
+	/*     Check for diagonality. */
+
+		if (e == 0.) {
+		i__1 = *n;
+		for (k = 2; k <= i__1; ++k) {
+			z__[k] = z__[(k << 1) - 1];
+	/* L20: */
+		}
+		odsrt("D", n, &z__[1], &iinfo);
+		z__[(*n << 1) - 1] = d__;
+		return 0;
+		}
+
+		trace = d__ + e;
+
+	/*     Check for zero data. */
+
+		if (trace == 0.) {
+		z__[(*n << 1) - 1] = 0.;
+		return 0;
+		}
+
+	/*     Check whether the machine is IEEE conformable. */
+		/* ieee = oienv_(&c__10, "ODSQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && oienv_(&c__11, "ODSQ2", "N", &c__1, &c__2,  */
+		/* 	     &c__3, &c__4) == 1; */
+		ieee = 1;
+
+	/*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
+
+		for (k = *n << 1; k >= 2; k += -2) {
+		z__[k * 2] = 0.;
+		z__[(k << 1) - 1] = z__[k];
+		z__[(k << 1) - 2] = 0.;
+		z__[(k << 1) - 3] = z__[k - 1];
+	/* L30: */
+		}
+
+		i0 = 1;
+		n0 = *n;
+
+	/*     Reverse the qd-array, if warranted. */
+
+		if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
+		ipn4 = (i0 + n0) << 2;
+		i__1 = (i0 + n0 - 1) << 1;
+		for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
+			temp = z__[i4 - 3];
+			z__[i4 - 3] = z__[ipn4 - i4 - 3];
+			z__[ipn4 - i4 - 3] = temp;
+			temp = z__[i4 - 1];
+			z__[i4 - 1] = z__[ipn4 - i4 - 5];
+			z__[ipn4 - i4 - 5] = temp;
+	/* L40: */
+		}
+		}
+
+	/*     Initial split checking via dqd and Li's test. */
+
+		pp = 0;
+
+		for (k = 1; k <= 2; ++k) {
+
+		d__ = z__[(n0 << 2) + pp - 3];
+		i__1 = (i0 << 2) + pp;
+		for (i4 = ((n0 - 1) << 2) + pp; i4 >= i__1; i4 += -4) {
+			if (z__[i4 - 1] <= tol2 * d__) {
+			z__[i4 - 1] = -0.;
+			d__ = z__[i4 - 3];
+			} else {
+			d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
+			}
+	/* L50: */
+		}
+
+	/*        dqd maps Z to ZZ plus Li's test. */
+
+		emin = z__[(i0 << 2) + pp + 1];
+		d__ = z__[(i0 << 2) + pp - 3];
+		i__1 = ((n0 - 1) << 2) + pp;
+		for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
+			z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
+			if (z__[i4 - 1] <= tol2 * d__) {
+			z__[i4 - 1] = -0.;
+			z__[i4 - (pp << 1) - 2] = d__;
+			z__[i4 - (pp << 1)] = 0.;
+			d__ = z__[i4 + 1];
+			} else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && 
+				safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
+			temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
+			z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
+			d__ *= temp;
+			} else {
+			z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
+				pp << 1) - 2]);
+			d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
+			}
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[i4 - (pp << 1)];
+			emin = fmin(d__1,d__2);
+	/* L60: */
+		}
+		z__[(n0 << 2) - pp - 2] = d__;
+
+	/*        Now find qmax. */
+
+		qmax = z__[(i0 << 2) - pp - 2];
+		i__1 = (n0 << 2) - pp - 2;
+		for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
+	/* Computing MAX */
+			d__1 = qmax, d__2 = z__[i4];
+			qmax = fmax(d__1,d__2);
+	/* L70: */
+		}
+
+	/*        Prepare for the next iteration on K. */
+
+		pp = 1 - pp;
+	/* L80: */
+		}
+
+	/*     Initialise variables to pass to ODSQ3. */
+
+		ttype = 0;
+		dmin1 = 0.;
+		dmin2 = 0.;
+		dn = 0.;
+		dn1 = 0.;
+		dn2 = 0.;
+		g = 0.;
+		tau = 0.;
+
+		iter = 2;
+		nfail = 0;
+		ndiv = (n0 - i0) << 1;
+
+		i__1 = *n + 1;
+		for (iwhila = 1; iwhila <= i__1; ++iwhila) {
+		if (n0 < 1) {
+			goto L170;
+		}
+
+	/*        While array unfinished do */
+
+	/*        E(N0) holds the value of SIGMA when submatrix in I0:N0 */
+	/*        splits from the rest of the array, but is negated. */
+
+		desig = 0.;
+		if (n0 == *n) {
+			sigma = 0.;
+		} else {
+			sigma = -z__[(n0 << 2) - 1];
+		}
+		if (sigma < 0.) {
+			*info = 1;
+			return 0;
+		}
+
+	/*        Find last unreduced submatrix's top index I0, find QMAX and */
+	/*        EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
+
+		emax = 0.;
+		if (n0 > i0) {
+			emin = (d__1 = z__[(n0 << 2) - 5], fabs(d__1));
+		} else {
+			emin = 0.;
+		}
+		qmin = z__[(n0 << 2) - 3];
+		qmax = qmin;
+		for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
+			if (z__[i4 - 5] <= 0.) {
+			goto L100;
+			}
+			if (qmin >= emax * 4.) {
+	/* Computing MIN */
+			d__1 = qmin, d__2 = z__[i4 - 3];
+			qmin = fmin(d__1,d__2);
+	/* Computing MAX */
+			d__1 = emax, d__2 = z__[i4 - 5];
+			emax = fmax(d__1,d__2);
+			}
+	/* Computing MAX */
+			d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
+			qmax = fmax(d__1,d__2);
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[i4 - 5];
+			emin = fmin(d__1,d__2);
+	/* L90: */
+		}
+		i4 = 4;
+
+	L100:
+		i0 = i4 / 4;
+		pp = 0;
+
+		if (n0 - i0 > 1) {
+			dee = z__[(i0 << 2) - 3];
+			deemin = dee;
+			kmin = i0;
+			i__2 = (n0 << 2) - 3;
+			for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
+			dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
+			if (dee <= deemin) {
+				deemin = dee;
+				kmin = (i4 + 3) / 4;
+			}
+	/* L110: */
+			}
+			if ((kmin - i0) << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * 
+				.5) {
+			ipn4 = (i0 + n0) << 2;
+			pp = 2;
+			i__2 = (i0 + n0 - 1) << 1;
+			for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
+				temp = z__[i4 - 3];
+				z__[i4 - 3] = z__[ipn4 - i4 - 3];
+				z__[ipn4 - i4 - 3] = temp;
+				temp = z__[i4 - 2];
+				z__[i4 - 2] = z__[ipn4 - i4 - 2];
+				z__[ipn4 - i4 - 2] = temp;
+				temp = z__[i4 - 1];
+				z__[i4 - 1] = z__[ipn4 - i4 - 5];
+				z__[ipn4 - i4 - 5] = temp;
+				temp = z__[i4];
+				z__[i4] = z__[ipn4 - i4 - 4];
+				z__[ipn4 - i4 - 4] = temp;
+	/* L120: */
+			}
+			}
+		}
+
+	/*        Put -(initial shift) into DMIN. */
+
+	/* Computing MAX */
+		d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
+		dmin__ = -fmax(d__1,d__2);
+
+	/*        Now I0:N0 is unreduced. */
+	/*        PP = 0 for ping, PP = 1 for pong. */
+	/*        PP = 2 indicates that flipping was applied to the Z array and */
+	/*               and that the tests for deflation upon entry in ODSQ3 */
+	/*               should not be performed. */
+
+		nbig = (n0 - i0 + 1) * 100;
+		i__2 = nbig;
+		for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
+			if (i0 > n0) {
+			goto L150;
+			}
+
+	/*           While submatrix unfinished take a good dqds step. */
+
+			odsq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
+				nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
+				dn1, &dn2, &g, &tau);
+
+			pp = 1 - pp;
+
+	/*           When EMIN is very small check for splits. */
+
+			if (pp == 0 && n0 - i0 >= 3) {
+			if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
+				 sigma) {
+				splt = i0 - 1;
+				qmax = z__[(i0 << 2) - 3];
+				emin = z__[(i0 << 2) - 1];
+				oldemn = z__[i0 * 4];
+				i__3 = (n0 - 3) << 2;
+				for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
+				if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= 
+					tol2 * sigma) {
+					z__[i4 - 1] = -sigma;
+					splt = i4 / 4;
+					qmax = 0.;
+					emin = z__[i4 + 3];
+					oldemn = z__[i4 + 4];
+				} else {
+	/* Computing MAX */
+					d__1 = qmax, d__2 = z__[i4 + 1];
+					qmax = fmax(d__1,d__2);
+	/* Computing MIN */
+					d__1 = emin, d__2 = z__[i4 - 1];
+					emin = fmin(d__1,d__2);
+	/* Computing MIN */
+					d__1 = oldemn, d__2 = z__[i4];
+					oldemn = fmin(d__1,d__2);
+				}
+	/* L130: */
+				}
+				z__[(n0 << 2) - 1] = emin;
+				z__[n0 * 4] = oldemn;
+				i0 = splt + 1;
+			}
+			}
+
+	/* L140: */
+		}
+
+		*info = 2;
+		return 0;
+
+	/*        end IWHILB */
+
+	L150:
+
+	/* L160: */
+		;
+		}
+
+		*info = 3;
+		return 0;
+
+	/*     end IWHILA */
+
+	L170:
+
+	/*     Move q's to the front. */
+		i__1 = *n;
+		for (k = 2; k <= i__1; ++k) {
+		z__[k] = z__[(k << 2) - 3];
+	/* L180: */
+		}
+
+	/*     Sort and compute sum of eigenvalues. */
+		odsrt("D", n, &z__[1], &iinfo);
+
+		e = 0.;
+		for (k = *n; k >= 1; --k) {
+		e += z__[k];
+	/* L190: */
+		}
+
+	/*     Store trace, sum(eigenvalues) and information on performance. */
+		z__[(*n << 1) + 1] = trace;
+		z__[(*n << 1) + 2] = e;
+		z__[(*n << 1) + 3] = (FloatingType) iter;
+	/* Computing 2nd power */
+		i__1 = *n;
+		z__[(*n << 1) + 4] = (FloatingType) ndiv / (FloatingType) (i__1 * i__1);
+		z__[(*n << 1) + 5] = nfail * 100. / (FloatingType) iter;
+		return 0;
+
+	/*     End of ODSQ2 */
+
+	} /* odsq2_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsq3.hpp b/external/pmrrr/include/pmrrr/lapack/odsq3.hpp
new file mode 100644
index 0000000000..cd5b7865f4
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsq3.hpp
@@ -0,0 +1,365 @@
+/**
+	C++ template version of LAPACK routine dlasq3.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSQ3_HPP__
+#define __ODSQ3_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/odsq4.hpp>
+#include <pmrrr/lapack/odsq5.hpp>
+#include <pmrrr/lapack/odsq6.hpp>
+#include <pmrrr/lapack/odnan.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsq3(int *i0, int *n0, FloatingType *z__, 
+		int *pp, FloatingType *dmin__, FloatingType *sigma, FloatingType *desig, 
+		 FloatingType *qmax, int *nfail, int *iter, int *ndiv, 
+		int *ieee, int *ttype, FloatingType *dmin1, FloatingType *dmin2, 
+		FloatingType *dn, FloatingType *dn1, FloatingType *dn2, FloatingType *g, 
+		FloatingType *tau)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		FloatingType s, t;
+		int j4, nn;
+		FloatingType eps, tol;
+		int n0in, ipn4;
+		FloatingType tol2, temp;
+		/* Subroutine */ /*extern  int odsq4_(int *, int *, FloatingType *, 
+			int *, int *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, 
+			 FloatingType *), odsq5_(int *, int *, FloatingType *, 
+			int *, FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			 FloatingType *, FloatingType *, FloatingType *, int *), odsq6_(
+			int *, int *, FloatingType *, int *, FloatingType *, 
+			FloatingType *, FloatingType *, FloatingType *, FloatingType *, 
+			FloatingType *);*/
+		// extern FloatingType odmch_(char *);
+
+
+	/*  -- LAPACK routine (version 3.2)                                    -- */
+
+	/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
+	/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
+	/*  -- Berkeley                                                        -- */
+	/*  -- November 2008                                                   -- */
+
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
+	/*  In case of failure it changes shifts, and tries again until output */
+	/*  is positive. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  I0     (input) INT */
+	/*         First index. */
+
+	/*  N0     (input) INT */
+	/*         Last index. */
+
+	/*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N ) */
+	/*         Z holds the qd array. */
+
+	/*  PP     (input/output) INT */
+	/*         PP=0 for ping, PP=1 for pong. */
+	/*         PP=2 indicates that flipping was applied to the Z array */
+	/*         and that the initial tests for deflation should not be */
+	/*         performed. */
+
+	/*  DMIN   (output) DOUBLE PRECISION */
+	/*         Minimum value of d. */
+
+	/*  SIGMA  (output) DOUBLE PRECISION */
+	/*         Sum of shifts used in current segment. */
+
+	/*  DESIG  (input/output) DOUBLE PRECISION */
+	/*         Lower order part of SIGMA */
+
+	/*  QMAX   (input) DOUBLE PRECISION */
+	/*         Maximum value of q. */
+
+	/*  NFAIL  (output) INT */
+	/*         Number of times shift was too big. */
+
+	/*  ITER   (output) INT */
+	/*         Number of iterations. */
+
+	/*  NDIV   (output) INT */
+	/*         Number of divisions. */
+
+	/*  IEEE   (input) INT */
+	/*         Flag for IEEE or non IEEE arithmetic (passed to ODSQ5). */
+
+	/*  TTYPE  (input/output) INT */
+	/*         Shift type. */
+
+	/*  DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
+	/*         These are passed as arguments in order to save their values */
+	/*         between calls to ODSQ3. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. External Function .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--z__;
+
+		/* Function Body */
+		n0in = *n0;
+		eps = std::numeric_limits<FloatingType>::epsilon(); // odmch_("Precision");
+		tol = eps * 100.;
+	/* Computing 2nd power */
+		d__1 = tol;
+		tol2 = d__1 * d__1;
+
+	/*     Check for deflation. */
+
+	L10:
+
+		if (*n0 < *i0) {
+		return 0;
+		}
+		if (*n0 == *i0) {
+		goto L20;
+		}
+		nn = (*n0 << 2) + *pp;
+		if (*n0 == *i0 + 1) {
+		goto L40;
+		}
+
+	/*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
+
+		if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 
+			4] > tol2 * z__[nn - 7]) {
+		goto L30;
+		}
+
+	L20:
+
+		z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
+		--(*n0);
+		goto L10;
+
+	/*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
+
+	L30:
+
+		if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
+			nn - 11]) {
+		goto L50;
+		}
+
+	L40:
+
+		if (z__[nn - 3] > z__[nn - 7]) {
+		s = z__[nn - 3];
+		z__[nn - 3] = z__[nn - 7];
+		z__[nn - 7] = s;
+		}
+		if (z__[nn - 5] > z__[nn - 3] * tol2) {
+		t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
+		s = z__[nn - 3] * (z__[nn - 5] / t);
+		if (s <= t) {
+			s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.)));
+		} else {
+			s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
+		}
+		t = z__[nn - 7] + (s + z__[nn - 5]);
+		z__[nn - 3] *= z__[nn - 7] / t;
+		z__[nn - 7] = t;
+		}
+		z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
+		z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
+		*n0 += -2;
+		goto L10;
+
+	L50:
+		if (*pp == 2) {
+		*pp = 0;
+		}
+
+	/*     Reverse the qd-array, if warranted. */
+
+		if (*dmin__ <= 0. || *n0 < n0in) {
+		if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
+			ipn4 = (*i0 + *n0) << 2;
+			i__1 = (*i0 + *n0 - 1) << 1;
+			for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			temp = z__[j4 - 3];
+			z__[j4 - 3] = z__[ipn4 - j4 - 3];
+			z__[ipn4 - j4 - 3] = temp;
+			temp = z__[j4 - 2];
+			z__[j4 - 2] = z__[ipn4 - j4 - 2];
+			z__[ipn4 - j4 - 2] = temp;
+			temp = z__[j4 - 1];
+			z__[j4 - 1] = z__[ipn4 - j4 - 5];
+			z__[ipn4 - j4 - 5] = temp;
+			temp = z__[j4];
+			z__[j4] = z__[ipn4 - j4 - 4];
+			z__[ipn4 - j4 - 4] = temp;
+	/* L60: */
+			}
+			if (*n0 - *i0 <= 4) {
+			z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
+			z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
+			}
+	/* Computing MIN */
+			d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
+			*dmin2 = fmin(d__1,d__2);
+	/* Computing MIN */
+			d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
+				, d__1 = fmin(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
+			z__[(*n0 << 2) + *pp - 1] = fmin(d__1,d__2);
+	/* Computing MIN */
+			d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
+				 fmin(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
+			z__[(*n0 << 2) - *pp] = fmin(d__1,d__2);
+	/* Computing MAX */
+			d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = fmax(d__1,
+				d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
+			*qmax = fmax(d__1,d__2);
+			*dmin__ = -0.;
+		}
+		}
+
+	/*     Choose a shift. */
+
+		odsq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, 
+			tau, ttype, g);
+
+	/*     Call dqds until DMIN > 0. */
+
+	L70:
+
+		odsq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, 
+			ieee);
+
+		*ndiv += *n0 - *i0 + 2;
+		++(*iter);
+
+	/*     Check status. */
+
+		if (*dmin__ >= 0. && *dmin1 > 0.) {
+
+	/*        Success. */
+
+		goto L90;
+
+		} else if (*dmin__ < 0. && *dmin1 > 0. && z__[((*n0 - 1) << 2) - *pp] < tol 
+			* (*sigma + *dn1) && fabs(*dn) < tol * *sigma) {
+
+	/*        Convergence hidden by negative DN. */
+
+		z__[((*n0 - 1) << 2) - *pp + 2] = 0.;
+		*dmin__ = 0.;
+		goto L90;
+		} else if (*dmin__ < 0.) {
+
+	/*        TAU too big. Select new TAU and try again. */
+
+		++(*nfail);
+		if (*ttype < -22) {
+
+	/*           Failed twice. Play it safe. */
+
+			*tau = 0.;
+		} else if (*dmin1 > 0.) {
+
+	/*           Late failure. Gives excellent shift. */
+
+			*tau = (*tau + *dmin__) * (1. - eps * 2.);
+			*ttype += -11;
+		} else {
+
+	/*           Early failure. Divide by 4. */
+
+			*tau *= .25;
+			*ttype += -12;
+		}
+		goto L70;
+		} else if (odnan(dmin__)) {
+
+	/*        NaN. */
+
+		if (*tau == 0.) {
+			goto L80;
+		} else {
+			*tau = 0.;
+			goto L70;
+		}
+		} else {
+
+	/*        Possible underflow. Play it safe. */
+
+		goto L80;
+		}
+
+	/*     Risk of underflow. */
+
+	L80:
+		odsq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
+		*ndiv += *n0 - *i0 + 2;
+		++(*iter);
+		*tau = 0.;
+
+	L90:
+		if (*tau < *sigma) {
+		*desig += *tau;
+		t = *sigma + *desig;
+		*desig -= t - *sigma;
+		} else {
+		t = *sigma + *tau;
+		*desig = *sigma - (t - *tau) + *desig;
+		}
+		*sigma = t;
+
+		return 0;
+
+	/*     End of ODSQ3 */
+
+	} /* odsq3_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsq4.hpp b/external/pmrrr/include/pmrrr/lapack/odsq4.hpp
new file mode 100644
index 0000000000..d7f19cec0c
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsq4.hpp
@@ -0,0 +1,412 @@
+/**
+	C++ template version of LAPACK routine dlasq4.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSQ4_HPP__
+#define __ODSQ4_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsq4(int *i0, int *n0, FloatingType *z__, 
+			int *pp, int *n0in, FloatingType *dmin__, FloatingType *dmin1, 
+			FloatingType *dmin2, FloatingType *dn, FloatingType *dn1, FloatingType *dn2, 
+			FloatingType *tau, int *ttype, FloatingType *g)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		FloatingType s, a2, b1, b2;
+		int i4, nn, np;
+		FloatingType gam, gap1, gap2;
+
+
+	/*  -- LAPACK routine (version 3.2)                                    -- */
+
+	/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
+	/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
+	/*  -- Berkeley                                                        -- */
+	/*  -- November 2008                                                   -- */
+
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSQ4 computes an approximation TAU to the smallest eigenvalue */
+	/*  using values of d from the previous transform. */
+
+	/*  I0    (input) INT */
+	/*        First index. */
+
+	/*  N0    (input) INT */
+	/*        Last index. */
+
+	/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
+	/*        Z holds the qd array. */
+
+	/*  PP    (input) INT */
+	/*        PP=0 for ping, PP=1 for pong. */
+
+	/*  NOIN  (input) INT */
+	/*        The value of N0 at start of EIGTEST. */
+
+	/*  DMIN  (input) DOUBLE PRECISION */
+	/*        Minimum value of d. */
+
+	/*  DMIN1 (input) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ). */
+
+	/*  DMIN2 (input) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
+
+	/*  DN    (input) DOUBLE PRECISION */
+	/*        d(N) */
+
+	/*  DN1   (input) DOUBLE PRECISION */
+	/*        d(N-1) */
+
+	/*  DN2   (input) DOUBLE PRECISION */
+	/*        d(N-2) */
+
+	/*  TAU   (output) DOUBLE PRECISION */
+	/*        This is the shift. */
+
+	/*  TTYPE (output) INT */
+	/*        Shift type. */
+
+	/*  G     (input/output) REAL */
+	/*        G is passed as an argument in order to save its value between */
+	/*        calls to ODSQ4. */
+
+	/*  Further Details */
+	/*  =============== */
+	/*  CNST1 = 9/16 */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     A negative DMIN forces the shift to take that absolute value */
+	/*     TTYPE records the type of shift. */
+
+		/* Parameter adjustments */
+		--z__;
+
+		/* Function Body */
+		if (*dmin__ <= 0.) {
+		*tau = -(*dmin__);
+		*ttype = -1;
+		return 0;
+		}
+
+		nn = (*n0 << 2) + *pp;
+		if (*n0in == *n0) {
+
+	/*        No eigenvalues deflated. */
+
+		if (*dmin__ == *dn || *dmin__ == *dn1) {
+
+			b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
+			b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
+			a2 = z__[nn - 7] + z__[nn - 5];
+
+	/*           Cases 2 and 3. */
+
+			if (*dmin__ == *dn && *dmin1 == *dn1) {
+			gap2 = *dmin2 - a2 - *dmin2 * .25;
+			if (gap2 > 0. && gap2 > b2) {
+				gap1 = a2 - *dn - b2 / gap2 * b2;
+			} else {
+				gap1 = a2 - *dn - (b1 + b2);
+			}
+			if (gap1 > 0. && gap1 > b1) {
+	/* Computing MAX */
+				d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
+				s = fmax(d__1,d__2);
+				*ttype = -2;
+			} else {
+				s = 0.;
+				if (*dn > b1) {
+				s = *dn - b1;
+				}
+				if (a2 > b1 + b2) {
+	/* Computing MIN */
+				d__1 = s, d__2 = a2 - (b1 + b2);
+				s = fmin(d__1,d__2);
+				}
+	/* Computing MAX */
+				d__1 = s, d__2 = *dmin__ * .333;
+				s = fmax(d__1,d__2);
+				*ttype = -3;
+			}
+			} else {
+
+	/*              Case 4. */
+
+			*ttype = -4;
+			s = *dmin__ * .25;
+			if (*dmin__ == *dn) {
+				gam = *dn;
+				a2 = 0.;
+				if (z__[nn - 5] > z__[nn - 7]) {
+				return 0;
+				}
+				b2 = z__[nn - 5] / z__[nn - 7];
+				np = nn - 9;
+			} else {
+				np = nn - (*pp << 1);
+				b2 = z__[np - 2];
+				gam = *dn1;
+				if (z__[np - 4] > z__[np - 2]) {
+				return 0;
+				}
+				a2 = z__[np - 4] / z__[np - 2];
+				if (z__[nn - 9] > z__[nn - 11]) {
+				return 0;
+				}
+				b2 = z__[nn - 9] / z__[nn - 11];
+				np = nn - 13;
+			}
+
+	/*              Approximate contribution to norm squared from I < NN-1. */
+
+			a2 += b2;
+			i__1 = (*i0 << 2) - 1 + *pp;
+			for (i4 = np; i4 >= i__1; i4 += -4) {
+				if (b2 == 0.) {
+				goto L20;
+				}
+				b1 = b2;
+				if (z__[i4] > z__[i4 - 2]) {
+				return 0;
+				}
+				b2 *= z__[i4] / z__[i4 - 2];
+				a2 += b2;
+				if (fmax(b2,b1) * 100. < a2 || .563 < a2) {
+				goto L20;
+				}
+	/* L10: */
+			}
+	L20:
+			a2 *= 1.05;
+
+	/*              Rayleigh quotient residual bound. */
+
+			if (a2 < .563) {
+				s = gam * (1. - sqrt(a2)) / (a2 + 1.);
+			}
+			}
+		} else if (*dmin__ == *dn2) {
+
+	/*           Case 5. */
+
+			*ttype = -5;
+			s = *dmin__ * .25;
+
+	/*           Compute contribution to norm squared from I > NN-2. */
+
+			np = nn - (*pp << 1);
+			b1 = z__[np - 2];
+			b2 = z__[np - 6];
+			gam = *dn2;
+			if (z__[np - 8] > b2 || z__[np - 4] > b1) {
+			return 0;
+			}
+			a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
+
+	/*           Approximate contribution to norm squared from I < NN-2. */
+
+			if (*n0 - *i0 > 2) {
+			b2 = z__[nn - 13] / z__[nn - 15];
+			a2 += b2;
+			i__1 = (*i0 << 2) - 1 + *pp;
+			for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
+				if (b2 == 0.) {
+				goto L40;
+				}
+				b1 = b2;
+				if (z__[i4] > z__[i4 - 2]) {
+				return 0;
+				}
+				b2 *= z__[i4] / z__[i4 - 2];
+				a2 += b2;
+				if (fmax(b2,b1) * 100. < a2 || .563 < a2) {
+				goto L40;
+				}
+	/* L30: */
+			}
+	L40:
+			a2 *= 1.05;
+			}
+
+			if (a2 < .563) {
+			s = gam * (1. - sqrt(a2)) / (a2 + 1.);
+			}
+		} else {
+
+	/*           Case 6, no information to guide us. */
+
+			if (*ttype == -6) {
+			*g += (1. - *g) * .333;
+			} else if (*ttype == -18) {
+			*g = .083250000000000005;
+			} else {
+			*g = .25;
+			}
+			s = *g * *dmin__;
+			*ttype = -6;
+		}
+
+		} else if (*n0in == *n0 + 1) {
+
+	/*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
+
+		if (*dmin1 == *dn1 && *dmin2 == *dn2) {
+
+	/*           Cases 7 and 8. */
+
+			*ttype = -7;
+			s = *dmin1 * .333;
+			if (z__[nn - 5] > z__[nn - 7]) {
+			return 0;
+			}
+			b1 = z__[nn - 5] / z__[nn - 7];
+			b2 = b1;
+			if (b2 == 0.) {
+			goto L60;
+			}
+			i__1 = (*i0 << 2) - 1 + *pp;
+			for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
+			a2 = b1;
+			if (z__[i4] > z__[i4 - 2]) {
+				return 0;
+			}
+			b1 *= z__[i4] / z__[i4 - 2];
+			b2 += b1;
+			if (fmax(b1,a2) * 100. < b2) {
+				goto L60;
+			}
+	/* L50: */
+			}
+	L60:
+			b2 = sqrt(b2 * 1.05);
+	/* Computing 2nd power */
+			d__1 = b2;
+			a2 = *dmin1 / (d__1 * d__1 + 1.);
+			gap2 = *dmin2 * .5 - a2;
+			if (gap2 > 0. && gap2 > b2 * a2) {
+	/* Computing MAX */
+			d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
+			s = fmax(d__1,d__2);
+			} else {
+	/* Computing MAX */
+			d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
+			s = fmax(d__1,d__2);
+			*ttype = -8;
+			}
+		} else {
+
+	/*           Case 9. */
+
+			s = *dmin1 * .25;
+			if (*dmin1 == *dn1) {
+			s = *dmin1 * .5;
+			}
+			*ttype = -9;
+		}
+
+		} else if (*n0in == *n0 + 2) {
+
+	/*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
+
+	/*        Cases 10 and 11. */
+
+		if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
+			*ttype = -10;
+			s = *dmin2 * .333;
+			if (z__[nn - 5] > z__[nn - 7]) {
+			return 0;
+			}
+			b1 = z__[nn - 5] / z__[nn - 7];
+			b2 = b1;
+			if (b2 == 0.) {
+			goto L80;
+			}
+			i__1 = (*i0 << 2) - 1 + *pp;
+			for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
+			if (z__[i4] > z__[i4 - 2]) {
+				return 0;
+			}
+			b1 *= z__[i4] / z__[i4 - 2];
+			b2 += b1;
+			if (b1 * 100. < b2) {
+				goto L80;
+			}
+	/* L70: */
+			}
+	L80:
+			b2 = sqrt(b2 * 1.05);
+	/* Computing 2nd power */
+			d__1 = b2;
+			a2 = *dmin2 / (d__1 * d__1 + 1.);
+			gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
+				nn - 9]) - a2;
+			if (gap2 > 0. && gap2 > b2 * a2) {
+	/* Computing MAX */
+			d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
+			s = fmax(d__1,d__2);
+			} else {
+	/* Computing MAX */
+			d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
+			s = fmax(d__1,d__2);
+			}
+		} else {
+			s = *dmin2 * .25;
+			*ttype = -11;
+		}
+		} else if (*n0in > *n0 + 2) {
+
+	/*        Case 12, more than two eigenvalues deflated. No information. */
+
+		s = 0.;
+		*ttype = -12;
+		}
+
+		*tau = s;
+		return 0;
+
+	/*     End of ODSQ4 */
+
+	} /* odsq4_ */
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsq5.hpp b/external/pmrrr/include/pmrrr/lapack/odsq5.hpp
new file mode 100644
index 0000000000..e7361978f6
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsq5.hpp
@@ -0,0 +1,250 @@
+/**
+	C++ template version of LAPACK routine dlasq5.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSQ5_HPP__
+#define __ODSQ5_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsq5(int *i0, int *n0, FloatingType *z__, 
+		int *pp, FloatingType *tau, FloatingType *dmin__, FloatingType *dmin1, 
+		FloatingType *dmin2, FloatingType *dn, FloatingType *dnm1, FloatingType *dnm2, 
+		 int *ieee)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Local variables */
+		FloatingType d__;
+		int j4, j4p2;
+		FloatingType emin, temp;
+
+
+	/*  -- LAPACK routine (version 3.2)                                    -- */
+
+	/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
+	/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
+	/*  -- Berkeley                                                        -- */
+	/*  -- November 2008                                                   -- */
+
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSQ5 computes one dqds transform in ping-pong form, one */
+	/*  version for IEEE machines another for non IEEE machines. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  I0    (input) INT */
+	/*        First index. */
+
+	/*  N0    (input) INT */
+	/*        Last index. */
+
+	/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
+	/*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
+	/*        an extra argument. */
+
+	/*  PP    (input) INT */
+	/*        PP=0 for ping, PP=1 for pong. */
+
+	/*  TAU   (input) DOUBLE PRECISION */
+	/*        This is the shift. */
+
+	/*  DMIN  (output) DOUBLE PRECISION */
+	/*        Minimum value of d. */
+
+	/*  DMIN1 (output) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ). */
+
+	/*  DMIN2 (output) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
+
+	/*  DN    (output) DOUBLE PRECISION */
+	/*        d(N0), the last value of d. */
+
+	/*  DNM1  (output) DOUBLE PRECISION */
+	/*        d(N0-1). */
+
+	/*  DNM2  (output) DOUBLE PRECISION */
+	/*        d(N0-2). */
+
+	/*  IEEE  (input) INT */
+	/*        Flag for IEEE or non IEEE arithmetic. */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameter .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--z__;
+
+		/* Function Body */
+		if (*n0 - *i0 - 1 <= 0) {
+		return 0;
+		}
+
+		j4 = (*i0 << 2) + *pp - 3;
+		emin = z__[j4 + 4];
+		d__ = z__[j4] - *tau;
+		*dmin__ = d__;
+		*dmin1 = -z__[j4];
+
+		if (*ieee) {
+
+	/*        Code for IEEE arithmetic. */
+
+		if (*pp == 0) {
+			i__1 = (*n0 - 3) << 2;
+			for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 2] = d__ + z__[j4 - 1];
+			temp = z__[j4 + 1] / z__[j4 - 2];
+			d__ = d__ * temp - *tau;
+			*dmin__ = fmin(*dmin__,d__);
+			z__[j4] = z__[j4 - 1] * temp;
+	/* Computing MIN */
+			d__1 = z__[j4];
+			emin = fmin(d__1,emin);
+	/* L10: */
+			}
+		} else {
+			i__1 = (*n0 - 3) << 2;
+			for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 3] = d__ + z__[j4];
+			temp = z__[j4 + 2] / z__[j4 - 3];
+			d__ = d__ * temp - *tau;
+			*dmin__ = fmin(*dmin__,d__);
+			z__[j4 - 1] = z__[j4] * temp;
+	/* Computing MIN */
+			d__1 = z__[j4 - 1];
+			emin = fmin(d__1,emin);
+	/* L20: */
+			}
+		}
+
+	/*        Unroll last two steps. */
+
+		*dnm2 = d__;
+		*dmin2 = *dmin__;
+		j4 = ((*n0 - 2) << 2) - *pp;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm2 + z__[j4p2];
+		z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+		*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+		*dmin__ = fmin(*dmin__,*dnm1);
+
+		*dmin1 = *dmin__;
+		j4 += 4;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm1 + z__[j4p2];
+		z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+		*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+		*dmin__ = fmin(*dmin__,*dn);
+
+		} else {
+
+	/*        Code for non IEEE arithmetic. */
+
+		if (*pp == 0) {
+			i__1 = (*n0 - 3) << 2;
+			for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 2] = d__ + z__[j4 - 1];
+			if (d__ < 0.) {
+				return 0;
+			} else {
+				z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+				d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
+			}
+			*dmin__ = fmin(*dmin__,d__);
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[j4];
+			emin = fmin(d__1,d__2);
+	/* L30: */
+			}
+		} else {
+			i__1 = (*n0 - 3) << 2;
+			for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 3] = d__ + z__[j4];
+			if (d__ < 0.) {
+				return 0;
+			} else {
+				z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+				d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
+			}
+			*dmin__ = fmin(*dmin__,d__);
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[j4 - 1];
+			emin = fmin(d__1,d__2);
+	/* L40: */
+			}
+		}
+
+	/*        Unroll last two steps. */
+
+		*dnm2 = d__;
+		*dmin2 = *dmin__;
+		j4 = ((*n0 - 2) << 2) - *pp;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm2 + z__[j4p2];
+		if (*dnm2 < 0.) {
+			return 0;
+		} else {
+			z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+			*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+		}
+		*dmin__ = fmin(*dmin__,*dnm1);
+
+		*dmin1 = *dmin__;
+		j4 += 4;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm1 + z__[j4p2];
+		if (*dnm1 < 0.) {
+			return 0;
+		} else {
+			z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+			*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+		}
+		*dmin__ = fmin(*dmin__,*dn);
+
+		}
+
+		z__[j4 + 2] = *dn;
+		z__[(*n0 << 2) - *pp] = emin;
+		return 0;
+
+	/*     End of ODSQ5 */
+
+	} /* odsq5_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsq6.hpp b/external/pmrrr/include/pmrrr/lapack/odsq6.hpp
new file mode 100644
index 0000000000..f613a90c88
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsq6.hpp
@@ -0,0 +1,223 @@
+/**
+	C++ template version of LAPACK routine dlasq6.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSQ6_HPP__
+#define __ODSQ6_HPP__
+
+#include <limits>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsq6(int *i0, int *n0, FloatingType *z__, 
+		int *pp, FloatingType *dmin__, FloatingType *dmin1, FloatingType *dmin2, 
+		 FloatingType *dn, FloatingType *dnm1, FloatingType *dnm2)
+	{
+		/* System generated locals */
+		int i__1;
+		FloatingType d__1, d__2;
+
+		/* Local variables */
+		FloatingType d__;
+		int j4, j4p2;
+		FloatingType emin, temp;
+		//    extern FloatingType odmch_(char *);
+		FloatingType safmin;
+
+
+	/*  -- LAPACK routine (version 3.2)                                    -- */
+
+	/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
+	/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
+	/*  -- Berkeley                                                        -- */
+	/*  -- November 2008                                                   -- */
+
+	/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+	/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSQ6 computes one dqd (shift equal to zero) transform in */
+	/*  ping-pong form, with protection against underflow and overflow. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  I0    (input) INT */
+	/*        First index. */
+
+	/*  N0    (input) INT */
+	/*        Last index. */
+
+	/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
+	/*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
+	/*        an extra argument. */
+
+	/*  PP    (input) INT */
+	/*        PP=0 for ping, PP=1 for pong. */
+
+	/*  DMIN  (output) DOUBLE PRECISION */
+	/*        Minimum value of d. */
+
+	/*  DMIN1 (output) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ). */
+
+	/*  DMIN2 (output) DOUBLE PRECISION */
+	/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
+
+	/*  DN    (output) DOUBLE PRECISION */
+	/*        d(N0), the last value of d. */
+
+	/*  DNM1  (output) DOUBLE PRECISION */
+	/*        d(N0-1). */
+
+	/*  DNM2  (output) DOUBLE PRECISION */
+	/*        d(N0-2). */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameter .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. External Function .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--z__;
+
+		/* Function Body */
+		if (*n0 - *i0 - 1 <= 0) {
+		return 0;
+		}
+
+		safmin = std::numeric_limits<FloatingType>::min(); // odmch_("Safe minimum");
+		j4 = (*i0 << 2) + *pp - 3;
+		emin = z__[j4 + 4];
+		d__ = z__[j4];
+		*dmin__ = d__;
+
+		if (*pp == 0) {
+		i__1 = (*n0 - 3) << 2;
+		for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 2] = d__ + z__[j4 - 1];
+			if (z__[j4 - 2] == 0.) {
+			z__[j4] = 0.;
+			d__ = z__[j4 + 1];
+			*dmin__ = d__;
+			emin = 0.;
+			} else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 
+				- 2] < z__[j4 + 1]) {
+			temp = z__[j4 + 1] / z__[j4 - 2];
+			z__[j4] = z__[j4 - 1] * temp;
+			d__ *= temp;
+			} else {
+			z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+			d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
+			}
+			*dmin__ = fmin(*dmin__,d__);
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[j4];
+			emin = fmin(d__1,d__2);
+	/* L10: */
+		}
+		} else {
+		i__1 = (*n0 - 3) << 2;
+		for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
+			z__[j4 - 3] = d__ + z__[j4];
+			if (z__[j4 - 3] == 0.) {
+			z__[j4 - 1] = 0.;
+			d__ = z__[j4 + 2];
+			*dmin__ = d__;
+			emin = 0.;
+			} else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 
+				- 3] < z__[j4 + 2]) {
+			temp = z__[j4 + 2] / z__[j4 - 3];
+			z__[j4 - 1] = z__[j4] * temp;
+			d__ *= temp;
+			} else {
+			z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+			d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
+			}
+			*dmin__ = fmin(*dmin__,d__);
+	/* Computing MIN */
+			d__1 = emin, d__2 = z__[j4 - 1];
+			emin = fmin(d__1,d__2);
+	/* L20: */
+		}
+		}
+
+	/*     Unroll last two steps. */
+
+		*dnm2 = d__;
+		*dmin2 = *dmin__;
+		j4 = ((*n0 - 2) << 2) - *pp;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm2 + z__[j4p2];
+		if (z__[j4 - 2] == 0.) {
+		z__[j4] = 0.;
+		*dnm1 = z__[j4p2 + 2];
+		*dmin__ = *dnm1;
+		emin = 0.;
+		} else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 
+			z__[j4p2 + 2]) {
+		temp = z__[j4p2 + 2] / z__[j4 - 2];
+		z__[j4] = z__[j4p2] * temp;
+		*dnm1 = *dnm2 * temp;
+		} else {
+		z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+		*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
+		}
+		*dmin__ = fmin(*dmin__,*dnm1);
+
+		*dmin1 = *dmin__;
+		j4 += 4;
+		j4p2 = j4 + (*pp << 1) - 1;
+		z__[j4 - 2] = *dnm1 + z__[j4p2];
+		if (z__[j4 - 2] == 0.) {
+		z__[j4] = 0.;
+		*dn = z__[j4p2 + 2];
+		*dmin__ = *dn;
+		emin = 0.;
+		} else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 
+			z__[j4p2 + 2]) {
+		temp = z__[j4p2 + 2] / z__[j4 - 2];
+		z__[j4] = z__[j4p2] * temp;
+		*dn = *dnm1 * temp;
+		} else {
+		z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+		*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
+		}
+		*dmin__ = fmin(*dmin__,*dn);
+
+		z__[j4 + 2] = *dn;
+		z__[(*n0 << 2) - *pp] = emin;
+		return 0;
+
+	/*     End of ODSQ6 */
+
+	} /* odsq6_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odsrt.hpp b/external/pmrrr/include/pmrrr/lapack/odsrt.hpp
new file mode 100644
index 0000000000..869a6da155
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odsrt.hpp
@@ -0,0 +1,298 @@
+/**
+	C++ template version of LAPACK routine dlasrt.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSRT_HPP__
+#define __ODSRT_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+#include <pmrrr/lapack/lapack.hpp>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odsrt(const char *id, int *n, FloatingType *d__, int *
+		info)
+	{
+		/* System generated locals */
+		int i__1, i__2;
+
+		/* Local variables */
+		int i__, j;
+		FloatingType d1, d2, d3;
+		int dir;
+		FloatingType tmp;
+		int endd;
+		//extern int olsame(char *, char *);
+		int stack[64]	/* was [2][32] */;
+		FloatingType dmnmx;
+		int start;
+		//extern /* Subroutine */ int oerbla_(char *, int *);
+		int stkpnt;
+
+
+	/*  -- LAPACK routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  Sort the numbers in D in increasing order (if ID = 'I') or */
+	/*  in decreasing order (if ID = 'D' ). */
+
+	/*  Use Quick Sort, reverting to Insertion sort on arrays of */
+	/*  size <= 20. Dimension of STACK limits N to about 2**32. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  ID      (input) CHARACTER*1 */
+	/*          = 'I': sort D in increasing order; */
+	/*          = 'D': sort D in decreasing order. */
+
+	/*  N       (input) INT */
+	/*          The length of the array D. */
+
+	/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the array to be sorted. */
+	/*          On exit, D has been sorted into increasing order */
+	/*          (D(1) <= ... <= D(N) ) or into decreasing order */
+	/*          (D(1) >= ... >= D(N) ), depending on ID. */
+
+	/*  INFO    (output) INT */
+	/*          = 0:  successful exit */
+	/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Local Arrays .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Test the input paramters. */
+
+		/* Parameter adjustments */
+		--d__;
+
+		/* Function Body */
+		*info = 0;
+		dir = -1;
+		if (olsame(id, "D")) {
+		dir = 0;
+		} else if (olsame(id, "I")) {
+		dir = 1;
+		}
+		if (dir == -1) {
+		*info = -1;
+		} else if (*n < 0) {
+		*info = -2;
+		}
+		if (*info != 0) {
+		i__1 = -(*info);
+		oerbla("ODSRT", &i__1);
+		return 0;
+		}
+
+	/*     Quick return if possible */
+
+		if (*n <= 1) {
+		return 0;
+		}
+
+		stkpnt = 1;
+		stack[0] = 1;
+		stack[1] = *n;
+	L10:
+		start = stack[(stkpnt << 1) - 2];
+		endd = stack[(stkpnt << 1) - 1];
+		--stkpnt;
+		if (endd - start <= 20 && endd - start > 0) {
+
+	/*        Do Insertion sort on D( START:ENDD ) */
+
+		if (dir == 0) {
+
+	/*           Sort into decreasing order */
+
+			i__1 = endd;
+			for (i__ = start + 1; i__ <= i__1; ++i__) {
+			i__2 = start + 1;
+			for (j = i__; j >= i__2; --j) {
+				if (d__[j] > d__[j - 1]) {
+				dmnmx = d__[j];
+				d__[j] = d__[j - 1];
+				d__[j - 1] = dmnmx;
+				} else {
+				goto L30;
+				}
+	/* L20: */
+			}
+	L30:
+			;
+			}
+
+		} else {
+
+	/*           Sort into increasing order */
+
+			i__1 = endd;
+			for (i__ = start + 1; i__ <= i__1; ++i__) {
+			i__2 = start + 1;
+			for (j = i__; j >= i__2; --j) {
+				if (d__[j] < d__[j - 1]) {
+				dmnmx = d__[j];
+				d__[j] = d__[j - 1];
+				d__[j - 1] = dmnmx;
+				} else {
+				goto L50;
+				}
+	/* L40: */
+			}
+	L50:
+			;
+			}
+
+		}
+
+		} else if (endd - start > 20) {
+
+	/*        Partition D( START:ENDD ) and stack parts, largest one first */
+
+	/*        Choose partition entry as median of 3 */
+
+		d1 = d__[start];
+		d2 = d__[endd];
+		i__ = (start + endd) / 2;
+		d3 = d__[i__];
+		if (d1 < d2) {
+			if (d3 < d1) {
+			dmnmx = d1;
+			} else if (d3 < d2) {
+			dmnmx = d3;
+			} else {
+			dmnmx = d2;
+			}
+		} else {
+			if (d3 < d2) {
+			dmnmx = d2;
+			} else if (d3 < d1) {
+			dmnmx = d3;
+			} else {
+			dmnmx = d1;
+			}
+		}
+
+		if (dir == 0) {
+
+	/*           Sort into decreasing order */
+
+			i__ = start - 1;
+			j = endd + 1;
+	L60:
+	L70:
+			--j;
+			if (d__[j] < dmnmx) {
+			goto L70;
+			}
+	L80:
+			++i__;
+			if (d__[i__] > dmnmx) {
+			goto L80;
+			}
+			if (i__ < j) {
+			tmp = d__[i__];
+			d__[i__] = d__[j];
+			d__[j] = tmp;
+			goto L60;
+			}
+			if (j - start > endd - j - 1) {
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = start;
+			stack[(stkpnt << 1) - 1] = j;
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = j + 1;
+			stack[(stkpnt << 1) - 1] = endd;
+			} else {
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = j + 1;
+			stack[(stkpnt << 1) - 1] = endd;
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = start;
+			stack[(stkpnt << 1) - 1] = j;
+			}
+		} else {
+
+	/*           Sort into increasing order */
+
+			i__ = start - 1;
+			j = endd + 1;
+	L90:
+	L100:
+			--j;
+			if (d__[j] > dmnmx) {
+			goto L100;
+			}
+	L110:
+			++i__;
+			if (d__[i__] < dmnmx) {
+			goto L110;
+			}
+			if (i__ < j) {
+			tmp = d__[i__];
+			d__[i__] = d__[j];
+			d__[j] = tmp;
+			goto L90;
+			}
+			if (j - start > endd - j - 1) {
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = start;
+			stack[(stkpnt << 1) - 1] = j;
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = j + 1;
+			stack[(stkpnt << 1) - 1] = endd;
+			} else {
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = j + 1;
+			stack[(stkpnt << 1) - 1] = endd;
+			++stkpnt;
+			stack[(stkpnt << 1) - 2] = start;
+			stack[(stkpnt << 1) - 1] = j;
+			}
+		}
+		}
+		if (stkpnt > 0) {
+		goto L10;
+		}
+		return 0;
+
+	/*     End of ODSRT */
+
+	} /* odsrt_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odssq.hpp b/external/pmrrr/include/pmrrr/lapack/odssq.hpp
new file mode 100644
index 0000000000..d60f7f3445
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odssq.hpp
@@ -0,0 +1,125 @@
+/**
+	C++ template version of LAPACK routine dlassq.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSSQ_HPP__
+#define __ODSSQ_HPP__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odssq(int *n, FloatingType *x, int *incx, 
+		FloatingType *scale, FloatingType *sumsq)
+	{
+		/* System generated locals */
+		int i__1, i__2;
+		FloatingType d__1;
+
+		/* Local variables */
+		int ix;
+		FloatingType absxi;
+
+	/*  -- LAPACK auxiliary routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSSQ  returns the values  scl  and  smsq  such that */
+
+	/*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */
+
+	/*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is */
+	/*  assumed to be non-negative and  scl  returns the value */
+
+	/*     scl = max( scale, abs( x( i ) ) ). */
+
+	/*  scale and sumsq must be supplied in SCALE and SUMSQ and */
+	/*  scl and smsq are overwritten on SCALE and SUMSQ respectively. */
+
+	/*  The routine makes only one pass through the vector x. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  N       (input) INT */
+	/*          The number of elements to be used from the vector X. */
+
+	/*  X       (input) DOUBLE PRECISION array, dimension (N) */
+	/*          The vector for which a scaled sum of squares is computed. */
+	/*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */
+
+	/*  INCX    (input) INT */
+	/*          The increment between successive values of the vector X. */
+	/*          INCX > 0. */
+
+	/*  SCALE   (input/output) DOUBLE PRECISION */
+	/*          On entry, the value  scale  in the equation above. */
+	/*          On exit, SCALE is overwritten with  scl , the scaling factor */
+	/*          for the sum of squares. */
+
+	/*  SUMSQ   (input/output) DOUBLE PRECISION */
+	/*          On entry, the value  sumsq  in the equation above. */
+	/*          On exit, SUMSQ is overwritten with  smsq , the basic sum of */
+	/*          squares from which  scl  has been factored out. */
+
+	/* ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+		/* Parameter adjustments */
+		--x;
+
+		/* Function Body */
+		if (*n > 0) {
+		i__1 = (*n - 1) * *incx + 1;
+		i__2 = *incx;
+		for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+			if (x[ix] != 0.) {
+			absxi = (d__1 = x[ix], fabs(d__1));
+			if (*scale < absxi) {
+	/* Computing 2nd power */
+				d__1 = *scale / absxi;
+				*sumsq = *sumsq * (d__1 * d__1) + 1;
+				*scale = absxi;
+			} else {
+	/* Computing 2nd power */
+				d__1 = absxi / *scale;
+				*sumsq += d__1 * d__1;
+			}
+			}
+	/* L10: */
+		}
+		}
+		return 0;
+
+	/*     End of ODSSQ */
+
+	} /* odssq_ */
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/lapack/odstmr.hpp b/external/pmrrr/include/pmrrr/lapack/odstmr.hpp
new file mode 100644
index 0000000000..17d2f5d1c5
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/lapack/odstmr.hpp
@@ -0,0 +1,723 @@
+/**
+	C++ template version of LAPACK routine dstemr.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#ifndef __ODSTMR_HPP__
+#define __ODSTMR_HPP__
+
+#include <limits>
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include <cfloat>
+#include <cassert>
+
+#include <pmrrr/lapack/lapack.hpp>
+#include <pmrrr/lapack/odrre.hpp>
+#include <pmrrr/lapack/odrrc.hpp>
+#include <pmrrr/lapack/odrrj.hpp>
+#include <pmrrr/lapack/odrrr.hpp>
+#include <pmrrr/lapack/ode2.hpp>
+#include <pmrrr/lapack/odev2.hpp>
+#include <pmrrr/lapack/odrrv.hpp>
+#include <pmrrr/lapack/odsrt.hpp>
+#include <pmrrr/lapack/odnst.hpp>
+#include <pmrrr/blas/odcpy.hpp>
+#include <pmrrr/blas/odscal.hpp>
+#include <pmrrr/blas/odswap.hpp>
+
+/* Table of constant values */
+#define TRUE_ (1)
+#define FALSE_ (0)
+#define iabs(a) ( (a) > (0) ? (a) : (-a) )
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	template<typename FloatingType>
+	int odstmr(const char *jobz, const char *range, int *n, FloatingType *d__, 
+		FloatingType *e, FloatingType *vl, FloatingType *vu, int *il, 
+		int *iu, int *m, FloatingType *w, FloatingType *z__, int *ldz, 
+		 int *nzc, int *isuppz, int *tryrac, FloatingType *work, 
+		int *lwork, int *iwork, int *liwork, int *info)
+	{
+		/* Moved here to include floating type type */
+		static FloatingType c_b18 = .001;
+		static int c__1 = 1;
+		/* System generated locals */
+		int z_dim1, z_offset, i__1, i__2;
+		FloatingType d__1, d__2;
+
+		/* Builtin functions */
+		// FloatingType sqrt(FloatingType);
+
+		/* Local variables */
+		int i__, j;
+		FloatingType r1, r2;
+		int jj;
+		FloatingType cs;
+		int in;
+		FloatingType sn, wl, wu;
+		int iil, iiu;
+		FloatingType eps, tmp;
+		int indd, iend, jblk, wend;
+		FloatingType rmin, rmax;
+		int itmp;
+		FloatingType tnrm;
+		int inde2, itmp2;
+		FloatingType rtol1, rtol2;
+		FloatingType scale;
+		int indgp;
+		int iinfo, iindw, ilast;
+		int lwmin;
+		int wantz;
+		int alleig;
+		int ibegin;
+		int indeig;
+		int iindbl;
+		int valeig;
+		int wbegin;
+		FloatingType safmin;
+		FloatingType bignum;
+		int inderr, iindwk, indgrs, offset;
+		FloatingType thresh;
+		int iinspl, ifirst, indwrk, liwmin, nzcmin;
+		FloatingType pivmin;
+		int nsplit;
+		FloatingType smlnum;
+		int lquery, zquery;
+
+
+	/*  -- LAPACK computational routine (version 3.2) -- */
+	/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+	/*     November 2006 */
+
+	/*     .. Scalar Arguments .. */
+	/*     .. */
+	/*     .. Array Arguments .. */
+	/*     .. */
+
+	/*  Purpose */
+	/*  ======= */
+
+	/*  ODSTMR computes selected eigenvalues and, optionally, eigenvectors */
+	/*  of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */
+	/*  a well defined set of pairwise different real eigenvalues, the corresponding */
+	/*  real eigenvectors are pairwise orthogonal. */
+
+	/*  The spectrum may be computed either completely or partially by specifying */
+	/*  either an interval (VL,VU] or a range of indices IL:IU for the desired */
+	/*  eigenvalues. */
+
+	/*  Depending on the number of desired eigenvalues, these are computed either */
+	/*  by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */
+	/*  computed by the use of various suitable L D L^T factorizations near clusters */
+	/*  of close eigenvalues (referred to as RRRs, Relatively Robust */
+	/*  Representations). An informal sketch of the algorithm follows. */
+
+	/*  For each unreduced block (submatrix) of T, */
+	/*     (a) Compute T - sigma I  = L D L^T, so that L and D */
+	/*         define all the wanted eigenvalues to high relative accuracy. */
+	/*         This means that small relative changes in the entries of D and L */
+	/*         cause only small relative changes in the eigenvalues and */
+	/*         eigenvectors. The standard (unfactored) representation of the */
+	/*         tridiagonal matrix T does not have this property in general. */
+	/*     (b) Compute the eigenvalues to suitable accuracy. */
+	/*         If the eigenvectors are desired, the algorithm attains full */
+	/*         accuracy of the computed eigenvalues only right before */
+	/*         the corresponding vectors have to be computed, see steps c) and d). */
+	/*     (c) For each cluster of close eigenvalues, select a new */
+	/*         shift close to the cluster, find a new factorization, and refine */
+	/*         the shifted eigenvalues to suitable accuracy. */
+	/*     (d) For each eigenvalue with a large enough relative separation compute */
+	/*         the corresponding eigenvector by forming a rank revealing twisted */
+	/*         factorization. Go back to (c) for any clusters that remain. */
+
+	/*  For more details, see: */
+	/*  - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */
+	/*    to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */
+	/*    Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */
+	/*  - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */
+	/*    Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */
+	/*    2004.  Also LAPACK Working Note 154. */
+	/*  - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */
+	/*    tridiagonal eigenvalue/eigenvector problem", */
+	/*    Computer Science Division Technical Report No. UCB/CSD-97-971, */
+	/*    UC Berkeley, May 1997. */
+
+	/*  Notes: */
+	/*  1.ODSTMR works only on machines which follow IEEE-754 */
+	/*  floating-point standard in their handling of infinities and NaNs. */
+	/*  This permits the use of efficient inner loops avoiding a check for */
+	/*  zero divisors. */
+
+	/*  Arguments */
+	/*  ========= */
+
+	/*  JOBZ    (input) CHARACTER*1 */
+	/*          = 'N':  Compute eigenvalues only; */
+	/*          = 'V':  Compute eigenvalues and eigenvectors. */
+
+	/*  RANGE   (input) CHARACTER*1 */
+	/*          = 'A': all eigenvalues will be found. */
+	/*          = 'V': all eigenvalues in the half-open interval (VL,VU] */
+	/*                 will be found. */
+	/*          = 'I': the IL-th through IU-th eigenvalues will be found. */
+
+	/*  N       (input) INT */
+	/*          The order of the matrix.  N >= 0. */
+
+	/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the N diagonal elements of the tridiagonal matrix */
+	/*          T. On exit, D is overwritten. */
+
+	/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
+	/*          On entry, the (N-1) subdiagonal elements of the tridiagonal */
+	/*          matrix T in elements 1 to N-1 of E. E(N) need not be set on */
+	/*          input, but is used internally as workspace. */
+	/*          On exit, E is overwritten. */
+
+	/*  VL      (input) DOUBLE PRECISION */
+	/*  VU      (input) DOUBLE PRECISION */
+	/*          If RANGE='V', the lower and upper bounds of the interval to */
+	/*          be searched for eigenvalues. VL < VU. */
+	/*          Not referenced if RANGE = 'A' or 'I'. */
+
+	/*  IL      (input) INT */
+	/*  IU      (input) INT */
+	/*          If RANGE='I', the indices (in ascending order) of the */
+	/*          smallest and largest eigenvalues to be returned. */
+	/*          1 <= IL <= IU <= N, if N > 0. */
+	/*          Not referenced if RANGE = 'A' or 'V'. */
+
+	/*  M       (output) INT */
+	/*          The total number of eigenvalues found.  0 <= M <= N. */
+	/*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
+
+	/*  W       (output) DOUBLE PRECISION array, dimension (N) */
+	/*          The first M elements contain the selected eigenvalues in */
+	/*          ascending order. */
+
+	/*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
+	/*          If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */
+	/*          contain the orthonormal eigenvectors of the matrix T */
+	/*          corresponding to the selected eigenvalues, with the i-th */
+	/*          column of Z holding the eigenvector associated with W(i). */
+	/*          If JOBZ = 'N', then Z is not referenced. */
+	/*          Note: the user must ensure that at least max(1,M) columns are */
+	/*          supplied in the array Z; if RANGE = 'V', the exact value of M */
+	/*          is not known in advance and can be computed with a workspace */
+	/*          query by setting NZC = -1, see below. */
+
+	/*  LDZ     (input) INT */
+	/*          The leading dimension of the array Z.  LDZ >= 1, and if */
+	/*          JOBZ = 'V', then LDZ >= max(1,N). */
+
+	/*  NZC     (input) INT */
+	/*          The number of eigenvectors to be held in the array Z. */
+	/*          If RANGE = 'A', then NZC >= max(1,N). */
+	/*          If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */
+	/*          If RANGE = 'I', then NZC >= IU-IL+1. */
+	/*          If NZC = -1, then a workspace query is assumed; the */
+	/*          routine calculates the number of columns of the array Z that */
+	/*          are needed to hold the eigenvectors. */
+	/*          This value is returned as the first entry of the Z array, and */
+	/*          no error message related to NZC is issued by OERBLA. */
+
+	/*  ISUPPZ  (output) INT ARRAY, dimension ( 2*max(1,M) ) */
+	/*          The support of the eigenvectors in Z, i.e., the indices */
+	/*          indicating the nonzero elements in Z. The i-th computed eigenvector */
+	/*          is nonzero only in elements ISUPPZ( 2*i-1 ) through */
+	/*          ISUPPZ( 2*i ). This is relevant in the case when the matrix */
+	/*          is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */
+
+	/*  TRYRAC  (input/output) INT */
+	/*          If TRYRAC.EQ..TRUE., indicates that the code should check whether */
+	/*          the tridiagonal matrix defines its eigenvalues to high relative */
+	/*          accuracy.  If so, the code uses relative-accuracy preserving */
+	/*          algorithms that might be (a bit) slower depending on the matrix. */
+	/*          If the matrix does not define its eigenvalues to high relative */
+	/*          accuracy, the code can uses possibly faster algorithms. */
+	/*          If TRYRAC.EQ..FALSE., the code is not required to guarantee */
+	/*          relatively accurate eigenvalues and can use the fastest possible */
+	/*          techniques. */
+	/*          On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */
+	/*          does not define its eigenvalues to high relative accuracy. */
+
+	/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
+	/*          On exit, if INFO = 0, WORK(1) returns the optimal */
+	/*          (and minimal) LWORK. */
+
+	/*  LWORK   (input) INT */
+	/*          The dimension of the array WORK. LWORK >= max(1,18*N) */
+	/*          if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */
+	/*          If LWORK = -1, then a workspace query is assumed; the routine */
+	/*          only calculates the optimal size of the WORK array, returns */
+	/*          this value as the first entry of the WORK array, and no error */
+	/*          message related to LWORK is issued by OERBLA. */
+
+	/*  IWORK   (workspace/output) INT array, dimension (LIWORK) */
+	/*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
+
+	/*  LIWORK  (input) INT */
+	/*          The dimension of the array IWORK.  LIWORK >= max(1,10*N) */
+	/*          if the eigenvectors are desired, and LIWORK >= max(1,8*N) */
+	/*          if only the eigenvalues are to be computed. */
+	/*          If LIWORK = -1, then a workspace query is assumed; the */
+	/*          routine only calculates the optimal size of the IWORK array, */
+	/*          returns this value as the first entry of the IWORK array, and */
+	/*          no error message related to LIWORK is issued by OERBLA. */
+
+	/*  INFO    (output) INT */
+	/*          On exit, INFO */
+	/*          = 0:  successful exit */
+	/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
+	/*          > 0:  if INFO = 1X, internal error in ODRRE, */
+	/*                if INFO = 2X, internal error in ODRRV. */
+	/*                Here, the digit X = ABS( IINFO ) < 10, where IINFO is */
+	/*                the nonzero error code returned by ODRRE or */
+	/*                ODRRV, respectively. */
+
+
+	/*  Further Details */
+	/*  =============== */
+
+	/*  Based on contributions by */
+	/*     Beresford Parlett, University of California, Berkeley, USA */
+	/*     Jim Demmel, University of California, Berkeley, USA */
+	/*     Inderjit Dhillon, University of Texas, Austin, USA */
+	/*     Osni Marques, LBNL/NERSC, USA */
+	/*     Christof Voemel, University of California, Berkeley, USA */
+
+	/*  ===================================================================== */
+
+	/*     .. Parameters .. */
+	/*     .. */
+	/*     .. Local Scalars .. */
+	/*     .. */
+	/*     .. */
+	/*     .. External Functions .. */
+	/*     .. */
+	/*     .. External Subroutines .. */
+	/*     .. */
+	/*     .. Intrinsic Functions .. */
+	/*     .. */
+	/*     .. Executable Statements .. */
+
+	/*     Test the input parameters. */
+
+		/* Parameter adjustments */
+		--d__;
+		--e;
+		--w;
+		z_dim1 = *ldz;
+		z_offset = 1 + z_dim1;
+		z__ -= z_offset;
+		--isuppz;
+		--work;
+		--iwork;
+
+		/* Function Body */
+		wantz = olsame(jobz, "V");
+		alleig = olsame(range, "A");
+		valeig = olsame(range, "V");
+		indeig = olsame(range, "I");
+
+		lquery = *lwork == -1 || *liwork == -1;
+		zquery = *nzc == -1;
+	/*     ODSTMR needs WORK of size 6*N, IWORK of size 3*N. */
+	/*     In addition, ODRRE needs WORK of size 6*N, IWORK of size 5*N. */
+	/*     Furthermore, ODRRV needs WORK of size 12*N, IWORK of size 7*N. */
+		if (wantz) {
+		lwmin = *n * 18;
+		liwmin = *n * 10;
+		} else {
+	/*        need less workspace if only the eigenvalues are wanted */
+		lwmin = *n * 12;
+		liwmin = *n << 3;
+		}
+		wl = 0.;
+		wu = 0.;
+		iil = 0;
+		iiu = 0;
+		if (valeig) {
+	/*        We do not reference VL, VU in the cases RANGE = 'I','A' */
+	/*        The interval (WL, WU] contains all the wanted eigenvalues. */
+	/*        It is either given by the user or computed in ODRRE. */
+		wl = *vl;
+		wu = *vu;
+		} else if (indeig) {
+	/*        We do not reference IL, IU in the cases RANGE = 'V','A' */
+		iil = *il;
+		iiu = *iu;
+		}
+
+		*info = 0;
+		if (! (wantz || olsame(jobz, "N"))) {
+		*info = -1;
+		} else if (! (alleig || valeig || indeig)) {
+		*info = -2;
+		} else if (*n < 0) {
+		*info = -3;
+		} else if (valeig && *n > 0 && wu <= wl) {
+		*info = -7;
+		} else if (indeig && (iil < 1 || iil > *n)) {
+		*info = -8;
+		} else if (indeig && (iiu < iil || iiu > *n)) {
+		*info = -9;
+		} else if (*ldz < 1 || (wantz && *ldz < *n)) {
+		*info = -13;
+		} else if (*lwork < lwmin && ! lquery) {
+		*info = -17;
+		} else if (*liwork < liwmin && ! lquery) {
+		*info = -19;
+		}
+
+	/*     Get machine constants. */
+
+		safmin = std::numeric_limits<FloatingType>::min(); // safmin = odmch_("Safe minimum");
+		eps = std::numeric_limits<FloatingType>::epsilon(); // eps = odmch_("Precision");
+		smlnum = safmin / eps;
+		bignum = 1. / smlnum;
+		rmin = sqrt(smlnum);
+	/* Computing MIN */
+		d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin));
+		rmax = fmin(d__1,d__2);
+
+		if (*info == 0) {
+		work[1] = (FloatingType) lwmin;
+		iwork[1] = liwmin;
+
+		if (wantz && alleig) {
+			nzcmin = *n;
+		} else if (wantz && valeig) {
+			odrrc("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, &
+				itmp2, info);
+		} else if (wantz && indeig) {
+			nzcmin = iiu - iil + 1;
+		} else {
+	/*           WANTZ .EQ. FALSE. */
+			nzcmin = 0;
+		}
+		if (zquery && *info == 0) {
+			z__[z_dim1 + 1] = (FloatingType) nzcmin;
+		} else if (*nzc < nzcmin && ! zquery) {
+			*info = -14;
+		}
+		}
+		if (*info != 0) {
+
+		i__1 = -(*info);
+		oerbla("ODSTMR", &i__1);
+
+		return 0;
+		} else if (lquery || zquery) {
+		return 0;
+		}
+
+	/*     Handle N = 0, 1, and 2 cases immediately */
+
+		*m = 0;
+		if (*n == 0) {
+		return 0;
+		}
+
+		if (*n == 1) {
+		if (alleig || indeig) {
+			*m = 1;
+			w[1] = d__[1];
+		} else {
+			if (wl < d__[1] && wu >= d__[1]) {
+			*m = 1;
+			w[1] = d__[1];
+			}
+		}
+		if (wantz && ! zquery) {
+			z__[z_dim1 + 1] = 1.;
+			isuppz[1] = 1;
+			isuppz[2] = 1;
+		}
+		return 0;
+		}
+
+		if (*n == 2) {
+		if (! wantz) {
+			ode2(&d__[1], &e[1], &d__[2], &r1, &r2);
+		} else if (wantz && ! zquery) {
+			odev2(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn);
+		}
+		if (alleig || (valeig && r2 > wl && r2 <= wu) || (indeig && iil == 1)) {
+			++(*m);
+			w[*m] = r2;
+			if (wantz && ! zquery) {
+			z__[*m * z_dim1 + 1] = -sn;
+			z__[*m * z_dim1 + 2] = cs;
+	/*              Note: At most one of SN and CS can be zero. */
+			if (sn != 0.) {
+				if (cs != 0.) {
+				isuppz[(*m << 1) - 1] = 1;
+				isuppz[(*m << 1) - 1] = 2;
+				} else {
+				isuppz[(*m << 1) - 1] = 1;
+				isuppz[(*m << 1) - 1] = 1;
+				}
+			} else {
+				isuppz[(*m << 1) - 1] = 2;
+				isuppz[*m * 2] = 2;
+			}
+			}
+		}
+		if (alleig || (valeig && r1 > wl && r1 <= wu) || (indeig && iiu == 2)) {
+			++(*m);
+			w[*m] = r1;
+			if (wantz && ! zquery) {
+			z__[*m * z_dim1 + 1] = cs;
+			z__[*m * z_dim1 + 2] = sn;
+	/*              Note: At most one of SN and CS can be zero. */
+			if (sn != 0.) {
+				if (cs != 0.) {
+				isuppz[(*m << 1) - 1] = 1;
+				isuppz[(*m << 1) - 1] = 2;
+				} else {
+				isuppz[(*m << 1) - 1] = 1;
+				isuppz[(*m << 1) - 1] = 1;
+				}
+			} else {
+				isuppz[(*m << 1) - 1] = 2;
+				isuppz[*m * 2] = 2;
+			}
+			}
+		}
+		return 0;
+		}
+	/*     Continue with general N */
+		indgrs = 1;
+		inderr = (*n << 1) + 1;
+		indgp = *n * 3 + 1;
+		indd = (*n << 2) + 1;
+		inde2 = *n * 5 + 1;
+		indwrk = *n * 6 + 1;
+
+		iinspl = 1;
+		iindbl = *n + 1;
+		iindw = (*n << 1) + 1;
+		iindwk = *n * 3 + 1;
+
+	/*     Scale matrix to allowable range, if necessary. */
+	/*     The allowable range is related to the PIVMIN parameter; see the */
+	/*     comments in DLARRD.  The preference for scaling small values */
+	/*     up is heuristic; we expect users' matrices not to be close to the */
+	/*     RMAX threshold. */
+
+		scale = 1.;
+		tnrm = odnst("M", n, &d__[1], &e[1]);
+		if (tnrm > 0. && tnrm < rmin) {
+		scale = rmin / tnrm;
+		} else if (tnrm > rmax) {
+		scale = rmax / tnrm;
+		}
+		if (scale != 1.) {
+		blas::odscal(n, &scale, &d__[1], &c__1);
+		i__1 = *n - 1;
+		blas::odscal(&i__1, &scale, &e[1], &c__1);
+		tnrm *= scale;
+		if (valeig) {
+	/*           If eigenvalues in interval have to be found, */
+	/*           scale (WL, WU] accordingly */
+			wl *= scale;
+			wu *= scale;
+		}
+		}
+
+	/*     Compute the desired eigenvalues of the tridiagonal after splitting */
+	/*     into smaller subblocks if the corresponding off-diagonal elements */
+	/*     are small */
+	/*     THRESH is the splitting parameter for ODRRE */
+	/*     A negative THRESH forces the old splitting criterion based on the */
+	/*     size of the off-diagonal. A positive THRESH switches to splitting */
+	/*     which preserves relative accuracy. */
+
+		if (*tryrac) {
+	/*        Test whether the matrix warrants the more expensive relative approach. */
+		odrrr(n, &d__[1], &e[1], &iinfo);
+		} else {
+	/*        The user does not care about relative accurately eigenvalues */
+		iinfo = -1;
+		}
+	/*     Set the splitting criterion */
+		if (iinfo == 0) {
+		thresh = eps;
+		} else {
+		thresh = -eps;
+	/*        relative accuracy is desired but T does not guarantee it */
+		*tryrac = FALSE_;
+		}
+
+		if (*tryrac) {
+	/*        Copy original diagonal, needed to guarantee relative accuracy */
+		blas::odcpy(n, &d__[1], &c__1, &work[indd], &c__1);
+		}
+	/*     Store the squares of the offdiagonal values of T */
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+	/* Computing 2nd power */
+		d__1 = e[j];
+		work[inde2 + j - 1] = d__1 * d__1;
+	/* L5: */
+		}
+	/*     Set the tolerance parameters for bisection */
+		if (! wantz) {
+	/*        ODRRE computes the eigenvalues to full precision. */
+		rtol1 = eps * 4.;
+		rtol2 = eps * 4.;
+		} else {
+	/*        ODRRE computes the eigenvalues to less than full precision. */
+	/*        ODRRV will refine the eigenvalue approximations, and we can */
+	/*        need less accurate initial bisection in ODRRE. */
+	/*        Note: these settings do only affect the subset case and ODRRE */
+		rtol1 = sqrt(eps);
+	/* Computing MAX */
+		d__1 = sqrt(eps) * .005, d__2 = eps * 4.;
+		rtol2 = fmax(d__1,d__2);
+		}
+		odrre(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], &
+			rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[
+			inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[
+			indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo);
+		if (iinfo != 0) {
+		*info = iabs(iinfo) + 10;
+		return 0;
+		}
+	/*     Note that if RANGE .NE. 'V', ODRRE computes bounds on the desired */
+	/*     part of the spectrum. All desired eigenvalues are contained in */
+	/*     (WL,WU] */
+		if (wantz) {
+
+	/*        Compute the desired eigenvectors corresponding to the computed */
+	/*        eigenvalues */
+
+		odrrv(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, &
+			c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[
+			indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[
+			z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], &
+			iinfo);
+		if (iinfo != 0) {
+			*info = iabs(iinfo) + 20;
+			return 0;
+		}
+		} else {
+	/*        ODRRE computes eigenvalues of the (shifted) root representation */
+	/*        ODRRV returns the eigenvalues of the unshifted matrix. */
+	/*        However, if the eigenvectors are not desired by the user, we need */
+	/*        to apply the corresponding shifts from ODRRE to obtain the */
+	/*        eigenvalues of the original matrix. */
+		i__1 = *m;
+		for (j = 1; j <= i__1; ++j) {
+			itmp = iwork[iindbl + j - 1];
+			w[j] += e[iwork[iinspl + itmp - 1]];
+	/* L20: */
+		}
+		}
+
+		if (*tryrac) {
+	/*        Refine computed eigenvalues so that they are relatively accurate */
+	/*        with respect to the original matrix T. */
+		ibegin = 1;
+		wbegin = 1;
+		i__1 = iwork[iindbl + *m - 1];
+		for (jblk = 1; jblk <= i__1; ++jblk) {
+			iend = iwork[iinspl + jblk - 1];
+			in = iend - ibegin + 1;
+			wend = wbegin - 1;
+	/*           check if any eigenvalues have to be refined in this block */
+	L36:
+			if (wend < *m) {
+			if (iwork[iindbl + wend] == jblk) {
+				++wend;
+				goto L36;
+			}
+			}
+			if (wend < wbegin) {
+			ibegin = iend + 1;
+			goto L39;
+			}
+			offset = iwork[iindw + wbegin - 1] - 1;
+			ifirst = iwork[iindw + wbegin - 1];
+			ilast = iwork[iindw + wend - 1];
+			rtol2 = eps * 4.;
+			odrrj(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], 
+				&ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[
+				inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], &
+				pivmin, &tnrm, &iinfo);
+			ibegin = iend + 1;
+			wbegin = wend + 1;
+	L39:
+			;
+		}
+		}
+
+	/*     If matrix was scaled, then rescale eigenvalues appropriately. */
+
+		if (scale != 1.) {
+		d__1 = 1. / scale;
+		blas::odscal(m, &d__1, &w[1], &c__1);
+		}
+
+	/*     If eigenvalues are not in increasing order, then sort them, */
+	/*     possibly along with eigenvectors. */
+
+		if (nsplit > 1) {
+		if (! wantz) {
+			odsrt("I", m, &w[1], &iinfo);
+			if (iinfo != 0) {
+			*info = 3;
+			return 0;
+			}
+		} else {
+			i__1 = *m - 1;
+			for (j = 1; j <= i__1; ++j) {
+			i__ = 0;
+			tmp = w[j];
+			i__2 = *m;
+			for (jj = j + 1; jj <= i__2; ++jj) {
+				if (w[jj] < tmp) {
+				i__ = jj;
+				tmp = w[jj];
+				}
+	/* L50: */
+			}
+			if (i__ != 0) {
+				w[i__] = w[j];
+				w[j] = tmp;
+				if (wantz) {
+				blas::odswap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * 
+					z_dim1 + 1], &c__1);
+				itmp = isuppz[(i__ << 1) - 1];
+				isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1];
+				isuppz[(j << 1) - 1] = itmp;
+				itmp = isuppz[i__ * 2];
+				isuppz[i__ * 2] = isuppz[j * 2];
+				isuppz[j * 2] = itmp;
+				}
+			}
+	/* L60: */
+			}
+		}
+		}
+
+
+		work[1] = (FloatingType) lwmin;
+		iwork[1] = liwmin;
+		return 0;
+
+	/*     End of ODSTMR */
+
+	} /* odstmr_ */
+
+} //namespace lapack
+
+} //namespace pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/plarre.hpp b/external/pmrrr/include/pmrrr/plarre.hpp
new file mode 100644
index 0000000000..5ac09a9d63
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/plarre.hpp
@@ -0,0 +1,1224 @@
+/* Parallel computation of eigenvalues and symmetric tridiagonal 
+ * matrix T, given by its diagonal elements D and its super-/sub-
+ * diagonal elements E.
+ *
+ * Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.7
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __PLARRE_HPP__
+#define __PLARRE_HPP__
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/definitions/plarre.h>
+
+#include <pmrrr/lapack/odrra.hpp>
+#include <pmrrr/lapack/odebz.hpp>
+#include <pmrrr/lapack/odrrd.hpp>
+#include <pmrrr/lapack/odrnv.hpp>
+#include <pmrrr/lapack/odrrk.hpp>
+#include <pmrrr/lapack/odrrc.hpp>
+#include <pmrrr/lapack/odrrb.hpp>
+
+#define ONE                1.0
+#define HUNDRED          100.0
+#define HALF               0.5
+#define FOURTH             0.25
+
+namespace pmrrr {
+
+namespace detail{
+
+	/*
+	 * Hide functions relevant only to plarre in an anonymous namespace.
+	 */
+	namespace {
+
+		template<typename FloatingType>
+		static void *eigval_subset_thread_a(void *argin);
+
+		template<typename FloatingType>
+		static void *eigval_subset_thread_r(void *argin);
+
+		template<typename FloatingType>
+		static void clean_up_plarre(FloatingType*, FloatingType*, int*, int*, int*);
+
+		template<typename FloatingType>
+		int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, 
+					   int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+					   int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+					   FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+					   int *iwork);
+
+		template<typename FloatingType>
+		int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, 
+					   int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+					   int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+					   FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+					 int *iwork);
+
+		template<typename FloatingType>
+		int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, 
+					   int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+					   int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+					   FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+					   int *iwork);
+
+		template<typename FloatingType>
+		auxarg1_t<FloatingType> *create_auxarg1(int, FloatingType*, FloatingType*, FloatingType*, int, int, 
+					  int, int, int, int*, FloatingType,  FloatingType,
+					  FloatingType*, FloatingType*, FloatingType*, int*, int*);
+
+		template<typename FloatingType> 
+		void retrieve_auxarg1(auxarg1_t<FloatingType>*, int*, FloatingType**, FloatingType**, FloatingType**,
+					  int*, int*, int*, int*, int*, int**, FloatingType*, 
+					  FloatingType*, FloatingType**, FloatingType**, FloatingType**, int**, 
+					  int**);
+
+		template<typename FloatingType>
+		auxarg2_t<FloatingType> *create_auxarg2(int, FloatingType*, FloatingType*, int, int, FloatingType*,
+						  FloatingType*,FloatingType*,int*,FloatingType, FloatingType, FloatingType, FloatingType);
+
+		template<typename FloatingType>
+		void retrieve_auxarg2(auxarg2_t<FloatingType>*, int*, FloatingType**, FloatingType**, int*,
+					  int*, FloatingType**, FloatingType**, FloatingType**, int**, FloatingType*, FloatingType*, FloatingType*,
+					  FloatingType*);
+
+	}
+
+    /* Routine to compute eigenvalues */
+    template<typename FloatingType>
+    int plarre(proc_t *procinfo, char *jobz, char *range, in_t<FloatingType> *Dstruct, 
+               val_t<FloatingType> *Wstruct, tol_t<FloatingType> *tolstruct, int *nzp, int *offsetp)
+    {
+      /* input variables */
+      int              nproc  = procinfo->nproc;
+      bool             wantZ  = (jobz[0]  == 'V' || jobz[0]  == 'v');
+      bool             cntval = (jobz[0]  == 'C' || jobz[0]  == 'c');
+      int              n      = Dstruct->n;
+      FloatingType *restrict D      = Dstruct->D;
+      FloatingType *restrict E      = Dstruct->E;
+      int    *restrict isplit = Dstruct->isplit;
+      FloatingType           *vl    = Wstruct->vl;
+      FloatingType           *vu    = Wstruct->vu;
+      int              *il    = Wstruct->il;
+      int              *iu    = Wstruct->iu;
+      FloatingType *restrict W      = Wstruct->W;
+      FloatingType *restrict Werr   = Wstruct->Werr;
+      FloatingType *restrict Wgap   = Wstruct->Wgap;
+      int    *restrict Windex = Wstruct->Windex;
+      int    *restrict iblock = Wstruct->iblock;
+      FloatingType *restrict gersch = Wstruct->gersch;
+
+      /* constants */
+      int             IZERO = 0,   IONE = 1;
+      FloatingType          DZERO = 0.0;
+
+      /* work space */
+      FloatingType          *E2;
+      FloatingType         *work;
+      int             *iwork;
+
+      /* compute geschgorin disks and spectral diameter */
+      FloatingType          gl, gu, eold, emax, eabs;
+
+      /* compute splitting points */
+      int             bl_begin, bl_end, bl_size;
+
+      /* distribute work among processes */
+      int             ifirst, ilast, ifirst_tmp, ilast_tmp;
+      int             chunk, isize, iil, iiu;
+
+      /* gather results */
+      int             *rcount, *rdispl;
+
+      /* others */
+      int             info, i, j, jbl, idummy;
+      FloatingType          tmp1, dummy;
+      bool             sorted;
+      enum range_enum {allrng=1, valrng=2, indrng=3} irange;
+      FloatingType          intervals[2];
+      int             negcounts[2];
+      FloatingType          sigma;
+
+      if (range[0] == 'A' || range[0] == 'a') {
+        irange = allrng;
+      } else if (range[0] == 'V' || range[0] == 'v') {
+        irange = valrng;
+      } else if (range[0] == 'I' || range[0] == 'i') {
+        irange = indrng;
+      } else {
+        return 1;
+      }
+
+      /* allocate work space */
+      E2     = (FloatingType *) malloc(     n * sizeof(FloatingType) );
+      assert(E2 != NULL);
+      work   = (FloatingType *) malloc(   4*n * sizeof(FloatingType) );
+      assert(work != NULL);
+      iwork  = (int *)    malloc(   3*n * sizeof(int) );
+      assert(iwork != NULL);
+      rcount = (int *)    malloc( nproc * sizeof(int) );
+      assert(rcount != NULL);
+      rdispl = (int *)    malloc( nproc * sizeof(int) );
+      assert(rdispl != NULL);
+
+      /* Compute square of off-diagonal elements */
+      for (i=0; i<n-1; i++) {
+        E2[i] = E[i]*E[i];
+      }
+
+      /* compute geschgorin disks and spectral diameter */
+      gl     = D[0];
+      gu     = D[0];
+      eold   =  0.0;
+      emax   =  0.0;
+      E[n-1] =  0.0;
+
+      for (i=0; i<n; i++) {
+        eabs = fabs(E[i]);
+        if (eabs >= emax) emax = eabs;
+        tmp1 = eabs + eold;
+        gersch[2*i] = D[i] - tmp1;
+        gl = fmin(gl, gersch[2*i]);
+        gersch[2*i+1] = D[i] + tmp1;
+        gu = fmax(gu, gersch[2*i+1]);
+        eold = eabs;
+      }
+      /* min. pivot allowed in the Sturm sequence of T */
+      tolstruct->pivmin = std::numeric_limits<FloatingType>::min() * fmax(1.0, emax*emax);
+      /* estimate of spectral diameter */
+      Dstruct->spdiam = gu - gl;
+
+      /* compute splitting points with threshold "split" */
+      lapack::odrra(&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam,
+          &Dstruct->nsplit, isplit, &info);
+      assert(info == 0);
+
+      if (irange == allrng || irange == indrng) {
+        *vl = gl;
+        *vu = gu;
+      }
+
+      /* set eigenvalue indices in case of all or subset by value has
+       * to be computed; thereby convert all problem to subset by index
+       * computation */
+      if (irange == allrng) {
+        *il = 1;
+        *iu = n;
+      } else if (irange == valrng) {
+        intervals[0] = *vl; intervals[1] = *vu;
+        
+        /* find negcount at boundaries 'vl' and 'vu';
+         * needs work of dim(n) and iwork of dim(n) */
+        lapack::odebz(&IONE, &IZERO, &n, &IONE, &IONE, &IZERO,
+            &DZERO, &DZERO, &tolstruct->pivmin, D, E, E2, &idummy,
+            intervals, &dummy, &idummy, negcounts, work,
+            iwork, &info);
+        assert(info == 0);
+        
+        /* update negcounts of whole matrix with negcounts found for block */
+        *il = negcounts[0] + 1;
+        *iu = negcounts[1];
+      }
+
+      if (cntval && irange == valrng) {
+        /* clean up and return */
+        *nzp = iceil(*iu-*il+1, nproc);
+        clean_up_plarre(E2, work, iwork, rcount, rdispl);
+        return 0;
+      }
+
+
+      /* loop over unreduced blocks */  
+      bl_begin  = 0;
+      
+      for (jbl=0; jbl<Dstruct->nsplit; jbl++) {
+        
+        bl_end  = isplit[jbl] - 1;
+        bl_size = bl_end - bl_begin + 1;
+        
+        /* deal with 1x1 block immediately */
+        if (bl_size == 1) {
+          E[bl_end] = 0.0;
+          W[bl_begin]      = D[bl_begin];
+          Werr[bl_begin]   = 0.0;
+          Werr[bl_begin]   = 0.0;
+          iblock[bl_begin] = jbl + 1;
+          Windex[bl_begin] = 1;
+          bl_begin  = bl_end + 1;
+          continue;
+        }
+
+        /* Indix range of block */
+        iil = 1;
+        iiu = bl_size;
+
+        /* each process computes a subset of the eigenvalues of the block */
+        ifirst_tmp = iil;
+        for (i=0; i<nproc; i++) {
+          chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
+          if (i == nproc-1) {
+            ilast_tmp = iiu;
+          } else {
+            ilast_tmp = ifirst_tmp + chunk - 1;
+            ilast_tmp = imin(ilast_tmp, iiu);
+          }
+          if (i == procinfo->pid) {
+            ifirst    = ifirst_tmp;
+            ilast     = ilast_tmp;
+            isize     = ilast - ifirst + 1;
+            *offsetp = ifirst - iil;
+            *nzp      = isize;
+          }
+          rcount[i]  = ilast_tmp - ifirst_tmp + 1;
+          rdispl[i]  = ifirst_tmp - iil;
+          ifirst_tmp = ilast_tmp + 1;
+          ifirst_tmp = imin(ifirst_tmp, iiu + 1);
+        }
+        
+        /* approximate eigenvalues of input assigned to process */
+        if (isize != 0) {      
+          info = eigval_approx_proc(procinfo, ifirst, ilast,
+                        bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
+                        &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
+                        tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
+                        work, iwork);
+          assert(info == 0);    
+        }
+
+        /* compute root representation of block */
+        info = eigval_root_proc(procinfo, ifirst, ilast,
+                      bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
+                      &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
+                      tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
+                      work, iwork);
+        assert(info == 0);    
+
+        /* refine eigenvalues assigned to process w.r.t root */
+        if (isize != 0) {
+          info = eigval_refine_proc(procinfo, ifirst, ilast,
+                        bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
+                        &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
+                        tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
+                        work, iwork);
+          assert(info == 0);    
+        }
+        
+        memcpy(work, &W[bl_begin], isize * sizeof(FloatingType) );
+        MPI_Allgatherv(work, isize, float_traits<FloatingType>::mpi_type(), &W[bl_begin], rcount, rdispl,
+               float_traits<FloatingType>::mpi_type(), procinfo->comm);
+        
+        memcpy(work, &Werr[bl_begin], isize * sizeof(FloatingType) );
+        MPI_Allgatherv(work, isize, float_traits<FloatingType>::mpi_type(), &Werr[bl_begin], rcount, rdispl,
+               float_traits<FloatingType>::mpi_type(), procinfo->comm);
+        
+        memcpy(iwork, &Windex[bl_begin], isize * sizeof(int) );
+        MPI_Allgatherv(iwork, isize, MPI_INT, &Windex[bl_begin], rcount, rdispl,
+               MPI_INT, procinfo->comm);
+        
+        /* Ensure that within block eigenvalues sorted */
+        sorted = false;
+        while (sorted == false) {
+            sorted = true;
+            for (j=bl_begin; j < bl_end; j++) {
+              if (W[j+1] < W[j]) {
+                sorted = false;
+                tmp1 = W[j];
+                W[j] = W[j+1];
+                W[j+1] = tmp1;
+                tmp1 = Werr[j];
+                Werr[j] = Werr[j+1];
+                Werr[j+1] = tmp1;
+              }
+            }
+        }
+        
+        /* Set indices index correctly */
+        for (j=bl_begin; j <= bl_end; j++)
+          iblock[j] = jbl + 1;
+        
+        /* Recompute gaps within the blocks */
+        for (j = bl_begin; j < bl_end; j++) {
+          Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) );
+        }
+        sigma = E[bl_end];
+        Wgap[bl_end] = fmax(0.0, (gu - sigma) - (W[bl_end] + Werr[bl_end]) );
+
+        /* Compute UNSHIFTED eigenvalues */
+        if (!wantZ) {
+          sigma = E[bl_end];
+          for (i=bl_begin; i<=bl_end; i++) {
+        W[i]   += sigma;
+          }
+        }
+        
+        /* Proceed with next block */
+        bl_begin  = bl_end  + 1;
+      }
+      /* end of loop over unreduced blocks */    
+      
+      /* free memory */
+      clean_up_plarre(E2, work, iwork, rcount, rdispl);
+      
+      return 0;
+    }
+  
+	namespace {
+
+		/*
+		 * Free's on allocated memory of plarre routine
+		 */
+		template<typename FloatingType>
+		void clean_up_plarre(FloatingType *E2, FloatingType *work, int *iwork, 
+					 int *rcount, int *rdispl)
+		{
+		  free(E2);
+		  free(work);
+		  free(iwork);
+		  free(rcount);
+		  free(rdispl);
+		}
+		
+		#ifndef DISABLE_PTHREADS
+		    template<typename FloatingType> 
+		    int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, 
+					       int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+					       int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+					       FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+					       int *iwork)
+		    {
+		      /* Input parameter */
+		      int              isize        = ilast-ifirst+1;
+		      FloatingType       pivmin       = tolstruct->pivmin;
+
+
+		      /* /\* Multithreading *\/ */
+		      int              max_nthreads = procinfo->nthreads;
+		      int            iifirst, iilast, chunk;
+		      pthread_t      *threads;
+		      pthread_attr_t attr;
+		      auxarg1_t<FloatingType>      *auxarg1;
+
+		      /* Others */
+		      int    info, m, i, j;
+		      FloatingType dummy;
+		      
+		      /* Allocate workspace */
+		      int *isplit = (int *) malloc( n * sizeof(int) );
+		      assert(isplit != NULL);
+		      threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
+		      assert(threads != NULL);
+
+		      /* This is an unreduced block */
+		      int nsplit = 1;
+		      isplit[0] = n;
+		      
+		      if (max_nthreads > 1) {
+			    pthread_attr_init(&attr);
+			    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+			    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
+		      }
+
+		      /* Set tolerance parameters */
+		      FloatingType bsrtol = sqrt(std::numeric_limits<FloatingType>::epsilon());    
+
+
+		      /* APPROXIMATE EIGENVALUES */
+
+		      /* compute approximations of the eigenvalues with muliple threads */
+		      /* equivalent to: */
+		      /* dlarrd_("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, */
+		      /*         &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, */
+		      /*         &wl, &wu, iblock, Windex, work, iwork, &info); */
+		      /* assert(info == 0); */
+		      /* assert(m == ilast-ifirst+1); */
+		      
+		      int nthreads = max_nthreads;
+		      while (nthreads > 1 && isize / nthreads < 2)
+			    nthreads--;
+
+		      if (nthreads > 1) {
+		
+			    /* each threads computes W[iifirst:iilast] and places them in
+			     * work[0:n-1]; the corresponding errors in work[n:2*n-1];
+			     * the blocks they belong in iwork[0:n-1]; and their indices in
+			     * iwork[n:2*n-1]; */
+		
+			    iifirst = ifirst;
+			    chunk = isize / nthreads;
+			    for (i=1; i<nthreads; i++) {
+			      
+			      iilast = iifirst + chunk - 1;
+
+			      auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
+						       nsplit, isplit, bsrtol, pivmin, gersch,
+						       &work[0], &work[n], &iwork[n], &iwork[0]);
+			      
+			      info = pthread_create(&threads[i], &attr,
+						    eigval_subset_thread_a<FloatingType>,
+						    (void *) auxarg1);
+			      assert(info == 0);
+			      
+			      iifirst = iilast + 1;
+			    }
+			    iilast = ilast;
+
+			    auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
+						     nsplit, isplit, bsrtol, pivmin, gersch,
+						     &work[0], &work[n], &iwork[n], &iwork[0]);
+		
+			    void * status = eigval_subset_thread_a<FloatingType>( (void *) auxarg1 );
+			    assert(status == NULL);
+		
+			    /* join threads */
+			    for (i=1; i<nthreads; i++) {
+			      info = pthread_join(threads[i], &status);
+			      assert(info == 0 && status == NULL);
+			    }
+		
+			    /* m counts the numbers of eigenvalues computed by process */
+			    m = isize;
+			    for (j=0; j<isize; j++) {
+			      W[j]      = work[j];
+			      Werr[j]   = work[j+n];
+			      iblock[j] = iwork[j];
+			      Windex[j] = iwork[j+n];
+			    }
+		
+		      } else {
+			    /* no multithreaded computation */
+
+                FloatingType wl, wu;
+			    lapack::odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch,
+		      	    &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr,
+		      	    &wl, &wu, iblock, Windex, work, iwork, &info);
+			    assert(info == 0);
+			    assert(m == ilast-ifirst+1);
+		      }
+
+		      /* clean up */
+		      free(threads);
+		      free(isplit);
+
+		      if (max_nthreads > 1) {
+			    pthread_attr_destroy(&attr);
+		      }
+
+		      return 0;
+		    }
+        #else
+		    template<typename FloatingType> 
+            int eigval_approx_proc
+            (proc_t *procinfo, int ifirst, int ilast, 
+             int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+             int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+             FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+             int *iwork)
+            {
+              /* Input parameter */
+              FloatingType pivmin = tolstruct->pivmin;
+
+              /* Allocate workspace */
+              int *isplit = (int *) malloc( n * sizeof(int) );
+              assert(isplit != NULL);
+
+              /* This is an unreduced block */
+              int nsplit = 1;
+              isplit[0] = n;
+              
+              /* Set tolerance parameters */
+		      FloatingType bsrtol = sqrt(std::numeric_limits<FloatingType>::epsilon());    
+
+              /* APPROXIMATE EIGENVALUES */
+              int m, info;
+              FloatingType wl, wu, dummy;
+              lapack::odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch,
+                &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr,
+                &wl, &wu, iblock, Windex, work, iwork, &info);
+              assert(info == 0);
+              assert(m == ilast-ifirst+1);
+
+              /* clean up */
+              free(isplit);
+
+              return 0;
+            }
+        #endif
+
+		template<typename FloatingType> 
+		int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, 
+					   int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+					   int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+					   FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+					   int *iwork)
+		{
+		  /* Input parameter */
+		  FloatingType       pivmin       = tolstruct->pivmin;
+
+		  /* Create random vector to perturb rrr, same seed */
+		  int    two_n = 2*n;
+		  int    iseed[4] = {1,1,1,1};
+
+		  FloatingType Dpivot, Dmax;
+		  bool   noREP;
+
+		  int   info, i, j;
+		  int   IONE = 1, ITWO = 2;
+		  FloatingType tmp, tmp1, tmp2;
+
+		  /* Set tolerance parameters (need to be same as in refine function) */
+		  FloatingType rtl = sqrt(std::numeric_limits<FloatingType>::epsilon());
+		  
+		  /* Allocate workspace */
+		  FloatingType *randvec = (FloatingType *) malloc( 2*n * sizeof(FloatingType) );
+		  assert(randvec != NULL);
+
+		  /* create random vector to perturb rrr and broadcast it */
+		  lapack::odrnv(&ITWO, iseed, &two_n, randvec);
+		  
+		  /* store shift of initial RRR, here set to zero */
+		  E[n-1] = 0.0;
+
+		  /* find outer bounds GL, GU for block and spectral diameter */
+		  FloatingType gl = D[0];
+		  FloatingType gu = D[0];
+		  for (i = 0; i < n; i++) {
+			gl = fmin(gl, gersch[2*i]  );
+			gu = fmax(gu, gersch[2*i+1]);
+		  }
+		  FloatingType spdiam = gu - gl;
+		  
+		  /* find approximation of extremal eigenvalues of the block
+		   * odrrk computes one eigenvalue of tridiagonal matrix T
+		   * tmp1 and tmp2 one hold the eigenvalue and error, respectively */
+		  lapack::odrrk(&n, &IONE, &gl, &gu, D, E2,
+			  &pivmin, &rtl, &tmp1, &tmp2, &info);
+		  assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
+		
+		  FloatingType isleft = fmax(gl, tmp1-tmp2 - HUNDRED*std::numeric_limits<FloatingType>::epsilon()*fabs(tmp1-tmp2) );
+		
+		  lapack::odrrk(&n, &n, &gl, &gu, D, E2,
+		  	    &pivmin, &rtl, &tmp1, &tmp2, &info);
+		  assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
+		
+		  FloatingType isright = fmin(gu, tmp1+tmp2 + HUNDRED*std::numeric_limits<FloatingType>::epsilon()*fabs(tmp1+tmp2) );
+		  
+		  spdiam = isright - isleft;
+		  
+		  /* compute negcount at points s1 and s2 */
+		  FloatingType s1 = isleft  + HALF   * spdiam;
+		  FloatingType s2 = isright - FOURTH * spdiam;  /* not needed currently */
+
+		  /* compute negcount at points s1 and s2 */
+		  /* cnt = number of eigenvalues in (s1,s2] = count_right - count_left
+		   * negcnt_lft = number of eigenvalues smaller equals than s1
+		   * negcnt_rgt = number of eigenvalues smaller equals than s2 */
+          int cnt, negcnt_lft, negcnt_rgt;
+		  lapack::odrrc("T", &n, &s1, &s2, D, E, &pivmin,
+			  &cnt, &negcnt_lft, &negcnt_rgt, &info);
+		  assert(info == 0);
+		  
+		  /* if more of the desired eigenvectors are in the left part shift left
+		   * and the other way around */
+          int sgndef;
+          FloatingType sigma;
+		  if ( negcnt_lft >= n - negcnt_lft ) {
+			/* shift left */
+			sigma = isleft;
+			sgndef = ONE;
+		  } else {
+			/* shift right */
+			sigma = isright;
+			sgndef = -ONE;
+		  }
+
+		  /* define increment to perturb initial shift to find RRR
+		   * with not too much element growth */
+		  FloatingType tau = spdiam*std::numeric_limits<FloatingType>::epsilon()*n + 2.0*pivmin;
+
+
+		  /* try to find initial RRR of block:
+		   * need work space of 3*n here to store D, L, D^-1 of possible
+		   * representation:
+		   * D_try      = work[0  :  n-1]
+		   * L_try      = work[n  :2*n-1]
+		   * inv(D_try) = work[2*n:3*n-1] */
+
+		  int off_L    = n;
+		  int off_invD = 2*n;
+
+          int jtry;
+		  for (jtry = 0; jtry < MAX_TRY_RRR; jtry++) {
+
+			Dpivot  = D[0] - sigma;
+			work[0] = Dpivot;
+			Dmax    = fabs( work[0] );
+			j = 0;
+
+			for (i = 0; i < n-1; i++) {
+			  work[i+off_invD] = 1.0 / work[i];
+			  tmp = E[j] * work[i+off_invD];
+			  work[i+off_L] = tmp;
+			  Dpivot = (D[j+1] - sigma) - tmp*E[j];
+			  work[i+1] = Dpivot;
+			  Dmax = fmax(Dmax, fabs(Dpivot) );
+			  j++;
+			}
+			  
+			/* except representation only if not too much element growth */
+			if (Dmax > MAX_GROWTH*spdiam) {
+			  noREP = true;
+			} else {
+			  noREP = false;
+			}
+			  
+			if (noREP == true) {
+			  /* if all eigenvalues are desired shift is made definite to use DQDS
+			   * so we should not end here */
+			  if (jtry == MAX_TRY_RRR-2) {
+			if (sgndef == ONE) { /* floating point comparison okay here */
+			  sigma = gl - FUDGE_FACTOR*spdiam*std::numeric_limits<FloatingType>::epsilon()*n
+				- FUDGE_FACTOR*2.0*pivmin;
+			} else {
+			  sigma = gu + FUDGE_FACTOR*spdiam*std::numeric_limits<FloatingType>::epsilon()*n
+				+ FUDGE_FACTOR*2.0*pivmin;
+			}
+			  } else if (jtry == MAX_TRY_RRR-1) {
+			fprintf(stderr,"No initial representation could be found.\n");
+			exit(3);
+			  } else {
+			sigma -= sgndef*tau;
+			tau   *= 2.0;
+			continue;
+			  }
+			} else {   /* found representation */
+			  break;
+			}
+		  }
+		  /* end trying to find initial RRR of block */
+
+		  /* save initial RRR and corresponding shift */
+		  memcpy(D, &work[0],  n    * sizeof(FloatingType) );
+		  memcpy(E, &work[n], (n-1) * sizeof(FloatingType) );
+		  E[n-1] = sigma;
+		  /* work[0:4*n-1] can now be used again for anything */
+
+		  /* perturb root rrr by small relative amount, first make sure
+		   * that at least two values are actually disturbed enough,
+		   * which might not be necessary */
+		  while( fabs(randvec[0])*RAND_FACTOR < 1.0 )
+			randvec[0] *= 2.0;
+		  while( fabs(randvec[n-1])  *RAND_FACTOR < 1.0 )
+			randvec[n-1]   *= 2.0;
+
+		  for (i=0; i<n-1; i++) {
+			D[i] *= 1.0 + std::numeric_limits<FloatingType>::epsilon()*RAND_FACTOR*randvec[i];
+			E[i] *= 1.0 + std::numeric_limits<FloatingType>::epsilon()*RAND_FACTOR*randvec[i+n];
+		  }
+		  D[n-1] *= 1.0 + std::numeric_limits<FloatingType>::epsilon()*RAND_FACTOR*randvec[n-1];
+
+		  /* clean up */
+		  free(randvec);
+
+		  return 0;
+		}
+
+        #ifndef DISABLE_PTHREADS
+	        template<typename FloatingType> 
+	        int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, 
+				           int n, FloatingType *D, FloatingType *E, FloatingType *E2,  
+				           int *Windex, int *iblock, FloatingType *gersch, tol_t<FloatingType> *tolstruct, 
+				           FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work,
+				           int *iwork)
+	        {
+	          /* Input parameter */
+	          int              isize        = ilast-ifirst+1;
+	          FloatingType       pivmin       = tolstruct->pivmin;
+
+	          /* Multithreading */
+	          int            nthreads;
+	          int            max_nthreads = procinfo->nthreads;
+	          int            chunk;
+	          pthread_t      *threads;
+	          pthread_attr_t attr;
+	          auxarg2_t<FloatingType>      *auxarg2;
+
+	          int info, i;
+
+	          /* Allocate space */
+	          threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
+	          assert(threads != NULL);
+	          int *isplit = (int *) malloc( n * sizeof(int) );
+	          assert(isplit != NULL);
+
+	          /* This is an unreduced block */
+	          isplit[0] = n;
+	          
+	          /* Prepare multi-threading */
+	          if (max_nthreads > 1) {
+		        pthread_attr_init(&attr);
+		        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+		        pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
+	          }
+
+	          /* find outer bounds GL, GU for block and spectral diameter */
+	          FloatingType gl = D[0];
+	          FloatingType gu = D[0];
+	          for (i = 0; i < n; i++) {
+		        gl = fmin(gl, gersch[2*i]  );
+		        gu = fmax(gu, gersch[2*i+1]);
+	          }
+	          FloatingType spdiam = gu - gl;
+
+	          /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */
+	          
+	          int i_low = Windex[0];
+	          int i_upp = Windex[isize-1];
+	          FloatingType sigma = E[n-1];
+
+	          /* calculate gaps */
+	          for (i=0; i<isize-1; i++) {
+		        Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
+	          }
+	          Wgap[isize-1] = fmax(0.0, gu - (W[isize-1] + Werr[isize-1]) );
+	
+	          /* shift eigenvalues to be consistent with dqds
+	           * and compute eigenvalues of SHIFTED matrix */
+	          for (i=0; i<isize; i++) {
+		        W[i]    -= sigma;
+		        Werr[i] += fabs(W[i])*std::numeric_limits<FloatingType>::epsilon();
+	          }
+
+	          /* work  for sequential odrrb = work[0:2*n-1]
+	           * iwork for sequential odrrb = iwork[0:2*n-1]
+	           * DE2 = work[2*n:3*n-1] strting at bl_begin */
+	          int off_DE2 = 2*n;
+	
+	          /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
+	          for (i=0; i<n; i++) {
+		        work[i+off_DE2] = D[i]*E[i]*E[i];
+	          }
+	
+	          nthreads = max_nthreads;
+	          while (nthreads > 1 && isize/nthreads < 2) {
+		        nthreads--;
+	          }
+
+	          if (nthreads > 1) {
+
+		        int rf_begin = 0, rf_end;
+		        chunk    = isize / nthreads;
+		        for (i=1; i<nthreads; i++) {
+		          
+		          rf_end = rf_begin + chunk - 1;
+			            
+		          auxarg2 = create_auxarg2(n, D,
+					           &work[off_DE2],
+					           rf_begin, rf_end, W, Werr, Wgap, Windex,
+					           tolstruct->rtol1, tolstruct->rtol2,
+					           pivmin, spdiam);
+		          
+		          info = pthread_create(&threads[i], &attr,
+	          			      eigval_subset_thread_r<FloatingType>,
+					        (void *) auxarg2);
+		          assert(info == 0);
+		          
+		          rf_begin = rf_end + 1;
+		        }
+		        rf_end = isize-1;
+
+		        auxarg2 = create_auxarg2(n, D,
+					         &work[off_DE2],
+					         rf_begin, rf_end, W, Werr, Wgap, Windex,
+					         tolstruct->rtol1, tolstruct->rtol2,
+					         pivmin, spdiam);
+		          
+		        void * status = eigval_subset_thread_r<FloatingType>( (void *) auxarg2 );
+		        assert(status == NULL);
+	
+		        /* join threads */
+		        for (i=1; i<nthreads; i++) {
+		          info = pthread_join(threads[i], &status);
+		          assert(info == 0 && status == NULL);
+		        }
+		        /* should update gaps at splitting points here, but the gaps
+		         * will be recomputed anyway */
+		          
+	          } else {
+	
+		        int offset = i_low-1;
+	
+		        /* refine eigenvalues found by odrrb for i_low:i_upp */
+		        lapack::odrrb(&n, D, &work[off_DE2], &i_low,
+			        &i_upp, &tolstruct->rtol1, &tolstruct->rtol2, &offset, W, Wgap, 
+			        Werr, work, iwork, &pivmin, &spdiam, &n, &info);
+		        assert(info == 0);
+		        /* needs work of dim(2*n) and iwork of dim(2*n) */
+	          }
+	          /* odrrb computes gaps correctly, but not last one;
+	           * this is ignored since the gaps are recomputed anyway */
+	          
+	          /* clean up */
+	          free(threads);
+	          free(isplit);
+	          
+	          if (max_nthreads > 1) {
+		        pthread_attr_destroy(&attr);
+	          }
+	          
+	          return 0;
+	        }
+        #else
+            template<typename FloatingType>
+            int eigval_refine_proc
+            (proc_t *procinfo, int ifirst, int ilast, 
+             int n, FloatingType  *D, FloatingType  *E, FloatingType  *E2,  
+             int *Windex, int *iblock, FloatingType  *gersch, tol_t<FloatingType> *tolstruct, 
+             FloatingType  *W, FloatingType  *Werr, FloatingType  *Wgap, FloatingType  *work, int *iwork)
+            {
+              /* Input parameter */
+              int    isize  = ilast-ifirst+1;
+              FloatingType  pivmin = tolstruct->pivmin;
+
+              /* Allocate space */
+              int *isplit = (int *) malloc( n * sizeof(int) );
+              assert(isplit != NULL);
+
+              /* This is an unreduced block (nsplit=1) */
+              isplit[0] = n;
+              
+              /* find outer bounds GL, GU for block and spectral diameter */
+              FloatingType  gl = D[0];
+              FloatingType  gu = D[0];
+              int i;
+              for (i = 0; i < n; i++) {
+                gl = fmin(gl, gersch[2*i]  );
+                gu = fmax(gu, gersch[2*i+1]);
+              }
+              FloatingType  spdiam = gu - gl;
+
+              /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */
+              
+              int i_low = Windex[0];
+              int i_upp = Windex[isize-1];
+              FloatingType  sigma = E[n-1];
+
+              /* calculate gaps */
+              for (i=0; i<isize-1; i++) {
+                Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
+              }
+              Wgap[isize-1] = fmax(0.0, gu - (W[isize-1] + Werr[isize-1]) );
+                
+              /* shift eigenvalues to be consistent with dqds
+               * and compute eigenvalues of SHIFTED matrix */
+              for (i=0; i<isize; i++) {
+                W[i]    -= sigma;
+                Werr[i] += fabs(W[i])*std::numeric_limits<FloatingType>::epsilon();
+              }
+
+              /* work  for sequential odrrb = work[0:2*n-1]
+               * iwork for sequential odrrb = iwork[0:2*n-1]
+               * DE2 = work[2*n:3*n-1] strting at bl_begin */
+              int off_DE2 = 2*n;
+                
+              /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
+              for (i=0; i<n; i++) {
+                work[i+off_DE2] = D[i]*E[i]*E[i];
+              }
+                
+              int offset = i_low-1;
+                
+              /* refine eigenvalues found by odrrb for i_low:i_upp */
+              int info;
+              lapack::odrrb(&n, D, &work[off_DE2], &i_low,
+                &i_upp, &tolstruct->rtol1, &tolstruct->rtol2, &offset, W, Wgap, 
+                Werr, work, iwork, &pivmin, &spdiam, &n, &info);
+              assert(info == 0);
+              /* needs work of dim(2*n) and iwork of dim(2*n) */
+
+              /* odrrb computes gaps correctly, but not last one;
+               * this is ignored since the gaps are recomputed anyway */
+              
+              /* clean up */
+              free(isplit);
+              
+              return 0;
+            }
+        #endif
+
+		template<typename FloatingType>
+		void *eigval_subset_thread_a(void *argin)
+		{
+		  /* from input argument */
+		  int    n, il, iu, my_il, my_iu;
+		  FloatingType *D, *E, *E2, *gersch;
+		  FloatingType bsrtol, pivmin;
+		  int    nsplit, *isplit;
+
+		  /* others */
+		  int    info;
+		  FloatingType dummy1, dummy2;
+		  int    num_vals;
+		  FloatingType *W_tmp, *Werr_tmp, *W, *Werr;
+		  int    *iblock_tmp, *Windex_tmp, *iblock, *Windex;
+		  FloatingType *work;
+		  int    *iwork;
+		  
+		  retrieve_auxarg1((auxarg1_t<FloatingType> *) argin, &n, &D, &E, &E2,
+				   &il, &iu, &my_il, &my_iu, &nsplit,
+				   &isplit, &bsrtol, &pivmin, &gersch,
+				   &W, &Werr, &Windex, &iblock);
+
+		  /* allocate memory */
+		  W_tmp = (FloatingType *) malloc( n * sizeof(FloatingType) );
+		  assert(W_tmp != NULL);
+		  
+		  Werr_tmp = (FloatingType *) malloc( n * sizeof(FloatingType) );
+		  assert(Werr_tmp != NULL);
+		  
+		  Windex_tmp = (int *) malloc( n * sizeof(int) );
+		  assert(Windex_tmp != NULL);
+
+		  iblock_tmp = (int *) malloc( n * sizeof(int) );
+		  assert(iblock_tmp != NULL);
+
+		  work  = (FloatingType *) malloc( 4*n * sizeof(FloatingType) );
+		  assert (work != NULL);
+
+		  iwork = (int *) malloc( 3*n * sizeof(int) );
+		  assert (iwork != NULL);
+
+		  /* compute eigenvalues 'my_il' to 'my_iu', put into temporary arrays */
+		  lapack::odrrd("I", "B", &n, &dummy1, &dummy2, &my_il, &my_iu, gersch,
+		  	  &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &num_vals,
+		  	  W_tmp, Werr_tmp, &dummy1, &dummy2, iblock_tmp, Windex_tmp,
+		  	  work, iwork, &info);
+
+		  assert(info == 0);
+
+		  /* copy computed values in W, Werr, Windex, iblock (which are work space) */
+		  memcpy(&W[my_il-il],      W_tmp,      num_vals * sizeof(FloatingType) );
+		  memcpy(&Werr[my_il-il],   Werr_tmp,   num_vals * sizeof(FloatingType) );
+		  memcpy(&Windex[my_il-il], Windex_tmp, num_vals * sizeof(int)    );
+		  memcpy(&iblock[my_il-il], iblock_tmp, num_vals * sizeof(int)    );
+		  
+		  free(W_tmp);
+		  free(Werr_tmp);
+		  free(Windex_tmp);
+		  free(iblock_tmp);
+		  free(work);
+		  free(iwork);
+
+		  return NULL;
+		}
+
+		template<typename FloatingType> 
+		auxarg1_t<FloatingType> *create_auxarg1(int n, FloatingType *D, FloatingType *E, FloatingType *E2,
+					  int il, int iu, int my_il, int my_iu, 
+					  int nsplit, int *isplit, FloatingType bsrtol, 
+					  FloatingType pivmin, FloatingType *gersch, FloatingType *W, 
+					  FloatingType *Werr, int *Windex, int *iblock)
+		{
+		  auxarg1_t<FloatingType> *arg;
+
+		  arg = (auxarg1_t<FloatingType> *) malloc( sizeof(auxarg1_t<FloatingType>) );
+		  assert(arg != NULL);
+
+		  arg->n       = n;
+		  arg->D       = D;
+		  arg->E       = E;
+		  arg->E2      = E2;
+		  arg->il      = il;
+		  arg->iu      = iu;
+		  arg->my_il   = my_il;
+		  arg->my_iu   = my_iu;
+		  arg->nsplit  = nsplit;
+		  arg->isplit  = isplit;
+		  arg->bsrtol  = bsrtol;
+		  arg->pivmin  = pivmin;
+		  arg->gersch  = gersch;
+		  arg->W       = W;
+		  arg->Werr    = Werr;
+		  arg->Windex  = Windex;
+		  arg->iblock  = iblock;
+
+		  return arg;
+		}
+
+		template<typename FloatingType>
+		void retrieve_auxarg1(auxarg1_t<FloatingType> *arg, int *n, FloatingType **D, FloatingType **E,
+					  FloatingType **E2, int *il, int *iu, int *my_il, 
+					  int *my_iu, int *nsplit, int **isplit, 
+					  FloatingType *bsrtol, FloatingType *pivmin, FloatingType **gersch, 
+					  FloatingType **W, FloatingType **Werr, int **Windex, 
+					  int **iblock)
+		{
+		  *n      = arg->n;
+		  *D      = arg->D;
+		  *E      = arg->E;
+		  *E2     = arg->E2;
+		  *il     = arg->il;
+		  *iu     = arg->iu;
+		  *my_il  = arg->my_il;
+		  *my_iu  = arg->my_iu;
+		  *nsplit = arg->nsplit;
+		  *isplit = arg->isplit;
+		  *bsrtol = arg->bsrtol;
+		  *pivmin = arg->pivmin;
+		  *gersch = arg->gersch;
+		  *W      = arg->W;
+		  *Werr   = arg->Werr;
+		  *Windex = arg->Windex;
+		  *iblock = arg->iblock;
+
+		  free(arg);
+		}
+
+		template<typename FloatingType> 
+		void *eigval_subset_thread_r(void *argin)
+		{
+		  /* from input argument */
+		  int          bl_size, rf_begin, rf_end;
+		  FloatingType       *D, *DE2;
+		  FloatingType       rtol1, rtol2, pivmin;
+		  FloatingType       bl_spdiam;
+
+		  /* others */
+		  int          info, offset;
+		  FloatingType       *W, *Werr, *Wgap;
+		  int          *Windex;
+		  FloatingType       *work;
+		  int          *iwork;
+
+		  retrieve_auxarg2((auxarg2_t<FloatingType> *) argin, &bl_size, &D, &DE2,
+				   &rf_begin, &rf_end, &W, &Werr, &Wgap, &Windex, &rtol1, &rtol2,
+				   &pivmin, &bl_spdiam);
+
+		  /* malloc work space */
+		  work = (FloatingType *) malloc( 2*bl_size * sizeof(FloatingType) );
+		  assert(work != NULL);
+		  
+		  iwork = (int *)   malloc( 2*bl_size * sizeof(int) );
+		  assert(iwork != NULL);
+
+		  /* special case of only one eigenvalue */
+		  if (rf_begin == rf_end)
+			Wgap[rf_begin] = 0.0;
+		 
+		  offset = Windex[rf_begin] - 1;
+		  
+		  /* call bisection routine to refine the eigenvalues */
+		  lapack::odrrb(&bl_size, D, DE2, &Windex[rf_begin], &Windex[rf_end],
+			  &rtol1, &rtol2, &offset, &W[rf_begin], &Wgap[rf_begin],
+			  &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam,
+			  &bl_size, &info);
+		  assert(info == 0);
+
+		  /* clean up */
+		  free(work);
+		  free(iwork);
+
+		  return NULL;
+		}
+
+		template<typename FloatingType> 
+		auxarg2_t<FloatingType> *create_auxarg2(int bl_size, FloatingType *D, FloatingType *DE2,
+					  int rf_begin, int rf_end, FloatingType *W, FloatingType *Werr,
+					  FloatingType *Wgap, int *Windex,    
+					  FloatingType rtol1, FloatingType rtol2, FloatingType pivmin, 
+					  FloatingType bl_spdiam)
+		{
+		  auxarg2_t<FloatingType> *arg;
+
+		  arg = (auxarg2_t<FloatingType> *) malloc( sizeof(auxarg2_t<FloatingType>) );
+		  assert(arg != NULL);
+
+		  arg->bl_size   = bl_size;
+		  arg->D         = D;
+		  arg->DE2       = DE2;
+		  arg->rf_begin  = rf_begin;
+		  arg->rf_end    = rf_end;
+		  arg->W   = W;
+		  arg->Werr   = Werr;
+		  arg->Wgap   = Wgap;
+		  arg->Windex   = Windex;
+		  arg->rtol1     = rtol1;
+		  arg->rtol2     = rtol2;
+		  arg->pivmin    = pivmin;
+		  arg->bl_spdiam = bl_spdiam;
+
+		  return arg;
+		}
+
+		template<typename FloatingType>
+		void retrieve_auxarg2(auxarg2_t<FloatingType> *arg, int *bl_size, FloatingType **D,
+					  FloatingType **DE2, int *rf_begin, int *rf_end,
+					  FloatingType **W, FloatingType **Werr, FloatingType **Wgap, int **Windex, 
+					  FloatingType *rtol1, FloatingType *rtol2,
+					  FloatingType *pivmin, FloatingType *bl_spdiam)
+		{
+		  *bl_size   = arg->bl_size;
+		  *D         = arg->D;
+		  *DE2       = arg->DE2;
+		  *rf_begin  = arg->rf_begin;
+		  *rf_end    = arg->rf_end;
+		  *W   = arg->W;
+		  *Werr   = arg->Werr;
+		  *Wgap   = arg->Wgap;
+		  *Windex   = arg->Windex;
+		  *rtol1     = arg->rtol1;
+		  *rtol2     = arg->rtol2;
+		  *pivmin    = arg->pivmin;
+		  *bl_spdiam = arg->bl_spdiam;
+
+		  free(arg);
+		}
+	}	// anonymous
+
+}	// detail
+
+}	// pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/plarrv.hpp b/external/pmrrr/include/pmrrr/plarrv.hpp
new file mode 100644
index 0000000000..1077c358de
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/plarrv.hpp
@@ -0,0 +1,744 @@
+/* Parallel computation of eigenvectors and symmetric tridiagonal 
+ * matrix T, which is preprocessed by the routine 'plarre'.
+ *
+ * Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __PLARRV_HPP__
+#define __PLARRV_HPP__
+
+#include <algorithm>
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/plarrv.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/queue.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/definitions/counter.h>
+
+#include <pmrrr/process_c_task.hpp>
+#include <pmrrr/process_s_task.hpp>
+#include <pmrrr/process_r_task.hpp>
+#include <pmrrr/rrr.hpp>
+#include <pmrrr/tasks.hpp>
+
+using std::sort;
+
+namespace pmrrr { namespace detail{
+
+	namespace {
+
+		template<typename FloatingType>
+		int assign_to_proc(proc_t *procinfo, in_t<FloatingType> *Dstruct,
+					  val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct, int *nzp,
+					  int *myfirstp);
+
+		template<typename FloatingType>
+		int init_workQ(proc_t *procinfo, in_t<FloatingType> *Dstruct,
+					   val_t<FloatingType> *Wstruct, int *nzp,
+					   workQ_t *workQ);
+
+		template<typename FloatingType>
+		void *empty_workQ(void*);
+
+		static workQ_t *create_workQ();
+
+		static void destroy_workQ(workQ_t*);
+
+		template<typename FloatingType>
+		auxarg3_t<FloatingType> *create_auxarg3(int, proc_t*, val_t<FloatingType>*, vec_t<FloatingType>*,
+						 tol_t<FloatingType>*, workQ_t*, counter_t*);
+
+		template<typename FloatingType>
+		void retrieve_auxarg3(auxarg3_t<FloatingType>*, int*, proc_t**, val_t<FloatingType>**,
+						 vec_t<FloatingType>**, tol_t<FloatingType>**, workQ_t**, 
+						 counter_t**);
+
+		template<typename FloatingType> 
+		bool cmp(const sort_struct_t<FloatingType> & arg1, const sort_struct_t<FloatingType> & arg2);
+
+	}
+
+
+/*
+ * Computation of eigenvectors of a symmetric tridiagonal
+ */
+#ifndef DISABLE_PTHREADS
+    template<typename FloatingType>
+    int plarrv(proc_t *procinfo, in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct,
+	       vec_t<FloatingType> *Zstruct, tol_t<FloatingType> *tolstruct, int *nzp,
+	       int *myfirstp)
+    {
+      /* Input variables */
+      int            	   nthreads = procinfo->nthreads;
+      int            	   n        = Dstruct->n;
+      FloatingType         *W       = Wstruct->W;
+
+      /* Allocate work space and copy eigenvalues */
+      FloatingType *Wshifted = (FloatingType *) malloc( n * sizeof(FloatingType) );
+      assert(Wshifted != NULL);
+
+      memcpy(Wshifted, W, n*sizeof(FloatingType));
+      Wstruct->Wshifted = Wshifted;
+     
+      /* Multi-threading */
+      pthread_t *threads = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
+      assert(threads != NULL);
+
+      /* Assign eigenvectors to processes */
+      assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp,
+		                myfirstp);
+      
+      /* Create work queue Q, counter, threads to empty Q */
+      workQ_t *workQ = create_workQ();
+      counter_t *num_left = PMR_create_counter(*nzp);
+
+      threads[0] = pthread_self();
+      pthread_attr_t attr;
+      pthread_attr_init(&attr);
+      pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+      pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); 
+
+      int i;
+      for (i=1; i<nthreads; i++) {
+        auxarg3_t<FloatingType> *auxarg =
+            create_auxarg3(i, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
+        int info = pthread_create(&threads[i], &attr, empty_workQ<FloatingType>, (void *) auxarg);
+        assert(info == 0);
+      }
+
+      /* Initialize work queue of process */
+      int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ);
+      assert(info == 0);
+
+      /* Empty the work queue */
+      auxarg3_t<FloatingType> * auxarg =
+        create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
+      void * status = empty_workQ<FloatingType>((void *) auxarg);
+      assert(status == NULL);
+
+      /* Join all the worker thread */
+      for (i=1; i<nthreads; i++) {
+        info = pthread_join(threads[i], &status);
+        assert(info == 0 && status == NULL);
+      }
+
+      /* Clean up */
+      free(Wshifted);
+      free(threads);
+      pthread_attr_destroy(&attr);
+      destroy_workQ(workQ);
+      PMR_destroy_counter(num_left);
+
+      return 0;
+    }
+#else
+    template<typename FloatingType>
+    int plarrv(proc_t *procinfo, in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct,
+	       vec_t<FloatingType> *Zstruct, tol_t<FloatingType> *tolstruct, int *nzp,
+	       int *myfirstp)
+    {
+      int     n  = Dstruct->n;
+      FloatingType  *W = Wstruct->W;
+
+      /* Allocate work space and copy eigenvalues */
+      FloatingType *Wshifted = (FloatingType*)malloc(n*sizeof(FloatingType));
+      assert(Wshifted != NULL);
+
+      memcpy(Wshifted, W, n*sizeof(FloatingType));
+      Wstruct->Wshifted = Wshifted;
+
+      /* Assign eigenvectors to processes */
+      assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp);
+
+      /* Create work queue Q, counter, threads to empty Q */
+      workQ_t *workQ = create_workQ();
+      counter_t *num_left = PMR_create_counter(*nzp);
+
+      /* Initialize work queue of process */
+      int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ);
+      assert(info == 0);
+
+      /* Empty the work queue */
+      auxarg3_t<FloatingType> *auxarg = 
+        create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
+      void *status = empty_workQ<FloatingType>((void*)auxarg);
+      assert(status == NULL);
+
+      /* Clean up */
+      free(Wshifted);
+      destroy_workQ(workQ);
+      PMR_destroy_counter(num_left);
+
+      return 0;
+    }
+#endif
+
+	namespace {
+
+		/* 
+		 * Assign the computation of eigenvectors to the processes
+		 */
+		template<typename FloatingType> 
+		int assign_to_proc(proc_t *procinfo, in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct,
+				   vec_t<FloatingType> *Zstruct, int *nzp, int *myfirstp)
+		{
+		  /* From inputs */
+		  int              pid     = procinfo->pid;
+		  int              nproc   = procinfo->nproc;
+		  FloatingType *restrict L       = Dstruct->E;
+		  int    *restrict isplit  = Dstruct->isplit;
+		  int              n       = Wstruct->n;
+		  int              il      = *(Wstruct->il);
+		  int              iu      = *(Wstruct->iu);
+		  FloatingType *restrict W       = Wstruct->W;
+		  int    *restrict Windex  = Wstruct->Windex;
+		  int    *restrict iblock  = Wstruct->iblock;
+		  int    *restrict iproc   = Wstruct->iproc;
+		  int    *restrict Zindex  = Zstruct->Zindex;
+
+		  sort_struct_t<FloatingType> *array =
+            (sort_struct_t<FloatingType> *) malloc(n*sizeof(sort_struct_t<FloatingType>));
+
+          int i;
+		  for (i=0; i<n; i++) {
+			/* Find shift of block */
+			int iblk                    = iblock[i];
+			int ishift                  = isplit[iblk-1] - 1;
+			/* Apply shift so that unshifted eigenvalues can be sorted */
+			array[i].lambda    = W[i] + L[ishift]; 
+			array[i].local_ind = Windex[i];
+			array[i].block_ind = iblk;
+			array[i].ind       = i;
+		  }
+
+		  /* Alternatively could sort list of pointers, which
+			 requires less data copying */
+		  sort(array, array + n, cmp<FloatingType>);
+
+		  /* Mark eigenvectors that do not need to be computed */
+          int j;
+		  for (j = 0; j < il-1; j++ ) {
+			iproc[array[j].ind]  = -1;
+			Zindex[array[j].ind] = -1;
+		  }
+
+		  int isize = iu - il + 1;
+
+		  int ibegin = il - 1, iend;
+          int id;
+		  for (id=0; id<nproc; id++) {
+
+			int chunk = imax(1, isize/nproc + (id < isize%nproc));
+		
+			if (id==nproc-1) {
+			  iend = iu - 1;
+			} else {
+			  iend = ibegin + chunk - 1;
+			  iend = imin(iend, iu -1);
+			}
+
+			int k = 0;
+			for (j=ibegin; j<=iend; j++) {
+			  iproc[array[j].ind]  = id;
+			  Zindex[array[j].ind] = k;
+			  k++;
+			}
+
+			if (id == pid) {
+			  *myfirstp   = ibegin - il + 1;
+			  *nzp        = iend - ibegin + 1;
+			  Zstruct->nz = *nzp; 
+			}
+
+			ibegin = iend + 1;
+			ibegin = imin(ibegin, iu);
+		  } /* end id */
+
+		  for (j = iend+1; j < n; j++ ) {
+			iproc[array[j].ind]  = -1;
+			Zindex[array[j].ind] = -1;
+		  }
+		  
+		  free(array);
+		  return 0;
+		}
+
+		/* 
+		 * Compare function for using qsort() on an array of 
+		 * sort_structs
+		 */
+		template<typename FloatingType> 
+		bool cmp(const sort_struct_t<FloatingType> & arg1, const sort_struct_t<FloatingType> & arg2)
+		{
+		  /* Within block local index decides */
+		  if (arg1.block_ind == arg2.block_ind) {
+			// Was: (arg1.local_ind - arg2.local_ind)
+			// Now: true iff arg1.local_ind is lower, false for equal or greater
+			return arg1.local_ind < arg2.local_ind;
+		  } else {
+			if (arg1.lambda < arg2.lambda) {
+			  return true;
+			} else if (arg1.lambda > arg2.lambda) {
+			  return false;
+			} else {
+			  if (arg1.local_ind < arg2.local_ind)
+			return true;
+			  else
+			return false;
+			}
+		  }
+		}
+
+
+		/*
+		 * Initialize work queue by putting all tasks for the process
+		 * into the work queue.
+		 */
+		template<typename FloatingType>
+		int init_workQ(proc_t *procinfo, in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct,
+				   int *nzp, workQ_t *workQ)
+		{
+		  /* Input arguments */
+		  int              pid      = procinfo->pid;
+		  int              nproc    = procinfo->nproc;
+		  int              nthreads = procinfo->nthreads;
+		  FloatingType *restrict D        = Dstruct->D;
+		  FloatingType *restrict L        = Dstruct->E;
+		  int              nsplit   = Dstruct->nsplit;
+		  int    *restrict isplit   = Dstruct->isplit;
+		  FloatingType *restrict W        = Wstruct->W;
+		  FloatingType *restrict Werr     = Wstruct->Werr;
+		  FloatingType *restrict Wgap     = Wstruct->Wgap;
+		  int    *restrict iproc    = Wstruct->iproc;
+		  FloatingType *restrict Wshifted = Wstruct->Wshifted;
+		  FloatingType *restrict gersch   = Wstruct->gersch;
+		  int              nz       = *nzp;
+
+		  /* Loop over blocks */
+		  int i, j, k, l;
+		  int ibegin = 0;
+		  for ( j=0; j<nsplit; j++ ) {
+
+			int iend   = isplit[j] - 1;
+			int isize  = iend - ibegin + 1;
+			FloatingType sigma  = L[iend];
+
+			/* Use Gerschgorin disks to find spectral diameter */
+			FloatingType gl = gersch[2*ibegin    ];
+			FloatingType gu = gersch[2*ibegin + 1];
+			for (i=ibegin+1; i<iend; i++) {
+			  gl = fmin(gl, gersch[2*i    ]);
+			  gu = fmax(gu, gersch[2*i + 1]);
+			}
+			FloatingType spdiam = gu - gl;
+			FloatingType avggap = spdiam / (isize-1);
+
+			/* Find eigenvalues in block */
+			FloatingType nbl = 0;
+			FloatingType iWbegin = iend   + 1;
+			FloatingType iWend   = ibegin - 1;
+			for (i=ibegin; i<=iend; i++) {
+			  if (nbl == 0 && iproc[i] == pid) {
+			iWbegin = i;
+			iWend   = i;
+			nbl++;
+			k = i+1;
+			while (k <=iend && iproc[k] == pid) {
+			  iWend++;
+			  nbl++;
+			  k++;
+			}
+			/* iWend = iWbegin + nbl - 1; instead of incrementing in loop */
+			  }
+			}
+
+			/* If no eigenvalues for process in block continue */
+			if (nbl == 0) {
+			  ibegin = iend + 1;
+			  continue;
+			}
+
+			/* Compute DL and DLL for later computation of singletons
+			 * (freed when all singletons of root are computed) */
+			FloatingType *DL  = (FloatingType *) malloc(isize * sizeof(FloatingType));
+			assert(DL != NULL);
+		
+			FloatingType *DLL = (FloatingType *) malloc(isize * sizeof(FloatingType));
+			assert(DLL != NULL);
+
+			for (i=0; i<isize-1; i++) {
+			  FloatingType tmp = D[i+ibegin]*L[i+ibegin];
+			  DL[i]  = tmp;
+			  DLL[i] = tmp*L[i+ibegin];
+			}
+
+			rrr_t<FloatingType> *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0);
+			PMR_increment_rrr_dependencies(RRR);
+		
+			/* In W apply shift of current block to eigenvalues
+			 * to get unshifted values w.r.t. T */
+			for (i=ibegin; i<=iend; i++) {
+			  W[i] += sigma;
+			}
+
+			/* Split eigenvalues of block into singletons and clusters
+			 * and add them to process work queue */
+			int max_size = imax(1, nz/nthreads);
+			bool task_inserted = false;
+            int new_first=ibegin, new_last;
+            int sn_first, sn_last, sn_size;
+			for (i=ibegin; i<=iend; i++) {    
+
+			  if (i == iend)
+			    new_last = i;
+			  else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]))
+			    new_last = i;
+			  else
+			    continue;
+
+			  /* Skip rest if no eigenvalues of process */
+			  if (new_first > iWend || new_last < iWbegin) {
+			    new_first = i + 1;
+			    continue;
+			  }
+
+			  int new_size = new_last - new_first + 1;
+			  
+			  if (new_size == 1) {
+			    /* Singleton was found */
+	
+			    if (new_first < iWbegin || new_first > iWend) {
+			      new_first = i + 1;
+			      continue;
+			    } else {
+			      if (new_first==iWbegin || task_inserted==true) {
+				    /* Initialize new singleton task */
+				    sn_first = new_first;
+				    sn_last  = new_first;
+				    sn_size  = 1;
+			      } else {
+				    /* Extend singleton task by one */
+				    sn_last++;
+				    sn_size++;
+			      }
+			    }
+
+			    /* Insert task if ... */
+			    if (i==iWend || sn_size>=max_size ||
+				    Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {
+
+                  FloatingType lgap;
+			      if (sn_first == ibegin) {
+				    lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
+			      } else {
+				    lgap = Wgap[sn_first-1];
+			      }
+
+			      PMR_increment_rrr_dependencies(RRR);
+
+			      task_t *task = PMR_create_s_task(sn_first, sn_last, 1, ibegin, 
+						       iend, spdiam, lgap, RRR);
+			      
+		     	  PMR_insert_task_at_back(workQ->s_queue, task);
+			      
+			      task_inserted = true;
+			    } else {
+			      task_inserted = false;
+			    }
+
+			  } else {
+			    /* Cluster was found */
+
+			    int cl_first = new_first;
+			    int cl_last  = new_last;
+			    int cl_size  = new_size;
+
+			    /* Split cluster into clusters by absolut criterion */
+			    if (cl_size > 3) {
+
+			      /* Split cluster to smaller clusters [cl_first:cl_last] */
+			      for (k=new_first+1; k<new_last; k++) {
+
+				    if (k == new_last-1)
+				      cl_last = new_last;
+				    else if (k != cl_first && Wgap[k] > 0.8*avggap)
+				      cl_last = k;
+				    else
+				      continue;
+
+				    /* Skip cluster if no eigenvalues of process in it */
+				    if (cl_last < iWbegin || cl_first > iWend) {
+				      cl_first = k + 1;
+				      continue;
+				    }
+
+				    /* Record left gap of cluster */
+                    FloatingType lgap;
+				    if (cl_first == ibegin) {
+				      lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl);
+				    } else {
+				      lgap = Wgap[cl_first-1];
+				    }
+
+				    /* Determine processes involved in processing the cluster */
+				    int left_pid  = nproc-1;
+				    int right_pid = 0;
+				    for (l=cl_first; l<=cl_last; l++) {
+				      if (iproc[l] != -1) {
+				        left_pid  = imin(left_pid,  iproc[l]);
+				        right_pid = imax(right_pid, iproc[l]);
+				      }
+				    }
+				
+				    /* 
+				     * We have to explicitly specify the type because neither NULL nor nullptr
+				     * can't be used for template type deduction.
+				     */
+				    rrr_t<FloatingType> *RRR_parent = PMR_create_rrr<FloatingType>(&D[ibegin], &L[ibegin], 
+							    NULL, NULL, isize, 0);
+
+				    task_t *task = PMR_create_c_task(cl_first, cl_last, 1, ibegin, 
+							     iend, spdiam, lgap, iWbegin, 
+							     iWend, left_pid, right_pid, 
+							     RRR_parent);
+
+				    /* Insert task into queue, depending if cluster need
+				     * communication with other processes */
+				    if (left_pid != right_pid)
+				      PMR_insert_task_at_back(workQ->r_queue, task);
+				    else
+				      PMR_insert_task_at_back(workQ->c_queue, task);
+			
+				    cl_first = k + 1;
+			      } /* end k */
+
+			    } else {
+			      /* Cluster is too small to split, so insert it to queue */
+
+			      /* Record left gap of cluster */
+                  FloatingType lgap;
+			      if (cl_first == ibegin) {
+				    lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
+			      } else {
+				    lgap = Wgap[cl_first-1];
+			      }
+
+			      /* Determine processes involved */
+			      int left_pid  = nproc-1;
+			      int right_pid = 0;
+			      for (l=cl_first; l<=cl_last; l++) {
+				    if (iproc[l] != -1) {
+				      left_pid  = imin(left_pid,  iproc[l]);
+				      right_pid = imax(right_pid, iproc[l]);
+				    }
+			      }
+
+                  /* 
+                   * We have to explicitly specify the type because neither NULL nor nullptr
+                   * can't be used for template type deduction.
+                   */
+			      rrr_t<FloatingType> *RRR_parent = PMR_create_rrr<FloatingType>(&D[ibegin], &L[ibegin], 
+							      NULL, NULL, isize, 0);
+
+			      task_t *task = PMR_create_c_task(cl_first, cl_last, 1, ibegin, 
+						       iend, spdiam, lgap, iWbegin, iWend,
+						       left_pid, right_pid, RRR_parent);
+
+			      /* Insert task into queue, depending if cluster need
+			       * communication with other processes */
+			      if (left_pid != right_pid)
+				    PMR_insert_task_at_back(workQ->r_queue, task);
+			      else
+				    PMR_insert_task_at_back(workQ->c_queue, task);
+			      
+			    }
+			    task_inserted = true;
+
+			  } /* end new_size */
+
+			  new_first = i + 1;
+			} /* end of splitting eigenvalues into tasks */
+
+			/* Set flag in RRR that last singleton is created */
+			PMR_set_parent_processed_flag(RRR);
+			PMR_try_destroy_rrr(RRR);
+
+			ibegin = iend + 1;
+		  } /* end loop over blocks */ 
+
+		  return 0;
+		}
+
+		/*
+		 * Processes all the tasks put in the work queue.
+		 */
+		template<typename FloatingType> 
+		void *empty_workQ(void *argin)
+		{
+		  int          tid;
+		  proc_t       *procinfo;
+		  val_t<FloatingType>        *Wstruct;
+		  vec_t<FloatingType>        *Zstruct;
+		  tol_t<FloatingType>        *tolstruct;
+		  workQ_t *workQ;
+		  counter_t    *num_left;
+          retrieve_auxarg3((auxarg3_t<FloatingType> *) argin, &tid, &procinfo, &Wstruct,
+				   &Zstruct, &tolstruct, &workQ, &num_left);
+
+		  int n = Wstruct->n;
+
+		  /* max. needed double precision work space: odr1v */
+		  FloatingType *work = (FloatingType *) malloc(4*n * sizeof(FloatingType));
+		  assert(work != NULL);
+
+		  /* max. needed double precision work space: odrrb */
+		  int *iwork = (int *) malloc(2*n * sizeof(int)   );
+		  assert(iwork != NULL);
+
+
+		  /* while loop to empty the work queue */
+		  while (PMR_get_counter_value(num_left) > 0) {
+
+			/* empty r-queue before processing other tasks */
+			PMR_process_r_queue(tid, procinfo, Wstruct, Zstruct, tolstruct,
+					workQ, num_left, work, iwork);
+
+			task_t *task = PMR_remove_task_at_front(workQ->s_queue);
+			if ( task != NULL ) {
+			  assert(task->flag == SINGLETON_TASK_FLAG);
+
+			  PMR_process_s_task((singleton_t<FloatingType> *) task->data, tid, procinfo,
+					 Wstruct, Zstruct, tolstruct, num_left, 
+					 work, iwork);
+			  free(task);
+			  continue;
+			}
+		
+			task = PMR_remove_task_at_front(workQ->c_queue);
+			if ( task != NULL ) {
+			  assert(task->flag == CLUSTER_TASK_FLAG);
+
+			  PMR_process_c_task((cluster_t<FloatingType> *) task->data, tid, procinfo,
+					 Wstruct, Zstruct, tolstruct, workQ,
+					 num_left, work, iwork);
+			  free(task);
+			  continue;
+			}
+		
+		  } /* end while */
+
+		  free(work);
+		  free(iwork);
+
+		  return NULL;
+		}
+
+		static workQ_t *create_workQ()
+		{
+		  workQ_t *wq;
+
+		  wq = (workQ_t *) malloc(sizeof(workQ_t));
+
+		  wq->r_queue = PMR_create_empty_queue();
+		  wq->s_queue = PMR_create_empty_queue();
+		  wq->c_queue = PMR_create_empty_queue();
+
+		  return(wq);
+		}
+
+		static void destroy_workQ(workQ_t *wq)
+		{
+		  PMR_destroy_queue(wq->r_queue);
+		  PMR_destroy_queue(wq->s_queue);
+		  PMR_destroy_queue(wq->c_queue);
+		  free(wq);
+		}
+
+		template<typename FloatingType>
+		auxarg3_t<FloatingType>*
+		create_auxarg3(int tid, proc_t *procinfo, val_t<FloatingType> *Wstruct,
+				   vec_t<FloatingType> *Zstruct, tol_t<FloatingType> *tolstruct,
+				   workQ_t *workQ, counter_t *num_left)
+		{
+		  auxarg3_t<FloatingType> *arg =
+            (auxarg3_t<FloatingType> *) malloc( sizeof(auxarg3_t<FloatingType>) );
+		  assert(arg != NULL);
+
+		  arg->tid         = tid;
+		  arg->procinfo    = procinfo;
+		  arg->Wstruct     = Wstruct;
+		  arg->Zstruct     = Zstruct;
+		  arg->tolstruct   = tolstruct; 
+		  arg->workQ       = workQ;
+		  arg->num_left    = num_left;
+
+		  return arg;
+		}
+
+
+
+		template<typename FloatingType>
+		void 
+		retrieve_auxarg3(auxarg3_t<FloatingType> *arg, int *tid, proc_t **procinfo,
+				 val_t<FloatingType> **Wstruct, vec_t<FloatingType> **Zstruct,
+				 tol_t<FloatingType> **tolstruct, workQ_t **workQ,
+				 counter_t **num_left)
+		{
+		  *tid         = arg->tid;
+		  *procinfo    = arg->procinfo;
+		  *Wstruct     = arg->Wstruct;
+		  *Zstruct     = arg->Zstruct;
+		  *tolstruct   = arg->tolstruct;
+		  *workQ  = arg->workQ;
+		  *num_left    = arg->num_left;
+
+		  free(arg);
+		}
+	}
+
+}
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/pmrrr.hpp b/external/pmrrr/include/pmrrr/pmrrr.hpp
new file mode 100644
index 0000000000..1125fb45d2
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/pmrrr.hpp
@@ -0,0 +1,1223 @@
+/* Computation of eigenvalues and eigenvectors of a symmetric
+ * tridiagonal matrix T, given by its diagonal elements D
+ * and its super-/subdiagonal elements E.
+ *
+ * See INCLUDE/pmrrr.h for more information.
+ *
+ * Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __PMRRR_HPP__
+#define __PMRRR_HPP__
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/plarrv.hpp>
+#include <pmrrr/plarre.hpp>
+#include <pmrrr/lapack/odstmr.hpp>
+#include <pmrrr/lapack/odrrr.hpp>
+#include <pmrrr/lapack/odnst.hpp>
+#include <pmrrr/lapack/odrrj.hpp>
+#include <pmrrr/blas/odscal.hpp>
+
+using std::sort;
+
+/*
+ * Computation of eigenvalues and eigenvectors of a symmetric
+ * tridiagonal matrix T, given by its diagonal elements D
+ * and its super-/subdiagonal elements E.
+ * See README or 'pmrrr.h' for details.
+ */
+namespace pmrrr {
+
+	namespace detail{
+		template<typename FloatingType>
+		FloatingType scale_matrix(in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct, bool valeig);
+
+		template<typename FloatingType>
+		int handle_small_cases(char *jobz, char *range, int *np, FloatingType  *D,
+				   FloatingType *E, FloatingType *vlp, FloatingType *vup, int *ilp,
+				   int *iup, int *tryracp, MPI_Comm comm, int *nzp,
+				   int *myfirstp, FloatingType *W, FloatingType *Z, int *ldzp,
+				   int *Zsupp);
+
+		template<typename FloatingType>
+		bool cmp(const FloatingType & a1, const FloatingType & a2);
+
+		/*
+		 * Template template parameter required because of the sort function not catching overload properly.
+		 */
+		template<typename FloatingType>
+		bool cmp_sort_struct(const sort_struct_t<FloatingType> & a1, const sort_struct_t<FloatingType> & a2);
+
+
+		template<typename FloatingType>
+		void clean_up(MPI_Comm comm, FloatingType *Werr, FloatingType *Wgap,
+			  FloatingType *gersch, int *iblock, int *iproc,
+			  int *Windex, int *isplit, int *Zindex,
+			  proc_t *procinfo, in_t<FloatingType> *Dstruct,
+			  val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+			  tol_t<FloatingType> *tolstruct);
+
+		template<typename FloatingType>
+		int sort_eigenpairs(proc_t *procinfo, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct);
+
+		template<typename FloatingType>
+		int refine_to_highrac(proc_t *procinfo, char *jobz, FloatingType *D,
+					  FloatingType *E2, in_t<FloatingType> *Dstruct, int *nzp,
+					  val_t<FloatingType> *Wstruct, tol_t<FloatingType> *tolstruct);
+
+		template<typename FloatingType>
+		void invscale_eigenvalues(val_t<FloatingType> *Wstruct, FloatingType scale,
+					  int size);
+	}
+
+    /* Arguments:
+     * ----------
+     *
+     * INPUTS: 
+     * -------
+     * jobz              "N" or "n" - compute only eigenvalues
+     *                   "V" or "v" - compute also eigenvectors
+     *                   "C" or "c" - count the maximal number of 
+     *                                locally computed eigenvectors
+     * range             "A" or "a" - all
+     *                   "V" or "v" - by interval: (VL,VU]
+     *                   "I" or "i" - by index:     IL-IU
+     * n                 matrix size
+     * ldz               must be set on input to the leading dimension 
+     *                   of of eigenvector matrix Z; this is often equal 
+     *                   to matrix size n (not changed on output)
+     *
+     * INPUT + OUTPUT: 
+     * ---------------
+     * D (double[n])     Diagonal elements of tridiagonal T.
+     *                   (On output the array will be overwritten).
+     * E (double[n])     Off-diagonal elements of tridiagonal T.
+     *                   First n-1 elements contain off-diagonals,
+     *                   the last element can have an abitrary value. 
+     *                   (On output the array will be overwritten.)
+     * vl                If range="V", lower bound of interval
+     *                   (vl,vu], on output refined.
+     *                   If range="A" or "I" not referenced as input.
+     *                   On output the interval (vl,vu] contains ALL
+     *                   the computed eigenvalues.
+     * vu                If range="V", upper bound of interval
+     *                   (vl,vu], on output refined.
+     *                   If range="A" or "I" not referenced as input.
+     *                   On output the interval (vl,vu] contains ALL
+     *                   the computed eigenvalues.
+     * il                If range="I", lower index (1-based indexing) of 
+     *                   the subset 'il' to 'iu'.
+     *                   If range="A" or "V" not referenced as input.
+     *                   On output the eigenvalues with index il to iu are 
+     *                   computed by ALL processes.
+     * iu                If range="I", upper index (1-based indexing) of 
+     *                   the subset 'il' to 'iu'.
+     *                   If range="A" or "V" not referenced as input.
+     *                   On output the eigenvalues with index il to iu are 
+     *                   computed by ALL processes.
+     * tryrac            0 - do not try to achieve high relative accuracy.
+     *                   NOTE: this should be the default in context of  
+     *                         dense eigenproblems.
+     *                   1 - relative accuracy will be attempted; 
+     *                       on output it is set to zero if high relative 
+     *                       accuracy is not achieved.
+     * comm              MPI communicator; commonly: MPI_COMM_WORLD.
+     *
+     * OUTPUT: 
+     * -------
+     * nz                Number of eigenvalues and eigenvectors computed 
+     *                   locally.
+     *                   If jobz="C", 'nz' will be set to the maximal
+     *                   number of locally computed eigenvectors such 
+     *                   that double[n*nz] will provide enough memory 
+     *                   for the local eigenvectors;  this is only 
+     *                   important in case of range="V" since 
+     *                   '#eigenpairs' are not known in advance
+     * offset            Index, relative to the computed eigenvalues, of 
+     *                   the smallest eigenvalue computed locally
+     *                   (0-based indexing).
+     * W (double[n])     Locally computed eigenvalues;
+     *                   The first nz entries contain the eigenvalues 
+     *                   computed locally; the first entry contains the 
+     *                   'offset + 1'-th computed eigenvalue, which is the 
+     *                   'offset + il'-th eigenvalue of the input matrix 
+     *                   (1-based indexing in both cases).
+     *                   In some situations it is desirable to have all 
+     *                   computed eigenvalues in W, instead of only 
+     *                   those computed locally. In this case, call 
+     *                   routine 'PMR_comm_eigvals' after 
+     *                   'pmrrr' returns (see example and interface below).
+     * Z                 Locally computed eigenvectors.
+     * (double[n*nz])    Enough space must be provided to store the
+     *                   vectors. 'nz' should be bigger or equal 
+     *                   to ceil('#eigenpairs'/'#processes'), where 
+     *                   '#eigenpairs' is 'n' in case of range="A" and
+     *                   'iu-il+1' in case of range="I". Alternatively, 
+     *                   and for range="V" 'nz' can be obtained 
+     *                   by running the routine with jobz="C". 
+     * Zsupp             Support of eigenvectors, which is given by
+     * (double[2*n])     i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector
+     *                   (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and
+     *                   in Fotran Z(i1:i2) are non-zero).
+     *
+     * RETURN VALUE: 
+     * -------------
+     *                 0 - success  
+     *                 1 - wrong input parameter
+     *                 2 - misc errors  
+     *
+     * The Fortran interface takes an additinal integer argument INFO
+     * to retrieve the return value. 
+     * An example call in Fortran looks therefore like
+     *
+     * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, 
+     *            MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO)
+     *
+     *
+     * EXAMPLE CALL: 
+     * -------------
+     * char    *jobz, *range;
+     * int     n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp;
+     * double  *D, *E, *W, *Z, vl, vu;
+     *
+     * // allocate space for D, E, W, Z
+     * // initialize D, E
+     * // set jobz, range, ldz, and if necessary, il, iu or vl, vu  
+     * 
+     * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu,
+     *              &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W,
+     *          Z, &ldz , Zsupp);
+     *
+     * // optional: 
+     * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W);
+     *
+     */
+    template<typename FloatingType>
+    int pmrrr(char *jobz, char *range, int *np, FloatingType  *D,
+          FloatingType *E, FloatingType *vl, FloatingType *vu, int *il,
+          int *iu, int *tryracp, MPI_Comm comm, int *nzp,
+          int *offsetp, FloatingType *W, FloatingType *Z, int *ldz,
+          int *Zsupp)
+    {
+      /* Input parameter */
+      int         n      = *np;
+      bool onlyW = toupper(jobz[0]) == 'N';
+      bool wantZ = toupper(jobz[0]) == 'V';
+      bool cntval = toupper(jobz[0]) == 'C';
+      bool alleig = toupper(range[0]) == 'A';
+      bool valeig = toupper(range[0]) == 'V';
+      bool indeig = toupper(range[0]) == 'I';
+
+      /* Check input parameters */
+      if(!(onlyW  || wantZ  || cntval)) return 1;
+      if(!(alleig || valeig || indeig)) return 1;
+      if(n <= 0) return 1;
+      if (valeig) {
+        if(*vu<=*vl) return 1;
+      } else if (indeig) {
+        if (*il<1 || *il>n || *iu<*il || *iu>n) return 1;
+      }
+
+      /* MPI & multithreading info */
+      int is_init, is_final;
+      MPI_Initialized(&is_init);
+      MPI_Finalized(&is_final);
+      if (is_init!=1 || is_final==1) {
+        fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n",
+          is_init, is_final);
+        return 1;
+      }
+      MPI_Comm comm_dup;
+      MPI_Comm_dup(comm, &comm_dup);
+      int nproc, pid, thread_support;
+      MPI_Comm_size(comm_dup, &nproc);
+      MPI_Comm_rank(comm_dup, &pid);
+      MPI_Query_thread(&thread_support);
+
+      int nthreads;
+      if ( !(thread_support == MPI_THREAD_MULTIPLE ||
+             thread_support == MPI_THREAD_FUNNELED) ) {
+        /* Disable multithreading; note: to support multithreading with 
+         * MPI_THREAD_SERIALIZED the code must be changed slightly; this 
+         * is not supported at the moment */
+        nthreads = 1;
+      } else {
+        char *ompvar = getenv("PMR_NUM_THREADS");
+        if (ompvar == NULL) {
+          nthreads = DEFAULT_NUM_THREADS;
+        } else {
+          nthreads = atoi(ompvar);
+        }
+      }
+
+    #if defined(MVAPICH2_VERSION)
+      if (nthreads>1) {
+        int           mv2_affinity=1;
+        char        *mv2_string = getenv("MV2_ENABLE_AFFINITY");
+        if (mv2_string != NULL) {
+          mv2_affinity = atoi(mv2_string);
+        }    
+        if (mv2_affinity!=0) {
+          nthreads = 1;
+          if (pid==0) {
+            fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n");
+            fflush(stderr);
+          }
+        } 
+      }
+    #endif
+
+      /* If only maximal number of local eigenvectors are queried
+       * return if possible here */
+      *nzp     = 0;
+      *offsetp = 0;
+      if (cntval) {
+        if ( alleig || n < DSTEMR_IF_SMALLER ) {
+          *nzp = iceil(n,nproc);
+          MPI_Comm_free(&comm_dup);
+          return 0;
+        } else if (indeig) {
+          *nzp = iceil(*iu-*il+1,nproc);
+          MPI_Comm_free(&comm_dup);
+          return 0;
+        }
+      }
+
+      /* Check if computation should be done by multiple processes */
+      int info;
+      if (n < DSTEMR_IF_SMALLER) {
+        info = detail::handle_small_cases(jobz, range, np, D, E, vl, vu, il,
+                      iu, tryracp, comm, nzp, offsetp, W,
+                      Z, ldz, Zsupp);
+        MPI_Comm_free(&comm_dup);
+        return info;
+      }
+
+      /* Allocate memory */
+      FloatingType *Werr = (FloatingType *) malloc( n * sizeof(FloatingType) );
+      assert(Werr != NULL);
+      FloatingType *Wgap = (FloatingType *) malloc( n * sizeof(FloatingType) );
+      assert(Wgap != NULL);
+      FloatingType *gersch = (FloatingType *) malloc( 2*n*sizeof(FloatingType) );
+      assert(gersch != NULL);
+      int *iblock  = (int *) calloc( n , sizeof(int) );
+      assert(iblock != NULL);
+      int *iproc = (int *)  malloc( n * sizeof(int) );
+      assert(iproc != NULL);
+      int *Windex = (int *) malloc( n * sizeof(int) );
+      assert(Windex != NULL);
+      int *isplit = (int *) malloc( n * sizeof(int) );
+      assert(isplit != NULL);
+      int *Zindex = (int *) malloc( n * sizeof(int) );
+      assert(Zindex != NULL);
+      detail::proc_t *procinfo = (detail::proc_t *) malloc( sizeof(detail::proc_t) );
+      assert(procinfo != NULL);
+      detail::in_t<FloatingType> *Dstruct = (detail::in_t<FloatingType> *) malloc( sizeof(detail::in_t<FloatingType>) );
+      assert(Dstruct != NULL);
+      detail::val_t<FloatingType> *Wstruct = (detail::val_t<FloatingType> *) malloc( sizeof(detail::val_t<FloatingType>) );
+      assert(Wstruct != NULL);
+      detail::vec_t<FloatingType> *Zstruct = (detail::vec_t<FloatingType> *) malloc( sizeof(detail::vec_t<FloatingType>) );
+      assert(Zstruct != NULL);
+      detail::tol_t<FloatingType> *tolstruct = (detail::tol_t<FloatingType> *) malloc( sizeof(detail::tol_t<FloatingType>) );
+      assert(tolstruct != NULL);
+
+      /* Bundle variables into a structures */
+      procinfo->pid            = pid;
+      procinfo->nproc          = nproc;
+      procinfo->comm           = comm_dup;
+      procinfo->nthreads       = nthreads;
+      procinfo->thread_support = thread_support;
+
+      Dstruct->n               = n;
+      Dstruct->D               = D;
+      Dstruct->E               = E;
+      Dstruct->isplit          = isplit;
+
+      Wstruct->n               = n;
+      Wstruct->vl              = vl;
+      Wstruct->vu              = vu;
+      Wstruct->il              = il;
+      Wstruct->iu              = iu;
+      Wstruct->W               = W;
+      Wstruct->Werr            = Werr;
+      Wstruct->Wgap            = Wgap;
+      Wstruct->Windex          = Windex;
+      Wstruct->iblock          = iblock;
+      Wstruct->iproc           = iproc;
+      Wstruct->gersch          = gersch;
+
+      Zstruct->ldz             = *ldz;
+      Zstruct->nz              = 0;
+      Zstruct->Z               = Z;
+      Zstruct->Zsupp           = Zsupp;
+      Zstruct->Zindex          = Zindex;
+
+      /* Scale matrix to allowable range, returns 1.0 if not scaled */
+      FloatingType scale = detail::scale_matrix<FloatingType>(Dstruct, Wstruct, valeig);
+
+      /*  Test if matrix warrants more expensive computations which
+       *  guarantees high relative accuracy */
+      if (*tryracp)
+        lapack::odrrr(&n, D, E, &info); /* 0 - rel acc */
+      else info = -1;
+
+      int i;
+      FloatingType *Dcopy, *E2copy;
+      if (info == 0) {
+        /* This case is extremely rare in practice */ 
+        tolstruct->split = std::numeric_limits<FloatingType>::epsilon();
+        /* Copy original data needed for refinement later */
+        Dcopy  = (FloatingType *) malloc( n * sizeof(FloatingType) );
+        assert(Dcopy != NULL);
+        memcpy(Dcopy, D, n*sizeof(FloatingType));  
+        E2copy = (FloatingType *) malloc( n * sizeof(FloatingType) );
+        assert(E2copy != NULL);
+        for (i=0; i<n-1; i++)
+          E2copy[i] = E[i]*E[i];
+      } else {
+        /* Neg. threshold forces old splitting criterion */
+        tolstruct->split = -std::numeric_limits<FloatingType>::epsilon(); 
+        *tryracp = 0;
+      }
+
+      if (!wantZ) {
+        /* Compute eigenvalues to full precision */
+        tolstruct->rtol1 = 4.0 * std::numeric_limits<FloatingType>::epsilon();
+        tolstruct->rtol2 = 4.0 * std::numeric_limits<FloatingType>::epsilon();
+      } else {
+        /* Do not compute to full accuracy first, but refine later */
+        tolstruct->rtol1 = sqrt(std::numeric_limits<FloatingType>::epsilon());
+        tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1);
+        tolstruct->rtol2 = sqrt(std::numeric_limits<FloatingType>::epsilon())*5.0E-3;
+        tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2);
+        tolstruct->rtol2 = fmax(4.0 * std::numeric_limits<FloatingType>::epsilon(), tolstruct->rtol2);
+      }
+
+      /*  Compute all eigenvalues: sorted by block */
+      // TODO: change later the casting
+      info = detail::plarre(procinfo, jobz, range, Dstruct, Wstruct, tolstruct, nzp, offsetp);
+      assert(info == 0);
+
+      /* If just number of local eigenvectors are queried */
+      if (cntval & valeig) {    
+        detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
+             isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
+             tolstruct);
+        return 0;
+      }
+
+      /* If only eigenvalues are to be computed */
+      if (!wantZ) {
+
+        /* Refine to high relative with respect to input T */
+        if (*tryracp) {
+          info = detail::refine_to_highrac(procinfo, jobz, Dcopy, E2copy, 
+                                        Dstruct, nzp, Wstruct, tolstruct);
+          assert(info == 0);
+        }
+
+        /* Sort eigenvalues */
+        sort(W, W + n, detail::cmp<FloatingType>);
+
+        /* Only keep subset ifirst:ilast */
+        int ifirst, ilast, isize;
+        int iil = *il;
+        int iiu = *iu;    
+        int ifirst_tmp = iil;
+        for (i=0; i<nproc; i++) {
+          int chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
+          int ilast_tmp;
+          if (i == nproc-1) {
+            ilast_tmp = iiu;
+          } else {
+            ilast_tmp = ifirst_tmp + chunk - 1;
+            ilast_tmp = imin(ilast_tmp, iiu);
+          }
+          if (i == pid) {
+            ifirst    = ifirst_tmp;
+            ilast     = ilast_tmp;
+            isize     = ilast - ifirst + 1;
+            *offsetp = ifirst - iil;
+            *nzp      = isize;
+          }
+          ifirst_tmp = ilast_tmp + 1;
+          ifirst_tmp = imin(ifirst_tmp, iiu + 1);
+        }
+        if (isize > 0) {
+          memmove(W, &W[ifirst-1], *nzp * sizeof(FloatingType));
+        }
+
+        /* If matrix was scaled, rescale eigenvalues */
+        detail::invscale_eigenvalues(Wstruct, scale, *nzp);
+
+        detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
+             isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
+             tolstruct);
+
+        return 0;
+      } /* end of only eigenvalues to compute */
+
+      /* Compute eigenvectors */
+      info = detail::plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, 
+            nzp, offsetp);
+      assert(info == 0);
+
+      /* Refine to high relative with respect to input matrix */
+      if (*tryracp) {
+        info = detail::refine_to_highrac(procinfo, jobz, Dcopy, E2copy, 
+                     Dstruct, nzp, Wstruct, tolstruct);
+        assert(info == 0);
+      }
+
+      /* If matrix was scaled, rescale eigenvalues */
+      detail::invscale_eigenvalues(Wstruct, scale, n);
+
+      /* Sort eigenvalues and eigenvectors of process */
+      detail::sort_eigenpairs<FloatingType>(procinfo, Wstruct, Zstruct);
+
+      detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
+           isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
+           tolstruct);
+      if (*tryracp) {
+        free(Dcopy);
+        free(E2copy);
+      }
+
+      return 0;
+    } /* end pmrrr */
+
+
+    /*
+     * Routine to communicate eigenvalues such that every process has
+     * all computed eigenvalues (iu-il+1) in W; this routine is designed 
+     * to be called right after 'pmrrr'.
+     *
+     * Arguments:
+     * ----------
+     *
+     * INPUTS: 
+     * -------
+     * jobz              "N" or "n" - compute only eigenvalues
+     *                   "V" or "v" - compute also eigenvectors
+     *                   "C" or "c" - count the maximal number of 
+     *                                locally computed eigenvectors
+     * range             "A" or "a" - all
+     *                   "V" or "v" - by interval: (VL,VU]
+     *                   "I" or "i" - by index:     IL-IU
+     * n                 matrix size
+     * ldz               must be set on input to the leading dimension 
+     *                   of of eigenvector matrix Z; this is often equal 
+     *                   to matrix size n (not changed on output)
+     *
+     * INPUT + OUTPUT: 
+     * ---------------
+     * D (double[n])     Diagonal elements of tridiagonal T.
+     *                   (On output the array will be overwritten).
+     * E (double[n])     Off-diagonal elements of tridiagonal T.
+     *                   First n-1 elements contain off-diagonals,
+     *                   the last element can have an abitrary value. 
+     *                   (On output the array will be overwritten.)
+     * vl                If range="V", lower bound of interval
+     *                   (vl,vu], on output refined.
+     *                   If range="A" or "I" not referenced as input.
+     *                   On output the interval (vl,vu] contains ALL
+     *                   the computed eigenvalues.
+     * vu                If range="V", upper bound of interval
+     *                   (vl,vu], on output refined.
+     *                   If range="A" or "I" not referenced as input.
+     *                   On output the interval (vl,vu] contains ALL
+     *                   the computed eigenvalues.
+     * il                If range="I", lower index (1-based indexing) of 
+     *                   the subset 'il' to 'iu'.
+     *                   If range="A" or "V" not referenced as input.
+     *                   On output the eigenvalues with index il to iu are 
+     *                   computed by ALL processes.
+     * iu                If range="I", upper index (1-based indexing) of 
+     *                   the subset 'il' to 'iu'.
+     *                   If range="A" or "V" not referenced as input.
+     *                   On output the eigenvalues with index il to iu are 
+     *                   computed by ALL processes.
+     * tryrac            0 - do not try to achieve high relative accuracy.
+     *                   NOTE: this should be the default in context of  
+     *                         dense eigenproblems.
+     *                   1 - relative accuracy will be attempted; 
+     *                       on output it is set to zero if high relative 
+     *                       accuracy is not achieved.
+     * comm              MPI communicator; commonly: MPI_COMM_WORLD.
+     *
+     * OUTPUT: 
+     * -------
+     * nz                Number of eigenvalues and eigenvectors computed 
+     *                   locally.
+     *                   If jobz="C", 'nz' will be set to the maximal
+     *                   number of locally computed eigenvectors such 
+     *                   that double[n*nz] will provide enough memory 
+     *                   for the local eigenvectors;  this is only 
+     *                   important in case of range="V" since 
+     *                   '#eigenpairs' are not known in advance
+     * offset            Index, relative to the computed eigenvalues, of 
+     *                   the smallest eigenvalue computed locally
+     *                   (0-based indexing).
+     * W (double[n])     Locally computed eigenvalues;
+     *                   The first nz entries contain the eigenvalues 
+     *                   computed locally; the first entry contains the 
+     *                   'offset + 1'-th computed eigenvalue, which is the 
+     *                   'offset + il'-th eigenvalue of the input matrix 
+     *                   (1-based indexing in both cases).
+     *                   In some situations it is desirable to have all 
+     *                   computed eigenvalues in W, instead of only 
+     *                   those computed locally. In this case, call 
+     *                   routine 'PMR_comm_eigvals' after 
+     *                   'pmrrr' returns (see example and interface below).
+     * Z                 Locally computed eigenvectors.
+     * (double[n*nz])    Enough space must be provided to store the
+     *                   vectors. 'nz' should be bigger or equal 
+     *                   to ceil('#eigenpairs'/'#processes'), where 
+     *                   '#eigenpairs' is 'n' in case of range="A" and
+     *                   'iu-il+1' in case of range="I". Alternatively, 
+     *                   and for range="V" 'nz' can be obtained 
+     *                   by running the routine with jobz="C". 
+     * Zsupp             Support of eigenvectors, which is given by
+     * (double[2*n])     i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector
+     *                   (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and
+     *                   in Fotran Z(i1:i2) are non-zero).
+     *
+     * RETURN VALUE: 
+     * -------------
+     *                 0 - success  
+     *                 1 - wrong input parameter
+     *                 2 - misc errors  
+     *
+     * The Fortran interface takes an additinal integer argument INFO
+     * to retrieve the return value. 
+     * An example call in Fortran looks therefore like
+     *
+     * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, 
+     *            MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO)
+     *
+     *
+     * EXAMPLE CALL: 
+     * -------------
+     * char    *jobz, *range;
+     * int     n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp;
+     * double  *D, *E, *W, *Z, vl, vu;
+     *
+     * // allocate space for D, E, W, Z
+     * // initialize D, E
+     * // set jobz, range, ldz, and if necessary, il, iu or vl, vu  
+     * 
+     * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu,
+     *              &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W,
+     *          Z, &ldz , Zsupp);
+     *
+     * // optional: 
+     * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W);
+     *
+     */
+    template<typename FloatingType>
+    int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, FloatingType *W)
+    {
+      MPI_Comm comm_dup;
+      MPI_Comm_dup(comm, &comm_dup);
+      int nproc;
+      MPI_Comm_size(comm_dup, &nproc);
+
+      int *rcount = (int *) malloc( nproc * sizeof(int) );
+      assert(rcount != NULL);
+      int *rdispl = (int *) malloc( nproc * sizeof(int) );
+      assert(rdispl != NULL);
+      FloatingType *work = (FloatingType *) malloc((*nz+1) * sizeof(FloatingType));
+      assert(work != NULL);
+
+      if (*nz > 0)
+        memcpy(work, W, (*nz) * sizeof(FloatingType) );
+
+      MPI_Allgather(nz, 1, MPI_INT, rcount, 1, MPI_INT, comm_dup);
+
+      MPI_Allgather(myfirstp, 1, MPI_INT, rdispl, 1, MPI_INT, comm_dup);
+      
+      MPI_Allgatherv(work, *nz, float_traits<FloatingType>::mpi_type(), W, rcount, rdispl,
+             float_traits<FloatingType>::mpi_type(), comm_dup);
+
+      MPI_Comm_free(&comm_dup);
+      free(rcount);
+      free(rdispl);
+      free(work);
+
+      return 0;
+    }
+
+    /**
+        Helper methods
+     **/
+
+    namespace detail{
+
+        /*
+         * Free's on allocated memory of pmrrr routine
+         */
+        template<typename FloatingType>
+        void clean_up(MPI_Comm comm, FloatingType *Werr, FloatingType *Wgap,
+                  FloatingType *gersch, int *iblock, int *iproc,
+                  int *Windex, int *isplit, int *Zindex,
+                  proc_t *procinfo, in_t<FloatingType> *Dstruct,
+                  val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+                  tol_t<FloatingType> *tolstruct)
+        {
+          MPI_Comm_free(&comm);
+          free(Werr);
+          free(Wgap);
+          free(gersch);
+          free(iblock);
+          free(iproc);
+          free(Windex);
+          free(isplit);
+          free(Zindex);
+          free(procinfo);
+          free(Dstruct);
+          free(Wstruct);
+          free(Zstruct);
+          free(tolstruct);
+        }
+
+        /*
+         * Wrapper to call LAPACKs DSTEMR for small matrices
+         */
+        template<typename FloatingType>
+        int handle_small_cases(char *jobz, char *range, int *np, FloatingType  *D,
+                       FloatingType *E, FloatingType *vlp, FloatingType *vup, int *ilp,
+                       int *iup, int *tryracp, MPI_Comm comm, int *nzp,
+                       int *myfirstp, FloatingType *W, FloatingType *Z, int *ldzp,
+                       int *Zsupp)
+        {
+          bool   cntval  = (jobz[0]  == 'C' || jobz[0]  == 'c');
+          bool   onlyW   = (jobz[0]  == 'N' || jobz[0]  == 'n');
+          bool   wantZ   = (jobz[0]  == 'V' || jobz[0]  == 'v');
+          bool   indeig  = (range[0] == 'I' || range[0] == 'i');
+          int    n       = *np;
+          int    ldz_tmp = *np;
+          int    ldz     = *ldzp;
+
+          int    nproc, pid;
+          MPI_Comm_size(comm, &nproc);
+          MPI_Comm_rank(comm, &pid);
+
+          int lwork, liwork;
+          FloatingType *Z_tmp; 
+          if (onlyW) {
+            lwork  = 12*n;
+            liwork =  8*n;
+          } else if (cntval) {
+            lwork  = 18*n;
+            liwork = 10*n;
+          } else if (wantZ) {
+            lwork  = 18*n;
+            liwork = 10*n;
+            int itmp;
+            if (indeig) itmp = *iup-*ilp+1;
+            else        itmp = n;
+            Z_tmp = (FloatingType *) malloc(n*itmp * sizeof(FloatingType));
+            assert(Z_tmp != NULL);
+          } else {
+            return 1;
+          }
+
+          FloatingType *work = (FloatingType *) malloc( lwork  * sizeof(FloatingType));
+          assert(work != NULL);
+          int *iwork = (int *)   malloc( liwork * sizeof(int));
+          assert(iwork != NULL);
+
+          if (cntval) {
+            /* Note: at the moment, jobz="C" should never get here, since
+             * it is blocked before. */
+            int m, info, MINUSONE = -1;
+            FloatingType cnt;
+            lapack::odstmr("V", "V", np, D, E, vlp, vup, ilp, iup, &m, W, &cnt,
+                &ldz_tmp, &MINUSONE, Zsupp, tryracp, work, &lwork, iwork,
+                &liwork, &info);
+            assert(info == 0);
+            
+            *nzp = (int) ceil(cnt/nproc);
+            free(work); free(iwork);
+            return 0;
+          }
+
+          int m, info;
+          lapack::odstmr(jobz, range, np, D, E, vlp, vup, ilp, iup, &m, W, Z_tmp,
+              &ldz_tmp, np, Zsupp, tryracp, work, &lwork, iwork,
+              &liwork, &info);
+          assert(info == 0);
+
+          int chunk = iceil(m,nproc);
+          int myfirst = imin(pid * chunk, m);
+          int mylast = imin((pid+1)*chunk - 1, m - 1);
+          int mysize = mylast - myfirst + 1;
+
+          if (mysize > 0) {
+            memmove(W, &W[myfirst], mysize*sizeof(FloatingType));
+            if (wantZ) {
+              if (ldz == ldz_tmp) {
+                /* copy everything in one chunk */
+                memcpy(Z, &Z_tmp[myfirst*ldz_tmp], n*mysize*sizeof(FloatingType));
+              } else {
+                int i;
+                /* copy each vector seperately */
+                for (i=0; i<mysize; i++)
+                  memcpy(&Z[i*ldz], &Z_tmp[(myfirst+i)*ldz_tmp], 
+                     n*sizeof(FloatingType));
+              } 
+            } /* if (wantZ) */
+          } 
+          
+          *myfirstp = myfirst;
+          *nzp      = mysize;
+
+          if (wantZ) free(Z_tmp);
+          free(work);
+          free(iwork);
+
+          return 0;
+        }
+
+        /*
+         * Scale matrix to allowable range, returns 1.0 if not scaled
+         */
+        template<typename FloatingType>
+        FloatingType scale_matrix(in_t<FloatingType> *Dstruct, val_t<FloatingType> *Wstruct, bool valeig)
+        {
+          int              n  = Dstruct->n;
+          FloatingType *restrict D  = Dstruct->D;
+          FloatingType *restrict E  = Dstruct->E;
+          FloatingType          *vl = Wstruct->vl;
+          FloatingType          *vu = Wstruct->vu;
+
+          /* Set some machine dependent constants */
+          FloatingType smlnum = std::numeric_limits<FloatingType>::min() / std::numeric_limits<FloatingType>::epsilon();
+          FloatingType bignum = 1.0 / smlnum;
+          FloatingType rmin = sqrt(smlnum);
+          FloatingType rmax = fmin(sqrt(bignum), 1.0 / sqrt(sqrt(std::numeric_limits<FloatingType>::min())));
+
+          /*  Scale matrix to allowable range */
+          FloatingType scale = 1.0;
+          FloatingType T_norm = lapack::odnst("M", &n, D, E);  /* returns max(|T(i,j)|) */
+          if (T_norm > 0 && T_norm < rmin) {
+            scale = rmin / T_norm;
+          } else if (T_norm > rmax) {
+            scale = rmax / T_norm;
+          }
+
+          if (scale != 1.0) {  /* FP cmp okay */
+            /* Scale matrix and matrix norm */
+            int itmp = n-1;
+            int IONE = 1;
+            blas::odscal(&n,    &scale, D, &IONE);
+            blas::odscal(&itmp, &scale, E, &IONE);
+            if (valeig == true) {
+              /* Scale eigenvalue bounds */
+              *vl *= scale;
+              *vu *= scale;
+            }
+          } /* end scaling */
+
+          return scale;
+        }
+
+        /*
+         * If matrix scaled, rescale eigenvalues
+         */
+        template<typename FloatingType>
+        void invscale_eigenvalues(val_t<FloatingType> *Wstruct, FloatingType scale,
+                      int size)
+        {
+          if (scale != 1.0) {  /* FP cmp okay */
+            FloatingType *vl = Wstruct->vl;
+            FloatingType *vu = Wstruct->vu;
+            FloatingType *W  = Wstruct->W;
+
+            FloatingType invscale = 1.0 / scale;
+            int IONE = 1;
+            *vl *= invscale;
+            *vu *= invscale;
+            blas::odscal(&size, &invscale, W, &IONE);
+          }
+
+        }
+        
+        template<typename FloatingType>
+        int sort_eigenpairs_local(proc_t *procinfo, int m, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct)
+        {
+          int              pid        = procinfo->pid;
+          int              n        = Wstruct->n;
+          FloatingType *restrict W        = Wstruct->W;
+          FloatingType *restrict work     = Wstruct->gersch;
+          int              ldz      = Zstruct->ldz;
+          FloatingType *restrict Z        = Zstruct->Z;
+          int    *restrict Zsupp    = Zstruct->Zsupp;
+         
+          bool             sorted;
+          int              j;
+          FloatingType           tmp;
+          int              itmp1, itmp2;
+          
+          /* Make sure that sorted correctly; ineffective implementation,
+           * but usually no or very little swapping should be done here */
+          sorted = false;
+          while (sorted == false) {
+            sorted = true;
+            for (j=0; j<m-1; j++) {
+              if (W[j] > W[j+1]) {
+                sorted = false;
+                /* swap eigenvalue */
+                tmp    = W[j];
+                W[j]   = W[j+1];
+                W[j+1] = tmp;
+                /* swap eigenvalue support */
+                itmp1 = Zsupp[2*j];
+                Zsupp[2*j] = Zsupp[2*(j+1)];
+                Zsupp[2*(j+1)] = itmp1;
+                itmp2 = Zsupp[2*j + 1];
+                Zsupp[2*j + 1] = Zsupp[2*(j+1) + 1];
+                Zsupp[2*(j+1) +1 ] = itmp2;
+                /* swap eigenvector */
+                memcpy(work, &Z[j*ldz], n*sizeof(FloatingType));
+                memcpy(&Z[j*ldz], &Z[(j+1)*ldz], n*sizeof(FloatingType));
+                memcpy(&Z[(j+1)*ldz], work, n*sizeof(FloatingType));
+              }
+            }
+          } /* end while */
+
+          return(0);
+        }
+
+        template<typename FloatingType>
+        int sort_eigenpairs_global(proc_t *procinfo, int m, val_t<FloatingType> *Wstruct, 
+                       vec_t<FloatingType> *Zstruct)
+        {
+          int              pid   = procinfo->pid;
+          int              nproc = procinfo->nproc;
+          int              n     = Wstruct->n;
+          FloatingType *restrict W     = Wstruct->W;
+          FloatingType *restrict work  = Wstruct->gersch;
+          int              ldz   = Zstruct->ldz;
+          FloatingType *restrict Z     = Zstruct->Z;
+          int    *restrict Zsupp = Zstruct->Zsupp;
+
+          FloatingType           *minW, *maxW, *minmax; 
+          int              i, p, lp, itmp[2];
+          bool             sorted;
+          MPI_Status       status;
+          FloatingType  nan_value = 0.0/0.0;
+          
+          minW   = (FloatingType *) malloc(  nproc*sizeof(FloatingType));
+          assert(minW != NULL);
+          maxW   = (FloatingType *) malloc(  nproc*sizeof(FloatingType));
+          assert(maxW != NULL);
+          minmax = (FloatingType *) malloc(2*nproc*sizeof(FloatingType));
+          assert(minmax != NULL);
+
+          if (m == 0) {
+            MPI_Allgather(&nan_value, 1, float_traits<FloatingType>::mpi_type(), minW, 1, float_traits<FloatingType>::mpi_type(), 
+                  procinfo->comm); 
+            MPI_Allgather(&nan_value, 1, float_traits<FloatingType>::mpi_type(), maxW, 1, float_traits<FloatingType>::mpi_type(), 
+                  procinfo->comm); 
+          } else {
+            MPI_Allgather(&W[0], 1, float_traits<FloatingType>::mpi_type(), minW, 1, float_traits<FloatingType>::mpi_type(), 
+                  procinfo->comm); 
+            MPI_Allgather(&W[m-1], 1, float_traits<FloatingType>::mpi_type(), maxW, 1, float_traits<FloatingType>::mpi_type(), 
+                  procinfo->comm); 
+          }
+
+          for (i=0; i<nproc; i++) {
+            minmax[2*i]   = minW[i];
+            minmax[2*i+1] = maxW[i];
+          }
+
+          sorted = true;
+          for (i=0; i<2*nproc-1; i++) {
+            if (minmax[i] > minmax[i+1]) sorted = false;
+          }
+
+          /* Make sure that sorted correctly; ineffective implementation,
+           * but usually no or very little swapping should be done here */
+          while (sorted == false) {
+
+            sorted = true;
+
+            for (p=1; p<nproc; p++) {
+
+              lp =  p - 1;
+
+              /* swap one pair of eigenvalues and eigenvectors */
+              if ((pid == lp || pid == p) && minW[p] < maxW[lp]) {
+                if (pid == lp) {
+                  W[m-1] = minW[p];
+                      MPI_Sendrecv(&Z[(m-1)*ldz], n, float_traits<FloatingType>::mpi_type(), p, lp, 
+                           work, n, float_traits<FloatingType>::mpi_type(), p, p, 
+                           procinfo->comm, &status);
+                  memcpy(&Z[(m-1)*ldz], work, n*sizeof(FloatingType));
+                }
+                if (pid == p) {
+                  W[0]   = maxW[p-1];
+                      MPI_Sendrecv(&Z[0], n, float_traits<FloatingType>::mpi_type(), lp, p, 
+                           work,  n, float_traits<FloatingType>::mpi_type(), lp, lp, 
+                           procinfo->comm, &status);
+                  memcpy(&Z[0], work, n*sizeof(FloatingType));
+                }
+              }
+
+              /* swap eigenvector support as well; 
+               * (would better be recomputed here though) */
+              if ((pid == lp || pid == p) && minW[p] < maxW[lp]) {
+                if (pid == lp) {
+                      MPI_Sendrecv(&Zsupp[2*(m-1)], 2, MPI_INT, p, lp, 
+                           itmp, 2, MPI_INT, p, p, 
+                           procinfo->comm, &status);
+                  Zsupp[2*(m-1)]     = itmp[0];
+                  Zsupp[2*(m-1) + 1] = itmp[1];
+                }
+                if (pid == p) {
+                      MPI_Sendrecv(&Zsupp[0], 2, MPI_INT, lp, p, 
+                           itmp,  2, MPI_INT, lp, lp, 
+                           procinfo->comm, &status);
+                  Zsupp[0] = itmp[0];
+                  Zsupp[1] = itmp[1];
+                }
+              }
+            }
+
+            /* sort local again */
+            sort_eigenpairs_local<FloatingType>(procinfo, m, Wstruct, Zstruct);
+            
+            /* check again if globally sorted */
+            if (m == 0) {
+              MPI_Allgather(&nan_value, 1, float_traits<FloatingType>::mpi_type(), minW, 1, float_traits<FloatingType>::mpi_type(), 
+                    procinfo->comm); 
+              MPI_Allgather(&nan_value, 1, float_traits<FloatingType>::mpi_type(), maxW, 1, float_traits<FloatingType>::mpi_type(), 
+                    procinfo->comm);       
+            } else {
+              MPI_Allgather(&W[0], 1, float_traits<FloatingType>::mpi_type(), minW, 1, float_traits<FloatingType>::mpi_type(), 
+                    procinfo->comm); 
+              MPI_Allgather(&W[m-1], 1, float_traits<FloatingType>::mpi_type(), maxW, 1, float_traits<FloatingType>::mpi_type(), 
+                    procinfo->comm); 
+            }
+            
+            for (i=0; i<nproc; i++) {
+              minmax[2*i]   = minW[i];
+              minmax[2*i+1] = maxW[i];
+            }
+            
+            for (i=0; i<2*nproc-1; i++) {
+              if (minmax[i] > minmax[i+1]) sorted = false;
+            }
+            
+          } /* end while not sorted */
+
+          free(minW);
+          free(maxW);
+          free(minmax);
+
+          return 0;
+        }
+
+        /* Routine to sort the eigenpairs */
+        template<typename FloatingType>
+        int sort_eigenpairs(proc_t *procinfo, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct)
+        {
+          /* From inputs */
+          int              pid      = procinfo->pid;
+          int              n        = Wstruct->n;
+          FloatingType *restrict W        = Wstruct->W;
+          int    *restrict Windex   = Wstruct->Windex;
+          int    *restrict iproc    = Wstruct->iproc;
+          int    *restrict Zindex   = Zstruct->Zindex;
+
+          /* Others */
+          int           im, j;
+          sort_struct_t<FloatingType> *sort_array;
+
+          /* Make the first nz elements of W contains the eigenvalues
+           * associated to the process */
+          im = 0;
+          for (j=0; j<n; j++) {
+            if (iproc[j] == pid) {
+              W[im]      = W[j];
+              Windex[im] = Windex[j];
+              Zindex[im] = Zindex[j];
+              im++;
+            }
+          }
+
+          sort_array = (sort_struct_t<FloatingType> *) malloc(im*sizeof(sort_struct_t<FloatingType>));
+
+          for (j=0; j<im; j++) {
+            sort_array[j].lambda    = W[j]; 
+            sort_array[j].local_ind = Windex[j];
+            sort_array[j].block_ind = 0;
+            sort_array[j].ind       = Zindex[j];
+          }
+
+          /* Sort according to Zindex */
+          sort(sort_array, sort_array + im, detail::cmp_sort_struct<FloatingType>);
+
+          for (j=0; j<im; j++) {
+            W[j]      = sort_array[j].lambda; 
+            Windex[j] = sort_array[j].local_ind;
+          }
+
+          /* Make sure eigenpairs are sorted locally; this is a very 
+           * inefficient way sorting, but in general no or very little 
+           * swapping of eigenpairs is expected here */
+          sort_eigenpairs_local<FloatingType>(procinfo, im, Wstruct, Zstruct);
+
+          /* Make sure eigenpairs are sorted globally; this is a very 
+           * inefficient way sorting, but in general no or very little 
+           * swapping of eigenpairs is expected here */
+          if (ASSERT_SORTED_EIGENPAIRS == true)
+            sort_eigenpairs_global<FloatingType>(procinfo, im, Wstruct, Zstruct);
+
+          free(sort_array);
+
+          return(0);
+        }
+
+        /*
+         * Refines the eigenvalue to high relative accuracy with
+         * respect to the input matrix;
+         * Note: In principle this part could be fully parallel too,
+         * but it will only rarely be called and not much work
+         * is involved, if the eigenvalues are not small in magnitude
+         * even no work at all is not uncommon. 
+         */
+        template<typename FloatingType>
+        int refine_to_highrac(proc_t *procinfo, char *jobz, FloatingType *D,
+                      FloatingType *E2, in_t<FloatingType> *Dstruct, int *nzp,
+                      val_t<FloatingType> *Wstruct, tol_t<FloatingType> *tolstruct)
+        {
+          int              n      = Dstruct->n;
+          int              nsplit = Dstruct->nsplit;
+          int    *restrict isplit = Dstruct->isplit;
+          FloatingType     spdiam = Dstruct->spdiam;
+          FloatingType *restrict W      = Wstruct->W;
+          FloatingType *restrict Werr   = Wstruct->Werr;
+
+          FloatingType *work  = (FloatingType *) malloc( 2*n * sizeof(FloatingType) );
+          assert (work != NULL);
+          int *iwork = (int *)    malloc( 2*n * sizeof(int)    );
+          assert (iwork != NULL);
+
+          int j, ibegin  = 0;
+          for (j=0; j<nsplit; j++) {
+            
+            int iend   = isplit[j] - 1;
+            int isize  = iend - ibegin + 1;
+            int nbl    = isize;
+            
+            if (nbl == 1) {
+              ibegin = iend + 1;
+              continue;
+            }
+            
+            int ifirst = 1, ilast = nbl, offset = 0, info;
+
+            FloatingType pivmin = tolstruct->pivmin;
+            FloatingType tol    = 4 * std::numeric_limits<FloatingType>::epsilon(); 
+            lapack::odrrj(&isize, &D[ibegin], &E2[ibegin], &ifirst, &ilast, &tol,
+                &offset, &W[ibegin], &Werr[ibegin], work, iwork, &pivmin,
+                &spdiam, &info);
+            assert(info == 0);
+            
+            ibegin = iend + 1;
+          } /* end j */
+          
+          free(work);
+          free(iwork);
+          return 0;
+        }
+
+        /*
+         * Compare function for using qsort() on an array
+         * of FloatingTypes
+         */
+        template<typename FloatingType>
+        bool cmp(const FloatingType & arg1, const FloatingType & arg2)
+        {
+            return arg1 < arg2;
+        }
+
+        /*
+         * Compare function for using qsort() on an array of 
+         * sort_structs
+         */
+        template<typename FloatingType>
+        bool cmp_sort_struct(const sort_struct_t<FloatingType> & arg1, const sort_struct_t<FloatingType> & arg2)
+        {
+          /* Within block local index decides */
+          return arg1.ind < arg2.ind;
+        }
+    }
+/* Fortran function prototype */
+/*void pmrrr_(char *jobz, char *range, int *n, FloatingType  *D,
+	    FloatingType *E, FloatingType *vl, FloatingType *vu, int *il, int *iu,
+	    int *tryracp, MPI_Fint *comm, int *nz, int *myfirst,
+	    FloatingType *W, FloatingType *Z, int *ldz, int *Zsupp, int* info)
+{
+  MPI_Comm c_comm = MPI_Comm_f2c(*comm);
+
+  *info = pmrrr(jobz, range, n, D, E, vl, vu, il, iu, tryracp, 
+		c_comm, nz, myfirst, W, Z, ldz, Zsupp);
+}
+
+void pmr_comm_eigvals_(MPI_Fint *comm, int *nz, int *myfirstp, 
+		       FloatingType *W, int *info)
+{
+  MPI_Comm c_comm = MPI_Comm_f2c(*comm);
+
+  *info = PMR_comm_eigvals(c_comm, nz, myfirstp, W);
+}*/
+
+}
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/process_c_task.hpp b/external/pmrrr/include/pmrrr/process_c_task.hpp
new file mode 100644
index 0000000000..7128f97e9b
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/process_c_task.hpp
@@ -0,0 +1,854 @@
+
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+
+#ifndef __PROCESS_C_TASK_HPP__
+#define __PROCESS_C_TASK_HPP__
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/plarrv.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/queue.h>
+#include <pmrrr/definitions/counter.h>
+#include <pmrrr/definitions/structs.h>
+
+#include <pmrrr/tasks.hpp>
+#include <pmrrr/rrr.hpp>
+#include <pmrrr/lapack/odrrb.hpp>
+#include <pmrrr/lapack/odrrf.hpp>
+
+
+#define THREE            3.0
+#define FOUR             4.0
+
+namespace pmrrr { namespace detail {
+
+	namespace {
+
+		template<typename FloatingType>
+		rrr_t<FloatingType>* compute_new_rrr(cluster_t<FloatingType> *cl, int tid, proc_t *procinfo,
+					   val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+					   tol_t<FloatingType> *tolstruct, FloatingType *work, int *iwork);
+
+		template<typename FloatingType>
+		inline int refine_eigvals(cluster_t<FloatingType> *cl, int rf_begin, int rf_end,
+				   int tid, proc_t *procinfo,
+				   rrr_t<FloatingType> *RRR, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+				   tol_t<FloatingType> *tolstruct, counter_t *num_left, 
+				   workQ_t *workQ, FloatingType *work, 
+				   int *iwork);
+
+		template<typename FloatingType>
+		inline int communicate_refined_eigvals(cluster_t<FloatingType> *cl, proc_t *procinfo,
+						int tid, val_t<FloatingType> *Wstruct, rrr_t<FloatingType> *RRR);
+
+		template<typename FloatingType>
+		inline int test_comm_status(cluster_t<FloatingType> *cl, val_t<FloatingType> *Wstruct);
+
+		template<typename FloatingType>
+		inline int create_subtasks(cluster_t<FloatingType> *cl, int tid, proc_t *procinfo,
+					rrr_t<FloatingType> *RRR, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+					workQ_t *workQ,
+					counter_t *num_left);
+	
+	}
+
+	template<typename FloatingType>
+	int PMR_process_c_task(cluster_t<FloatingType> *cl, int tid, proc_t *procinfo,
+				   val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct, 
+				   tol_t<FloatingType> *tolstruct, workQ_t *workQ, 
+				   counter_t *num_left, FloatingType *work, int *iwork)
+	{
+	  /* From inputs */
+	  int   depth      = cl->depth;
+	  int   left_pid   = cl->left_pid;
+	  int   right_pid  = cl->right_pid;
+	  int   pid        = procinfo->pid;
+	  int   n          = Wstruct->n;
+
+	  /* Protection against infinitely deep trees */
+	  assert(depth < n);
+
+	  /* Check if task only need to be split into subtasks */
+	  int status;
+	  if (cl->wait_until_refined == true) {
+		status = test_comm_status(cl, Wstruct);
+		if (status == COMM_COMPLETE) {
+		  create_subtasks(cl, tid, procinfo, cl->RRR, Wstruct, Zstruct,
+				  workQ, num_left);
+		  return C_TASK_PROCESSED;
+		} else {
+		  return C_TASK_NOT_PROCESSED;
+		}
+	  }
+
+	  /* Otherwise: compute new rrr, refine part own cluster,
+	   * communicate the refined eigenvalues if necessary,
+	   * and create subtasks if possible */
+
+	  rrr_t<FloatingType> *RRR = compute_new_rrr(cl, tid, procinfo, Wstruct, Zstruct,
+				tolstruct, work, iwork);
+
+	  /* Refine eigenvalues 'rf_begin' to 'rf_end' */
+      	  int rf_begin, rf_end;
+	  if (left_pid != right_pid) {
+		rf_begin = imax(cl->begin, cl->proc_W_begin);
+		rf_end   = imin(cl->end,   cl->proc_W_end);
+	  } 
+	  if (pid == left_pid ) rf_begin = cl->begin;
+	  if (pid == right_pid) rf_end   = cl->end;
+
+	  refine_eigvals(cl, rf_begin, rf_end, tid, procinfo, RRR,
+			 Wstruct, Zstruct, tolstruct, num_left,
+			 workQ, work, iwork);
+
+	  /* Communicate results: non-blocking */
+	  status = COMM_COMPLETE;
+	  if (left_pid != right_pid) {
+
+		status = communicate_refined_eigvals(cl, procinfo, tid, Wstruct, RRR);
+		/* status = COMM_INCOMPLETE if communication not finished */
+	  }
+
+	  if (status == COMM_COMPLETE) {
+		
+		create_subtasks(cl, tid, procinfo, RRR, Wstruct, Zstruct,
+				workQ, num_left);
+
+		return C_TASK_PROCESSED;
+	  } else {
+		return C_TASK_NOT_PROCESSED;
+	  }
+
+	} /* end process_c_task */
+
+	namespace {
+
+		template<typename FloatingType>
+		rrr_t<FloatingType>* compute_new_rrr(cluster_t<FloatingType> *cl, int tid, proc_t *procinfo,
+					   val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+					   tol_t<FloatingType> *tolstruct, FloatingType *work, int *iwork)
+		{
+		  /* From inputs */
+		  int              cl_begin    = cl->begin;
+		  int              cl_end      = cl->end;
+		  int              cl_size     = cl_end - cl_begin + 1;
+		  int              depth       = cl->depth;
+		  int              bl_begin    = cl->bl_begin;
+		  int              bl_end      = cl->bl_end;
+		  int              bl_size     = bl_end - bl_begin + 1;
+		  FloatingType     bl_spdiam   = cl->bl_spdiam;
+		  rrr_t<FloatingType>            *RRR_parent = cl->RRR;
+
+		  FloatingType *restrict Werr        = Wstruct->Werr;
+		  FloatingType *restrict Wgap        = Wstruct->Wgap;
+		  int    *restrict 		 Windex      = Wstruct->Windex;
+		  FloatingType *restrict Wshifted    = Wstruct->Wshifted;
+
+		  /* Allocate memory for new representation for cluster */
+		  FloatingType *D = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+		  FloatingType *L = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+		  FloatingType *DL  = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+		  FloatingType *DLL = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+		  assert(D != NULL);
+		  assert(L != NULL);
+		  assert(DL != NULL);
+		  assert(DLL != NULL);
+
+		  /* Recompute DL and DLL */
+          int i;
+          FloatingType tmp;
+		  FloatingType *D_parent = RRR_parent->D;
+		  FloatingType *L_parent = RRR_parent->L;
+		  for (i=0; i<bl_size-1; i++) {
+			tmp    = D_parent[i]*L_parent[i];
+			DL[i]  = tmp;
+			DLL[i] = tmp*L_parent[i];
+		  }
+		  FloatingType *DL_parent  = DL;
+		  FloatingType *DLL_parent = DLL;
+
+          FloatingType RQtol = 2*std::numeric_limits<FloatingType>::epsilon();
+          FloatingType pivmin = tolstruct->pivmin;
+
+		  /* to shift as close as possible refine extremal eigenvalues */
+          int k, p;
+          FloatingType savegap;
+		  for (k=0; k<2; k++) {
+			if (k == 0) {
+			  p              = Windex[cl_begin];
+			  savegap        = Wgap[cl_begin];
+			  Wgap[cl_begin] = 0.0;
+			} else {
+			  p              = Windex[cl_end  ];
+			  savegap        = Wgap[cl_end];
+			  Wgap[cl_end]   = 0.0;
+			}
+
+		    int info;
+			int offset  = Windex[cl_begin] - 1;
+
+			lapack::odrrb(&bl_size, D_parent, DLL_parent, &p, &p, &RQtol,
+				&RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin],
+				&Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam,
+				&bl_size, &info);
+			assert( info == 0 );
+
+			if (k == 0) {
+			  Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1])
+						- (Wshifted[cl_begin]+Werr[cl_begin]) );
+			} else {
+			  Wgap[cl_end]   = savegap;
+			}
+		  } /* end k */
+
+		  FloatingType left_gap  = cl->lgap;
+		  FloatingType right_gap = Wgap[cl_end];
+
+		  /* Compute new RRR and store it in D and L */
+          int info;
+          int IONE=1;
+          FloatingType tau;
+		  lapack::odrrf(&bl_size, D_parent, L_parent, DL_parent,
+			  &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin],
+			  &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap,
+			  &pivmin, &tau, D, L, work, &info);
+		  assert(info == 0);
+
+		  /* Update shift and store it */
+		  tmp = L_parent[bl_size-1] + tau;
+		  L[bl_size-1] = tmp;
+
+		  /* Compute D*L and D*L*L */
+		  for (i=0; i<bl_size-1; i++) {
+			tmp    = D[i]*L[i];
+			DL[i]  = tmp;
+			DLL[i] = tmp*L[i];
+		  }
+
+		  /* New RRR of cluster is usually created at the parent level and
+		   * initialized to parent RRR, now reset to contain new RRR */
+		  if (RRR_parent->copied_parent_rrr == true) {
+			free(RRR_parent->D);
+			free(RRR_parent->L);
+		  }
+		  rrr_t<FloatingType> *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1);
+		  
+		  /* Update shifted eigenvalues */
+		  for (k=cl_begin; k<=cl_end; k++) {
+			FloatingType fudge  = THREE * std::numeric_limits<FloatingType>::epsilon() * fabs( Wshifted[k] );
+			Wshifted[k] -= tau;
+			fudge += FOUR * std::numeric_limits<FloatingType>::epsilon() * fabs( Wshifted[k] );
+			Werr[k] += fudge;
+		  }
+
+		  /* Assure that structure is not freed while it is processed */
+		  PMR_increment_rrr_dependencies(RRR);
+
+		  return RRR;
+		} /* end compute_new_rrr */
+
+		/* 
+		 * Refine eigenvalues with respect to new rrr 
+		 */
+		template<typename FloatingType>
+		int refine_eigvals(cluster_t<FloatingType> *cl, int rf_begin, int rf_end,
+				   int tid, proc_t *procinfo, rrr_t<FloatingType> *RRR, 
+				   val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+				   tol_t<FloatingType> *tolstruct, counter_t *num_left,
+				   workQ_t *workQ, FloatingType *work,
+				   int *iwork)
+		{
+          typedef refine_t<FloatingType>* data_t;
+
+		  /* From inputs */
+		  int              rf_size   = rf_end-rf_begin+1;
+		  int              bl_begin  = cl->bl_begin;
+		  int              bl_end    = cl->bl_end;
+		  int              bl_size   = bl_end - bl_begin + 1;
+		  FloatingType     bl_spdiam = cl->bl_spdiam;
+
+		  FloatingType *restrict D         = RRR->D;
+		  FloatingType *restrict L         = RRR->L;
+		  FloatingType *restrict DLL       = RRR->DLL;
+
+		  FloatingType *restrict W         = Wstruct->W;
+		  FloatingType *restrict Werr      = Wstruct->Werr;
+		  FloatingType *restrict Wgap      = Wstruct->Wgap;
+		  int    *restrict 		 Windex    = Wstruct->Windex;
+		  FloatingType *restrict Wshifted  = Wstruct->Wshifted;
+		  
+		  FloatingType pivmin = tolstruct->pivmin;
+		  FloatingType rtol1 = tolstruct->rtol1;
+		  FloatingType rtol2 = tolstruct->rtol2;
+
+		  /* Determine if refinement should be split into tasks */
+		  int left = PMR_get_counter_value(num_left);
+		  int nz = Zstruct->nz;
+		  int nthreads = procinfo->nthreads;
+		  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
+		  int own_part = (int)fmax(ceil((FloatingType)left/nthreads),MIN_REFINE_CHUNK);
+
+		  int offset, i, p, q;
+		  FloatingType savegap;
+		  task_t *task;
+		  if (own_part < rf_size) {
+
+			int others_part = rf_size - own_part;
+			int num_tasks   = iceil(rf_size, own_part) - 1; /* >1 */
+			int chunk       = others_part/num_tasks;        /* floor */
+
+			int ts_begin = rf_begin, ts_end;
+			p        = Windex[rf_begin];
+			for (i=0; i<num_tasks; i++) {
+			  ts_end = ts_begin + chunk - 1;
+			  q      = p        + chunk - 1;
+
+			  task = PMR_create_r_task(ts_begin, ts_end, D, DLL, p, q, 
+						   bl_size, bl_spdiam, tid);
+			 
+			  if (ts_begin <= ts_end)
+			    PMR_insert_task_at_back(workQ->r_queue, task);
+			  else
+                /* TODO: remove casting if void* pointers are gone from task_t */
+			    PMR_refine_sem_post(static_cast<data_t>(task->data)); /* case chunk=0 */
+
+			  ts_begin = ts_end + 1;
+			  p        = q      + 1;
+			}
+			ts_end = rf_end;
+			q      = Windex[rf_end];
+			offset = Windex[ts_begin] - 1;
+
+			/* Call bisection routine to refine the values */
+			if (ts_begin <= ts_end) {
+              int info;
+			  lapack::odrrb(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
+				  &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
+				  work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
+			  assert( info == 0 );
+			}
+
+			/* Empty "all" r-queue refine tasks before waiting */
+			int num_iter = PMR_get_num_tasks(workQ->r_queue);
+			for (i=0; i<num_iter; i++) {
+			  task = PMR_remove_task_at_front(workQ->r_queue);
+			  if (task != NULL) {
+			if (task->flag == REFINE_TASK_FLAG) {
+			  PMR_process_r_task((refine_t<FloatingType> *) task->data, procinfo, 
+						 Wstruct, tolstruct, work, iwork);
+			  free(task);
+			} else {
+			  PMR_insert_task_at_back(workQ->r_queue, task);
+			}
+			  } /* if task */
+			} /* end for i */
+		
+			/* Barrier: wait until all created tasks finished */
+			int count = num_tasks;
+			while (count > 0) {
+              /* TODO: remove casting if void* pointers are gone from task_t */
+			  while ( PMR_refine_sem_wait(static_cast<data_t>(task->data)) != 0 ) { };
+			  count--;
+			}
+            /* TODO: remove casting if void* pointers are gone from task_t */
+			PMR_refine_sem_destroy(static_cast<data_t>(task->data));
+
+			/* Edit right gap at splitting point */
+			ts_begin = rf_begin;
+			for (i=0; i<num_tasks; i++) {
+			  ts_end = ts_begin + chunk - 1;
+			  
+			  Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1]
+					  - Wshifted[ts_end] - Werr[ts_end]);
+			  
+			  ts_begin = ts_end + 1;
+			}
+		  } else {
+			/* Refinement of cluster without creating tasks */
+		
+			/* 'p' and 'q' are local (within block) indices of
+			 * the first/last eigenvalue of the cluster */
+			p = Windex[rf_begin];
+			q = Windex[rf_end];
+		
+			offset = Windex[rf_begin] - 1;    /* = p - 1 */
+		
+			if (p == q) {
+			  savegap = Wgap[rf_begin];
+			  Wgap[rf_begin] = 0.0;
+			}  
+		
+			/* Bisection routine to refine the values */
+            int info;
+			lapack::odrrb(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
+				&Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
+				work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
+			assert( info == 0 );
+		
+			if (p == q)
+			  Wgap[rf_begin] = savegap;
+		  } /* end refine with or without creating tasks */
+		  
+          /* refined eigenvalues with all shifts applied in W */
+          FloatingType sigma = L[bl_size-1];
+		  for (i=rf_begin; i<=rf_end; i++)
+			W[i] = Wshifted[i] + sigma;
+
+		  return 0;
+		} /* end refine_eigvals */
+
+
+		template<typename FloatingType>
+		int communicate_refined_eigvals(cluster_t<FloatingType> *cl, proc_t *procinfo,
+						int tid, val_t<FloatingType> *Wstruct, rrr_t<FloatingType> *RRR)
+		{
+		  /* From inputs */
+		  int              cl_begin     = cl->begin;
+		  int              cl_end       = cl->end;
+		  int              bl_begin     = cl->bl_begin;
+		  int              bl_end       = cl->bl_end;
+		  int              proc_W_begin = cl->proc_W_begin;
+		  int              proc_W_end   = cl->proc_W_end;
+		  int              left_pid     = cl->left_pid;
+		  int              right_pid    = cl->right_pid;
+		  int              pid          = procinfo->pid;
+
+		  FloatingType *restrict W            = Wstruct->W;
+		  FloatingType *restrict Werr         = Wstruct->Werr;
+		  FloatingType *restrict Wgap         = Wstruct->Wgap;
+		  FloatingType *restrict Wshifted     = Wstruct->Wshifted;
+		  int    *restrict 		 iproc        = Wstruct->iproc;
+
+		  int my_begin = imax(cl_begin, proc_W_begin);
+		  int my_end   = imin(cl_end,   proc_W_end);
+		  if (pid == left_pid ) my_begin = cl_begin;
+		  if (pid == right_pid) my_end   = cl_end;
+          int my_size  = my_end - my_begin + 1;
+
+          int i, k;
+          int num_messages = 0;
+		  for (i=left_pid; i<=right_pid; i++) {
+			for (k=cl_begin; k<=cl_end; k++) {
+			  if (iproc[k] == i) {
+			num_messages += 4;
+			break;
+			  }
+			}    
+		  }
+
+		  MPI_Request *requests = (MPI_Request *) malloc( num_messages *
+							  sizeof(MPI_Request) );
+		  MPI_Status  *stats = (MPI_Status  *) malloc( num_messages * 
+							  sizeof(MPI_Status) );
+
+          int p;
+          int i_msg = 0;
+          int other_begin, other_end, other_size;
+		  for (p=left_pid; p<=right_pid; p++) {
+
+			bool proc_involved = false;
+			for (k=cl_begin; k<=cl_end; k++) {
+			  if (iproc[k] == p) {
+			    proc_involved = true;
+			    break;
+			  }
+			}
+
+            int u;
+			if (p != pid && proc_involved == true) {
+
+			  /* send message to process p (non-blocking) */
+			  MPI_Isend(&Wshifted[my_begin], my_size, float_traits<FloatingType>::mpi_type(), p,
+				my_begin, procinfo->comm, &requests[4*i_msg]);
+
+			  MPI_Isend(&Werr[my_begin], my_size, float_traits<FloatingType>::mpi_type(), p,
+				my_begin, procinfo->comm, &requests[4*i_msg+1]);
+
+			  /* Find eigenvalues in of process p */
+			  other_size = 0;
+			  for (k=cl_begin; k<=cl_end; k++) {
+			    if (other_size == 0 && iproc[k] == p) {
+			      other_begin = k;
+			      other_end   = k;
+			      other_size++;
+			      u = k+1;
+			      while (u <=cl_end && iproc[u] == p) {
+				    other_end++;
+				    other_size++;
+				    u++;
+			      }
+			    }
+			  }
+			  if (p == left_pid) {
+			    other_begin = cl_begin;
+			    u = cl_begin;
+			    while (iproc[u] == -1) {
+			      other_size++;
+			      u++;
+			    }
+			  }
+			  if (p == right_pid) {
+			    other_end = cl_end;
+			    u = cl_end;
+			    while (iproc[u] == -1) {
+			      other_size++;
+			      u--;
+			    }
+			  }
+
+			  /* receive message from process p (non-blocking) */
+			  MPI_Irecv(&Wshifted[other_begin], other_size, float_traits<FloatingType>::mpi_type(),	p,
+				other_begin, procinfo->comm, &requests[4*i_msg+2]);
+
+			  MPI_Irecv(&Werr[other_begin], other_size, float_traits<FloatingType>::mpi_type(), p,
+				other_begin, procinfo->comm, &requests[4*i_msg+3]);
+			 
+			  i_msg++;
+			}
+
+		  } /* end for p */
+		  num_messages = 4*i_msg; /* messages actually send */
+
+		  int communication_done;
+          int status = MPI_Testall(num_messages, requests, 
+					   &communication_done, stats);
+		  assert(status == MPI_SUCCESS);
+
+		  if (communication_done == true) {
+
+			FloatingType sigma = RRR->L[bl_end-bl_begin];
+			for (k=cl_begin; k<cl_end; k++) {
+			  W[k]    = Wshifted[k] + sigma;
+			  Wgap[k] = fmax(0, Wshifted[k+1]-Werr[k+1] 
+				                - (Wshifted[k]-Werr[k]));
+			}
+			W[cl_end] = Wshifted[cl_end] + sigma;
+
+			free(requests);
+			free(stats);
+			status = COMM_COMPLETE;
+		  } else {
+		   
+			comm_t *comm = (comm_t *) malloc( sizeof(comm_t) );
+			assert(comm != NULL);
+
+			comm->num_messages      = num_messages;
+			comm->requests          = requests;
+			comm->stats             = stats;
+			cl->wait_until_refined  = true;
+			cl->messages            = comm;
+		
+			status = COMM_INCOMPLETE;
+		  }
+		  
+		  return status;
+		} /* end communicate_refined_eigvals */
+
+
+		template<typename FloatingType> 
+		int test_comm_status(cluster_t<FloatingType> *cl, val_t<FloatingType> *Wstruct)
+		{
+		  int         cl_begin            = cl->begin;
+		  int         cl_end              = cl->end;
+		  int         bl_begin            = cl->bl_begin;
+		  int         bl_end              = cl->bl_end;
+		  rrr_t<FloatingType>       *RRR                = cl->RRR;
+		  comm_t      *comm               = cl->messages;
+		  int         num_messages        = comm->num_messages;
+		  MPI_Request *requests           = comm->requests;
+		  MPI_Status  *stats              = comm->stats;
+		  FloatingType      *restrict W         = Wstruct->W;
+		  FloatingType      *restrict Werr      = Wstruct->Werr;
+		  FloatingType      *restrict Wgap      = Wstruct->Wgap;
+		  FloatingType      *restrict Wshifted  = Wstruct->Wshifted;
+
+		  /* Test if communication complete */
+		  int communication_done;
+          int status = MPI_Testall(num_messages, requests, 
+					   &communication_done, stats);
+		  assert(status == MPI_SUCCESS);
+
+		  if (communication_done == true) {
+
+			cl->wait_until_refined = false;
+
+            int k;
+			FloatingType sigma     = RRR->L[bl_end-bl_begin];
+			for (k=cl_begin; k<cl_end; k++) {
+			  W[k]    = Wshifted[k] + sigma;
+			  Wgap[k] = fmax(0, Wshifted[k+1]-Werr[k+1] 
+						- (Wshifted[k]-Werr[k]));
+			}
+			W[cl_end] = Wshifted[cl_end] + sigma;
+		
+			free(comm);
+			free(requests);
+			free(stats);
+			status = COMM_COMPLETE;
+		  } else {
+			status = COMM_INCOMPLETE;
+		  }
+
+		  return status;
+		} /* test_comm_status */
+
+        /* TODO: Refactor this routine */
+		template<typename FloatingType>
+		int create_subtasks(cluster_t<FloatingType> *cl, int tid, proc_t *procinfo, 
+					rrr_t<FloatingType> *RRR, val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct,
+					workQ_t *workQ, counter_t *num_left)
+		{
+		  /* From inputs */
+		  int              cl_begin  = cl->begin;
+		  int              cl_end    = cl->end;
+		  int              depth     = cl->depth;
+		  int              bl_begin  = cl->bl_begin;
+		  int              bl_end    = cl->bl_end;
+		  int              bl_size   = bl_end - bl_begin + 1;
+		  FloatingType     bl_spdiam = cl->bl_spdiam;
+		  FloatingType     lgap;
+
+		  int              pid       = procinfo->pid;
+		  int              nproc     = procinfo->nproc;
+		  int              nthreads  = procinfo->nthreads;
+		  bool           proc_involved=true;
+
+		  FloatingType *restrict Wgap      = Wstruct->Wgap;
+		  FloatingType *restrict Wshifted  = Wstruct->Wshifted;
+		  int    *restrict iproc     = Wstruct->iproc;
+
+		  int              ldz       = Zstruct->ldz;
+		  FloatingType *restrict Z         = Zstruct->Z;
+		  int    *restrict Zindex    = Zstruct->Zindex;
+
+		  /* others */
+		  int    i, l, k;
+		  int    max_size;
+		  task_t *task;
+		  bool   task_inserted;
+		  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
+		  int    sn_first, sn_last, sn_size;
+		  rrr_t<FloatingType>  *RRR_parent;
+		  int    new_lpid, new_rpid;
+		  FloatingType *restrict D_parent;
+		  FloatingType *restrict L_parent;
+		  int    my_first, my_last;
+		  bool   copy_parent_rrr;
+
+
+		  max_size = fmax(1, PMR_get_counter_value(num_left) /
+					 (fmin(depth+1,4)*nthreads) );
+		  task_inserted = true;
+		  new_first = cl_begin;
+		  for (i=cl_begin; i<=cl_end; i++) {    
+
+			if ( i == cl_end )
+			  new_last = i;
+			else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
+			  new_last = i;
+			else
+			  continue;
+
+			new_size = new_last - new_first + 1;
+
+			if (new_size == 1) {
+			  /* singleton was found */
+			  
+			  if (new_first==cl_begin || task_inserted==true) {
+			/* initialize new singleton task */
+			sn_first = new_first;
+			sn_last  = new_first;
+			sn_size  = 1;
+			  } else {
+			/* extend singleton task by one */
+			sn_last++;
+			sn_size++;
+			  }
+			  
+			  /* insert task if ... */
+			  if (i==cl_end || sn_size>=max_size ||
+				Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {
+
+			/* Check if process involved in s-task */
+			proc_involved = false;
+			for (k=sn_first; k<=sn_last; k++) {
+			  if (iproc[k] == pid) {
+				proc_involved = true;
+				break;
+			  }
+			}
+			if (proc_involved == false) {
+			  task_inserted = true;
+			  new_first = i + 1;
+			  continue;
+			}
+
+			/* Insert task as process is involved */
+			if (sn_first == cl_begin) {
+			  lgap = cl->lgap;
+			} else {
+			  lgap = Wgap[sn_first-1];
+			}
+	
+			PMR_increment_rrr_dependencies(RRR);
+	
+			task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
+						 bl_end, bl_spdiam, lgap, RRR);
+	
+			PMR_insert_task_at_back(workQ->s_queue, task);
+			  
+			task_inserted = true;
+			  } else {
+			task_inserted = false;
+			  }
+			  
+			} else {
+			  /* cluster was found */
+
+			  /* check if process involved in processing the new cluster */
+			  new_lpid = nproc-1;
+			  new_rpid = -1;
+			  for (l=new_first; l<=new_last; l++) {
+			if (iproc[l] != -1) {
+			  new_lpid = imin(new_lpid, iproc[l]);
+			  new_rpid = imax(new_rpid, iproc[l]);
+			  }
+			  }
+			  if (new_lpid > pid || new_rpid < pid) {
+			task_inserted = true;
+			new_first = i + 1;
+			continue;
+			  }
+
+			  /* find gap to the left */
+			  if (new_first == cl_begin) {
+			lgap = cl->lgap;
+			  } else {
+			lgap = Wgap[new_first - 1];
+			  }
+		
+			  /* determine where to store the parent rrr needed by the
+			   * cluster to find its new rrr */
+			  my_first = imax(new_first, cl->proc_W_begin);
+			  my_last  = imin(new_last,  cl->proc_W_end);
+			  if ( my_first == my_last ) {
+			/* only one eigenvalue of cluster belongs to process */
+			copy_parent_rrr = true;
+			  } else {
+			/* store parent rrr in Z at column new_ftt */
+			copy_parent_rrr = false;
+			  }
+			  new_ftt1 = Zindex[my_first    ];
+			  new_ftt2 = Zindex[my_first + 1];
+
+			  if (copy_parent_rrr == true) {
+                /* Copy parent RRR into alloceted arrays and mark them
+                 * for freeing later */
+                D_parent = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+                assert(D_parent != NULL);
+        
+                L_parent = (FloatingType *) malloc(bl_size * sizeof(FloatingType));
+                assert(L_parent != NULL);
+
+                memcpy(D_parent, RRR->D, bl_size*sizeof(FloatingType));
+                memcpy(L_parent, RRR->L, bl_size*sizeof(FloatingType));
+
+                /* 
+                 * We have to explicitly specify the type because neither NULL nor nullptr
+                 * can't be used for template type deduction.
+                 */
+                RRR_parent = PMR_create_rrr<FloatingType>(D_parent, L_parent, NULL, 
+                                NULL, bl_size, depth);
+                PMR_set_copied_parent_rrr_flag(RRR_parent, true);
+
+			  } else {
+                /* copy parent RRR into Z to make cluster task independent */
+                memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, 
+                       bl_size*sizeof(FloatingType));
+                memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, 
+                       bl_size*sizeof(FloatingType));
+                /* 
+                 * We have to explicitly specify the type because neither NULL nor nullptr
+                 * can't be used for template type deduction.
+                 */
+                RRR_parent = PMR_create_rrr<FloatingType>(&Z[new_ftt1*ldz + bl_begin],
+                                &Z[new_ftt2*ldz + bl_begin],
+                                NULL, NULL, bl_size, depth);
+			  }
+			  
+			  /* Create the task for the cluster and put it in the queue */ 
+			  task = PMR_create_c_task(new_first, new_last, depth+1, 
+						   bl_begin, bl_end, bl_spdiam, lgap, 
+						   cl->proc_W_begin, cl->proc_W_end, 
+						   new_lpid, new_rpid, RRR_parent);
+
+			  if (new_lpid != new_rpid)
+			PMR_insert_task_at_back(workQ->r_queue, task);
+			  else
+			PMR_insert_task_at_back(workQ->c_queue, task);
+
+			  task_inserted = true;
+			  
+			} /* if singleton or cluster found */
+
+			new_first = i + 1;
+		  } /* end i */
+		  
+		  /* set flag in RRR that last singleton is created */
+		  PMR_set_parent_processed_flag(RRR);
+		  
+		  /* clean up */
+		  PMR_try_destroy_rrr(RRR);
+		  free(cl);
+
+		  return(0);
+		} /* end create_subtasks */
+	}
+
+} //namespace detail
+
+} //namespace pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/process_r_task.hpp b/external/pmrrr/include/pmrrr/process_r_task.hpp
new file mode 100644
index 0000000000..b9bda64d9b
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/process_r_task.hpp
@@ -0,0 +1,163 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __PROCESS_R_TASK_HPP__
+#define __PROCESS_R_TASK_HPP__
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/plarrv.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/queue.h>
+#include <pmrrr/definitions/counter.h>
+#include <pmrrr/definitions/rrr.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/definitions/tasks.h>
+
+#include <pmrrr/lapack/odrrb.hpp>
+
+namespace pmrrr { namespace detail {
+
+	/*
+	 * Executes all tasks which are in the r-queue at the moment of the 
+	 * call. This routine is called to make sure that all tasks in the 
+	 * queue are dequeued before continueing with other tasks.
+	 */
+	template<typename FloatingType>
+	void PMR_process_r_queue(int tid, proc_t *procinfo, val_t<FloatingType> *Wstruct, 
+				 vec_t<FloatingType> *Zstruct, tol_t<FloatingType> *tolstruct, 
+				 workQ_t *workQ, counter_t *num_left, 
+				 FloatingType *work, int *iwork)
+	{
+	  int thread_support = procinfo->thread_support;
+	  int num_tasks = PMR_get_num_tasks(workQ->r_queue);
+
+      int t;
+	  for (t=0; t<num_tasks; t++) {
+		
+		task_t *task = PMR_remove_task_at_front(workQ->r_queue);
+
+		if ( task != NULL ) {
+		
+		  if (task->flag == CLUSTER_TASK_FLAG) {
+
+            if (thread_support != MPI_THREAD_FUNNELED || tid == 0) {
+              /* if MPI_THREAD_FUNNELED only tid==0 should process 
+                   * these tasks, otherwise any thread can do it */
+              int status = PMR_process_c_task((cluster_t<FloatingType> *) task->data,
+                              tid, procinfo, Wstruct,
+                              Zstruct, tolstruct, workQ,
+                              num_left, work, iwork);
+              
+              if (status == C_TASK_PROCESSED) {
+                free(task);
+              } else {
+                PMR_insert_task_at_back(workQ->r_queue, task);
+              }
+            } else {
+                PMR_insert_task_at_back(workQ->r_queue, task);
+            }
+
+		  } /* end if cluster task */
+          else if (task->flag == REFINE_TASK_FLAG) {
+            PMR_process_r_task((refine_t<FloatingType> *) task->data, procinfo,
+                       Wstruct, tolstruct, work, iwork);
+            free(task);
+		  } 
+		} /* end if task removed */
+	  } /* end for t */
+	} /* end process_entire_r_queue */
+  
+	/*
+	 * Process the task of refining a subset of eigenvalues.
+	 */
+	template<typename FloatingType>
+	int PMR_process_r_task(refine_t<FloatingType> *rf, proc_t *procinfo, 
+				   val_t<FloatingType> *Wstruct, tol_t<FloatingType> *tolstruct, 
+				   FloatingType *work, int *iwork)
+	{
+	  /* From inputs */
+	  int              		 ts_begin  = rf->begin;
+	  FloatingType *restrict D         = rf->D;
+	  FloatingType *restrict DLL       = rf->DLL;
+	  int              		 p         = rf->p;
+	  int              		 q         = rf->q;
+	  int              		 bl_size   = rf->bl_size;
+	  FloatingType           bl_spdiam = rf->bl_spdiam;
+
+	  FloatingType *restrict Werr      = Wstruct->Werr;
+	  FloatingType *restrict Wgap      = Wstruct->Wgap;
+	  int    *restrict 		 Windex    = Wstruct->Windex;
+	  FloatingType *restrict Wshifted  = Wstruct->Wshifted;
+	  
+	  FloatingType savegap;
+
+	  if (p == q) {
+		savegap = Wgap[ts_begin];
+		Wgap[ts_begin] = 0.0;
+	  }
+
+      int info;
+      int offset = Windex[ts_begin]-1;
+      FloatingType rtol1 = tolstruct->rtol1;
+      FloatingType rtol2 = tolstruct->rtol2;
+      FloatingType pivmin = tolstruct->pivmin;
+	  lapack::odrrb(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
+		  &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
+		  work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
+	  assert(info == 0);
+
+	  if (p == q) {
+		Wgap[ts_begin] = savegap;
+	  }  
+
+	  PMR_refine_sem_post(rf);
+	  free(rf);
+
+	  return 0;
+	}
+
+} //namespace detail
+
+} //namespace pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/process_s_task.hpp b/external/pmrrr/include/pmrrr/process_s_task.hpp
new file mode 100644
index 0000000000..febec9484c
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/process_s_task.hpp
@@ -0,0 +1,328 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __PROCESS_S_TASK_HPP__
+#define __PROCESS_S_TASK_HPP__
+
+#include <pmrrr/definitions/pmrrr.h>
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/structs.h>
+#include <pmrrr/definitions/counter.h>
+#include <pmrrr/definitions/rrr.h>
+#include <pmrrr/definitions/tasks.h>
+
+#include <pmrrr/lapack/odr1v.hpp>
+#include <pmrrr/lapack/odrrb.hpp>
+#include <pmrrr/blas/odscal.hpp>
+
+
+namespace pmrrr { namespace detail {
+
+	template<typename FloatingType>
+	int PMR_process_s_task(singleton_t<FloatingType> *sng, int tid, proc_t *procinfo,
+				   val_t<FloatingType> *Wstruct, vec_t<FloatingType> *Zstruct, 
+				   tol_t<FloatingType> *tolstruct, counter_t *num_left, 
+				   FloatingType *work, int *iwork)
+	{
+	  /* Inputs */
+	  int    				 begin         = sng->begin; 
+	  int    				 end           = sng->end;
+	  int    				 bl_begin      = sng->bl_begin;
+	  int    				 bl_end        = sng->bl_end;
+	  int    				 bl_size       = bl_end - bl_begin + 1;
+	  FloatingType 			 bl_spdiam     = sng->bl_spdiam; 
+	  rrr_t<FloatingType>  				 *RRR          = sng->RRR;
+	  FloatingType *restrict D   = RRR->D; 
+	  FloatingType *restrict L   = RRR->L; 
+	  FloatingType *restrict DL  = RRR->DL;
+	  FloatingType *restrict DLL = RRR->DLL;
+
+	  int              		 pid      = procinfo->pid;  
+	  int              		 n        = Wstruct->n;
+	  FloatingType *restrict W        = Wstruct->W;
+	  FloatingType *restrict Werr     = Wstruct->Werr;
+	  FloatingType *restrict Wgap     = Wstruct->Wgap;
+	  int    *restrict 		 Windex   = Wstruct->Windex;  
+	  int    *restrict 		 iproc    = Wstruct->iproc;  
+	  FloatingType *restrict Wshifted = Wstruct->Wshifted;
+	  int              		 ldz      = Zstruct->ldz;
+	  FloatingType *restrict Z        = Zstruct->Z;
+	  int    *restrict 		 isuppZ   = Zstruct->Zsupp;;
+	  int    *restrict 		 Zindex   = Zstruct->Zindex;
+	  FloatingType           pivmin   = tolstruct->pivmin;
+
+	  /* others */
+	  int              		 info, i, k, itmp, num_decrement=0;
+	  int              		 IONE = 1;
+	  FloatingType           DZERO = 0.0;
+	  FloatingType           tol, lambda, left, right;
+	  int              		 i_local, zind;
+	  FloatingType           gap, lgap, rgap, gaptol, savedgap, tmp;
+	  int             		 usedBS, usedRQ, needBS, wantNC, step2II;
+	  int              		 r, offset;
+	  FloatingType           twoeps = 2*std::numeric_limits<FloatingType>::epsilon(), RQtol = 2*std::numeric_limits<FloatingType>::epsilon();
+	  FloatingType           residual, bstres, bstw; 
+	  int              		 i_supmn, i_supmx;
+	  FloatingType           RQcorr;
+	  int              		 negcount;
+	  int              		 sgndef, suppsize;
+	  FloatingType           sigma;
+	  int              		 i_Zfrom, i_Zto;
+	  FloatingType           ztz, norminv, mingma;
+
+
+	  /* set tolerance parameter */
+	  tol  = 4.0 * log( (FloatingType) bl_size ) * std::numeric_limits<FloatingType>::epsilon();
+
+	  /* loop over all singletons in the task */
+	  for (i=begin; i<=end; i++) {
+
+		/* check if eigenvector is supposed to be computed by
+		 * the process */
+		if (iproc[i] != pid)
+		  continue;
+		num_decrement++;
+
+		if (bl_size == 1) {
+		  /* set eigenvector to column of identity matrix */
+		  zind = Zindex[i];
+		  memset(&Z[zind*ldz], 0.0, n*sizeof(FloatingType) );
+		  Z[zind*ldz + bl_begin] = 1.0;
+		  isuppZ[2*zind    ]     = bl_begin + 1;
+		  isuppZ[2*zind + 1]     = bl_begin + 1;
+		  continue;
+		}
+
+		lambda  = Wshifted[i];  
+		left    = Wshifted[i] - Werr[i];
+		right   = Wshifted[i] + Werr[i];
+		i_local = Windex[i];
+		r       = 0;
+		
+		/* compute left and right gap */
+		if (i == bl_begin)
+		  lgap = std::numeric_limits<FloatingType>::epsilon() * fmax( fabs(left), fabs(right) );
+		else if (i == begin)
+		  lgap = sng->lgap;
+		else
+		  lgap = Wgap[i-1];
+
+		if (i == bl_end) {
+		  rgap = std::numeric_limits<FloatingType>::epsilon() * fmax( fabs(left), fabs(right) );
+		} else {
+		  rgap = Wgap[i];
+		}
+
+		gap = fmin(lgap, rgap);
+
+		if ( i == bl_begin || i == bl_end ) {
+		  gaptol = 0.0;
+		} else {
+		  gaptol = gap * std::numeric_limits<FloatingType>::epsilon();
+		}
+
+		/* initialize lower and upper value of support */
+		i_supmn = bl_size;
+		i_supmx = 1;
+
+		/* update Wgap so that it holds minimum gap and save the 
+		 * old value */
+		savedgap  = Wgap[i];
+		Wgap[i]   = gap;
+		
+		/* initialize flags indicating if bisection or Rayleigh-Quotient
+		 * correction was used */
+		usedBS = false;
+		usedRQ = false;
+	  
+		/* the need for bisection is initially turned off */
+		needBS = !TRY_RQC;
+
+		/* IEEE floating point is assumed, so that all 0 bits are 0.0 */
+		zind = Zindex[i];
+		memset(&Z[zind*ldz], 0.0, n*sizeof(FloatingType));
+
+		/* inverse iteration with twisted factorization */
+		for (k=1; k<=MAXITER; k++) {
+
+		  if (needBS == true) {
+		usedBS = true;
+		itmp   = r;
+	
+		offset  = Windex[i] - 1;
+		tmp     = Wgap[i]; 
+		Wgap[i] = 0.0;
+	
+		lapack::odrrb(&bl_size, D, DLL, &i_local, &i_local, &DZERO, 
+			&twoeps, &offset, &Wshifted[i], &Wgap[i],
+			&Werr[i], work, iwork, &pivmin, &bl_spdiam,
+			&itmp, &info);
+		assert(info == 0);
+	
+		Wgap[i] = tmp;
+		lambda = Wshifted[i];
+		r = 0;
+		  }
+		  wantNC = (usedBS == true) ? false : true;
+
+		  /* compute the eigenvector corresponding to lambda */
+		  lapack::odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
+			  &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
+			  &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
+			  &norminv, &residual, &RQcorr, work);
+
+		  if (k == 1) {
+		bstres = residual;
+		bstw   = lambda;
+		  } else if (residual < bstres) {
+		bstres = residual;
+		bstw   = lambda;
+		  }
+		  
+		  /* update support held */
+		  i_supmn = imin(i_supmn, isuppZ[2*zind    ]);
+		  i_supmx = imax(i_supmx, isuppZ[2*zind + 1]);
+
+		  /* Convergence test for Rayleigh Quotient Iteration
+		   * not done if bisection was used */
+		  if ( !usedBS && residual > tol*gap 
+		   && fabs(RQcorr) > RQtol*fabs(lambda) ) {
+		  
+		if (i_local <= negcount) {
+		  sgndef = -1;    /* wanted eigenvalue lies to the left  */
+		} else {
+		  sgndef =  1;    /* wanted eigenvalue lies to the right */
+		}
+	
+		if ( RQcorr*sgndef >= 0.0
+			 && lambda+RQcorr <= right 
+			 && lambda+RQcorr >= left ) {
+		  usedRQ = true;
+		  if ( sgndef == 1 )
+			left  = lambda;
+		  else
+			right = lambda;
+		  Wshifted[i] = 0.5*(left + right);
+		  lambda     += RQcorr;
+		} else { /* bisection is needed */
+		  needBS = true;
+		}
+	
+		if ( right-left < RQtol*fabs(lambda) ) {
+		  /* eigenvalue computed to bisection accuracy
+		   * => compute eigenvector */
+		  usedBS = true;
+		} else if ( k == MAXITER-1 ) {
+		  /* for last iteration use bisection */
+		  needBS = true;
+		}
+		  } else {
+		/* go to next iteration */
+		break;
+		  }
+
+		} /* end k */
+
+		/* if necessary call odr1v to improve error angle by 2nd step */
+		step2II = false;
+		if ( usedRQ && usedBS && (bstres <= residual) ) {
+		  lambda = bstw;
+		  step2II = true;
+		}
+		if ( step2II == true ) {
+		  lapack::odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
+			  &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
+			  &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
+			  &norminv, &residual, &RQcorr, work);
+		}
+		Wshifted[i] = lambda;
+
+		/* compute support w.r.t. whole matrix
+		 * block beginning is offset for each support */
+		isuppZ[2*zind    ] += bl_begin;
+		isuppZ[2*zind + 1] += bl_begin;
+	  
+		/* ensure vector is okay if support changed in RQI 
+		 * minus ones because of indices starting from zero */
+		i_Zfrom    = isuppZ[2*zind    ] - 1;
+		i_Zto      = isuppZ[2*zind + 1] - 1;
+		i_supmn   += bl_begin - 1;
+		i_supmx   += bl_begin - 1;
+		if ( i_supmn < i_Zfrom ) {
+		  for ( k=i_supmn; k < i_Zfrom; k++ ) {
+		Z[k + zind*ldz] = 0.0;
+		  }
+		}
+		if ( i_supmx > i_Zto ) {
+		  for ( k=i_Zto+1; k <= i_supmx; k++ ) {
+		Z[k + zind*ldz] = 0.0;
+		  }
+		}
+		
+		/* normalize eigenvector */
+		suppsize = i_Zto - i_Zfrom + 1;
+		blas::odscal(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE);
+
+		sigma = L[bl_size-1];
+		W[i]  = lambda + sigma;
+		
+		if (i < end)
+		  Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]);
+
+	  } /* end i */
+
+	  /* decrement counter */
+	  PMR_decrement_counter(num_left, num_decrement);
+
+	  /* clean up */
+	  free(sng);
+	  PMR_try_destroy_rrr(RRR);
+
+	  return 0;
+	}
+
+} // detail
+
+} // pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/rrr.hpp b/external/pmrrr/include/pmrrr/rrr.hpp
new file mode 100644
index 0000000000..be4fc7b6b9
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/rrr.hpp
@@ -0,0 +1,227 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __RRR_HPP__
+#define __RRR_HPP__
+
+#include <cstdlib>
+#include <cstdio>
+#include <cassert>
+
+#include <pmrrr/definitions/rrr.h>
+#include <pmrrr/definitions/global.h>
+
+#ifndef DISABLE_PTHREADS
+# include <errno.h>
+#endif
+
+namespace pmrrr { namespace detail {
+
+	template<typename FloatingType>
+    int PMR_rrr_init_lock(rrr_t<FloatingType> *RRR)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = pthread_mutex_init(&RRR->mutex, NULL);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+	template<typename FloatingType>
+    void PMR_rrr_destroy_lock(rrr_t<FloatingType> *RRR)
+    {
+    #ifndef DISABLE_PTHREADS
+      pthread_mutex_destroy(&RRR->mutex);
+    #endif
+    }
+
+	template<typename FloatingType>
+    int PMR_rrr_lock(rrr_t<FloatingType> *RRR)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = pthread_mutex_lock(&RRR->mutex);
+      if( info == EINVAL )
+        fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
+      else if( info == EAGAIN )
+        fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
+      else if( info == EDEADLK )
+        fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
+      else if( info == EPERM )
+        fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
+      else
+        fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+	template<typename FloatingType>
+    int PMR_rrr_unlock(rrr_t<FloatingType> *RRR)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = pthread_mutex_unlock(&RRR->mutex);
+      if( info == EINVAL )
+        fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
+      else if( info == EAGAIN )
+        fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
+      else if( info == EDEADLK )
+        fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
+      else if( info == EPERM )
+        fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
+      else
+        fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+	template<typename FloatingType>
+	rrr_t<FloatingType> *PMR_create_rrr(FloatingType *restrict D, FloatingType *restrict L,
+				  FloatingType *restrict DL, FloatingType *restrict DLL,
+				  int size, int depth)
+	{
+	  rrr_t<FloatingType> *RRR = (rrr_t<FloatingType> *) malloc( sizeof(rrr_t<FloatingType>) );
+	  assert(RRR != NULL);
+
+	  RRR->D                 = D;
+	  RRR->L                 = L;
+	  RRR->DL                = DL;
+	  RRR->DLL               = DLL;
+	  RRR->size              = size;
+	  RRR->depth             = depth;
+	  RRR->parent_processed  = false;
+	  RRR->copied_parent_rrr = false;
+	  RRR->ndepend           = 0;
+
+	  int info = PMR_rrr_init_lock(RRR); 
+
+	  return RRR;
+	}
+
+	template<typename FloatingType>
+	rrr_t<FloatingType> *PMR_reset_rrr(rrr_t<FloatingType> *RRR, FloatingType *restrict D, 
+				 FloatingType *restrict L, FloatingType *restrict DL, 
+				 FloatingType *restrict DLL, int size, int depth)
+	{
+	  RRR->D                = D;
+	  RRR->L                = L;
+	  RRR->DL               = DL;
+	  RRR->DLL              = DLL;
+	  RRR->size             = size;
+	  RRR->depth            = depth;
+	  RRR->parent_processed = false;
+
+	  return RRR;
+	}
+
+	template<typename FloatingType>
+	int PMR_increment_rrr_dependencies(rrr_t<FloatingType> *RRR)
+	{
+	  /* returns number of dependencies */
+	  int info = PMR_rrr_lock(RRR); 
+	  RRR->ndepend++;
+	  int i = RRR->ndepend; 
+	  info |= PMR_rrr_unlock(RRR); 
+	  return i;
+	}
+
+	template<typename FloatingType>
+	int PMR_set_parent_processed_flag(rrr_t<FloatingType> *RRR)
+	{
+	  int info = PMR_rrr_lock(RRR); 
+	  RRR->parent_processed = true;
+	  info |= PMR_rrr_unlock(RRR); 
+	  return info;
+	}
+
+	template<typename FloatingType>
+	int PMR_set_copied_parent_rrr_flag(rrr_t<FloatingType> *RRR, bool val)
+	{
+	  int info = PMR_rrr_lock(RRR); 
+	  RRR->copied_parent_rrr = val;
+	  info |= PMR_rrr_unlock(RRR); 
+	  return info;
+	}
+
+	template<typename FloatingType>
+	int PMR_try_destroy_rrr(rrr_t<FloatingType> *RRR)
+	{
+	  /* return 0 on success, otherwise 1 */
+      int info = PMR_rrr_lock(RRR);
+
+	  RRR->ndepend--;
+      int tmp = 0;
+	  if (RRR->ndepend == 0 && RRR->parent_processed == true) {
+		if (RRR->depth >0) {
+		  free(RRR->D);
+		  free(RRR->L);
+		}
+		if (RRR->depth >=0) {
+		  free(RRR->DL);
+		  free(RRR->DLL);
+		}	
+		tmp = 1;
+	  }
+	  
+	  info |= PMR_rrr_unlock(RRR);
+
+	  if (tmp == 1) { 
+        PMR_rrr_destroy_lock(RRR); 
+		free(RRR);
+		return 0;
+	  } else {
+		return 1;
+	  }
+	}
+
+}	// detail
+
+}	// pmrrr
+
+#endif
diff --git a/external/pmrrr/include/pmrrr/structs.h b/external/pmrrr/include/pmrrr/structs.h
deleted file mode 100644
index e40ef3533f..0000000000
--- a/external/pmrrr/include/pmrrr/structs.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-
-#ifndef SSTRUCTS_H
-#define SSTRUCTS_H
-
-#include "mpi.h"
-#include "global.h"
-#include "counter.h"
-#include "queue.h"
-
-typedef struct {
-  int              n;
-  double *restrict D;
-  double *restrict E;
-  int              nsplit;
-  int    *restrict isplit ;
-  double           spdiam;
-} in_t;
-
-typedef struct {
-  int              n;
-  double           *vl;
-  double           *vu;
-  int              *il;
-  int              *iu;
-  double *restrict W;
-  double *restrict Werr;
-  double *restrict Wgap;
-  int    *restrict Windex;
-  int    *restrict iblock;
-  int    *restrict iproc;
-  double *restrict Wshifted;
-  double *restrict gersch;
-} val_t;
-
-typedef struct {
-  int              ldz;
-  int              nz;
-  double *restrict Z;
-  int    *restrict Zsupp;
-  int    *restrict Zindex;
-} vec_t;
-
-typedef struct {
-  int      pid;
-  int      nproc;
-  MPI_Comm comm;
-  int      nthreads;
-  int      thread_support;
-} proc_t;
-
-typedef struct {
-  double split;
-  double rtol1;
-  double rtol2;
-  double pivmin;
-} tol_t;
-
-typedef struct {
-  int         num_messages;
-  MPI_Request *requests;
-  MPI_Status  *stats;
-} comm_t;
-
-typedef struct {
-  queue_t *r_queue;
-  queue_t *s_queue;
-  queue_t *c_queue;
-} workQ_t;
-
-typedef struct {
-  double lambda;
-  int    local_ind;
-  int    block_ind;
-  int    ind;
-} sort_struct_t;
-
-typedef struct {
-  int    n;
-  double *D;
-  double *E;
-  double *E2;
-  int    il;
-  int    iu;
-  int    my_il;
-  int    my_iu;
-  int    nsplit;
-  int    *isplit;
-  double bsrtol;
-  double pivmin;
-  double *gersch;
-  double *W;
-  double *Werr;
-  int    *Windex;
-  int    *iblock;
-} auxarg1_t;
-
-typedef struct {
-  int          bl_size;
-  double       *D;
-  double       *DE2;
-  int          rf_begin;
-  int          rf_end;
-  double        *W;
-  double        *Werr;
-  double        *Wgap;
-  int            *Windex;
-  double       rtol1;
-  double       rtol2;
-  double       pivmin;
-  double       bl_spdiam;
-} auxarg2_t;
-
-typedef struct {
-  int          tid;
-  proc_t       *procinfo;
-  val_t        *Wstruct;
-  vec_t        *Zstruct;
-  tol_t        *tolstruct;
-  workQ_t      *workQ;
-  counter_t    *num_left;
-} auxarg3_t;
-
-#endif
diff --git a/external/pmrrr/include/pmrrr/tasks.hpp b/external/pmrrr/include/pmrrr/tasks.hpp
new file mode 100644
index 0000000000..66941b8da3
--- /dev/null
+++ b/external/pmrrr/include/pmrrr/tasks.hpp
@@ -0,0 +1,200 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#ifndef __TASKS_HPP__
+#define __TASKS_HPP__
+
+#include <cstdlib>
+#include <cstdio>
+#include <cassert>
+
+#include <pmrrr/definitions/rrr.h>
+#include <pmrrr/definitions/tasks.h>
+#include <pmrrr/definitions/global.h>
+
+namespace pmrrr { namespace detail {
+
+	template<typename FloatingType>
+	task_t *PMR_create_s_task(int first, int last, int depth,
+				  int bl_begin, int bl_end, 
+				  FloatingType spdiam, FloatingType lgap, rrr_t<FloatingType> *RRR)
+	{
+	  task_t      *t = (task_t *) malloc(sizeof(task_t));
+	  singleton_t<FloatingType> *s = (singleton_t<FloatingType> *) malloc( sizeof(singleton_t<FloatingType>) );
+	  assert(t != NULL);
+	  assert(s != NULL);
+
+	  s->begin        = first;
+	  s->end          = last;
+	  s->depth        = depth;  
+	  s->bl_begin     = bl_begin;
+	  s->bl_end       = bl_end;
+	  s->bl_spdiam    = spdiam;
+	  s->lgap         = lgap;
+	  s->RRR          = RRR;
+
+	  t->data = (void *) s;
+	  t->flag = SINGLETON_TASK_FLAG;
+	  t->next = NULL;
+	  t->prev = NULL;
+
+	  return t;
+	}
+
+	template<typename FloatingType>
+	task_t *PMR_create_c_task(int first, int last, int depth, 
+				  int bl_begin, int bl_end, FloatingType spdiam,
+				  FloatingType lgap, int proc_W_begin, 
+				  int proc_W_end, int lpid, int rpid, 
+				  rrr_t<FloatingType> *RRR)
+	{
+	  task_t *t = (task_t *) malloc(sizeof(task_t));
+	  cluster_t<FloatingType> *c = (cluster_t<FloatingType> *) malloc( sizeof(cluster_t<FloatingType>) );
+	  assert(t != NULL);
+	  assert(c != NULL);
+
+	  c->begin              = first;
+	  c->end                = last;
+	  c->depth              = depth;  
+	  c->bl_begin           = bl_begin;
+	  c->bl_end             = bl_end;
+	  c->bl_spdiam          = spdiam;
+	  c->lgap               = lgap;
+	  c->proc_W_begin       = proc_W_begin;
+	  c->proc_W_end         = proc_W_end;
+	  c->left_pid           = lpid;
+	  c->right_pid          = rpid;
+	  c->RRR                = RRR;
+	  c->wait_until_refined = false;
+
+	  t->data = (void *) c;
+	  t->flag = CLUSTER_TASK_FLAG;
+	  t->next = NULL;
+	  t->prev = NULL;
+
+	  return t;
+	}
+
+    template<typename FloatingType>
+    int PMR_refine_sem_init(refine_t<FloatingType> *refine)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = sem_init(refine->sem, 0, 0);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+    template<typename FloatingType>
+    int PMR_refine_sem_destroy(refine_t<FloatingType> *refine)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = sem_destroy(refine->sem);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+    template<typename FloatingType>
+    int PMR_refine_sem_wait(refine_t<FloatingType> *refine)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = sem_wait(refine->sem);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+    template<typename FloatingType>
+    int PMR_refine_sem_post(refine_t<FloatingType> *refine)
+    {
+    #ifndef DISABLE_PTHREADS
+      int info = sem_post(refine->sem);
+      assert(info == 0);
+      return info;
+    #else
+      return 0;
+    #endif
+    }
+
+	template<typename FloatingType>
+	task_t *PMR_create_r_task(int begin, int end, FloatingType *D,
+				  FloatingType *DLL, int p, int q, int bl_size,
+				  FloatingType bl_spdiam, int tid)
+	{
+	  task_t *t= (task_t *) malloc(sizeof(task_t));
+	  refine_t<FloatingType> *r = (refine_t<FloatingType> *) malloc( sizeof(refine_t<FloatingType>) );
+	  assert(t != NULL);
+	  assert(r != NULL); 
+
+	  r->begin        = begin;
+	  r->end          = end;
+	  r->D            = D;
+	  r->DLL          = DLL;
+	  r->p            = p;
+	  r->q            = q;
+	  r->bl_size      = bl_size;
+	  r->bl_spdiam    = bl_spdiam;
+	  r->producer_tid = tid;
+      
+      PMR_refine_sem_init(r);
+
+	  t->data = (void *) r;
+	  t->flag = REFINE_TASK_FLAG;
+	  t->next = NULL;
+	  t->prev = NULL;
+
+	  return t;
+	}
+
+}	// detail
+
+}	// pmrrr
+
+#endif
diff --git a/external/pmrrr/src/blas/LICENSE b/external/pmrrr/src/blas/LICENSE
deleted file mode 100644
index aad36d6e12..0000000000
--- a/external/pmrrr/src/blas/LICENSE
+++ /dev/null
@@ -1,36 +0,0 @@
-Copyright (c) 1992-2009 The University of Tennessee.  All rights reserved.
-
-$COPYRIGHT$
-
-Additional copyrights may follow
-
-$HEADER$
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer. 
-  
-- Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer listed
-  in this license in the documentation and/or other materials
-  provided with the distribution.
-  
-- Neither the name of the copyright holders nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-  
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-
diff --git a/external/pmrrr/src/blas/odcpy.c b/external/pmrrr/src/blas/odcpy.c
deleted file mode 100644
index ec1e53628d..0000000000
--- a/external/pmrrr/src/blas/odcpy.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* odcpy.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odcpy(int *n, double *dx, int *incx, double *dy, int *incy)
-{
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    int i__, m, ix, iy, mp1;
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*     copies a vector, x, to a vector, y. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-	return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-	goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-	ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-	iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	dy[iy] = dx[ix];
-	ix += *incx;
-	iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 7;
-    if (m == 0) {
-	goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	dy[i__] = dx[i__];
-/* L30: */
-    }
-    if (*n < 7) {
-	return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 7) {
-	dy[i__] = dx[i__];
-	dy[i__ + 1] = dx[i__ + 1];
-	dy[i__ + 2] = dx[i__ + 2];
-	dy[i__ + 3] = dx[i__ + 3];
-	dy[i__ + 4] = dx[i__ + 4];
-	dy[i__ + 5] = dx[i__ + 5];
-	dy[i__ + 6] = dx[i__ + 6];
-/* L50: */
-    }
-    return 0;
-} 
diff --git a/external/pmrrr/src/blas/odscal.c b/external/pmrrr/src/blas/odscal.c
deleted file mode 100644
index c02b91602e..0000000000
--- a/external/pmrrr/src/blas/odscal.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* dscal.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odscal(int *n, double *da, double *dx, int *incx)
-{
-    /* System generated locals */
-    int i__1, i__2;
-
-    /* Local variables */
-    int i__, m, mp1, nincx;
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-/* * */
-/*     scales a vector by a constant. */
-/*     uses unrolled loops for increment equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-    /* Parameter adjustments */
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-	return 0;
-    }
-    if (*incx == 1) {
-	goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-	dx[i__] = *da * dx[i__];
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 5;
-    if (m == 0) {
-	goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-	dx[i__] = *da * dx[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-	return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 5) {
-	dx[i__] = *da * dx[i__];
-	dx[i__ + 1] = *da * dx[i__ + 1];
-	dx[i__ + 2] = *da * dx[i__ + 2];
-	dx[i__ + 3] = *da * dx[i__ + 3];
-	dx[i__ + 4] = *da * dx[i__ + 4];
-/* L50: */
-    }
-    return 0;
-} 
diff --git a/external/pmrrr/src/blas/odswap.c b/external/pmrrr/src/blas/odswap.c
deleted file mode 100644
index e3db133564..0000000000
--- a/external/pmrrr/src/blas/odswap.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/* dswap.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odswap(int *n, double *dx, int *incx, double *dy, int *incy)
-{
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    int i__, m, ix, iy, mp1;
-    double dtemp;
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*     interchanges two vectors. */
-/*     uses unrolled loops for increments equal one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-	return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-	goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-	ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-	iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	dtemp = dx[ix];
-	dx[ix] = dy[iy];
-	dy[iy] = dtemp;
-	ix += *incx;
-	iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-
-/*       clean-up loop */
-
-L20:
-    m = *n % 3;
-    if (m == 0) {
-	goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	dtemp = dx[i__];
-	dx[i__] = dy[i__];
-	dy[i__] = dtemp;
-/* L30: */
-    }
-    if (*n < 3) {
-	return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 3) {
-	dtemp = dx[i__];
-	dx[i__] = dy[i__];
-	dy[i__] = dtemp;
-	dtemp = dx[i__ + 1];
-	dx[i__ + 1] = dy[i__ + 1];
-	dy[i__ + 1] = dtemp;
-	dtemp = dx[i__ + 2];
-	dx[i__ + 2] = dy[i__ + 2];
-	dy[i__ + 2] = dtemp;
-/* L50: */
-    }
-    return 0;
-}
diff --git a/external/pmrrr/src/core/counter.c b/external/pmrrr/src/core/counter.c
deleted file mode 100644
index 055d473a17..0000000000
--- a/external/pmrrr/src/core/counter.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "pmrrr/global.h"
-#include "pmrrr/counter.h"
-
-#ifndef DISABLE_PTHREADS
-# include <errno.h>
-#endif
-
-int PMR_counter_init_lock(counter_t *counter)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_init(&counter->lock, NULL);
- #else
-  int info = pthread_spin_init(&counter->lock, PTHREAD_PROCESS_PRIVATE);
- #endif
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-void PMR_counter_destroy_lock(counter_t *counter)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  pthread_mutex_destroy(&counter->lock);
- #else
-  pthread_spin_destroy(&counter->lock);
- #endif
-#endif
-}
-
-int PMR_counter_lock(counter_t *counter)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_lock(&counter->lock);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
- #else
-  int info = pthread_spin_lock(&counter->lock);
- #endif
-  assert(info == 0);
- return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_counter_unlock(counter_t *counter)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_unlock(&counter->lock);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
- #else
-  int info = pthread_spin_unlock(&counter->lock);
- #endif
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-counter_t *PMR_create_counter(int init_value)
-{
-  counter_t *counter = (counter_t *) malloc( sizeof(counter_t) );
-  counter->value = init_value;
-  int info = PMR_counter_init_lock(counter);
-  return counter;
-}
-
-void PMR_destroy_counter(counter_t *counter)
-{
-  PMR_counter_destroy_lock(counter);
-  free(counter);
-}
-
-int PMR_get_counter_value(counter_t *counter)
-{
-  int info = PMR_counter_lock(counter);
-  int value = counter->value;
-  info |= PMR_counter_unlock(counter);
-  return value;
-}
-
-inline int PMR_set_counter_value(counter_t *counter, int value)
-{
-  int info = PMR_counter_lock(counter);
-  counter->value = value;
-  info |= PMR_counter_unlock(counter);
-  return value;
-}
-
-int PMR_decrement_counter(counter_t *counter, int amount)
-{
-  int info = PMR_counter_lock(counter);
-  counter->value -= amount;
-  int value = counter->value;
-  info |= PMR_counter_unlock(counter);
-  return value;
-}
-
-int PMR_increment_counter(counter_t *counter, int amount)
-{
-  int info = PMR_counter_lock(counter);
-  counter->value += amount;
-  int value = counter->value;
-  info |= PMR_counter_unlock(counter);
-  return value;
-}
diff --git a/external/pmrrr/src/core/queue.c b/external/pmrrr/src/core/queue.c
deleted file mode 100644
index 622cadc345..0000000000
--- a/external/pmrrr/src/core/queue.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "pmrrr/global.h"
-#include "pmrrr/queue.h"
-
-#ifndef DISABLE_PTHREADS
-# include <errno.h>
-#endif
-
-int PMR_queue_init_lock(queue_t *queue)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_init(&queue->lock, NULL);
- #else
-  int info = pthread_spin_init(&queue->lock, PTHREAD_PROCESS_PRIVATE);
- #endif
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-void PMR_queue_destroy_lock(queue_t *queue)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  pthread_mutex_destroy(&queue->lock);
- #else
-  pthread_spin_destroy(&queue->lock);
- #endif
-#endif
-}
-
-int PMR_queue_lock(queue_t *queue)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_lock(&queue->lock);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
- #else
-  int info = pthread_spin_lock(&queue->lock);
- #endif
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_queue_unlock(queue_t *queue)
-{
-#ifndef DISABLE_PTHREADS
- #ifdef NOSPINLOCKS
-  int info = pthread_mutex_unlock(&queue->lock);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
- #else
-  int info = pthread_spin_unlock(&queue->lock);
- #endif
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-queue_t *PMR_create_empty_queue(void)
-{
-  queue_t* queue = (queue_t*)malloc(sizeof(queue_t)); assert(queue!=NULL);
-  
-  queue->num_tasks = 0;
-  queue->head      = NULL;
-  queue->back      = NULL;
-  
-  PMR_queue_init_lock(queue);
-  return queue;
-}
-
-void PMR_destroy_queue(queue_t *queue)
-{
-  PMR_queue_destroy_lock(queue);
-  free(queue);
-}
-
-int PMR_insert_task_at_front(queue_t *queue, task_t *task)
-{
-  int info = PMR_queue_lock(queue);
-
-  queue->num_tasks++;
-  task->next = queue->head;
-  if (queue->head == NULL)
-    queue->back = task;
-  else
-    queue->head->prev = task;
-  queue->head = task;
-
-  info |= PMR_queue_unlock(queue);
-  return info;
-}
-
-int PMR_insert_task_at_back(queue_t *queue, task_t *task)
-{
-  int info = PMR_queue_lock(queue);
-
-  queue->num_tasks++;
-  task->prev = queue->back;
-  task->next = NULL;
-  if (queue->head == NULL)
-    queue->head = task;
-  else
-    queue->back->next = task;
-  queue->back = task;
-
-  info |= PMR_queue_unlock(queue);
-  return info;
-}
-
-/* returns NULL when empty */
-task_t *PMR_remove_task_at_front(queue_t *queue)
-{
-  int info = PMR_queue_lock(queue);
-  
-  task_t *task = queue->head;
-  if (queue->head != NULL) {
-    /* at least one element */
-    queue->num_tasks--;
-    if (queue->head->next == NULL) {
-      /* last task removed */
-      queue->head = NULL;
-      queue->back = NULL;
-    } else {
-      /* at least two tasks */
-      queue->head->next->prev = NULL;
-      queue->head = queue->head->next;
-    }
-  }
-  
-  info |= PMR_queue_unlock(queue);
-  return task;
-}
-
-/* returns NULL when empty */
-task_t *PMR_remove_task_at_back (queue_t *queue)
-{
-  int info = PMR_queue_lock(queue);
-  
-  task_t *task = queue->back;
-  if (queue->back != NULL) {
-    /* at least one element */
-    queue->num_tasks--;
-    if (queue->back->prev == NULL) {
-      /* last task removed */
-      queue->head = NULL;
-      queue->back = NULL;
-    } else {
-      /* at least two tasks */
-      queue->back->prev->next = NULL;
-      queue->back = queue->back->prev;
-    }
-  }
-  
-  info |= PMR_queue_unlock(queue);
-  return task;
-}
-
-int PMR_get_num_tasks(queue_t *queue)
-{
-  int info = PMR_queue_lock(queue);
-  int num_tasks = queue->num_tasks;
-  info |= PMR_queue_unlock(queue);
-  return num_tasks;
-}
diff --git a/external/pmrrr/src/core/rrr.c b/external/pmrrr/src/core/rrr.c
deleted file mode 100644
index f9df579515..0000000000
--- a/external/pmrrr/src/core/rrr.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "pmrrr/rrr.h"
-#include "pmrrr/global.h"
-
-#ifndef DISABLE_PTHREADS
-# include <errno.h>
-#endif
-
-int PMR_rrr_init_lock(rrr_t *RRR)
-{
-#ifndef DISABLE_PTHREADS
-  int info = pthread_mutex_init(&RRR->mutex, NULL);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-void PMR_rrr_destroy_lock(rrr_t *RRR)
-{
-#ifndef DISABLE_PTHREADS
-  pthread_mutex_destroy(&RRR->mutex);
-#endif
-}
-
-int PMR_rrr_lock(rrr_t *RRR)
-{
-#ifndef DISABLE_PTHREADS
-  int info = pthread_mutex_lock(&RRR->mutex);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_rrr_unlock(rrr_t *RRR)
-{
-#ifndef DISABLE_PTHREADS
-  int info = pthread_mutex_unlock(&RRR->mutex);
-  if( info == EINVAL )
-    fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
-  else if( info == EAGAIN )
-    fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
-  else if( info == EDEADLK )
-    fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
-  else if( info == EPERM )
-    fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
-  else
-    fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-rrr_t *PMR_create_rrr
-(double *restrict D, double *restrict L,
- double *restrict DL, double *restrict DLL, int size, int depth)
-{
-  rrr_t* RRR = (rrr_t*)malloc(sizeof(rrr_t)); assert(RRR!=NULL);
-
-  RRR->D                 = D;
-  RRR->L                 = L;
-  RRR->DL                = DL;
-  RRR->DLL               = DLL;
-  RRR->size              = size;
-  RRR->depth             = depth;
-  RRR->parent_processed  = false;
-  RRR->copied_parent_rrr = false;
-  RRR->ndepend           = 0;
-
-  int info = PMR_rrr_init_lock(RRR);
-  return RRR;
-}
- 
-rrr_t *PMR_reset_rrr
-(rrr_t *RRR, double *restrict D, 
- double *restrict L, double *restrict DL, 
- double *restrict DLL, int size, int depth)
-{
-  RRR->D                = D;
-  RRR->L                = L;
-  RRR->DL               = DL;
-  RRR->DLL              = DLL;
-  RRR->size             = size;
-  RRR->depth            = depth;
-  RRR->parent_processed = false;
-
-  return RRR;
-}
-
-int PMR_increment_rrr_dependencies(rrr_t *RRR)
-{
-  int info = PMR_rrr_lock(RRR);
-  RRR->ndepend++;
-  int i = RRR->ndepend;
-  info |= PMR_rrr_unlock(RRR);
-  return i;
-}
-
-int PMR_set_parent_processed_flag(rrr_t *RRR)
-{
-  int info = PMR_rrr_lock(RRR);
-  RRR->parent_processed = true;
-  info |= PMR_rrr_unlock(RRR);
-  return info;
-}
-
-int PMR_set_copied_parent_rrr_flag(rrr_t *RRR, bool val)
-{
-  int info = PMR_rrr_lock(RRR);
-  RRR->copied_parent_rrr = val;
-  info |= PMR_rrr_unlock(RRR);
-  return info;
-}
-
-/* return 0 on success, otherwise 1 */
-int PMR_try_destroy_rrr(rrr_t *RRR)
-{
-  int info = PMR_rrr_lock(RRR);
-
-  RRR->ndepend--;
-  int tmp=0;
-  if (RRR->ndepend == 0 && RRR->parent_processed == true) {
-    if (RRR->depth >0) {
-      free(RRR->D);
-      free(RRR->L);
-    }
-    if (RRR->depth >=0) {
-      free(RRR->DL);
-      free(RRR->DLL);
-    }
-    tmp = 1;
-  }
-  
-  info |= PMR_rrr_unlock(RRR);
-
-  if (tmp == 1) {
-    PMR_rrr_destroy_lock(RRR);
-    free(RRR);
-    return 0;
-  } else {
-    return 1;
-  }
-}
diff --git a/external/pmrrr/src/core/tasks.c b/external/pmrrr/src/core/tasks.c
deleted file mode 100644
index 1258d87dfa..0000000000
--- a/external/pmrrr/src/core/tasks.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "pmrrr/global.h"
-#include "pmrrr/tasks.h"
-#include "pmrrr/rrr.h"
-
-task_t *PMR_create_s_task
-(int first, int last, int depth, int bl_begin, int bl_end, 
- double spdiam, double lgap, rrr_t *RRR)
-{
-  task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL);
-  singleton_t *s = (singleton_t*)malloc(sizeof(singleton_t)); assert(s!=NULL);
-
-  s->begin     = first;
-  s->end       = last;
-  s->depth     = depth;  
-  s->bl_begin  = bl_begin;
-  s->bl_end    = bl_end;
-  s->bl_spdiam = spdiam;
-  s->lgap      = lgap;
-  s->RRR       = RRR;
-
-  t->data = (void*)s;
-  t->flag = SINGLETON_TASK_FLAG;
-  t->next = NULL;
-  t->prev = NULL;
-
-  return t;
-}
-
-task_t *PMR_create_c_task
-(int first, int last, int depth, int bl_begin, int bl_end, double spdiam,
- double lgap, int proc_W_begin, int proc_W_end, int lpid, int rpid, rrr_t *RRR)
-{
-  task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL);
-  cluster_t *c = (cluster_t*)malloc(sizeof(cluster_t)); assert(c!=NULL);
-
-  c->begin              = first;
-  c->end                = last;
-  c->depth              = depth;  
-  c->bl_begin           = bl_begin;
-  c->bl_end             = bl_end;
-  c->bl_spdiam          = spdiam;
-  c->lgap               = lgap;
-  c->proc_W_begin       = proc_W_begin;
-  c->proc_W_end         = proc_W_end;
-  c->left_pid           = lpid;
-  c->right_pid          = rpid;
-  c->RRR                = RRR;
-  c->wait_until_refined = false;
-
-  t->data = (void*)c;
-  t->flag = CLUSTER_TASK_FLAG;
-  t->next = NULL;
-  t->prev = NULL;
-
-  return t;
-}
-
-int PMR_refine_sem_init(refine_t *refine)
-{
-#ifndef DISABLE_PTHREADS
-  int info = sem_init(refine->sem, 0, 0);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_refine_sem_destroy(refine_t *refine)
-{
-#ifndef DISABLE_PTHREADS
-  int info = sem_destroy(refine->sem);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_refine_sem_wait(refine_t *refine)
-{
-#ifndef DISABLE_PTHREADS
-  int info = sem_wait(refine->sem);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-int PMR_refine_sem_post(refine_t *refine)
-{
-#ifndef DISABLE_PTHREADS
-  int info = sem_post(refine->sem);
-  assert(info == 0);
-  return info;
-#else
-  return 0;
-#endif
-}
-
-task_t *PMR_create_r_task
-(int begin, int end, double *D, double *DLL, 
- int p, int q, int bl_size, double bl_spdiam, int tid)
-{
-  task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL);
-  refine_t *r = (refine_t*)malloc(sizeof(refine_t)); assert(r!=NULL); 
-
-  r->begin        = begin;
-  r->end          = end;
-  r->D            = D;
-  r->DLL          = DLL;
-  r->p            = p;
-  r->q            = q;
-  r->bl_size      = bl_size;
-  r->bl_spdiam    = bl_spdiam;
-  r->producer_tid = tid;
-
-  PMR_refine_sem_init(r);
-
-  t->data = (void*)r;
-  t->flag = REFINE_TASK_FLAG;
-  t->next = NULL;
-  t->prev = NULL;
-
-  return t;
-}
diff --git a/external/pmrrr/src/counter.cpp b/external/pmrrr/src/counter.cpp
new file mode 100644
index 0000000000..d3a0ee1ea3
--- /dev/null
+++ b/external/pmrrr/src/counter.cpp
@@ -0,0 +1,183 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#include <cstdlib>
+#include <cassert>
+
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/counter.h>
+
+#ifndef DISABLE_PTHREADS
+# include <errno.h>
+#endif
+
+namespace pmrrr { namespace detail {
+
+	int PMR_counter_init_lock(counter_t *counter)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_init(&counter->lock, NULL);
+	 #else
+	  int info = pthread_spin_init(&counter->lock, PTHREAD_PROCESS_PRIVATE);
+	 #endif
+	  assert(info == 0);
+	  return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	void PMR_counter_destroy_lock(counter_t *counter)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  pthread_mutex_destroy(&counter->lock);
+	 #else
+	  pthread_spin_destroy(&counter->lock);
+	 #endif
+	#endif
+	}
+
+	int PMR_counter_lock(counter_t *counter)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_lock(&counter->lock);
+	  if( info == EINVAL )
+	    fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
+	  else if( info == EAGAIN )
+	    fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
+	  else if( info == EDEADLK )
+	    fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
+	  else if( info == EPERM )
+	    fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
+	  else
+	    fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
+	 #else
+	  int info = pthread_spin_lock(&counter->lock);
+	 #endif
+	  assert(info == 0);
+	 return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	int PMR_counter_unlock(counter_t *counter)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_unlock(&counter->lock);
+	  if( info == EINVAL )
+	    fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
+	  else if( info == EAGAIN )
+	    fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
+	  else if( info == EDEADLK )
+	    fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
+	  else if( info == EPERM )
+	    fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
+	  else
+	    fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
+	 #else
+	  int info = pthread_spin_unlock(&counter->lock);
+	 #endif
+	  assert(info == 0);
+	  return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	counter_t *PMR_create_counter(int init_value)
+	{
+	  counter_t *counter = (counter_t *) malloc( sizeof(counter_t) ); 
+	  counter->value = init_value;
+	  int info = PMR_counter_init_lock(counter);
+  	  return counter;
+	}
+
+	void PMR_destroy_counter(counter_t *counter)
+	{
+	  PMR_counter_destroy_lock(counter);
+	  free(counter);
+	}
+
+	int PMR_get_counter_value(counter_t *counter)
+	{
+	  int info = PMR_counter_lock(counter);
+	  int value = counter->value;
+	  info |= PMR_counter_unlock(counter);
+	  return value;
+	}
+
+	inline int PMR_set_counter_value(counter_t *counter, int value)
+	{
+	  int info = PMR_counter_lock(counter);
+	  counter->value = value;
+	  info |= PMR_counter_unlock(counter);
+	  return value;	
+	}
+
+	int PMR_decrement_counter(counter_t *counter, int amount)
+	{	
+	  int info = PMR_counter_lock(counter);
+	  counter->value -= amount;
+	  int value = counter->value;
+	  info |= PMR_counter_unlock(counter);
+	  return value;
+	}
+
+	int PMR_increment_counter(counter_t *counter, int amount)
+	{
+	  int info = PMR_counter_lock(counter);
+	  counter->value += amount;
+	  int value = counter->value;
+	  info |= PMR_counter_unlock(counter);
+	  return value;	
+	}
+
+}	// namespace detail
+
+}	// namespace pmrrr
+
diff --git a/external/pmrrr/src/lapack/LICENSE b/external/pmrrr/src/lapack/LICENSE
deleted file mode 100644
index aad36d6e12..0000000000
--- a/external/pmrrr/src/lapack/LICENSE
+++ /dev/null
@@ -1,36 +0,0 @@
-Copyright (c) 1992-2009 The University of Tennessee.  All rights reserved.
-
-$COPYRIGHT$
-
-Additional copyrights may follow
-
-$HEADER$
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer. 
-  
-- Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer listed
-  in this license in the documentation and/or other materials
-  provided with the distribution.
-  
-- Neither the name of the copyright holders nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-  
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-
diff --git a/external/pmrrr/src/lapack/ode2.c b/external/pmrrr/src/lapack/ode2.c
deleted file mode 100644
index 80cfef8226..0000000000
--- a/external/pmrrr/src/lapack/ode2.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/* dlae2.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int ode2(double *a, double *b, double *c__, double *rt1, double *rt2)
-{
-    /* System generated locals */
-    double d__1;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    double ab, df, tb, sm, rt, adf, acmn, acmx;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODE2  computes the eigenvalues of a 2-by-2 symmetric matrix */
-/*     [  A   B  ] */
-/*     [  B   C  ]. */
-/*  On return, RT1 is the eigenvalue of larger absolute value, and RT2 */
-/*  is the eigenvalue of smaller absolute value. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  A       (input) DOUBLE PRECISION */
-/*          The (1,1) element of the 2-by-2 matrix. */
-
-/*  B       (input) DOUBLE PRECISION */
-/*          The (1,2) and (2,1) elements of the 2-by-2 matrix. */
-
-/*  C       (input) DOUBLE PRECISION */
-/*          The (2,2) element of the 2-by-2 matrix. */
-
-/*  RT1     (output) DOUBLE PRECISION */
-/*          The eigenvalue of larger absolute value. */
-
-/*  RT2     (output) DOUBLE PRECISION */
-/*          The eigenvalue of smaller absolute value. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  RT1 is accurate to a few ulps barring over/underflow. */
-
-/*  RT2 may be inaccurate if there is massive cancellation in the */
-/*  determinant A*C-B*B; higher precision or correctly rounded or */
-/*  correctly truncated arithmetic would be needed to compute RT2 */
-/*  accurately in all cases. */
-
-/*  Overflow is possible only if RT1 is within a factor of 5 of overflow. */
-/*  Underflow is harmless if the input data is 0 or exceeds */
-/*     underflow_threshold / macheps. */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Compute the eigenvalues */
-
-    sm = *a + *c__;
-    df = *a - *c__;
-    adf = fabs(df);
-    tb = *b + *b;
-    ab = fabs(tb);
-    if (fabs(*a) > fabs(*c__)) {
-	acmx = *a;
-	acmn = *c__;
-    } else {
-	acmx = *c__;
-	acmn = *a;
-    }
-    if (adf > ab) {
-/* Computing 2nd power */
-	d__1 = ab / adf;
-	rt = adf * sqrt(d__1 * d__1 + 1.);
-    } else if (adf < ab) {
-/* Computing 2nd power */
-	d__1 = adf / ab;
-	rt = ab * sqrt(d__1 * d__1 + 1.);
-    } else {
-
-/*        Includes case AB=ADF=0 */
-
-	rt = ab * sqrt(2.);
-    }
-    if (sm < 0.) {
-	*rt1 = (sm - rt) * .5;
-
-/*        Order of execution important. */
-/*        To get fully accurate smaller eigenvalue, */
-/*        next line needs to be executed in higher precision. */
-
-	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
-    } else if (sm > 0.) {
-	*rt1 = (sm + rt) * .5;
-
-/*        Order of execution important. */
-/*        To get fully accurate smaller eigenvalue, */
-/*        next line needs to be executed in higher precision. */
-
-	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
-    } else {
-
-/*        Includes case RT1 = RT2 = 0 */
-
-	*rt1 = rt * .5;
-	*rt2 = rt * -.5;
-    }
-    return 0;
-
-/*     End of ODE2 */
-
-} /* ode2_ */
diff --git a/external/pmrrr/src/lapack/odebz.c b/external/pmrrr/src/lapack/odebz.c
deleted file mode 100644
index 5aadbc24bc..0000000000
--- a/external/pmrrr/src/lapack/odebz.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/* dlaebz.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imax(a,b) ( (a) > (b) ? (a) : (b) )
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-
-/* Subroutine */ 
-int odebz(int *ijob, int *nitmax, int *n, 
-	int *mmax, int *minp, int *nbmin, double *abstol, 
-	double *reltol, double *pivmin, double *d__, double *
-	e, double *e2, int *nval, double *ab, double *c__, 
-	int *mout, int *nab, double *work, int *iwork, 
-	int *info)
-{
-    /* System generated locals */
-    int nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, 
-	    i__5, i__6;
-    double d__1, d__2, d__3, d__4;
-
-    /* Local variables */
-    int j, kf, ji, kl, jp, jit;
-    double tmp1, tmp2;
-    int itmp1, itmp2, kfnew, klnew;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODEBZ contains the iteration loops which compute and use the */
-/*  function N(w), which is the count of eigenvalues of a symmetric */
-/*  tridiagonal matrix T less than or equal to its argument  w.  It */
-/*  performs a choice of two types of loops: */
-
-/*  IJOB=1, followed by */
-/*  IJOB=2: It takes as input a list of intervals and returns a list of */
-/*          sufficiently small intervals whose union contains the same */
-/*          eigenvalues as the union of the original intervals. */
-/*          The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */
-/*          The output interval (AB(j,1),AB(j,2)] will contain */
-/*          eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */
-
-/*  IJOB=3: It performs a binary search in each input interval */
-/*          (AB(j,1),AB(j,2)] for a point  w(j)  such that */
-/*          N(w(j))=NVAL(j), and uses  C(j)  as the starting point of */
-/*          the search.  If such a w(j) is found, then on output */
-/*          AB(j,1)=AB(j,2)=w.  If no such w(j) is found, then on output */
-/*          (AB(j,1),AB(j,2)] will be a small interval containing the */
-/*          point where N(w) jumps through NVAL(j), unless that point */
-/*          lies outside the initial interval. */
-
-/*  Note that the intervals are in all cases half-open intervals, */
-/*  i.e., of the form  (a,b] , which includes  b  but not  a . */
-
-/*  To avoid underflow, the matrix should be scaled so that its largest */
-/*  element is no greater than  overflow**(1/2) * underflow**(1/4) */
-/*  in absolute value.  To assure the most accurate computation */
-/*  of small eigenvalues, the matrix should be scaled to be */
-/*  not much smaller than that, either. */
-
-/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
-/*  Matrix", Report CS41, Computer Science Dept., Stanford */
-/*  University, July 21, 1966 */
-
-/*  Note: the arguments are, in general, *not* checked for unreasonable */
-/*  values. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  IJOB    (input) INT */
-/*          Specifies what is to be done: */
-/*          = 1:  Compute NAB for the initial intervals. */
-/*          = 2:  Perform bisection iteration to find eigenvalues of T. */
-/*          = 3:  Perform bisection iteration to invert N(w), i.e., */
-/*                to find a point which has a specified number of */
-/*                eigenvalues of T to its left. */
-/*          Other values will cause ODEBZ to return with INFO=-1. */
-
-/*  NITMAX  (input) INT */
-/*          The maximum number of "levels" of bisection to be */
-/*          performed, i.e., an interval of width W will not be made */
-/*          smaller than 2^(-NITMAX) * W.  If not all intervals */
-/*          have converged after NITMAX iterations, then INFO is set */
-/*          to the number of non-converged intervals. */
-
-/*  N       (input) INT */
-/*          The dimension n of the tridiagonal matrix T.  It must be at */
-/*          least 1. */
-
-/*  MMAX    (input) INT */
-/*          The maximum number of intervals.  If more than MMAX intervals */
-/*          are generated, then ODEBZ will quit with INFO=MMAX+1. */
-
-/*  MINP    (input) INT */
-/*          The initial number of intervals.  It may not be greater than */
-/*          MMAX. */
-
-/*  NBMIN   (input) INT */
-/*          The smallest number of intervals that should be processed */
-/*          using a vector loop.  If zero, then only the scalar loop */
-/*          will be used. */
-
-/*  ABSTOL  (input) DOUBLE PRECISION */
-/*          The minimum (absolute) width of an interval.  When an */
-/*          interval is narrower than ABSTOL, or than RELTOL times the */
-/*          larger (in magnitude) endpoint, then it is considered to be */
-/*          sufficiently small, i.e., converged.  This must be at least */
-/*          zero. */
-
-/*  RELTOL  (input) DOUBLE PRECISION */
-/*          The minimum relative width of an interval.  When an interval */
-/*          is narrower than ABSTOL, or than RELTOL times the larger (in */
-/*          magnitude) endpoint, then it is considered to be */
-/*          sufficiently small, i.e., converged.  Note: this should */
-/*          always be at least radix*machine epsilon. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum absolute value of a "pivot" in the Sturm */
-/*          sequence loop.  This *must* be at least  max |e(j)**2| * */
-/*          safe_min  and at least safe_min, where safe_min is at least */
-/*          the smallest number that can divide one without overflow. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The diagonal elements of the tridiagonal matrix T. */
-
-/*  E       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The offdiagonal elements of the tridiagonal matrix T in */
-/*          positions 1 through N-1.  E(N) is arbitrary. */
-
-/*  E2      (input) DOUBLE PRECISION array, dimension (N) */
-/*          The squares of the offdiagonal elements of the tridiagonal */
-/*          matrix T.  E2(N) is ignored. */
-
-/*  NVAL    (input/output) INT array, dimension (MINP) */
-/*          If IJOB=1 or 2, not referenced. */
-/*          If IJOB=3, the desired values of N(w).  The elements of NVAL */
-/*          will be reordered to correspond with the intervals in AB. */
-/*          Thus, NVAL(j) on output will not, in general be the same as */
-/*          NVAL(j) on input, but it will correspond with the interval */
-/*          (AB(j,1),AB(j,2)] on output. */
-
-/*  AB      (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */
-/*          The endpoints of the intervals.  AB(j,1) is  a(j), the left */
-/*          endpoint of the j-th interval, and AB(j,2) is b(j), the */
-/*          right endpoint of the j-th interval.  The input intervals */
-/*          will, in general, be modified, split, and reordered by the */
-/*          calculation. */
-
-/*  C       (input/output) DOUBLE PRECISION array, dimension (MMAX) */
-/*          If IJOB=1, ignored. */
-/*          If IJOB=2, workspace. */
-/*          If IJOB=3, then on input C(j) should be initialized to the */
-/*          first search point in the binary search. */
-
-/*  MOUT    (output) INT */
-/*          If IJOB=1, the number of eigenvalues in the intervals. */
-/*          If IJOB=2 or 3, the number of intervals output. */
-/*          If IJOB=3, MOUT will equal MINP. */
-
-/*  NAB     (input/output) INT array, dimension (MMAX,2) */
-/*          If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */
-/*          If IJOB=2, then on input, NAB(i,j) should be set.  It must */
-/*             satisfy the condition: */
-/*             N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */
-/*             which means that in interval i only eigenvalues */
-/*             NAB(i,1)+1,...,NAB(i,2) will be considered.  Usually, */
-/*             NAB(i,j)=N(AB(i,j)), from a previous call to ODEBZ with */
-/*             IJOB=1. */
-/*             On output, NAB(i,j) will contain */
-/*             max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */
-/*             the input interval that the output interval */
-/*             (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */
-/*             the input values of NAB(k,1) and NAB(k,2). */
-/*          If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */
-/*             unless N(w) > NVAL(i) for all search points  w , in which */
-/*             case NAB(i,1) will not be modified, i.e., the output */
-/*             value will be the same as the input value (modulo */
-/*             reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */
-/*             for all search points  w , in which case NAB(i,2) will */
-/*             not be modified.  Normally, NAB should be set to some */
-/*             distinctive value(s) before ODEBZ is called. */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (MMAX) */
-/*          Workspace. */
-
-/*  IWORK   (workspace) INT array, dimension (MMAX) */
-/*          Workspace. */
-
-/*  INFO    (output) INT */
-/*          = 0:       All intervals converged. */
-/*          = 1--MMAX: The last INFO intervals did not converge. */
-/*          = MMAX+1:  More than MMAX intervals were generated. */
-
-/*  Further Details */
-/*  =============== */
-
-/*      This routine is intended to be called only by other LAPACK */
-/*  routines, thus the interface is less user-friendly.  It is intended */
-/*  for two purposes: */
-
-/*  (a) finding eigenvalues.  In this case, ODEBZ should have one or */
-/*      more initial intervals set up in AB, and ODEBZ should be called */
-/*      with IJOB=1.  This sets up NAB, and also counts the eigenvalues. */
-/*      Intervals with no eigenvalues would usually be thrown out at */
-/*      this point.  Also, if not all the eigenvalues in an interval i */
-/*      are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */
-/*      For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */
-/*      eigenvalue.  ODEBZ is then called with IJOB=2 and MMAX */
-/*      no smaller than the value of MOUT returned by the call with */
-/*      IJOB=1.  After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */
-/*      through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */
-/*      tolerance specified by ABSTOL and RELTOL. */
-
-/*  (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */
-/*      In this case, start with a Gershgorin interval  (a,b).  Set up */
-/*      AB to contain 2 search intervals, both initially (a,b).  One */
-/*      NVAL element should contain  f-1  and the other should contain  l */
-/*      , while C should contain a and b, resp.  NAB(i,1) should be -1 */
-/*      and NAB(i,2) should be N+1, to flag an error if the desired */
-/*      interval does not lie in (a,b).  ODEBZ is then called with */
-/*      IJOB=3.  On exit, if w(f-1) < w(f), then one of the intervals -- */
-/*      j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */
-/*      if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */
-/*      >= 0, then the interval will have  N(AB(j,1))=NAB(j,1)=f-k and */
-/*      N(AB(j,2))=NAB(j,2)=f+r.  The cases w(l) < w(l+1) and */
-/*      w(l-r)=...=w(l+k) are handled similarly. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Check for Errors */
-
-    /* Parameter adjustments */
-    nab_dim1 = *mmax;
-    nab_offset = 1 + nab_dim1;
-    nab -= nab_offset;
-    ab_dim1 = *mmax;
-    ab_offset = 1 + ab_dim1;
-    ab -= ab_offset;
-    --d__;
-    --e;
-    --e2;
-    --nval;
-    --c__;
-    --work;
-    --iwork;
-
-    /* Function Body */
-    *info = 0;
-    if (*ijob < 1 || *ijob > 3) {
-	*info = -1;
-	return 0;
-    }
-
-/*     Initialize NAB */
-
-    if (*ijob == 1) {
-
-/*        Compute the number of eigenvalues in the initial intervals. */
-
-	*mout = 0;
-	i__1 = *minp;
-	for (ji = 1; ji <= i__1; ++ji) {
-	    for (jp = 1; jp <= 2; ++jp) {
-		tmp1 = d__[1] - ab[ji + jp * ab_dim1];
-		if (fabs(tmp1) < *pivmin) {
-		    tmp1 = -(*pivmin);
-		}
-		nab[ji + jp * nab_dim1] = 0;
-		if (tmp1 <= 0.) {
-		    nab[ji + jp * nab_dim1] = 1;
-		}
-
-		i__2 = *n;
-		for (j = 2; j <= i__2; ++j) {
-		    tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1];
-		    if (fabs(tmp1) < *pivmin) {
-			tmp1 = -(*pivmin);
-		    }
-		    if (tmp1 <= 0.) {
-			++nab[ji + jp * nab_dim1];
-		    }
-/* L10: */
-		}
-/* L20: */
-	    }
-	    *mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1];
-/* L30: */
-	}
-	return 0;
-    }
-
-/*     Initialize for loop */
-
-/*     KF and KL have the following meaning: */
-/*        Intervals 1,...,KF-1 have converged. */
-/*        Intervals KF,...,KL  still need to be refined. */
-
-    kf = 1;
-    kl = *minp;
-
-/*     If IJOB=2, initialize C. */
-/*     If IJOB=3, use the user-supplied starting point. */
-
-    if (*ijob == 2) {
-	i__1 = *minp;
-	for (ji = 1; ji <= i__1; ++ji) {
-	    c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5;
-/* L40: */
-	}
-    }
-
-/*     Iteration loop */
-
-    i__1 = *nitmax;
-    for (jit = 1; jit <= i__1; ++jit) {
-
-/*        Loop over intervals */
-
-	if (kl - kf + 1 >= *nbmin && *nbmin > 0) {
-
-/*           Begin of Parallel Version of the loop */
-
-	    i__2 = kl;
-	    for (ji = kf; ji <= i__2; ++ji) {
-
-/*              Compute N(c), the number of eigenvalues less than c */
-
-		work[ji] = d__[1] - c__[ji];
-		iwork[ji] = 0;
-		if (work[ji] <= *pivmin) {
-		    iwork[ji] = 1;
-/* Computing MIN */
-		    d__1 = work[ji], d__2 = -(*pivmin);
-		    work[ji] = fmin(d__1,d__2);
-		}
-
-		i__3 = *n;
-		for (j = 2; j <= i__3; ++j) {
-		    work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji];
-		    if (work[ji] <= *pivmin) {
-			++iwork[ji];
-/* Computing MIN */
-			d__1 = work[ji], d__2 = -(*pivmin);
-			work[ji] = fmin(d__1,d__2);
-		    }
-/* L50: */
-		}
-/* L60: */
-	    }
-
-	    if (*ijob <= 2) {
-
-/*              IJOB=2: Choose all intervals containing eigenvalues. */
-
-		klnew = kl;
-		i__2 = kl;
-		for (ji = kf; ji <= i__2; ++ji) {
-
-/*                 Insure that N(w) is monotone */
-
-/* Computing MIN */
-/* Computing MAX */
-		    i__5 = nab[ji + nab_dim1], i__6 = iwork[ji];
-		    i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,i__6);
-		    iwork[ji] = imin(i__3,i__4);
-
-/*                 Update the Queue -- add intervals if both halves */
-/*                 contain eigenvalues. */
-
-		    if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) {
-
-/*                    No eigenvalue in the upper interval: */
-/*                    just use the lower interval. */
-
-			ab[ji + (ab_dim1 << 1)] = c__[ji];
-
-		    } else if (iwork[ji] == nab[ji + nab_dim1]) {
-
-/*                    No eigenvalue in the lower interval: */
-/*                    just use the upper interval. */
-
-			ab[ji + ab_dim1] = c__[ji];
-		    } else {
-			++klnew;
-			if (klnew <= *mmax) {
-
-/*                       Eigenvalue in both intervals -- add upper to */
-/*                       queue. */
-
-			    ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 
-				    1)];
-			    nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 
-				    << 1)];
-			    ab[klnew + ab_dim1] = c__[ji];
-			    nab[klnew + nab_dim1] = iwork[ji];
-			    ab[ji + (ab_dim1 << 1)] = c__[ji];
-			    nab[ji + (nab_dim1 << 1)] = iwork[ji];
-			} else {
-			    *info = *mmax + 1;
-			}
-		    }
-/* L70: */
-		}
-		if (*info != 0) {
-		    return 0;
-		}
-		kl = klnew;
-	    } else {
-
-/*              IJOB=3: Binary search.  Keep only the interval containing */
-/*                      w   s.t. N(w) = NVAL */
-
-		i__2 = kl;
-		for (ji = kf; ji <= i__2; ++ji) {
-		    if (iwork[ji] <= nval[ji]) {
-			ab[ji + ab_dim1] = c__[ji];
-			nab[ji + nab_dim1] = iwork[ji];
-		    }
-		    if (iwork[ji] >= nval[ji]) {
-			ab[ji + (ab_dim1 << 1)] = c__[ji];
-			nab[ji + (nab_dim1 << 1)] = iwork[ji];
-		    }
-/* L80: */
-		}
-	    }
-
-	} else {
-
-/*           End of Parallel Version of the loop */
-
-/*           Begin of Serial Version of the loop */
-
-	    klnew = kl;
-	    i__2 = kl;
-	    for (ji = kf; ji <= i__2; ++ji) {
-
-/*              Compute N(w), the number of eigenvalues less than w */
-
-		tmp1 = c__[ji];
-		tmp2 = d__[1] - tmp1;
-		itmp1 = 0;
-		if (tmp2 <= *pivmin) {
-		    itmp1 = 1;
-/* Computing MIN */
-		    d__1 = tmp2, d__2 = -(*pivmin);
-		    tmp2 = fmin(d__1,d__2);
-		}
-
-		i__3 = *n;
-		for (j = 2; j <= i__3; ++j) {
-		    tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1;
-		    if (tmp2 <= *pivmin) {
-			++itmp1;
-/* Computing MIN */
-			d__1 = tmp2, d__2 = -(*pivmin);
-			tmp2 = fmin(d__1,d__2);
-		    }
-/* L90: */
-		}
-
-		if (*ijob <= 2) {
-
-/*                 IJOB=2: Choose all intervals containing eigenvalues. */
-
-/*                 Insure that N(w) is monotone */
-
-/* Computing MIN */
-/* Computing MAX */
-		    i__5 = nab[ji + nab_dim1];
-		    i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,itmp1);
-		    itmp1 = imin(i__3,i__4);
-
-/*                 Update the Queue -- add intervals if both halves */
-/*                 contain eigenvalues. */
-
-		    if (itmp1 == nab[ji + (nab_dim1 << 1)]) {
-
-/*                    No eigenvalue in the upper interval: */
-/*                    just use the lower interval. */
-
-			ab[ji + (ab_dim1 << 1)] = tmp1;
-
-		    } else if (itmp1 == nab[ji + nab_dim1]) {
-
-/*                    No eigenvalue in the lower interval: */
-/*                    just use the upper interval. */
-
-			ab[ji + ab_dim1] = tmp1;
-		    } else if (klnew < *mmax) {
-
-/*                    Eigenvalue in both intervals -- add upper to queue. */
-
-			++klnew;
-			ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)];
-			nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << 
-				1)];
-			ab[klnew + ab_dim1] = tmp1;
-			nab[klnew + nab_dim1] = itmp1;
-			ab[ji + (ab_dim1 << 1)] = tmp1;
-			nab[ji + (nab_dim1 << 1)] = itmp1;
-		    } else {
-			*info = *mmax + 1;
-			return 0;
-		    }
-		} else {
-
-/*                 IJOB=3: Binary search.  Keep only the interval */
-/*                         containing  w  s.t. N(w) = NVAL */
-
-		    if (itmp1 <= nval[ji]) {
-			ab[ji + ab_dim1] = tmp1;
-			nab[ji + nab_dim1] = itmp1;
-		    }
-		    if (itmp1 >= nval[ji]) {
-			ab[ji + (ab_dim1 << 1)] = tmp1;
-			nab[ji + (nab_dim1 << 1)] = itmp1;
-		    }
-		}
-/* L100: */
-	    }
-	    kl = klnew;
-
-/*           End of Serial Version of the loop */
-
-	}
-
-/*        Check for convergence */
-
-	kfnew = kf;
-	i__2 = kl;
-	for (ji = kf; ji <= i__2; ++ji) {
-	    tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], fabs(
-		    d__1));
-/* Computing MAX */
-	    d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], fabs(d__1)), d__4 = (d__2 =
-		     ab[ji + ab_dim1], fabs(d__2));
-	    tmp2 = fmax(d__3,d__4);
-/* Computing MAX */
-	    d__1 = fmax(*abstol,*pivmin), d__2 = *reltol * tmp2;
-	    if (tmp1 < fmax(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + (
-		    nab_dim1 << 1)]) {
-
-/*              Converged -- Swap with position KFNEW, */
-/*                           then increment KFNEW */
-
-		if (ji > kfnew) {
-		    tmp1 = ab[ji + ab_dim1];
-		    tmp2 = ab[ji + (ab_dim1 << 1)];
-		    itmp1 = nab[ji + nab_dim1];
-		    itmp2 = nab[ji + (nab_dim1 << 1)];
-		    ab[ji + ab_dim1] = ab[kfnew + ab_dim1];
-		    ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)];
-		    nab[ji + nab_dim1] = nab[kfnew + nab_dim1];
-		    nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)];
-		    ab[kfnew + ab_dim1] = tmp1;
-		    ab[kfnew + (ab_dim1 << 1)] = tmp2;
-		    nab[kfnew + nab_dim1] = itmp1;
-		    nab[kfnew + (nab_dim1 << 1)] = itmp2;
-		    if (*ijob == 3) {
-			itmp1 = nval[ji];
-			nval[ji] = nval[kfnew];
-			nval[kfnew] = itmp1;
-		    }
-		}
-		++kfnew;
-	    }
-/* L110: */
-	}
-	kf = kfnew;
-
-/*        Choose Midpoints */
-
-	i__2 = kl;
-	for (ji = kf; ji <= i__2; ++ji) {
-	    c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5;
-/* L120: */
-	}
-
-/*        If no more intervals to refine, quit. */
-
-	if (kf > kl) {
-	    goto L140;
-	}
-/* L130: */
-    }
-
-/*     Converged */
-
-L140:
-/* Computing MAX */
-    i__1 = kl + 1 - kf;
-    *info = imax(i__1,0);
-    *mout = kl;
-
-    return 0;
-
-/*     End of ODEBZ */
-
-} /* odebz_ */
diff --git a/external/pmrrr/src/lapack/odev2.c b/external/pmrrr/src/lapack/odev2.c
deleted file mode 100644
index b70b228676..0000000000
--- a/external/pmrrr/src/lapack/odev2.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/* dlaev2.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odev2(double *a, double *b, double *c__, 
-	double *rt1, double *rt2, double *cs1, double *sn1)
-{
-    /* System generated locals */
-    double d__1;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    double ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
-    int sgn1, sgn2;
-    double acmn, acmx;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */
-/*     [  A   B  ] */
-/*     [  B   C  ]. */
-/*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */
-/*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */
-/*  eigenvector for RT1, giving the decomposition */
-
-/*     [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ] */
-/*     [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ]. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  A       (input) DOUBLE PRECISION */
-/*          The (1,1) element of the 2-by-2 matrix. */
-
-/*  B       (input) DOUBLE PRECISION */
-/*          The (1,2) element and the conjugate of the (2,1) element of */
-/*          the 2-by-2 matrix. */
-
-/*  C       (input) DOUBLE PRECISION */
-/*          The (2,2) element of the 2-by-2 matrix. */
-
-/*  RT1     (output) DOUBLE PRECISION */
-/*          The eigenvalue of larger absolute value. */
-
-/*  RT2     (output) DOUBLE PRECISION */
-/*          The eigenvalue of smaller absolute value. */
-
-/*  CS1     (output) DOUBLE PRECISION */
-/*  SN1     (output) DOUBLE PRECISION */
-/*          The vector (CS1, SN1) is a unit right eigenvector for RT1. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  RT1 is accurate to a few ulps barring over/underflow. */
-
-/*  RT2 may be inaccurate if there is massive cancellation in the */
-/*  determinant A*C-B*B; higher precision or correctly rounded or */
-/*  correctly truncated arithmetic would be needed to compute RT2 */
-/*  accurately in all cases. */
-
-/*  CS1 and SN1 are accurate to a few ulps barring over/underflow. */
-
-/*  Overflow is possible only if RT1 is within a factor of 5 of overflow. */
-/*  Underflow is harmless if the input data is 0 or exceeds */
-/*     underflow_threshold / macheps. */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Compute the eigenvalues */
-
-    sm = *a + *c__;
-    df = *a - *c__;
-    adf = fabs(df);
-    tb = *b + *b;
-    ab = fabs(tb);
-    if (fabs(*a) > fabs(*c__)) {
-	acmx = *a;
-	acmn = *c__;
-    } else {
-	acmx = *c__;
-	acmn = *a;
-    }
-    if (adf > ab) {
-/* Computing 2nd power */
-	d__1 = ab / adf;
-	rt = adf * sqrt(d__1 * d__1 + 1.);
-    } else if (adf < ab) {
-/* Computing 2nd power */
-	d__1 = adf / ab;
-	rt = ab * sqrt(d__1 * d__1 + 1.);
-    } else {
-
-/*        Includes case AB=ADF=0 */
-
-	rt = ab * sqrt(2.);
-    }
-    if (sm < 0.) {
-	*rt1 = (sm - rt) * .5;
-	sgn1 = -1;
-
-/*        Order of execution important. */
-/*        To get fully accurate smaller eigenvalue, */
-/*        next line needs to be executed in higher precision. */
-
-	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
-    } else if (sm > 0.) {
-	*rt1 = (sm + rt) * .5;
-	sgn1 = 1;
-
-/*        Order of execution important. */
-/*        To get fully accurate smaller eigenvalue, */
-/*        next line needs to be executed in higher precision. */
-
-	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
-    } else {
-
-/*        Includes case RT1 = RT2 = 0 */
-
-	*rt1 = rt * .5;
-	*rt2 = rt * -.5;
-	sgn1 = 1;
-    }
-
-/*     Compute the eigenvector */
-
-    if (df >= 0.) {
-	cs = df + rt;
-	sgn2 = 1;
-    } else {
-	cs = df - rt;
-	sgn2 = -1;
-    }
-    acs = fabs(cs);
-    if (acs > ab) {
-	ct = -tb / cs;
-	*sn1 = 1. / sqrt(ct * ct + 1.);
-	*cs1 = ct * *sn1;
-    } else {
-	if (ab == 0.) {
-	    *cs1 = 1.;
-	    *sn1 = 0.;
-	} else {
-	    tn = -cs / tb;
-	    *cs1 = 1. / sqrt(tn * tn + 1.);
-	    *sn1 = tn * *cs1;
-	}
-    }
-    if (sgn1 == sgn2) {
-	tn = *cs1;
-	*cs1 = -(*sn1);
-	*sn1 = tn;
-    }
-    return 0;
-
-/*     End of ODEV2 */
-
-} /* odev2_ */
diff --git a/external/pmrrr/src/lapack/odnan.c b/external/pmrrr/src/lapack/odnan.c
deleted file mode 100644
index ce7abb4bcd..0000000000
--- a/external/pmrrr/src/lapack/odnan.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/* disnan.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-int odnan(double *din)
-{
-    /* System generated locals */
-    int ret_val;
-
-    /* Local variables */
-    extern int odsnan(double *, double *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */
-/*  otherwise.  To be replaced by the Fortran 2003 intrinsic in the */
-/*  future. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  DIN      (input) DOUBLE PRECISION */
-/*          Input to test for NaN. */
-
-/*  ===================================================================== */
-
-/*  .. External Functions .. */
-/*  .. */
-/*  .. Executable Statements .. */
-    ret_val = odsnan(din, din);
-    return ret_val;
-} /* odnan_ */
diff --git a/external/pmrrr/src/lapack/odneg.c b/external/pmrrr/src/lapack/odneg.c
deleted file mode 100644
index 4a3836797c..0000000000
--- a/external/pmrrr/src/lapack/odneg.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/* dlaneg.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imax(a,b) ( (a) > (b) ? (a) : (b) )
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-
-int odneg(int *n, double *d__, double *lld, double *
-	sigma, double *pivmin, int *r__)
-{
-    /* System generated locals */
-    int ret_val, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    int j;
-    double p, t;
-    int bj;
-    double tmp;
-    int neg1, neg2;
-    double bsav, gamma, dplus;
-    extern int odnan(double *);
-    int negcnt;
-    int sawnan;
-    double dminus;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODNEG computes the Sturm count, the number of negative pivots */
-/*  encountered while factoring tridiagonal T - sigma I = L D L^T. */
-/*  This implementation works directly on the factors without forming */
-/*  the tridiagonal matrix T.  The Sturm count is also the number of */
-/*  eigenvalues of T less than sigma. */
-
-/*  This routine is called from DLARRB. */
-
-/*  The current routine does not use the PIVMIN parameter but rather */
-/*  requires IEEE-754 propagation of Infinities and NaNs.  This */
-/*  routine also has no input range restrictions but does require */
-/*  default exception handling such that x/0 produces Inf when x is */
-/*  non-zero, and Inf/Inf produces NaN.  For more information, see: */
-
-/*    Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
-/*    Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
-/*    Scientific Computing, v28, n5, 2006.  DOI 10.1137/050641624 */
-/*    (Tech report version in LAWN 172 with the same title.) */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of the diagonal matrix D. */
-
-/*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (N-1) elements L(i)*L(i)*D(i). */
-
-/*  SIGMA   (input) DOUBLE PRECISION */
-/*          Shift amount in T - sigma I = L D L^T. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot in the Sturm sequence.  May be used */
-/*          when zero pivots are encountered on non-IEEE-754 */
-/*          architectures. */
-
-/*  R       (input) INT */
-/*          The twist index for the twisted factorization that is used */
-/*          for the negcount. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-/*     Jason Riedy, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     Some architectures propagate Infinities and NaNs very slowly, so */
-/*     the code computes counts in BLKLEN chunks.  Then a NaN can */
-/*     propagate at most BLKLEN columns before being detected.  This is */
-/*     not a general tuning parameter; it needs only to be just large */
-/*     enough that the overhead is tiny in common cases. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --lld;
-    --d__;
-
-    /* Function Body */
-    negcnt = 0;
-/*     I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
-    t = -(*sigma);
-    i__1 = *r__ - 1;
-    for (bj = 1; bj <= i__1; bj += 128) {
-	neg1 = 0;
-	bsav = t;
-/* Computing MIN */
-	i__3 = bj + 127, i__4 = *r__ - 1;
-	i__2 = imin(i__3,i__4);
-	for (j = bj; j <= i__2; ++j) {
-	    dplus = d__[j] + t;
-	    if (dplus < 0.) {
-		++neg1;
-	    }
-	    tmp = t / dplus;
-	    t = tmp * lld[j] - *sigma;
-/* L21: */
-	}
-	sawnan = odnan(&t);
-/*     Run a slower version of the above loop if a NaN is detected. */
-/*     A NaN should occur only with a zero pivot after an infinite */
-/*     pivot.  In that case, substituting 1 for T/DPLUS is the */
-/*     correct limit. */
-	if (sawnan) {
-	    neg1 = 0;
-	    t = bsav;
-/* Computing MIN */
-	    i__3 = bj + 127, i__4 = *r__ - 1;
-	    i__2 = imin(i__3,i__4);
-	    for (j = bj; j <= i__2; ++j) {
-		dplus = d__[j] + t;
-		if (dplus < 0.) {
-		    ++neg1;
-		}
-		tmp = t / dplus;
-		if (odnan(&tmp)) {
-		    tmp = 1.;
-		}
-		t = tmp * lld[j] - *sigma;
-/* L22: */
-	    }
-	}
-	negcnt += neg1;
-/* L210: */
-    }
-
-/*     II) lower part: L D L^T - SIGMA I = U- D- U-^T */
-    p = d__[*n] - *sigma;
-    i__1 = *r__;
-    for (bj = *n - 1; bj >= i__1; bj += -128) {
-	neg2 = 0;
-	bsav = p;
-/* Computing MAX */
-	i__3 = bj - 127;
-	i__2 = imax(i__3,*r__);
-	for (j = bj; j >= i__2; --j) {
-	    dminus = lld[j] + p;
-	    if (dminus < 0.) {
-		++neg2;
-	    }
-	    tmp = p / dminus;
-	    p = tmp * d__[j] - *sigma;
-/* L23: */
-	}
-	sawnan = odnan(&p);
-/*     As above, run a slower version that substitutes 1 for Inf/Inf. */
-
-	if (sawnan) {
-	    neg2 = 0;
-	    p = bsav;
-/* Computing MAX */
-	    i__3 = bj - 127;
-	    i__2 = imax(i__3,*r__);
-	    for (j = bj; j >= i__2; --j) {
-		dminus = lld[j] + p;
-		if (dminus < 0.) {
-		    ++neg2;
-		}
-		tmp = p / dminus;
-		if (odnan(&tmp)) {
-		    tmp = 1.;
-		}
-		p = tmp * d__[j] - *sigma;
-/* L24: */
-	    }
-	}
-	negcnt += neg2;
-/* L230: */
-    }
-
-/*     III) Twist index */
-/*       T was shifted by SIGMA initially. */
-    gamma = t + *sigma + p;
-    if (gamma < 0.) {
-	++negcnt;
-    }
-    ret_val = negcnt;
-    return ret_val;
-} /* odneg_ */
diff --git a/external/pmrrr/src/lapack/odnst.c b/external/pmrrr/src/lapack/odnst.c
deleted file mode 100644
index f12ab24f46..0000000000
--- a/external/pmrrr/src/lapack/odnst.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/* dlanst.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Table of constant values */
-static int c__1 = 1;
-
-double odnst(char *norm, int *n, double *d__, double *e)
-{
-    /* System generated locals */
-    int i__1;
-    double ret_val, d__1, d__2, d__3, d__4, d__5;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    int i__;
-    double sum, scale;
-    extern int olsame(char *, char *);
-    double anorm;
-    extern /* Subroutine */ int odssq(int *, double *, int *, 
-	    double *, double *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODNST  returns the value of the one norm,  or the Frobenius norm, or */
-/*  the  infinity norm,  or the  element of  largest absolute value  of a */
-/*  real symmetric tridiagonal matrix A. */
-
-/*  Description */
-/*  =========== */
-
-/*  ODNST returns the value */
-
-/*     ODNST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */
-/*              ( */
-/*              ( norm1(A),         NORM = '1', 'O' or 'o' */
-/*              ( */
-/*              ( normI(A),         NORM = 'I' or 'i' */
-/*              ( */
-/*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e' */
-
-/*  where  norm1  denotes the  one norm of a matrix (maximum column sum), */
-/*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and */
-/*  normF  denotes the  Frobenius norm of a matrix (square root of sum of */
-/*  squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  NORM    (input) CHARACTER*1 */
-/*          Specifies the value to be returned in ODNST as described */
-/*          above. */
-
-/*  N       (input) INT */
-/*          The order of the matrix A.  N >= 0.  When N = 0, ODNST is */
-/*          set to zero. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The diagonal elements of A. */
-
-/*  E       (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (n-1) sub-diagonal or super-diagonal elements of A. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --e;
-    --d__;
-
-    /* Function Body */
-    if (*n <= 0) {
-	anorm = 0.;
-    } else if (olsame(norm, "M")) {
-
-/*        Find max(abs(A(i,j))). */
-
-	anorm = (d__1 = d__[*n], fabs(d__1));
-	i__1 = *n - 1;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-/* Computing MAX */
-	    d__2 = anorm, d__3 = (d__1 = d__[i__], fabs(d__1));
-	    anorm = fmax(d__2,d__3);
-/* Computing MAX */
-	    d__2 = anorm, d__3 = (d__1 = e[i__], fabs(d__1));
-	    anorm = fmax(d__2,d__3);
-/* L10: */
-	}
-    } else if (olsame(norm, "O") || *(unsigned char *)
-	    norm == '1' || olsame(norm, "I")) {
-
-/*        Find norm1(A). */
-
-	if (*n == 1) {
-	    anorm = fabs(d__[1]);
-	} else {
-/* Computing MAX */
-	    d__3 = fabs(d__[1]) + fabs(e[1]), d__4 = (d__1 = e[*n - 1], fabs(
-		    d__1)) + (d__2 = d__[*n], fabs(d__2));
-	    anorm = fmax(d__3,d__4);
-	    i__1 = *n - 1;
-	    for (i__ = 2; i__ <= i__1; ++i__) {
-/* Computing MAX */
-		d__4 = anorm, d__5 = (d__1 = d__[i__], fabs(d__1)) + (d__2 = e[
-			i__], fabs(d__2)) + (d__3 = e[i__ - 1], fabs(d__3));
-		anorm = fmax(d__4,d__5);
-/* L20: */
-	    }
-	}
-    } else if (olsame(norm, "F") || olsame(norm, "E")) {
-
-/*        Find normF(A). */
-
-	scale = 0.;
-	sum = 1.;
-	if (*n > 1) {
-	    i__1 = *n - 1;
-	    odssq(&i__1, &e[1], &c__1, &scale, &sum);
-	    sum *= 2;
-	}
-	odssq(n, &d__[1], &c__1, &scale, &sum);
-	anorm = scale * sqrt(sum);
-    }
-
-    ret_val = anorm;
-    return ret_val;
-
-/*     End of ODNST */
-
-} /* odnst_ */
diff --git a/external/pmrrr/src/lapack/odr1v.c b/external/pmrrr/src/lapack/odr1v.c
deleted file mode 100644
index 1c30a80835..0000000000
--- a/external/pmrrr/src/lapack/odr1v.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/* odr1v.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Subroutine */ 
-int odr1v(int *n, int *b1, int *bn, double 
-	*lambda, double *d__, double *l, double *ld, double *
-	lld, double *pivmin, double *gaptol, double *z__, int 
-	*wantnc, int *negcnt, double *ztz, double *mingma, 
-	int *r__, int *isuppz, double *nrminv, double *resid, 
-	double *rqcorr, double *work)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2, d__3;
-
-    /* Builtin functions */
-    //    double sqrt(double);
-
-    /* Local variables */
-    int i__;
-    double s;
-    int r1, r2;
-    double eps, tmp;
-    int neg1, neg2, indp, inds;
-    double dplus;
-    // extern double odmch(char *);
-    extern int odnan(double *);
-    int indlpl, indumn;
-    double dminus;
-    int sawnan1, sawnan2;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODR1V computes the (scaled) r-th column of the inverse of */
-/*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
-/*  L D L^T - sigma I. When sigma is close to an eigenvalue, the */
-/*  computed vector is an accurate eigenvector. Usually, r corresponds */
-/*  to the index where the eigenvector is largest in magnitude. */
-/*  The following steps accomplish this computation : */
-/*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T, */
-/*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
-/*  (c) Computation of the diagonal elements of the inverse of */
-/*      L D L^T - sigma I by combining the above transforms, and choosing */
-/*      r as the index where the diagonal of the inverse is (one of the) */
-/*      largest in magnitude. */
-/*  (d) Computation of the (scaled) r-th column of the inverse using the */
-/*      twisted factorization obtained by combining the top part of the */
-/*      the stationary and the bottom part of the progressive transform. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N        (input) INT */
-/*           The order of the matrix L D L^T. */
-
-/*  B1       (input) INT */
-/*           First index of the submatrix of L D L^T. */
-
-/*  BN       (input) INT */
-/*           Last index of the submatrix of L D L^T. */
-
-/*  LAMBDA    (input) DOUBLE PRECISION */
-/*           The shift. In order to compute an accurate eigenvector, */
-/*           LAMBDA should be a good approximation to an eigenvalue */
-/*           of L D L^T. */
-
-/*  L        (input) DOUBLE PRECISION array, dimension (N-1) */
-/*           The (n-1) subdiagonal elements of the unit bidiagonal matrix */
-/*           L, in elements 1 to N-1. */
-
-/*  D        (input) DOUBLE PRECISION array, dimension (N) */
-/*           The n diagonal elements of the diagonal matrix D. */
-
-/*  LD       (input) DOUBLE PRECISION array, dimension (N-1) */
-/*           The n-1 elements L(i)*D(i). */
-
-/*  LLD      (input) DOUBLE PRECISION array, dimension (N-1) */
-/*           The n-1 elements L(i)*L(i)*D(i). */
-
-/*  PIVMIN   (input) DOUBLE PRECISION */
-/*           The minimum pivot in the Sturm sequence. */
-
-/*  GAPTOL   (input) DOUBLE PRECISION */
-/*           Tolerance that indicates when eigenvector entries are negligible */
-/*           w.r.t. their contribution to the residual. */
-
-/*  Z        (input/output) DOUBLE PRECISION array, dimension (N) */
-/*           On input, all entries of Z must be set to 0. */
-/*           On output, Z contains the (scaled) r-th column of the */
-/*           inverse. The scaling is such that Z(R) equals 1. */
-
-/*  WANTNC   (input) INT */
-/*           Specifies whether NEGCNT has to be computed. */
-
-/*  NEGCNT   (output) INT */
-/*           If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
-/*           in the  matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
-
-/*  ZTZ      (output) DOUBLE PRECISION */
-/*           The square of the 2-norm of Z. */
-
-/*  MINGMA   (output) DOUBLE PRECISION */
-/*           The reciprocal of the largest (in magnitude) diagonal */
-/*           element of the inverse of L D L^T - sigma I. */
-
-/*  R        (input/output) INT */
-/*           The twist index for the twisted factorization used to */
-/*           compute Z. */
-/*           On input, 0 <= R <= N. If R is input as 0, R is set to */
-/*           the index where (L D L^T - sigma I)^{-1} is largest */
-/*           in magnitude. If 1 <= R <= N, R is unchanged. */
-/*           On output, R contains the twist index used to compute Z. */
-/*           Ideally, R designates the position of the maximum entry in the */
-/*           eigenvector. */
-
-/*  ISUPPZ   (output) INT array, dimension (2) */
-/*           The support of the vector in Z, i.e., the vector Z is */
-/*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
-
-/*  NRMINV   (output) DOUBLE PRECISION */
-/*           NRMINV = 1/SQRT( ZTZ ) */
-
-/*  RESID    (output) DOUBLE PRECISION */
-/*           The residual of the FP vector. */
-/*           RESID = ABS( MINGMA )/SQRT( ZTZ ) */
-
-/*  RQCORR   (output) DOUBLE PRECISION */
-/*           The Rayleigh Quotient correction to LAMBDA. */
-/*           RQCORR = MINGMA*TMP */
-
-/*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N) */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --work;
-    --isuppz;
-    --z__;
-    --lld;
-    --ld;
-    --l;
-    --d__;
-
-    /* Function Body */
-    eps = DBL_EPSILON; // eps = odmch("Precision");
-    if (*r__ == 0) {
-	r1 = *b1;
-	r2 = *bn;
-    } else {
-	r1 = *r__;
-	r2 = *r__;
-    }
-/*     Storage for LPLUS */
-    indlpl = 0;
-/*     Storage for UMINUS */
-    indumn = *n;
-    inds = (*n << 1) + 1;
-    indp = *n * 3 + 1;
-    if (*b1 == 1) {
-	work[inds] = 0.;
-    } else {
-	work[inds + *b1 - 1] = lld[*b1 - 1];
-    }
-
-/*     Compute the stationary transform (using the differential form) */
-/*     until the index R2. */
-
-    sawnan1 = FALSE_;
-    neg1 = 0;
-    s = work[inds + *b1 - 1] - *lambda;
-    i__1 = r1 - 1;
-    for (i__ = *b1; i__ <= i__1; ++i__) {
-	dplus = d__[i__] + s;
-	work[indlpl + i__] = ld[i__] / dplus;
-	if (dplus < 0.) {
-	    ++neg1;
-	}
-	work[inds + i__] = s * work[indlpl + i__] * l[i__];
-	s = work[inds + i__] - *lambda;
-/* L50: */
-    }
-    sawnan1 = odnan(&s);
-    if (sawnan1) {
-	goto L60;
-    }
-    i__1 = r2 - 1;
-    for (i__ = r1; i__ <= i__1; ++i__) {
-	dplus = d__[i__] + s;
-	work[indlpl + i__] = ld[i__] / dplus;
-	work[inds + i__] = s * work[indlpl + i__] * l[i__];
-	s = work[inds + i__] - *lambda;
-/* L51: */
-    }
-    sawnan1 = odnan(&s);
-
-L60:
-    if (sawnan1) {
-/*        Runs a slower version of the above loop if a NaN is detected */
-	neg1 = 0;
-	s = work[inds + *b1 - 1] - *lambda;
-	i__1 = r1 - 1;
-	for (i__ = *b1; i__ <= i__1; ++i__) {
-	    dplus = d__[i__] + s;
-	    if (fabs(dplus) < *pivmin) {
-		dplus = -(*pivmin);
-	    }
-	    work[indlpl + i__] = ld[i__] / dplus;
-	    if (dplus < 0.) {
-		++neg1;
-	    }
-	    work[inds + i__] = s * work[indlpl + i__] * l[i__];
-	    if (work[indlpl + i__] == 0.) {
-		work[inds + i__] = lld[i__];
-	    }
-	    s = work[inds + i__] - *lambda;
-/* L70: */
-	}
-	i__1 = r2 - 1;
-	for (i__ = r1; i__ <= i__1; ++i__) {
-	    dplus = d__[i__] + s;
-	    if (fabs(dplus) < *pivmin) {
-		dplus = -(*pivmin);
-	    }
-	    work[indlpl + i__] = ld[i__] / dplus;
-	    work[inds + i__] = s * work[indlpl + i__] * l[i__];
-	    if (work[indlpl + i__] == 0.) {
-		work[inds + i__] = lld[i__];
-	    }
-	    s = work[inds + i__] - *lambda;
-/* L71: */
-	}
-    }
-
-/*     Compute the progressive transform (using the differential form) */
-/*     until the index R1 */
-
-    sawnan2 = FALSE_;
-    neg2 = 0;
-    work[indp + *bn - 1] = d__[*bn] - *lambda;
-    i__1 = r1;
-    for (i__ = *bn - 1; i__ >= i__1; --i__) {
-	dminus = lld[i__] + work[indp + i__];
-	tmp = d__[i__] / dminus;
-	if (dminus < 0.) {
-	    ++neg2;
-	}
-	work[indumn + i__] = l[i__] * tmp;
-	work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
-/* L80: */
-    }
-    tmp = work[indp + r1 - 1];
-    sawnan2 = odnan(&tmp);
-    if (sawnan2) {
-/*        Runs a slower version of the above loop if a NaN is detected */
-	neg2 = 0;
-	i__1 = r1;
-	for (i__ = *bn - 1; i__ >= i__1; --i__) {
-	    dminus = lld[i__] + work[indp + i__];
-	    if (fabs(dminus) < *pivmin) {
-		dminus = -(*pivmin);
-	    }
-	    tmp = d__[i__] / dminus;
-	    if (dminus < 0.) {
-		++neg2;
-	    }
-	    work[indumn + i__] = l[i__] * tmp;
-	    work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
-	    if (tmp == 0.) {
-		work[indp + i__ - 1] = d__[i__] - *lambda;
-	    }
-/* L100: */
-	}
-    }
-
-/*     Find the index (from R1 to R2) of the largest (in magnitude) */
-/*     diagonal element of the inverse */
-
-    *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
-    if (*mingma < 0.) {
-	++neg1;
-    }
-    if (*wantnc) {
-	*negcnt = neg1 + neg2;
-    } else {
-	*negcnt = -1;
-    }
-    if (fabs(*mingma) == 0.) {
-	*mingma = eps * work[inds + r1 - 1];
-    }
-    *r__ = r1;
-    i__1 = r2 - 1;
-    for (i__ = r1; i__ <= i__1; ++i__) {
-	tmp = work[inds + i__] + work[indp + i__];
-	if (tmp == 0.) {
-	    tmp = eps * work[inds + i__];
-	}
-	if (fabs(tmp) <= fabs(*mingma)) {
-	    *mingma = tmp;
-	    *r__ = i__ + 1;
-	}
-/* L110: */
-    }
-
-/*     Compute the FP vector: solve N^T v = e_r */
-
-    isuppz[1] = *b1;
-    isuppz[2] = *bn;
-    z__[*r__] = 1.;
-    *ztz = 1.;
-
-/*     Compute the FP vector upwards from R */
-
-    if (! sawnan1 && ! sawnan2) {
-	i__1 = *b1;
-	for (i__ = *r__ - 1; i__ >= i__1; --i__) {
-	    z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
-	    if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
-		    d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
-		z__[i__] = 0.;
-		isuppz[1] = i__ + 1;
-		goto L220;
-	    }
-	    *ztz += z__[i__] * z__[i__];
-/* L210: */
-	}
-L220:
-	;
-    } else {
-/*        Run slower loop if NaN occurred. */
-	i__1 = *b1;
-	for (i__ = *r__ - 1; i__ >= i__1; --i__) {
-	    if (z__[i__ + 1] == 0.) {
-		z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
-	    } else {
-		z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
-	    }
-	    if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
-		    d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
-		z__[i__] = 0.;
-		isuppz[1] = i__ + 1;
-		goto L240;
-	    }
-	    *ztz += z__[i__] * z__[i__];
-/* L230: */
-	}
-L240:
-	;
-    }
-/*     Compute the FP vector downwards from R in blocks of size BLKSIZ */
-    if (! sawnan1 && ! sawnan2) {
-	i__1 = *bn - 1;
-	for (i__ = *r__; i__ <= i__1; ++i__) {
-	    z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
-	    if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
-		    d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
-		z__[i__ + 1] = 0.;
-		isuppz[2] = i__;
-		goto L260;
-	    }
-	    *ztz += z__[i__ + 1] * z__[i__ + 1];
-/* L250: */
-	}
-L260:
-	;
-    } else {
-/*        Run slower loop if NaN occurred. */
-	i__1 = *bn - 1;
-	for (i__ = *r__; i__ <= i__1; ++i__) {
-	    if (z__[i__] == 0.) {
-		z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
-	    } else {
-		z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
-	    }
-	    if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs(
-		    d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) {
-		z__[i__ + 1] = 0.;
-		isuppz[2] = i__;
-		goto L280;
-	    }
-	    *ztz += z__[i__ + 1] * z__[i__ + 1];
-/* L270: */
-	}
-L280:
-	;
-    }
-
-/*     Compute quantities for convergence test */
-
-    tmp = 1. / *ztz;
-    *nrminv = sqrt(tmp);
-    *resid = fabs(*mingma) * *nrminv;
-    *rqcorr = *mingma * tmp;
-
-
-    return 0;
-
-/*     End of ODR1V */
-
-} /* odr1v_ */
diff --git a/external/pmrrr/src/lapack/odrnv.c b/external/pmrrr/src/lapack/odrnv.c
deleted file mode 100644
index f0330d1f78..0000000000
--- a/external/pmrrr/src/lapack/odrnv.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/* dlarnv.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-
-/* Subroutine */ 
-int odrnv(int *idist, int *iseed, int *n, double *x)
-{
-    /* System generated locals */
-    int i__1, i__2, i__3;
-
-    /* Builtin functions */
-    // double log(double), sqrt(double), cos(double);
-
-    /* Local variables */
-    int i__;
-    double u[128];
-    int il, iv, il2;
-    extern int odruv(int *, int *, double *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODRNV returns a vector of n random real numbers from a uniform or */
-/*  normal distribution. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  IDIST   (input) INT */
-/*          Specifies the distribution of the random numbers: */
-/*          = 1:  uniform (0,1) */
-/*          = 2:  uniform (-1,1) */
-/*          = 3:  normal (0,1) */
-
-/*  ISEED   (input/output) INT array, dimension (4) */
-/*          On entry, the seed of the random number generator; the array */
-/*          elements must be between 0 and 4095, and ISEED(4) must be */
-/*          odd. */
-/*          On exit, the seed is updated. */
-
-/*  N       (input) INT */
-/*          The number of random numbers to be generated. */
-
-/*  X       (output) DOUBLE PRECISION array, dimension (N) */
-/*          The generated random numbers. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  This routine calls the auxiliary routine ODRUV to generate random */
-/*  real numbers from a uniform (0,1) distribution, in batches of up to */
-/*  128 using vectorisable code. The Box-Muller method is used to */
-/*  transform numbers from a uniform to a normal distribution. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Local Arrays .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --x;
-    --iseed;
-
-    /* Function Body */
-    i__1 = *n;
-    for (iv = 1; iv <= i__1; iv += 64) {
-/* Computing MIN */
-	i__2 = 64, i__3 = *n - iv + 1;
-	il = imin(i__2,i__3);
-	if (*idist == 3) {
-	    il2 = il << 1;
-	} else {
-	    il2 = il;
-	}
-
-/*        Call ODRUV to generate IL2 numbers from a uniform (0,1) */
-/*        distribution (IL2 <= LV) */
-
-	odruv(&iseed[1], &il2, u);
-
-	if (*idist == 1) {
-
-/*           Copy generated numbers */
-
-	    i__2 = il;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		x[iv + i__ - 1] = u[i__ - 1];
-/* L10: */
-	    }
-	} else if (*idist == 2) {
-
-/*           Convert generated numbers to uniform (-1,1) distribution */
-
-	    i__2 = il;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.;
-/* L20: */
-	    }
-	} else if (*idist == 3) {
-
-/*           Convert generated numbers to normal (0,1) distribution */
-
-	    i__2 = il;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[(
-			i__ << 1) - 1] * 6.2831853071795864769252867663);
-/* L30: */
-	    }
-	}
-/* L40: */
-    }
-    return 0;
-
-/*     End of ODRNV */
-
-} /* odrnv_ */
diff --git a/external/pmrrr/src/lapack/odrra.c b/external/pmrrr/src/lapack/odrra.c
deleted file mode 100644
index f0d802d1a7..0000000000
--- a/external/pmrrr/src/lapack/odrra.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/* dlarra.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odrra(int *n, double *d__, double *e, 
-	double *e2, double *spltol, double *tnrm, int *nsplit, 
-	 int *isplit, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    int i__;
-    double tmp1, eabs;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Compute the splitting points with threshold SPLTOL. */
-/*  ODRRA sets any "small" off-diagonal elements to zero. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix. N > 0. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the N diagonal elements of the tridiagonal */
-/*          matrix T. */
-
-/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the first (N-1) entries contain the subdiagonal */
-/*          elements of the tridiagonal matrix T; E(N) need not be set. */
-/*          On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */
-/*          are set to zero, the other entries of E are untouched. */
-
-/*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the first (N-1) entries contain the SQUARES of the */
-/*          subdiagonal elements of the tridiagonal matrix T; */
-/*          E2(N) need not be set. */
-/*          On exit, the entries E2( ISPLIT( I ) ), */
-/*          1 <= I <= NSPLIT, have been set to zero */
-
-/*  SPLTOL (input) DOUBLE PRECISION */
-/*          The threshold for splitting. Two criteria can be used: */
-/*          SPLTOL<0 : criterion based on absolute off-diagonal value */
-/*          SPLTOL>0 : criterion that preserves relative accuracy */
-
-/*  TNRM (input) DOUBLE PRECISION */
-/*          The norm of the matrix. */
-
-/*  NSPLIT  (output) INT */
-/*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
-
-/*  ISPLIT  (output) INT array, dimension (N) */
-/*          The splitting points, at which T breaks up into blocks. */
-/*          The first block consists of rows/columns 1 to ISPLIT(1), */
-/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
-/*          etc., and the NSPLIT-th consists of rows/columns */
-/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
-
-
-/*  INFO    (output) INT */
-/*          = 0:  successful exit */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --isplit;
-    --e2;
-    --e;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-/*     Compute splitting points */
-    *nsplit = 1;
-    if (*spltol < 0.) {
-/*        Criterion based on absolute off-diagonal value */
-	tmp1 = fabs(*spltol) * *tnrm;
-	i__1 = *n - 1;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    eabs = (d__1 = e[i__], fabs(d__1));
-	    if (eabs <= tmp1) {
-		e[i__] = 0.;
-		e2[i__] = 0.;
-		isplit[*nsplit] = i__;
-		++(*nsplit);
-	    }
-/* L9: */
-	}
-    } else {
-/*        Criterion that guarantees relative accuracy */
-	i__1 = *n - 1;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    eabs = (d__1 = e[i__], fabs(d__1));
-	    if (eabs <= *spltol * sqrt((d__1 = d__[i__], fabs(d__1))) * sqrt((
-		    d__2 = d__[i__ + 1], fabs(d__2)))) {
-		e[i__] = 0.;
-		e2[i__] = 0.;
-		isplit[*nsplit] = i__;
-		++(*nsplit);
-	    }
-/* L10: */
-	}
-    }
-    isplit[*nsplit] = *n;
-    return 0;
-
-/*     End of ODRRA */
-
-} /* odrra_ */
diff --git a/external/pmrrr/src/lapack/odrrb.c b/external/pmrrr/src/lapack/odrrb.c
deleted file mode 100644
index 4a1a8600f6..0000000000
--- a/external/pmrrr/src/lapack/odrrb.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/* dlarrb.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odrrb(int *n, double *d__, double *lld, 
-	int *ifirst, int *ilast, double *rtol1, double *rtol2, 
-	 int *offset, double *w, double *wgap, double *werr, 
-	double *work, int *iwork, double *pivmin, double *
-	spdiam, int *twist, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    //    double log(double);
-
-    /* Local variables */
-    int i__, k, r__, i1, ii, ip;
-    double gap, mid, tmp, back, lgap, rgap, left;
-    int iter, nint, prev, next;
-    double cvrgd, right, width;
-    extern int odneg(int *, double *, double *, double *
-, double *, int *);
-    int negcnt;
-    double mnwdth;
-    int olnint, maxitr;
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Given the relatively robust representation(RRR) L D L^T, ODRRB */
-/*  does "limited" bisection to refine the eigenvalues of L D L^T, */
-/*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
-/*  guesses for these eigenvalues are input in W, the corresponding estimate */
-/*  of the error in these guesses and their gaps are input in WERR */
-/*  and WGAP, respectively. During bisection, intervals */
-/*  [left, right] are maintained by storing their mid-points and */
-/*  semi-widths in the arrays W and WERR respectively. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of the diagonal matrix D. */
-
-/*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (N-1) elements L(i)*L(i)*D(i). */
-
-/*  IFIRST  (input) INT */
-/*          The index of the first eigenvalue to be computed. */
-
-/*  ILAST   (input) INT */
-/*          The index of the last eigenvalue to be computed. */
-
-/*  RTOL1   (input) DOUBLE PRECISION */
-/*  RTOL2   (input) DOUBLE PRECISION */
-/*          Tolerance for the convergence of the bisection intervals. */
-/*          An interval [LEFT,RIGHT] has converged if */
-/*          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
-/*          where GAP is the (estimated) distance to the nearest */
-/*          eigenvalue. */
-
-/*  OFFSET  (input) INT */
-/*          Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */
-/*          through ILAST-OFFSET elements of these arrays are to be used. */
-
-/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
-/*          estimates of the eigenvalues of L D L^T indexed IFIRST throug */
-/*          ILAST. */
-/*          On output, these estimates are refined. */
-
-/*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N-1) */
-/*          On input, the (estimated) gaps between consecutive */
-/*          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */
-/*          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */
-/*          then WGAP(IFIRST-OFFSET) must be set to ZERO. */
-/*          On output, these gaps are refined. */
-
-/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
-/*          the errors in the estimates of the corresponding elements in W. */
-/*          On output, these errors are refined. */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
-/*          Workspace. */
-
-/*  IWORK   (workspace) INT array, dimension (2*N) */
-/*          Workspace. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot in the Sturm sequence. */
-
-/*  SPDIAM  (input) DOUBLE PRECISION */
-/*          The spectral diameter of the matrix. */
-
-/*  TWIST   (input) INT */
-/*          The twist index for the twisted factorization that is used */
-/*          for the negcount. */
-/*          TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */
-/*          TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */
-/*          TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */
-
-/*  INFO    (output) INT */
-/*          Error flag. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --iwork;
-    --work;
-    --werr;
-    --wgap;
-    --w;
-    --lld;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-
-    maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 
-	    2;
-    mnwdth = *pivmin * 2.;
-
-    r__ = *twist;
-    if (r__ < 1 || r__ > *n) {
-	r__ = *n;
-    }
-
-/*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
-/*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
-/*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */
-/*     for an unconverged interval is set to the index of the next unconverged */
-/*     interval, and is -1 or 0 for a converged interval. Thus a linked */
-/*     list of unconverged intervals is set up. */
-
-    i1 = *ifirst;
-/*     The number of unconverged intervals */
-    nint = 0;
-/*     The last unconverged interval found */
-    prev = 0;
-    rgap = wgap[i1 - *offset];
-    i__1 = *ilast;
-    for (i__ = i1; i__ <= i__1; ++i__) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-	left = w[ii] - werr[ii];
-	right = w[ii] + werr[ii];
-	lgap = rgap;
-	rgap = wgap[ii];
-	gap = fmin(lgap,rgap);
-/*        Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
-/*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */
-
-/*        Do while( NEGCNT(LEFT).GT.I-1 ) */
-
-	back = werr[ii];
-L20:
-	negcnt = odneg(n, &d__[1], &lld[1], &left, pivmin, &r__);
-	if (negcnt > i__ - 1) {
-	    left -= back;
-	    back *= 2.;
-	    goto L20;
-	}
-
-/*        Do while( NEGCNT(RIGHT).LT.I ) */
-/*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */
-
-	back = werr[ii];
-L50:
-	negcnt = odneg(n, &d__[1], &lld[1], &right, pivmin, &r__);
-	if (negcnt < i__) {
-	    right += back;
-	    back *= 2.;
-	    goto L50;
-	}
-	width = (d__1 = left - right, fabs(d__1)) * .5;
-/* Computing MAX */
-	d__1 = fabs(left), d__2 = fabs(right);
-	tmp = fmax(d__1,d__2);
-/* Computing MAX */
-	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
-	cvrgd = fmax(d__1,d__2);
-	if (width <= cvrgd || width <= mnwdth) {
-/*           This interval has already converged and does not need refinement. */
-/*           (Note that the gaps might change through refining the */
-/*            eigenvalues, however, they can only get bigger.) */
-/*           Remove it from the list. */
-	    iwork[k - 1] = -1;
-/*           Make sure that I1 always points to the first unconverged interval */
-	    if (i__ == i1 && i__ < *ilast) {
-		i1 = i__ + 1;
-	    }
-	    if (prev >= i1 && i__ <= *ilast) {
-		iwork[(prev << 1) - 1] = i__ + 1;
-	    }
-	} else {
-/*           unconverged interval found */
-	    prev = i__;
-	    ++nint;
-	    iwork[k - 1] = i__ + 1;
-	    iwork[k] = negcnt;
-	}
-	work[k - 1] = left;
-	work[k] = right;
-/* L75: */
-    }
-
-/*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
-/*     and while (ITER.LT.MAXITR) */
-
-    iter = 0;
-L80:
-    prev = i1 - 1;
-    i__ = i1;
-    olnint = nint;
-    i__1 = olnint;
-    for (ip = 1; ip <= i__1; ++ip) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-	rgap = wgap[ii];
-	lgap = rgap;
-	if (ii > 1) {
-	    lgap = wgap[ii - 1];
-	}
-	gap = fmin(lgap,rgap);
-	next = iwork[k - 1];
-	left = work[k - 1];
-	right = work[k];
-	mid = (left + right) * .5;
-/*        semiwidth of interval */
-	width = right - mid;
-/* Computing MAX */
-	d__1 = fabs(left), d__2 = fabs(right);
-	tmp = fmax(d__1,d__2);
-/* Computing MAX */
-	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
-	cvrgd = fmax(d__1,d__2);
-	if (width <= cvrgd || width <= mnwdth || iter == maxitr) {
-/*           reduce number of unconverged intervals */
-	    --nint;
-/*           Mark interval as converged. */
-	    iwork[k - 1] = 0;
-	    if (i1 == i__) {
-		i1 = next;
-	    } else {
-/*              Prev holds the last unconverged interval previously examined */
-		if (prev >= i1) {
-		    iwork[(prev << 1) - 1] = next;
-		}
-	    }
-	    i__ = next;
-	    goto L100;
-	}
-	prev = i__;
-
-/*        Perform one bisection step */
-
-	negcnt = odneg(n, &d__[1], &lld[1], &mid, pivmin, &r__);
-	if (negcnt <= i__ - 1) {
-	    work[k - 1] = mid;
-	} else {
-	    work[k] = mid;
-	}
-	i__ = next;
-L100:
-	;
-    }
-    ++iter;
-/*     do another loop if there are still unconverged intervals */
-/*     However, in the last iteration, all intervals are accepted */
-/*     since this is the best we can do. */
-    if (nint > 0 && iter <= maxitr) {
-	goto L80;
-    }
-
-
-/*     At this point, all the intervals have converged */
-    i__1 = *ilast;
-    for (i__ = *ifirst; i__ <= i__1; ++i__) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-/*        All intervals marked by '0' have been refined. */
-	if (iwork[k - 1] == 0) {
-	    w[ii] = (work[k - 1] + work[k]) * .5;
-	    werr[ii] = work[k] - w[ii];
-	}
-/* L110: */
-    }
-
-    i__1 = *ilast;
-    for (i__ = *ifirst + 1; i__ <= i__1; ++i__) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-/* Computing MAX */
-	d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1];
-	wgap[ii - 1] = fmax(d__1,d__2);
-/* L111: */
-    }
-    return 0;
-
-/*     End of ODRRB */
-
-} /* odrrb_ */
diff --git a/external/pmrrr/src/lapack/odrrc.c b/external/pmrrr/src/lapack/odrrc.c
deleted file mode 100644
index eab853b372..0000000000
--- a/external/pmrrr/src/lapack/odrrc.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/* dlarrc.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-
-/* Subroutine */ 
-int odrrc(char *jobt, int *n, double *vl, 
-	double *vu, double *d__, double *e, double *pivmin, 
-	int *eigcnt, int *lcnt, int *rcnt, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1;
-
-    /* Local variables */
-    int i__;
-    double sl, su, tmp, tmp2;
-    int matt;
-    extern int olsame(char *, char *);
-    double lpivot, rpivot;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Find the number of eigenvalues of the symmetric tridiagonal matrix T */
-/*  that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */
-/*  if JOBT = 'L'. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  JOBT    (input) CHARACTER*1 */
-/*          = 'T':  Compute Sturm count for matrix T. */
-/*          = 'L':  Compute Sturm count for matrix L D L^T. */
-
-/*  N       (input) INT */
-/*          The order of the matrix. N > 0. */
-
-/*  VL      (input) DOUBLE PRECISION */
-/*  VU      (input) DOUBLE PRECISION */
-/*          The lower and upper bounds for the eigenvalues. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */
-/*          JOBT = 'L': The N diagonal elements of the diagonal matrix D. */
-
-/*  E       (input) DOUBLE PRECISION array, dimension (N) */
-/*          JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */
-/*          JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot in the Sturm sequence for T. */
-
-/*  EIGCNT  (output) INT */
-/*          The number of eigenvalues of the symmetric tridiagonal matrix T */
-/*          that are in the interval (VL,VU] */
-
-/*  LCNT    (output) INT */
-/*  RCNT    (output) INT */
-/*          The left and right negcounts of the interval. */
-
-/*  INFO    (output) INT */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --e;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-    *lcnt = 0;
-    *rcnt = 0;
-    *eigcnt = 0;
-    matt = olsame(jobt, "T");
-    if (matt) {
-/*        Sturm sequence count on T */
-	lpivot = d__[1] - *vl;
-	rpivot = d__[1] - *vu;
-	if (lpivot <= 0.) {
-	    ++(*lcnt);
-	}
-	if (rpivot <= 0.) {
-	    ++(*rcnt);
-	}
-	i__1 = *n - 1;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-/* Computing 2nd power */
-	    d__1 = e[i__];
-	    tmp = d__1 * d__1;
-	    lpivot = d__[i__ + 1] - *vl - tmp / lpivot;
-	    rpivot = d__[i__ + 1] - *vu - tmp / rpivot;
-	    if (lpivot <= 0.) {
-		++(*lcnt);
-	    }
-	    if (rpivot <= 0.) {
-		++(*rcnt);
-	    }
-/* L10: */
-	}
-    } else {
-/*        Sturm sequence count on L D L^T */
-	sl = -(*vl);
-	su = -(*vu);
-	i__1 = *n - 1;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    lpivot = d__[i__] + sl;
-	    rpivot = d__[i__] + su;
-	    if (lpivot <= 0.) {
-		++(*lcnt);
-	    }
-	    if (rpivot <= 0.) {
-		++(*rcnt);
-	    }
-	    tmp = e[i__] * d__[i__] * e[i__];
-
-	    tmp2 = tmp / lpivot;
-	    if (tmp2 == 0.) {
-		sl = tmp - *vl;
-	    } else {
-		sl = sl * tmp2 - *vl;
-	    }
-
-	    tmp2 = tmp / rpivot;
-	    if (tmp2 == 0.) {
-		su = tmp - *vu;
-	    } else {
-		su = su * tmp2 - *vu;
-	    }
-/* L20: */
-	}
-	lpivot = d__[*n] + sl;
-	rpivot = d__[*n] + su;
-	if (lpivot <= 0.) {
-	    ++(*lcnt);
-	}
-	if (rpivot <= 0.) {
-	    ++(*rcnt);
-	}
-    }
-    *eigcnt = *rcnt - *lcnt;
-    return 0;
-
-/*     end of ODRRC */
-
-} /* odrrc_ */
diff --git a/external/pmrrr/src/lapack/odrrd.c b/external/pmrrr/src/lapack/odrrd.c
deleted file mode 100644
index 32bef21c97..0000000000
--- a/external/pmrrr/src/lapack/odrrd.c
+++ /dev/null
@@ -1,793 +0,0 @@
-/* dlarrd.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imax(a,b) ( (a) > (b) ? (a) : (b) )
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Table of constant values */
-static int c__1 = 1;
-/*
-static int c_n1 = -1;
-*/
-static int c__3 = 3;
-static int c__2 = 2;
-static int c__0 = 0;
-
-/* Subroutine */ 
-int odrrd(char *range, char *order, int *n, double *vl,  
-	double *vu, int *il, int *iu, double *gers, 
-	double *reltol, double *d__, double *e, double *e2, 
-	double *pivmin, int *nsplit, int *isplit, int *m, 
-	double *w, double *werr, double *wl, double *wu, 
-	int *iblock, int *indexw, double *work, int *iwork, 
-	int *info)
-{
-    /* System generated locals */
-    int i__1, i__2, i__3;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double log(double);
-
-    /* Local variables */
-    int i__, j, ib, ie, je, nb;
-    double gl;
-    int im, in;
-    double gu;
-    int iw, jee;
-    double eps;
-    int nwl;
-    double wlu, wul;
-    int nwu;
-    double tmp1, tmp2;
-    int iend, jblk, ioff, iout, itmp1, itmp2, jdisc;
-    extern int olsame(char *, char *);
-    int iinfo;
-    double atoli;
-    int iwoff, itmax;
-    double wkill, rtoli, uflow, tnorm;
-    // extern double dlamch(char *);
-    int ibegin;
-    extern /* Subroutine */ int odebz(int *, int *, int *, 
-	    int *, int *, int *, double *, double *, 
-	    double *, double *, double *, double *, int *, 
-	     double *, double *, int *, int *, double *, 
-	    int *, int *);
-    int irange, idiscl, idumma[1];
-    /* extern int ilaenv(int *, char *, char *, int *, int *,  */
-    /* 	    int *, int *); */
-    int idiscu;
-    int ncnvrg, toofew;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/*  -- April 2009                                                      -- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODRRD computes the eigenvalues of a symmetric tridiagonal */
-/*  matrix T to suitable accuracy. This is an auxiliary code to be */
-/*  called from DSTEMR. */
-/*  The user may ask for all eigenvalues, all eigenvalues */
-/*  in the half-open interval (VL, VU], or the IL-th through IU-th */
-/*  eigenvalues. */
-
-/*  To avoid overflow, the matrix must be scaled so that its */
-/*  largest element is no greater than overflow**(1/2) * */
-/*  underflow**(1/4) in absolute value, and for greatest */
-/*  accuracy, it should not be much smaller than that. */
-
-/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
-/*  Matrix", Report CS41, Computer Science Dept., Stanford */
-/*  University, July 21, 1966. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  RANGE   (input) CHARACTER */
-/*          = 'A': ("All")   all eigenvalues will be found. */
-/*          = 'V': ("Value") all eigenvalues in the half-open interval */
-/*                           (VL, VU] will be found. */
-/*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
-/*                           entire matrix) will be found. */
-
-/*  ORDER   (input) CHARACTER */
-/*          = 'B': ("By Block") the eigenvalues will be grouped by */
-/*                              split-off block (see IBLOCK, ISPLIT) and */
-/*                              ordered from smallest to largest within */
-/*                              the block. */
-/*          = 'E': ("Entire matrix") */
-/*                              the eigenvalues for the entire matrix */
-/*                              will be ordered from smallest to */
-/*                              largest. */
-
-/*  N       (input) INT */
-/*          The order of the tridiagonal matrix T.  N >= 0. */
-
-/*  VL      (input) DOUBLE PRECISION */
-/*  VU      (input) DOUBLE PRECISION */
-/*          If RANGE='V', the lower and upper bounds of the interval to */
-/*          be searched for eigenvalues.  Eigenvalues less than or equal */
-/*          to VL, or greater than VU, will not be returned.  VL < VU. */
-/*          Not referenced if RANGE = 'A' or 'I'. */
-
-/*  IL      (input) INT */
-/*  IU      (input) INT */
-/*          If RANGE='I', the indices (in ascending order) of the */
-/*          smallest and largest eigenvalues to be returned. */
-/*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
-/*          Not referenced if RANGE = 'A' or 'V'. */
-
-/*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
-/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
-/*          is (GERS(2*i-1), GERS(2*i)). */
-
-/*  RELTOL  (input) DOUBLE PRECISION */
-/*          The minimum relative width of an interval.  When an interval */
-/*          is narrower than RELTOL times the larger (in */
-/*          magnitude) endpoint, then it is considered to be */
-/*          sufficiently small, i.e., converged.  Note: this should */
-/*          always be at least radix*machine epsilon. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The n diagonal elements of the tridiagonal matrix T. */
-
-/*  E       (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (n-1) off-diagonal elements of the tridiagonal matrix T. */
-
-/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot allowed in the Sturm sequence for T. */
-
-/*  NSPLIT  (input) INT */
-/*          The number of diagonal blocks in the matrix T. */
-/*          1 <= NSPLIT <= N. */
-
-/*  ISPLIT  (input) INT array, dimension (N) */
-/*          The splitting points, at which T breaks up into submatrices. */
-/*          The first submatrix consists of rows/columns 1 to ISPLIT(1), */
-/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
-/*          etc., and the NSPLIT-th consists of rows/columns */
-/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
-/*          (Only the first NSPLIT elements will actually be used, but */
-/*          since the user cannot know a priori what value NSPLIT will */
-/*          have, N words must be reserved for ISPLIT.) */
-
-/*  M       (output) INT */
-/*          The actual number of eigenvalues found. 0 <= M <= N. */
-/*          (See also the description of INFO=2,3.) */
-
-/*  W       (output) DOUBLE PRECISION array, dimension (N) */
-/*          On exit, the first M elements of W will contain the */
-/*          eigenvalue approximations. ODRRD computes an interval */
-/*          I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */
-/*          approximation is given as the interval midpoint */
-/*          W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */
-/*          WERR(j) = abs( a_j - b_j)/2 */
-
-/*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
-/*          The error bound on the corresponding eigenvalue approximation */
-/*          in W. */
-
-/*  WL      (output) DOUBLE PRECISION */
-/*  WU      (output) DOUBLE PRECISION */
-/*          The interval (WL, WU] contains all the wanted eigenvalues. */
-/*          If RANGE='V', then WL=VL and WU=VU. */
-/*          If RANGE='A', then WL and WU are the global Gerschgorin bounds */
-/*                        on the spectrum. */
-/*          If RANGE='I', then WL and WU are computed by ODEBZ from the */
-/*                        index range specified. */
-
-/*  IBLOCK  (output) INT array, dimension (N) */
-/*          At each row/column j where E(j) is zero or small, the */
-/*          matrix T is considered to split into a block diagonal */
-/*          matrix.  On exit, if INFO = 0, IBLOCK(i) specifies to which */
-/*          block (from 1 to the number of blocks) the eigenvalue W(i) */
-/*          belongs.  (ODRRD may use the remaining N-M elements as */
-/*          workspace.) */
-
-/*  INDEXW  (output) INT array, dimension (N) */
-/*          The indices of the eigenvalues within each block (submatrix); */
-/*          for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */
-/*          i-th eigenvalue W(i) is the j-th eigenvalue in block k. */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N) */
-
-/*  IWORK   (workspace) INT array, dimension (3*N) */
-
-/*  INFO    (output) INT */
-/*          = 0:  successful exit */
-/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
-/*          > 0:  some or all of the eigenvalues failed to converge or */
-/*                were not computed: */
-/*                =1 or 3: Bisection failed to converge for some */
-/*                        eigenvalues; these eigenvalues are flagged by a */
-/*                        negative block number.  The effect is that the */
-/*                        eigenvalues may not be as accurate as the */
-/*                        absolute and relative tolerances.  This is */
-/*                        generally caused by unexpectedly inaccurate */
-/*                        arithmetic. */
-/*                =2 or 3: RANGE='I' only: Not all of the eigenvalues */
-/*                        IL:IU were found. */
-/*                        Effect: M < IU+1-IL */
-/*                        Cause:  non-monotonic arithmetic, causing the */
-/*                                Sturm sequence to be non-monotonic. */
-/*                        Cure:   recalculate, using RANGE='A', and pick */
-/*                                out eigenvalues IL:IU.  In some cases, */
-/*                                increasing the PARAMETER "FUDGE" may */
-/*                                make things work. */
-/*                = 4:    RANGE='I', and the Gershgorin interval */
-/*                        initially used was too small.  No eigenvalues */
-/*                        were computed. */
-/*                        Probable cause: your machine has sloppy */
-/*                                        floating-point arithmetic. */
-/*                        Cure: Increase the PARAMETER "FUDGE", */
-/*                              recompile, and try again. */
-
-/*  Internal Parameters */
-/*  =================== */
-
-/*  FUDGE   DOUBLE PRECISION, default = 2 */
-/*          A "fudge factor" to widen the Gershgorin intervals.  Ideally, */
-/*          a value of 1 should work, but on machines with sloppy */
-/*          arithmetic, this needs to be larger.  The default for */
-/*          publicly released versions should be large enough to handle */
-/*          the worst machine around.  Note that this has no effect */
-/*          on accuracy of the solution. */
-
-/*  Based on contributions by */
-/*     W. Kahan, University of California, Berkeley, USA */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Local Arrays .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --iwork;
-    --work;
-    --indexw;
-    --iblock;
-    --werr;
-    --w;
-    --isplit;
-    --e2;
-    --e;
-    --d__;
-    --gers;
-
-    /* Function Body */
-    *info = 0;
-
-/*     Decode RANGE */
-
-    if (olsame(range, "A")) {
-	irange = 1;
-    } else if (olsame(range, "V")) {
-	irange = 2;
-    } else if (olsame(range, "I")) {
-	irange = 3;
-    } else {
-	irange = 0;
-    }
-
-/*     Check for Errors */
-
-    if (irange <= 0) {
-	*info = -1;
-    } else if (! (olsame(order, "B") || olsame(order, "E"))) {
-	*info = -2;
-    } else if (*n < 0) {
-	*info = -3;
-    } else if (irange == 2) {
-	if (*vl >= *vu) {
-	    *info = -5;
-	}
-    } else if (irange == 3 && (*il < 1 || *il > imax(1,*n))) {
-	*info = -6;
-    } else if (irange == 3 && (*iu < imin(*n,*il) || *iu > *n)) {
-	*info = -7;
-    }
-
-    if (*info != 0) {
-	return 0;
-    }
-/*     Initialize error flags */
-    *info = 0;
-    ncnvrg = FALSE_;
-    toofew = FALSE_;
-/*     Quick return if possible */
-    *m = 0;
-    if (*n == 0) {
-	return 0;
-    }
-/*     Simplification: */
-    if (irange == 3 && *il == 1 && *iu == *n) {
-	irange = 1;
-    }
-/*     Get machine constants */
-    eps = DBL_EPSILON; // odmch("P");
-    uflow = DBL_MIN;  // odmch("U");
-/*     Special Case when N=1 */
-/*     Treat case of 1x1 matrix for quick return */
-    if (*n == 1) {
-	if (irange == 1 || irange == 2 && d__[1] > *vl && d__[1] <= *vu || 
-		irange == 3 && *il == 1 && *iu == 1) {
-	    *m = 1;
-	    w[1] = d__[1];
-/*           The computation error of the eigenvalue is zero */
-	    werr[1] = 0.;
-	    iblock[1] = 1;
-	    indexw[1] = 1;
-	}
-	return 0;
-    }
-/*     NB is the minimum vector length for vector bisection, or 0 */
-/*     if only scalar is to be done. */
-    nb = 1; // ilaenv(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1);
-    if (nb <= 1) {
-	nb = 0;
-    }
-/*     Find global spectral radius */
-    gl = d__[1];
-    gu = d__[1];
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-/* Computing MIN */
-	d__1 = gl, d__2 = gers[(i__ << 1) - 1];
-	gl = fmin(d__1,d__2);
-/* Computing MAX */
-	d__1 = gu, d__2 = gers[i__ * 2];
-	gu = fmax(d__1,d__2);
-/* L5: */
-    }
-/*     Compute global Gerschgorin bounds and spectral diameter */
-/* Computing MAX */
-    d__1 = fabs(gl), d__2 = fabs(gu);
-    tnorm = fmax(d__1,d__2);
-    gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.;
-    gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.;
-/*     [JAN/28/2009] remove the line below since SPDIAM variable not use */
-/*     SPDIAM = GU - GL */
-/*     Input arguments for ODEBZ: */
-/*     The relative tolerance.  An interval (a,b] lies within */
-/*     "relative tolerance" if  b-a < RELTOL*max(|a|,|b|), */
-    rtoli = *reltol;
-/*     Set the absolute tolerance for interval convergence to zero to force */
-/*     interval convergence based on relative size of the interval. */
-/*     This is dangerous because intervals might not converge when RELTOL is */
-/*     small. But at least a very small number should be selected so that for */
-/*     strongly graded matrices, the code can get relatively accurate */
-/*     eigenvalues. */
-    atoli = uflow * 4. + *pivmin * 4.;
-    if (irange == 3) {
-/*        RANGE='I': Compute an interval containing eigenvalues */
-/*        IL through IU. The initial interval [GL,GU] from the global */
-/*        Gerschgorin bounds GL and GU is refined by ODEBZ. */
-	itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 
-		2;
-	work[*n + 1] = gl;
-	work[*n + 2] = gl;
-	work[*n + 3] = gu;
-	work[*n + 4] = gu;
-	work[*n + 5] = gl;
-	work[*n + 6] = gu;
-	iwork[1] = -1;
-	iwork[2] = -1;
-	iwork[3] = *n + 1;
-	iwork[4] = *n + 1;
-	iwork[5] = *il - 1;
-	iwork[6] = *iu;
-
-	odebz(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, &
-		d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5]
-, &iout, &iwork[1], &w[1], &iblock[1], &iinfo);
-	if (iinfo != 0) {
-	    *info = iinfo;
-	    return 0;
-	}
-/*        On exit, output intervals may not be ordered by ascending negcount */
-	if (iwork[6] == *iu) {
-	    *wl = work[*n + 1];
-	    wlu = work[*n + 3];
-	    nwl = iwork[1];
-	    *wu = work[*n + 4];
-	    wul = work[*n + 2];
-	    nwu = iwork[4];
-	} else {
-	    *wl = work[*n + 2];
-	    wlu = work[*n + 4];
-	    nwl = iwork[2];
-	    *wu = work[*n + 3];
-	    wul = work[*n + 1];
-	    nwu = iwork[3];
-	}
-/*        On exit, the interval [WL, WLU] contains a value with negcount NWL, */
-/*        and [WUL, WU] contains a value with negcount NWU. */
-	if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) {
-	    *info = 4;
-	    return 0;
-	}
-    } else if (irange == 2) {
-	*wl = *vl;
-	*wu = *vu;
-    } else if (irange == 1) {
-	*wl = gl;
-	*wu = gu;
-    }
-/*     Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */
-/*     NWL accumulates the number of eigenvalues .le. WL, */
-/*     NWU accumulates the number of eigenvalues .le. WU */
-    *m = 0;
-    iend = 0;
-    *info = 0;
-    nwl = 0;
-    nwu = 0;
-
-    i__1 = *nsplit;
-    for (jblk = 1; jblk <= i__1; ++jblk) {
-	ioff = iend;
-	ibegin = ioff + 1;
-	iend = isplit[jblk];
-	in = iend - ioff;
-
-	if (in == 1) {
-/*           1x1 block */
-	    if (*wl >= d__[ibegin] - *pivmin) {
-		++nwl;
-	    }
-	    if (*wu >= d__[ibegin] - *pivmin) {
-		++nwu;
-	    }
-	    if (irange == 1 || *wl < d__[ibegin] - *pivmin && *wu >= d__[
-		    ibegin] - *pivmin) {
-		++(*m);
-		w[*m] = d__[ibegin];
-		werr[*m] = 0.;
-/*              The gap for a single block doesn't matter for the later */
-/*              algorithm and is assigned an arbitrary large value */
-		iblock[*m] = jblk;
-		indexw[*m] = 1;
-	    }
-/*        Disabled 2x2 case because of a failure on the following matrix */
-/*        RANGE = 'I', IL = IU = 4 */
-/*          Original Tridiagonal, d = [ */
-/*           -0.150102010615740E+00 */
-/*           -0.849897989384260E+00 */
-/*           -0.128208148052635E-15 */
-/*            0.128257718286320E-15 */
-/*          ]; */
-/*          e = [ */
-/*           -0.357171383266986E+00 */
-/*           -0.180411241501588E-15 */
-/*           -0.175152352710251E-15 */
-/*          ]; */
-
-/*         ELSE IF( IN.EQ.2 ) THEN */
-/* *           2x2 block */
-/*            DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */
-/*            TMP1 = HALF*(D(IBEGIN)+D(IEND)) */
-/*            L1 = TMP1 - DISC */
-/*            IF( WL.GE. L1-PIVMIN ) */
-/*     $         NWL = NWL + 1 */
-/*            IF( WU.GE. L1-PIVMIN ) */
-/*     $         NWU = NWU + 1 */
-/*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */
-/*     $          L1-PIVMIN ) ) THEN */
-/*               M = M + 1 */
-/*               W( M ) = L1 */
-/* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
-/*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
-/*               IBLOCK( M ) = JBLK */
-/*               INDEXW( M ) = 1 */
-/*            ENDIF */
-/*            L2 = TMP1 + DISC */
-/*            IF( WL.GE. L2-PIVMIN ) */
-/*     $         NWL = NWL + 1 */
-/*            IF( WU.GE. L2-PIVMIN ) */
-/*     $         NWU = NWU + 1 */
-/*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */
-/*     $          L2-PIVMIN ) ) THEN */
-/*               M = M + 1 */
-/*               W( M ) = L2 */
-/* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
-/*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
-/*               IBLOCK( M ) = JBLK */
-/*               INDEXW( M ) = 2 */
-/*            ENDIF */
-	} else {
-/*           General Case - block of size IN >= 2 */
-/*           Compute local Gerschgorin interval and use it as the initial */
-/*           interval for ODEBZ */
-	    gu = d__[ibegin];
-	    gl = d__[ibegin];
-	    tmp1 = 0.;
-	    i__2 = iend;
-	    for (j = ibegin; j <= i__2; ++j) {
-/* Computing MIN */
-		d__1 = gl, d__2 = gers[(j << 1) - 1];
-		gl = fmin(d__1,d__2);
-/* Computing MAX */
-		d__1 = gu, d__2 = gers[j * 2];
-		gu = fmax(d__1,d__2);
-/* L40: */
-	    }
-/*           [JAN/28/2009] */
-/*           change SPDIAM by TNORM in lines 2 and 3 thereafter */
-/*           line 1: remove computation of SPDIAM (not useful anymore) */
-/*           SPDIAM = GU - GL */
-/*           GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */
-/*           GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */
-	    gl = gl - tnorm * 2. * eps * in - *pivmin * 2.;
-	    gu = gu + tnorm * 2. * eps * in + *pivmin * 2.;
-
-	    if (irange > 1) {
-		if (gu < *wl) {
-/*                 the local block contains none of the wanted eigenvalues */
-		    nwl += in;
-		    nwu += in;
-		    goto L70;
-		}
-/*              refine search interval if possible, only range (WL,WU] matters */
-		gl = fmax(gl,*wl);
-		gu = fmin(gu,*wu);
-		if (gl >= gu) {
-		    goto L70;
-		}
-	    }
-/*           Find negcount of initial interval boundaries GL and GU */
-	    work[*n + 1] = gl;
-	    work[*n + in + 1] = gu;
-	    odebz(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, 
-		    pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
-		    work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], &
-		    w[*m + 1], &iblock[*m + 1], &iinfo);
-	    if (iinfo != 0) {
-		*info = iinfo;
-		return 0;
-	    }
-
-	    nwl += iwork[1];
-	    nwu += iwork[in + 1];
-	    iwoff = *m - iwork[1];
-/*           Compute Eigenvalues */
-	    itmax = (int) ((log(gu - gl + *pivmin) - log(*pivmin)) / log(
-		    2.)) + 2;
-	    odebz(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, 
-		    pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
-		    work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], 
-		     &w[*m + 1], &iblock[*m + 1], &iinfo);
-	    if (iinfo != 0) {
-		*info = iinfo;
-		return 0;
-	    }
-
-/*           Copy eigenvalues into W and IBLOCK */
-/*           Use -JBLK for block number for unconverged eigenvalues. */
-/*           Loop over the number of output intervals from ODEBZ */
-	    i__2 = iout;
-	    for (j = 1; j <= i__2; ++j) {
-/*              eigenvalue approximation is middle point of interval */
-		tmp1 = (work[j + *n] + work[j + in + *n]) * .5;
-/*              semi length of error interval */
-		tmp2 = (d__1 = work[j + *n] - work[j + in + *n], fabs(d__1)) * 
-			.5;
-		if (j > iout - iinfo) {
-/*                 Flag non-convergence. */
-		    ncnvrg = TRUE_;
-		    ib = -jblk;
-		} else {
-		    ib = jblk;
-		}
-		i__3 = iwork[j + in] + iwoff;
-		for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) {
-		    w[je] = tmp1;
-		    werr[je] = tmp2;
-		    indexw[je] = je - iwoff;
-		    iblock[je] = ib;
-/* L50: */
-		}
-/* L60: */
-	    }
-
-	    *m += im;
-	}
-L70:
-	;
-    }
-/*     If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */
-/*     If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */
-    if (irange == 3) {
-	idiscl = *il - 1 - nwl;
-	idiscu = nwu - *iu;
-
-	if (idiscl > 0) {
-	    im = 0;
-	    i__1 = *m;
-	    for (je = 1; je <= i__1; ++je) {
-/*              Remove some of the smallest eigenvalues from the left so that */
-/*              at the end IDISCL =0. Move all eigenvalues up to the left. */
-		if (w[je] <= wlu && idiscl > 0) {
-		    --idiscl;
-		} else {
-		    ++im;
-		    w[im] = w[je];
-		    werr[im] = werr[je];
-		    indexw[im] = indexw[je];
-		    iblock[im] = iblock[je];
-		}
-/* L80: */
-	    }
-	    *m = im;
-	}
-	if (idiscu > 0) {
-/*           Remove some of the largest eigenvalues from the right so that */
-/*           at the end IDISCU =0. Move all eigenvalues up to the left. */
-	    im = *m + 1;
-	    for (je = *m; je >= 1; --je) {
-		if (w[je] >= wul && idiscu > 0) {
-		    --idiscu;
-		} else {
-		    --im;
-		    w[im] = w[je];
-		    werr[im] = werr[je];
-		    indexw[im] = indexw[je];
-		    iblock[im] = iblock[je];
-		}
-/* L81: */
-	    }
-	    jee = 0;
-	    i__1 = *m;
-	    for (je = im; je <= i__1; ++je) {
-		++jee;
-		w[jee] = w[je];
-		werr[jee] = werr[je];
-		indexw[jee] = indexw[je];
-		iblock[jee] = iblock[je];
-/* L82: */
-	    }
-	    *m = *m - im + 1;
-	}
-	if (idiscl > 0 || idiscu > 0) {
-/*           Code to deal with effects of bad arithmetic. (If N(w) is */
-/*           monotone non-decreasing, this should never happen.) */
-/*           Some low eigenvalues to be discarded are not in (WL,WLU], */
-/*           or high eigenvalues to be discarded are not in (WUL,WU] */
-/*           so just kill off the smallest IDISCL/largest IDISCU */
-/*           eigenvalues, by marking the corresponding IBLOCK = 0 */
-	    if (idiscl > 0) {
-		wkill = *wu;
-		i__1 = idiscl;
-		for (jdisc = 1; jdisc <= i__1; ++jdisc) {
-		    iw = 0;
-		    i__2 = *m;
-		    for (je = 1; je <= i__2; ++je) {
-			if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) {
-			    iw = je;
-			    wkill = w[je];
-			}
-/* L90: */
-		    }
-		    iblock[iw] = 0;
-/* L100: */
-		}
-	    }
-	    if (idiscu > 0) {
-		wkill = *wl;
-		i__1 = idiscu;
-		for (jdisc = 1; jdisc <= i__1; ++jdisc) {
-		    iw = 0;
-		    i__2 = *m;
-		    for (je = 1; je <= i__2; ++je) {
-			if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) {
-			    iw = je;
-			    wkill = w[je];
-			}
-/* L110: */
-		    }
-		    iblock[iw] = 0;
-/* L120: */
-		}
-	    }
-/*           Now erase all eigenvalues with IBLOCK set to zero */
-	    im = 0;
-	    i__1 = *m;
-	    for (je = 1; je <= i__1; ++je) {
-		if (iblock[je] != 0) {
-		    ++im;
-		    w[im] = w[je];
-		    werr[im] = werr[je];
-		    indexw[im] = indexw[je];
-		    iblock[im] = iblock[je];
-		}
-/* L130: */
-	    }
-	    *m = im;
-	}
-	if (idiscl < 0 || idiscu < 0) {
-	    toofew = TRUE_;
-	}
-    }
-
-    if (irange == 1 && *m != *n || irange == 3 && *m != *iu - *il + 1) {
-	toofew = TRUE_;
-    }
-/*     If ORDER='B', do nothing the eigenvalues are already sorted by */
-/*        block. */
-/*     If ORDER='E', sort the eigenvalues from smallest to largest */
-    if (olsame(order, "E") && *nsplit > 1) {
-	i__1 = *m - 1;
-	for (je = 1; je <= i__1; ++je) {
-	    ie = 0;
-	    tmp1 = w[je];
-	    i__2 = *m;
-	    for (j = je + 1; j <= i__2; ++j) {
-		if (w[j] < tmp1) {
-		    ie = j;
-		    tmp1 = w[j];
-		}
-/* L140: */
-	    }
-	    if (ie != 0) {
-		tmp2 = werr[ie];
-		itmp1 = iblock[ie];
-		itmp2 = indexw[ie];
-		w[ie] = w[je];
-		werr[ie] = werr[je];
-		iblock[ie] = iblock[je];
-		indexw[ie] = indexw[je];
-		w[je] = tmp1;
-		werr[je] = tmp2;
-		iblock[je] = itmp1;
-		indexw[je] = itmp2;
-	    }
-/* L150: */
-	}
-    }
-
-    *info = 0;
-    if (ncnvrg) {
-	++(*info);
-    }
-    if (toofew) {
-	*info += 2;
-    }
-    return 0;
-
-/*     End of ODRRD */
-
-} /* odrrd_ */
diff --git a/external/pmrrr/src/lapack/odrre.c b/external/pmrrr/src/lapack/odrre.c
deleted file mode 100644
index 05e74171bf..0000000000
--- a/external/pmrrr/src/lapack/odrre.c
+++ /dev/null
@@ -1,858 +0,0 @@
-/* dlarre.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Table of constant values */
-static int c__1 = 1;
-static int c__2 = 2;
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Subroutine */ 
-int odrre(char *range, int *n, double *vl, 
-	double *vu, int *il, int *iu, double *d__, double *e, 
-	double *e2, double *rtol1, double *rtol2, double *spltol, 
-	int *nsplit, int *isplit, int *m, double *w, 
-	double *werr, double *wgap, int *iblock, int *indexw, 
-	double *gers, double *pivmin, double *work, int *
-	iwork, int *info)
-{
-    /* System generated locals */
-    int i__1, i__2;
-    double d__1, d__2, d__3;
-
-    /* Builtin functions */
-    //    double sqrt(double), log(double);
-
-    /* Local variables */
-    int i__, j;
-    double s1, s2;
-    int mb;
-    double gl;
-    int in, mm;
-    double gu;
-    int cnt;
-    double eps, tau, tmp, rtl;
-    int cnt1, cnt2;
-    double tmp1, eabs;
-    int iend, jblk;
-    double eold;
-    int indl;
-    double dmax__, emax;
-    int wend, idum, indu;
-    double rtol;
-    int iseed[4];
-    double avgap, sigma;
-    extern int olsame(char *, char *);
-    int iinfo;
-    extern /* Subroutine */ int odcpy(int *, double *, int *, 
-	    double *, int *);
-    long double norep;
-    extern /* Subroutine */ int odsq2(int *, double *, int *);
-    // extern double odmch(char *);
-    int ibegin;
-    long double forceb;
-    int irange;
-    double sgndef;
-    extern /* Subroutine */ int odrra(int *, double *, double *, 
-	     double *, double *, double *, int *, int *, 
-	    int *), odrrb(int *, double *, double *, 
-	    int *, int *, double *, double *, int *, 
-	    double *, double *, double *, double *, int *, 
-	     double *, double *, int *, int *), odrrc(char *
-, int *, double *, double *, double *, double 
-	    *, double *, int *, int *, int *, int *);
-    int wbegin;
-    extern /* Subroutine */ int odrrd(char *, char *, int *, double 
-	    *, double *, int *, int *, double *, double *, 
-	     double *, double *, double *, double *, int *
-, int *, int *, double *, double *, double *, 
-	    double *, int *, int *, double *, int *, 
-	    int *);
-    double safmin, spdiam;
-    extern /* Subroutine */ int odrrk(int *, int *, double *, 
-	    double *, double *, double *, double *, 
-	    double *, double *, double *, int *);
-    long double usedqd;
-    double clwdth, isleft;
-    extern /* Subroutine */ int odrnv(int *, int *, int *, 
-	    double *);
-    double isrght, bsrtol, dpivot;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  To find the desired eigenvalues of a given real symmetric */
-/*  tridiagonal matrix T, ODRRE sets any "small" off-diagonal */
-/*  elements to zero, and for each unreduced block T_i, it finds */
-/*  (a) a suitable shift at one end of the block's spectrum, */
-/*  (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
-/*  (c) eigenvalues of each L_i D_i L_i^T. */
-/*  The representations and eigenvalues found are then used by */
-/*  DSTEMR to compute the eigenvectors of T. */
-/*  The accuracy varies depending on whether bisection is used to */
-/*  find a few eigenvalues or the dqds algorithm (subroutine ODSQ2) to */
-/*  conpute all and then discard any unwanted one. */
-/*  As an added benefit, ODRRE also outputs the n */
-/*  Gerschgorin intervals for the matrices L_i D_i L_i^T. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  RANGE   (input) CHARACTER */
-/*          = 'A': ("All")   all eigenvalues will be found. */
-/*          = 'V': ("Value") all eigenvalues in the half-open interval */
-/*                           (VL, VU] will be found. */
-/*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
-/*                           entire matrix) will be found. */
-
-/*  N       (input) INT */
-/*          The order of the matrix. N > 0. */
-
-/*  VL      (input/output) DOUBLE PRECISION */
-/*  VU      (input/output) DOUBLE PRECISION */
-/*          If RANGE='V', the lower and upper bounds for the eigenvalues. */
-/*          Eigenvalues less than or equal to VL, or greater than VU, */
-/*          will not be returned.  VL < VU. */
-/*          If RANGE='I' or ='A', ODRRE computes bounds on the desired */
-/*          part of the spectrum. */
-
-/*  IL      (input) INT */
-/*  IU      (input) INT */
-/*          If RANGE='I', the indices (in ascending order) of the */
-/*          smallest and largest eigenvalues to be returned. */
-/*          1 <= IL <= IU <= N. */
-
-/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the N diagonal elements of the tridiagonal */
-/*          matrix T. */
-/*          On exit, the N diagonal elements of the diagonal */
-/*          matrices D_i. */
-
-/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the first (N-1) entries contain the subdiagonal */
-/*          elements of the tridiagonal matrix T; E(N) need not be set. */
-/*          On exit, E contains the subdiagonal elements of the unit */
-/*          bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
-/*          1 <= I <= NSPLIT, contain the base points sigma_i on output. */
-
-/*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the first (N-1) entries contain the SQUARES of the */
-/*          subdiagonal elements of the tridiagonal matrix T; */
-/*          E2(N) need not be set. */
-/*          On exit, the entries E2( ISPLIT( I ) ), */
-/*          1 <= I <= NSPLIT, have been set to zero */
-
-/*  RTOL1   (input) DOUBLE PRECISION */
-/*  RTOL2   (input) DOUBLE PRECISION */
-/*           Parameters for bisection. */
-/*           An interval [LEFT,RIGHT] has converged if */
-/*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
-
-/*  SPLTOL (input) DOUBLE PRECISION */
-/*          The threshold for splitting. */
-
-/*  NSPLIT  (output) INT */
-/*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
-
-/*  ISPLIT  (output) INT array, dimension (N) */
-/*          The splitting points, at which T breaks up into blocks. */
-/*          The first block consists of rows/columns 1 to ISPLIT(1), */
-/*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
-/*          etc., and the NSPLIT-th consists of rows/columns */
-/*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
-
-/*  M       (output) INT */
-/*          The total number of eigenvalues (of all L_i D_i L_i^T) */
-/*          found. */
-
-/*  W       (output) DOUBLE PRECISION array, dimension (N) */
-/*          The first M elements contain the eigenvalues. The */
-/*          eigenvalues of each of the blocks, L_i D_i L_i^T, are */
-/*          sorted in ascending order ( ODRRE may use the */
-/*          remaining N-M elements as workspace). */
-
-/*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
-/*          The error bound on the corresponding eigenvalue in W. */
-
-/*  WGAP    (output) DOUBLE PRECISION array, dimension (N) */
-/*          The separation from the right neighbor eigenvalue in W. */
-/*          The gap is only with respect to the eigenvalues of the same block */
-/*          as each block has its own representation tree. */
-/*          Exception: at the right end of a block we store the left gap */
-
-/*  IBLOCK  (output) INT array, dimension (N) */
-/*          The indices of the blocks (submatrices) associated with the */
-/*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
-/*          W(i) belongs to the first block from the top, =2 if W(i) */
-/*          belongs to the second block, etc. */
-
-/*  INDEXW  (output) INT array, dimension (N) */
-/*          The indices of the eigenvalues within each block (submatrix); */
-/*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
-/*          i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
-
-/*  GERS    (output) DOUBLE PRECISION array, dimension (2*N) */
-/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
-/*          is (GERS(2*i-1), GERS(2*i)). */
-
-/*  PIVMIN  (output) DOUBLE PRECISION */
-/*          The minimum pivot in the Sturm sequence for T. */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N) */
-/*          Workspace. */
-
-/*  IWORK   (workspace) INT array, dimension (5*N) */
-/*          Workspace. */
-
-/*  INFO    (output) INT */
-/*          = 0:  successful exit */
-/*          > 0:  A problem occured in ODRRE. */
-/*          < 0:  One of the called subroutines signaled an internal problem. */
-/*                Needs inspection of the corresponding parameter IINFO */
-/*                for further information. */
-
-/*          =-1:  Problem in ODRRD. */
-/*          = 2:  No base representation could be found in MAXTRY iterations. */
-/*                Increasing MAXTRY and recompilation might be a remedy. */
-/*          =-3:  Problem in ODRRB when computing the refined root */
-/*                representation for ODSQ2. */
-/*          =-4:  Problem in ODRRB when preforming bisection on the */
-/*                desired part of the spectrum. */
-/*          =-5:  Problem in ODSQ2. */
-/*          =-6:  Problem in ODSQ2. */
-
-/*  Further Details */
-/*  The base representations are required to suffer very little */
-/*  element growth and consequently define all their eigenvalues to */
-/*  high relative accuracy. */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Local Arrays .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --iwork;
-    --work;
-    --gers;
-    --indexw;
-    --iblock;
-    --wgap;
-    --werr;
-    --w;
-    --isplit;
-    --e2;
-    --e;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-
-/*     Decode RANGE */
-
-    if (olsame(range, "A")) {
-	irange = 1;
-    } else if (olsame(range, "V")) {
-	irange = 3;
-    } else if (olsame(range, "I")) {
-	irange = 2;
-    }
-    *m = 0;
-/*     Get machine constants */
-    safmin = DBL_MIN; // odmch("S");
-    eps = DBL_EPSILON; // odmch("P");
-/*     Set parameters */
-    rtl = sqrt(eps);
-    bsrtol = sqrt(eps);
-/*     Treat case of 1x1 matrix for quick return */
-    if (*n == 1) {
-	if (irange == 1 || irange == 3 && d__[1] > *vl && d__[1] <= *vu || 
-		irange == 2 && *il == 1 && *iu == 1) {
-	    *m = 1;
-	    w[1] = d__[1];
-/*           The computation error of the eigenvalue is zero */
-	    werr[1] = 0.;
-	    wgap[1] = 0.;
-	    iblock[1] = 1;
-	    indexw[1] = 1;
-	    gers[1] = d__[1];
-	    gers[2] = d__[1];
-	}
-/*        store the shift for the initial RRR, which is zero in this case */
-	e[1] = 0.;
-	return 0;
-    }
-/*     General case: tridiagonal matrix of order > 1 */
-
-/*     Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
-/*     Compute maximum off-diagonal entry and pivmin. */
-    gl = d__[1];
-    gu = d__[1];
-    eold = 0.;
-    emax = 0.;
-    e[*n] = 0.;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	werr[i__] = 0.;
-	wgap[i__] = 0.;
-	eabs = (d__1 = e[i__], fabs(d__1));
-	if (eabs >= emax) {
-	    emax = eabs;
-	}
-	tmp1 = eabs + eold;
-	gers[(i__ << 1) - 1] = d__[i__] - tmp1;
-/* Computing MIN */
-	d__1 = gl, d__2 = gers[(i__ << 1) - 1];
-	gl = fmin(d__1,d__2);
-	gers[i__ * 2] = d__[i__] + tmp1;
-/* Computing MAX */
-	d__1 = gu, d__2 = gers[i__ * 2];
-	gu = fmax(d__1,d__2);
-	eold = eabs;
-/* L5: */
-    }
-/*     The minimum pivot allowed in the Sturm sequence for T */
-/* Computing MAX */
-/* Computing 2nd power */
-    d__3 = emax;
-    d__1 = 1., d__2 = d__3 * d__3;
-    *pivmin = safmin * fmax(d__1,d__2);
-/*     Compute spectral diameter. The Gerschgorin bounds give an */
-/*     estimate that is wrong by at most a factor of SQRT(2) */
-    spdiam = gu - gl;
-/*     Compute splitting points */
-    odrra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
-	    iinfo);
-/*     Can force use of bisection instead of faster DQDS. */
-/*     Option left in the code for future multisection work. */
-    forceb = FALSE_;
-/*     Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
-/*     explicitly wants bisection. */
-    usedqd = irange == 1 && ! forceb;
-    if (irange == 1 && ! forceb) {
-/*        Set interval [VL,VU] that contains all eigenvalues */
-	*vl = gl;
-	*vu = gu;
-    } else {
-/*        We call ODRRD to find crude approximations to the eigenvalues */
-/*        in the desired range. In case IRANGE = INDRNG, we also obtain the */
-/*        interval (VL,VU] that contains all the wanted eigenvalues. */
-/*        An interval [LEFT,RIGHT] has converged if */
-/*        RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
-/*        ODRRD needs a WORK of size 4*N, IWORK of size 3*N */
-	odrrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
-		1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], 
-		vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
-	if (iinfo != 0) {
-	    *info = -1;
-	    return 0;
-	}
-/*        Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
-	i__1 = *n;
-	for (i__ = mm + 1; i__ <= i__1; ++i__) {
-	    w[i__] = 0.;
-	    werr[i__] = 0.;
-	    iblock[i__] = 0;
-	    indexw[i__] = 0;
-/* L14: */
-	}
-    }
-/* ** */
-/*     Loop over unreduced blocks */
-    ibegin = 1;
-    wbegin = 1;
-    i__1 = *nsplit;
-    for (jblk = 1; jblk <= i__1; ++jblk) {
-	iend = isplit[jblk];
-	in = iend - ibegin + 1;
-/*        1 X 1 block */
-	if (in == 1) {
-	    if (irange == 1 || irange == 3 && d__[ibegin] > *vl && d__[ibegin]
-		     <= *vu || irange == 2 && iblock[wbegin] == jblk) {
-		++(*m);
-		w[*m] = d__[ibegin];
-		werr[*m] = 0.;
-/*              The gap for a single block doesn't matter for the later */
-/*              algorithm and is assigned an arbitrary large value */
-		wgap[*m] = 0.;
-		iblock[*m] = jblk;
-		indexw[*m] = 1;
-		++wbegin;
-	    }
-/*           E( IEND ) holds the shift for the initial RRR */
-	    e[iend] = 0.;
-	    ibegin = iend + 1;
-	    goto L170;
-	}
-
-/*        Blocks of size larger than 1x1 */
-
-/*        E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
-	e[iend] = 0.;
-
-/*        Find local outer bounds GL,GU for the block */
-	gl = d__[ibegin];
-	gu = d__[ibegin];
-	i__2 = iend;
-	for (i__ = ibegin; i__ <= i__2; ++i__) {
-/* Computing MIN */
-	    d__1 = gers[(i__ << 1) - 1];
-	    gl = fmin(d__1,gl);
-/* Computing MAX */
-	    d__1 = gers[i__ * 2];
-	    gu = fmax(d__1,gu);
-/* L15: */
-	}
-	spdiam = gu - gl;
-	if (! (irange == 1 && ! forceb)) {
-/*           Count the number of eigenvalues in the current block. */
-	    mb = 0;
-	    i__2 = mm;
-	    for (i__ = wbegin; i__ <= i__2; ++i__) {
-		if (iblock[i__] == jblk) {
-		    ++mb;
-		} else {
-		    goto L21;
-		}
-/* L20: */
-	    }
-L21:
-	    if (mb == 0) {
-/*              No eigenvalue in the current block lies in the desired range */
-/*              E( IEND ) holds the shift for the initial RRR */
-		e[iend] = 0.;
-		ibegin = iend + 1;
-		goto L170;
-	    } else {
-/*              Decide whether dqds or bisection is more efficient */
-		usedqd = (double) mb > in * .5 && ! forceb;
-		wend = wbegin + mb - 1;
-/*              Calculate gaps for the current block */
-/*              In later stages, when representations for individual */
-/*              eigenvalues are different, we use SIGMA = E( IEND ). */
-		sigma = 0.;
-		i__2 = wend - 1;
-		for (i__ = wbegin; i__ <= i__2; ++i__) {
-/* Computing MAX */
-		    d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + 
-			    werr[i__]);
-		    wgap[i__] = fmax(d__1,d__2);
-/* L30: */
-		}
-/* Computing MAX */
-		d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
-		wgap[wend] = fmax(d__1,d__2);
-/*              Find local index of the first and last desired evalue. */
-		indl = indexw[wbegin];
-		indu = indexw[wend];
-	    }
-	}
-	if (irange == 1 && ! forceb || usedqd) {
-/*           Case of DQDS */
-/*           Find approximations to the extremal eigenvalues of the block */
-	    odrrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
-		    rtl, &tmp, &tmp1, &iinfo);
-	    if (iinfo != 0) {
-		*info = -1;
-		return 0;
-	    }
-/* Computing MAX */
-	    d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, 
-		    fabs(d__1));
-	    isleft = fmax(d__2,d__3);
-	    odrrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
-		    rtl, &tmp, &tmp1, &iinfo);
-	    if (iinfo != 0) {
-		*info = -1;
-		return 0;
-	    }
-/* Computing MIN */
-	    d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, 
-		    fabs(d__1));
-	    isrght = fmin(d__2,d__3);
-/*           Improve the estimate of the spectral diameter */
-	    spdiam = isrght - isleft;
-	} else {
-/*           Case of bisection */
-/*           Find approximations to the wanted extremal eigenvalues */
-/* Computing MAX */
-	    d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = 
-		    w[wbegin] - werr[wbegin], fabs(d__1));
-	    isleft = fmax(d__2,d__3);
-/* Computing MIN */
-	    d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
-		    wend] + werr[wend], fabs(d__1));
-	    isrght = fmin(d__2,d__3);
-	}
-/*        Decide whether the base representation for the current block */
-/*        L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
-/*        should be on the left or the right end of the current block. */
-/*        The strategy is to shift to the end which is "more populated" */
-/*        Furthermore, decide whether to use DQDS for the computation of */
-/*        the eigenvalue approximations at the end of ODRRE or bisection. */
-/*        dqds is chosen if all eigenvalues are desired or the number of */
-/*        eigenvalues to be computed is large compared to the blocksize. */
-	if (irange == 1 && ! forceb) {
-/*           If all the eigenvalues have to be computed, we use dqd */
-	    usedqd = TRUE_;
-/*           INDL is the local index of the first eigenvalue to compute */
-	    indl = 1;
-	    indu = in;
-/*           MB =  number of eigenvalues to compute */
-	    mb = in;
-	    wend = wbegin + mb - 1;
-/*           Define 1/4 and 3/4 points of the spectrum */
-	    s1 = isleft + spdiam * .25;
-	    s2 = isrght - spdiam * .25;
-	} else {
-/*           ODRRD has computed IBLOCK and INDEXW for each eigenvalue */
-/*           approximation. */
-/*           choose sigma */
-	    if (usedqd) {
-		s1 = isleft + spdiam * .25;
-		s2 = isrght - spdiam * .25;
-	    } else {
-		tmp = fmin(isrght,*vu) - fmax(isleft,*vl);
-		s1 = fmax(isleft,*vl) + tmp * .25;
-		s2 = fmin(isrght,*vu) - tmp * .25;
-	    }
-	}
-/*        Compute the negcount at the 1/4 and 3/4 points */
-	if (mb > 1) {
-	    odrrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
-		    cnt, &cnt1, &cnt2, &iinfo);
-	}
-	if (mb == 1) {
-	    sigma = gl;
-	    sgndef = 1.;
-	} else if (cnt1 - indl >= indu - cnt2) {
-	    if (irange == 1 && ! forceb) {
-		sigma = fmax(isleft,gl);
-	    } else if (usedqd) {
-/*              use Gerschgorin bound as shift to get pos def matrix */
-/*              for dqds */
-		sigma = isleft;
-	    } else {
-/*              use approximation of the first desired eigenvalue of the */
-/*              block as shift */
-		sigma = fmax(isleft,*vl);
-	    }
-	    sgndef = 1.;
-	} else {
-	    if (irange == 1 && ! forceb) {
-		sigma = fmin(isrght,gu);
-	    } else if (usedqd) {
-/*              use Gerschgorin bound as shift to get neg def matrix */
-/*              for dqds */
-		sigma = isrght;
-	    } else {
-/*              use approximation of the first desired eigenvalue of the */
-/*              block as shift */
-		sigma = fmin(isrght,*vu);
-	    }
-	    sgndef = -1.;
-	}
-/*        An initial SIGMA has been chosen that will be used for computing */
-/*        T - SIGMA I = L D L^T */
-/*        Define the increment TAU of the shift in case the initial shift */
-/*        needs to be refined to obtain a factorization with not too much */
-/*        element growth. */
-	if (usedqd) {
-/*           The initial SIGMA was to the outer end of the spectrum */
-/*           the matrix is definite and we need not retreat. */
-	    tau = spdiam * eps * *n + *pivmin * 2.;
-	    tau = fmax(tau, 2 * eps * fabs(sigma));
-	} else {
-	    if (mb > 1) {
-		clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
-		avgap = (d__1 = clwdth / (double) (wend - wbegin), fabs(
-			d__1));
-		if (sgndef == 1.) {
-/* Computing MAX */
-		    d__1 = wgap[wbegin];
-		    tau = fmax(d__1,avgap) * .5;
-/* Computing MAX */
-		    d__1 = tau, d__2 = werr[wbegin];
-		    tau = fmax(d__1,d__2);
-		} else {
-/* Computing MAX */
-		    d__1 = wgap[wend - 1];
-		    tau = fmax(d__1,avgap) * .5;
-/* Computing MAX */
-		    d__1 = tau, d__2 = werr[wend];
-		    tau = fmax(d__1,d__2);
-		}
-	    } else {
-		tau = werr[wbegin];
-	    }
-	}
-
-	for (idum = 1; idum <= 6; ++idum) {
-/*           Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
-/*           Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
-/*           pivots in WORK(2*IN+1:3*IN) */
-	    dpivot = d__[ibegin] - sigma;
-	    work[1] = dpivot;
-	    dmax__ = fabs(work[1]);
-	    j = ibegin;
-	    i__2 = in - 1;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		work[(in << 1) + i__] = 1. / work[i__];
-		tmp = e[j] * work[(in << 1) + i__];
-		work[in + i__] = tmp;
-		dpivot = d__[j + 1] - sigma - tmp * e[j];
-		work[i__ + 1] = dpivot;
-/* Computing MAX */
-		d__1 = dmax__, d__2 = fabs(dpivot);
-		dmax__ = fmax(d__1,d__2);
-		++j;
-/* L70: */
-	    }
-/*           check for element growth */
-	    if (dmax__ > spdiam * 64.) {
-		norep = TRUE_;
-	    } else {
-		norep = FALSE_;
-	    }
-	    if (usedqd && ! norep) {
-/*              Ensure the definiteness of the representation */
-/*              All entries of D (of L D L^T) must have the same sign */
-		i__2 = in;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    tmp = sgndef * work[i__];
-		    if (tmp < 0.) {
-			norep = TRUE_;
-		    }
-/* L71: */
-		}
-	    }
-	    if (norep) {
-/*              Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
-/*              shift which makes the matrix definite. So we should end up */
-/*              here really only in the case of IRANGE = VALRNG or INDRNG. */
-		if (idum == 5) {
-		    if (sgndef == 1.) {
-/*                    The fudged Gerschgorin shift should succeed */
-			sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
-		    } else {
-			sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
-		    }
-		} else {
-		    sigma -= sgndef * tau;
-		    tau *= 2.;
-		}
-	    } else {
-/*              an initial RRR is found */
-		goto L83;
-	    }
-/* L80: */
-	}
-/*        if the program reaches this point, no base representation could be */
-/*        found in MAXTRY iterations. */
-	*info = 2;
-	return 0;
-L83:
-/*        At this point, we have found an initial base representation */
-/*        T - SIGMA I = L D L^T with not too much element growth. */
-/*        Store the shift. */
-	e[iend] = sigma;
-/*        Store D and L. */
-	odcpy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
-	i__2 = in - 1;
-	odcpy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
-	if (mb > 1) {
-
-/*           Perturb each entry of the base representation by a small */
-/*           (but random) relative amount to overcome difficulties with */
-/*           glued matrices. */
-
-	    for (i__ = 1; i__ <= 4; ++i__) {
-		iseed[i__ - 1] = 1;
-/* L122: */
-	    }
-	    i__2 = (in << 1) - 1;
-	    odrnv(&c__2, iseed, &i__2, &work[1]);
-	    i__2 = in - 1;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
-		e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
-/* L125: */
-	    }
-	    d__[iend] *= eps * 4. * work[in] + 1.;
-
-	}
-
-/*        Don't update the Gerschgorin intervals because keeping track */
-/*        of the updates would be too much work in DLARRV. */
-/*        We update W instead and use it to locate the proper Gerschgorin */
-/*        intervals. */
-/*        Compute the required eigenvalues of L D L' by bisection or dqds */
-	if (! usedqd) {
-/*           If ODRRD has been used, shift the eigenvalue approximations */
-/*           according to their representation. This is necessary for */
-/*           a uniform DLARRV since dqds computes eigenvalues of the */
-/*           shifted representation. In DLARRV, W will always hold the */
-/*           UNshifted eigenvalue approximation. */
-	    i__2 = wend;
-	    for (j = wbegin; j <= i__2; ++j) {
-		w[j] -= sigma;
-		werr[j] += (d__1 = w[j], fabs(d__1)) * eps;
-/* L134: */
-	    }
-/*           call ODRRB to reduce eigenvalue error of the approximations */
-/*           from ODRRD */
-	    i__2 = iend - 1;
-	    for (i__ = ibegin; i__ <= i__2; ++i__) {
-/* Computing 2nd power */
-		d__1 = e[i__];
-		work[i__] = d__[i__] * (d__1 * d__1);
-/* L135: */
-	    }
-/*           use bisection to find EV from INDL to INDU */
-	    i__2 = indl - 1;
-	    odrrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, 
-		    rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
-		    work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
-		    iinfo);
-	    if (iinfo != 0) {
-		*info = -4;
-		return 0;
-	    }
-/*           ODRRB computes all gaps correctly except for the last one */
-/*           Record distance to VU/GU */
-/* Computing MAX */
-	    d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
-	    wgap[wend] = fmax(d__1,d__2);
-	    i__2 = indu;
-	    for (i__ = indl; i__ <= i__2; ++i__) {
-		++(*m);
-		iblock[*m] = jblk;
-		indexw[*m] = i__;
-/* L138: */
-	    }
-	} else {
-/*           Call dqds to get all eigs (and then possibly delete unwanted */
-/*           eigenvalues). */
-/*           Note that dqds finds the eigenvalues of the L D L^T representation */
-/*           of T to high relative accuracy. High relative accuracy */
-/*           might be lost when the shift of the RRR is subtracted to obtain */
-/*           the eigenvalues of T. However, T is not guaranteed to define its */
-/*           eigenvalues to high relative accuracy anyway. */
-/*           Set RTOL to the order of the tolerance used in ODSQ2 */
-/*           This is an ESTIMATED error, the worst case bound is 4*N*EPS */
-/*           which is usually too large and requires unnecessary work to be */
-/*           done by bisection when computing the eigenvectors */
-	    rtol = log((double) in) * 4. * eps;
-	    j = ibegin;
-	    i__2 = in - 1;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		work[(i__ << 1) - 1] = (d__1 = d__[j], fabs(d__1));
-		work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
-		++j;
-/* L140: */
-	    }
-	    work[(in << 1) - 1] = (d__1 = d__[iend], fabs(d__1));
-	    work[in * 2] = 0.;
-	    odsq2(&in, &work[1], &iinfo);
-	    if (iinfo != 0) {
-/*              If IINFO = -5 then an index is part of a tight cluster */
-/*              and should be changed. The index is in IWORK(1) and the */
-/*              gap is in WORK(N+1) */
-		*info = -5;
-		return 0;
-	    } else {
-/*              Test that all eigenvalues are positive as expected */
-		i__2 = in;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    if (work[i__] < 0.) {
-			*info = -6;
-			return 0;
-		    }
-/* L149: */
-		}
-	    }
-	    if (sgndef > 0.) {
-		i__2 = indu;
-		for (i__ = indl; i__ <= i__2; ++i__) {
-		    ++(*m);
-		    w[*m] = work[in - i__ + 1];
-		    iblock[*m] = jblk;
-		    indexw[*m] = i__;
-/* L150: */
-		}
-	    } else {
-		i__2 = indu;
-		for (i__ = indl; i__ <= i__2; ++i__) {
-		    ++(*m);
-		    w[*m] = -work[i__];
-		    iblock[*m] = jblk;
-		    indexw[*m] = i__;
-/* L160: */
-		}
-	    }
-	    i__2 = *m;
-	    for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
-/*              the value of RTOL below should be the tolerance in ODSQ2 */
-		werr[i__] = rtol * (d__1 = w[i__], fabs(d__1));
-/* L165: */
-	    }
-	    i__2 = *m - 1;
-	    for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
-/*              compute the right gap between the intervals */
-/* Computing MAX */
-		d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
-			i__]);
-		wgap[i__] = fmax(d__1,d__2);
-/* L166: */
-	    }
-/* Computing MAX */
-	    d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
-	    wgap[*m] = fmax(d__1,d__2);
-	}
-/*        proceed with next block */
-	ibegin = iend + 1;
-	wbegin = wend + 1;
-L170:
-	;
-    }
-
-    return 0;
-
-/*     end of ODRRE */
-
-} /* odrre_ */
diff --git a/external/pmrrr/src/lapack/odrrf.c b/external/pmrrr/src/lapack/odrrf.c
deleted file mode 100644
index fb3ad63408..0000000000
--- a/external/pmrrr/src/lapack/odrrf.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/* dlarrf.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Table of constant values */
-#define TRUE_ (1)
-#define FALSE_ (0)
-static int c__1 = 1;
-
-/* Subroutine */ 
-int odrrf(int *n, double *d__, double *l, 
-	double *ld, int *clstrt, int *clend, double *w, 
-	double *wgap, double *werr, double *spdiam, double *
-	clgapl, double *clgapr, double *pivmin, double *sigma, 
-	double *dplus, double *lplus, double *work, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2, d__3;
-
-    /* Builtin functions */
-    //    double sqrt(double);
-
-    /* Local variables */
-    int i__;
-    double s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, 
-	    znm2, growthbound, fail, fact, oldp;
-    int indx;
-    double prod;
-    int ktry;
-    double fail2, avgap, ldmax, rdmax;
-    int shift;
-    extern /* Subroutine */ int odcpy(int *, double *, int *, 
-	    double *, int *);
-    int dorrr1;
-    // extern double odmch(char *);
-    double ldelta;
-    int nofail;
-    double mingap, lsigma, rdelta;
-    extern int odnan(double *);
-    int forcer;
-    double rsigma, clwdth;
-    int sawnan1, sawnan2, tryrrr1;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-/* * */
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Given the initial representation L D L^T and its cluster of close */
-/*  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
-/*  W( CLEND ), ODRRF finds a new relatively robust representation */
-/*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
-/*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix (subblock, if the matrix splitted). */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of the diagonal matrix D. */
-
-/*  L       (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (N-1) subdiagonal elements of the unit bidiagonal */
-/*          matrix L. */
-
-/*  LD      (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (N-1) elements L(i)*D(i). */
-
-/*  CLSTRT  (input) INT */
-/*          The index of the first eigenvalue in the cluster. */
-
-/*  CLEND   (input) INT */
-/*          The index of the last eigenvalue in the cluster. */
-
-/*  W       (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
-/*          The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
-/*          W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
-/*          close eigenalues. */
-
-/*  WGAP    (input/output) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
-/*          The separation from the right neighbor eigenvalue in W. */
-
-/*  WERR    (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
-/*          WERR contain the semiwidth of the uncertainty */
-/*          interval of the corresponding eigenvalue APPROXIMATION in W */
-
-/*  SPDIAM (input) estimate of the spectral diameter obtained from the */
-/*          Gerschgorin intervals */
-
-/*  CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
-/*          Set by the calling routine to protect against shifts too close */
-/*          to eigenvalues outside the cluster. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot allowed in the Sturm sequence. */
-
-/*  SIGMA   (output) DOUBLE PRECISION */
-/*          The shift used to form L(+) D(+) L(+)^T. */
-
-/*  DPLUS   (output) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of the diagonal matrix D(+). */
-
-/*  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1) */
-/*          The first (N-1) elements of LPLUS contain the subdiagonal */
-/*          elements of the unit bidiagonal matrix L(+). */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
-/*          Workspace. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --work;
-    --lplus;
-    --dplus;
-    --werr;
-    --wgap;
-    --w;
-    --ld;
-    --l;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-    fact = 2.;
-    eps = DBL_EPSILON; // eps = odmch("Precision");
-    shift = 0;
-    forcer = FALSE_;
-/*     Note that we cannot guarantee that for any of the shifts tried, */
-/*     the factorization has a small or even moderate element growth. */
-/*     There could be Ritz values at both ends of the cluster and despite */
-/*     backing off, there are examples where all factorizations tried */
-/*     (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
-/*     element growth. */
-/*     For this reason, we should use PIVMIN in this subroutine so that at */
-/*     least the L D L^T factorization exists. It can be checked afterwards */
-/*     whether the element growth caused bad residuals/orthogonality. */
-/*     Decide whether the code should accept the best among all */
-/*     representations despite large element growth or signal INFO=1 */
-    nofail = TRUE_;
-
-/*     Compute the average gap length of the cluster */
-    clwdth = (d__1 = w[*clend] - w[*clstrt], fabs(d__1)) + werr[*clend] + werr[
-	    *clstrt];
-    avgap = clwdth / (double) (*clend - *clstrt);
-    mingap = fmin(*clgapl,*clgapr);
-/*     Initial values for shifts to both ends of cluster */
-/* Computing MIN */
-    d__1 = w[*clstrt], d__2 = w[*clend];
-    lsigma = fmin(d__1,d__2) - werr[*clstrt];
-/* Computing MAX */
-    d__1 = w[*clstrt], d__2 = w[*clend];
-    rsigma = fmax(d__1,d__2) + werr[*clend];
-/*     Use a small fudge to make sure that we really shift to the outside */
-    lsigma -= fabs(lsigma) * 4. * eps;
-    rsigma += fabs(rsigma) * 4. * eps;
-/*     Compute upper bounds for how much to back off the initial shifts */
-    ldmax = mingap * .25 + *pivmin * 2.;
-    rdmax = mingap * .25 + *pivmin * 2.;
-/* Computing MAX */
-    d__1 = avgap, d__2 = wgap[*clstrt];
-    ldelta = fmax(d__1,d__2) / fact;
-/* Computing MAX */
-    d__1 = avgap, d__2 = wgap[*clend - 1];
-    rdelta = fmax(d__1,d__2) / fact;
-
-/*     Initialize the record of the best representation found */
-
-    // s = DBL_MIN; // s = odmch("S");
-    smlgrowth = DBL_MAX;
-    fail = (double) (*n - 1) * mingap / (*spdiam * eps);
-    fail2 = (double) (*n - 1) * mingap / (*spdiam * sqrt(eps));
-    bestshift = lsigma;
-
-/*     while (KTRY <= KTRYMAX) */
-    ktry = 0;
-    growthbound = *spdiam * 8.;
-L5:
-    sawnan1 = FALSE_;
-    sawnan2 = FALSE_;
-/*     Ensure that we do not back off too much of the initial shifts */
-    ldelta = fmin(ldmax,ldelta);
-    rdelta = fmin(rdmax,rdelta);
-/*     Compute the element growth when shifting to both ends of the cluster */
-/*     accept the shift if there is no element growth at one of the two ends */
-/*     Left end */
-    s = -lsigma;
-    dplus[1] = d__[1] + s;
-    if (fabs(dplus[1]) < *pivmin) {
-	dplus[1] = -(*pivmin);
-/*        Need to set SAWNAN1 because refined RRR test should not be used */
-/*        in this case */
-	sawnan1 = TRUE_;
-    }
-    max1 = fabs(dplus[1]);
-    i__1 = *n - 1;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	lplus[i__] = ld[i__] / dplus[i__];
-	s = s * lplus[i__] * l[i__] - lsigma;
-	dplus[i__ + 1] = d__[i__ + 1] + s;
-	if ((d__1 = dplus[i__ + 1], fabs(d__1)) < *pivmin) {
-	    dplus[i__ + 1] = -(*pivmin);
-/*           Need to set SAWNAN1 because refined RRR test should not be used */
-/*           in this case */
-	    sawnan1 = TRUE_;
-	}
-/* Computing MAX */
-	d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], fabs(d__1));
-	max1 = fmax(d__2,d__3);
-/* L6: */
-    }
-    sawnan1 = sawnan1 || odnan(&max1);
-    if (forcer || max1 <= growthbound && ! sawnan1) {
-	*sigma = lsigma;
-	shift = 1;
-	goto L100;
-    }
-/*     Right end */
-    s = -rsigma;
-    work[1] = d__[1] + s;
-    if (fabs(work[1]) < *pivmin) {
-	work[1] = -(*pivmin);
-/*        Need to set SAWNAN2 because refined RRR test should not be used */
-/*        in this case */
-	sawnan2 = TRUE_;
-    }
-    max2 = fabs(work[1]);
-    i__1 = *n - 1;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	work[*n + i__] = ld[i__] / work[i__];
-	s = s * work[*n + i__] * l[i__] - rsigma;
-	work[i__ + 1] = d__[i__ + 1] + s;
-	if ((d__1 = work[i__ + 1], fabs(d__1)) < *pivmin) {
-	    work[i__ + 1] = -(*pivmin);
-/*           Need to set SAWNAN2 because refined RRR test should not be used */
-/*           in this case */
-	    sawnan2 = TRUE_;
-	}
-/* Computing MAX */
-	d__2 = max2, d__3 = (d__1 = work[i__ + 1], fabs(d__1));
-	max2 = fmax(d__2,d__3);
-/* L7: */
-    }
-    sawnan2 = sawnan2 || odnan(&max2);
-    if (forcer || max2 <= growthbound && ! sawnan2) {
-	*sigma = rsigma;
-	shift = 2;
-	goto L100;
-    }
-/*     If we are at this point, both shifts led to too much element growth */
-/*     Record the better of the two shifts (provided it didn't lead to NaN) */
-    if (sawnan1 && sawnan2) {
-/*        both MAX1 and MAX2 are NaN */
-	goto L50;
-    } else {
-	if (! sawnan1) {
-	    indx = 1;
-	    if (max1 <= smlgrowth) {
-		smlgrowth = max1;
-		bestshift = lsigma;
-	    }
-	}
-	if (! sawnan2) {
-	    if (sawnan1 || max2 <= max1) {
-		indx = 2;
-	    }
-	    if (max2 <= smlgrowth) {
-		smlgrowth = max2;
-		bestshift = rsigma;
-	    }
-	}
-    }
-/*     If we are here, both the left and the right shift led to */
-/*     element growth. If the element growth is moderate, then */
-/*     we may still accept the representation, if it passes a */
-/*     refined test for RRR. This test supposes that no NaN occurred. */
-/*     Moreover, we use the refined RRR test only for isolated clusters. */
-    if (clwdth < mingap / 128. && fmin(max1,max2) < fail2 && ! sawnan1 && ! 
-	    sawnan2) {
-	dorrr1 = TRUE_;
-    } else {
-	dorrr1 = FALSE_;
-    }
-    tryrrr1 = TRUE_;
-    if (tryrrr1 && dorrr1) {
-	if (indx == 1) {
-	    tmp = (d__1 = dplus[*n], fabs(d__1));
-	    znm2 = 1.;
-	    prod = 1.;
-	    oldp = 1.;
-	    for (i__ = *n - 1; i__ >= 1; --i__) {
-		if (prod <= eps) {
-		    prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
-			     work[*n + i__]) * oldp;
-		} else {
-		    prod *= (d__1 = work[*n + i__], fabs(d__1));
-		}
-		oldp = prod;
-/* Computing 2nd power */
-		d__1 = prod;
-		znm2 += d__1 * d__1;
-/* Computing MAX */
-		d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, fabs(d__1));
-		tmp = fmax(d__2,d__3);
-/* L15: */
-	    }
-	    rrr1 = tmp / (*spdiam * sqrt(znm2));
-	    if (rrr1 <= 8.) {
-		*sigma = lsigma;
-		shift = 1;
-		goto L100;
-	    }
-	} else if (indx == 2) {
-	    tmp = (d__1 = work[*n], fabs(d__1));
-	    znm2 = 1.;
-	    prod = 1.;
-	    oldp = 1.;
-	    for (i__ = *n - 1; i__ >= 1; --i__) {
-		if (prod <= eps) {
-		    prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * 
-			    lplus[i__]) * oldp;
-		} else {
-		    prod *= (d__1 = lplus[i__], fabs(d__1));
-		}
-		oldp = prod;
-/* Computing 2nd power */
-		d__1 = prod;
-		znm2 += d__1 * d__1;
-/* Computing MAX */
-		d__2 = tmp, d__3 = (d__1 = work[i__] * prod, fabs(d__1));
-		tmp = fmax(d__2,d__3);
-/* L16: */
-	    }
-	    rrr2 = tmp / (*spdiam * sqrt(znm2));
-	    if (rrr2 <= 8.) {
-		*sigma = rsigma;
-		shift = 2;
-		goto L100;
-	    }
-	}
-    }
-L50:
-    if (ktry < 1) {
-/*        If we are here, both shifts failed also the RRR test. */
-/*        Back off to the outside */
-/* Computing MAX */
-	d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
-	lsigma = fmax(d__1,d__2);
-/* Computing MIN */
-	d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
-	rsigma = fmin(d__1,d__2);
-	ldelta *= 2.;
-	rdelta *= 2.;
-	++ktry;
-	goto L5;
-    } else {
-/*        None of the representations investigated satisfied our */
-/*        criteria. Take the best one we found. */
-	if (smlgrowth < fail || nofail) {
-	    lsigma = bestshift;
-	    rsigma = bestshift;
-	    forcer = TRUE_;
-	    goto L5;
-	} else {
-	    *info = 1;
-	    return 0;
-	}
-    }
-L100:
-    if (shift == 1) {
-    } else if (shift == 2) {
-/*        store new L and D back into DPLUS, LPLUS */
-	odcpy(n, &work[1], &c__1, &dplus[1], &c__1);
-	i__1 = *n - 1;
-	odcpy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
-    }
-    return 0;
-
-/*     End of ODRRF */
-
-} /* odrrf_ */
diff --git a/external/pmrrr/src/lapack/odrrj.c b/external/pmrrr/src/lapack/odrrj.c
deleted file mode 100644
index 9dc93ccc7d..0000000000
--- a/external/pmrrr/src/lapack/odrrj.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/* dlarrj.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odrrj(int *n, double *d__, double *e2, 
-	int *ifirst, int *ilast, double *rtol, int *offset, 
-	double *w, double *werr, double *work, int *iwork, 
-	double *pivmin, double *spdiam, int *info)
-{
-    /* System generated locals */
-    int i__1, i__2;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double log(double);
-
-    /* Local variables */
-    int i__, j, k, p;
-    double s;
-    int i1, i2, ii;
-    double fac, mid;
-    int cnt;
-    double tmp, left;
-    int iter, nint, prev, next, savi1;
-    double right, width, dplus;
-    int olnint, maxitr;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Given the initial eigenvalue approximations of T, ODRRJ */
-/*  does  bisection to refine the eigenvalues of T, */
-/*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
-/*  guesses for these eigenvalues are input in W, the corresponding estimate */
-/*  of the error in these guesses in WERR. During bisection, intervals */
-/*  [left, right] are maintained by storing their mid-points and */
-/*  semi-widths in the arrays W and WERR respectively. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of T. */
-
-/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The Squares of the (N-1) subdiagonal elements of T. */
-
-/*  IFIRST  (input) INT */
-/*          The index of the first eigenvalue to be computed. */
-
-/*  ILAST   (input) INT */
-/*          The index of the last eigenvalue to be computed. */
-
-/*  RTOL   (input) DOUBLE PRECISION */
-/*          Tolerance for the convergence of the bisection intervals. */
-/*          An interval [LEFT,RIGHT] has converged if */
-/*          RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */
-
-/*  OFFSET  (input) INT */
-/*          Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */
-/*          through ILAST-OFFSET elements of these arrays are to be used. */
-
-/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
-/*          estimates of the eigenvalues of L D L^T indexed IFIRST through */
-/*          ILAST. */
-/*          On output, these estimates are refined. */
-
-/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
-/*          the errors in the estimates of the corresponding elements in W. */
-/*          On output, these errors are refined. */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
-/*          Workspace. */
-
-/*  IWORK   (workspace) INT array, dimension (2*N) */
-/*          Workspace. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot in the Sturm sequence for T. */
-
-/*  SPDIAM  (input) DOUBLE PRECISION */
-/*          The spectral diameter of T. */
-
-/*  INFO    (output) INT */
-/*          Error flag. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --iwork;
-    --work;
-    --werr;
-    --w;
-    --e2;
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-
-    maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 
-	    2;
-
-/*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
-/*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
-/*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */
-/*     for an unconverged interval is set to the index of the next unconverged */
-/*     interval, and is -1 or 0 for a converged interval. Thus a linked */
-/*     list of unconverged intervals is set up. */
-
-    i1 = *ifirst;
-    i2 = *ilast;
-/*     The number of unconverged intervals */
-    nint = 0;
-/*     The last unconverged interval found */
-    prev = 0;
-    i__1 = i2;
-    for (i__ = i1; i__ <= i__1; ++i__) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-	left = w[ii] - werr[ii];
-	mid = w[ii];
-	right = w[ii] + werr[ii];
-	width = right - mid;
-/* Computing MAX */
-	d__1 = fabs(left), d__2 = fabs(right);
-	tmp = fmax(d__1,d__2);
-/*        The following test prevents the test of converged intervals */
-	if (width < *rtol * tmp) {
-/*           This interval has already converged and does not need refinement. */
-/*           (Note that the gaps might change through refining the */
-/*            eigenvalues, however, they can only get bigger.) */
-/*           Remove it from the list. */
-	    iwork[k - 1] = -1;
-/*           Make sure that I1 always points to the first unconverged interval */
-	    if (i__ == i1 && i__ < i2) {
-		i1 = i__ + 1;
-	    }
-	    if (prev >= i1 && i__ <= i2) {
-		iwork[(prev << 1) - 1] = i__ + 1;
-	    }
-	} else {
-/*           unconverged interval found */
-	    prev = i__;
-/*           Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
-
-/*           Do while( CNT(LEFT).GT.I-1 ) */
-
-	    fac = 1.;
-L20:
-	    cnt = 0;
-	    s = left;
-	    dplus = d__[1] - s;
-	    if (dplus < 0.) {
-		++cnt;
-	    }
-	    i__2 = *n;
-	    for (j = 2; j <= i__2; ++j) {
-		dplus = d__[j] - s - e2[j - 1] / dplus;
-		if (dplus < 0.) {
-		    ++cnt;
-		}
-/* L30: */
-	    }
-	    if (cnt > i__ - 1) {
-		left -= werr[ii] * fac;
-		fac *= 2.;
-		goto L20;
-	    }
-
-/*           Do while( CNT(RIGHT).LT.I ) */
-
-	    fac = 1.;
-L50:
-	    cnt = 0;
-	    s = right;
-	    dplus = d__[1] - s;
-	    if (dplus < 0.) {
-		++cnt;
-	    }
-	    i__2 = *n;
-	    for (j = 2; j <= i__2; ++j) {
-		dplus = d__[j] - s - e2[j - 1] / dplus;
-		if (dplus < 0.) {
-		    ++cnt;
-		}
-/* L60: */
-	    }
-	    if (cnt < i__) {
-		right += werr[ii] * fac;
-		fac *= 2.;
-		goto L50;
-	    }
-	    ++nint;
-	    iwork[k - 1] = i__ + 1;
-	    iwork[k] = cnt;
-	}
-	work[k - 1] = left;
-	work[k] = right;
-/* L75: */
-    }
-    savi1 = i1;
-
-/*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
-/*     and while (ITER.LT.MAXITR) */
-
-    iter = 0;
-L80:
-    prev = i1 - 1;
-    i__ = i1;
-    olnint = nint;
-    i__1 = olnint;
-    for (p = 1; p <= i__1; ++p) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-	next = iwork[k - 1];
-	left = work[k - 1];
-	right = work[k];
-	mid = (left + right) * .5;
-/*        semiwidth of interval */
-	width = right - mid;
-/* Computing MAX */
-	d__1 = fabs(left), d__2 = fabs(right);
-	tmp = fmax(d__1,d__2);
-	if (width < *rtol * tmp || iter == maxitr) {
-/*           reduce number of unconverged intervals */
-	    --nint;
-/*           Mark interval as converged. */
-	    iwork[k - 1] = 0;
-	    if (i1 == i__) {
-		i1 = next;
-	    } else {
-/*              Prev holds the last unconverged interval previously examined */
-		if (prev >= i1) {
-		    iwork[(prev << 1) - 1] = next;
-		}
-	    }
-	    i__ = next;
-	    goto L100;
-	}
-	prev = i__;
-
-/*        Perform one bisection step */
-
-	cnt = 0;
-	s = mid;
-	dplus = d__[1] - s;
-	if (dplus < 0.) {
-	    ++cnt;
-	}
-	i__2 = *n;
-	for (j = 2; j <= i__2; ++j) {
-	    dplus = d__[j] - s - e2[j - 1] / dplus;
-	    if (dplus < 0.) {
-		++cnt;
-	    }
-/* L90: */
-	}
-	if (cnt <= i__ - 1) {
-	    work[k - 1] = mid;
-	} else {
-	    work[k] = mid;
-	}
-	i__ = next;
-L100:
-	;
-    }
-    ++iter;
-/*     do another loop if there are still unconverged intervals */
-/*     However, in the last iteration, all intervals are accepted */
-/*     since this is the best we can do. */
-    if (nint > 0 && iter <= maxitr) {
-	goto L80;
-    }
-
-
-/*     At this point, all the intervals have converged */
-    i__1 = *ilast;
-    for (i__ = savi1; i__ <= i__1; ++i__) {
-	k = i__ << 1;
-	ii = i__ - *offset;
-/*        All intervals marked by '0' have been refined. */
-	if (iwork[k - 1] == 0) {
-	    w[ii] = (work[k - 1] + work[k]) * .5;
-	    werr[ii] = work[k] - w[ii];
-	}
-/* L110: */
-    }
-
-    return 0;
-
-/*     End of ODRRJ */
-
-} /* odrrj_ */
diff --git a/external/pmrrr/src/lapack/odrrk.c b/external/pmrrr/src/lapack/odrrk.c
deleted file mode 100644
index 5fee04e438..0000000000
--- a/external/pmrrr/src/lapack/odrrk.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/* dlarrk.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odrrk(int *n, int *iw, double *gl, 
-	double *gu, double *d__, double *e2, double *pivmin, 
-	double *reltol, double *w, double *werr, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double log(double);
-
-    /* Local variables */
-    int i__, it;
-    double mid, eps, tmp1, tmp2, left, atoli, right;
-    int itmax;
-    double rtoli, tnorm;
-    // extern double odmch(char *);
-    int negcnt;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODRRK computes one eigenvalue of a symmetric tridiagonal */
-/*  matrix T to suitable accuracy. This is an auxiliary code to be */
-/*  called from DSTEMR. */
-
-/*  To avoid overflow, the matrix must be scaled so that its */
-/*  largest element is no greater than overflow**(1/2) * */
-/*  underflow**(1/4) in absolute value, and for greatest */
-/*  accuracy, it should not be much smaller than that. */
-
-/*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
-/*  Matrix", Report CS41, Computer Science Dept., Stanford */
-/*  University, July 21, 1966. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the tridiagonal matrix T.  N >= 0. */
-
-/*  IW      (input) INT */
-/*          The index of the eigenvalues to be returned. */
-
-/*  GL      (input) DOUBLE PRECISION */
-/*  GU      (input) DOUBLE PRECISION */
-/*          An upper and a lower bound on the eigenvalue. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The n diagonal elements of the tridiagonal matrix T. */
-
-/*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
-/*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
-
-/*  PIVMIN  (input) DOUBLE PRECISION */
-/*          The minimum pivot allowed in the Sturm sequence for T. */
-
-/*  RELTOL  (input) DOUBLE PRECISION */
-/*          The minimum relative width of an interval.  When an interval */
-/*          is narrower than RELTOL times the larger (in */
-/*          magnitude) endpoint, then it is considered to be */
-/*          sufficiently small, i.e., converged.  Note: this should */
-/*          always be at least radix*machine epsilon. */
-
-/*  W       (output) DOUBLE PRECISION */
-
-/*  WERR    (output) DOUBLE PRECISION */
-/*          The error bound on the corresponding eigenvalue approximation */
-/*          in W. */
-
-/*  INFO    (output) INT */
-/*          = 0:       Eigenvalue converged */
-/*          = -1:      Eigenvalue did NOT converge */
-
-/*  Internal Parameters */
-/*  =================== */
-
-/*  FUDGE   DOUBLE PRECISION, default = 2 */
-/*          A "fudge factor" to widen the Gershgorin intervals. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Get machine constants */
-    /* Parameter adjustments */
-    --e2;
-    --d__;
-
-    /* Function Body */
-    eps = DBL_EPSILON; // odmch("P");
-/* Computing MAX */
-    d__1 = fabs(*gl), d__2 = fabs(*gu);
-    tnorm = fmax(d__1,d__2);
-    rtoli = *reltol;
-    atoli = *pivmin * 4.;
-    itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2;
-    *info = -1;
-    left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.;
-    right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.;
-    it = 0;
-L10:
-
-/*     Check if interval converged or maximum number of iterations reached */
-
-    tmp1 = (d__1 = right - left, fabs(d__1));
-/* Computing MAX */
-    d__1 = fabs(right), d__2 = fabs(left);
-    tmp2 = fmax(d__1,d__2);
-/* Computing MAX */
-    d__1 = fmax(atoli,*pivmin), d__2 = rtoli * tmp2;
-    if (tmp1 < fmax(d__1,d__2)) {
-	*info = 0;
-	goto L30;
-    }
-    if (it > itmax) {
-	goto L30;
-    }
-
-/*     Count number of negative pivots for mid-point */
-
-    ++it;
-    mid = (left + right) * .5;
-    negcnt = 0;
-    tmp1 = d__[1] - mid;
-    if (fabs(tmp1) < *pivmin) {
-	tmp1 = -(*pivmin);
-    }
-    if (tmp1 <= 0.) {
-	++negcnt;
-    }
-
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-	tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid;
-	if (fabs(tmp1) < *pivmin) {
-	    tmp1 = -(*pivmin);
-	}
-	if (tmp1 <= 0.) {
-	    ++negcnt;
-	}
-/* L20: */
-    }
-    if (negcnt >= *iw) {
-	right = mid;
-    } else {
-	left = mid;
-    }
-    goto L10;
-L30:
-
-/*     Converged or maximum number of iterations reached */
-
-    *w = (left + right) * .5;
-    *werr = (d__1 = right - left, fabs(d__1)) * .5;
-    return 0;
-
-/*     End of ODRRK */
-
-} /* odrrk_ */
diff --git a/external/pmrrr/src/lapack/odrrr.c b/external/pmrrr/src/lapack/odrrr.c
deleted file mode 100644
index 3b510ce27a..0000000000
--- a/external/pmrrr/src/lapack/odrrr.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* dlarrr.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Subroutine */ 
-int odrrr(int *n, double *d__, double *e, int *info)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    int i__;
-    double eps, tmp, tmp2, rmin;
-    // extern double odmch(char *);
-    double offdig, safmin;
-    int yesrel;
-    double smlnum, offdig2;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-
-/*  Purpose */
-/*  ======= */
-
-/*  Perform tests to decide whether the symmetric tridiagonal matrix T */
-/*  warrants expensive computations which guarantee high relative accuracy */
-/*  in the eigenvalues. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix. N > 0. */
-
-/*  D       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The N diagonal elements of the tridiagonal matrix T. */
-
-/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the first (N-1) entries contain the subdiagonal */
-/*          elements of the tridiagonal matrix T; E(N) is set to ZERO. */
-
-/*  INFO    (output) INT */
-/*          INFO = 0(default) : the matrix warrants computations preserving */
-/*                              relative accuracy. */
-/*          INFO = 1          : the matrix warrants computations guaranteeing */
-/*                              only absolute accuracy. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     As a default, do NOT go for relative-accuracy preserving computations. */
-    /* Parameter adjustments */
-    --e;
-    --d__;
-
-    /* Function Body */
-    *info = 1;
-    safmin = DBL_MIN; // safmin = odmch("Safe minimum");
-    eps = DBL_EPSILON; // eps = odmch("Precision");
-    smlnum = safmin / eps;
-    rmin = sqrt(smlnum);
-/*     Tests for relative accuracy */
-
-/*     Test for scaled diagonal dominance */
-/*     Scale the diagonal entries to one and check whether the sum of the */
-/*     off-diagonals is less than one */
-
-/*     The sdd relative error bounds have a 1/(1- 2*x) factor in them, */
-/*     x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */
-/*     accuracy is promised.  In the notation of the code fragment below, */
-/*     1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */
-/*     We don't think it is worth going into "sdd mode" unless the relative */
-/*     condition number is reasonable, not 1/macheps. */
-/*     The threshold should be compatible with other thresholds used in the */
-/*     code. We set  OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */
-/*     to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */
-/*     instead of the current OFFDIG + OFFDIG2 < 1 */
-
-    yesrel = TRUE_;
-    offdig = 0.;
-    tmp = sqrt((fabs(d__[1])));
-    if (tmp < rmin) {
-	yesrel = FALSE_;
-    }
-    if (! yesrel) {
-	goto L11;
-    }
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-	tmp2 = sqrt((d__1 = d__[i__], fabs(d__1)));
-	if (tmp2 < rmin) {
-	    yesrel = FALSE_;
-	}
-	if (! yesrel) {
-	    goto L11;
-	}
-	offdig2 = (d__1 = e[i__ - 1], fabs(d__1)) / (tmp * tmp2);
-	if (offdig + offdig2 >= .999) {
-	    yesrel = FALSE_;
-	}
-	if (! yesrel) {
-	    goto L11;
-	}
-	tmp = tmp2;
-	offdig = offdig2;
-/* L10: */
-    }
-L11:
-    if (yesrel) {
-	*info = 0;
-	return 0;
-    } else {
-    }
-
-
-/*     *** MORE TO BE IMPLEMENTED *** */
-
-
-/*     Test if the lower bidiagonal matrix L from T = L D L^T */
-/*     (zero shift facto) is well conditioned */
-
-
-/*     Test if the upper bidiagonal matrix U from T = U D U^T */
-/*     (zero shift facto) is well conditioned. */
-/*     In this case, the matrix needs to be flipped and, at the end */
-/*     of the eigenvector computation, the flip needs to be applied */
-/*     to the computed eigenvectors (and the support) */
-
-
-    return 0;
-
-/*     END OF ODRRR */
-
-} /* odrrr_ */
diff --git a/external/pmrrr/src/lapack/odrrv.c b/external/pmrrr/src/lapack/odrrv.c
deleted file mode 100644
index 0b9b2c8054..0000000000
--- a/external/pmrrr/src/lapack/odrrv.c
+++ /dev/null
@@ -1,986 +0,0 @@
-/* dlarrv.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imax(a,b) ( (a) > (b) ? (a) : (b) )
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Table of constant values */
-static double c_b5 = 0.;
-static int c__1 = 1;
-static int c__2 = 2;
-
-/* Subroutine */ int odrrv(int *n, double *vl, double *vu, 
-	double *d__, double *l, double *pivmin, int *isplit, 
-	int *m, int *dol, int *dou, double *minrgp, 
-	double *rtol1, double *rtol2, double *w, double *werr, 
-	 double *wgap, int *iblock, int *indexw, double *gers, 
-	 double *z__, int *ldz, int *isuppz, double *work, 
-	int *iwork, int *info)
-{
-    /* System generated locals */
-    int z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
-    double d__1, d__2;
-    int L__1;
-
-    /* Builtin functions */
-    // double log(double);
-
-    /* Local variables */
-    int minwsize, i__, j, k, p, q, miniwsize, ii;
-    double gl;
-    int im, in;
-    double gu, gap, eps, tau, tol, tmp;
-    int zto;
-    double ztz;
-    int iend, jblk;
-    double lgap;
-    int done;
-    double rgap, left;
-    int wend, iter;
-    double bstw;
-    int itmp1;
-    extern /* Subroutine */ int odscal(int *, double *, double *, 
-	    int *);
-    int indld;
-    double fudge;
-    int idone;
-    double sigma;
-    int iinfo, iindr;
-    double resid;
-    int eskip;
-    double right;
-    extern /* Subroutine */ int odcpy(int *, double *, int *, 
-	    double *, int *);
-    int nclus, zfrom;
-    double rqtol;
-    int iindc1, iindc2;
-    extern /* Subroutine */ int odr1v(int *, int *, int *, 
-	    double *, double *, double *, double *, 
-	    double *, double *, double *, double *, int *, 
-	     int *, double *, double *, int *, int *, 
-	    double *, double *, double *, double *);
-    int stp2ii;
-    double lambda;
-    // extern double odmch(char *);
-    int ibegin, indeig;
-    int needbs;
-    int indlld;
-    double sgndef, mingma;
-    extern /* Subroutine */ int odrrb(int *, double *, double *, 
-	     int *, int *, double *, double *, int *, 
-	    double *, double *, double *, double *, int *, 
-	     double *, double *, int *, int *);
-    int oldien, oldncl, wbegin;
-    double spdiam;
-    int negcnt;
-    extern /* Subroutine */ int odrrf(int *, double *, double *, 
-	     double *, int *, int *, double *, double *, 
-	    double *, double *, double *, double *, 
-	    double *, double *, double *, double *, 
-	    double *, int *);
-    int oldcls;
-    double savgap;
-    int ndepth;
-    double ssigma;
-    extern /* Subroutine */ int odset(char *, int *, int *, 
-	    double *, double *, double *, int *);
-    int usedbs;
-    int iindwk, offset;
-    double gaptol;
-    int newcls, oldfst, indwrk, windex, oldlst;
-    int usedrq;
-    int newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
-    double bstres;
-    int newsiz, zusedu, zusedw;
-    double nrminv, rqcorr;
-    int tryrqc;
-    int isupmx;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODRRV computes the eigenvectors of the tridiagonal matrix */
-/*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
-/*  The input eigenvalues should have been computed by DLARRE. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The order of the matrix.  N >= 0. */
-
-/*  VL      (input) DOUBLE PRECISION */
-/*  VU      (input) DOUBLE PRECISION */
-/*          Lower and upper bounds of the interval that contains the desired */
-/*          eigenvalues. VL < VU. Needed to compute gaps on the left or right */
-/*          end of the extremal eigenvalues in the desired RANGE. */
-
-/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the N diagonal elements of the diagonal matrix D. */
-/*          On exit, D may be overwritten. */
-
-/*  L       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the (N-1) subdiagonal elements of the unit */
-/*          bidiagonal matrix L are in elements 1 to N-1 of L */
-/*          (if the matrix is not splitted.) At the end of each block */
-/*          is stored the corresponding shift as given by DLARRE. */
-/*          On exit, L is overwritten. */
-
-/*  PIVMIN  (in) DOUBLE PRECISION */
-/*          The minimum pivot allowed in the Sturm sequence. */
-
-/*  ISPLIT  (input) INT array, dimension (N) */
-/*          The splitting points, at which T breaks up into blocks. */
-/*          The first block consists of rows/columns 1 to */
-/*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
-/*          through ISPLIT( 2 ), etc. */
-
-/*  M       (input) INT */
-/*          The total number of input eigenvalues.  0 <= M <= N. */
-
-/*  DOL     (input) INT */
-/*  DOU     (input) INT */
-/*          If the user wants to compute only selected eigenvectors from all */
-/*          the eigenvalues supplied, he can specify an index range DOL:DOU. */
-/*          Or else the setting DOL=1, DOU=M should be applied. */
-/*          Note that DOL and DOU refer to the order in which the eigenvalues */
-/*          are stored in W. */
-/*          If the user wants to compute only selected eigenpairs, then */
-/*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
-/*          computed eigenvectors. All other columns of Z are set to zero. */
-
-/*  MINRGP  (input) DOUBLE PRECISION */
-
-/*  RTOL1   (input) DOUBLE PRECISION */
-/*  RTOL2   (input) DOUBLE PRECISION */
-/*           Parameters for bisection. */
-/*           An interval [LEFT,RIGHT] has converged if */
-/*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
-
-/*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          The first M elements of W contain the APPROXIMATE eigenvalues for */
-/*          which eigenvectors are to be computed.  The eigenvalues */
-/*          should be grouped by split-off block and ordered from */
-/*          smallest to largest within the block ( The output array */
-/*          W from DLARRE is expected here ). Furthermore, they are with */
-/*          respect to the shift of the corresponding root representation */
-/*          for their block. On exit, W holds the eigenvalues of the */
-/*          UNshifted matrix. */
-
-/*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          The first M elements contain the semiwidth of the uncertainty */
-/*          interval of the corresponding eigenvalue in W */
-
-/*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          The separation from the right neighbor eigenvalue in W. */
-
-/*  IBLOCK  (input) INT array, dimension (N) */
-/*          The indices of the blocks (submatrices) associated with the */
-/*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
-/*          W(i) belongs to the first block from the top, =2 if W(i) */
-/*          belongs to the second block, etc. */
-
-/*  INDEXW  (input) INT array, dimension (N) */
-/*          The indices of the eigenvalues within each block (submatrix); */
-/*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
-/*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
-
-/*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
-/*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
-/*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
-/*          be computed from the original UNshifted matrix. */
-
-/*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
-/*          If INFO = 0, the first M columns of Z contain the */
-/*          orthonormal eigenvectors of the matrix T */
-/*          corresponding to the input eigenvalues, with the i-th */
-/*          column of Z holding the eigenvector associated with W(i). */
-/*          Note: the user must ensure that at least max(1,M) columns are */
-/*          supplied in the array Z. */
-
-/*  LDZ     (input) INT */
-/*          The leading dimension of the array Z.  LDZ >= 1, and if */
-/*          JOBZ = 'V', LDZ >= max(1,N). */
-
-/*  ISUPPZ  (output) INT array, dimension ( 2*max(1,M) ) */
-/*          The support of the eigenvectors in Z, i.e., the indices */
-/*          indicating the nonzero elements in Z. The I-th eigenvector */
-/*          is nonzero only in elements ISUPPZ( 2*I-1 ) through */
-/*          ISUPPZ( 2*I ). */
-
-/*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N) */
-
-/*  IWORK   (workspace) INT array, dimension (7*N) */
-
-/*  INFO    (output) INT */
-/*          = 0:  successful exit */
-
-/*          > 0:  A problem occured in ODRRV. */
-/*          < 0:  One of the called subroutines signaled an internal problem. */
-/*                Needs inspection of the corresponding parameter IINFO */
-/*                for further information. */
-
-/*          =-1:  Problem in ODRRB when refining a child's eigenvalues. */
-/*          =-2:  Problem in ODRRF when computing the RRR of a child. */
-/*                When a child is inside a tight cluster, it can be difficult */
-/*                to find an RRR. A partial remedy from the user's point of */
-/*                view is to make the parameter MINRGP smaller and recompile. */
-/*                However, as the orthogonality of the computed vectors is */
-/*                proportional to 1/MINRGP, the user should be aware that */
-/*                he might be trading in precision when he decreases MINRGP. */
-/*          =-3:  Problem in ODRRB when refining a single eigenvalue */
-/*                after the Rayleigh correction was rejected. */
-/*          = 5:  The Rayleigh Quotient Iteration failed to converge to */
-/*                full accuracy in MAXITR steps. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     .. */
-/*     The first N entries of WORK are reserved for the eigenvalues */
-    /* Parameter adjustments */
-    --d__;
-    --l;
-    --isplit;
-    --w;
-    --werr;
-    --wgap;
-    --iblock;
-    --indexw;
-    --gers;
-    z_dim1 = *ldz;
-    z_offset = 1 + z_dim1;
-    z__ -= z_offset;
-    --isuppz;
-    --work;
-    --iwork;
-
-    /* Function Body */
-    indld = *n + 1;
-    indlld = (*n << 1) + 1;
-    indwrk = *n * 3 + 1;
-    minwsize = *n * 12;
-    i__1 = minwsize;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	work[i__] = 0.;
-/* L5: */
-    }
-/*     IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
-/*     factorization used to compute the FP vector */
-    iindr = 0;
-/*     IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
-/*     layer and the one above. */
-    iindc1 = *n;
-    iindc2 = *n << 1;
-    iindwk = *n * 3 + 1;
-    miniwsize = *n * 7;
-    i__1 = miniwsize;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	iwork[i__] = 0;
-/* L10: */
-    }
-    zusedl = 1;
-    if (*dol > 1) {
-/*        Set lower bound for use of Z */
-	zusedl = *dol - 1;
-    }
-    zusedu = *m;
-    if (*dou < *m) {
-/*        Set lower bound for use of Z */
-	zusedu = *dou + 1;
-    }
-/*     The width of the part of Z that is used */
-    zusedw = zusedu - zusedl + 1;
-    odset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
-    eps = DBL_EPSILON; // eps = odmch("Precision");
-    rqtol = eps * 2.;
-
-/*     Set expert flags for standard code. */
-    tryrqc = TRUE_;
-    if (*dol == 1 && *dou == *m) {
-    } else {
-/*        Only selected eigenpairs are computed. Since the other evalues */
-/*        are not refined by RQ iteration, bisection has to compute to full */
-/*        accuracy. */
-	*rtol1 = eps * 4.;
-	*rtol2 = eps * 4.;
-    }
-/*     The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
-/*     desired eigenvalues. The support of the nonzero eigenvector */
-/*     entries is contained in the interval IBEGIN:IEND. */
-/*     Remark that if k eigenpairs are desired, then the eigenvectors */
-/*     are stored in k contiguous columns of Z. */
-/*     DONE is the number of eigenvectors already computed */
-    done = 0;
-    ibegin = 1;
-    wbegin = 1;
-    i__1 = iblock[*m];
-    for (jblk = 1; jblk <= i__1; ++jblk) {
-	iend = isplit[jblk];
-	sigma = l[iend];
-/*        Find the eigenvectors of the submatrix indexed IBEGIN */
-/*        through IEND. */
-	wend = wbegin - 1;
-L15:
-	if (wend < *m) {
-	    if (iblock[wend + 1] == jblk) {
-		++wend;
-		goto L15;
-	    }
-	}
-	if (wend < wbegin) {
-	    ibegin = iend + 1;
-	    goto L170;
-	} else if (wend < *dol || wbegin > *dou) {
-	    ibegin = iend + 1;
-	    wbegin = wend + 1;
-	    goto L170;
-	}
-/*        Find local spectral diameter of the block */
-	gl = gers[(ibegin << 1) - 1];
-	gu = gers[ibegin * 2];
-	i__2 = iend;
-	for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
-/* Computing MIN */
-	    d__1 = gers[(i__ << 1) - 1];
-	    gl = fmin(d__1,gl);
-/* Computing MAX */
-	    d__1 = gers[i__ * 2];
-	    gu = fmax(d__1,gu);
-/* L20: */
-	}
-	spdiam = gu - gl;
-/*        OLDIEN is the last index of the previous block */
-	oldien = ibegin - 1;
-/*        Calculate the size of the current block */
-	in = iend - ibegin + 1;
-/*        The number of eigenvalues in the current block */
-	im = wend - wbegin + 1;
-/*        This is for a 1x1 block */
-	if (ibegin == iend) {
-	    ++done;
-	    z__[ibegin + wbegin * z_dim1] = 1.;
-	    isuppz[(wbegin << 1) - 1] = ibegin;
-	    isuppz[wbegin * 2] = ibegin;
-	    w[wbegin] += sigma;
-	    work[wbegin] = w[wbegin];
-	    ibegin = iend + 1;
-	    ++wbegin;
-	    goto L170;
-	}
-/*        The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
-/*        Note that these can be approximations, in this case, the corresp. */
-/*        entries of WERR give the size of the uncertainty interval. */
-/*        The eigenvalue approximations will be refined when necessary as */
-/*        high relative accuracy is required for the computation of the */
-/*        corresponding eigenvectors. */
-	odcpy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
-/*        We store in W the eigenvalue approximations w.r.t. the original */
-/*        matrix T. */
-	i__2 = im;
-	for (i__ = 1; i__ <= i__2; ++i__) {
-	    w[wbegin + i__ - 1] += sigma;
-/* L30: */
-	}
-/*        NDEPTH is the current depth of the representation tree */
-	ndepth = 0;
-/*        PARITY is either 1 or 0 */
-	parity = 1;
-/*        NCLUS is the number of clusters for the next level of the */
-/*        representation tree, we start with NCLUS = 1 for the root */
-	nclus = 1;
-	iwork[iindc1 + 1] = 1;
-	iwork[iindc1 + 2] = im;
-/*        IDONE is the number of eigenvectors already computed in the current */
-/*        block */
-	idone = 0;
-/*        loop while( IDONE.LT.IM ) */
-/*        generate the representation tree for the current block and */
-/*        compute the eigenvectors */
-L40:
-	if (idone < im) {
-/*           This is a crude protection against infinitely deep trees */
-	    if (ndepth > *m) {
-		*info = -2;
-		return 0;
-	    }
-/*           breadth first processing of the current level of the representation */
-/*           tree: OLDNCL = number of clusters on current level */
-	    oldncl = nclus;
-/*           reset NCLUS to count the number of child clusters */
-	    nclus = 0;
-
-	    parity = 1 - parity;
-	    if (parity == 0) {
-		oldcls = iindc1;
-		newcls = iindc2;
-	    } else {
-		oldcls = iindc2;
-		newcls = iindc1;
-	    }
-/*           Process the clusters on the current level */
-	    i__2 = oldncl;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		j = oldcls + (i__ << 1);
-/*              OLDFST, OLDLST = first, last index of current cluster. */
-/*                               cluster indices start with 1 and are relative */
-/*                               to WBEGIN when accessing W, WGAP, WERR, Z */
-		oldfst = iwork[j - 1];
-		oldlst = iwork[j];
-		if (ndepth > 0) {
-/*                 Retrieve relatively robust representation (RRR) of cluster */
-/*                 that has been computed at the previous level */
-/*                 The RRR is stored in Z and overwritten once the eigenvectors */
-/*                 have been computed or when the cluster is refined */
-		    if (*dol == 1 && *dou == *m) {
-/*                    Get representation from location of the leftmost evalue */
-/*                    of the cluster */
-			j = wbegin + oldfst - 1;
-		    } else {
-			if (wbegin + oldfst - 1 < *dol) {
-/*                       Get representation from the left end of Z array */
-			    j = *dol - 1;
-			} else if (wbegin + oldfst - 1 > *dou) {
-/*                       Get representation from the right end of Z array */
-			    j = *dou;
-			} else {
-			    j = wbegin + oldfst - 1;
-			}
-		    }
-		    odcpy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
-, &c__1);
-		    i__3 = in - 1;
-		    odcpy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
-			    ibegin], &c__1);
-		    sigma = z__[iend + (j + 1) * z_dim1];
-/*                 Set the corresponding entries in Z to zero */
-		    odset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 
-			    * z_dim1], ldz);
-		}
-/*              Compute DL and DLL of current RRR */
-		i__3 = iend - 1;
-		for (j = ibegin; j <= i__3; ++j) {
-		    tmp = d__[j] * l[j];
-		    work[indld - 1 + j] = tmp;
-		    work[indlld - 1 + j] = tmp * l[j];
-/* L50: */
-		}
-		if (ndepth > 0) {
-/*                 P and Q are index of the first and last eigenvalue to compute */
-/*                 within the current block */
-		    p = indexw[wbegin - 1 + oldfst];
-		    q = indexw[wbegin - 1 + oldlst];
-/*                 Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
-/*                 thru' Q-OFFSET elements of these arrays are to be used. */
-/*                  OFFSET = P-OLDFST */
-		    offset = indexw[wbegin] - 1;
-/*                 perform limited bisection (if necessary) to get approximate */
-/*                 eigenvalues to the precision needed. */
-		    odrrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 
-			     &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
-			    wbegin], &werr[wbegin], &work[indwrk], &iwork[
-			    iindwk], pivmin, &spdiam, &in, &iinfo);
-		    if (iinfo != 0) {
-			*info = -1;
-			return 0;
-		    }
-/*                 We also recompute the extremal gaps. W holds all eigenvalues */
-/*                 of the unshifted matrix and must be used for computation */
-/*                 of WGAP, the entries of WORK might stem from RRRs with */
-/*                 different shifts. The gaps from WBEGIN-1+OLDFST to */
-/*                 WBEGIN-1+OLDLST are correctly computed in ODRRB. */
-/*                 However, we only allow the gaps to become greater since */
-/*                 this is what should happen when we decrease WERR */
-		    if (oldfst > 1) {
-/* Computing MAX */
-			d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 
-				oldfst - 1] - werr[wbegin + oldfst - 1] - w[
-				wbegin + oldfst - 2] - werr[wbegin + oldfst - 
-				2];
-			wgap[wbegin + oldfst - 2] = fmax(d__1,d__2);
-		    }
-		    if (wbegin + oldlst - 1 < wend) {
-/* Computing MAX */
-			d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 
-				oldlst] - werr[wbegin + oldlst] - w[wbegin + 
-				oldlst - 1] - werr[wbegin + oldlst - 1];
-			wgap[wbegin + oldlst - 1] = fmax(d__1,d__2);
-		    }
-/*                 Each time the eigenvalues in WORK get refined, we store */
-/*                 the newly found approximation with all shifts applied in W */
-		    i__3 = oldlst;
-		    for (j = oldfst; j <= i__3; ++j) {
-			w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
-/* L53: */
-		    }
-		}
-/*              Process the current node. */
-		newfst = oldfst;
-		i__3 = oldlst;
-		for (j = oldfst; j <= i__3; ++j) {
-		    if (j == oldlst) {
-/*                    we are at the right end of the cluster, this is also the */
-/*                    boundary of the child cluster */
-			newlst = j;
-		    } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
-			    wbegin + j - 1], fabs(d__1))) {
-/*                    the right relative gap is big enough, the child cluster */
-/*                    (NEWFST,..,NEWLST) is well separated from the following */
-			newlst = j;
-		    } else {
-/*                    inside a child cluster, the relative gap is not */
-/*                    big enough. */
-			goto L140;
-		    }
-/*                 Compute size of child cluster found */
-		    newsiz = newlst - newfst + 1;
-/*                 NEWFTT is the place in Z where the new RRR or the computed */
-/*                 eigenvector is to be stored */
-		    if (*dol == 1 && *dou == *m) {
-/*                    Store representation at location of the leftmost evalue */
-/*                    of the cluster */
-			newftt = wbegin + newfst - 1;
-		    } else {
-			if (wbegin + newfst - 1 < *dol) {
-/*                       Store representation at the left end of Z array */
-			    newftt = *dol - 1;
-			} else if (wbegin + newfst - 1 > *dou) {
-/*                       Store representation at the right end of Z array */
-			    newftt = *dou;
-			} else {
-			    newftt = wbegin + newfst - 1;
-			}
-		    }
-		    if (newsiz > 1) {
-
-/*                    Current child is not a singleton but a cluster. */
-/*                    Compute and store new representation of child. */
-
-
-/*                    Compute left and right cluster gap. */
-
-/*                    LGAP and RGAP are not computed from WORK because */
-/*                    the eigenvalue approximations may stem from RRRs */
-/*                    different shifts. However, W hold all eigenvalues */
-/*                    of the unshifted matrix. Still, the entries in WGAP */
-/*                    have to be computed from WORK since the entries */
-/*                    in W might be of the same order so that gaps are not */
-/*                    exhibited correctly for very close eigenvalues. */
-			if (newfst == 1) {
-/* Computing MAX */
-			    d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
-			    lgap = fmax(d__1,d__2);
-			} else {
-			    lgap = wgap[wbegin + newfst - 2];
-			}
-			rgap = wgap[wbegin + newlst - 1];
-
-/*                    Compute left- and rightmost eigenvalue of child */
-/*                    to high precision in order to shift as close */
-/*                    as possible and obtain as large relative gaps */
-/*                    as possible */
-
-			for (k = 1; k <= 2; ++k) {
-			    if (k == 1) {
-				p = indexw[wbegin - 1 + newfst];
-			    } else {
-				p = indexw[wbegin - 1 + newlst];
-			    }
-			    offset = indexw[wbegin] - 1;
-			    odrrb(&in, &d__[ibegin], &work[indlld + ibegin 
-				    - 1], &p, &p, &rqtol, &rqtol, &offset, &
-				    work[wbegin], &wgap[wbegin], &werr[wbegin]
-, &work[indwrk], &iwork[iindwk], pivmin, &
-				    spdiam, &in, &iinfo);
-/* L55: */
-			}
-
-			if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 
-				> *dou) {
-/*                       if the cluster contains no desired eigenvalues */
-/*                       skip the computation of that branch of the rep. tree */
-
-/*                       We could skip before the refinement of the extremal */
-/*                       eigenvalues of the child, but then the representation */
-/*                       tree could be different from the one when nothing is */
-/*                       skipped. For this reason we skip at this place. */
-			    idone = idone + newlst - newfst + 1;
-			    goto L139;
-			}
-
-/*                    Compute RRR of child cluster. */
-/*                    Note that the new RRR is stored in Z */
-
-/*                    ODRRF needs LWORK = 2*N */
-			odrrf(&in, &d__[ibegin], &l[ibegin], &work[indld + 
-				ibegin - 1], &newfst, &newlst, &work[wbegin], 
-				&wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 
-				&rgap, pivmin, &tau, &z__[ibegin + newftt * 
-				z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 
-				 &work[indwrk], &iinfo);
-			if (iinfo == 0) {
-/*                       a new RRR for the cluster was found by ODRRF */
-/*                       update shift and store it */
-			    ssigma = sigma + tau;
-			    z__[iend + (newftt + 1) * z_dim1] = ssigma;
-/*                       WORK() are the midpoints and WERR() the semi-width */
-/*                       Note that the entries in W are unchanged. */
-			    i__4 = newlst;
-			    for (k = newfst; k <= i__4; ++k) {
-				fudge = eps * 3. * (d__1 = work[wbegin + k - 
-					1], fabs(d__1));
-				work[wbegin + k - 1] -= tau;
-				fudge += eps * 4. * (d__1 = work[wbegin + k - 
-					1], fabs(d__1));
-/*                          Fudge errors */
-				werr[wbegin + k - 1] += fudge;
-/*                          Gaps are not fudged. Provided that WERR is small */
-/*                          when eigenvalues are close, a zero gap indicates */
-/*                          that a new representation is needed for resolving */
-/*                          the cluster. A fudge could lead to a wrong decision */
-/*                          of judging eigenvalues 'separated' which in */
-/*                          reality are not. This could have a negative impact */
-/*                          on the orthogonality of the computed eigenvectors. */
-/* L116: */
-			    }
-			    ++nclus;
-			    k = newcls + (nclus << 1);
-			    iwork[k - 1] = newfst;
-			    iwork[k] = newlst;
-			} else {
-			    *info = -2;
-			    return 0;
-			}
-		    } else {
-
-/*                    Compute eigenvector of singleton */
-
-			iter = 0;
-
-			tol = log((double) in) * 4. * eps;
-
-			k = newfst;
-			windex = wbegin + k - 1;
-/* Computing MAX */
-			i__4 = windex - 1;
-			windmn = imax(i__4,1);
-/* Computing MIN */
-			i__4 = windex + 1;
-			windpl = imin(i__4,*m);
-			lambda = work[windex];
-			++done;
-/*                    Check if eigenvector computation is to be skipped */
-			if (windex < *dol || windex > *dou) {
-			    eskip = TRUE_;
-			    goto L125;
-			} else {
-			    eskip = FALSE_;
-			}
-			left = work[windex] - werr[windex];
-			right = work[windex] + werr[windex];
-			indeig = indexw[windex];
-/*                    Note that since we compute the eigenpairs for a child, */
-/*                    all eigenvalue approximations are w.r.t the same shift. */
-/*                    In this case, the entries in WORK should be used for */
-/*                    computing the gaps since they exhibit even very small */
-/*                    differences in the eigenvalues, as opposed to the */
-/*                    entries in W which might "look" the same. */
-			if (k == 1) {
-/*                       In the case RANGE='I' and with not much initial */
-/*                       accuracy in LAMBDA and VL, the formula */
-/*                       LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
-/*                       can lead to an overestimation of the left gap and */
-/*                       thus to inadequately early RQI 'convergence'. */
-/*                       Prevent this by forcing a small left gap. */
-/* Computing MAX */
-			    d__1 = fabs(left), d__2 = fabs(right);
-			    lgap = eps * fmax(d__1,d__2);
-			} else {
-			    lgap = wgap[windmn];
-			}
-			if (k == im) {
-/*                       In the case RANGE='I' and with not much initial */
-/*                       accuracy in LAMBDA and VU, the formula */
-/*                       can lead to an overestimation of the right gap and */
-/*                       thus to inadequately early RQI 'convergence'. */
-/*                       Prevent this by forcing a small right gap. */
-/* Computing MAX */
-			    d__1 = fabs(left), d__2 = fabs(right);
-			    rgap = eps * fmax(d__1,d__2);
-			} else {
-			    rgap = wgap[windex];
-			}
-			gap = fmin(lgap,rgap);
-			if (k == 1 || k == im) {
-/*                       The eigenvector support can become wrong */
-/*                       because significant entries could be cut off due to a */
-/*                       large GAPTOL parameter in LAR1V. Prevent this. */
-			    gaptol = 0.;
-			} else {
-			    gaptol = gap * eps;
-			}
-			isupmn = in;
-			isupmx = 1;
-/*                    Update WGAP so that it holds the minimum gap */
-/*                    to the left or the right. This is crucial in the */
-/*                    case where bisection is used to ensure that the */
-/*                    eigenvalue is refined up to the required precision. */
-/*                    The correct value is restored afterwards. */
-			savgap = wgap[windex];
-			wgap[windex] = gap;
-/*                    We want to use the Rayleigh Quotient Correction */
-/*                    as often as possible since it converges quadratically */
-/*                    when we are close enough to the desired eigenvalue. */
-/*                    However, the Rayleigh Quotient can have the wrong sign */
-/*                    and lead us away from the desired eigenvalue. In this */
-/*                    case, the best we can do is to use bisection. */
-			usedbs = FALSE_;
-			usedrq = FALSE_;
-/*                    Bisection is initially turned off unless it is forced */
-			needbs = ! tryrqc;
-L120:
-/*                    Check if bisection should be used to refine eigenvalue */
-			if (needbs) {
-/*                       Take the bisection as new iterate */
-			    usedbs = TRUE_;
-			    itmp1 = iwork[iindr + windex];
-			    offset = indexw[wbegin] - 1;
-			    d__1 = eps * 2.;
-			    odrrb(&in, &d__[ibegin], &work[indlld + ibegin 
-				    - 1], &indeig, &indeig, &c_b5, &d__1, &
-				    offset, &work[wbegin], &wgap[wbegin], &
-				    werr[wbegin], &work[indwrk], &iwork[
-				    iindwk], pivmin, &spdiam, &itmp1, &iinfo);
-			    if (iinfo != 0) {
-				*info = -3;
-				return 0;
-			    }
-			    lambda = work[windex];
-/*                       Reset twist index from inaccurate LAMBDA to */
-/*                       force computation of true MINGMA */
-			    iwork[iindr + windex] = 0;
-			}
-/*                    Given LAMBDA, compute the eigenvector. */
-			L__1 = ! usedbs;
-			odr1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
-				ibegin], &work[indld + ibegin - 1], &work[
-				indlld + ibegin - 1], pivmin, &gaptol, &z__[
-				ibegin + windex * z_dim1], &L__1, &negcnt, &
-				ztz, &mingma, &iwork[iindr + windex], &isuppz[
-				(windex << 1) - 1], &nrminv, &resid, &rqcorr, 
-				&work[indwrk]);
-			if (iter == 0) {
-			    bstres = resid;
-			    bstw = lambda;
-			} else if (resid < bstres) {
-			    bstres = resid;
-			    bstw = lambda;
-			}
-/* Computing MIN */
-			i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
-			isupmn = imin(i__4,i__5);
-/* Computing MAX */
-			i__4 = isupmx, i__5 = isuppz[windex * 2];
-			isupmx = imax(i__4,i__5);
-			++iter;
-/*                    sin alpha <= |resid|/gap */
-/*                    Note that both the residual and the gap are */
-/*                    proportional to the matrix, so ||T|| doesn't play */
-/*                    a role in the quotient */
-
-/*                    Convergence test for Rayleigh-Quotient iteration */
-/*                    (omitted when Bisection has been used) */
-
-			if (resid > tol * gap && fabs(rqcorr) > rqtol * fabs(
-				lambda) && ! usedbs) {
-/*                       We need to check that the RQCORR update doesn't */
-/*                       move the eigenvalue away from the desired one and */
-/*                       towards a neighbor. -> protection with bisection */
-			    if (indeig <= negcnt) {
-/*                          The wanted eigenvalue lies to the left */
-				sgndef = -1.;
-			    } else {
-/*                          The wanted eigenvalue lies to the right */
-				sgndef = 1.;
-			    }
-/*                       We only use the RQCORR if it improves the */
-/*                       the iterate reasonably. */
-			    if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 
-				    right && lambda + rqcorr >= left) {
-				usedrq = TRUE_;
-/*                          Store new midpoint of bisection interval in WORK */
-				if (sgndef == 1.) {
-/*                             The current LAMBDA is on the left of the true */
-/*                             eigenvalue */
-				    left = lambda;
-/*                             We prefer to assume that the error estimate */
-/*                             is correct. We could make the interval not */
-/*                             as a bracket but to be modified if the RQCORR */
-/*                             chooses to. In this case, the RIGHT side should */
-/*                             be modified as follows: */
-/*                              RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
-				} else {
-/*                             The current LAMBDA is on the right of the true */
-/*                             eigenvalue */
-				    right = lambda;
-/*                             See comment about assuming the error estimate is */
-/*                             correct above. */
-/*                              LEFT = MIN(LEFT, LAMBDA + RQCORR) */
-				}
-				work[windex] = (right + left) * .5;
-/*                          Take RQCORR since it has the correct sign and */
-/*                          improves the iterate reasonably */
-				lambda += rqcorr;
-/*                          Update width of error interval */
-				werr[windex] = (right - left) * .5;
-			    } else {
-				needbs = TRUE_;
-			    }
-			    if (right - left < rqtol * fabs(lambda)) {
-/*                             The eigenvalue is computed to bisection accuracy */
-/*                             compute eigenvector and stop */
-				usedbs = TRUE_;
-				goto L120;
-			    } else if (iter < 10) {
-				goto L120;
-			    } else if (iter == 10) {
-				needbs = TRUE_;
-				goto L120;
-			    } else {
-				*info = 5;
-				return 0;
-			    }
-			} else {
-			    stp2ii = FALSE_;
-			    if (usedrq && usedbs && bstres <= resid) {
-				lambda = bstw;
-				stp2ii = TRUE_;
-			    }
-			    if (stp2ii) {
-/*                          improve error angle by second step */
-				L__1 = ! usedbs;
-				odr1v(&in, &c__1, &in, &lambda, &d__[ibegin]
-, &l[ibegin], &work[indld + ibegin - 
-					1], &work[indlld + ibegin - 1], 
-					pivmin, &gaptol, &z__[ibegin + windex 
-					* z_dim1], &L__1, &negcnt, &ztz, &
-					mingma, &iwork[iindr + windex], &
-					isuppz[(windex << 1) - 1], &nrminv, &
-					resid, &rqcorr, &work[indwrk]);
-			    }
-			    work[windex] = lambda;
-			}
-
-/*                    Compute FP-vector support w.r.t. whole matrix */
-
-			isuppz[(windex << 1) - 1] += oldien;
-			isuppz[windex * 2] += oldien;
-			zfrom = isuppz[(windex << 1) - 1];
-			zto = isuppz[windex * 2];
-			isupmn += oldien;
-			isupmx += oldien;
-/*                    Ensure vector is ok if support in the RQI has changed */
-			if (isupmn < zfrom) {
-			    i__4 = zfrom - 1;
-			    for (ii = isupmn; ii <= i__4; ++ii) {
-				z__[ii + windex * z_dim1] = 0.;
-/* L122: */
-			    }
-			}
-			if (isupmx > zto) {
-			    i__4 = isupmx;
-			    for (ii = zto + 1; ii <= i__4; ++ii) {
-				z__[ii + windex * z_dim1] = 0.;
-/* L123: */
-			    }
-			}
-			i__4 = zto - zfrom + 1;
-			odscal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 
-                               &c__1);
-L125:
-/*                    Update W */
-			w[windex] = lambda + sigma;
-/*                    Recompute the gaps on the left and right */
-/*                    But only allow them to become larger and not */
-/*                    smaller (which can only happen through "bad" */
-/*                    cancellation and doesn't reflect the theory */
-/*                    where the initial gaps are underestimated due */
-/*                    to WERR being too crude.) */
-			if (! eskip) {
-			    if (k > 1) {
-/* Computing MAX */
-				d__1 = wgap[windmn], d__2 = w[windex] - werr[
-					windex] - w[windmn] - werr[windmn];
-				wgap[windmn] = fmax(d__1,d__2);
-			    }
-			    if (windex < wend) {
-/* Computing MAX */
-				d__1 = savgap, d__2 = w[windpl] - werr[windpl]
-					 - w[windex] - werr[windex];
-				wgap[windex] = fmax(d__1,d__2);
-			    }
-			}
-			++idone;
-		    }
-/*                 here ends the code for the current child */
-
-L139:
-/*                 Proceed to any remaining child nodes */
-		    newfst = j + 1;
-L140:
-		    ;
-		}
-/* L150: */
-	    }
-	    ++ndepth;
-	    goto L40;
-	}
-	ibegin = iend + 1;
-	wbegin = wend + 1;
-L170:
-	;
-    }
-
-    return 0;
-
-/*     End of ODRRV */
-
-} /* odrrv_ */
diff --git a/external/pmrrr/src/lapack/odruv.c b/external/pmrrr/src/lapack/odruv.c
deleted file mode 100644
index 170bf103a6..0000000000
--- a/external/pmrrr/src/lapack/odruv.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/* dlaruv.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imax(a,b) ( (a) > (b) ? (a) : (b) )
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-
-/* Subroutine */ 
-int odruv(int *iseed, int *n, double *x)
-{
-    /* Initialized data */
-
-    static int mm[512]	/* was [128][4] */ = { 494,2637,255,2008,1253,
-	    3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016,
-	    154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657,
-	    3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797,
-	    1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287,
-	    2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094,
-	    1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119,
-	    3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090,
-	    3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364,
-	    1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573,
-	    1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46,
-	    3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019,
-	    1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640,
-	    2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336,
-	    1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168,
-	    1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270,
-	    2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631,
-	    1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948,
-	    1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716,
-	    1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966,
-	    758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078,
-	    3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125,
-	    2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466,
-	    4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449,
-	    1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922,
-	    2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039,
-	    1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76,
-	    3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888,
-	    1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549,
-	    1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673,
-	    541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157,
-	    1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85,
-	    3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941,
-	    929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997,
-	    1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909,
-	    2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141,
-	    249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825,
-	    157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821,
-	    3537,517,3017,2141,1537 };
-
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    int i__, i1, i2, i3, i4, it1, it2, it3, it4;
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODRUV returns a vector of n random real numbers from a uniform (0,1) */
-/*  distribution (n <= 128). */
-
-/*  This is an auxiliary routine called by DLARNV and ZLARNV. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  ISEED   (input/output) INT array, dimension (4) */
-/*          On entry, the seed of the random number generator; the array */
-/*          elements must be between 0 and 4095, and ISEED(4) must be */
-/*          odd. */
-/*          On exit, the seed is updated. */
-
-/*  N       (input) INT */
-/*          The number of random numbers to be generated. N <= 128. */
-
-/*  X       (output) DOUBLE PRECISION array, dimension (N) */
-/*          The generated random numbers. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  This routine uses a multiplicative congruential method with modulus */
-/*  2**48 and multiplier 33952834046453 (see G.S.Fishman, */
-/*  'Multiplicative congruential random number generators with modulus */
-/*  2**b: an exhaustive analysis for b = 32 and a partial analysis for */
-/*  b = 48', Math. Comp. 189, pp 331-344, 1990). */
-
-/*  48-bit ints are stored in 4 int array elements with 12 bits */
-/*  per element. Hence the routine is portable across machines with */
-/*  ints of 32 bits or more. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Local Arrays .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Data statements .. */
-    /* Parameter adjustments */
-    --iseed;
-    --x;
-
-    /* Function Body */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    i1 = iseed[1];
-    i2 = iseed[2];
-    i3 = iseed[3];
-    i4 = iseed[4];
-
-    i__1 = imin(*n,128);
-    for (i__ = 1; i__ <= i__1; ++i__) {
-
-L20:
-
-/*        Multiply the seed by i-th power of the multiplier modulo 2**48 */
-
-	it4 = i4 * mm[i__ + 383];
-	it3 = it4 / 4096;
-	it4 -= it3 << 12;
-	it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255];
-	it2 = it3 / 4096;
-	it3 -= it2 << 12;
-	it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + 
-		127];
-	it1 = it2 / 4096;
-	it2 -= it1 << 12;
-	it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + 
-		127] + i4 * mm[i__ - 1];
-	it1 %= 4096;
-
-/*        Convert 48-bit int to a real number in the interval (0,1) */
-
-	x[i__] = ((double) it1 + ((double) it2 + ((double) it3 + (
-		double) it4 * 2.44140625e-4) * 2.44140625e-4) * 
-		2.44140625e-4) * 2.44140625e-4;
-
-	if (x[i__] == 1.) {
-/*           If a real number has n bits of precision, and the first */
-/*           n bits of the 48-bit int above happen to be all 1 (which */
-/*           will occur about once every 2**n calls), then X( I ) will */
-/*           be rounded to exactly 1.0. */
-/*           Since X( I ) is not supposed to return exactly 0.0 or 1.0, */
-/*           the statistically correct thing to do in this situation is */
-/*           simply to iterate again. */
-/*           N.B. the case X( I ) = 0.0 should not be possible. */
-	    i1 += 2;
-	    i2 += 2;
-	    i3 += 2;
-	    i4 += 2;
-	    goto L20;
-	}
-
-/* L10: */
-    }
-
-/*     Return final value of seed */
-
-    iseed[1] = it1;
-    iseed[2] = it2;
-    iseed[3] = it3;
-    iseed[4] = it4;
-    return 0;
-
-/*     End of ODRUV */
-
-} /* odruv_ */
diff --git a/external/pmrrr/src/lapack/odset.c b/external/pmrrr/src/lapack/odset.c
deleted file mode 100644
index c9d5e1003e..0000000000
--- a/external/pmrrr/src/lapack/odset.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/* dlaset.f -- translated by f2c (version 20061008).
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#define imin(a,b) ( (a) < (b) ? (a) : (b) )
-
-/* Subroutine */ int odset(char *uplo, int *m, int *n, double *alpha, 
-double *beta, double *a, int *lda)
-{
-    /* System generated locals */
-    int a_dim1, a_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    int i__, j;
-    extern int olsame(char *, char *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSET initializes an m-by-n matrix A to BETA on the diagonal and */
-/*  ALPHA on the offdiagonals. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  UPLO    (input) CHARACTER*1 */
-/*          Specifies the part of the matrix A to be set. */
-/*          = 'U':      Upper triangular part is set; the strictly lower */
-/*                      triangular part of A is not changed. */
-/*          = 'L':      Lower triangular part is set; the strictly upper */
-/*                      triangular part of A is not changed. */
-/*          Otherwise:  All of the matrix A is set. */
-
-/*  M       (input) INT */
-/*          The number of rows of the matrix A.  M >= 0. */
-
-/*  N       (input) INT */
-/*          The number of columns of the matrix A.  N >= 0. */
-
-/*  ALPHA   (input) DOUBLE PRECISION */
-/*          The constant to which the offdiagonal elements are to be set. */
-
-/*  BETA    (input) DOUBLE PRECISION */
-/*          The constant to which the diagonal elements are to be set. */
-
-/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
-/*          On exit, the leading m-by-n submatrix of A is set as follows: */
-
-/*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */
-/*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */
-/*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */
-
-/*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */
-
-/*  LDA     (input) INT */
-/*          The leading dimension of the array A.  LDA >= max(1,M). */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-
-    /* Function Body */
-    if (olsame(uplo, "U")) {
-
-/*        Set the strictly upper triangular or trapezoidal part of the */
-/*        array to ALPHA. */
-
-	i__1 = *n;
-	for (j = 2; j <= i__1; ++j) {
-/* Computing MIN */
-	    i__3 = j - 1;
-	    i__2 = imin(i__3,*m);
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		a[i__ + j * a_dim1] = *alpha;
-/* L10: */
-	    }
-/* L20: */
-	}
-
-    } else if (olsame(uplo, "L")) {
-
-/*        Set the strictly lower triangular or trapezoidal part of the */
-/*        array to ALPHA. */
-
-	i__1 = imin(*m,*n);
-	for (j = 1; j <= i__1; ++j) {
-	    i__2 = *m;
-	    for (i__ = j + 1; i__ <= i__2; ++i__) {
-		a[i__ + j * a_dim1] = *alpha;
-/* L30: */
-	    }
-/* L40: */
-	}
-
-    } else {
-
-/*        Set the leading m-by-n submatrix to ALPHA. */
-
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-	    i__2 = *m;
-	    for (i__ = 1; i__ <= i__2; ++i__) {
-		a[i__ + j * a_dim1] = *alpha;
-/* L50: */
-	    }
-/* L60: */
-	}
-    }
-
-/*     Set the first imin(M,N) diagonal elements to BETA. */
-
-    i__1 = imin(*m,*n);
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	a[i__ + i__ * a_dim1] = *beta;
-/* L70: */
-    }
-
-    return 0;
-
-/*     End of ODSET */
-
-} /* odset_ */
diff --git a/external/pmrrr/src/lapack/odsnan.c b/external/pmrrr/src/lapack/odsnan.c
deleted file mode 100644
index 7def93e524..0000000000
--- a/external/pmrrr/src/lapack/odsnan.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* dlaisnan.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-int odsnan(double *din1, double *din2)
-{
-    /* System generated locals */
-    int ret_val;
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  This routine is not for general use.  It exists solely to avoid */
-/*  over-optimization in DISNAN. */
-
-/*  ODSNAN checks for NaNs by comparing its two arguments for */
-/*  inequality.  NaN is the only floating-point value where NaN != NaN */
-/*  returns .TRUE.  To check for NaNs, pass the same variable as both */
-/*  arguments. */
-
-/*  A compiler must assume that the two arguments are */
-/*  not the same variable, and the test will not be optimized away. */
-/*  Interprocedural or whole-program optimization may delete this */
-/*  test.  The ISNAN functions will be replaced by the correct */
-/*  Fortran 03 intrinsic once the intrinsic is widely available. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  DIN1     (input) DOUBLE PRECISION */
-/*  DIN2     (input) DOUBLE PRECISION */
-/*          Two numbers to compare for inequality. */
-
-/*  ===================================================================== */
-
-/*  .. Executable Statements .. */
-    ret_val = *din1 != *din2;
-    return ret_val;
-} /* odsnan_ */
-
diff --git a/external/pmrrr/src/lapack/odsq2.c b/external/pmrrr/src/lapack/odsq2.c
deleted file mode 100644
index 12c823074d..0000000000
--- a/external/pmrrr/src/lapack/odsq2.c
+++ /dev/null
@@ -1,595 +0,0 @@
-/* dlasq2.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Table of constant values */
-static int c__1 = 1;
-static int c__2 = 2;
-/*
-static int c__10 = 10;
-static int c__3 = 3;
-static int c__4 = 4;
-static int c__11 = 11;
-*/
-
-/* Subroutine */ 
-int odsq2(int *n, double *z__, int *info)
-{
-    /* System generated locals */
-    int i__1, i__2, i__3;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    double d__, e, g;
-    int k;
-    double s, t;
-    int i0, i4, n0;
-    double dn;
-    int pp;
-    double dn1, dn2, dee, eps, tau, tol;
-    int ipn4;
-    double tol2;
-    int ieee;
-    int nbig;
-    double dmin__, emin, emax;
-    int kmin, ndiv, iter;
-    double qmin, temp, qmax, zmax;
-    int splt;
-    double dmin1, dmin2;
-    int nfail;
-    double desig, trace, sigma;
-    int iinfo, ttype;
-    extern /* Subroutine */ int odsq3(int *, int *, double *, 
-	    int *, double *, double *, double *, double *, 
-	     int *, int *, int *, int *, int *, 
-	    double *, double *, double *, double *, 
-	    double *, double *, double *);
-    // extern double odmch(char *);
-    double deemin;
-    int iwhila, iwhilb;
-    double oldemn, safmin;
-    extern /* Subroutine */ int oerbla(char *, int *);
-    /* extern int oienv(int *, char *, char *, int *, int *,  */
-    /* 	    int *, int *); */
-    extern /* Subroutine */ int odsrt(char *, int *, double *, 
-	    int *);
-
-
-/*  -- LAPACK routine (version 3.2)                                    -- */
-
-/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
-/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
-/*  -- Berkeley                                                        -- */
-/*  -- November 2008                                                   -- */
-
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSQ2 computes all the eigenvalues of the symmetric positive */
-/*  definite tridiagonal matrix associated with the qd array Z to high */
-/*  relative accuracy are computed to high relative accuracy, in the */
-/*  absence of denormalization, underflow and overflow. */
-
-/*  To see the relation of Z to the tridiagonal matrix, let L be a */
-/*  unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
-/*  let U be an upper bidiagonal matrix with 1's above and diagonal */
-/*  Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
-/*  symmetric tridiagonal to which it is similar. */
-
-/*  Note : ODSQ2 defines a int variable, IEEE, which is true */
-/*  on machines which follow ieee-754 floating-point standard in their */
-/*  handling of infinities and NaNs, and false otherwise. This variable */
-/*  is passed to ODSQ3. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N     (input) INT */
-/*        The number of rows and columns in the matrix. N >= 0. */
-
-/*  Z     (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
-/*        On entry Z holds the qd array. On exit, entries 1 to N hold */
-/*        the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
-/*        trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
-/*        N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
-/*        holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
-/*        shifts that failed. */
-
-/*  INFO  (output) INT */
-/*        = 0: successful exit */
-/*        < 0: if the i-th argument is a scalar and had an illegal */
-/*             value, then INFO = -i, if the i-th argument is an */
-/*             array and the j-entry had an illegal value, then */
-/*             INFO = -(i*100+j) */
-/*        > 0: the algorithm failed */
-/*              = 1, a split was marked by a positive value in E */
-/*              = 2, current block of Z not diagonalized after 30*N */
-/*                   iterations (in inner while loop) */
-/*              = 3, termination criterion of outer while loop not met */
-/*                   (program created more than N unreduced blocks) */
-
-/*  Further Details */
-/*  =============== */
-/*  Local Variables: I0:N0 defines a current unreduced segment of Z. */
-/*  The shifts are accumulated in SIGMA. Iteration count is in ITER. */
-/*  Ping-pong is controlled by PP (alternates between 0 and 1). */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input arguments. */
-/*     (in case ODSQ2 is not called by DLASQ1) */
-
-    /* Parameter adjustments */
-    --z__;
-
-    /* Function Body */
-    *info = 0;
-    eps = DBL_EPSILON; // odmch("Precision");
-    safmin = DBL_MIN; // odmch("Safe minimum");
-    tol = eps * 100.;
-/* Computing 2nd power */
-    d__1 = tol;
-    tol2 = d__1 * d__1;
-
-    if (*n < 0) {
-	*info = -1;
-	oerbla("ODSQ2", &c__1);
-	return 0;
-    } else if (*n == 0) {
-	return 0;
-    } else if (*n == 1) {
-
-/*        1-by-1 case. */
-
-	if (z__[1] < 0.) {
-	    *info = -201;
-	    oerbla("ODSQ2", &c__2);
-	}
-	return 0;
-    } else if (*n == 2) {
-
-/*        2-by-2 case. */
-
-	if (z__[2] < 0. || z__[3] < 0.) {
-	    *info = -2;
-	    oerbla("ODSQ2", &c__2);
-	    return 0;
-	} else if (z__[3] > z__[1]) {
-	    d__ = z__[3];
-	    z__[3] = z__[1];
-	    z__[1] = d__;
-	}
-	z__[5] = z__[1] + z__[2] + z__[3];
-	if (z__[2] > z__[3] * tol2) {
-	    t = (z__[1] - z__[3] + z__[2]) * .5;
-	    s = z__[3] * (z__[2] / t);
-	    if (s <= t) {
-		s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
-	    } else {
-		s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
-	    }
-	    t = z__[1] + (s + z__[2]);
-	    z__[3] *= z__[1] / t;
-	    z__[1] = t;
-	}
-	z__[2] = z__[3];
-	z__[6] = z__[2] + z__[1];
-	return 0;
-    }
-
-/*     Check for negative data and compute sums of q's and e's. */
-
-    z__[*n * 2] = 0.;
-    emin = z__[2];
-    qmax = 0.;
-    zmax = 0.;
-    d__ = 0.;
-    e = 0.;
-
-    i__1 = *n - 1 << 1;
-    for (k = 1; k <= i__1; k += 2) {
-	if (z__[k] < 0.) {
-	    *info = -(k + 200);
-	    oerbla("ODSQ2", &c__2);
-	    return 0;
-	} else if (z__[k + 1] < 0.) {
-	    *info = -(k + 201);
-	    oerbla("ODSQ2", &c__2);
-	    return 0;
-	}
-	d__ += z__[k];
-	e += z__[k + 1];
-/* Computing MAX */
-	d__1 = qmax, d__2 = z__[k];
-	qmax = fmax(d__1,d__2);
-/* Computing MIN */
-	d__1 = emin, d__2 = z__[k + 1];
-	emin = fmin(d__1,d__2);
-/* Computing MAX */
-	d__1 = fmax(qmax,zmax), d__2 = z__[k + 1];
-	zmax = fmax(d__1,d__2);
-/* L10: */
-    }
-    if (z__[(*n << 1) - 1] < 0.) {
-	*info = -((*n << 1) + 199);
-	oerbla("ODSQ2", &c__2);
-	return 0;
-    }
-    d__ += z__[(*n << 1) - 1];
-/* Computing MAX */
-    d__1 = qmax, d__2 = z__[(*n << 1) - 1];
-    qmax = fmax(d__1,d__2);
-    zmax = fmax(qmax,zmax);
-
-/*     Check for diagonality. */
-
-    if (e == 0.) {
-	i__1 = *n;
-	for (k = 2; k <= i__1; ++k) {
-	    z__[k] = z__[(k << 1) - 1];
-/* L20: */
-	}
-	odsrt("D", n, &z__[1], &iinfo);
-	z__[(*n << 1) - 1] = d__;
-	return 0;
-    }
-
-    trace = d__ + e;
-
-/*     Check for zero data. */
-
-    if (trace == 0.) {
-	z__[(*n << 1) - 1] = 0.;
-	return 0;
-    }
-
-/*     Check whether the machine is IEEE conformable. */
-    /* ieee = oienv(&c__10, "ODSQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && oienv(&c__11, "ODSQ2", "N", &c__1, &c__2,  */
-    /* 	     &c__3, &c__4) == 1; */
-    ieee = 1;
-
-/*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
-
-    for (k = *n << 1; k >= 2; k += -2) {
-	z__[k * 2] = 0.;
-	z__[(k << 1) - 1] = z__[k];
-	z__[(k << 1) - 2] = 0.;
-	z__[(k << 1) - 3] = z__[k - 1];
-/* L30: */
-    }
-
-    i0 = 1;
-    n0 = *n;
-
-/*     Reverse the qd-array, if warranted. */
-
-    if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
-	ipn4 = i0 + n0 << 2;
-	i__1 = i0 + n0 - 1 << 1;
-	for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
-	    temp = z__[i4 - 3];
-	    z__[i4 - 3] = z__[ipn4 - i4 - 3];
-	    z__[ipn4 - i4 - 3] = temp;
-	    temp = z__[i4 - 1];
-	    z__[i4 - 1] = z__[ipn4 - i4 - 5];
-	    z__[ipn4 - i4 - 5] = temp;
-/* L40: */
-	}
-    }
-
-/*     Initial split checking via dqd and Li's test. */
-
-    pp = 0;
-
-    for (k = 1; k <= 2; ++k) {
-
-	d__ = z__[(n0 << 2) + pp - 3];
-	i__1 = (i0 << 2) + pp;
-	for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) {
-	    if (z__[i4 - 1] <= tol2 * d__) {
-		z__[i4 - 1] = -0.;
-		d__ = z__[i4 - 3];
-	    } else {
-		d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
-	    }
-/* L50: */
-	}
-
-/*        dqd maps Z to ZZ plus Li's test. */
-
-	emin = z__[(i0 << 2) + pp + 1];
-	d__ = z__[(i0 << 2) + pp - 3];
-	i__1 = (n0 - 1 << 2) + pp;
-	for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
-	    z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
-	    if (z__[i4 - 1] <= tol2 * d__) {
-		z__[i4 - 1] = -0.;
-		z__[i4 - (pp << 1) - 2] = d__;
-		z__[i4 - (pp << 1)] = 0.;
-		d__ = z__[i4 + 1];
-	    } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && 
-		    safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
-		temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
-		z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
-		d__ *= temp;
-	    } else {
-		z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
-			pp << 1) - 2]);
-		d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
-	    }
-/* Computing MIN */
-	    d__1 = emin, d__2 = z__[i4 - (pp << 1)];
-	    emin = fmin(d__1,d__2);
-/* L60: */
-	}
-	z__[(n0 << 2) - pp - 2] = d__;
-
-/*        Now find qmax. */
-
-	qmax = z__[(i0 << 2) - pp - 2];
-	i__1 = (n0 << 2) - pp - 2;
-	for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
-/* Computing MAX */
-	    d__1 = qmax, d__2 = z__[i4];
-	    qmax = fmax(d__1,d__2);
-/* L70: */
-	}
-
-/*        Prepare for the next iteration on K. */
-
-	pp = 1 - pp;
-/* L80: */
-    }
-
-/*     Initialise variables to pass to ODSQ3. */
-
-    ttype = 0;
-    dmin1 = 0.;
-    dmin2 = 0.;
-    dn = 0.;
-    dn1 = 0.;
-    dn2 = 0.;
-    g = 0.;
-    tau = 0.;
-
-    iter = 2;
-    nfail = 0;
-    ndiv = n0 - i0 << 1;
-
-    i__1 = *n + 1;
-    for (iwhila = 1; iwhila <= i__1; ++iwhila) {
-	if (n0 < 1) {
-	    goto L170;
-	}
-
-/*        While array unfinished do */
-
-/*        E(N0) holds the value of SIGMA when submatrix in I0:N0 */
-/*        splits from the rest of the array, but is negated. */
-
-	desig = 0.;
-	if (n0 == *n) {
-	    sigma = 0.;
-	} else {
-	    sigma = -z__[(n0 << 2) - 1];
-	}
-	if (sigma < 0.) {
-	    *info = 1;
-	    return 0;
-	}
-
-/*        Find last unreduced submatrix's top index I0, find QMAX and */
-/*        EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
-
-	emax = 0.;
-	if (n0 > i0) {
-	    emin = (d__1 = z__[(n0 << 2) - 5], fabs(d__1));
-	} else {
-	    emin = 0.;
-	}
-	qmin = z__[(n0 << 2) - 3];
-	qmax = qmin;
-	for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
-	    if (z__[i4 - 5] <= 0.) {
-		goto L100;
-	    }
-	    if (qmin >= emax * 4.) {
-/* Computing MIN */
-		d__1 = qmin, d__2 = z__[i4 - 3];
-		qmin = fmin(d__1,d__2);
-/* Computing MAX */
-		d__1 = emax, d__2 = z__[i4 - 5];
-		emax = fmax(d__1,d__2);
-	    }
-/* Computing MAX */
-	    d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
-	    qmax = fmax(d__1,d__2);
-/* Computing MIN */
-	    d__1 = emin, d__2 = z__[i4 - 5];
-	    emin = fmin(d__1,d__2);
-/* L90: */
-	}
-	i4 = 4;
-
-L100:
-	i0 = i4 / 4;
-	pp = 0;
-
-	if (n0 - i0 > 1) {
-	    dee = z__[(i0 << 2) - 3];
-	    deemin = dee;
-	    kmin = i0;
-	    i__2 = (n0 << 2) - 3;
-	    for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
-		dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
-		if (dee <= deemin) {
-		    deemin = dee;
-		    kmin = (i4 + 3) / 4;
-		}
-/* L110: */
-	    }
-	    if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * 
-		    .5) {
-		ipn4 = i0 + n0 << 2;
-		pp = 2;
-		i__2 = i0 + n0 - 1 << 1;
-		for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
-		    temp = z__[i4 - 3];
-		    z__[i4 - 3] = z__[ipn4 - i4 - 3];
-		    z__[ipn4 - i4 - 3] = temp;
-		    temp = z__[i4 - 2];
-		    z__[i4 - 2] = z__[ipn4 - i4 - 2];
-		    z__[ipn4 - i4 - 2] = temp;
-		    temp = z__[i4 - 1];
-		    z__[i4 - 1] = z__[ipn4 - i4 - 5];
-		    z__[ipn4 - i4 - 5] = temp;
-		    temp = z__[i4];
-		    z__[i4] = z__[ipn4 - i4 - 4];
-		    z__[ipn4 - i4 - 4] = temp;
-/* L120: */
-		}
-	    }
-	}
-
-/*        Put -(initial shift) into DMIN. */
-
-/* Computing MAX */
-	d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
-	dmin__ = -fmax(d__1,d__2);
-
-/*        Now I0:N0 is unreduced. */
-/*        PP = 0 for ping, PP = 1 for pong. */
-/*        PP = 2 indicates that flipping was applied to the Z array and */
-/*               and that the tests for deflation upon entry in ODSQ3 */
-/*               should not be performed. */
-
-	nbig = (n0 - i0 + 1) * 100;
-	i__2 = nbig;
-	for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
-	    if (i0 > n0) {
-		goto L150;
-	    }
-
-/*           While submatrix unfinished take a good dqds step. */
-
-	    odsq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
-		    nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
-		    dn1, &dn2, &g, &tau);
-
-	    pp = 1 - pp;
-
-/*           When EMIN is very small check for splits. */
-
-	    if (pp == 0 && n0 - i0 >= 3) {
-		if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
-			 sigma) {
-		    splt = i0 - 1;
-		    qmax = z__[(i0 << 2) - 3];
-		    emin = z__[(i0 << 2) - 1];
-		    oldemn = z__[i0 * 4];
-		    i__3 = n0 - 3 << 2;
-		    for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
-			if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= 
-				tol2 * sigma) {
-			    z__[i4 - 1] = -sigma;
-			    splt = i4 / 4;
-			    qmax = 0.;
-			    emin = z__[i4 + 3];
-			    oldemn = z__[i4 + 4];
-			} else {
-/* Computing MAX */
-			    d__1 = qmax, d__2 = z__[i4 + 1];
-			    qmax = fmax(d__1,d__2);
-/* Computing MIN */
-			    d__1 = emin, d__2 = z__[i4 - 1];
-			    emin = fmin(d__1,d__2);
-/* Computing MIN */
-			    d__1 = oldemn, d__2 = z__[i4];
-			    oldemn = fmin(d__1,d__2);
-			}
-/* L130: */
-		    }
-		    z__[(n0 << 2) - 1] = emin;
-		    z__[n0 * 4] = oldemn;
-		    i0 = splt + 1;
-		}
-	    }
-
-/* L140: */
-	}
-
-	*info = 2;
-	return 0;
-
-/*        end IWHILB */
-
-L150:
-
-/* L160: */
-	;
-    }
-
-    *info = 3;
-    return 0;
-
-/*     end IWHILA */
-
-L170:
-
-/*     Move q's to the front. */
-    i__1 = *n;
-    for (k = 2; k <= i__1; ++k) {
-	z__[k] = z__[(k << 2) - 3];
-/* L180: */
-    }
-
-/*     Sort and compute sum of eigenvalues. */
-    odsrt("D", n, &z__[1], &iinfo);
-
-    e = 0.;
-    for (k = *n; k >= 1; --k) {
-	e += z__[k];
-/* L190: */
-    }
-
-/*     Store trace, sum(eigenvalues) and information on performance. */
-    z__[(*n << 1) + 1] = trace;
-    z__[(*n << 1) + 2] = e;
-    z__[(*n << 1) + 3] = (double) iter;
-/* Computing 2nd power */
-    i__1 = *n;
-    z__[(*n << 1) + 4] = (double) ndiv / (double) (i__1 * i__1);
-    z__[(*n << 1) + 5] = nfail * 100. / (double) iter;
-    return 0;
-
-/*     End of ODSQ2 */
-
-} /* odsq2_ */
diff --git a/external/pmrrr/src/lapack/odsq3.c b/external/pmrrr/src/lapack/odsq3.c
deleted file mode 100644
index 624522ddec..0000000000
--- a/external/pmrrr/src/lapack/odsq3.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/* dlasq3.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ int odsq3(int *i0, int *n0, double *z__, 
-	int *pp, double *dmin__, double *sigma, double *desig, 
-	 double *qmax, int *nfail, int *iter, int *ndiv, 
-	int *ieee, int *ttype, double *dmin1, double *dmin2, 
-	double *dn, double *dn1, double *dn2, double *g, 
-	double *tau)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    double s, t;
-    int j4, nn;
-    double eps, tol;
-    int n0in, ipn4;
-    double tol2, temp;
-    extern /* Subroutine */ int odsq4(int *, int *, double *, 
-	    int *, int *, double *, double *, double *, 
-	    double *, double *, double *, double *, int *, 
-	     double *), odsq5(int *, int *, double *, 
-	    int *, double *, double *, double *, double *, 
-	     double *, double *, double *, int *), odsq6(
-	    int *, int *, double *, int *, double *, 
-	    double *, double *, double *, double *, 
-	    double *);
-    // extern double odmch(char *);
-    extern int odnan(double *);
-
-
-/*  -- LAPACK routine (version 3.2)                                    -- */
-
-/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
-/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
-/*  -- Berkeley                                                        -- */
-/*  -- November 2008                                                   -- */
-
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
-/*  In case of failure it changes shifts, and tries again until output */
-/*  is positive. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  I0     (input) INT */
-/*         First index. */
-
-/*  N0     (input) INT */
-/*         Last index. */
-
-/*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N ) */
-/*         Z holds the qd array. */
-
-/*  PP     (input/output) INT */
-/*         PP=0 for ping, PP=1 for pong. */
-/*         PP=2 indicates that flipping was applied to the Z array */
-/*         and that the initial tests for deflation should not be */
-/*         performed. */
-
-/*  DMIN   (output) DOUBLE PRECISION */
-/*         Minimum value of d. */
-
-/*  SIGMA  (output) DOUBLE PRECISION */
-/*         Sum of shifts used in current segment. */
-
-/*  DESIG  (input/output) DOUBLE PRECISION */
-/*         Lower order part of SIGMA */
-
-/*  QMAX   (input) DOUBLE PRECISION */
-/*         Maximum value of q. */
-
-/*  NFAIL  (output) INT */
-/*         Number of times shift was too big. */
-
-/*  ITER   (output) INT */
-/*         Number of iterations. */
-
-/*  NDIV   (output) INT */
-/*         Number of divisions. */
-
-/*  IEEE   (input) INT */
-/*         Flag for IEEE or non IEEE arithmetic (passed to ODSQ5). */
-
-/*  TTYPE  (input/output) INT */
-/*         Shift type. */
-
-/*  DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
-/*         These are passed as arguments in order to save their values */
-/*         between calls to ODSQ3. */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. External Function .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --z__;
-
-    /* Function Body */
-    n0in = *n0;
-    eps = DBL_EPSILON; // odmch("Precision");
-    tol = eps * 100.;
-/* Computing 2nd power */
-    d__1 = tol;
-    tol2 = d__1 * d__1;
-
-/*     Check for deflation. */
-
-L10:
-
-    if (*n0 < *i0) {
-	return 0;
-    }
-    if (*n0 == *i0) {
-	goto L20;
-    }
-    nn = (*n0 << 2) + *pp;
-    if (*n0 == *i0 + 1) {
-	goto L40;
-    }
-
-/*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
-
-    if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 
-	    4] > tol2 * z__[nn - 7]) {
-	goto L30;
-    }
-
-L20:
-
-    z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
-    --(*n0);
-    goto L10;
-
-/*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
-
-L30:
-
-    if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
-	    nn - 11]) {
-	goto L50;
-    }
-
-L40:
-
-    if (z__[nn - 3] > z__[nn - 7]) {
-	s = z__[nn - 3];
-	z__[nn - 3] = z__[nn - 7];
-	z__[nn - 7] = s;
-    }
-    if (z__[nn - 5] > z__[nn - 3] * tol2) {
-	t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
-	s = z__[nn - 3] * (z__[nn - 5] / t);
-	if (s <= t) {
-	    s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.)));
-	} else {
-	    s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
-	}
-	t = z__[nn - 7] + (s + z__[nn - 5]);
-	z__[nn - 3] *= z__[nn - 7] / t;
-	z__[nn - 7] = t;
-    }
-    z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
-    z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
-    *n0 += -2;
-    goto L10;
-
-L50:
-    if (*pp == 2) {
-	*pp = 0;
-    }
-
-/*     Reverse the qd-array, if warranted. */
-
-    if (*dmin__ <= 0. || *n0 < n0in) {
-	if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
-	    ipn4 = *i0 + *n0 << 2;
-	    i__1 = *i0 + *n0 - 1 << 1;
-	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-		temp = z__[j4 - 3];
-		z__[j4 - 3] = z__[ipn4 - j4 - 3];
-		z__[ipn4 - j4 - 3] = temp;
-		temp = z__[j4 - 2];
-		z__[j4 - 2] = z__[ipn4 - j4 - 2];
-		z__[ipn4 - j4 - 2] = temp;
-		temp = z__[j4 - 1];
-		z__[j4 - 1] = z__[ipn4 - j4 - 5];
-		z__[ipn4 - j4 - 5] = temp;
-		temp = z__[j4];
-		z__[j4] = z__[ipn4 - j4 - 4];
-		z__[ipn4 - j4 - 4] = temp;
-/* L60: */
-	    }
-	    if (*n0 - *i0 <= 4) {
-		z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
-		z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
-	    }
-/* Computing MIN */
-	    d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
-	    *dmin2 = fmin(d__1,d__2);
-/* Computing MIN */
-	    d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
-		    , d__1 = fmin(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
-	    z__[(*n0 << 2) + *pp - 1] = fmin(d__1,d__2);
-/* Computing MIN */
-	    d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
-		     fmin(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
-	    z__[(*n0 << 2) - *pp] = fmin(d__1,d__2);
-/* Computing MAX */
-	    d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = fmax(d__1,
-		    d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
-	    *qmax = fmax(d__1,d__2);
-	    *dmin__ = -0.;
-	}
-    }
-
-/*     Choose a shift. */
-
-    odsq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, 
-	    tau, ttype, g);
-
-/*     Call dqds until DMIN > 0. */
-
-L70:
-
-    odsq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, 
-	    ieee);
-
-    *ndiv += *n0 - *i0 + 2;
-    ++(*iter);
-
-/*     Check status. */
-
-    if (*dmin__ >= 0. && *dmin1 > 0.) {
-
-/*        Success. */
-
-	goto L90;
-
-    } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol 
-	    * (*sigma + *dn1) && fabs(*dn) < tol * *sigma) {
-
-/*        Convergence hidden by negative DN. */
-
-	z__[(*n0 - 1 << 2) - *pp + 2] = 0.;
-	*dmin__ = 0.;
-	goto L90;
-    } else if (*dmin__ < 0.) {
-
-/*        TAU too big. Select new TAU and try again. */
-
-	++(*nfail);
-	if (*ttype < -22) {
-
-/*           Failed twice. Play it safe. */
-
-	    *tau = 0.;
-	} else if (*dmin1 > 0.) {
-
-/*           Late failure. Gives excellent shift. */
-
-	    *tau = (*tau + *dmin__) * (1. - eps * 2.);
-	    *ttype += -11;
-	} else {
-
-/*           Early failure. Divide by 4. */
-
-	    *tau *= .25;
-	    *ttype += -12;
-	}
-	goto L70;
-    } else if (odnan(dmin__)) {
-
-/*        NaN. */
-
-	if (*tau == 0.) {
-	    goto L80;
-	} else {
-	    *tau = 0.;
-	    goto L70;
-	}
-    } else {
-
-/*        Possible underflow. Play it safe. */
-
-	goto L80;
-    }
-
-/*     Risk of underflow. */
-
-L80:
-    odsq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
-    *ndiv += *n0 - *i0 + 2;
-    ++(*iter);
-    *tau = 0.;
-
-L90:
-    if (*tau < *sigma) {
-	*desig += *tau;
-	t = *sigma + *desig;
-	*desig -= t - *sigma;
-    } else {
-	t = *sigma + *tau;
-	*desig = *sigma - (t - *tau) + *desig;
-    }
-    *sigma = t;
-
-    return 0;
-
-/*     End of ODSQ3 */
-
-} /* odsq3_ */
diff --git a/external/pmrrr/src/lapack/odsq4.c b/external/pmrrr/src/lapack/odsq4.c
deleted file mode 100644
index 6fef7a1a16..0000000000
--- a/external/pmrrr/src/lapack/odsq4.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/* dlasq4.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odsq4(int *i0, int *n0, double *z__, 
-	    int *pp, int *n0in, double *dmin__, double *dmin1, 
-	    double *dmin2, double *dn, double *dn1, double *dn2, 
-	    double *tau, int *ttype, double *g)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    double s, a2, b1, b2;
-    int i4, nn, np;
-    double gam, gap1, gap2;
-
-
-/*  -- LAPACK routine (version 3.2)                                    -- */
-
-/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
-/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
-/*  -- Berkeley                                                        -- */
-/*  -- November 2008                                                   -- */
-
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSQ4 computes an approximation TAU to the smallest eigenvalue */
-/*  using values of d from the previous transform. */
-
-/*  I0    (input) INT */
-/*        First index. */
-
-/*  N0    (input) INT */
-/*        Last index. */
-
-/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
-/*        Z holds the qd array. */
-
-/*  PP    (input) INT */
-/*        PP=0 for ping, PP=1 for pong. */
-
-/*  NOIN  (input) INT */
-/*        The value of N0 at start of EIGTEST. */
-
-/*  DMIN  (input) DOUBLE PRECISION */
-/*        Minimum value of d. */
-
-/*  DMIN1 (input) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ). */
-
-/*  DMIN2 (input) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
-
-/*  DN    (input) DOUBLE PRECISION */
-/*        d(N) */
-
-/*  DN1   (input) DOUBLE PRECISION */
-/*        d(N-1) */
-
-/*  DN2   (input) DOUBLE PRECISION */
-/*        d(N-2) */
-
-/*  TAU   (output) DOUBLE PRECISION */
-/*        This is the shift. */
-
-/*  TTYPE (output) INT */
-/*        Shift type. */
-
-/*  G     (input/output) REAL */
-/*        G is passed as an argument in order to save its value between */
-/*        calls to ODSQ4. */
-
-/*  Further Details */
-/*  =============== */
-/*  CNST1 = 9/16 */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     A negative DMIN forces the shift to take that absolute value */
-/*     TTYPE records the type of shift. */
-
-    /* Parameter adjustments */
-    --z__;
-
-    /* Function Body */
-    if (*dmin__ <= 0.) {
-	*tau = -(*dmin__);
-	*ttype = -1;
-	return 0;
-    }
-
-    nn = (*n0 << 2) + *pp;
-    if (*n0in == *n0) {
-
-/*        No eigenvalues deflated. */
-
-	if (*dmin__ == *dn || *dmin__ == *dn1) {
-
-	    b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
-	    b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
-	    a2 = z__[nn - 7] + z__[nn - 5];
-
-/*           Cases 2 and 3. */
-
-	    if (*dmin__ == *dn && *dmin1 == *dn1) {
-		gap2 = *dmin2 - a2 - *dmin2 * .25;
-		if (gap2 > 0. && gap2 > b2) {
-		    gap1 = a2 - *dn - b2 / gap2 * b2;
-		} else {
-		    gap1 = a2 - *dn - (b1 + b2);
-		}
-		if (gap1 > 0. && gap1 > b1) {
-/* Computing MAX */
-		    d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
-		    s = fmax(d__1,d__2);
-		    *ttype = -2;
-		} else {
-		    s = 0.;
-		    if (*dn > b1) {
-			s = *dn - b1;
-		    }
-		    if (a2 > b1 + b2) {
-/* Computing MIN */
-			d__1 = s, d__2 = a2 - (b1 + b2);
-			s = fmin(d__1,d__2);
-		    }
-/* Computing MAX */
-		    d__1 = s, d__2 = *dmin__ * .333;
-		    s = fmax(d__1,d__2);
-		    *ttype = -3;
-		}
-	    } else {
-
-/*              Case 4. */
-
-		*ttype = -4;
-		s = *dmin__ * .25;
-		if (*dmin__ == *dn) {
-		    gam = *dn;
-		    a2 = 0.;
-		    if (z__[nn - 5] > z__[nn - 7]) {
-			return 0;
-		    }
-		    b2 = z__[nn - 5] / z__[nn - 7];
-		    np = nn - 9;
-		} else {
-		    np = nn - (*pp << 1);
-		    b2 = z__[np - 2];
-		    gam = *dn1;
-		    if (z__[np - 4] > z__[np - 2]) {
-			return 0;
-		    }
-		    a2 = z__[np - 4] / z__[np - 2];
-		    if (z__[nn - 9] > z__[nn - 11]) {
-			return 0;
-		    }
-		    b2 = z__[nn - 9] / z__[nn - 11];
-		    np = nn - 13;
-		}
-
-/*              Approximate contribution to norm squared from I < NN-1. */
-
-		a2 += b2;
-		i__1 = (*i0 << 2) - 1 + *pp;
-		for (i4 = np; i4 >= i__1; i4 += -4) {
-		    if (b2 == 0.) {
-			goto L20;
-		    }
-		    b1 = b2;
-		    if (z__[i4] > z__[i4 - 2]) {
-			return 0;
-		    }
-		    b2 *= z__[i4] / z__[i4 - 2];
-		    a2 += b2;
-		    if (fmax(b2,b1) * 100. < a2 || .563 < a2) {
-			goto L20;
-		    }
-/* L10: */
-		}
-L20:
-		a2 *= 1.05;
-
-/*              Rayleigh quotient residual bound. */
-
-		if (a2 < .563) {
-		    s = gam * (1. - sqrt(a2)) / (a2 + 1.);
-		}
-	    }
-	} else if (*dmin__ == *dn2) {
-
-/*           Case 5. */
-
-	    *ttype = -5;
-	    s = *dmin__ * .25;
-
-/*           Compute contribution to norm squared from I > NN-2. */
-
-	    np = nn - (*pp << 1);
-	    b1 = z__[np - 2];
-	    b2 = z__[np - 6];
-	    gam = *dn2;
-	    if (z__[np - 8] > b2 || z__[np - 4] > b1) {
-		return 0;
-	    }
-	    a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
-
-/*           Approximate contribution to norm squared from I < NN-2. */
-
-	    if (*n0 - *i0 > 2) {
-		b2 = z__[nn - 13] / z__[nn - 15];
-		a2 += b2;
-		i__1 = (*i0 << 2) - 1 + *pp;
-		for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
-		    if (b2 == 0.) {
-			goto L40;
-		    }
-		    b1 = b2;
-		    if (z__[i4] > z__[i4 - 2]) {
-			return 0;
-		    }
-		    b2 *= z__[i4] / z__[i4 - 2];
-		    a2 += b2;
-		    if (fmax(b2,b1) * 100. < a2 || .563 < a2) {
-			goto L40;
-		    }
-/* L30: */
-		}
-L40:
-		a2 *= 1.05;
-	    }
-
-	    if (a2 < .563) {
-		s = gam * (1. - sqrt(a2)) / (a2 + 1.);
-	    }
-	} else {
-
-/*           Case 6, no information to guide us. */
-
-	    if (*ttype == -6) {
-		*g += (1. - *g) * .333;
-	    } else if (*ttype == -18) {
-		*g = .083250000000000005;
-	    } else {
-		*g = .25;
-	    }
-	    s = *g * *dmin__;
-	    *ttype = -6;
-	}
-
-    } else if (*n0in == *n0 + 1) {
-
-/*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
-
-	if (*dmin1 == *dn1 && *dmin2 == *dn2) {
-
-/*           Cases 7 and 8. */
-
-	    *ttype = -7;
-	    s = *dmin1 * .333;
-	    if (z__[nn - 5] > z__[nn - 7]) {
-		return 0;
-	    }
-	    b1 = z__[nn - 5] / z__[nn - 7];
-	    b2 = b1;
-	    if (b2 == 0.) {
-		goto L60;
-	    }
-	    i__1 = (*i0 << 2) - 1 + *pp;
-	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
-		a2 = b1;
-		if (z__[i4] > z__[i4 - 2]) {
-		    return 0;
-		}
-		b1 *= z__[i4] / z__[i4 - 2];
-		b2 += b1;
-		if (fmax(b1,a2) * 100. < b2) {
-		    goto L60;
-		}
-/* L50: */
-	    }
-L60:
-	    b2 = sqrt(b2 * 1.05);
-/* Computing 2nd power */
-	    d__1 = b2;
-	    a2 = *dmin1 / (d__1 * d__1 + 1.);
-	    gap2 = *dmin2 * .5 - a2;
-	    if (gap2 > 0. && gap2 > b2 * a2) {
-/* Computing MAX */
-		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
-		s = fmax(d__1,d__2);
-	    } else {
-/* Computing MAX */
-		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
-		s = fmax(d__1,d__2);
-		*ttype = -8;
-	    }
-	} else {
-
-/*           Case 9. */
-
-	    s = *dmin1 * .25;
-	    if (*dmin1 == *dn1) {
-		s = *dmin1 * .5;
-	    }
-	    *ttype = -9;
-	}
-
-    } else if (*n0in == *n0 + 2) {
-
-/*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
-
-/*        Cases 10 and 11. */
-
-	if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
-	    *ttype = -10;
-	    s = *dmin2 * .333;
-	    if (z__[nn - 5] > z__[nn - 7]) {
-		return 0;
-	    }
-	    b1 = z__[nn - 5] / z__[nn - 7];
-	    b2 = b1;
-	    if (b2 == 0.) {
-		goto L80;
-	    }
-	    i__1 = (*i0 << 2) - 1 + *pp;
-	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
-		if (z__[i4] > z__[i4 - 2]) {
-		    return 0;
-		}
-		b1 *= z__[i4] / z__[i4 - 2];
-		b2 += b1;
-		if (b1 * 100. < b2) {
-		    goto L80;
-		}
-/* L70: */
-	    }
-L80:
-	    b2 = sqrt(b2 * 1.05);
-/* Computing 2nd power */
-	    d__1 = b2;
-	    a2 = *dmin2 / (d__1 * d__1 + 1.);
-	    gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
-		    nn - 9]) - a2;
-	    if (gap2 > 0. && gap2 > b2 * a2) {
-/* Computing MAX */
-		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
-		s = fmax(d__1,d__2);
-	    } else {
-/* Computing MAX */
-		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
-		s = fmax(d__1,d__2);
-	    }
-	} else {
-	    s = *dmin2 * .25;
-	    *ttype = -11;
-	}
-    } else if (*n0in > *n0 + 2) {
-
-/*        Case 12, more than two eigenvalues deflated. No information. */
-
-	s = 0.;
-	*ttype = -12;
-    }
-
-    *tau = s;
-    return 0;
-
-/*     End of ODSQ4 */
-
-} /* odsq4_ */
diff --git a/external/pmrrr/src/lapack/odsq5.c b/external/pmrrr/src/lapack/odsq5.c
deleted file mode 100644
index 47dd5a9d9c..0000000000
--- a/external/pmrrr/src/lapack/odsq5.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/* dlasq5.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odsq5(int *i0, int *n0, double *z__, 
-	int *pp, double *tau, double *dmin__, double *dmin1, 
-	double *dmin2, double *dn, double *dnm1, double *dnm2, 
-	 int *ieee)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Local variables */
-    double d__;
-    int j4, j4p2;
-    double emin, temp;
-
-
-/*  -- LAPACK routine (version 3.2)                                    -- */
-
-/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
-/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
-/*  -- Berkeley                                                        -- */
-/*  -- November 2008                                                   -- */
-
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSQ5 computes one dqds transform in ping-pong form, one */
-/*  version for IEEE machines another for non IEEE machines. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  I0    (input) INT */
-/*        First index. */
-
-/*  N0    (input) INT */
-/*        Last index. */
-
-/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
-/*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
-/*        an extra argument. */
-
-/*  PP    (input) INT */
-/*        PP=0 for ping, PP=1 for pong. */
-
-/*  TAU   (input) DOUBLE PRECISION */
-/*        This is the shift. */
-
-/*  DMIN  (output) DOUBLE PRECISION */
-/*        Minimum value of d. */
-
-/*  DMIN1 (output) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ). */
-
-/*  DMIN2 (output) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
-
-/*  DN    (output) DOUBLE PRECISION */
-/*        d(N0), the last value of d. */
-
-/*  DNM1  (output) DOUBLE PRECISION */
-/*        d(N0-1). */
-
-/*  DNM2  (output) DOUBLE PRECISION */
-/*        d(N0-2). */
-
-/*  IEEE  (input) INT */
-/*        Flag for IEEE or non IEEE arithmetic. */
-
-/*  ===================================================================== */
-
-/*     .. Parameter .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --z__;
-
-    /* Function Body */
-    if (*n0 - *i0 - 1 <= 0) {
-	return 0;
-    }
-
-    j4 = (*i0 << 2) + *pp - 3;
-    emin = z__[j4 + 4];
-    d__ = z__[j4] - *tau;
-    *dmin__ = d__;
-    *dmin1 = -z__[j4];
-
-    if (*ieee) {
-
-/*        Code for IEEE arithmetic. */
-
-	if (*pp == 0) {
-	    i__1 = *n0 - 3 << 2;
-	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-		z__[j4 - 2] = d__ + z__[j4 - 1];
-		temp = z__[j4 + 1] / z__[j4 - 2];
-		d__ = d__ * temp - *tau;
-		*dmin__ = fmin(*dmin__,d__);
-		z__[j4] = z__[j4 - 1] * temp;
-/* Computing MIN */
-		d__1 = z__[j4];
-		emin = fmin(d__1,emin);
-/* L10: */
-	    }
-	} else {
-	    i__1 = *n0 - 3 << 2;
-	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-		z__[j4 - 3] = d__ + z__[j4];
-		temp = z__[j4 + 2] / z__[j4 - 3];
-		d__ = d__ * temp - *tau;
-		*dmin__ = fmin(*dmin__,d__);
-		z__[j4 - 1] = z__[j4] * temp;
-/* Computing MIN */
-		d__1 = z__[j4 - 1];
-		emin = fmin(d__1,emin);
-/* L20: */
-	    }
-	}
-
-/*        Unroll last two steps. */
-
-	*dnm2 = d__;
-	*dmin2 = *dmin__;
-	j4 = (*n0 - 2 << 2) - *pp;
-	j4p2 = j4 + (*pp << 1) - 1;
-	z__[j4 - 2] = *dnm2 + z__[j4p2];
-	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
-	*dmin__ = fmin(*dmin__,*dnm1);
-
-	*dmin1 = *dmin__;
-	j4 += 4;
-	j4p2 = j4 + (*pp << 1) - 1;
-	z__[j4 - 2] = *dnm1 + z__[j4p2];
-	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
-	*dmin__ = fmin(*dmin__,*dn);
-
-    } else {
-
-/*        Code for non IEEE arithmetic. */
-
-	if (*pp == 0) {
-	    i__1 = *n0 - 3 << 2;
-	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-		z__[j4 - 2] = d__ + z__[j4 - 1];
-		if (d__ < 0.) {
-		    return 0;
-		} else {
-		    z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
-		    d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
-		}
-		*dmin__ = fmin(*dmin__,d__);
-/* Computing MIN */
-		d__1 = emin, d__2 = z__[j4];
-		emin = fmin(d__1,d__2);
-/* L30: */
-	    }
-	} else {
-	    i__1 = *n0 - 3 << 2;
-	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-		z__[j4 - 3] = d__ + z__[j4];
-		if (d__ < 0.) {
-		    return 0;
-		} else {
-		    z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
-		    d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
-		}
-		*dmin__ = fmin(*dmin__,d__);
-/* Computing MIN */
-		d__1 = emin, d__2 = z__[j4 - 1];
-		emin = fmin(d__1,d__2);
-/* L40: */
-	    }
-	}
-
-/*        Unroll last two steps. */
-
-	*dnm2 = d__;
-	*dmin2 = *dmin__;
-	j4 = (*n0 - 2 << 2) - *pp;
-	j4p2 = j4 + (*pp << 1) - 1;
-	z__[j4 - 2] = *dnm2 + z__[j4p2];
-	if (*dnm2 < 0.) {
-	    return 0;
-	} else {
-	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	    *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
-	}
-	*dmin__ = fmin(*dmin__,*dnm1);
-
-	*dmin1 = *dmin__;
-	j4 += 4;
-	j4p2 = j4 + (*pp << 1) - 1;
-	z__[j4 - 2] = *dnm1 + z__[j4p2];
-	if (*dnm1 < 0.) {
-	    return 0;
-	} else {
-	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	    *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
-	}
-	*dmin__ = fmin(*dmin__,*dn);
-
-    }
-
-    z__[j4 + 2] = *dn;
-    z__[(*n0 << 2) - *pp] = emin;
-    return 0;
-
-/*     End of ODSQ5 */
-
-} /* odsq5_ */
diff --git a/external/pmrrr/src/lapack/odsq6.c b/external/pmrrr/src/lapack/odsq6.c
deleted file mode 100644
index f9822e86ee..0000000000
--- a/external/pmrrr/src/lapack/odsq6.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/* dlasq6.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odsq6(int *i0, int *n0, double *z__, 
-	int *pp, double *dmin__, double *dmin1, double *dmin2, 
-	 double *dn, double *dnm1, double *dnm2)
-{
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2;
-
-    /* Local variables */
-    double d__;
-    int j4, j4p2;
-    double emin, temp;
-    //    extern double odmch(char *);
-    double safmin;
-
-
-/*  -- LAPACK routine (version 3.2)                                    -- */
-
-/*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
-/*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
-/*  -- Berkeley                                                        -- */
-/*  -- November 2008                                                   -- */
-
-/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
-/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSQ6 computes one dqd (shift equal to zero) transform in */
-/*  ping-pong form, with protection against underflow and overflow. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  I0    (input) INT */
-/*        First index. */
-
-/*  N0    (input) INT */
-/*        Last index. */
-
-/*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
-/*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
-/*        an extra argument. */
-
-/*  PP    (input) INT */
-/*        PP=0 for ping, PP=1 for pong. */
-
-/*  DMIN  (output) DOUBLE PRECISION */
-/*        Minimum value of d. */
-
-/*  DMIN1 (output) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ). */
-
-/*  DMIN2 (output) DOUBLE PRECISION */
-/*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
-
-/*  DN    (output) DOUBLE PRECISION */
-/*        d(N0), the last value of d. */
-
-/*  DNM1  (output) DOUBLE PRECISION */
-/*        d(N0-1). */
-
-/*  DNM2  (output) DOUBLE PRECISION */
-/*        d(N0-2). */
-
-/*  ===================================================================== */
-
-/*     .. Parameter .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Function .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --z__;
-
-    /* Function Body */
-    if (*n0 - *i0 - 1 <= 0) {
-	return 0;
-    }
-
-    safmin = DBL_MIN; // odmch("Safe minimum");
-    j4 = (*i0 << 2) + *pp - 3;
-    emin = z__[j4 + 4];
-    d__ = z__[j4];
-    *dmin__ = d__;
-
-    if (*pp == 0) {
-	i__1 = (*n0-3) << 2;
-	for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-	    z__[j4 - 2] = d__ + z__[j4 - 1];
-	    if (z__[j4 - 2] == 0.) {
-		z__[j4] = 0.;
-		d__ = z__[j4 + 1];
-		*dmin__ = d__;
-		emin = 0.;
-	    } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 
-		    - 2] < z__[j4 + 1]) {
-		temp = z__[j4 + 1] / z__[j4 - 2];
-		z__[j4] = z__[j4 - 1] * temp;
-		d__ *= temp;
-	    } else {
-		z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
-		d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
-	    }
-	    *dmin__ = fmin(*dmin__,d__);
-/* Computing MIN */
-	    d__1 = emin, d__2 = z__[j4];
-	    emin = fmin(d__1,d__2);
-/* L10: */
-	}
-    } else {
-	i__1 = (*n0-3) << 2;
-	for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
-	    z__[j4 - 3] = d__ + z__[j4];
-	    if (z__[j4 - 3] == 0.) {
-		z__[j4 - 1] = 0.;
-		d__ = z__[j4 + 2];
-		*dmin__ = d__;
-		emin = 0.;
-	    } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 
-		    - 3] < z__[j4 + 2]) {
-		temp = z__[j4 + 2] / z__[j4 - 3];
-		z__[j4 - 1] = z__[j4] * temp;
-		d__ *= temp;
-	    } else {
-		z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
-		d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
-	    }
-	    *dmin__ = fmin(*dmin__,d__);
-/* Computing MIN */
-	    d__1 = emin, d__2 = z__[j4 - 1];
-	    emin = fmin(d__1,d__2);
-/* L20: */
-	}
-    }
-
-/*     Unroll last two steps. */
-
-    *dnm2 = d__;
-    *dmin2 = *dmin__;
-    j4 = ((*n0-2) << 2) - *pp;
-    j4p2 = j4 + (*pp << 1) - 1;
-    z__[j4 - 2] = *dnm2 + z__[j4p2];
-    if (z__[j4 - 2] == 0.) {
-	z__[j4] = 0.;
-	*dnm1 = z__[j4p2 + 2];
-	*dmin__ = *dnm1;
-	emin = 0.;
-    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 
-	    z__[j4p2 + 2]) {
-	temp = z__[j4p2 + 2] / z__[j4 - 2];
-	z__[j4] = z__[j4p2] * temp;
-	*dnm1 = *dnm2 * temp;
-    } else {
-	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
-    }
-    *dmin__ = fmin(*dmin__,*dnm1);
-
-    *dmin1 = *dmin__;
-    j4 += 4;
-    j4p2 = j4 + (*pp << 1) - 1;
-    z__[j4 - 2] = *dnm1 + z__[j4p2];
-    if (z__[j4 - 2] == 0.) {
-	z__[j4] = 0.;
-	*dn = z__[j4p2 + 2];
-	*dmin__ = *dn;
-	emin = 0.;
-    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < 
-	    z__[j4p2 + 2]) {
-	temp = z__[j4p2 + 2] / z__[j4 - 2];
-	z__[j4] = z__[j4p2] * temp;
-	*dn = *dnm1 * temp;
-    } else {
-	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
-	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
-    }
-    *dmin__ = fmin(*dmin__,*dn);
-
-    z__[j4 + 2] = *dn;
-    z__[(*n0 << 2) - *pp] = emin;
-    return 0;
-
-/*     End of ODSQ6 */
-
-} /* odsq6_ */
diff --git a/external/pmrrr/src/lapack/odsrt.c b/external/pmrrr/src/lapack/odsrt.c
deleted file mode 100644
index 03adb04dc8..0000000000
--- a/external/pmrrr/src/lapack/odsrt.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/* dlasrt.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odsrt(char *id, int *n, double *d__, int *info)
-{
-    /* System generated locals */
-    int i__1, i__2;
-
-    /* Local variables */
-    int i__, j;
-    double d1, d2, d3;
-    int dir;
-    double tmp;
-    int endd;
-    extern int olsame(char *, char *);
-    int stack[64]	/* was [2][32] */;
-    double dmnmx;
-    int start;
-    extern /* Subroutine */ int oerbla(char *, int *);
-    int stkpnt;
-
-
-/*  -- LAPACK routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  Sort the numbers in D in increasing order (if ID = 'I') or */
-/*  in decreasing order (if ID = 'D' ). */
-
-/*  Use Quick Sort, reverting to Insertion sort on arrays of */
-/*  size <= 20. Dimension of STACK limits N to about 2**32. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  ID      (input) CHARACTER*1 */
-/*          = 'I': sort D in increasing order; */
-/*          = 'D': sort D in decreasing order. */
-
-/*  N       (input) INT */
-/*          The length of the array D. */
-
-/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the array to be sorted. */
-/*          On exit, D has been sorted into increasing order */
-/*          (D(1) <= ... <= D(N) ) or into decreasing order */
-/*          (D(1) >= ... >= D(N) ), depending on ID. */
-
-/*  INFO    (output) INT */
-/*          = 0:  successful exit */
-/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Local Arrays .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input paramters. */
-
-    /* Parameter adjustments */
-    --d__;
-
-    /* Function Body */
-    *info = 0;
-    dir = -1;
-    if (olsame(id, "D")) {
-	dir = 0;
-    } else if (olsame(id, "I")) {
-	dir = 1;
-    }
-    if (dir == -1) {
-	*info = -1;
-    } else if (*n < 0) {
-	*info = -2;
-    }
-    if (*info != 0) {
-	i__1 = -(*info);
-	oerbla("ODSRT", &i__1);
-	return 0;
-    }
-
-/*     Quick return if possible */
-
-    if (*n <= 1) {
-	return 0;
-    }
-
-    stkpnt = 1;
-    stack[0] = 1;
-    stack[1] = *n;
-L10:
-    start = stack[(stkpnt << 1) - 2];
-    endd = stack[(stkpnt << 1) - 1];
-    --stkpnt;
-    if (endd - start <= 20 && endd - start > 0) {
-
-/*        Do Insertion sort on D( START:ENDD ) */
-
-	if (dir == 0) {
-
-/*           Sort into decreasing order */
-
-	    i__1 = endd;
-	    for (i__ = start + 1; i__ <= i__1; ++i__) {
-		i__2 = start + 1;
-		for (j = i__; j >= i__2; --j) {
-		    if (d__[j] > d__[j - 1]) {
-			dmnmx = d__[j];
-			d__[j] = d__[j - 1];
-			d__[j - 1] = dmnmx;
-		    } else {
-			goto L30;
-		    }
-/* L20: */
-		}
-L30:
-		;
-	    }
-
-	} else {
-
-/*           Sort into increasing order */
-
-	    i__1 = endd;
-	    for (i__ = start + 1; i__ <= i__1; ++i__) {
-		i__2 = start + 1;
-		for (j = i__; j >= i__2; --j) {
-		    if (d__[j] < d__[j - 1]) {
-			dmnmx = d__[j];
-			d__[j] = d__[j - 1];
-			d__[j - 1] = dmnmx;
-		    } else {
-			goto L50;
-		    }
-/* L40: */
-		}
-L50:
-		;
-	    }
-
-	}
-
-    } else if (endd - start > 20) {
-
-/*        Partition D( START:ENDD ) and stack parts, largest one first */
-
-/*        Choose partition entry as median of 3 */
-
-	d1 = d__[start];
-	d2 = d__[endd];
-	i__ = (start + endd) / 2;
-	d3 = d__[i__];
-	if (d1 < d2) {
-	    if (d3 < d1) {
-		dmnmx = d1;
-	    } else if (d3 < d2) {
-		dmnmx = d3;
-	    } else {
-		dmnmx = d2;
-	    }
-	} else {
-	    if (d3 < d2) {
-		dmnmx = d2;
-	    } else if (d3 < d1) {
-		dmnmx = d3;
-	    } else {
-		dmnmx = d1;
-	    }
-	}
-
-	if (dir == 0) {
-
-/*           Sort into decreasing order */
-
-	    i__ = start - 1;
-	    j = endd + 1;
-L60:
-L70:
-	    --j;
-	    if (d__[j] < dmnmx) {
-		goto L70;
-	    }
-L80:
-	    ++i__;
-	    if (d__[i__] > dmnmx) {
-		goto L80;
-	    }
-	    if (i__ < j) {
-		tmp = d__[i__];
-		d__[i__] = d__[j];
-		d__[j] = tmp;
-		goto L60;
-	    }
-	    if (j - start > endd - j - 1) {
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = start;
-		stack[(stkpnt << 1) - 1] = j;
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = j + 1;
-		stack[(stkpnt << 1) - 1] = endd;
-	    } else {
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = j + 1;
-		stack[(stkpnt << 1) - 1] = endd;
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = start;
-		stack[(stkpnt << 1) - 1] = j;
-	    }
-	} else {
-
-/*           Sort into increasing order */
-
-	    i__ = start - 1;
-	    j = endd + 1;
-L90:
-L100:
-	    --j;
-	    if (d__[j] > dmnmx) {
-		goto L100;
-	    }
-L110:
-	    ++i__;
-	    if (d__[i__] < dmnmx) {
-		goto L110;
-	    }
-	    if (i__ < j) {
-		tmp = d__[i__];
-		d__[i__] = d__[j];
-		d__[j] = tmp;
-		goto L90;
-	    }
-	    if (j - start > endd - j - 1) {
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = start;
-		stack[(stkpnt << 1) - 1] = j;
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = j + 1;
-		stack[(stkpnt << 1) - 1] = endd;
-	    } else {
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = j + 1;
-		stack[(stkpnt << 1) - 1] = endd;
-		++stkpnt;
-		stack[(stkpnt << 1) - 2] = start;
-		stack[(stkpnt << 1) - 1] = j;
-	    }
-	}
-    }
-    if (stkpnt > 0) {
-	goto L10;
-    }
-    return 0;
-
-/*     End of ODSRT */
-
-} /* odsrt_ */
diff --git a/external/pmrrr/src/lapack/odssq.c b/external/pmrrr/src/lapack/odssq.c
deleted file mode 100644
index 5da0ce9a7c..0000000000
--- a/external/pmrrr/src/lapack/odssq.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/* dlassq.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int odssq(int *n, double *x, int *incx, double *scale, double *sumsq)
-{
-    /* System generated locals */
-    int i__1, i__2;
-    double d__1;
-
-    /* Local variables */
-    int ix;
-    double absxi;
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSSQ  returns the values  scl  and  smsq  such that */
-
-/*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */
-
-/*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is */
-/*  assumed to be non-negative and  scl  returns the value */
-
-/*     scl = max( scale, abs( x( i ) ) ). */
-
-/*  scale and sumsq must be supplied in SCALE and SUMSQ and */
-/*  scl and smsq are overwritten on SCALE and SUMSQ respectively. */
-
-/*  The routine makes only one pass through the vector x. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  N       (input) INT */
-/*          The number of elements to be used from the vector X. */
-
-/*  X       (input) DOUBLE PRECISION array, dimension (N) */
-/*          The vector for which a scaled sum of squares is computed. */
-/*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */
-
-/*  INCX    (input) INT */
-/*          The increment between successive values of the vector X. */
-/*          INCX > 0. */
-
-/*  SCALE   (input/output) DOUBLE PRECISION */
-/*          On entry, the value  scale  in the equation above. */
-/*          On exit, SCALE is overwritten with  scl , the scaling factor */
-/*          for the sum of squares. */
-
-/*  SUMSQ   (input/output) DOUBLE PRECISION */
-/*          On entry, the value  sumsq  in the equation above. */
-/*          On exit, SUMSQ is overwritten with  smsq , the basic sum of */
-/*          squares from which  scl  has been factored out. */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    /* Parameter adjustments */
-    --x;
-
-    /* Function Body */
-    if (*n > 0) {
-	i__1 = (*n - 1) * *incx + 1;
-	i__2 = *incx;
-	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-	    if (x[ix] != 0.) {
-		absxi = (d__1 = x[ix], fabs(d__1));
-		if (*scale < absxi) {
-/* Computing 2nd power */
-		    d__1 = *scale / absxi;
-		    *sumsq = *sumsq * (d__1 * d__1) + 1;
-		    *scale = absxi;
-		} else {
-/* Computing 2nd power */
-		    d__1 = absxi / *scale;
-		    *sumsq += d__1 * d__1;
-		}
-	    }
-/* L10: */
-	}
-    }
-    return 0;
-
-/*     End of ODSSQ */
-
-} /* odssq_ */
diff --git a/external/pmrrr/src/lapack/odstmr.c b/external/pmrrr/src/lapack/odstmr.c
deleted file mode 100644
index 1c159ef7ea..0000000000
--- a/external/pmrrr/src/lapack/odstmr.c
+++ /dev/null
@@ -1,726 +0,0 @@
-/* dstemr.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Table of constant values */
-static int c__1 = 1;
-static double c_b18 = .001;
-#define TRUE_ (1)
-#define FALSE_ (0)
-#define iabs(a) ( (a) > (0) ? (a) : (-a) )
-
-
-/* Subroutine */ 
-int odstmr(char *jobz, char *range, int *n, double *d__, 
-	double *e, double *vl, double *vu, int *il, 
-	int *iu, int *m, double *w, double *z__, int *ldz, 
-	 int *nzc, int *isuppz, int *tryrac, double *work, 
-	int *lwork, int *iwork, int *liwork, int *info)
-{
-    /* System generated locals */
-    int z_dim1, z_offset, i__1, i__2;
-    double d__1, d__2;
-
-    /* Builtin functions */
-    // double sqrt(double);
-
-    /* Local variables */
-    int i__, j;
-    double r1, r2;
-    int jj;
-    double cs;
-    int in;
-    double sn, wl, wu;
-    int iil, iiu;
-    double eps, tmp;
-    int indd, iend, jblk, wend;
-    double rmin, rmax;
-    int itmp;
-    double tnrm;
-    extern /* Subroutine */ int ode2(double *, double *, double 
-	    *, double *, double *);
-    int inde2, itmp2;
-    double rtol1, rtol2;
-    extern /* Subroutine */ int odscal(int *, double *, double *, 
-	    int *);
-    double scale;
-    int indgp;
-    extern int olsame(char *, char *);
-    int iinfo, iindw, ilast;
-    extern /* Subroutine */ int odcpy(int *, double *, int *, 
-	    double *, int *), odswap(int *, double *, int 
-	    *, double *, int *);
-    int lwmin;
-    int wantz;
-    extern /* Subroutine */ int odev2(double *, double *, 
-	    double *, double *, double *, double *, 
-	    double *);
-    // extern double odmch(char *);
-    int alleig;
-    int ibegin;
-    int indeig;
-    int iindbl;
-    int valeig;
-    extern /* Subroutine */ int odrrc(char *, int *, double *, 
-	    double *, double *, double *, double *, int *, 
-	     int *, int *, int *), odrre(char *, 
-	    int *, double *, double *, int *, int *, 
-	    double *, double *, double *, double *, 
-	    double *, double *, int *, int *, int *, 
-	    double *, double *, double *, int *, int *, 
-	    double *, double *, double *, int *, int *);
-    int wbegin;
-    double safmin;
-    extern /* Subroutine */ int odrrj(int *, double *, double *, 
-	     int *, int *, double *, int *, double *, 
-	    double *, double *, int *, double *, double *, 
-	     int *), oerbla(char *, int *);
-    double bignum;
-    int inderr, iindwk, indgrs, offset;
-    extern double odnst(char *, int *, double *, double *);
-    extern /* Subroutine */ int odrrr(int *, double *, double *, 
-	     int *), odrrv(int *, double *, double *, 
-	    double *, double *, double *, int *, int *, 
-	    int *, int *, double *, double *, double *, 
-	    double *, double *, double *, int *, int *, 
-	    double *, double *, int *, int *, double *, 
-	    int *, int *), odsrt(char *, int *, double *, 
-	    int *);
-    double thresh;
-    int iinspl, ifirst, indwrk, liwmin, nzcmin;
-    double pivmin;
-    int nsplit;
-    double smlnum;
-    int lquery, zquery;
-
-
-/*  -- LAPACK computational routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ODSTMR computes selected eigenvalues and, optionally, eigenvectors */
-/*  of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */
-/*  a well defined set of pairwise different real eigenvalues, the corresponding */
-/*  real eigenvectors are pairwise orthogonal. */
-
-/*  The spectrum may be computed either completely or partially by specifying */
-/*  either an interval (VL,VU] or a range of indices IL:IU for the desired */
-/*  eigenvalues. */
-
-/*  Depending on the number of desired eigenvalues, these are computed either */
-/*  by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */
-/*  computed by the use of various suitable L D L^T factorizations near clusters */
-/*  of close eigenvalues (referred to as RRRs, Relatively Robust */
-/*  Representations). An informal sketch of the algorithm follows. */
-
-/*  For each unreduced block (submatrix) of T, */
-/*     (a) Compute T - sigma I  = L D L^T, so that L and D */
-/*         define all the wanted eigenvalues to high relative accuracy. */
-/*         This means that small relative changes in the entries of D and L */
-/*         cause only small relative changes in the eigenvalues and */
-/*         eigenvectors. The standard (unfactored) representation of the */
-/*         tridiagonal matrix T does not have this property in general. */
-/*     (b) Compute the eigenvalues to suitable accuracy. */
-/*         If the eigenvectors are desired, the algorithm attains full */
-/*         accuracy of the computed eigenvalues only right before */
-/*         the corresponding vectors have to be computed, see steps c) and d). */
-/*     (c) For each cluster of close eigenvalues, select a new */
-/*         shift close to the cluster, find a new factorization, and refine */
-/*         the shifted eigenvalues to suitable accuracy. */
-/*     (d) For each eigenvalue with a large enough relative separation compute */
-/*         the corresponding eigenvector by forming a rank revealing twisted */
-/*         factorization. Go back to (c) for any clusters that remain. */
-
-/*  For more details, see: */
-/*  - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */
-/*    to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */
-/*    Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */
-/*  - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */
-/*    Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */
-/*    2004.  Also LAPACK Working Note 154. */
-/*  - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */
-/*    tridiagonal eigenvalue/eigenvector problem", */
-/*    Computer Science Division Technical Report No. UCB/CSD-97-971, */
-/*    UC Berkeley, May 1997. */
-
-/*  Notes: */
-/*  1.ODSTMR works only on machines which follow IEEE-754 */
-/*  floating-point standard in their handling of infinities and NaNs. */
-/*  This permits the use of efficient inner loops avoiding a check for */
-/*  zero divisors. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  JOBZ    (input) CHARACTER*1 */
-/*          = 'N':  Compute eigenvalues only; */
-/*          = 'V':  Compute eigenvalues and eigenvectors. */
-
-/*  RANGE   (input) CHARACTER*1 */
-/*          = 'A': all eigenvalues will be found. */
-/*          = 'V': all eigenvalues in the half-open interval (VL,VU] */
-/*                 will be found. */
-/*          = 'I': the IL-th through IU-th eigenvalues will be found. */
-
-/*  N       (input) INT */
-/*          The order of the matrix.  N >= 0. */
-
-/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the N diagonal elements of the tridiagonal matrix */
-/*          T. On exit, D is overwritten. */
-
-/*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
-/*          On entry, the (N-1) subdiagonal elements of the tridiagonal */
-/*          matrix T in elements 1 to N-1 of E. E(N) need not be set on */
-/*          input, but is used internally as workspace. */
-/*          On exit, E is overwritten. */
-
-/*  VL      (input) DOUBLE PRECISION */
-/*  VU      (input) DOUBLE PRECISION */
-/*          If RANGE='V', the lower and upper bounds of the interval to */
-/*          be searched for eigenvalues. VL < VU. */
-/*          Not referenced if RANGE = 'A' or 'I'. */
-
-/*  IL      (input) INT */
-/*  IU      (input) INT */
-/*          If RANGE='I', the indices (in ascending order) of the */
-/*          smallest and largest eigenvalues to be returned. */
-/*          1 <= IL <= IU <= N, if N > 0. */
-/*          Not referenced if RANGE = 'A' or 'V'. */
-
-/*  M       (output) INT */
-/*          The total number of eigenvalues found.  0 <= M <= N. */
-/*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
-
-/*  W       (output) DOUBLE PRECISION array, dimension (N) */
-/*          The first M elements contain the selected eigenvalues in */
-/*          ascending order. */
-
-/*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
-/*          If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */
-/*          contain the orthonormal eigenvectors of the matrix T */
-/*          corresponding to the selected eigenvalues, with the i-th */
-/*          column of Z holding the eigenvector associated with W(i). */
-/*          If JOBZ = 'N', then Z is not referenced. */
-/*          Note: the user must ensure that at least max(1,M) columns are */
-/*          supplied in the array Z; if RANGE = 'V', the exact value of M */
-/*          is not known in advance and can be computed with a workspace */
-/*          query by setting NZC = -1, see below. */
-
-/*  LDZ     (input) INT */
-/*          The leading dimension of the array Z.  LDZ >= 1, and if */
-/*          JOBZ = 'V', then LDZ >= max(1,N). */
-
-/*  NZC     (input) INT */
-/*          The number of eigenvectors to be held in the array Z. */
-/*          If RANGE = 'A', then NZC >= max(1,N). */
-/*          If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */
-/*          If RANGE = 'I', then NZC >= IU-IL+1. */
-/*          If NZC = -1, then a workspace query is assumed; the */
-/*          routine calculates the number of columns of the array Z that */
-/*          are needed to hold the eigenvectors. */
-/*          This value is returned as the first entry of the Z array, and */
-/*          no error message related to NZC is issued by OERBLA. */
-
-/*  ISUPPZ  (output) INT ARRAY, dimension ( 2*max(1,M) ) */
-/*          The support of the eigenvectors in Z, i.e., the indices */
-/*          indicating the nonzero elements in Z. The i-th computed eigenvector */
-/*          is nonzero only in elements ISUPPZ( 2*i-1 ) through */
-/*          ISUPPZ( 2*i ). This is relevant in the case when the matrix */
-/*          is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */
-
-/*  TRYRAC  (input/output) INT */
-/*          If TRYRAC.EQ..TRUE., indicates that the code should check whether */
-/*          the tridiagonal matrix defines its eigenvalues to high relative */
-/*          accuracy.  If so, the code uses relative-accuracy preserving */
-/*          algorithms that might be (a bit) slower depending on the matrix. */
-/*          If the matrix does not define its eigenvalues to high relative */
-/*          accuracy, the code can uses possibly faster algorithms. */
-/*          If TRYRAC.EQ..FALSE., the code is not required to guarantee */
-/*          relatively accurate eigenvalues and can use the fastest possible */
-/*          techniques. */
-/*          On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */
-/*          does not define its eigenvalues to high relative accuracy. */
-
-/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
-/*          On exit, if INFO = 0, WORK(1) returns the optimal */
-/*          (and minimal) LWORK. */
-
-/*  LWORK   (input) INT */
-/*          The dimension of the array WORK. LWORK >= max(1,18*N) */
-/*          if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */
-/*          If LWORK = -1, then a workspace query is assumed; the routine */
-/*          only calculates the optimal size of the WORK array, returns */
-/*          this value as the first entry of the WORK array, and no error */
-/*          message related to LWORK is issued by OERBLA. */
-
-/*  IWORK   (workspace/output) INT array, dimension (LIWORK) */
-/*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
-
-/*  LIWORK  (input) INT */
-/*          The dimension of the array IWORK.  LIWORK >= max(1,10*N) */
-/*          if the eigenvectors are desired, and LIWORK >= max(1,8*N) */
-/*          if only the eigenvalues are to be computed. */
-/*          If LIWORK = -1, then a workspace query is assumed; the */
-/*          routine only calculates the optimal size of the IWORK array, */
-/*          returns this value as the first entry of the IWORK array, and */
-/*          no error message related to LIWORK is issued by OERBLA. */
-
-/*  INFO    (output) INT */
-/*          On exit, INFO */
-/*          = 0:  successful exit */
-/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
-/*          > 0:  if INFO = 1X, internal error in ODRRE, */
-/*                if INFO = 2X, internal error in ODRRV. */
-/*                Here, the digit X = ABS( IINFO ) < 10, where IINFO is */
-/*                the nonzero error code returned by ODRRE or */
-/*                ODRRV, respectively. */
-
-
-/*  Further Details */
-/*  =============== */
-
-/*  Based on contributions by */
-/*     Beresford Parlett, University of California, Berkeley, USA */
-/*     Jim Demmel, University of California, Berkeley, USA */
-/*     Inderjit Dhillon, University of Texas, Austin, USA */
-/*     Osni Marques, LBNL/NERSC, USA */
-/*     Christof Voemel, University of California, Berkeley, USA */
-
-/*  ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --d__;
-    --e;
-    --w;
-    z_dim1 = *ldz;
-    z_offset = 1 + z_dim1;
-    z__ -= z_offset;
-    --isuppz;
-    --work;
-    --iwork;
-
-    /* Function Body */
-    wantz = olsame(jobz, "V");
-    alleig = olsame(range, "A");
-    valeig = olsame(range, "V");
-    indeig = olsame(range, "I");
-
-    lquery = *lwork == -1 || *liwork == -1;
-    zquery = *nzc == -1;
-/*     ODSTMR needs WORK of size 6*N, IWORK of size 3*N. */
-/*     In addition, ODRRE needs WORK of size 6*N, IWORK of size 5*N. */
-/*     Furthermore, ODRRV needs WORK of size 12*N, IWORK of size 7*N. */
-    if (wantz) {
-	lwmin = *n * 18;
-	liwmin = *n * 10;
-    } else {
-/*        need less workspace if only the eigenvalues are wanted */
-	lwmin = *n * 12;
-	liwmin = *n << 3;
-    }
-    wl = 0.;
-    wu = 0.;
-    iil = 0;
-    iiu = 0;
-    if (valeig) {
-/*        We do not reference VL, VU in the cases RANGE = 'I','A' */
-/*        The interval (WL, WU] contains all the wanted eigenvalues. */
-/*        It is either given by the user or computed in ODRRE. */
-	wl = *vl;
-	wu = *vu;
-    } else if (indeig) {
-/*        We do not reference IL, IU in the cases RANGE = 'V','A' */
-	iil = *il;
-	iiu = *iu;
-    }
-
-    *info = 0;
-    if (! (wantz || olsame(jobz, "N"))) {
-	*info = -1;
-    } else if (! (alleig || valeig || indeig)) {
-	*info = -2;
-    } else if (*n < 0) {
-	*info = -3;
-    } else if (valeig && *n > 0 && wu <= wl) {
-	*info = -7;
-    } else if (indeig && (iil < 1 || iil > *n)) {
-	*info = -8;
-    } else if (indeig && (iiu < iil || iiu > *n)) {
-	*info = -9;
-    } else if (*ldz < 1 || wantz && *ldz < *n) {
-	*info = -13;
-    } else if (*lwork < lwmin && ! lquery) {
-	*info = -17;
-    } else if (*liwork < liwmin && ! lquery) {
-	*info = -19;
-    }
-
-/*     Get machine constants. */
-
-    safmin = DBL_MIN; // safmin = odmch("Safe minimum");
-    eps = DBL_EPSILON; // eps = odmch("Precision");
-    smlnum = safmin / eps;
-    bignum = 1. / smlnum;
-    rmin = sqrt(smlnum);
-/* Computing MIN */
-    d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin));
-    rmax = fmin(d__1,d__2);
-
-    if (*info == 0) {
-	work[1] = (double) lwmin;
-	iwork[1] = liwmin;
-
-	if (wantz && alleig) {
-	    nzcmin = *n;
-	} else if (wantz && valeig) {
-	    odrrc("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, &
-		    itmp2, info);
-	} else if (wantz && indeig) {
-	    nzcmin = iiu - iil + 1;
-	} else {
-/*           WANTZ .EQ. FALSE. */
-	    nzcmin = 0;
-	}
-	if (zquery && *info == 0) {
-	    z__[z_dim1 + 1] = (double) nzcmin;
-	} else if (*nzc < nzcmin && ! zquery) {
-	    *info = -14;
-	}
-    }
-    if (*info != 0) {
-
-	i__1 = -(*info);
-	oerbla("ODSTMR", &i__1);
-
-	return 0;
-    } else if (lquery || zquery) {
-	return 0;
-    }
-
-/*     Handle N = 0, 1, and 2 cases immediately */
-
-    *m = 0;
-    if (*n == 0) {
-	return 0;
-    }
-
-    if (*n == 1) {
-	if (alleig || indeig) {
-	    *m = 1;
-	    w[1] = d__[1];
-	} else {
-	    if (wl < d__[1] && wu >= d__[1]) {
-		*m = 1;
-		w[1] = d__[1];
-	    }
-	}
-	if (wantz && ! zquery) {
-	    z__[z_dim1 + 1] = 1.;
-	    isuppz[1] = 1;
-	    isuppz[2] = 1;
-	}
-	return 0;
-    }
-
-    if (*n == 2) {
-	if (! wantz) {
-	    ode2(&d__[1], &e[1], &d__[2], &r1, &r2);
-	} else if (wantz && ! zquery) {
-	    odev2(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn);
-	}
-	if (alleig || valeig && r2 > wl && r2 <= wu || indeig && iil == 1) {
-	    ++(*m);
-	    w[*m] = r2;
-	    if (wantz && ! zquery) {
-		z__[*m * z_dim1 + 1] = -sn;
-		z__[*m * z_dim1 + 2] = cs;
-/*              Note: At most one of SN and CS can be zero. */
-		if (sn != 0.) {
-		    if (cs != 0.) {
-			isuppz[(*m << 1) - 1] = 1;
-			isuppz[(*m << 1) - 1] = 2;
-		    } else {
-			isuppz[(*m << 1) - 1] = 1;
-			isuppz[(*m << 1) - 1] = 1;
-		    }
-		} else {
-		    isuppz[(*m << 1) - 1] = 2;
-		    isuppz[*m * 2] = 2;
-		}
-	    }
-	}
-	if (alleig || valeig && r1 > wl && r1 <= wu || indeig && iiu == 2) {
-	    ++(*m);
-	    w[*m] = r1;
-	    if (wantz && ! zquery) {
-		z__[*m * z_dim1 + 1] = cs;
-		z__[*m * z_dim1 + 2] = sn;
-/*              Note: At most one of SN and CS can be zero. */
-		if (sn != 0.) {
-		    if (cs != 0.) {
-			isuppz[(*m << 1) - 1] = 1;
-			isuppz[(*m << 1) - 1] = 2;
-		    } else {
-			isuppz[(*m << 1) - 1] = 1;
-			isuppz[(*m << 1) - 1] = 1;
-		    }
-		} else {
-		    isuppz[(*m << 1) - 1] = 2;
-		    isuppz[*m * 2] = 2;
-		}
-	    }
-	}
-	return 0;
-    }
-/*     Continue with general N */
-    indgrs = 1;
-    inderr = (*n << 1) + 1;
-    indgp = *n * 3 + 1;
-    indd = (*n << 2) + 1;
-    inde2 = *n * 5 + 1;
-    indwrk = *n * 6 + 1;
-
-    iinspl = 1;
-    iindbl = *n + 1;
-    iindw = (*n << 1) + 1;
-    iindwk = *n * 3 + 1;
-
-/*     Scale matrix to allowable range, if necessary. */
-/*     The allowable range is related to the PIVMIN parameter; see the */
-/*     comments in DLARRD.  The preference for scaling small values */
-/*     up is heuristic; we expect users' matrices not to be close to the */
-/*     RMAX threshold. */
-
-    scale = 1.;
-    tnrm = odnst("M", n, &d__[1], &e[1]);
-    if (tnrm > 0. && tnrm < rmin) {
-	scale = rmin / tnrm;
-    } else if (tnrm > rmax) {
-	scale = rmax / tnrm;
-    }
-    if (scale != 1.) {
-	odscal(n, &scale, &d__[1], &c__1);
-	i__1 = *n - 1;
-	odscal(&i__1, &scale, &e[1], &c__1);
-	tnrm *= scale;
-	if (valeig) {
-/*           If eigenvalues in interval have to be found, */
-/*           scale (WL, WU] accordingly */
-	    wl *= scale;
-	    wu *= scale;
-	}
-    }
-
-/*     Compute the desired eigenvalues of the tridiagonal after splitting */
-/*     into smaller subblocks if the corresponding off-diagonal elements */
-/*     are small */
-/*     THRESH is the splitting parameter for ODRRE */
-/*     A negative THRESH forces the old splitting criterion based on the */
-/*     size of the off-diagonal. A positive THRESH switches to splitting */
-/*     which preserves relative accuracy. */
-
-    if (*tryrac) {
-/*        Test whether the matrix warrants the more expensive relative approach. */
-	odrrr(n, &d__[1], &e[1], &iinfo);
-    } else {
-/*        The user does not care about relative accurately eigenvalues */
-	iinfo = -1;
-    }
-/*     Set the splitting criterion */
-    if (iinfo == 0) {
-	thresh = eps;
-    } else {
-	thresh = -eps;
-/*        relative accuracy is desired but T does not guarantee it */
-	*tryrac = FALSE_;
-    }
-
-    if (*tryrac) {
-/*        Copy original diagonal, needed to guarantee relative accuracy */
-	odcpy(n, &d__[1], &c__1, &work[indd], &c__1);
-    }
-/*     Store the squares of the offdiagonal values of T */
-    i__1 = *n - 1;
-    for (j = 1; j <= i__1; ++j) {
-/* Computing 2nd power */
-	d__1 = e[j];
-	work[inde2 + j - 1] = d__1 * d__1;
-/* L5: */
-    }
-/*     Set the tolerance parameters for bisection */
-    if (! wantz) {
-/*        ODRRE computes the eigenvalues to full precision. */
-	rtol1 = eps * 4.;
-	rtol2 = eps * 4.;
-    } else {
-/*        ODRRE computes the eigenvalues to less than full precision. */
-/*        ODRRV will refine the eigenvalue approximations, and we can */
-/*        need less accurate initial bisection in ODRRE. */
-/*        Note: these settings do only affect the subset case and ODRRE */
-	rtol1 = sqrt(eps);
-/* Computing MAX */
-	d__1 = sqrt(eps) * .005, d__2 = eps * 4.;
-	rtol2 = fmax(d__1,d__2);
-    }
-    odrre(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], &
-	    rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[
-	    inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[
-	    indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo);
-    if (iinfo != 0) {
-	*info = iabs(iinfo) + 10;
-	return 0;
-    }
-/*     Note that if RANGE .NE. 'V', ODRRE computes bounds on the desired */
-/*     part of the spectrum. All desired eigenvalues are contained in */
-/*     (WL,WU] */
-    if (wantz) {
-
-/*        Compute the desired eigenvectors corresponding to the computed */
-/*        eigenvalues */
-
-	odrrv(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, &
-		c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[
-		indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[
-		z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], &
-		iinfo);
-	if (iinfo != 0) {
-	    *info = iabs(iinfo) + 20;
-	    return 0;
-	}
-    } else {
-/*        ODRRE computes eigenvalues of the (shifted) root representation */
-/*        ODRRV returns the eigenvalues of the unshifted matrix. */
-/*        However, if the eigenvectors are not desired by the user, we need */
-/*        to apply the corresponding shifts from ODRRE to obtain the */
-/*        eigenvalues of the original matrix. */
-	i__1 = *m;
-	for (j = 1; j <= i__1; ++j) {
-	    itmp = iwork[iindbl + j - 1];
-	    w[j] += e[iwork[iinspl + itmp - 1]];
-/* L20: */
-	}
-    }
-
-    if (*tryrac) {
-/*        Refine computed eigenvalues so that they are relatively accurate */
-/*        with respect to the original matrix T. */
-	ibegin = 1;
-	wbegin = 1;
-	i__1 = iwork[iindbl + *m - 1];
-	for (jblk = 1; jblk <= i__1; ++jblk) {
-	    iend = iwork[iinspl + jblk - 1];
-	    in = iend - ibegin + 1;
-	    wend = wbegin - 1;
-/*           check if any eigenvalues have to be refined in this block */
-L36:
-	    if (wend < *m) {
-		if (iwork[iindbl + wend] == jblk) {
-		    ++wend;
-		    goto L36;
-		}
-	    }
-	    if (wend < wbegin) {
-		ibegin = iend + 1;
-		goto L39;
-	    }
-	    offset = iwork[iindw + wbegin - 1] - 1;
-	    ifirst = iwork[iindw + wbegin - 1];
-	    ilast = iwork[iindw + wend - 1];
-	    rtol2 = eps * 4.;
-	    odrrj(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], 
-		    &ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[
-		    inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], &
-		    pivmin, &tnrm, &iinfo);
-	    ibegin = iend + 1;
-	    wbegin = wend + 1;
-L39:
-	    ;
-	}
-    }
-
-/*     If matrix was scaled, then rescale eigenvalues appropriately. */
-
-    if (scale != 1.) {
-	d__1 = 1. / scale;
-	odscal(m, &d__1, &w[1], &c__1);
-    }
-
-/*     If eigenvalues are not in increasing order, then sort them, */
-/*     possibly along with eigenvectors. */
-
-    if (nsplit > 1) {
-	if (! wantz) {
-	    odsrt("I", m, &w[1], &iinfo);
-	    if (iinfo != 0) {
-		*info = 3;
-		return 0;
-	    }
-	} else {
-	    i__1 = *m - 1;
-	    for (j = 1; j <= i__1; ++j) {
-		i__ = 0;
-		tmp = w[j];
-		i__2 = *m;
-		for (jj = j + 1; jj <= i__2; ++jj) {
-		    if (w[jj] < tmp) {
-			i__ = jj;
-			tmp = w[jj];
-		    }
-/* L50: */
-		}
-		if (i__ != 0) {
-		    w[i__] = w[j];
-		    w[j] = tmp;
-		    if (wantz) {
-			odswap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * 
-				z_dim1 + 1], &c__1);
-			itmp = isuppz[(i__ << 1) - 1];
-			isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1];
-			isuppz[(j << 1) - 1] = itmp;
-			itmp = isuppz[i__ * 2];
-			isuppz[i__ * 2] = isuppz[j * 2];
-			isuppz[j * 2] = itmp;
-		    }
-		}
-/* L60: */
-	    }
-	}
-    }
-
-
-    work[1] = (double) lwmin;
-    iwork[1] = liwmin;
-    return 0;
-
-/*     End of ODSTMR */
-
-} /* odstmr_ */
diff --git a/external/pmrrr/src/lapack/oerbla.c b/external/pmrrr/src/lapack/oerbla.c
deleted file mode 100644
index 2615e37458..0000000000
--- a/external/pmrrr/src/lapack/oerbla.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* xerbla.f -- translated by f2c (version 20061008) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-/* Subroutine */ 
-int oerbla(char *srname, int *info)
-{
-    
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  OERBLA  is an error handler for the LAPACK routines. */
-/*  It is called by an LAPACK routine if an input parameter has an */
-/*  invalid value.  A message is printed and execution stops. */
-
-/*  Installers may consider modifying the STOP statement in order to */
-/*  call system-specific exception-handling facilities. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  SRNAME  (input) CHARACTER*(*) */
-/*          The name of the routine which called OERBLA. */
-
-/*  INFO    (input) INT */
-/*          The position of the invalid parameter in the parameter list */
-/*          of the calling routine. */
-
-/* ===================================================================== */
-
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    printf("** On entry to %6s, parameter number %2i had an illegal value\n",
-		srname, *info);
-
-
-/*     End of OERBLA */
-
-    return 0;
-} /* oerbla_ */
diff --git a/external/pmrrr/src/lapack/oerbla.cpp b/external/pmrrr/src/lapack/oerbla.cpp
new file mode 100644
index 0000000000..00ea20e504
--- /dev/null
+++ b/external/pmrrr/src/lapack/oerbla.cpp
@@ -0,0 +1,66 @@
+/**
+	C++ template version of LAPACK routine xerbla.
+	Based on C code translated by f2c (version 20061008).
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	/* Subroutine */ 
+	int oerbla(const char *srname, int *info)
+	{
+		
+		/* Table of constant values */
+		static int c__1 = 1;
+
+		/*  -- LAPACK auxiliary routine (version 3.2) -- */
+		/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+		/*     November 2006 */
+
+		/*     .. Scalar Arguments .. */
+		/*     .. */
+
+		/*  Purpose */
+		/*  ======= */
+
+		/*  OERBLA  is an error handler for the LAPACK routines. */
+		/*  It is called by an LAPACK routine if an input parameter has an */
+		/*  invalid value.  A message is printed and execution stops. */
+
+		/*  Installers may consider modifying the STOP statement in order to */
+		/*  call system-specific exception-handling facilities. */
+
+		/*  Arguments */
+		/*  ========= */
+
+		/*  SRNAME  (input) CHARACTER*(*) */
+		/*          The name of the routine which called OERBLA. */
+
+		/*  INFO    (input) INT */
+		/*          The position of the invalid parameter in the parameter list */
+		/*          of the calling routine. */
+
+		/* ===================================================================== */
+
+		/*     .. Intrinsic Functions .. */
+		/*     .. */
+		/*     .. Executable Statements .. */
+
+			printf("** On entry to %6s, parameter number %2i had an illegal value\n",
+				srname, *info);
+
+
+		/*     End of OERBLA */
+
+		return 0;
+	} /* oerbla_ */
+
+}
+
+}
diff --git a/external/pmrrr/src/lapack/olsame.c b/external/pmrrr/src/lapack/olsame.c
deleted file mode 100644
index e332449f83..0000000000
--- a/external/pmrrr/src/lapack/olsame.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*  -- translated by f2c (version 19940927) */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-int olsame(char *ca, char *cb)
-{
-
-
-    /* System generated locals */
-    int ret_val;
-
-    /* Local variables */
-    static int inta, intb, zcode;
-
-
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       January 31, 1994   
-
-
-    Purpose   
-    =======   
-
-    OLSAME returns .TRUE. if CA is the same letter as CB regardless of   
-    case.   
-
-    Arguments   
-    =========   
-
-    CA      (input) CHARACTER*1   
-    CB      (input) CHARACTER*1   
-            CA and CB specify the single characters to be compared.   
-
-   ===================================================================== 
-  
-
-
-       Test if the characters are equal */
-
-    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
-    if (ret_val) {
-	return ret_val;
-    }
-
-/*     Now test for equivalence if both characters are alphabetic. */
-
-    zcode = 'Z';
-
-/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
-       machines, on which ICHAR returns a value with bit 8 set.   
-       ICHAR('A') on Prime machines returns 193 which is the same as   
-       ICHAR('A') on an EBCDIC machine. */
-
-    inta = *(unsigned char *)ca;
-    intb = *(unsigned char *)cb;
-
-    if (zcode == 90 || zcode == 122) {
-
-/*        ASCII is assumed - ZCODE is the ASCII code of either lower o
-r   
-          upper case 'Z'. */
-
-	if (inta >= 97 && inta <= 122) {
-	    inta += -32;
-	}
-	if (intb >= 97 && intb <= 122) {
-	    intb += -32;
-	}
-
-    } else if (zcode == 233 || zcode == 169) {
-
-/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower
- or   
-          upper case 'Z'. */
-
-	if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 
-		>= 162 && inta <= 169) {
-	    inta += 64;
-	}
-	if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 
-		>= 162 && intb <= 169) {
-	    intb += 64;
-	}
-
-    } else if (zcode == 218 || zcode == 250) {
-
-/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII cod
-e   
-          plus 128 of either lower or upper case 'Z'. */
-
-	if (inta >= 225 && inta <= 250) {
-	    inta += -32;
-	}
-	if (intb >= 225 && intb <= 250) {
-	    intb += -32;
-	}
-    }
-    ret_val = inta == intb;
-
-/*     RETURN   
-
-       End of OLSAME */
-
-    return ret_val;
-} /* olsame_ */
-
diff --git a/external/pmrrr/src/lapack/olsame.cpp b/external/pmrrr/src/lapack/olsame.cpp
new file mode 100644
index 0000000000..fa93ed5011
--- /dev/null
+++ b/external/pmrrr/src/lapack/olsame.cpp
@@ -0,0 +1,118 @@
+/*  -- translated by f2c (version 19940927) */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+namespace pmrrr { namespace lapack {
+
+	int olsame(const char *ca, const char *cb)
+	{
+
+
+		/* System generated locals */
+		int ret_val;
+
+		/* Local variables */
+		static int inta, intb, zcode;
+
+
+	/*  -- LAPACK auxiliary routine (version 2.0) --   
+		   Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+		   Courant Institute, Argonne National Lab, and Rice University   
+		   January 31, 1994   
+
+
+		Purpose   
+		=======   
+
+		OLSAME returns .TRUE. if CA is the same letter as CB regardless of   
+		case.   
+
+		Arguments   
+		=========   
+
+		CA      (input) CHARACTER*1   
+		CB      (input) CHARACTER*1   
+		        CA and CB specify the single characters to be compared.   
+
+	   ===================================================================== 
+	  
+
+
+		   Test if the characters are equal */
+
+		ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+		if (ret_val) {
+		return ret_val;
+		}
+
+	/*     Now test for equivalence if both characters are alphabetic. */
+
+		zcode = 'Z';
+
+	/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
+		   machines, on which ICHAR returns a value with bit 8 set.   
+		   ICHAR('A') on Prime machines returns 193 which is the same as   
+		   ICHAR('A') on an EBCDIC machine. */
+
+		inta = *(unsigned char *)ca;
+		intb = *(unsigned char *)cb;
+
+		if (zcode == 90 || zcode == 122) {
+
+	/*        ASCII is assumed - ZCODE is the ASCII code of either lower o
+	r   
+		      upper case 'Z'. */
+
+		if (inta >= 97 && inta <= 122) {
+			inta += -32;
+		}
+		if (intb >= 97 && intb <= 122) {
+			intb += -32;
+		}
+
+		} else if (zcode == 233 || zcode == 169) {
+
+	/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower
+	 or   
+		      upper case 'Z'. */
+
+		if ((inta >= 129 && inta <= 137 ) || (inta >= 145 && inta <= 153) || (inta 
+			>= 162 && inta <= 169)) {
+			inta += 64;
+		}
+		if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb 
+			>= 162 && intb <= 169)) {
+			intb += 64;
+		}
+
+		} else if (zcode == 218 || zcode == 250) {
+
+	/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII cod
+	e   
+		      plus 128 of either lower or upper case 'Z'. */
+
+		if (inta >= 225 && inta <= 250) {
+			inta += -32;
+		}
+		if (intb >= 225 && intb <= 250) {
+			intb += -32;
+		}
+		}
+		ret_val = inta == intb;
+
+	/*     RETURN   
+
+		   End of OLSAME */
+
+		return ret_val;
+	} /* olsame_ */
+
+}
+
+}
+
diff --git a/external/pmrrr/src/plarre.c b/external/pmrrr/src/plarre.c
deleted file mode 100644
index 42651ace4c..0000000000
--- a/external/pmrrr/src/plarre.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/* Parallel computation of eigenvalues and symmetric tridiagonal 
- * matrix T, given by its diagonal elements D and its super-/sub-
- * diagonal elements E.
- *
- * Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.7
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h" 
-#include "pmrrr/plarre.h"
-#include "pmrrr/structs.h" 
-
-#define ONE                1.0
-#define HUNDRED          100.0
-#define HALF               0.5
-#define FOURTH             0.25
-
-
-static void *eigval_subset_thread_a(void *argin);
-static void *eigval_subset_thread_r(void *argin);
-static void clean_up_plarre(double*, double*, int*, int*, int*);
-
-
-static
-int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, 
-			   int n, double *D, double *E, double *E2,  
-			   int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
-			   double *W, double *Werr, double *Wgap, double *work,
-			   int *iwork);
-
-static
-int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, 
-			   int n, double *D, double *E, double *E2,  
-			   int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
-			   double *W, double *Werr, double *Wgap, double *work,
-			 int *iwork);
-
-static
-int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, 
-			   int n, double *D, double *E, double *E2,  
-			   int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
-			   double *W, double *Werr, double *Wgap, double *work,
-			   int *iwork);
-
-static 
-auxarg1_t *create_auxarg1(int, double*, double*, double*, int, int, 
-			  int, int, int, int*, double,  double,
-			  double*, double*, double*, int*, int*);
-static 
-void retrieve_auxarg1(auxarg1_t*, int*, double**, double**, double**,
-		      int*, int*, int*, int*, int*, int**, double*, 
-		      double*, double**, double**, double**, int**, 
-		      int**);
-static
-auxarg2_t *create_auxarg2(int, double*, double*, int, int, double*,
-			      double*,double*,int*,double, double, double, double);
-static
-void retrieve_auxarg2(auxarg2_t*, int*, double**, double**, int*,
-			  int*, double**, double**, double**, int**, double*, double*, double*,
-		      double*);
-
-static int cmp(const void*, const void*);
-
-
-
-
-/* Routine to compute eigenvalues */
-int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct, 
-	       val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp)
-{
-  /* input variables */
-  int              nproc  = procinfo->nproc;
-  bool             wantZ  = (jobz[0]  == 'V' || jobz[0]  == 'v');
-  bool             cntval = (jobz[0]  == 'C' || jobz[0]  == 'c');
-  int              n      = Dstruct->n;
-  double *restrict D      = Dstruct->D;
-  double *restrict E      = Dstruct->E;
-  int    *restrict isplit = Dstruct->isplit;
-  double           *vl    = Wstruct->vl;
-  double           *vu    = Wstruct->vu;
-  int              *il    = Wstruct->il;
-  int              *iu    = Wstruct->iu;
-  double *restrict W      = Wstruct->W;
-  double *restrict Werr   = Wstruct->Werr;
-  double *restrict Wgap   = Wstruct->Wgap;
-  int    *restrict Windex = Wstruct->Windex;
-  int    *restrict iblock = Wstruct->iblock;
-  double *restrict gersch = Wstruct->gersch;
-
-  /* constants */
-  int             IZERO = 0,   IONE = 1;
-  double          DZERO = 0.0;
-
-  /* work space */
-  double          *E2;
-  double         *work;
-  int             *iwork;
-
-  /* compute geschgorin disks and spectral diameter */
-  double          gl, gu, eold, emax, eabs;
-
-  /* compute splitting points */
-  int             bl_begin, bl_end, bl_size;
-
-  /* distribute work among processes */
-  int             ifirst, ilast, ifirst_tmp, ilast_tmp;
-  int             chunk, isize, iil, iiu;
-
-  /* gather results */
-  int             *rcount, *rdispl;
-
-  /* others */
-  int             info, i, j, jbl, idummy;
-  double          tmp1, dummy;
-  bool             sorted;
-  enum range_enum {allrng=1, valrng=2, indrng=3} irange;
-  double          intervals[2];
-  int             negcounts[2];
-  double          sigma;
-
-  if (range[0] == 'A' || range[0] == 'a') {
-    irange = allrng;
-  } else if (range[0] == 'V' || range[0] == 'v') {
-    irange = valrng;
-  } else if (range[0] == 'I' || range[0] == 'i') {
-    irange = indrng;
-  } else {
-    return 1;
-  }
-
-  /* allocate work space */
-  E2     = (double *) malloc(     n * sizeof(double) );
-  assert(E2 != NULL);
-  work   = (double *) malloc(   4*n * sizeof(double) );
-  assert(work != NULL);
-  iwork  = (int *)    malloc(   3*n * sizeof(int) );
-  assert(iwork != NULL);
-  rcount = (int *)    malloc( nproc * sizeof(int) );
-  assert(rcount != NULL);
-  rdispl = (int *)    malloc( nproc * sizeof(int) );
-  assert(rdispl != NULL);
-
-  /* Compute square of off-diagonal elements */
-  for (i=0; i<n-1; i++) {
-    E2[i] = E[i]*E[i];
-  }
-
-  /* compute geschgorin disks and spectral diameter */
-  gl     = D[0];
-  gu     = D[0];
-  eold   =  0.0;
-  emax   =  0.0;
-  E[n-1] =  0.0;
-
-  for (i=0; i<n; i++) {
-    eabs = fabs(E[i]);
-    if (eabs >= emax) emax = eabs;
-    tmp1 = eabs + eold;
-    gersch[2*i] = D[i] - tmp1;
-    gl = fmin(gl, gersch[2*i]);
-    gersch[2*i+1] = D[i] + tmp1;
-    gu = fmax(gu, gersch[2*i+1]);
-    eold = eabs;
-  }
-  /* min. pivot allowed in the Sturm sequence of T */
-  tolstruct->pivmin = DBL_MIN * fmax(1.0, emax*emax);
-  /* estimate of spectral diameter */
-  Dstruct->spdiam = gu - gl;
-
-  /* compute splitting points with threshold "split" */
-  odrra(&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam,
-        &Dstruct->nsplit, isplit, &info);
-  assert(info == 0);
-
-  if (irange == allrng || irange == indrng) {
-    *vl = gl;
-    *vu = gu;
-  }
-
-  /* set eigenvalue indices in case of all or subset by value has
-   * to be computed; thereby convert all problem to subset by index
-   * computation */
-  if (irange == allrng) {
-    *il = 1;
-    *iu = n;
-  } else if (irange == valrng) {
-    intervals[0] = *vl; intervals[1] = *vu;
-    
-    /* find negcount at boundaries 'vl' and 'vu';
-     * needs work of dim(n) and iwork of dim(n) */
-    odebz(&IONE, &IZERO, &n, &IONE, &IONE, &IZERO,
-  	  &DZERO, &DZERO, &tolstruct->pivmin, D, E, E2, &idummy,
-  	  intervals, &dummy, &idummy, negcounts, work, iwork, &info);
-    assert(info == 0);
-    
-    /* update negcounts of whole matrix with negcounts found for block */
-    *il = negcounts[0] + 1;
-    *iu = negcounts[1];
-  }
-
-  if (cntval && irange == valrng) {
-    /* clean up and return */
-    *nzp = iceil(*iu-*il+1, nproc);
-    clean_up_plarre(E2, work, iwork, rcount, rdispl);
-    return 0;
-  }
-
-
-  /* loop over unreduced blocks */  
-  bl_begin  = 0;
-  
-  for (jbl=0; jbl<Dstruct->nsplit; jbl++) {
-    
-    bl_end  = isplit[jbl] - 1;
-    bl_size = bl_end - bl_begin + 1;
-    
-    /* deal with 1x1 block immediately */
-    if (bl_size == 1) {
-      E[bl_end] = 0.0;
-      W[bl_begin]      = D[bl_begin];
-      Werr[bl_begin]   = 0.0;
-      Werr[bl_begin]   = 0.0;
-      iblock[bl_begin] = jbl + 1;
-      Windex[bl_begin] = 1;
-      bl_begin  = bl_end + 1;
-      continue;
-    }
-
-    /* Indix range of block */
-    iil = 1;
-    iiu = bl_size;
-
-    /* each process computes a subset of the eigenvalues of the block */
-    ifirst_tmp = iil;
-    for (i=0; i<nproc; i++) {
-      chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
-      if (i == nproc-1) {
-	ilast_tmp = iiu;
-      } else {
-	ilast_tmp = ifirst_tmp + chunk - 1;
-	ilast_tmp = imin(ilast_tmp, iiu);
-      }
-      if (i == procinfo->pid) {
-	ifirst    = ifirst_tmp;
-	ilast     = ilast_tmp;
-	isize     = ilast - ifirst + 1;
-	*offsetp = ifirst - iil;
-	*nzp      = isize;
-      }
-      rcount[i]  = ilast_tmp - ifirst_tmp + 1;
-      rdispl[i]  = ifirst_tmp - iil;
-      ifirst_tmp = ilast_tmp + 1;
-      ifirst_tmp = imin(ifirst_tmp, iiu + 1);
-    }
-    
-    /* approximate eigenvalues of input assigned to process */
-    if (isize != 0) {      
-      info = eigval_approx_proc(procinfo, ifirst, ilast,
-				    bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
-				    &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
-				    tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
-				    work, iwork);
-      assert(info == 0);    
-    }
-
-    /* compute root representation of block */
-    info = eigval_root_proc(procinfo, ifirst, ilast,
-				  bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
-				  &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
-				  tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
-				  work, iwork);
-    assert(info == 0);    
-
-    /* refine eigenvalues assigned to process w.r.t root */
-    if (isize != 0) {
-      info = eigval_refine_proc(procinfo, ifirst, ilast,
-				    bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], 
-				    &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin],
-				    tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], 
-				    work, iwork);
-      assert(info == 0);    
-    }
-    
-    memcpy(work, &W[bl_begin], isize * sizeof(double) );
-    MPI_Allgatherv(work, isize, MPI_DOUBLE, &W[bl_begin], rcount, rdispl,
-		   MPI_DOUBLE, procinfo->comm);
-    
-    memcpy(work, &Werr[bl_begin], isize * sizeof(double) );
-    MPI_Allgatherv(work, isize, MPI_DOUBLE, &Werr[bl_begin], rcount, rdispl,
-		   MPI_DOUBLE, procinfo->comm);
-    
-    memcpy(iwork, &Windex[bl_begin], isize * sizeof(int) );
-    MPI_Allgatherv(iwork, isize, MPI_INT, &Windex[bl_begin], rcount, rdispl,
-		   MPI_INT, procinfo->comm);
-    
-    /* Ensure that within block eigenvalues sorted */
-    sorted = false;
-    while (sorted == false) {
-    	sorted = true;
-    	for (j=bl_begin; j < bl_end; j++) {
-    	  if (W[j+1] < W[j]) {
-    	    sorted = false;
-    	    tmp1 = W[j];
-    	    W[j] = W[j+1];
-    	    W[j+1] = tmp1;
-    	    tmp1 = Werr[j];
-    	    Werr[j] = Werr[j+1];
-    	    Werr[j+1] = tmp1;
-    	  }
-    	}
-    }
-    
-    /* Set indices index correctly */
-    for (j=bl_begin; j <= bl_end; j++)
-      iblock[j] = jbl + 1;
-    
-    /* Recompute gaps within the blocks */
-    for (j = bl_begin; j < bl_end; j++) {
-      Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) );
-    }
-    sigma = E[bl_end];
-    Wgap[bl_end] = fmax(0.0, (gu - sigma) - (W[bl_end] + Werr[bl_end]) );
-
-    /* Compute UNSHIFTED eigenvalues */
-    if (!wantZ) {
-      sigma = E[bl_end];
-      for (i=bl_begin; i<=bl_end; i++) {
-	W[i]   += sigma;
-      }
-    }
-    
-    /* Proceed with next block */
-    bl_begin  = bl_end  + 1;
-  }
-  /* end of loop over unreduced blocks */    
-  
-  /* free memory */
-  clean_up_plarre(E2, work, iwork, rcount, rdispl);
-  
-  return 0;
-}
-
-/*
- * Free's on allocated memory of plarre routine
- */
-static  
-void clean_up_plarre(double *E2, double *work, int *iwork, 
-		     int *rcount, int *rdispl)
-{
-  free(E2);
-  free(work);
-  free(iwork);
-  free(rcount);
-  free(rdispl);
-}
-
-#ifndef DISABLE_PTHREADS
-static 
-int eigval_approx_proc
-(proc_t *procinfo, int ifirst, int ilast, 
- int n, double *D, double *E, double *E2,  
- int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
- double *W, double *Werr, double *Wgap, double *work, int *iwork)
-{
-  /* Input parameter */
-  int    isize  = ilast-ifirst+1;
-  double pivmin = tolstruct->pivmin;
-
-  /* /\* Multithreading *\/ */
-  int            max_nthreads = procinfo->nthreads;
-  int            iifirst, iilast, chunk;
-  pthread_t      *threads;
-  pthread_attr_t attr;
-  auxarg1_t      *auxarg1;
-
-  /* Others */
-  int    info, m, i, j;
-  double dummy;
-  
-  /* Allocate workspace */
-  int *isplit = (int *) malloc( n * sizeof(int) );
-  assert(isplit != NULL);
-  threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
-  assert(threads != NULL);
-
-  /* This is an unreduced block */
-  int nsplit = 1;
-  isplit[0] = n;
-  
-  if (max_nthreads > 1) {
-    pthread_attr_init(&attr);
-    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
-    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
-  }
-
-  /* Set tolerance parameters */
-  double bsrtol = sqrt(DBL_EPSILON);    
-
-
-  /* APPROXIMATE EIGENVALUES */
-
-  /* compute approximations of the eigenvalues with muliple threads */
-  /* equivalent to: */
-  /* dlarrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, */
-  /*        &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, */
-  /*        &wl, &wu, iblock, Windex, work, iwork, &info); */
-  /* assert(info == 0); */
-  /* assert(m == ilast-ifirst+1); */
-  
-  int nthreads = max_nthreads;
-  while (nthreads > 1 && isize / nthreads < 2)
-    nthreads--;
-
-  if (nthreads > 1) {
-    
-    /* each threads computes W[iifirst:iilast] and places them in
-     * work[0:n-1]; the corresponding errors in work[n:2*n-1];
-     * the blocks they belong in iwork[0:n-1]; and their indices in
-     * iwork[n:2*n-1]; */
-    
-    iifirst = ifirst;
-    chunk = isize / nthreads;
-    for (i=1; i<nthreads; i++) {
-      
-      iilast = iifirst + chunk - 1;
-
-      auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
-			       nsplit, isplit, bsrtol, pivmin, gersch,
-			       &work[0], &work[n], &iwork[n], &iwork[0]);
-      
-      info = pthread_create(&threads[i], &attr,
-			    eigval_subset_thread_a,
-			    (void *) auxarg1);
-      assert(info == 0);
-      
-      iifirst = iilast + 1;
-    }
-    iilast = ilast;
-
-    auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
-			     nsplit, isplit, bsrtol, pivmin, gersch,
-			     &work[0], &work[n], &iwork[n], &iwork[0]);
-    
-    void *status = eigval_subset_thread_a( (void *) auxarg1 );
-    assert(status == NULL);
-    
-    /* join threads */
-    for (i=1; i<nthreads; i++) {
-      info = pthread_join(threads[i], &status);
-      assert(info == 0 && status == NULL);
-    }
-    
-    /* m counts the numbers of eigenvalues computed by process */
-    m = isize;
-    for (j=0; j<isize; j++) {
-      W[j]      = work[j];
-      Werr[j]   = work[j+n];
-      iblock[j] = iwork[j];
-      Windex[j] = iwork[j+n];
-    }
-    
-  } else {
-    /* no multithreaded computation */
-    
-    double wl, wu;
-    odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch,
-  	  &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr,
-  	  &wl, &wu, iblock, Windex, work, iwork, &info);
-    assert(info == 0);
-    assert(m == ilast-ifirst+1);
-  }
-
-  /* clean up */
-  free(threads);
-  free(isplit);
-
-  if (max_nthreads > 1) {
-    pthread_attr_destroy(&attr);
-  }
-
-  return 0;
-}
-#else
-static 
-int eigval_approx_proc
-(proc_t *procinfo, int ifirst, int ilast, 
- int n, double *D, double *E, double *E2,  
- int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
- double *W, double *Werr, double *Wgap, double *work,
- int *iwork)
-{
-  /* Input parameter */
-  double pivmin = tolstruct->pivmin;
-
-  /* Allocate workspace */
-  int *isplit = (int *) malloc( n * sizeof(int) );
-  assert(isplit != NULL);
-
-  /* This is an unreduced block */
-  int nsplit = 1;
-  isplit[0] = n;
-  
-  /* Set tolerance parameters */
-  double bsrtol = sqrt(DBL_EPSILON);    
-
-  /* APPROXIMATE EIGENVALUES */
-  int m, info;
-  double wl, wu, dummy;
-  odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch,
-    &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr,
-    &wl, &wu, iblock, Windex, work, iwork, &info);
-  assert(info == 0);
-  assert(m == ilast-ifirst+1);
-
-  /* clean up */
-  free(isplit);
-
-  return 0;
-}
-#endif
-
-static 
-int eigval_root_proc
-(proc_t *procinfo, int ifirst, int ilast, 
- int n, double *D, double *E, double *E2,  
- int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
- double *W, double *Werr, double *Wgap, double *work, int *iwork)
-{
-  /* Input parameter */
-  double pivmin = tolstruct->pivmin;
-
-  /* Create random vector to perturb rrr, same seed */
-  int    two_n = 2*n;
-  int    iseed[4] = {1,1,1,1};
-
-  double Dpivot, Dmax;
-  bool   noREP;
-
-  int   info, i, j;
-  int   IONE = 1, ITWO = 2;
-  double tmp, tmp1, tmp2;
-
-  /* Set tolerance parameters (need to be same as in refine function) */
-  double rtl = sqrt(DBL_EPSILON);
-  
-  /* Allocate workspace */
-  double *randvec = (double *) malloc( 2*n * sizeof(double) );
-  assert(randvec != NULL);
-
-  /* create random vector to perturb rrr and broadcast it */
-  odrnv(&ITWO, iseed, &two_n, randvec);
-  
-  /* store shift of initial RRR, here set to zero */
-  E[n-1] = 0.0;
-
-  /* find outer bounds GL, GU for block and spectral diameter */
-  double gl = D[0];
-  double gu = D[0];
-  for (i = 0; i < n; i++) {
-    gl = fmin(gl, gersch[2*i]  );
-    gu = fmax(gu, gersch[2*i+1]);
-  }
-  double spdiam = gu - gl;
-  
-  /* find approximation of extremal eigenvalues of the block
-   * odrrk computes one eigenvalue of tridiagonal matrix T
-   * tmp1 and tmp2 one hold the eigenvalue and error, respectively */
-  odrrk(&n, &IONE, &gl, &gu, D, E2,
-        &pivmin, &rtl, &tmp1, &tmp2, &info);
-  assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
-    
-  double isleft = fmax(gl, tmp1-tmp2 - HUNDRED*DBL_EPSILON*fabs(tmp1-tmp2) );
-    
-  odrrk(&n, &n, &gl, &gu, D, E2, &pivmin, &rtl, &tmp1, &tmp2, &info);
-  assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
-    
-  double isright = fmin(gu, tmp1+tmp2 + HUNDRED*DBL_EPSILON*fabs(tmp1+tmp2) );
-  
-  spdiam = isright - isleft;
-  
-  /* compute negcount at points s1 and s2 */
-  double s1 = isleft  + HALF   * spdiam;
-  double s2 = isright - FOURTH * spdiam;  /* not needed currently */
-
-  /* compute negcount at points s1 and s2 */
-  /* cnt = number of eigenvalues in (s1,s2] = count_right - count_left
-   * negcnt_lft = number of eigenvalues smaller equals than s1
-   * negcnt_rgt = number of eigenvalues smaller equals than s2 */
-  int cnt, negcnt_lft, negcnt_rgt;
-  odrrc("T", &n, &s1, &s2, D, E, &pivmin,
-        &cnt, &negcnt_lft, &negcnt_rgt, &info);
-  assert(info == 0);
-  
-  /* if more of the desired eigenvectors are in the left part shift left
-   * and the other way around */
-  int sgndef;
-  double sigma;
-  if ( negcnt_lft >= n - negcnt_lft ) {
-    /* shift left */
-    sigma = isleft;
-    sgndef = ONE;
-  } else {
-    /* shift right */
-    sigma = isright;
-    sgndef = -ONE;
-  }
-
-  /* define increment to perturb initial shift to find RRR
-   * with not too much element growth */
-  double tau = spdiam*DBL_EPSILON*n + 2.0*pivmin;
-
-
-  /* try to find initial RRR of block:
-   * need work space of 3*n here to store D, L, D^-1 of possible
-   * representation:
-   * D_try      = work[0  :  n-1]
-   * L_try      = work[n  :2*n-1]
-   * inv(D_try) = work[2*n:3*n-1] */
-
-  int off_L    = n;
-  int off_invD = 2*n;
-    
-  int jtry;
-  for (jtry = 0; jtry < MAX_TRY_RRR; jtry++) {
-
-    Dpivot  = D[0] - sigma;
-    work[0] = Dpivot;
-    Dmax    = fabs( work[0] );
-    j = 0;
-
-    for (i = 0; i < n-1; i++) {
-      work[i+off_invD] = 1.0 / work[i];
-      tmp = E[j] * work[i+off_invD];
-      work[i+off_L] = tmp;
-      Dpivot = (D[j+1] - sigma) - tmp*E[j];
-      work[i+1] = Dpivot;
-      Dmax = fmax(Dmax, fabs(Dpivot) );
-      j++;
-    }
-      
-    /* except representation only if not too much element growth */
-    if (Dmax > MAX_GROWTH*spdiam) {
-      noREP = true;
-    } else {
-      noREP = false;
-    }
-      
-    if (noREP == true) {
-      /* if all eigenvalues are desired shift is made definite to use DQDS
-       * so we should not end here */
-      if (jtry == MAX_TRY_RRR-2) {
-	if (sgndef == ONE) { /* floating point comparison okay here */
-	  sigma = gl - FUDGE_FACTOR*spdiam*DBL_EPSILON*n
-	    - FUDGE_FACTOR*2.0*pivmin;
-	} else {
-	  sigma = gu + FUDGE_FACTOR*spdiam*DBL_EPSILON*n
-	    + FUDGE_FACTOR*2.0*pivmin;
-	}
-      } else if (jtry == MAX_TRY_RRR-1) {
-	fprintf(stderr,"No initial representation could be found.\n");
-	exit(3);
-      } else {
-	sigma -= sgndef*tau;
-	tau   *= 2.0;
-	continue;
-      }
-    } else {   /* found representation */
-      break;
-    }
-  }
-  /* end trying to find initial RRR of block */
-
-  /* save initial RRR and corresponding shift */
-  memcpy(D, &work[0],  n    * sizeof(double) );
-  memcpy(E, &work[n], (n-1) * sizeof(double) );
-  E[n-1] = sigma;
-  /* work[0:4*n-1] can now be used again for anything */
-
-  /* perturb root rrr by small relative amount, first make sure
-   * that at least two values are actually disturbed enough,
-   * which might not be necessary */
-  while( fabs(randvec[0])*RAND_FACTOR < 1.0 )
-    randvec[0] *= 2.0;
-  while( fabs(randvec[n-1])  *RAND_FACTOR < 1.0 )
-    randvec[n-1]   *= 2.0;
-
-  for (i=0; i<n-1; i++) {
-    D[i] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[i];
-    E[i] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[i+n];
-  }
-  D[n-1] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[n-1];
-
-  /* clean up */
-  free(randvec);
-
-  return 0;
-}
-
-#ifndef DISABLE_PTHREADS
-static 
-int eigval_refine_proc
-(proc_t *procinfo, int ifirst, int ilast, 
- int n, double *D, double *E, double *E2,  
- int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
- double *W, double *Werr, double *Wgap, double *work, int *iwork)
-{
-  /* Input parameter */
-  int    isize  = ilast-ifirst+1;
-  double pivmin = tolstruct->pivmin;
-
-  /* Multithreading */
-  int            nthreads;
-  int            max_nthreads = procinfo->nthreads;
-  int            chunk;
-  pthread_t      *threads;
-  pthread_attr_t attr;
-  auxarg2_t      *auxarg2;
-
-  int    info, i;
-
-  /* Allocate space */
-  threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
-  assert(threads != NULL);
-  int *isplit = (int *) malloc( n * sizeof(int) );
-  assert(isplit != NULL);
-
-  /* This is an unreduced block (nsplit=1) */
-  isplit[0] = n;
-  
-  /* Prepare multi-threading */
-  if (max_nthreads > 1) {
-    pthread_attr_init(&attr);
-    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
-    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
-  }
-
-  /* find outer bounds GL, GU for block and spectral diameter */
-  double gl = D[0];
-  double gu = D[0];
-  for (i = 0; i < n; i++) {
-    gl = fmin(gl, gersch[2*i]  );
-    gu = fmax(gu, gersch[2*i+1]);
-  }
-  double spdiam = gu - gl;
-
-  /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */
-  
-  int i_low = Windex[0];
-  int i_upp = Windex[isize-1];
-  double sigma = E[n-1];
-
-  /* calculate gaps */
-  for (i=0; i<isize-1; i++) {
-    Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
-  }
-  Wgap[isize-1] = fmax(0.0, gu - (W[isize-1] + Werr[isize-1]) );
-    
-  /* shift eigenvalues to be consistent with dqds
-   * and compute eigenvalues of SHIFTED matrix */
-  for (i=0; i<isize; i++) {
-    W[i]    -= sigma;
-    Werr[i] += fabs(W[i])*DBL_EPSILON;
-  }
-
-  /* work  for sequential odrrb = work[0:2*n-1]
-   * iwork for sequential odrrb = iwork[0:2*n-1]
-   * DE2 = work[2*n:3*n-1] strting at bl_begin */
-  int off_DE2 = 2*n;
-    
-  /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
-  for (i=0; i<n; i++) {
-    work[i+off_DE2] = D[i]*E[i]*E[i];
-  }
-    
-  nthreads = max_nthreads;
-  while (nthreads > 1 && isize/nthreads < 2) {
-    nthreads--;
-  }
-
-  if (nthreads > 1) {
-
-    int rf_begin=0, rf_end;
-    chunk    = isize / nthreads;
-    for (i=1; i<nthreads; i++) {
-      
-      rf_end = rf_begin + chunk - 1;
-            
-      auxarg2 = create_auxarg2(n, D,
-			       &work[off_DE2],
-			       rf_begin, rf_end, W, Werr, Wgap, Windex,
-			       tolstruct->rtol1, tolstruct->rtol2,
-			       pivmin, spdiam);
-      
-      info = pthread_create(&threads[i], &attr,
-  			      eigval_subset_thread_r,
-			    (void *) auxarg2);
-      assert(info == 0);
-      
-      rf_begin = rf_end + 1;
-    }
-    rf_end = isize-1;
-
-    auxarg2 = create_auxarg2(n, D,
-			     &work[off_DE2],
-			     rf_begin, rf_end, W, Werr, Wgap, Windex,
-			     tolstruct->rtol1, tolstruct->rtol2,
-			     pivmin, spdiam);
-      
-    void *status = eigval_subset_thread_r( (void *) auxarg2 );
-    assert(status == NULL);
-    
-    /* join threads */
-    for (i=1; i<nthreads; i++) {
-      info = pthread_join(threads[i], &status);
-      assert(info == 0 && status == NULL);
-    }
-    /* should update gaps at splitting points here, but the gaps
-     * will be recomputed anyway */
-      
-  } else {
-    
-    int offset = i_low-1;
-    
-    /* refine eigenvalues found by odrrb for i_low:i_upp */
-    odrrb(&n, D, &work[off_DE2], &i_low,
-	  &i_upp, &tolstruct->rtol1, &tolstruct->rtol2, &offset, W, Wgap, 
-	  Werr, work, iwork, &pivmin, &spdiam, &n, &info);
-    assert(info == 0);
-    /* needs work of dim(2*n) and iwork of dim(2*n) */
-  }
-  /* odrrb computes gaps correctly, but not last one;
-   * this is ignored since the gaps are recomputed anyway */
-  
-  /* clean up */
-  free(threads);
-  free(isplit);
-  
-  if (max_nthreads > 1) {
-    pthread_attr_destroy(&attr);
-  }
-  
-  return 0;
-}
-#else
-static 
-int eigval_refine_proc
-(proc_t *procinfo, int ifirst, int ilast, 
- int n, double *D, double *E, double *E2,  
- int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
- double *W, double *Werr, double *Wgap, double *work, int *iwork)
-{
-  /* Input parameter */
-  int    isize  = ilast-ifirst+1;
-  double pivmin = tolstruct->pivmin;
-
-  /* Allocate space */
-  int *isplit = (int *) malloc( n * sizeof(int) );
-  assert(isplit != NULL);
-
-  /* This is an unreduced block (nsplit=1) */
-  isplit[0] = n;
-  
-  /* find outer bounds GL, GU for block and spectral diameter */
-  double gl = D[0];
-  double gu = D[0];
-  int i;
-  for (i = 0; i < n; i++) {
-    gl = fmin(gl, gersch[2*i]  );
-    gu = fmax(gu, gersch[2*i+1]);
-  }
-  double spdiam = gu - gl;
-
-  /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */
-  
-  int i_low = Windex[0];
-  int i_upp = Windex[isize-1];
-  double sigma = E[n-1];
-
-  /* calculate gaps */
-  for (i=0; i<isize-1; i++) {
-    Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
-  }
-  Wgap[isize-1] = fmax(0.0, gu - (W[isize-1] + Werr[isize-1]) );
-    
-  /* shift eigenvalues to be consistent with dqds
-   * and compute eigenvalues of SHIFTED matrix */
-  for (i=0; i<isize; i++) {
-    W[i]    -= sigma;
-    Werr[i] += fabs(W[i])*DBL_EPSILON;
-  }
-
-  /* work  for sequential odrrb = work[0:2*n-1]
-   * iwork for sequential odrrb = iwork[0:2*n-1]
-   * DE2 = work[2*n:3*n-1] strting at bl_begin */
-  int off_DE2 = 2*n;
-    
-  /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
-  for (i=0; i<n; i++) {
-    work[i+off_DE2] = D[i]*E[i]*E[i];
-  }
-    
-  int offset = i_low-1;
-    
-  /* refine eigenvalues found by odrrb for i_low:i_upp */
-  int info;
-  odrrb
-  (&n, D, &work[off_DE2], &i_low,
-   &i_upp, &tolstruct->rtol1, &tolstruct->rtol2, &offset, W, Wgap, 
-   Werr, work, iwork, &pivmin, &spdiam, &n, &info);
-  assert(info == 0);
-  /* needs work of dim(2*n) and iwork of dim(2*n) */
-
-  /* odrrb computes gaps correctly, but not last one;
-   * this is ignored since the gaps are recomputed anyway */
-  
-  /* clean up */
-  free(isplit);
-  
-  return 0;
-}
-#endif
-
-static 
-void *eigval_subset_thread_a(void *argin)
-{
-  /* from input argument */
-  int    n, il, iu, my_il, my_iu;
-  double *D, *E, *E2, *gersch;
-  double bsrtol, pivmin;
-  int    nsplit, *isplit;
-
-  /* others */
-  int    info;
-  double dummy1, dummy2;
-  int    num_vals;
-  double *W_tmp, *Werr_tmp, *W, *Werr;
-  int    *iblock_tmp, *Windex_tmp, *iblock, *Windex;
-  double *work;
-  int    *iwork;
-  
-  retrieve_auxarg1((auxarg1_t *) argin, &n, &D, &E, &E2,
-		   &il, &iu, &my_il, &my_iu, &nsplit,
-		   &isplit, &bsrtol, &pivmin, &gersch,
-		   &W, &Werr, &Windex, &iblock);
-
-  /* allocate memory */
-  W_tmp = (double *) malloc( n * sizeof(double) );
-  assert(W_tmp != NULL);
-  
-  Werr_tmp = (double *) malloc( n * sizeof(double) );
-  assert(Werr_tmp != NULL);
-  
-  Windex_tmp = (int *) malloc( n * sizeof(int) );
-  assert(Windex_tmp != NULL);
-
-  iblock_tmp = (int *) malloc( n * sizeof(int) );
-  assert(iblock_tmp != NULL);
-
-  work  = (double *) malloc( 4*n * sizeof(double) );
-  assert (work != NULL);
-
-  iwork = (int *) malloc( 3*n * sizeof(int) );
-  assert (iwork != NULL);
-
-  /* compute eigenvalues 'my_il' to 'my_iu', put into temporary arrays */
-  odrrd("I", "B", &n, &dummy1, &dummy2, &my_il, &my_iu, gersch,
-        &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &num_vals,
-        W_tmp, Werr_tmp, &dummy1, &dummy2, iblock_tmp, Windex_tmp,
-        work, iwork, &info);
-
-  assert(info == 0);
-
-  /* copy computed values in W, Werr, Windex, iblock (which are work space) */
-  memcpy(&W[my_il-il],      W_tmp,      num_vals * sizeof(double) );
-  memcpy(&Werr[my_il-il],   Werr_tmp,   num_vals * sizeof(double) );
-  memcpy(&Windex[my_il-il], Windex_tmp, num_vals * sizeof(int)    );
-  memcpy(&iblock[my_il-il], iblock_tmp, num_vals * sizeof(int)    );
-  
-  free(W_tmp);
-  free(Werr_tmp);
-  free(Windex_tmp);
-  free(iblock_tmp);
-  free(work);
-  free(iwork);
-
-  return NULL;
-}
-
-static 
-auxarg1_t *create_auxarg1(int n, double *D, double *E, double *E2,
-			  int il, int iu, int my_il, int my_iu, 
-			  int nsplit, int *isplit, double bsrtol, 
-			  double pivmin, double *gersch, double *W, 
-			  double *Werr, int *Windex, int *iblock)
-{
-  auxarg1_t *arg;
-
-  arg = (auxarg1_t *) malloc( sizeof(auxarg1_t) );
-  assert(arg != NULL);
-
-  arg->n       = n;
-  arg->D       = D;
-  arg->E       = E;
-  arg->E2      = E2;
-  arg->il      = il;
-  arg->iu      = iu;
-  arg->my_il   = my_il;
-  arg->my_iu   = my_iu;
-  arg->nsplit  = nsplit;
-  arg->isplit  = isplit;
-  arg->bsrtol  = bsrtol;
-  arg->pivmin  = pivmin;
-  arg->gersch  = gersch;
-  arg->W       = W;
-  arg->Werr    = Werr;
-  arg->Windex  = Windex;
-  arg->iblock  = iblock;
-
-  return arg;
-}
-
-static 
-void retrieve_auxarg1(auxarg1_t *arg, int *n, double **D, double **E,
-		      double **E2, int *il, int *iu, int *my_il, 
-		      int *my_iu, int *nsplit, int **isplit, 
-		      double *bsrtol, double *pivmin, double **gersch, 
-		      double **W, double **Werr, int **Windex, 
-		      int **iblock)
-{
-  *n      = arg->n;
-  *D      = arg->D;
-  *E      = arg->E;
-  *E2     = arg->E2;
-  *il     = arg->il;
-  *iu     = arg->iu;
-  *my_il  = arg->my_il;
-  *my_iu  = arg->my_iu;
-  *nsplit = arg->nsplit;
-  *isplit = arg->isplit;
-  *bsrtol = arg->bsrtol;
-  *pivmin = arg->pivmin;
-  *gersch = arg->gersch;
-  *W      = arg->W;
-  *Werr   = arg->Werr;
-  *Windex = arg->Windex;
-  *iblock = arg->iblock;
-
-  free(arg);
-}
-
-static 
-void *eigval_subset_thread_r(void *argin)
-{
-  /* from input argument */
-  int          bl_size, rf_begin, rf_end;
-  double       *D, *DE2;
-  double       rtol1, rtol2, pivmin;
-  double       bl_spdiam;
-
-  /* others */
-  int          info, offset;
-  double       *W, *Werr, *Wgap;
-  int          *Windex;
-  double       *work;
-  int          *iwork;
-
-  retrieve_auxarg2((auxarg2_t *) argin, &bl_size, &D, &DE2,
-		   &rf_begin, &rf_end, &W, &Werr, &Wgap, &Windex, &rtol1, &rtol2,
-		   &pivmin, &bl_spdiam);
-
-  /* malloc work space */
-  work = (double *) malloc( 2*bl_size * sizeof(double) );
-  assert(work != NULL);
-  
-  iwork = (int *)   malloc( 2*bl_size * sizeof(int) );
-  assert(iwork != NULL);
-
-  /* special case of only one eigenvalue */
-  if (rf_begin == rf_end)
-    Wgap[rf_begin] = 0.0;
- 
-  offset = Windex[rf_begin] - 1;
-  
-  /* call bisection routine to refine the eigenvalues */
-  odrrb(&bl_size, D, DE2, &Windex[rf_begin], &Windex[rf_end],
-        &rtol1, &rtol2, &offset, &W[rf_begin], &Wgap[rf_begin],
-        &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam,
-        &bl_size, &info);
-  assert(info == 0);
-
-  /* clean up */
-  free(work);
-  free(iwork);
-
-  return NULL;
-}
-
-static 
-auxarg2_t *create_auxarg2(int bl_size, double *D, double *DE2,
-			  int rf_begin, int rf_end, double *W, double *Werr,
-			  double *Wgap, int *Windex,    
-			  double rtol1, double rtol2, double pivmin, 
-			  double bl_spdiam)
-{
-  auxarg2_t *arg;
-
-  arg = (auxarg2_t *) malloc( sizeof(auxarg2_t) );
-  assert(arg != NULL);
-
-  arg->bl_size   = bl_size;
-  arg->D         = D;
-  arg->DE2       = DE2;
-  arg->rf_begin  = rf_begin;
-  arg->rf_end    = rf_end;
-  arg->W   = W;
-  arg->Werr   = Werr;
-  arg->Wgap   = Wgap;
-  arg->Windex   = Windex;
-  arg->rtol1     = rtol1;
-  arg->rtol2     = rtol2;
-  arg->pivmin    = pivmin;
-  arg->bl_spdiam = bl_spdiam;
-
-  return arg;
-}
-
-static 
-void retrieve_auxarg2(auxarg2_t *arg, int *bl_size, double **D,
-		      double **DE2, int *rf_begin, int *rf_end,
-		      double **W, double **Werr, double **Wgap, int **Windex, 
-		      double *rtol1, double *rtol2,
-		      double *pivmin, double *bl_spdiam)
-{
-  *bl_size   = arg->bl_size;
-  *D         = arg->D;
-  *DE2       = arg->DE2;
-  *rf_begin  = arg->rf_begin;
-  *rf_end    = arg->rf_end;
-  *W   = arg->W;
-  *Werr   = arg->Werr;
-  *Wgap   = arg->Wgap;
-  *Windex   = arg->Windex;
-  *rtol1     = arg->rtol1;
-  *rtol2     = arg->rtol2;
-  *pivmin    = arg->pivmin;
-  *bl_spdiam = arg->bl_spdiam;
-
-  free(arg);
-}
-
-/*
- * Compare function for using qsort() on an array
- * of doubles
- */
-static 
-int cmp(const void *a1, const void *a2)
-{
-  double arg1 = *(double *)a1;
-  double arg2 = *(double *)a2;
-
-  if (arg1 < arg2)
-    return -1;
-  else
-    return 1;
-}
diff --git a/external/pmrrr/src/plarrv.c b/external/pmrrr/src/plarrv.c
deleted file mode 100644
index bafb7bdb85..0000000000
--- a/external/pmrrr/src/plarrv.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/* Parallel computation of eigenvectors and symmetric tridiagonal 
- * matrix T, which is preprocessed by the routine 'plarre'.
- *
- * Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h"
-#include "pmrrr/plarrv.h"
-#include "pmrrr/process_task.h"
-#include "pmrrr/rrr.h"
-#include "pmrrr/queue.h"
-#include "pmrrr/structs.h"
-#include "pmrrr/counter.h"
-
-static int assign_to_proc
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, vec_t *Zstruct, 
- int *nzp, int *myfirstp);
-static int cmpa(const void*, const void*);
-static int init_workQ
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ);
-static void *empty_workQ(void*);
-static workQ_t *create_workQ();
-static void destroy_workQ(workQ_t*);
-static auxarg3_t *create_auxarg3
-(int, proc_t*, val_t*, vec_t*, tol_t*, workQ_t*, counter_t*);
-static void retrieve_auxarg3
-(auxarg3_t*, int*, proc_t**, val_t**, vec_t**, tol_t**, workQ_t**, counter_t**);
-
-/*
- * Computation of eigenvectors of a symmetric tridiagonal
- */
-#ifndef DISABLE_PTHREADS
-int plarrv
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct,
- vec_t *Zstruct, tol_t *tolstruct, int *nzp, int *myfirstp)
-{
-  int     nthreads = procinfo->nthreads;
-  int     n        = Dstruct->n;
-  double  *W       = Wstruct->W;
-
-  /* Allocate work space and copy eigenvalues */
-  double *Wshifted = (double*)malloc(n*sizeof(double));
-  assert(Wshifted != NULL);
-
-  memcpy(Wshifted, W, n*sizeof(double));
-  Wstruct->Wshifted = Wshifted;
-
-  pthread_t *threads = (pthread_t*)malloc(nthreads*sizeof(pthread_t));
-  assert(threads != NULL);
-
-  /* Assign eigenvectors to processes */
-  assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp);
-
-  /* Create work queue Q, counter, threads to empty Q */
-  workQ_t *workQ = create_workQ();
-  counter_t *num_left = PMR_create_counter(*nzp);
-
-  threads[0] = pthread_self();
-  pthread_attr_t attr;
-  pthread_attr_init(&attr);
-  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
-  pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); 
-
-  int i;
-  for (i=1; i<nthreads; i++) {
-    auxarg3_t *auxarg = 
-      create_auxarg3(i, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
-    int info = pthread_create(&threads[i], &attr, empty_workQ, (void*)auxarg);
-    assert(info == 0);
-  }
-
-  /* Initialize work queue of process */
-  int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ);
-  assert(info == 0);
-
-  /* Empty the work queue */
-  auxarg3_t *auxarg = 
-    create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
-  void *status = empty_workQ((void*)auxarg);
-  assert(status == NULL);
-
-  /* Join all the worker thread */
-  for (i=1; i<nthreads; i++) {
-    info = pthread_join(threads[i], &status);
-    assert(info == 0 && status == NULL);
-  }
-
-  /* Clean up */
-  free(Wshifted);
-  free(threads);
-  pthread_attr_destroy(&attr);
-  destroy_workQ(workQ);
-  PMR_destroy_counter(num_left);
-
-  return 0;
-}
-#else
-int plarrv
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct,
- vec_t *Zstruct, tol_t *tolstruct, int *nzp, int *myfirstp)
-{
-  int     n  = Dstruct->n;
-  double  *W = Wstruct->W;
-
-  /* Allocate work space and copy eigenvalues */
-  double *Wshifted = (double*)malloc(n*sizeof(double));
-  assert(Wshifted != NULL);
-
-  memcpy(Wshifted, W, n*sizeof(double));
-  Wstruct->Wshifted = Wshifted;
-
-  /* Assign eigenvectors to processes */
-  assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp);
-
-  /* Create work queue Q, counter, threads to empty Q */
-  workQ_t *workQ = create_workQ();
-  counter_t *num_left = PMR_create_counter(*nzp);
-
-  /* Initialize work queue of process */
-  int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ);
-  assert(info == 0);
-
-  /* Empty the work queue */
-  auxarg3_t *auxarg = 
-    create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left);
-  void *status = empty_workQ((void*)auxarg);
-  assert(status == NULL);
-
-  /* Clean up */
-  free(Wshifted);
-  destroy_workQ(workQ);
-  PMR_destroy_counter(num_left);
-
-  return 0;
-}
-#endif
-
-/* 
- * Assign the computation of eigenvectors to the processes
- */
-static  
-int assign_to_proc
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct,
- vec_t *Zstruct, int *nzp, int *myfirstp)
-{
-  /* From inputs */
-  int              pid     = procinfo->pid;
-  int              nproc   = procinfo->nproc;
-  double *restrict L       = Dstruct->E;
-  int    *restrict isplit  = Dstruct->isplit;
-  int              n       = Wstruct->n;
-  int              il      = *(Wstruct->il);
-  int              iu      = *(Wstruct->iu);
-  double *restrict W       = Wstruct->W;
-  int    *restrict Windex  = Wstruct->Windex;
-  int    *restrict iblock  = Wstruct->iblock;
-  int    *restrict iproc   = Wstruct->iproc;
-  int    *restrict Zindex  = Zstruct->Zindex;
-  
-  sort_struct_t *array = (sort_struct_t *) malloc(n*sizeof(sort_struct_t));
-  
-  int i;
-  for (i=0; i<n; i++) {
-    /* Find shift of block */
-    int iblk = iblock[i];
-    int ishift  = isplit[iblk-1] - 1;
-    /* Apply shift so that unshifted eigenvalues can be sorted */
-    array[i].lambda    = W[i] + L[ishift]; 
-    array[i].local_ind = Windex[i];
-    array[i].block_ind = iblk;
-    array[i].ind       = i;
-  }
-  /* Alternatively could sort list of pointers, which
-     requires less data copying */
-
-  qsort(array, n, sizeof(sort_struct_t), cmpa);
-
-  /* Mark eigenvectors that do not need to be computed */
-  int j;
-  for (j = 0; j < il-1; j++ ) {
-    iproc[array[j].ind]  = -1;
-    Zindex[array[j].ind] = -1;
-  }
-
-  int isize = iu - il + 1;
-
-  int ibegin=il-1, iend;
-  int id;
-  for (id=0; id<nproc; id++) {
-
-    int chunk = imax(1, isize/nproc + (id < isize%nproc));
-    
-    if (id==nproc-1) {
-      iend = iu - 1;
-    } else {
-      iend = ibegin + chunk - 1;
-      iend = imin(iend, iu -1);
-    }
-
-    int k = 0;
-    for (j=ibegin; j<=iend; j++) {
-      iproc[array[j].ind]  = id;
-      Zindex[array[j].ind] = k;
-      k++;
-    }
-
-    if (id == pid) {
-      *myfirstp   = ibegin - il + 1;
-      *nzp        = iend - ibegin + 1;
-      Zstruct->nz = *nzp; 
-    }
-
-    ibegin = iend + 1;
-    ibegin = imin(ibegin, iu);
-  } /* end id */
-
-  for (j = iend+1; j < n; j++ ) {
-    iproc[array[j].ind]  = -1;
-    Zindex[array[j].ind] = -1;
-  }
-  
-  free(array);
-  return 0;
-}
-
-/* 
- * Compare function for using qsort() on an array of 
- * sort_structs
- */
-static 
-int cmpa(const void *a1, const void *a2)
-{
-  sort_struct_t *arg1, *arg2;
-
-  arg1 = (sort_struct_t *) a1;
-  arg2 = (sort_struct_t *) a2;
-
-  /* Within block local index decides */
-  if (arg1->block_ind == arg2->block_ind) {
-    return (arg1->local_ind - arg2->local_ind);
-  } else {
-    if (arg1->lambda < arg2->lambda) {
-      return -1;
-    } else if (arg1->lambda > arg2->lambda) {
-      return 1;
-    } else {
-      if (arg1->local_ind < arg2->local_ind)
-        return -1;
-      else
-        return 1;
-    }
-  }
-}
-
-/*
- * Initialize work queue by putting all tasks for the process
- * into the work queue.
- */
-static 
-int init_workQ
-(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ)
-{
-  int              pid      = procinfo->pid;
-  int              nproc    = procinfo->nproc;
-  int              nthreads = procinfo->nthreads;
-  double *restrict D        = Dstruct->D;
-  double *restrict L        = Dstruct->E;
-  int              nsplit   = Dstruct->nsplit;
-  int    *restrict isplit   = Dstruct->isplit;
-  double *restrict W        = Wstruct->W;
-  double *restrict Werr     = Wstruct->Werr;
-  double *restrict Wgap     = Wstruct->Wgap;
-  int    *restrict iproc    = Wstruct->iproc;
-  double *restrict Wshifted = Wstruct->Wshifted;
-  double *restrict gersch   = Wstruct->gersch;
-  int              nz       = *nzp;
-
-  /* Loop over blocks */
-  int i, j, k, l;
-  int ibegin  = 0;
-  for ( j=0; j<nsplit; j++ ) {
-    int iend = isplit[j] - 1;
-    int isize = iend - ibegin + 1;
-    double sigma = L[iend];
-
-    /* Use Gerschgorin disks to find spectral diameter */
-    double gl = gersch[2*ibegin    ];
-    double gu = gersch[2*ibegin + 1];
-    for (i=ibegin+1; i<iend; i++) {
-      gl = fmin(gl, gersch[2*i    ]);
-      gu = fmax(gu, gersch[2*i + 1]);
-    }
-    double spdiam = gu - gl;
-    double avggap = spdiam / (isize-1);
-
-    /* Find eigenvalues in block */
-    int nbl = 0;
-    int iWbegin = iend   + 1;
-    int iWend   = ibegin - 1;
-    for (i=ibegin; i<=iend; i++) {
-      if (nbl == 0 && iproc[i] == pid) {
-        iWbegin = i;
-        iWend   = i;
-        nbl++;
-        k = i+1;
-        while (k <=iend && iproc[k] == pid) {
-          iWend++;
-          nbl++;
-          k++;
-        }
-        /* iWend = iWbegin + nbl - 1; instead of incrementing in loop */
-      }
-    }
-
-    /* If no eigenvalues for process in block continue */
-    if (nbl == 0) {
-      ibegin = iend + 1;
-      continue;
-    }
-
-    /* Compute DL and DLL for later computation of singletons
-     * (freed when all singletons of root are computed) */
-    double *DL = (double*)malloc(isize*sizeof(double));
-    assert(DL != NULL);
-    
-    double *DLL = (double*)malloc(isize*sizeof(double));
-    assert(DLL != NULL);
-
-    for (i=0; i<isize-1; i++) {
-      double tmp = D[i+ibegin]*L[i+ibegin];
-      DL[i]      = tmp;
-      DLL[i]     = tmp*L[i+ibegin];
-    }
-
-    rrr_t *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0);
-    PMR_increment_rrr_dependencies(RRR);
-    
-    /* In W apply shift of current block to eigenvalues
-     * to get unshifted values w.r.t. T */
-    for (i=ibegin; i<=iend; i++)
-      W[i] += sigma;
-
-    /* Split eigenvalues of block into singletons and clusters
-     * and add them to process work queue */
-    int max_size = imax(1, nz/nthreads);
-    bool task_inserted = false;
-    int new_first=ibegin, new_last;
-    int sn_first, sn_last, sn_size;
-    for (i=ibegin; i<=iend; i++) {
-      if (i == iend)
-        new_last = i;
-      else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]))
-        new_last = i;
-      else
-        continue;
-
-      /* Skip rest if no eigenvalues of process */
-      if (new_first > iWend || new_last < iWbegin) {
-        new_first = i + 1;
-        continue;
-      }
-
-      int new_size = new_last - new_first + 1;
-      
-      if (new_size == 1) {
-        /* Singleton was found */
-        if (new_first < iWbegin || new_first > iWend) {
-          new_first = i + 1;
-          continue;
-        } else {
-          if (new_first==iWbegin || task_inserted==true) {
-            /* Initialize new singleton task */
-            sn_first = new_first;
-            sn_last  = new_first;
-            sn_size  = 1;
-          } else {
-            /* Extend singleton task by one */
-            sn_last++;
-            sn_size++;
-          }
-        }
-
-        /* Insert task if ... */
-        if (i==iWend || sn_size>=max_size ||
-            Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {
-
-          double lgap;
-          if (sn_first == ibegin) {
-            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
-          } else {
-            lgap = Wgap[sn_first-1];
-          }
-
-          PMR_increment_rrr_dependencies(RRR);
-
-          task_t *task = 
-            PMR_create_s_task
-            (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR);
-          
-          PMR_insert_task_at_back(workQ->s_queue, task);
-          
-          task_inserted = true;
-        } else {
-          task_inserted = false;
-        }
-      } else {
-        /* Cluster was found */
-        int cl_first = new_first;
-        int cl_last  = new_last;
-        int cl_size  = new_size;
-
-        /* Split cluster into clusters by absolut criterion */
-        if (cl_size > 3) {
-          /* Split cluster to smaller clusters [cl_first:cl_last] */
-          for (k=new_first+1; k<new_last; k++) {
-            if (k == new_last-1)
-              cl_last = new_last;
-            else if (k != cl_first && Wgap[k] > 0.8*avggap)
-              cl_last = k;
-            else
-              continue;
-
-            /* Skip cluster if no eigenvalues of process in it */
-            if (cl_last < iWbegin || cl_first > iWend) {
-              cl_first = k + 1;
-              continue;
-            }
-
-            /* Record left gap of cluster */
-            double lgap;
-            if (cl_first == ibegin) {
-              lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl);
-            } else {
-              lgap = Wgap[cl_first-1];
-            }
-
-            /* Determine processes involved in processing the cluster */
-            int left_pid  = nproc-1;
-            int right_pid = 0;
-            for (l=cl_first; l<=cl_last; l++) {
-              if (iproc[l] != -1) {
-                left_pid  = imin(left_pid,  iproc[l]);
-                right_pid = imax(right_pid, iproc[l]);
-              }
-            }
-
-            rrr_t *RRR_parent = 
-              PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);
-
-            task_t *task = 
-              PMR_create_c_task
-              (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, 
-               iWend, left_pid, right_pid, RRR_parent);
-
-            /* Insert task into queue, depending if cluster need
-             * communication with other processes */
-            if (left_pid != right_pid)
-              PMR_insert_task_at_back(workQ->r_queue, task);
-            else
-              PMR_insert_task_at_back(workQ->c_queue, task);
-            
-            cl_first = k + 1;
-          } /* end k */
-        } else {
-          /* Cluster is too small to split, so insert it to queue */
-
-          /* Record left gap of cluster */
-          double lgap;
-          if (cl_first == ibegin) {
-            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
-          } else {
-            lgap = Wgap[cl_first-1];
-          }
-
-          /* Determine processes involved */
-          int left_pid  = nproc-1;
-          int right_pid = 0;
-          for (l=cl_first; l<=cl_last; l++) {
-            if (iproc[l] != -1) {
-              left_pid  = imin(left_pid,  iproc[l]);
-              right_pid = imax(right_pid, iproc[l]);
-            }
-          }
-
-          rrr_t *RRR_parent = 
-            PMR_create_rrr
-            (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);
-
-          task_t *task = 
-            PMR_create_c_task
-            (cl_first, cl_last, 1, ibegin, 
-             iend, spdiam, lgap, iWbegin, iWend,
-             left_pid, right_pid, RRR_parent);
-
-          /* Insert task into queue, depending if cluster need
-           * communication with other processes */
-          if (left_pid != right_pid)
-            PMR_insert_task_at_back(workQ->r_queue, task);
-          else
-            PMR_insert_task_at_back(workQ->c_queue, task);
-        }
-        task_inserted = true;
-      } /* end new_size */
-
-      new_first = i + 1;
-    } /* end of splitting eigenvalues into tasks */
-
-    /* Set flag in RRR that last singleton is created */
-    PMR_set_parent_processed_flag(RRR);
-    PMR_try_destroy_rrr(RRR);
-
-    ibegin = iend + 1;
-  } /* end loop over blocks */ 
-
-  return 0;
-}
-
-/*
- * Processes all the tasks put in the work queue.
- */
-static 
-void *empty_workQ(void *argin)
-{
-  int        tid;
-  proc_t    *procinfo;
-  val_t     *Wstruct;
-  vec_t     *Zstruct;
-  tol_t     *tolstruct;
-  workQ_t   *workQ;
-  counter_t *num_left;
-  retrieve_auxarg3
-  ((auxarg3_t*)argin, &tid, &procinfo, &Wstruct,
-   &Zstruct, &tolstruct, &workQ, &num_left);
-
-  int n = Wstruct->n;
-
-  /* max. needed double precision work space: odr1v */
-  double *work = (double*)malloc(4*n*sizeof(double));
-  assert(work != NULL);
-
-  /* max. needed double precision work space: odrrb */
-  int *iwork = (int*)malloc(2*n*sizeof(int));
-  assert(iwork != NULL);
-
-  /* while loop to empty the work queue */
-  while (PMR_get_counter_value(num_left) > 0) {
-    /* empty r-queue before processing other tasks */
-    PMR_process_r_queue
-    (tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork);
-
-    task_t *task = PMR_remove_task_at_front(workQ->s_queue);
-    if ( task != NULL ) {
-      assert(task->flag == SINGLETON_TASK_FLAG);
-
-      PMR_process_s_task
-      ((singleton_t*)task->data, tid, procinfo,
-       Wstruct, Zstruct, tolstruct, num_left, work, iwork);
-      free(task);
-      continue;
-    }
-    
-    task = PMR_remove_task_at_front(workQ->c_queue);
-    if ( task != NULL ) {
-      assert(task->flag == CLUSTER_TASK_FLAG);
-
-      PMR_process_c_task
-      ((cluster_t*)task->data, tid, procinfo,
-       Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork);
-      free(task);
-      continue;
-    }
-  } /* end while */
-
-  free(work);
-  free(iwork);
-
-  return NULL;
-}
-
-static workQ_t *create_workQ()
-{
-  workQ_t *wq = (workQ_t*)malloc(sizeof(workQ_t));
-
-  wq->r_queue = PMR_create_empty_queue();
-  wq->s_queue = PMR_create_empty_queue();
-  wq->c_queue = PMR_create_empty_queue();
-
-  return wq;
-}
-
-static void destroy_workQ(workQ_t *wq)
-{
-  PMR_destroy_queue(wq->r_queue);
-  PMR_destroy_queue(wq->s_queue);
-  PMR_destroy_queue(wq->c_queue);
-  free(wq);
-}
-
-static auxarg3_t*
-create_auxarg3
-(int tid, proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct, 
- tol_t *tolstruct, workQ_t *workQ, counter_t *num_left)
-{
-  auxarg3_t *arg = (auxarg3_t*)malloc(sizeof(auxarg3_t));
-  assert(arg != NULL);
-
-  arg->tid       = tid;
-  arg->procinfo  = procinfo;
-  arg->Wstruct   = Wstruct;
-  arg->Zstruct   = Zstruct;
-  arg->tolstruct = tolstruct; 
-  arg->workQ     = workQ;
-  arg->num_left  = num_left;
-
-  return arg;
-}
-
-static void 
-retrieve_auxarg3
-(auxarg3_t *arg, int *tid, proc_t **procinfo, 
- val_t **Wstruct, vec_t **Zstruct, tol_t **tolstruct, 
- workQ_t **workQ, counter_t **num_left)
-{
-  *tid       = arg->tid;
-  *procinfo  = arg->procinfo;
-  *Wstruct   = arg->Wstruct;
-  *Zstruct   = arg->Zstruct;
-  *tolstruct = arg->tolstruct;
-  *workQ     = arg->workQ;
-  *num_left  = arg->num_left;
-
-  free(arg);
-}
diff --git a/external/pmrrr/src/pmrrr.c b/external/pmrrr/src/pmrrr.c
deleted file mode 100644
index 1c8df22eb1..0000000000
--- a/external/pmrrr/src/pmrrr.c
+++ /dev/null
@@ -1,680 +0,0 @@
-/* Computation of eigenvalues and eigenvectors of a symmetric
- * tridiagonal matrix T, given by its diagonal elements D
- * and its super-/subdiagonal elements E.
- *
- * See INCLUDE/pmrrr.h for more information.
- *
- * Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h"
-#include "pmrrr/plarre.h"
-#include "pmrrr/plarrv.h"
-#include "pmrrr/structs.h"
-
-static int handle_small_cases
-(char*, char*, int*, double*, double*,
- double*, double*, int*, int*, int*,
- MPI_Comm, int*, int*, double*, double*,int*, int*);
-
-static double scale_matrix(in_t*, val_t*, bool);
-static void invscale_eigenvalues(val_t*, double, int);
-static void clean_up
-(MPI_Comm, double*, double*, double*, 
- int*, int*, int*, int*, int*, proc_t*, 
- in_t*, val_t*, vec_t*, tol_t*);
-static int refine_to_highrac
-(proc_t*, char*, double*, double*,in_t*, int*, val_t*, tol_t*);
-
-static int cmp(const void*, const void*);
-static int cmpb(const void*, const void*);
-
-/* 
- * Computation of eigenvalues and eigenvectors of a symmetric
- * tridiagonal matrix T, given by its diagonal elements D
- * and its super-/subdiagonal elements E.
- * See README or 'pmrrr.h' for details.
- */
-
-int pmrrr
-(char *jobz, char *range, int *np, double  *D,
- double *E, double *vl, double *vu, int *il,
- int *iu, int *tryracp, MPI_Comm comm, int *nzp,
- int *offsetp, double *W, double *Z, int *ldz, int *Zsupp)
-{
-  /* Input parameter */
-  int  n      = *np;
-  bool onlyW = toupper(jobz[0]) == 'N';
-  bool wantZ = toupper(jobz[0]) == 'V';
-  bool cntval = toupper(jobz[0]) == 'C';
-  bool alleig = toupper(range[0]) == 'A';
-  bool valeig = toupper(range[0]) == 'V';
-  bool indeig = toupper(range[0]) == 'I';
-
-  /* Check input parameters */
-  if(!(onlyW  || wantZ  || cntval)) return 1;
-  if(!(alleig || valeig || indeig)) return 1;
-  if(n <= 0) return 1;
-  if (valeig) {
-    if(*vu<=*vl) return 1;
-  } else if (indeig) {
-    if (*il<1 || *il>n || *iu<*il || *iu>n) return 1;
-  }
-  
-  /* MPI & multithreading info */
-  int is_init, is_final;
-  MPI_Initialized(&is_init);
-  MPI_Finalized(&is_final);
-  if (is_init!=1 || is_final==1) {
-    fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n", 
-      is_init, is_final);
-    return 1;
-  }
-  MPI_Comm comm_dup;
-  MPI_Comm_dup(comm, &comm_dup);
-  int nproc, pid, thread_support;
-  MPI_Comm_size(comm_dup, &nproc);
-  MPI_Comm_rank(comm_dup, &pid);
-  MPI_Query_thread(&thread_support);
-
-  int nthreads;
-  if ( !(thread_support == MPI_THREAD_MULTIPLE ||
-         thread_support == MPI_THREAD_FUNNELED) ) {
-    /* Disable multithreading; note: to support multithreading with 
-     * MPI_THREAD_SERIALIZED the code must be changed slightly; this 
-     * is not supported at the moment */
-    nthreads = 1;
-  } else {
-    char *ompvar = getenv("PMR_NUM_THREADS");
-    if (ompvar == NULL) {
-      nthreads = DEFAULT_NUM_THREADS;
-    } else {
-      nthreads = atoi(ompvar);
-    }
-  }
-
-#if defined(MVAPICH2_VERSION)
-  if (nthreads>1) {
-    int mv2_affinity=1;
-    char *mv2_string = getenv("MV2_ENABLE_AFFINITY");
-    if (mv2_string != NULL) 
-      mv2_affinity = atoi(mv2_string);
-    if (mv2_affinity!=0) {
-      nthreads = 1;
-      if (pid==0) {
-        fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n");
-        fflush(stderr);
-      }
-    }
-  }
-#endif
-
-  /* If only maximal number of local eigenvectors are queried
-   * return if possible here */
-  *nzp     = 0;
-  *offsetp = 0;
-  if (cntval) {
-    if ( alleig || n < DSTEMR_IF_SMALLER ) {
-      *nzp = iceil(n,nproc);
-      MPI_Comm_free(&comm_dup);
-      return 0;
-    } else if (indeig) {
-      *nzp = iceil(*iu-*il+1,nproc);
-      MPI_Comm_free(&comm_dup);
-      return 0;
-    }
-  }
-
-  /* Check if computation should be done by multiple processes */
-  int info;
-  if (n < DSTEMR_IF_SMALLER) {
-    info = handle_small_cases(jobz, range, np, D, E, vl, vu, il,
-			      iu, tryracp, comm, nzp, offsetp, W,
-			      Z, ldz, Zsupp);
-    MPI_Comm_free(&comm_dup);
-    return info;
-  }
-
-  /* Allocate memory */
-  double *Werr = (double*)malloc(n*sizeof(double)); assert(Werr!=NULL);
-  double *Wgap = (double*)malloc(n*sizeof(double)); assert(Wgap!=NULL);
-  double *gersch = (double*)malloc(2*n*sizeof(double)); assert(gersch!=NULL);
-  int *iblock = (int*)calloc(n,sizeof(int)); assert(iblock!=NULL);
-  int *iproc  = (int*)malloc(n*sizeof(int)); assert(iproc!=NULL);
-  int *Windex = (int*)malloc(n*sizeof(int)); assert(Windex!=NULL);
-  int *isplit = (int*)malloc(n*sizeof(int)); assert(isplit!=NULL);
-  int *Zindex = (int*)malloc(n*sizeof(int)); assert(Zindex!=NULL);
-  proc_t *procinfo = (proc_t*)malloc(sizeof(proc_t)); assert(procinfo!=NULL);
-  in_t *Dstruct = (in_t*)malloc(sizeof(in_t)); assert(Dstruct!=NULL);
-  val_t *Wstruct = (val_t*)malloc(sizeof(val_t)); assert(Wstruct!=NULL);
-  vec_t *Zstruct = (vec_t*)malloc(sizeof(vec_t)); assert(Zstruct!=NULL);
-  tol_t *tolstruct = (tol_t*)malloc(sizeof(tol_t)); assert(tolstruct!=NULL);
-
-  /* Bundle variables into a structures */
-  procinfo->pid            = pid;
-  procinfo->nproc          = nproc;
-  procinfo->comm           = comm_dup;
-  procinfo->nthreads       = nthreads;
-  procinfo->thread_support = thread_support;
-
-  Dstruct->n      = n;
-  Dstruct->D      = D;
-  Dstruct->E      = E;
-  Dstruct->isplit = isplit;
-
-  Wstruct->n      = n;
-  Wstruct->vl     = vl;
-  Wstruct->vu     = vu;
-  Wstruct->il     = il;
-  Wstruct->iu     = iu;
-  Wstruct->W      = W;
-  Wstruct->Werr   = Werr;
-  Wstruct->Wgap   = Wgap;
-  Wstruct->Windex = Windex;
-  Wstruct->iblock = iblock;
-  Wstruct->iproc  = iproc;
-  Wstruct->gersch = gersch;
-
-  Zstruct->ldz    = *ldz;
-  Zstruct->nz     = 0;
-  Zstruct->Z      = Z;
-  Zstruct->Zsupp  = Zsupp;
-  Zstruct->Zindex = Zindex;
-
-  /* Scale matrix to allowable range, returns 1.0 if not scaled */
-  double scale = scale_matrix(Dstruct, Wstruct, valeig);
-
-  /*  Test if matrix warrants more expensive computations which
-   *  guarantees high relative accuracy */
-  if (*tryracp)
-    odrrr(&n, D, E, &info); /* 0 - rel acc */
-  else info = -1;
-
-  int i;
-  double *Dcopy, *E2copy;
-  if (info == 0) {
-    /* This case is extremely rare in practice */ 
-    tolstruct->split = DBL_EPSILON;
-    /* Copy original data needed for refinement later */
-    Dcopy = (double*)malloc(n*sizeof(double)); assert(Dcopy!=NULL);
-    memcpy(Dcopy, D, n*sizeof(double));  
-    E2copy = (double*)malloc(n*sizeof(double)); assert(E2copy!=NULL);
-    for (i=0; i<n-1; i++) 
-      E2copy[i] = E[i]*E[i];
-  } else {
-    /* Neg. threshold forces old splitting criterion */
-    tolstruct->split = -DBL_EPSILON; 
-    *tryracp = 0;
-  }
-
-  if (!wantZ) {
-    /* Compute eigenvalues to full precision */
-    tolstruct->rtol1 = 4.0 * DBL_EPSILON;
-    tolstruct->rtol2 = 4.0 * DBL_EPSILON;
-  } else {
-    /* Do not compute to full accuracy first, but refine later */
-    tolstruct->rtol1 = sqrt(DBL_EPSILON);
-    tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1);
-    tolstruct->rtol2 = sqrt(DBL_EPSILON)*5.0E-3;
-    tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2);
-    tolstruct->rtol2 = fmax(4.0 * DBL_EPSILON, tolstruct->rtol2);
-  }
-
-  /*  Compute all eigenvalues: sorted by block */
-  info = plarre(procinfo,jobz,range,Dstruct,Wstruct,tolstruct,nzp,offsetp);
-  assert(info == 0);
-
-  /* If just number of local eigenvectors are queried */
-  if (cntval & valeig) {    
-    clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
-	     isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
-	     tolstruct);
-    return 0;
-  }
-
-  /* If only eigenvalues are to be computed */
-  if (!wantZ) {
-
-    /* Refine to high relative with respect to input T */
-    if (*tryracp) {
-      info = 
-        refine_to_highrac
-        (procinfo, jobz, Dcopy, E2copy, Dstruct, nzp, Wstruct, tolstruct);
-      assert(info == 0);
-    }
-
-    /* Sort eigenvalues */
-    qsort(W, n, sizeof(double), cmp);
-
-    /* Only keep subset ifirst:ilast */
-    int ifirst, ilast, isize;
-    int iil = *il;
-    int iiu = *iu;
-    int ifirst_tmp=iil;
-    for (i=0; i<nproc; i++) {
-      int chunk  = (iiu-iil+1)/nproc + (i < (iiu-iil+1)%nproc);
-      int ilast_tmp;
-      if (i == nproc-1) {
-	ilast_tmp = iiu;
-      } else {
-	ilast_tmp = ifirst_tmp + chunk - 1;
-	ilast_tmp = imin(ilast_tmp, iiu);
-      }
-      if (i == pid) {
-	ifirst    = ifirst_tmp;
-	ilast     = ilast_tmp;
-	isize     = ilast - ifirst + 1;
-	*offsetp = ifirst - iil;
-	*nzp      = isize;
-      }
-      ifirst_tmp = ilast_tmp + 1;
-      ifirst_tmp = imin(ifirst_tmp, iiu + 1);
-    }
-    if (isize > 0) {
-      memmove(W, &W[ifirst-1], *nzp * sizeof(double));
-    }
-
-    /* If matrix was scaled, rescale eigenvalues */
-    invscale_eigenvalues(Wstruct, scale, *nzp);
-
-    clean_up
-    (comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
-     isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct);
-
-    return 0;
-  } /* end of only eigenvalues to compute */
-
-  /* Compute eigenvectors */
-  info = plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, 
-		nzp, offsetp);
-  assert(info == 0);
-
-  /* Refine to high relative with respect to input matrix */
-  if (*tryracp) {
-    info = refine_to_highrac(procinfo, jobz, Dcopy, E2copy, 
-			     Dstruct, nzp, Wstruct, tolstruct);
-    assert(info == 0);
-  }
-
-  /* If matrix was scaled, rescale eigenvalues */
-  invscale_eigenvalues(Wstruct, scale, n);
-
-  /* Make the first nz elements of W contains the eigenvalues
-   * associated to the process */
-  int j, im=0;
-  for (j=0; j<n; j++) {
-    if (iproc[j] == pid) {
-      W[im]      = W[j];
-      Windex[im] = Windex[j];
-      Zindex[im] = Zindex[j];
-      im++;
-    }
-  }
-  
-  sort_struct_t *sort_array = (sort_struct_t*)malloc(im*sizeof(sort_struct_t));
-  for (j=0; j<im; j++) {
-    sort_array[j].lambda = W[j];
-    sort_array[j].local_ind = Windex[j];
-    sort_array[j].block_ind = 0;
-    sort_array[j].ind = Zindex[j];
-  }
-  qsort(sort_array, im, sizeof(sort_struct_t), cmpb);
-  for (j=0; j<im; j++) {
-    W[j] = sort_array[j].lambda;
-    Windex[j] = sort_array[j].local_ind;
-  }
-  free(sort_array);
-
-  clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex,
-	   isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct,
-	   tolstruct);
-  if (*tryracp) {
-    free(Dcopy);
-    free(E2copy);
-  }
-
-  return 0;
-} /* end pmrrr */
-
-static int cmpb(const void *a1, const void *a2)
-{
-  sort_struct_t *arg1, *arg2;
-  
-  arg1 = (sort_struct_t*) a1;
-  arg2 = (sort_struct_t*) a2;
-  
-  if (arg1->ind < arg2->ind)
-    return -1;
-  else
-    return 1;
-}
-
-/*
- * Free's on allocated memory of pmrrr routine
- */
-static  
-void clean_up
-(MPI_Comm comm, double *Werr, double *Wgap,
- double *gersch, int *iblock, int *iproc,
- int *Windex, int *isplit, int *Zindex,
- proc_t *procinfo, in_t *Dstruct,
- val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct)
-{
-  MPI_Comm_free(&comm);
-  free(Werr);
-  free(Wgap);
-  free(gersch);
-  free(iblock);
-  free(iproc);
-  free(Windex);
-  free(isplit);
-  free(Zindex);
-  free(procinfo);
-  free(Dstruct);
-  free(Wstruct);
-  free(Zstruct);
-  free(tolstruct);
-}
-
-/*
- * Wrapper to call LAPACKs DSTEMR for small matrices
- */
-static
-int handle_small_cases
-(char *jobz, char *range, int *np, double  *D,
- double *E, double *vlp, double *vup, int *ilp,
- int *iup, int *tryracp, MPI_Comm comm, int *nzp,
- int *myfirstp, double *W, double *Z, int *ldzp, int *Zsupp)
-{
-  bool cntval = toupper(jobz[0]) == 'C';
-  bool onlyW = toupper(jobz[0]) == 'N';
-  bool wantZ = toupper(jobz[0]) == 'V';
-  bool indeig = toupper(range[0]) == 'I';
-  int n       = *np;
-  int ldz_tmp = *np;
-  int ldz     = *ldzp;
-
-  int nproc, pid;
-  MPI_Comm_size(comm, &nproc);
-  MPI_Comm_rank(comm, &pid);
-  
-  int lwork, liwork;
-  double *Z_tmp;
-  if (onlyW) {
-    lwork  = 12*n;
-    liwork =  8*n;
-  } else if (cntval) {
-    lwork  = 18*n;
-    liwork = 10*n;
-  } else if (wantZ) {
-    lwork  = 18*n;
-    liwork = 10*n;
-    int itmp;
-    if (indeig) itmp = *iup-*ilp+1;
-    else        itmp = n;
-    Z_tmp = (double*)malloc(n*itmp*sizeof(double)); assert(Z_tmp!=NULL);
-  } else {
-    return 1;
-  }
-
-  double *work = (double*)malloc(lwork*sizeof(double)); assert(work != NULL);
-  int *iwork = (int*)malloc(liwork*sizeof(int)); assert(iwork!=NULL);
-
-  if (cntval) {
-    /* Note: at the moment, jobz="C" should never get here, since
-     * it is blocked before. */
-    int m, info, MINUSONE=-1;
-    double cnt;
-    odstmr("V", "V", np, D, E, vlp, vup, ilp, iup, &m, W, &cnt,
-	   &ldz_tmp, &MINUSONE, Zsupp, tryracp, work, &lwork, iwork,
-	   &liwork, &info);
-    assert(info == 0);
-    
-    *nzp = (int) ceil(cnt/nproc);
-    free(work); free(iwork);
-    return 0;
-  }
-
-  int m, info;
-  odstmr(jobz, range, np, D, E, vlp, vup, ilp, iup, &m, W, Z_tmp,
-	 &ldz_tmp, np, Zsupp, tryracp, work, &lwork, iwork,
-	 &liwork, &info);
-  assert(info == 0);
-
-  int chunk   = iceil(m,nproc);
-  int myfirst = imin(pid * chunk, m);
-  int mylast  = imin((pid+1)*chunk - 1, m - 1);
-  int mysize  = mylast - myfirst + 1;
-
-  if (mysize > 0) {
-    memmove(W, &W[myfirst], mysize*sizeof(double));
-    if (wantZ) {
-      if (ldz == ldz_tmp) {
-	/* copy everything in one chunk */
-	memcpy(Z, &Z_tmp[myfirst*ldz_tmp], n*mysize*sizeof(double));
-      } else {
-	/* copy each vector seperately */
-        int i;
-	for (i=0; i<mysize; i++)
-	  memcpy(&Z[i*ldz], &Z_tmp[(myfirst+i)*ldz_tmp], 
-		 n*sizeof(double));
-      } 
-    } /* if (wantZ) */
-  } 
-  
-  *myfirstp = myfirst;
-  *nzp      = mysize;
-
-  if (wantZ) free(Z_tmp);
-  free(work);
-  free(iwork);
-
-  return 0;
-}
-
-/*
- * Scale matrix to allowable range, returns 1.0 if not scaled
- */
-static 
-double scale_matrix(in_t *Dstruct, val_t *Wstruct, bool valeig)
-{
-  int              n  = Dstruct->n;
-  double *restrict D  = Dstruct->D;
-  double *restrict E  = Dstruct->E;
-  double          *vl = Wstruct->vl;
-  double          *vu = Wstruct->vu;
-
-  /* Set some machine dependent constants */
-  double smlnum = DBL_MIN / DBL_EPSILON;
-  double bignum = 1.0 / smlnum;
-  double rmin   = sqrt(smlnum);
-  double rmax   = fmin(sqrt(bignum), 1.0 / sqrt(sqrt(DBL_MIN)));
-
-  /*  Scale matrix to allowable range */
-  double scale = 1.0;
-  double T_norm = odnst("M", &n, D, E);  /* returns max(|T(i,j)|) */
-  if (T_norm > 0 && T_norm < rmin) {
-    scale = rmin / T_norm;
-  } else if (T_norm > rmax) {
-    scale = rmax / T_norm;
-  }
-
-  if (scale != 1.0) {  /* FP cmp okay */
-    /* Scale matrix and matrix norm */
-    int itmp = n-1;
-    int IONE = 1;
-    odscal(&n,    &scale, D, &IONE);
-    odscal(&itmp, &scale, E, &IONE);
-    if (valeig == true) {
-      /* Scale eigenvalue bounds */
-      *vl *= scale;
-      *vu *= scale;
-    }
-  } /* end scaling */
-
-  return scale;
-}
-
-/*
- * If matrix scaled, rescale eigenvalues
- */
-static 
-void invscale_eigenvalues(val_t *Wstruct, double scale, int size)
-{
-  if (scale != 1.0) {  /* FP cmp okay */
-    double *vl = Wstruct->vl;
-    double *vu = Wstruct->vu;
-    double *W  = Wstruct->W;
-
-    double invscale = 1.0 / scale;
-    *vl *= invscale;
-    *vu *= invscale;
-    int IONE = 1;
-    odscal(&size, &invscale, W, &IONE);
-  }
-}
-
-/*
- * Refines the eigenvalue to high relative accuracy with
- * respect to the input matrix;
- * Note: In principle this part could be fully parallel too,
- * but it will only rarely be called and not much work
- * is involved, if the eigenvalues are not small in magnitude
- * even no work at all is not uncommon. 
- */
-static 
-int refine_to_highrac
-(proc_t *procinfo, char *jobz, double *D, double *E2, in_t *Dstruct, int *nzp,
- val_t *Wstruct, tol_t *tolstruct)
-{
-  int              n      = Dstruct->n;
-  int              nsplit = Dstruct->nsplit;
-  int    *restrict isplit = Dstruct->isplit;
-  double           spdiam = Dstruct->spdiam;
-  double *restrict W      = Wstruct->W;
-  double *restrict Werr   = Wstruct->Werr;
-  
-  double *work = (double*)malloc(2*n*sizeof(double)); assert(work!=NULL);
-  int *iwork = (int*)malloc(2*n*sizeof(int)); assert(iwork!=NULL);
-
-  int j, ibegin=0;
-  for (j=0; j<nsplit; j++) {
-    int iend = isplit[j] - 1;
-    int isize = iend - ibegin + 1;
-    int nbl = isize;
-    if (nbl == 1) {
-      ibegin = iend + 1;
-      continue;
-    }
-    
-    int ifirst=1, ilast=nbl, offset=0, info;
-    double tol = 4*DBL_EPSILON;
-    double pivmin = tolstruct->pivmin; 
-    odrrj(&isize, &D[ibegin], &E2[ibegin], &ifirst, &ilast, &tol,
-	  &offset, &W[ibegin], &Werr[ibegin], work, iwork, &pivmin,
-	  &spdiam, &info);
-    assert(info == 0);
-    
-    ibegin = iend + 1;
-  } /* end j */
-  
-  free(work);
-  free(iwork);
-  return 0;
-}
-
-/*
- * Compare function for using qsort() on an array
- * of doubles
- */
-static 
-int cmp(const void *a1, const void *a2)
-{
-  double arg1 = *(double *)a1;
-  double arg2 = *(double *)a2;
-
-  if (arg1 < arg2)
-    return -1;
-  else
-    return 1;
-}
-
-/*
- * Routine to communicate eigenvalues such that every process has
- * all computed eigenvalues (iu-il+1) in W; this routine is designed 
- * to be called right after 'pmrrr'.
- */
-int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, double *W)
-{
-  MPI_Comm comm_dup;
-  MPI_Comm_dup(comm, &comm_dup);
-  int nproc;
-  MPI_Comm_size(comm_dup, &nproc);
-
-  int *rcount = (int*)malloc(nproc*sizeof(int)); assert(rcount!=NULL);
-  int *rdispl = (int*)malloc(nproc*sizeof(int)); assert(rdispl!=NULL);
-  double *work = (double*)malloc((*nz+1)*sizeof(double)); assert(work!=NULL);
-
-  if (*nz > 0)
-    memcpy(work, W, (*nz)*sizeof(double) );
-
-  MPI_Allgather(nz, 1, MPI_INT, rcount, 1, MPI_INT, comm_dup);
-
-  MPI_Allgather(myfirstp, 1, MPI_INT, rdispl, 1, MPI_INT, comm_dup);
-  
-  MPI_Allgatherv
-  (work, *nz, MPI_DOUBLE, W, rcount, rdispl, MPI_DOUBLE, comm_dup);
-
-  MPI_Comm_free(&comm_dup);
-  free(rcount);
-  free(rdispl);
-  free(work);
-
-  return 0;
-}
-
-void pmr_comm_eigvals_
-(MPI_Fint *comm, int *nz, int *myfirstp, double *W, int *info)
-{
-  MPI_Comm c_comm = MPI_Comm_f2c(*comm);
-  *info = PMR_comm_eigvals(c_comm, nz, myfirstp, W);
-}
diff --git a/external/pmrrr/src/process_c_task.c b/external/pmrrr/src/process_c_task.c
deleted file mode 100644
index 984258880e..0000000000
--- a/external/pmrrr/src/process_c_task.c
+++ /dev/null
@@ -1,800 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h"
-#include "pmrrr/plarrv.h"
-#include "pmrrr/queue.h"
-#include "pmrrr/counter.h"
-#include "pmrrr/rrr.h"
-#include "pmrrr/structs.h"
-#include "pmrrr/process_task.h"
-
-#define THREE            3.0
-#define FOUR             4.0
-
-static inline 
-rrr_t* compute_new_rrr
-(cluster_t *cl, int tid, proc_t *procinfo,
- val_t *Wstruct, vec_t *Zstruct,
- tol_t *tolstruct, double *work, int *iwork);
-
-static inline 
-int refine_eigvals
-(cluster_t *cl, int rf_begin, int rf_end,
- int tid, proc_t *procinfo,
- rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
- tol_t *tolstruct, counter_t *num_left, 
- workQ_t *workQ, double *work, int *iwork);
-
-static inline 
-int communicate_refined_eigvals
-(cluster_t *cl, proc_t *procinfo, int tid, val_t *Wstruct, rrr_t *RRR);
-
-static inline 
-int test_comm_status(cluster_t *cl, val_t *Wstruct);
-
-static inline 
-int create_subtasks
-(cluster_t *cl, int tid, proc_t *procinfo,
- rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
- workQ_t *workQ, counter_t *num_left);
-
-int PMR_process_c_task
-(cluster_t *cl, int tid, proc_t *procinfo,
- val_t *Wstruct, vec_t *Zstruct, 
- tol_t *tolstruct, workQ_t *workQ, 
- counter_t *num_left, double *work, int *iwork)
-{
-  int   depth     = cl->depth;
-  int   left_pid  = cl->left_pid;
-  int   right_pid = cl->right_pid;
-  int   pid       = procinfo->pid;
-  int   n         = Wstruct->n;
-
-  /* Protection against infinitely deep trees */
-  assert(depth < n);
-
-  /* Check if task only need to be split into subtasks */
-  int status;
-  if (cl->wait_until_refined == true) {
-    status = test_comm_status(cl, Wstruct);
-    if (status == COMM_COMPLETE) {
-      create_subtasks
-      (cl, tid, procinfo, cl->RRR, Wstruct, Zstruct, workQ, num_left);
-      return C_TASK_PROCESSED;
-    } else {
-      return C_TASK_NOT_PROCESSED;
-    }
-  }
-
-  /* Otherwise: compute new rrr, refine part own cluster,
-   * communicate the refined eigenvalues if necessary,
-   * and create subtasks if possible */
-  rrr_t *RRR = 
-    compute_new_rrr
-    (cl, tid, procinfo, Wstruct, Zstruct, tolstruct, work, iwork);
-
-  /* Refine eigenvalues 'rf_begin' to 'rf_end' */
-  int rf_begin, rf_end;
-  if (left_pid != right_pid) {
-    rf_begin = imax(cl->begin, cl->proc_W_begin);
-    rf_end   = imin(cl->end,   cl->proc_W_end);
-  } 
-  if (pid == left_pid ) rf_begin = cl->begin;
-  if (pid == right_pid) rf_end   = cl->end;
-
-  refine_eigvals
-  (cl, rf_begin, rf_end, tid, procinfo, RRR,
-   Wstruct, Zstruct, tolstruct, num_left, workQ, work, iwork);
-
-  /* Communicate results: non-blocking */
-  status = COMM_COMPLETE;
-  if (left_pid != right_pid) {
-    status = communicate_refined_eigvals(cl, procinfo, tid, Wstruct, RRR);
-    /* status = COMM_INCOMPLETE if communication not finished */
-  }
-
-  if (status == COMM_COMPLETE) {
-    create_subtasks(cl, tid, procinfo, RRR, Wstruct, Zstruct, workQ, num_left);
-    return C_TASK_PROCESSED;
-  } else {
-    return C_TASK_NOT_PROCESSED;
-  }
-} /* end process_c_task */
-
-static inline 
-rrr_t* compute_new_rrr
-(cluster_t *cl, int tid, proc_t *procinfo,
- val_t *Wstruct, vec_t *Zstruct,
- tol_t *tolstruct, double *work, int *iwork)
-{
-  int    cl_begin    = cl->begin;
-  int    cl_end      = cl->end;
-  int    cl_size     = cl_end - cl_begin + 1;
-  int    depth       = cl->depth;
-  int    bl_begin    = cl->bl_begin;
-  int    bl_end      = cl->bl_end;
-  int    bl_size     = bl_end - bl_begin + 1;
-  double bl_spdiam   = cl->bl_spdiam;
-  rrr_t  *RRR_parent = cl->RRR;
-
-  double *restrict Werr     = Wstruct->Werr;
-  double *restrict Wgap     = Wstruct->Wgap;
-  int    *restrict Windex   = Wstruct->Windex;
-  double *restrict Wshifted = Wstruct->Wshifted;
-
-  /* Allocate memory for new representation for cluster */
-  double *D = (double*)malloc(bl_size*sizeof(double)); 
-  double *L = (double*)malloc(bl_size*sizeof(double)); 
-  double *DL = (double*)malloc(bl_size*sizeof(double)); 
-  double *DLL = (double*)malloc(bl_size*sizeof(double)); 
-  assert(D!=NULL);
-  assert(L!=NULL);
-  assert(DL!=NULL);
-  assert(DLL!=NULL);
-
-  /* Recompute DL and DLL */
-  int i;
-  double tmp;
-  double *D_parent = RRR_parent->D;
-  double *L_parent = RRR_parent->L;
-  for (i=0; i<bl_size-1; i++) {
-    tmp    = D_parent[i]*L_parent[i];
-    DL[i]  = tmp;
-    DLL[i] = tmp*L_parent[i];
-  }
-  double *DL_parent  = DL;
-  double *DLL_parent = DLL;
-
-  double RQtol = 2*DBL_EPSILON;
-  double pivmin = tolstruct->pivmin;
-
-  /* to shift as close as possible refine extremal eigenvalues */
-  int k, p;
-  double savegap;
-  for (k=0; k<2; k++) {
-    if (k == 0) {
-      p              = Windex[cl_begin];
-      savegap        = Wgap[cl_begin];
-      Wgap[cl_begin] = 0.0;
-    } else {
-      p              = Windex[cl_end  ];
-      savegap        = Wgap[cl_end];
-      Wgap[cl_end]   = 0.0;
-    }
-    
-    int info;
-    int offset  = Windex[cl_begin] - 1;
-    odrrb
-    (&bl_size, D_parent, DLL_parent, &p, &p, &RQtol,
-     &RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin],
-     &Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
-    assert( info == 0 );
-
-    if (k == 0) {
-      Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1])
-			    - (Wshifted[cl_begin]+Werr[cl_begin]) );
-    } else {
-      Wgap[cl_end]   = savegap;
-    }
-  } /* end k */
-
-  double left_gap  = cl->lgap;
-  double right_gap = Wgap[cl_end];
-
-  /* Compute new RRR and store it in D and L */
-  int info;
-  int IONE=1;
-  double tau;
-  odrrf
-  (&bl_size, D_parent, L_parent, DL_parent,
-   &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin],
-   &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap,
-   &pivmin, &tau, D, L, work, &info);
-  assert(info == 0);
-
-  /* Update shift and store it */
-  tmp = L_parent[bl_size-1] + tau;
-  L[bl_size-1] = tmp;
-
-  /* Compute D*L and D*L*L */
-  for (i=0; i<bl_size-1; i++) {
-    tmp    = D[i]*L[i];
-    DL[i]  = tmp;
-    DLL[i] = tmp*L[i];
-  }
-
-  /* New RRR of cluster is usually created at the parent level and
-   * initialized to parent RRR, now reset to contain new RRR */
-  if (RRR_parent->copied_parent_rrr == true) {
-    free(RRR_parent->D);
-    free(RRR_parent->L);
-  }
-  rrr_t *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1);
-  
-  /* Update shifted eigenvalues */
-  for (k=cl_begin; k<=cl_end; k++) {
-    double fudge = THREE*DBL_EPSILON*fabs(Wshifted[k]);
-    Wshifted[k] -= tau;
-    fudge += FOUR*DBL_EPSILON*fabs(Wshifted[k]);
-    Werr[k] += fudge;
-  }
-
-  /* Assure that structure is not freed while it is processed */
-  PMR_increment_rrr_dependencies(RRR);
-
-  return RRR;
-} /* end compute_new_rrr */
-
-/* 
- * Refine eigenvalues with respect to new rrr 
- */
-static inline 
-int refine_eigvals
-(cluster_t *cl, int rf_begin, int rf_end,
- int tid, proc_t *procinfo, rrr_t *RRR, 
- val_t *Wstruct, vec_t *Zstruct,
- tol_t *tolstruct, counter_t *num_left,
- workQ_t *workQ, double *work, int *iwork)
-{
-  int    rf_size   = rf_end-rf_begin+1;
-  int    bl_begin  = cl->bl_begin;
-  int    bl_end    = cl->bl_end;
-  int    bl_size   = bl_end - bl_begin + 1;
-  double bl_spdiam = cl->bl_spdiam;
-
-  double *restrict D   = RRR->D;
-  double *restrict L   = RRR->L;
-  double *restrict DLL = RRR->DLL;
-
-  double *restrict W         = Wstruct->W;
-  double *restrict Werr      = Wstruct->Werr;
-  double *restrict Wgap      = Wstruct->Wgap;
-  int    *restrict Windex    = Wstruct->Windex;
-  double *restrict Wshifted  = Wstruct->Wshifted;
-
-  double pivmin = tolstruct->pivmin;
-  double rtol1 = tolstruct->rtol1;
-  double rtol2 = tolstruct->rtol2;
-
-  /* Determine if refinement should be split into tasks */
-  int left = PMR_get_counter_value(num_left);
-  int nz = Zstruct->nz;
-  int nthreads = procinfo->nthreads;
-  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
-  int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK);
-
-  int offset, i, p, q;
-  double savegap;
-  task_t *task;
-  if (own_part < rf_size) {
-    int others_part = rf_size - own_part;
-    int num_tasks   = iceil(rf_size, own_part) - 1; /* >1 */
-    int chunk       = others_part/num_tasks;        /* floor */
-
-    int ts_begin=rf_begin, ts_end;
-    p = Windex[rf_begin];
-    for (i=0; i<num_tasks; i++) {
-      ts_end = ts_begin + chunk - 1;
-      q      = p        + chunk - 1;
-
-      task = 
-        PMR_create_r_task
-        (ts_begin, ts_end, D, DLL, p, q, bl_size, bl_spdiam, tid);
-     
-      if (ts_begin <= ts_end)
-	PMR_insert_task_at_back(workQ->r_queue, task);
-      else
-        PMR_refine_sem_post(task->data); /* case chunk=0 */
-
-      ts_begin = ts_end + 1;
-      p        = q      + 1;
-    }
-    ts_end = rf_end;
-    q      = Windex[rf_end];
-    offset = Windex[ts_begin] - 1;
-
-    /* Call bisection routine to refine the values */
-    if (ts_begin <= ts_end) {
-      int info;
-      odrrb
-      (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
-       &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
-       work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
-      assert( info == 0 );
-    }
-
-    /* Empty "all" r-queue refine tasks before waiting */
-    int num_iter = PMR_get_num_tasks(workQ->r_queue);
-    for (i=0; i<num_iter; i++) {
-      task = PMR_remove_task_at_front(workQ->r_queue);
-      if (task != NULL) {
-	if (task->flag == REFINE_TASK_FLAG) {
-	  PMR_process_r_task
-          ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork);
-	  free(task);
-	} else {
-	  PMR_insert_task_at_back(workQ->r_queue, task);
-	}
-      } /* if task */
-    } /* end for i */
-    
-    /* Barrier: wait until all created tasks finished */
-    int count = num_tasks;
-    while (count > 0) {
-      while ( PMR_refine_sem_wait(task->data) != 0 ) { };
-      count--;
-    }
-    PMR_refine_sem_destroy(task->data);
-
-    /* Edit right gap at splitting point */
-    ts_begin = rf_begin;
-    for (i=0; i<num_tasks; i++) {
-      ts_end = ts_begin + chunk - 1;
-      
-      Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1]
-			  - Wshifted[ts_end] - Werr[ts_end]);
-      
-      ts_begin = ts_end + 1;
-    }
-  } else {
-    /* Refinement of cluster without creating tasks */
-    
-    /* 'p' and 'q' are local (within block) indices of
-     * the first/last eigenvalue of the cluster */
-    p = Windex[rf_begin];
-    q = Windex[rf_end];
-    
-    offset = Windex[rf_begin] - 1;    /* = p - 1 */
-    
-    if (p == q) {
-      savegap = Wgap[rf_begin];
-      Wgap[rf_begin] = 0.0;
-    }  
-    
-    /* Bisection routine to refine the values */
-    int info;
-    odrrb
-    (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
-     &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
-     work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
-    assert( info == 0 );
-    
-    if (p == q)
-      Wgap[rf_begin] = savegap;
-  } /* end refine with or without creating tasks */
-
-  /* refined eigenvalues with all shifts applied in W */
-  double sigma = L[bl_size-1];
-  for (i=rf_begin; i<=rf_end; i++)
-    W[i] = Wshifted[i] + sigma;
-
-  return 0;
-} /* end refine_eigvals */
-
-static inline 
-int communicate_refined_eigvals
-(cluster_t *cl, proc_t *procinfo, int tid, val_t *Wstruct, rrr_t *RRR)
-{
-  int cl_begin     = cl->begin;
-  int cl_end       = cl->end;
-  int bl_begin     = cl->bl_begin;
-  int bl_end       = cl->bl_end;
-  int proc_W_begin = cl->proc_W_begin;
-  int proc_W_end   = cl->proc_W_end;
-  int left_pid     = cl->left_pid;
-  int right_pid    = cl->right_pid;
-  int pid          = procinfo->pid;
-
-  double *restrict W        = Wstruct->W;
-  double *restrict Werr     = Wstruct->Werr;
-  double *restrict Wgap     = Wstruct->Wgap;
-  double *restrict Wshifted = Wstruct->Wshifted;
-  int    *restrict iproc    = Wstruct->iproc;
-
-  int my_begin = imax(cl_begin, proc_W_begin);
-  int my_end   = imin(cl_end,   proc_W_end);
-  if (pid == left_pid ) my_begin = cl_begin;
-  if (pid == right_pid) my_end   = cl_end;
-  int my_size  = my_end - my_begin + 1;
-
-  int i, k;
-  int num_messages = 0;
-  for (i=left_pid; i<=right_pid; i++) {
-    for (k=cl_begin; k<=cl_end; k++) {
-      if (iproc[k] == i) {
-	num_messages += 4;
-	break;
-      }
-    }    
-  }
-
-  MPI_Request *requests=(MPI_Request*)malloc(num_messages*sizeof(MPI_Request));
-  MPI_Status *stats = (MPI_Status*)malloc(num_messages*sizeof(MPI_Status));
-
-  int p;
-  int i_msg = 0;
-  int other_begin, other_end, other_size;
-  for (p=left_pid; p<=right_pid; p++) {
-    bool proc_involved = false;
-    for (k=cl_begin; k<=cl_end; k++) {
-      if (iproc[k] == p) {
-	proc_involved = true;
-	break;
-      }
-    }
-
-    int u;
-    if (p != pid && proc_involved == true) {
-      /* send message to process p (non-blocking) */
-      MPI_Isend(&Wshifted[my_begin], my_size, MPI_DOUBLE, p,
-		my_begin, procinfo->comm, &requests[4*i_msg]);
-
-      MPI_Isend(&Werr[my_begin], my_size, MPI_DOUBLE, p,
-		my_begin, procinfo->comm, &requests[4*i_msg+1]);
-
-      /* Find eigenvalues in of process p */
-      other_size = 0;
-      for (k=cl_begin; k<=cl_end; k++) {
-	if (other_size == 0 && iproc[k] == p) {
-	  other_begin = k;
-	  other_end   = k;
-	  other_size++;
-	  u = k+1;
-	  while (u <=cl_end && iproc[u] == p) {
-	    other_end++;
-	    other_size++;
-	    u++;
-	  }
-	}
-      }
-      if (p == left_pid) {
-	other_begin = cl_begin;
-	u = cl_begin;
-	while (iproc[u] == -1) {
-	  other_size++;
-	  u++;
-	}
-      }
-      if (p == right_pid) {
-	other_end = cl_end;
-	u = cl_end;
-	while (iproc[u] == -1) {
-	  other_size++;
-	  u--;
-	}
-      }
-
-      /* receive message from process p (non-blocking) */
-      MPI_Irecv(&Wshifted[other_begin], other_size, MPI_DOUBLE,	p,
-		other_begin, procinfo->comm, &requests[4*i_msg+2]);
-
-      MPI_Irecv(&Werr[other_begin], other_size, MPI_DOUBLE, p,
-		other_begin, procinfo->comm, &requests[4*i_msg+3]);
-     
-      i_msg++;
-    }
-  } /* end for p */
-  num_messages = 4*i_msg; /* messages actually send */
-
-  int communication_done;
-  int status = MPI_Testall(num_messages, requests, &communication_done, stats);
-  assert(status == MPI_SUCCESS);
-
-  if (communication_done == true) {
-    double sigma = RRR->L[bl_end-bl_begin];
-    for (k=cl_begin; k<cl_end; k++) {
-      W[k]    = Wshifted[k] + sigma;
-      Wgap[k] = fmax(0, Wshifted[k+1]-Werr[k+1] 
-                        - (Wshifted[k]-Werr[k]));
-    }
-    W[cl_end] = Wshifted[cl_end] + sigma;
-
-    free(requests);
-    free(stats);
-    status = COMM_COMPLETE;
-  } else {
-    comm_t *comm = (comm_t*)malloc(sizeof(comm_t));
-    assert(comm != NULL);
-
-    comm->num_messages      = num_messages;
-    comm->requests          = requests;
-    comm->stats             = stats;
-    cl->wait_until_refined  = true;
-    cl->messages            = comm;
-    
-    status = COMM_INCOMPLETE;
-  }
-  
-  return status;
-} /* end communicate_refined_eigvals */
-
-static inline 
-int test_comm_status(cluster_t *cl, val_t *Wstruct)
-{
-  int         cl_begin            = cl->begin;
-  int         cl_end              = cl->end;
-  int         bl_begin            = cl->bl_begin;
-  int         bl_end              = cl->bl_end;
-  rrr_t       *RRR                = cl->RRR;
-  comm_t      *comm               = cl->messages;
-  int         num_messages        = comm->num_messages;
-  MPI_Request *requests           = comm->requests;
-  MPI_Status  *stats              = comm->stats;
-  double      *restrict W         = Wstruct->W;
-  double      *restrict Werr      = Wstruct->Werr;
-  double      *restrict Wgap      = Wstruct->Wgap;
-  double      *restrict Wshifted  = Wstruct->Wshifted;
-
-  /* Test if communication complete */
-  int communication_done;
-  int status = MPI_Testall(num_messages, requests, &communication_done, stats);
-  assert(status == MPI_SUCCESS);
-
-  if (communication_done == true) {
-    cl->wait_until_refined = false;
-
-    int k;
-    double sigma = RRR->L[bl_end-bl_begin];
-    for (k=cl_begin; k<cl_end; k++) {
-      W[k]    = Wshifted[k] + sigma;
-      Wgap[k] = fmax(0, Wshifted[k+1]-Werr[k+1] 
-		        - (Wshifted[k]-Werr[k]));
-    }
-    W[cl_end] = Wshifted[cl_end] + sigma;
-    
-    free(comm);
-    free(requests);
-    free(stats);
-    status = COMM_COMPLETE;
-  } else {
-    status = COMM_INCOMPLETE;
-  }
-
-  return status;
-} /* test_comm_status */
-
-/* TODO: Refactor this routine */
-static inline 
-int create_subtasks
-(cluster_t *cl, int tid, proc_t *procinfo, 
- rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
- workQ_t *workQ, counter_t *num_left)
-{
-  /* From inputs */
-  int              cl_begin  = cl->begin;
-  int              cl_end    = cl->end;
-  int              depth     = cl->depth;
-  int              bl_begin  = cl->bl_begin;
-  int              bl_end    = cl->bl_end;
-  int              bl_size   = bl_end - bl_begin + 1;
-  double           bl_spdiam = cl->bl_spdiam;
-  double           lgap;
-
-  int  pid       = procinfo->pid;
-  int  nproc     = procinfo->nproc;
-  int  nthreads  = procinfo->nthreads;
-  bool proc_involved=true;
-
-  double *restrict Wgap      = Wstruct->Wgap;
-  double *restrict Wshifted  = Wstruct->Wshifted;
-  int    *restrict iproc     = Wstruct->iproc;
-
-  int              ldz       = Zstruct->ldz;
-  double *restrict Z         = Zstruct->Z;
-  int    *restrict Zindex    = Zstruct->Zindex;
-
-  /* others */
-  int    i, l, k;
-  int    max_size;
-  task_t *task;
-  bool   task_inserted;
-  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
-  int    sn_first, sn_last, sn_size;
-  rrr_t  *RRR_parent;
-  int    new_lpid, new_rpid;
-  double *restrict D_parent;
-  double *restrict L_parent;
-  int    my_first, my_last;
-  bool   copy_parent_rrr;
-
-
-  max_size = fmax(1, PMR_get_counter_value(num_left) /
-		     (fmin(depth+1,4)*nthreads) );
-  task_inserted = true;
-  new_first = cl_begin;
-  for (i=cl_begin; i<=cl_end; i++) {    
-
-    if ( i == cl_end )
-      new_last = i;
-    else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
-      new_last = i;
-    else
-      continue;
-
-    new_size = new_last - new_first + 1;
-
-    if (new_size == 1) {
-      /* singleton was found */
-      
-      if (new_first==cl_begin || task_inserted==true) {
-	/* initialize new singleton task */
-	sn_first = new_first;
-	sn_last  = new_first;
-	sn_size  = 1;
-      } else {
-	/* extend singleton task by one */
-	sn_last++;
-	sn_size++;
-      }
-      
-      /* insert task if ... */
-      if (i==cl_end || sn_size>=max_size ||
-	    Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {
-
-	/* Check if process involved in s-task */
-	proc_involved = false;
-	for (k=sn_first; k<=sn_last; k++) {
-	  if (iproc[k] == pid) {
-	    proc_involved = true;
-	    break;
-	  }
-	}
-	if (proc_involved == false) {
-	  task_inserted = true;
-	  new_first = i + 1;
-	  continue;
-	}
-
-	/* Insert task as process is involved */
-	if (sn_first == cl_begin) {
-	  lgap = cl->lgap;
-	} else {
-	  lgap = Wgap[sn_first-1];
-	}
-	
-	PMR_increment_rrr_dependencies(RRR);
-	
-	task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
-				 bl_end, bl_spdiam, lgap, RRR);
-	
-	PMR_insert_task_at_back(workQ->s_queue, task);
-	  
-	task_inserted = true;
-      } else {
-	task_inserted = false;
-      }
-      
-    } else {
-      /* cluster was found */
-
-      /* check if process involved in processing the new cluster */
-      new_lpid = nproc-1;
-      new_rpid = -1;
-      for (l=new_first; l<=new_last; l++) {
-	if (iproc[l] != -1) {
-	  new_lpid = imin(new_lpid, iproc[l]);
-	  new_rpid = imax(new_rpid, iproc[l]);
-	  }
-      }
-      if (new_lpid > pid || new_rpid < pid) {
-	task_inserted = true;
-	new_first = i + 1;
-	continue;
-      }
-
-      /* find gap to the left */
-      if (new_first == cl_begin) {
-	lgap = cl->lgap;
-      } else {
-	lgap = Wgap[new_first - 1];
-      }
-    
-      /* determine where to store the parent rrr needed by the
-       * cluster to find its new rrr */
-      my_first = imax(new_first, cl->proc_W_begin);
-      my_last  = imin(new_last,  cl->proc_W_end);
-      if ( my_first == my_last ) {
-	/* only one eigenvalue of cluster belongs to process */
-	copy_parent_rrr = true;
-      } else {
-	/* store parent rrr in Z at column new_ftt */
-	copy_parent_rrr = false;
-      }
-      new_ftt1 = Zindex[my_first    ];
-      new_ftt2 = Zindex[my_first + 1];
-
-      if (copy_parent_rrr == true) {
-	/* Copy parent RRR into alloceted arrays and mark them
-	 * for freeing later */
-	D_parent = (double *) malloc(bl_size * sizeof(double));
-	assert(D_parent != NULL);
-	
-	L_parent = (double *) malloc(bl_size * sizeof(double));
-	assert(L_parent != NULL);
-
-	memcpy(D_parent, RRR->D, bl_size*sizeof(double));
-	memcpy(L_parent, RRR->L, bl_size*sizeof(double));
-
-	RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, 
-				    NULL, bl_size, depth);
-	PMR_set_copied_parent_rrr_flag(RRR_parent, true);
-
-      } else {
-	/* copy parent RRR into Z to make cluster task independent */
-	memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, 
-	       bl_size*sizeof(double));
-	memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, 
-	       bl_size*sizeof(double));
-
-	RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin],
-				    &Z[new_ftt2*ldz + bl_begin],
-				    NULL, NULL, bl_size, depth);
-      }
-      
-      /* Create the task for the cluster and put it in the queue */ 
-      task = PMR_create_c_task(new_first, new_last, depth+1, 
-			       bl_begin, bl_end, bl_spdiam, lgap, 
-			       cl->proc_W_begin, cl->proc_W_end, 
-			       new_lpid, new_rpid, RRR_parent);
-
-      if (new_lpid != new_rpid)
-	PMR_insert_task_at_back(workQ->r_queue, task);
-      else
-	PMR_insert_task_at_back(workQ->c_queue, task);
-
-      task_inserted = true;
-      
-    } /* if singleton or cluster found */
-
-    new_first = i + 1;
-  } /* end i */
-  
-  /* set flag in RRR that last singleton is created */
-  PMR_set_parent_processed_flag(RRR);
-  
-  /* clean up */
-  PMR_try_destroy_rrr(RRR);
-  free(cl);
-
-  return 0;
-} /* end create_subtasks */
diff --git a/external/pmrrr/src/process_r_task.c b/external/pmrrr/src/process_r_task.c
deleted file mode 100644
index e587122e5f..0000000000
--- a/external/pmrrr/src/process_r_task.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h"
-#include "pmrrr/plarrv.h"
-#include "pmrrr/queue.h"
-#include "pmrrr/counter.h"
-#include "pmrrr/structs.h"
-#include "pmrrr/tasks.h"
-#include "pmrrr/process_task.h"
-
-int PMR_process_r_task(refine_t *rf, proc_t *procinfo, val_t *Wstruct,
-                       tol_t *tolstruct, double *work, int *iwork);
-
-/*
- * Executes all tasks which are in the r-queue at the moment of the 
- * call. This routine is called to make sure that all tasks in the 
- * queue are dequeued before continueing with other tasks.
- */
-void PMR_process_r_queue
-(int tid, proc_t *procinfo, val_t *Wstruct, 
- vec_t *Zstruct, tol_t *tolstruct, 
- workQ_t *workQ, counter_t *num_left, 
- double *work, int *iwork)
-{
-  int thread_support = procinfo->thread_support;
-  int num_tasks = PMR_get_num_tasks(workQ->r_queue);
-
-  int t;
-  for (t=0; t<num_tasks; t++) {
-    task_t *task = PMR_remove_task_at_front(workQ->r_queue);
-    if (task != NULL) {
-      if (task->flag == CLUSTER_TASK_FLAG) {
-        if (thread_support != MPI_THREAD_FUNNELED || tid == 0) {
-          /* if MPI_THREAD_FUNNELED only tid==0 should process 
-           * these tasks, otherwise any thread can do it */
-          int status = 
-            PMR_process_c_task
-            ((cluster_t*)task->data, tid, procinfo, Wstruct,
-             Zstruct, tolstruct, workQ, num_left, work, iwork);
-          if (status == C_TASK_PROCESSED)
-            free(task);
-          else
-            PMR_insert_task_at_back(workQ->r_queue, task);
-        } else {
-          PMR_insert_task_at_back(workQ->r_queue, task);
-        }
-      } /* end if cluster task */
-      else if (task->flag == REFINE_TASK_FLAG) {
-        PMR_process_r_task
-        ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork);
-        free(task);
-      }
-    } /* end if task removed */
-  } /* end for t */
-} /* end process_r_queue */
-
-/*
- * Process the task of refining a subset of eigenvalues.
- */
-int PMR_process_r_task
-(refine_t *rf, proc_t *procinfo, 
- val_t *Wstruct, tol_t *tolstruct, double *work, int *iwork)
-{
-  int              ts_begin  = rf->begin;
-  double *restrict D         = rf->D;
-  double *restrict DLL       = rf->DLL;
-  int              p         = rf->p;
-  int              q         = rf->q;
-  int              bl_size   = rf->bl_size;
-  double           bl_spdiam = rf->bl_spdiam;
-
-  double *restrict Werr      = Wstruct->Werr;
-  double *restrict Wgap      = Wstruct->Wgap;
-  int    *restrict Windex    = Wstruct->Windex;
-  double *restrict Wshifted  = Wstruct->Wshifted;
-
-  double savegap;
-  if (p == q) {
-    savegap = Wgap[ts_begin];
-    Wgap[ts_begin] = 0.0;
-  }  
-
-  int info;
-  int offset = Windex[ts_begin]-1;
-  double rtol1  = tolstruct->rtol1;
-  double rtol2  = tolstruct->rtol2;
-  double pivmin = tolstruct->pivmin;
-  odrrb
-  (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
-   &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
-   work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
-  assert(info == 0);
-
-  if (p == q)
-    Wgap[ts_begin] = savegap;
-
-  PMR_refine_sem_post(rf);
-  free(rf);
-
-  return 0;
-}
diff --git a/external/pmrrr/src/process_s_task.c b/external/pmrrr/src/process_s_task.c
deleted file mode 100644
index 5d6b6660e9..0000000000
--- a/external/pmrrr/src/process_s_task.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/* Copyright (c) 2010, RWTH Aachen University
- * All rights reserved.
- *
- * Copyright (c) 2015, Jack Poulson
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or 
- * without modification, are permitted provided that the following
- * conditions are met:
- *   * Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the following
- *     disclaimer.
- *   * Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the following 
- *     disclaimer in the documentation and/or other materials 
- *     provided with the distribution.
- *   * Neither the name of the RWTH Aachen University nor the
- *     names of its contributors may be used to endorse or promote 
- *     products derived from this software without specific prior 
- *     written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
- * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
- * SUCH DAMAGE.
- *
- * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
- * August 2010, Version 0.6
- *
- * This code was the result of a collaboration between 
- * Matthias Petschow and Paolo Bientinesi. When you use this 
- * code, kindly reference a paper related to this work.
- *
- */
-#include "pmrrr.h"
-#include "pmrrr/rrr.h"
-#include "pmrrr/counter.h"
-#include "pmrrr/structs.h"
-#include "pmrrr/tasks.h"
-#include "pmrrr/process_task.h"
-
-int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo,
-		       val_t *Wstruct, vec_t *Zstruct, 
-		       tol_t *tolstruct, counter_t *num_left, 
-		       double *work, int *iwork)
-{
-  /* Inputs */
-  int    begin         = sng->begin; 
-  int    end           = sng->end;
-  int    bl_begin      = sng->bl_begin;
-  int    bl_end        = sng->bl_end;
-  int    bl_size       = bl_end - bl_begin + 1;
-  double bl_spdiam     = sng->bl_spdiam; 
-  rrr_t  *RRR          = sng->RRR;
-  double *restrict D   = RRR->D; 
-  double *restrict L   = RRR->L; 
-  double *restrict DL  = RRR->DL;
-  double *restrict DLL = RRR->DLL;
-
-  int              pid      = procinfo->pid;  
-  int              n        = Wstruct->n;
-  double *restrict W        = Wstruct->W;
-  double *restrict Werr     = Wstruct->Werr;
-  double *restrict Wgap     = Wstruct->Wgap;
-  int    *restrict Windex   = Wstruct->Windex;  
-  int    *restrict iproc    = Wstruct->iproc;  
-  double *restrict Wshifted = Wstruct->Wshifted;
-  int              ldz      = Zstruct->ldz;
-  double *restrict Z        = Zstruct->Z;
-  int    *restrict isuppZ   = Zstruct->Zsupp;;
-  int    *restrict Zindex   = Zstruct->Zindex;
-  double           pivmin   = tolstruct->pivmin;
-
-  /* others */
-  int              info, i, k, itmp, num_decrement=0;
-  int              IONE = 1;
-  double           DZERO = 0.0;
-  double           tol, lambda, left, right;
-  int              i_local, zind;
-  double           gap, lgap, rgap, gaptol, savedgap, tmp;
-  bool             usedBS, usedRQ, needBS, wantNC, step2II;
-  int              r, offset;
-  double           twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON;
-  double           residual, bstres, bstw; 
-  int              i_supmn, i_supmx;
-  double           RQcorr;
-  int              negcount;
-  int              sgndef, suppsize;
-  double           sigma;
-  int              i_Zfrom, i_Zto;
-  double           ztz, norminv, mingma;
-
-
-  /* set tolerance parameter */
-  tol  = 4.0 * log( (double) bl_size ) * DBL_EPSILON;
-
-  /* loop over all singletons in the task */
-  for (i=begin; i<=end; i++) {
-
-    /* check if eigenvector is supposed to be computed by
-     * the process */
-    if (iproc[i] != pid)
-      continue;
-    num_decrement++;
-
-    if (bl_size == 1) {
-      /* set eigenvector to column of identity matrix */
-      zind = Zindex[i];
-      memset(&Z[zind*ldz], 0.0, n*sizeof(double) );
-      Z[zind*ldz + bl_begin] = 1.0;
-      isuppZ[2*zind    ]     = bl_begin + 1;
-      isuppZ[2*zind + 1]     = bl_begin + 1;
-      continue;
-    }
-
-    lambda  = Wshifted[i];  
-    left    = Wshifted[i] - Werr[i];
-    right   = Wshifted[i] + Werr[i];
-    i_local = Windex[i];
-    r       = 0;
-    
-    /* compute left and right gap */
-    if (i == bl_begin)
-      lgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
-    else if (i == begin)
-      lgap = sng->lgap;
-    else
-      lgap = Wgap[i-1];
-
-    if (i == bl_end) {
-      rgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
-    } else {
-      rgap = Wgap[i];
-    }
-
-    gap = fmin(lgap, rgap);
-
-    if ( i == bl_begin || i == bl_end ) {
-      gaptol = 0.0;
-    } else {
-      gaptol = gap * DBL_EPSILON;
-    }
-
-    /* initialize lower and upper value of support */
-    i_supmn = bl_size;
-    i_supmx = 1;
-
-    /* update Wgap so that it holds minimum gap and save the 
-     * old value */
-    savedgap  = Wgap[i];
-    Wgap[i]   = gap;
-    
-    /* initialize flags indicating if bisection or Rayleigh-Quotient
-     * correction was used */
-    usedBS = false;
-    usedRQ = false;
-  
-    /* the need for bisection is initially turned off */
-    needBS = !TRY_RQC;
-
-    /* IEEE floating point is assumed, so that all 0 bits are 0.0 */
-    zind = Zindex[i];
-    memset(&Z[zind*ldz], 0.0, n*sizeof(double));
-
-    /* inverse iteration with twisted factorization */
-    for (k=1; k<=MAXITER; k++) {
-
-      if (needBS == true) {
-	usedBS = true;
-	itmp   = r;
-	
-	offset  = Windex[i] - 1;
-	tmp     = Wgap[i]; 
-	Wgap[i] = 0.0;
-	
-	odrrb(&bl_size, D, DLL, &i_local, &i_local, &DZERO, 
-	      &twoeps, &offset, &Wshifted[i], &Wgap[i],
-	      &Werr[i], work, iwork, &pivmin, &bl_spdiam,
-	      &itmp, &info);
-	assert(info == 0);
-	
-	Wgap[i] = tmp;
-	lambda = Wshifted[i];
-	r = 0;
-      }
-      wantNC = (usedBS == true) ? false : true;
-
-      /* compute the eigenvector corresponding to lambda */
-      odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
-	    &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
-	    &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
-	    &norminv, &residual, &RQcorr, work);
-
-      if (k == 1) {
-	bstres = residual;
-	bstw   = lambda;
-      } else if (residual < bstres) {
-	bstres = residual;
-	bstw   = lambda;
-      }
-      
-      /* update support held */
-      i_supmn = imin(i_supmn, isuppZ[2*zind    ]);
-      i_supmx = imax(i_supmx, isuppZ[2*zind + 1]);
-
-      /* Convergence test for Rayleigh Quotient Iteration
-       * not done if bisection was used */
-      if ( !usedBS && residual > tol*gap 
-	   && fabs(RQcorr) > RQtol*fabs(lambda) ) {
-      
-	if (i_local <= negcount) {
-	  sgndef = -1;    /* wanted eigenvalue lies to the left  */
-	} else {
-	  sgndef =  1;    /* wanted eigenvalue lies to the right */
-	}
-	
-	if ( RQcorr*sgndef >= 0.0
-	     && lambda+RQcorr <= right 
-	     && lambda+RQcorr >= left ) {
-	  usedRQ = true;
-	  if ( sgndef == 1 )
-	    left  = lambda;
-	  else
-	    right = lambda;
-	  Wshifted[i] = 0.5*(left + right);
-	  lambda     += RQcorr;
-	} else { /* bisection is needed */
-	  needBS = true;
-	}
-	
-	if ( right-left < RQtol*fabs(lambda) ) {
-	  /* eigenvalue computed to bisection accuracy
-	   * => compute eigenvector */
-	  usedBS = true;
-	} else if ( k == MAXITER-1 ) {
-	  /* for last iteration use bisection */
-	  needBS = true;
-	}
-      } else {
-	/* go to next iteration */
-	break;
-      }
-
-    } /* end k */
-
-    /* if necessary call odr1v to improve error angle by 2nd step */
-    step2II = false;
-    if ( usedRQ && usedBS && (bstres <= residual) ) {
-      lambda = bstw;
-      step2II = true;
-    }
-    if ( step2II == true ) {
-      odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
-	    &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
-	    &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
-	    &norminv, &residual, &RQcorr, work);
-    }
-    Wshifted[i] = lambda;
-
-    /* compute support w.r.t. whole matrix
-     * block beginning is offset for each support */
-    isuppZ[2*zind    ] += bl_begin;
-    isuppZ[2*zind + 1] += bl_begin;
-  
-    /* ensure vector is okay if support changed in RQI 
-     * minus ones because of indices starting from zero */
-    i_Zfrom    = isuppZ[2*zind    ] - 1;
-    i_Zto      = isuppZ[2*zind + 1] - 1;
-    i_supmn   += bl_begin - 1;
-    i_supmx   += bl_begin - 1;
-    if ( i_supmn < i_Zfrom ) {
-      for ( k=i_supmn; k < i_Zfrom; k++ ) {
-	Z[k + zind*ldz] = 0.0;
-      }
-    }
-    if ( i_supmx > i_Zto ) {
-      for ( k=i_Zto+1; k <= i_supmx; k++ ) {
-	Z[k + zind*ldz] = 0.0;
-      }
-    }
-    
-    /* normalize eigenvector */
-    suppsize = i_Zto - i_Zfrom + 1;
-    odscal(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE);
-
-    sigma = L[bl_size-1];
-    W[i]  = lambda + sigma;
-    
-    if (i < end)
-      Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]);
-
-  } /* end i */
-
-  /* decrement counter */
-  PMR_decrement_counter(num_left, num_decrement);
-
-  /* clean up */
-  free(sng);
-  PMR_try_destroy_rrr(RRR);
-
-  return 0;
-}
diff --git a/external/pmrrr/src/queue.cpp b/external/pmrrr/src/queue.cpp
new file mode 100644
index 0000000000..5a9ef0db8c
--- /dev/null
+++ b/external/pmrrr/src/queue.cpp
@@ -0,0 +1,251 @@
+/* Copyright (c) 2010, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Copyright (c) 2015, Jack Poulson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *   * Redistributions of source code must retain the above 
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *   * Redistributions in binary form must reproduce the above 
+ *     copyright notice, this list of conditions and the following 
+ *     disclaimer in the documentation and/or other materials 
+ *     provided with the distribution.
+ *   * Neither the name of the RWTH Aachen University nor the
+ *     names of its contributors may be used to endorse or promote 
+ *     products derived from this software without specific prior 
+ *     written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH 
+ * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE.
+ *
+ * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de),
+ * August 2010, Version 0.6
+ *
+ * This code was the result of a collaboration between 
+ * Matthias Petschow and Paolo Bientinesi. When you use this 
+ * code, kindly reference a paper related to this work.
+ *
+ */
+
+#include <cstdlib>
+#include <cstdio>
+#include <cassert>
+
+#include <pmrrr/definitions/global.h>
+#include <pmrrr/definitions/queue.h>
+
+#ifndef DISABLE_PTHREADS
+# include <errno.h>
+#endif
+
+namespace pmrrr { namespace detail {
+
+	int PMR_queue_init_lock(queue_t *queue)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_init(&queue->lock, NULL);
+	 #else
+	  int info = pthread_spin_init(&queue->lock, PTHREAD_PROCESS_PRIVATE);
+	 #endif
+	  assert(info == 0);
+	  return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	void PMR_queue_destroy_lock(queue_t *queue)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  pthread_mutex_destroy(&queue->lock);
+	 #else
+	  pthread_spin_destroy(&queue->lock);
+	 #endif
+	#endif
+	}
+
+	int PMR_queue_lock(queue_t *queue)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_lock(&queue->lock);
+	  if( info == EINVAL )
+	    fprintf(stderr,"pthread_mutex_lock returned EINVAL\n");
+	  else if( info == EAGAIN )
+	    fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n");
+	  else if( info == EDEADLK )
+	    fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n");
+	  else if( info == EPERM )
+	    fprintf(stderr,"pthread_mutex_lock returned EPERM\n");
+	  else
+	    fprintf(stderr,"pthread_mutex_lock returned %d\n",info);
+	 #else
+	  int info = pthread_spin_lock(&queue->lock);
+	 #endif
+	  assert(info == 0);
+	  return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	int PMR_queue_unlock(queue_t *queue)
+	{
+	#ifndef DISABLE_PTHREADS
+	 #ifdef NOSPINLOCKS
+	  int info = pthread_mutex_unlock(&queue->lock);
+	  if( info == EINVAL )
+	    fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n");
+	  else if( info == EAGAIN )
+	    fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n");
+	  else if( info == EDEADLK )
+	    fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n");
+	  else if( info == EPERM )
+	    fprintf(stderr,"pthread_mutex_unlock returned EPERM\n");
+	  else
+	    fprintf(stderr,"pthread_mutex_unlock returned %d\n",info);
+	 #else
+	  int info = pthread_spin_unlock(&queue->lock);
+	 #endif
+	  assert(info == 0);
+	  return info;
+	#else
+	  return 0;
+	#endif
+	}
+
+	queue_t *PMR_create_empty_queue(void)
+	{
+	  queue_t *queue = (queue_t *) malloc(sizeof(queue_t));
+	  assert(queue != NULL);
+	  
+	  queue->num_tasks = 0;
+	  queue->head      = NULL;
+	  queue->back      = NULL;
+
+	  PMR_queue_init_lock(queue); 
+	  return queue;
+	}
+
+	void PMR_destroy_queue(queue_t *queue)
+	{
+	  PMR_queue_destroy_lock(queue);
+	  free(queue);
+	}
+
+	int PMR_insert_task_at_front(queue_t *queue, task_t *task)
+	{
+	  
+	  int info = PMR_queue_lock(queue);
+
+	  queue->num_tasks++;
+	  task->next = queue->head;
+	  if (queue->head == NULL)
+	    queue->back = task;
+	  else
+	    queue->head->prev = task;
+	  queue->head = task;
+
+	  info |= PMR_queue_unlock(queue);
+	  assert(info == 0);
+	  return info;
+	}
+
+	int PMR_insert_task_at_back(queue_t *queue, task_t *task)
+	{
+  	  int info = PMR_queue_lock(queue);
+	 
+	  queue->num_tasks++;
+	  task->prev = queue->back;
+	  task->next = NULL;
+	  if (queue->head == NULL)
+		queue->head = task;
+	  else
+		queue->back->next = task;
+	  queue->back = task;
+
+	  info |= PMR_queue_unlock(queue);
+	  assert(info == 0);
+	  return info;	
+	}
+
+	task_t *PMR_remove_task_at_front(queue_t *queue)
+	{
+  	  int info = PMR_queue_lock(queue);
+
+	  task_t *task = queue->head;
+	  if (queue->head != NULL) {
+		/* at least one element */
+		queue->num_tasks--;
+		if (queue->head->next == NULL) {
+		  /* last task removed */
+		  queue->head = NULL;
+		  queue->back = NULL;
+		} else {
+		  /* at least two tasks */
+		  queue->head->next->prev = NULL;
+		  queue->head = queue->head->next;
+		}
+	  }
+
+	  info |= PMR_queue_unlock(queue);
+	  assert(info == 0);
+	  return task;  
+	}
+
+	task_t *PMR_remove_task_at_back (queue_t *queue)
+	{
+	  int info = PMR_queue_lock(queue);
+
+	  task_t *task = queue->back;
+	  if (queue->back != NULL) {
+		/* at least one element */
+		queue->num_tasks--;
+		if (queue->back->prev == NULL) {
+		  /* last task removed */
+		  queue->head = NULL;
+		  queue->back = NULL;
+		} else {
+		  /* at least two tasks */
+		  queue->back->prev->next = NULL;
+		  queue->back = queue->back->prev;
+		}
+	  }
+
+	  info |= PMR_queue_unlock(queue);
+	  assert(info == 0);
+	  return task;  
+	}
+
+
+
+	int PMR_get_num_tasks(queue_t *queue)
+	{
+
+	  int info = PMR_queue_lock(queue);
+	  int num_tasks = queue->num_tasks;
+	  info |= PMR_queue_unlock(queue);
+	  assert(info == 0);
+	  return num_tasks;
+	}
+
+}	// namespace detail
+
+}	// namespace pmrrr
+
diff --git a/include/El/core/imports/pmrrr.hpp b/include/El/core/imports/pmrrr.hpp
index 40538713e7..42aff99b98 100644
--- a/include/El/core/imports/pmrrr.hpp
+++ b/include/El/core/imports/pmrrr.hpp
@@ -6,9 +6,14 @@
    which can be found in the LICENSE file in the root directory, or at 
    http://opensource.org/licenses/BSD-2-Clause
 */
+
 #ifndef EL_IMPORTS_PMRRR_HPP
 #define EL_IMPORTS_PMRRR_HPP
 
+#include <El-lite.hpp>
+
+#include <pmrrr/pmrrr.hpp>
+
 namespace El {
 namespace herm_tridiag_eig {
 
@@ -17,11 +22,6 @@ struct Estimate {
     int numGlobalEigenvalues;
 };
 
-// Return an upper bound on the number of (local) eigenvalues in the given range
-Estimate EigEstimate
-( int n,  double* d, double* e, double* w, mpi::Comm comm, 
-  double lowerBound, double upperBound );
-
 struct Info {
     int numLocalEigenvalues;
     int numGlobalEigenvalues;
@@ -29,35 +29,204 @@ struct Info {
     int firstLocalEigenvalue;
 };
 
+// Return upper bounds on the number of (local) eigenvalues in the given range,
+// (lowerBound,upperBound]
+template<typename FloatingType>
+Estimate EigEstimate
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, 
+  FloatingType lowerBound, FloatingType upperBound )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::EigEstimate"))
+
+    Estimate estimate;
+    char jobz='C';
+    char range='V';
+    int il, iu;
+    int highAccuracy=0;
+    int nz, offset;
+    int ldz=1;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
+      &highAccuracy, comm.comm, &nz, &offset, w, static_cast<FloatingType*>(nullptr), &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
+
+    estimate.numLocalEigenvalues = nz;
+    estimate.numGlobalEigenvalues = mpi::AllReduce( nz, comm );
+    return estimate;
+}
+
 // Compute all of the eigenvalues
-Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm );
+template<typename FloatingType>
+Info Eig( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+
+    Info info;
+    char jobz='N';
+    char range='A';
+    FloatingType vl, vu;
+    int il, iu;
+    int highAccuracy=0; 
+    int nz, offset;
+    int ldz=1;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm,
+      &nz, &offset, w, static_cast<FloatingType*>(nullptr), &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
+
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    info.numGlobalEigenvalues=n;
+    return info;
+}
 
 // Compute all of the eigenpairs
+template<typename FloatingType>
 Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm );
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+ 
+    Info info;
+    char jobz='V';
+    char range='A';
+    FloatingType vl, vu;
+    int il, iu;
+    int highAccuracy=0; 
+    int nz, offset;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm,
+      &nz, &offset, w, Z, &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
+
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    info.numGlobalEigenvalues=n;
+    return info;
+}
 
-// Compute all of the eigenvalues in [lowerBound,upperBound)
+// Compute all of the eigenvalues in (lowerBound,upperBound]
+template<typename FloatingType>
 Info Eig
-( int n, double* d, double* e, double* w, mpi::Comm comm, 
-  double lowerBound, double upperBound );
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, 
+  FloatingType lowerBound, FloatingType upperBound )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+
+    Info info;
+    char jobz='N';
+    char range='V';
+    int il, iu;
+    int highAccuracy=0; 
+    int nz, offset;
+    int ldz=1;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
+      &highAccuracy, comm.comm, &nz, &offset, w, static_cast<FloatingType*>(nullptr), &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
 
-// Compute all of the eigenpairs with eigenvalues in [lowerBound,upperBound)
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm );
+    return info;
+}
+
+// Compute all of the eigenpairs with eigenvalues in (lowerBound,upperBound]
+template<typename FloatingType>
 Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
-  double lowerBound, double upperBound );
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm, 
+  FloatingType lowerBound, FloatingType upperBound )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+
+    Info info;
+    char jobz='V';
+    char range='V';
+    int il, iu;
+    int highAccuracy=0; 
+    int nz, offset;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
+      &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
 
-// Compute all of the eigenvalues with indices in [lowerBound,upperBound)
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm );
+    return info;
+}
+
+// Compute all of the eigenvalues with indices in [lowerBound,upperBound]
+template<typename FloatingType>
 Info Eig
-( int n, double* d, double* e, double* w, mpi::Comm comm, 
-  int lowerBound, int upperBound );
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, 
+  int lowerBound, int upperBound )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+
+    Info info;
+    ++lowerBound;
+    ++upperBound;
+    char jobz='N';
+    char range='I';
+    FloatingType vl, vu;
+    int highAccuracy=0; 
+    int nz, offset;
+    int ldz=1;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, 
+      &highAccuracy, comm.comm, &nz, &offset, w, static_cast<FloatingType*>(nullptr), &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
+
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    info.numGlobalEigenvalues=(upperBound-lowerBound)+1;
+    return info;
+}
 
-// Compute all of the eigenpairs with ordered eigenvalue indices in 
-// [lowerBound,upperBound)
+// Compute all of the eigenpairs with eigenvalues indices in 
+// [lowerBound,upperBound]
+template<typename FloatingType>
 Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
-  int lowerBound, int upperBound );
+( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm, 
+  int lowerBound, int upperBound )
+{
+    DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig"))
+
+    Info info;
+    ++lowerBound;
+    ++upperBound;
+    char jobz='V';
+    char range='I';
+    FloatingType vl, vu;
+    int highAccuracy=0; 
+    int nz, offset;
+    vector<int> ZSupport(2*n);
+    int retval = pmrrr::pmrrr
+    ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, 
+      &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() );
+    if( retval != 0 )
+        RuntimeError("pmrrr returned ",retval);
+
+    info.numLocalEigenvalues=nz;
+    info.firstLocalEigenvalue=offset;
+    info.numGlobalEigenvalues=(upperBound-lowerBound)+1;
+    return info;
+}
 
 } // namespace herm_tridiag_eig
 } // namespace El
 
-#endif // ifndef EL_IMPORTS_PMRRR_HPP
+#endif
diff --git a/include/El/core/imports/pmrrr.hpp.old b/include/El/core/imports/pmrrr.hpp.old
new file mode 100644
index 0000000000..f7a7de571b
--- /dev/null
+++ b/include/El/core/imports/pmrrr.hpp.old
@@ -0,0 +1,64 @@
+/*
+   Copyright (c) 2009-2015, Jack Poulson
+   All rights reserved.
+
+   This file is part of Elemental and is under the BSD 2-Clause License, 
+   which can be found in the LICENSE file in the root directory, or at 
+   http://opensource.org/licenses/BSD-2-Clause
+*/
+#pragma once
+#ifndef EL_IMPORTS_PMRRR_HPP
+#define EL_IMPORTS_PMRRR_HPP
+
+namespace El {
+namespace herm_tridiag_eig {
+
+struct Estimate {
+    int numLocalEigenvalues;
+    int numGlobalEigenvalues;
+};
+
+// Return an upper bound on the number of (local) eigenvalues in the given range
+Estimate EigEstimate
+( int n,  double* d, double* e, double* w, mpi::Comm comm, 
+  double lowerBound, double upperBound );
+
+struct Info {
+    int numLocalEigenvalues;
+    int numGlobalEigenvalues;
+
+    int firstLocalEigenvalue;
+};
+
+// Compute all of the eigenvalues
+Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm );
+
+// Compute all of the eigenpairs
+Info Eig
+( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm );
+
+// Compute all of the eigenvalues in [lowerBound,upperBound)
+Info Eig
+( int n, double* d, double* e, double* w, mpi::Comm comm, 
+  double lowerBound, double upperBound );
+
+// Compute all of the eigenpairs with eigenvalues in [lowerBound,upperBound)
+Info Eig
+( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
+  double lowerBound, double upperBound );
+
+// Compute all of the eigenvalues with indices in [lowerBound,upperBound)
+Info Eig
+( int n, double* d, double* e, double* w, mpi::Comm comm, 
+  int lowerBound, int upperBound );
+
+// Compute all of the eigenpairs with ordered eigenvalue indices in 
+// [lowerBound,upperBound)
+Info Eig
+( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
+  int lowerBound, int upperBound );
+
+} // namespace herm_tridiag_eig
+} // namespace El
+
+#endif // ifndef EL_IMPORTS_PMRRR_HPP
diff --git a/src/core/imports/pmrrr.cpp b/src/core/imports/pmrrr.cpp
deleted file mode 100644
index a1438a6a99..0000000000
--- a/src/core/imports/pmrrr.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
-   Copyright (c) 2009-2016, Jack Poulson
-   All rights reserved.
-
-   This file is part of Elemental and is under the BSD 2-Clause License, 
-   which can be found in the LICENSE file in the root directory, or at 
-   http://opensource.org/licenses/BSD-2-Clause
-*/
-#include <El-lite.hpp>
-
-extern "C" {
-
-int pmrrr
-( const char* jobz,  // 'N' ~ only eigenvalues, 'V' ~ also eigenvectors
-  const char* range, // 'A'~all eigenpairs, 'V'~interval (vl,vu], 'I'~il-iu
-  const int* n,      // size of matrix
-        double* d,   // full diagonal of tridiagonal matrix [length n]
-        double* e,   // full subdiagonal in first n-1 entries [length n]
-  const double* vl,  // if range=='V', compute eigenpairs in (vl,vu]
-  const double* vu,
-  const int* il, // if range=='I', compute il-iu eigenpairs
-  const int* iu,
-  int* tryrac, // if nonzero, try for high relative accuracy
-  MPI_Comm comm,
-  int* nz,        // number of locally computed eigenvectors
-  int* offset,    // the first eigenpair computed by our process
-  double* w,      // eigenvalues corresponding to local eigenvectors [length nz]
-  double* Z,      // local eigenvectors [size ldz x nz]
-  const int* ldz, // leading dimension of Z
-  int* ZSupp      // support of eigenvectors [length 2n]
-);
-
-} // extern "C"
-
-namespace El {
-namespace herm_tridiag_eig {
-
-// Return upper bounds on the number of (local) eigenvalues in the given range,
-// (lowerBound,upperBound]
-Estimate EigEstimate
-( int n, double* d, double* e, double* w, mpi::Comm comm, 
-  double lowerBound, double upperBound )
-{
-    DEBUG_CSE
-    Estimate estimate;
-    char jobz='C';
-    char range='V';
-    int il, iu;
-    int highAccuracy=0;
-    int nz, offset;
-    int ldz=1;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
-      &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    estimate.numLocalEigenvalues = nz;
-    estimate.numGlobalEigenvalues = mpi::AllReduce( nz, comm );
-    return estimate;
-}
-
-// Compute all of the eigenvalues
-Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm )
-{
-    DEBUG_CSE
-    Info info;
-    char jobz='N';
-    char range='A';
-    double vl, vu;
-    int il, iu;
-    int highAccuracy=0; 
-    int nz, offset;
-    int ldz=1;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm,
-      &nz, &offset, w, 0, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    info.numGlobalEigenvalues=n;
-    return info;
-}
-
-// Compute all of the eigenpairs
-Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm )
-{
-    DEBUG_CSE
-    Info info;
-    char jobz='V';
-    char range='A';
-    double vl, vu;
-    int il, iu;
-    int highAccuracy=0; 
-    int nz, offset;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm,
-      &nz, &offset, w, Z, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    info.numGlobalEigenvalues=n;
-    return info;
-}
-
-// Compute all of the eigenvalues in (lowerBound,upperBound]
-Info Eig
-( int n, double* d, double* e, double* w, mpi::Comm comm, 
-  double lowerBound, double upperBound )
-{
-    DEBUG_CSE
-    Info info;
-    char jobz='N';
-    char range='V';
-    int il, iu;
-    int highAccuracy=0; 
-    int nz, offset;
-    int ldz=1;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
-      &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm );
-    return info;
-}
-
-// Compute all of the eigenpairs with eigenvalues in (lowerBound,upperBound]
-Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
-  double lowerBound, double upperBound )
-{
-    DEBUG_CSE
-    Info info;
-    char jobz='V';
-    char range='V';
-    int il, iu;
-    int highAccuracy=0; 
-    int nz, offset;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, 
-      &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm );
-    return info;
-}
-
-// Compute all of the eigenvalues with indices in [lowerBound,upperBound]
-Info Eig
-( int n, double* d, double* e, double* w, mpi::Comm comm, 
-  int lowerBound, int upperBound )
-{
-    DEBUG_CSE
-    Info info;
-    ++lowerBound;
-    ++upperBound;
-    char jobz='N';
-    char range='I';
-    double vl, vu;
-    int highAccuracy=0; 
-    int nz, offset;
-    int ldz=1;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, 
-      &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    info.numGlobalEigenvalues=(upperBound-lowerBound)+1;
-    return info;
-}
-
-// Compute all of the eigenpairs with eigenvalues indices in 
-// [lowerBound,upperBound]
-Info Eig
-( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, 
-  int lowerBound, int upperBound )
-{
-    DEBUG_CSE
-    Info info;
-    ++lowerBound;
-    ++upperBound;
-    char jobz='V';
-    char range='I';
-    double vl, vu;
-    int highAccuracy=0; 
-    int nz, offset;
-    vector<int> ZSupport(2*n);
-    int retval = pmrrr
-    ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, 
-      &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() );
-    if( retval != 0 )
-        RuntimeError("pmrrr returned ",retval);
-
-    info.numLocalEigenvalues=nz;
-    info.firstLocalEigenvalue=offset;
-    info.numGlobalEigenvalues=(upperBound-lowerBound)+1;
-    return info;
-}
-
-} // namespace herm_tridiag_eig
-} // namespace El
diff --git a/src/lapack_like/spectral/HermitianTridiagEig.cpp b/src/lapack_like/spectral/HermitianTridiagEig.cpp
index 01da2f6ede..342bbadb63 100644
--- a/src/lapack_like/spectral/HermitianTridiagEig.cpp
+++ b/src/lapack_like/spectral/HermitianTridiagEig.cpp
@@ -1141,11 +1141,11 @@ MRRRHelper
     QCtrl.rowAlign = 0;
 
     DistMatrixWriteProxy<Real,Real,VR,STAR> wProx( wPre, wCtrl );
-    DistMatrixWriteProxy<Real,double,STAR,VR> QProx( QPre, QCtrl );
+    DistMatrixWriteProxy<Real,Real,STAR,VR> QProx( QPre, QCtrl );
     auto& w = wProx.Get();
     auto& Q = QProx.Get();
 
-    DistMatrix<double,STAR,STAR> d_STAR_STAR(g), dSub_STAR_STAR(g);
+    DistMatrix<Real,STAR,STAR> d_STAR_STAR(g), dSub_STAR_STAR(g);
     Copy( d, d_STAR_STAR );
     dSub_STAR_STAR.Resize( n-1, 1, n );
     Copy( dSub, dSub_STAR_STAR );
@@ -1153,7 +1153,7 @@ MRRRHelper
     Int k;
     if( ctrl.subset.rangeSubset )
     {
-        vector<double> dVector(n), dSubVector(n), wVector(n);
+        vector<Real> dVector(n), dSubVector(n), wVector(n);
         MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n );
         MemCopy( dSubVector.data(), dSub_STAR_STAR.Buffer(), n-1 );
         auto estimate = herm_tridiag_eig::EigEstimate
@@ -1171,7 +1171,7 @@ MRRRHelper
     Q.Resize( n, k );
 
     herm_tridiag_eig::Info rangeInfo;
-    vector<double> wVector(n);
+    vector<Real> wVector(n);
     if( ctrl.subset.rangeSubset )
         rangeInfo = herm_tridiag_eig::Eig
           ( int(n), d_STAR_STAR.Buffer(), dSub_STAR_STAR.Buffer(), 
@@ -1215,14 +1215,14 @@ MRRRHelper
     typedef Complex<Real> C;
     HermitianTridiagEigInfo info;
 
-    DistMatrix<double,STAR,STAR> d_STAR_STAR(g);
-    DistMatrix<Complex<double>,STAR,STAR> dSub_STAR_STAR(g);
+    DistMatrix<Real,STAR,STAR> d_STAR_STAR(g);
+    DistMatrix<Complex<Real>,STAR,STAR> dSub_STAR_STAR(g);
     Copy( d, d_STAR_STAR );
     dSub_STAR_STAR.Resize( n-1, 1, n );
     Copy( dSub, dSub_STAR_STAR );
 
-    DistMatrix<double,STAR,STAR> dSubReal(g);
-    DistMatrix<Complex<double>,STAR,STAR> phase(g);
+    DistMatrix<Real,STAR,STAR> dSubReal(g);
+    DistMatrix<Complex<Real>,STAR,STAR> phase(g);
     RemovePhase( dSub_STAR_STAR, dSubReal, phase );
 
     ElementalProxyCtrl wCtrl, QCtrl;
@@ -1239,7 +1239,7 @@ MRRRHelper
     Int k;
     if( ctrl.subset.rangeSubset )
     {
-        vector<double> dVector(n), dSubVector(n), wVector(n);
+        vector<Real> dVector(n), dSubVector(n), wVector(n);
         MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n );
         MemCopy( dSubVector.data(), dSubReal.Buffer(), n-1 );
         auto estimate = herm_tridiag_eig::EigEstimate
@@ -1254,11 +1254,11 @@ MRRRHelper
         k = ( n==0 ? 0 : ctrl.subset.upperIndex-ctrl.subset.lowerIndex+1 );
     else
         k = n;
-    DistMatrix<double,STAR,VR> QReal(g);
+    DistMatrix<Real,STAR,VR> QReal(g);
     QReal.Resize( n, k );
 
     herm_tridiag_eig::Info rangeInfo;
-    vector<double> wVector(n);
+    vector<Real> wVector(n);
     if( ctrl.subset.rangeSubset )
         rangeInfo = herm_tridiag_eig::Eig
           ( int(n), d_STAR_STAR.Buffer(), dSubReal.Buffer(), 
@@ -1407,12 +1407,12 @@ Int MRRREstimateHelper
 {
     DEBUG_CSE
     const Int n = d.Height();
-    DistMatrix<double,STAR,STAR> d_STAR_STAR( d.Grid() );
-    DistMatrix<double,STAR,STAR> dSub_STAR_STAR( d.Grid() );
+    DistMatrix<Real,STAR,STAR> d_STAR_STAR( d.Grid() );
+    DistMatrix<Real,STAR,STAR> dSub_STAR_STAR( d.Grid() );
     Copy( d, d_STAR_STAR );
     dSub_STAR_STAR.Resize( n-1, 1, n );
     Copy( dSub, dSub_STAR_STAR );
-    vector<double> dVector(n), dSubVector(n), wVector(n);
+    vector<Real> dVector(n), dSubVector(n), wVector(n);
     MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n );
     MemCopy( dSubVector.data(), dSub_STAR_STAR.Buffer(), n-1 );
     auto estimate = herm_tridiag_eig::EigEstimate