diff --git a/CMakeLists.txt b/CMakeLists.txt index 58794058a9..6c2e21353c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -410,6 +410,7 @@ endif() # Elemental's mod's of Parallel Multiple Relatively Robust Representations # ------------------------------------------------------------------------ add_subdirectory(external/pmrrr) +include_directories(external/pmrrr/include) if(EL_BUILT_SCALAPACK) add_dependencies(pmrrr project_scalapack) else() diff --git a/examples/lapack_like/HermitianEig.cpp b/examples/lapack_like/HermitianEig.cpp index 07c7010714..fd42a8882b 100644 --- a/examples/lapack_like/HermitianEig.cpp +++ b/examples/lapack_like/HermitianEig.cpp @@ -7,12 +7,88 @@ http://opensource.org/licenses/BSD-2-Clause */ #include + using namespace std; using namespace El; -// Typedef our real and complex types to 'Real' and 'C' for convenience -typedef double Real; -typedef Complex C; +template +void run_example(Int n, bool print) +{ + typedef Complex C; + + // Create a 2d process grid from a communicator. In our case, it is + // mpi::COMM_WORLD. There is another constructor that allows you to + // specify the grid dimensions, Grid g( comm, r ), which creates a + // grid of height r. + Grid g( mpi::COMM_WORLD ); + + // Create an n x n complex distributed matrix, + // We distribute the matrix using grid 'g'. + // + // There are quite a few available constructors, including ones that + // allow you to pass in your own local buffer and to specify the + // distribution alignments (i.e., which process row and column owns the + // top-left element) + DistMatrix H( n, n, g ); + + // Fill the matrix since we did not pass in a buffer. + // + // We will fill entry (i,j) with the complex value (i+j,i-j) so that + // the global matrix is Hermitian. However, only one triangle of the + // matrix actually needs to be filled, the symmetry can be implicit. + // + const Int localHeight = H.LocalHeight(); + const Int localWidth = H.LocalWidth(); + for( Int jLoc=0; jLoc w( g ); + DistMatrix Q( g ); + HermitianEig( LOWER, H, w, Q ); + + if( print ) + { + Print( HCopy, "H" ); + Print( Q, "Q" ); + Print( w, "w" ); + } + + // Check the residual, || H Q - Omega Q ||_F + const Real frobH = HermitianFrobeniusNorm( LOWER, HCopy ); + auto E( Q ); + DiagonalScale( RIGHT, NORMAL, w, E ); + Hemm( LEFT, LOWER, C(-1), HCopy, Q, C(1), E ); + const Real frobResid = FrobeniusNorm( E ); + + // Check the orthogonality of Q + Identity( E, n, n ); + Herk( LOWER, ADJOINT, Real(-1), Q, Real(1), E ); + const Real frobOrthog = HermitianFrobeniusNorm( LOWER, E ); + + if( mpi::Rank() == 0 ) + Output + ("|| H ||_F = ",frobH,"\n", + "|| H Q - Q Omega ||_F / || A ||_F = ",frobResid/frobH,"\n", + "|| Q' Q - I ||_F = ",frobOrthog,"\n"); +} + int main( int argc, char* argv[] ) @@ -27,80 +103,14 @@ main( int argc, char* argv[] ) { const Int n = Input("--size","size of matrix",100); const bool print = Input("--print","print matrices?",false); + const bool single_precision = Input("--single", "single precision?", false); ProcessInput(); PrintInputReport(); - - // Create a 2d process grid from a communicator. In our case, it is - // mpi::COMM_WORLD. There is another constructor that allows you to - // specify the grid dimensions, Grid g( comm, r ), which creates a - // grid of height r. - Grid g( mpi::COMM_WORLD ); - - // Create an n x n complex distributed matrix, - // We distribute the matrix using grid 'g'. - // - // There are quite a few available constructors, including ones that - // allow you to pass in your own local buffer and to specify the - // distribution alignments (i.e., which process row and column owns the - // top-left element) - DistMatrix H( n, n, g ); - - // Fill the matrix since we did not pass in a buffer. - // - // We will fill entry (i,j) with the complex value (i+j,i-j) so that - // the global matrix is Hermitian. However, only one triangle of the - // matrix actually needs to be filled, the symmetry can be implicit. - // - const Int localHeight = H.LocalHeight(); - const Int localWidth = H.LocalWidth(); - for( Int jLoc=0; jLoc(n, print); + } else { + run_example(n, print); } - - // Make a backup of H before we overwrite it within the eigensolver - auto HCopy( H ); - - // Call the eigensolver. We first create an empty complex eigenvector - // matrix, Q[MC,MR], and an eigenvalue column vector, w[VR,* ] - // - // Optional: set blocksizes and algorithmic choices here. See the - // 'Tuning' section of the README for details. - DistMatrix w( g ); - DistMatrix Q( g ); - HermitianEig( LOWER, H, w, Q ); - - if( print ) - { - Print( HCopy, "H" ); - Print( Q, "Q" ); - Print( w, "w" ); - } - - // Check the residual, || H Q - Omega Q ||_F - const Real frobH = HermitianFrobeniusNorm( LOWER, HCopy ); - auto E( Q ); - DiagonalScale( RIGHT, NORMAL, w, E ); - Hemm( LEFT, LOWER, C(-1), HCopy, Q, C(1), E ); - const Real frobResid = FrobeniusNorm( E ); - - // Check the orthogonality of Q - Identity( E, n, n ); - Herk( LOWER, ADJOINT, Real(-1), Q, Real(1), E ); - const Real frobOrthog = HermitianFrobeniusNorm( LOWER, E ); - - if( mpi::Rank() == 0 ) - Output - ("|| H ||_F = ",frobH,"\n", - "|| H Q - Q Omega ||_F / || A ||_F = ",frobResid/frobH,"\n", - "|| Q' Q - I ||_F = ",frobOrthog,"\n"); } catch( exception& e ) { ReportException(e); } diff --git a/external/pmrrr/CMakeLists.txt b/external/pmrrr/CMakeLists.txt index e6e7347502..e8265f9242 100644 --- a/external/pmrrr/CMakeLists.txt +++ b/external/pmrrr/CMakeLists.txt @@ -10,10 +10,13 @@ # http://opensource.org/licenses/BSD-2-Clause # +# Create a set of defines for including in templated code +set(pmrrr_defines "\n") + option(HAVE_SPINLOCKS "Enable if pthread lib supports spinlocks" OFF) MARK_AS_ADVANCED(HAVE_SPINLOCKS) if(NOT HAVE_SPINLOCKS) - add_definitions(-DNOSPINLOCKS) + set(pmrrr_defines "${pmrrr_defines}#define NOSPINLOCKS\n") endif() # Include the header directory @@ -24,9 +27,6 @@ include_directories(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/include") # Define the header files installation rules # ------------------------------------------ -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - FILES_MATCHING PATTERN "*.h") if(WIN32 OR APPLE OR NOT EL_HYBRID) # Disable for Windows because of lack of POSIX support; # disable for Mac OS X because of deprecation of unnamed semaphores @@ -36,9 +36,21 @@ else() option(DISABLE_PTHREADS "Disable pthreads?" OFF) endif() if(DISABLE_PTHREADS) - add_definitions(-DDISABLE_PTHREADS) + set(pmrrr_defines "${pmrrr_defines}#define DISABLE_PTHREADS\n") endif() +#Build directory +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/global.h.in" + "${PROJECT_BINARY_DIR}/include/pmrrr/definitions/global.h" + @ONLY) +#Install directory +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/global.h.in" + "${CMAKE_INSTALL_INCLUDEDIR}/pmrrr/definitions/lobal.h" + @ONLY) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp") + # Ensure that an MPI C compiler was found if(NOT MPI_C_FOUND) message(FATAL_ERROR "No MPI C compiler was found, so PMRRR cannot be built") @@ -74,7 +86,7 @@ set(CMAKE_THREAD_LIBS_INIT ${CMAKE_THREAD_LIBS_INIT} PARENT_SCOPE) # Define the main library and its link libraries # ---------------------------------------------- -file(GLOB_RECURSE PMRRR_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.c" "*.h") +file(GLOB_RECURSE PMRRR_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp" "*.h") add_library(pmrrr ${LIBRARY_TYPE} ${PMRRR_SRC}) if(DISABLE_PTHREADS) target_link_libraries(pmrrr ${MPI_C_LIBRARIES} ${MATH_LIBS}) diff --git a/external/pmrrr/LICENSE b/external/pmrrr/LICENSE deleted file mode 100644 index 669f274118..0000000000 --- a/external/pmrrr/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2010, RWTH Aachen University -All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - * Redistributions of source code must retain the above - copyright notice, this list of conditions and the following - disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - * Neither the name of the RWTH Aachen University nor the - names of its contributors may be used to endorse or promote - products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH -AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. diff --git a/external/pmrrr/README b/external/pmrrr/README deleted file mode 100644 index f27c2a620d..0000000000 --- a/external/pmrrr/README +++ /dev/null @@ -1,194 +0,0 @@ -These files provide the routine 'pmrrr'. Its purpose is to compute all -or a subset of eigenvalues and optionally eigenvectors of a symmetric -tridiagonal matrix. 'pmrrr' is based on the algorithm of Multiple -Relatively Robust Representations (MRRR). The routine thereby targets -hybrid architectures consisting of multiple SMP nodes. It also runs in -fully distributed mode, with each node having only one processor, and -fully SMP mode, in which case no message passing is required. The -implementation is based on LAPACK's routine 'dstemr'. - - -Using libmrrr.a: ----------------- -The folder EXAMPLES contains examples of how to use the routine -'pmrrr' in C and Fortran code. Edit the 'Makefile' in these folders if -you do not use the GNU compilers and run 'make' to compile the -examples. - -In general, the code that calls 'pmrrr' needs to be linked to the -library 'libpmrrr.a', which is created in the LIB folder. -Additionally, it has to be linked to the libraries on which it -depends (see example files). - -Below are given the C and Fortran prototypes of the function 'pmrrr'. -For more information please see 'INCLUDE/pmrrr.h'. - - -###################################################################### -# C function prototype: # -###################################################################### -# int pmrrr(char *jobz, char *range, int *n, double *D, # -# double *E, double *vl, double *vu, int *il, int *iu, # -# int *tryrac, MPI_Comm comm, int *nz, int *offset, # -# double *W, double *Z, int *ldz, int *Zsupp); # -# # -# Arguments: # -# ---------- # -# # -# INPUTS: # -# ------- # -# jobz "N" or "n" - compute only eigenvalues # -# "V" or "v" - compute also eigenvectors # -# "C" or "c" - count the maximal number of # -# locally computed eigenvectors # -# range "A" or "a" - all # -# "V" or "v" - by interval: (VL,VU] # -# "I" or "i" - by index: IL-IU # -# n matrix size # -# ldz must be set on input to the leading dimension # -# of of eigenvector matrix Z; this is often equal # -# to matrix size n (not changed on output) # -# # -# INPUT + OUTPUT: # -# --------------- # -# D (double[n]) Diagonal elements of tridiagonal T. # -# (On output the array will be overwritten). # -# E (double[n]) Off-diagonal elements of tridiagonal T. # -# First n-1 elements contain off-diagonals, # -# the last element can have an abitrary value. # -# (On output the array will be overwritten.) # -# vl If range="V", lower bound of interval # -# (vl,vu], on output refined. # -# If range="A" or "I" not referenced as input. # -# On output the interval (vl,vu] contains ALL # -# the computed eigenvalues. # -# vu If range="V", upper bound of interval # -# (vl,vu], on output refined. # -# If range="A" or "I" not referenced as input. # -# On output the interval (vl,vu] contains ALL # -# the computed eigenvalues. # -# il If range="I", lower index (1-based indexing) of # -# the subset 'il' to 'iu'. # -# If range="A" or "V" not referenced as input. # -# On output the eigenvalues with index il to iu # -# are computed by ALL processes. # -# iu If range="I", upper index (1-based indexing) of # -# the subset 'il' to 'iu'. # -# If range="A" or "V" not referenced as input. # -# On output the eigenvalues with index il to iu # -# are computed by ALL processes. # -# tryrac 0 - do not try to achieve high relative accuracy.# -# 1 - relative accuracy will be attempted; # -# on output it is set to zero if high relative # -# accuracy is not achieved. # -# comm MPI communicator; commonly: MPI_COMM_WORLD. # -# # -# OUTPUT: # -# ------- # -# nz Number of eigenvalues and eigenvectors computed # -# locally. # -# If jobz="C", 'nz' will be set to the maximal # -# number of locally computed eigenvectors such # -# that double[n*nz] will provide enough memory # -# for the local eigenvectors; this is only # -# important in case of range="V" since # -# '#eigenpairs' are not known in advance # -# offset Index, relative to the computed eigenvalues, of # -# the smallest eigenvalue computed locally # -# (0-based indexing). # -# W (double[n]) Locally computed eigenvalues; # -# The first nz entries contain the eigenvalues # -# computed locally; the first entry contains the # -# 'offset + 1'-th eigenvalue computed and the # -# 'offset + il'-th eigenvalue of the input matrix # -# (1-based indexing in both cases). # -# In some situations it is desirable to have all # -# computed eigenvalues in W, instead of only # -# those computed locally. In this case the # -# routine 'PMR_comm_eigvals' can be called right # -# after 'pmrrr' returns (see below). # -# Z Locally computed eigenvectors. # -# (double[n*nz]) Enough space must be provided to store the # -# vectors. 'nz' should be bigger or equal # -# to ceil('#eigenpairs'/'#processes'), where # -# '#eigenpairs' is 'n' in case of range="A" and # -# 'iu-il+1' in case of range="I". Alternatively, # -# and for range="V" 'nz' can be obtained # -# by running the routine with jobz="C". # -# Zsupp Support of eigenvectors, which is given by # -# (double[2*n]) Z[2*i] to Z[2*i+1] for the i-th eigenvector # -# stored locally (1-based indexing in both # -# cases). # -# # -# RETURN VALUE: # -# ------------- # -# 0 - success # -# 1 - wrong input parameter # -# 2 - misc errors # -# # -###################################################################### - -###################################################################### -# Fortran prototype: # -###################################################################### -# PMRRR(JOBZ, RANGE, N, D, E, VL, VU, IL, IU, TRYRAC, # -# MPI_COMM, NZ, OFFSET, W, Z, LDZ, ZSUPP, INFO) # -# # -# CHARACTER JOBZ, RANGE # -# INTEGER N, IL, IU, TRYRAC, MPI_COMM, NZ, OFFSET, LDZ, # -# ZSUPP(*), INFO # -# DOUBLE PRECISION D(*), D(*), VL, VU, W(*), Z(*,*) # -###################################################################### - - -The function is called by every process of the MPI communicator -specified as an argument. Each process is assumed to have the diagonal -and sub-diagonal of the symmetric tridiagonal input matrix. It is -thereby important that MPI is always initialized by -'MPI_Init_thread' (as opposed to 'MPI_Init'!). If multithreading is -desired, the thread level support MPI_THREAD_MULTIPLE has to be -requested in 'MPI_Init_thread'. If no multithreading is used, the -thread support 'MPI_THREAD_SINGLE' can be requested. Notice that -multithreading is disabled if the implementation of the MPI library -does not support MPI_THREAD_MULTIPLE. -The number of threads created in each process can be specified via the -environment variable PMR_NUM_THREADS. In case this variable is not -defined, then the routine will create as many threads as specified by -the variable DEFAULT_NUM_THREADS (which is set in 'pmrrr.h'). -Note: Do not forget to tell mpiexec/mpirun about the environment -variable PMR_NUM_THREADS if you use it. This is typically done via the -option '-env' or '-x'. - - -Some additinal comments: ------------------------- - -COMMUNICATING COMPUTED EIGENVALUES: -Upon completion of routine 'pmrrr', each process has a subset of the -eigenvalues, stored in the array W (see 'pmrrr.h'). In some situations -it is desirable to let all processes have all the computed eigenvalues. -To this end, an additional routine 'PMR_comm_eigvals' is provided. It -can be called right after 'pmrrr' returned. Upon completion, each -process has all the computed eigenvalues, stored in W. -The C and Fortran prototypes of the function 'PMR_comm_eigvals' are -given below. For more information please see 'pmrrr.h'. - -###################################################################### -# C function prototype: # -###################################################################### -# int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *offset, # -# double *W); # -###################################################################### - -###################################################################### -# Fortran prototype: # -###################################################################### -# PMR_COMM_EIGVALS(MPI_COMM, NZ, OFFSET, W, INFO) # -# # -# INTEGER MPI_COMM, NZ, OFFSET, INFO # -# DOUBLE PRECISION W(*) # -###################################################################### - - -COMMENTS AND BUGS: -petschow@aices.rwth-aachen.de diff --git a/external/pmrrr/include/pmrrr/global.h b/external/pmrrr/cmake/global.h.in similarity index 60% rename from external/pmrrr/include/pmrrr/global.h rename to external/pmrrr/cmake/global.h.in index 5668e25ff2..b9d0f2d3d6 100644 --- a/external/pmrrr/include/pmrrr/global.h +++ b/external/pmrrr/cmake/global.h.in @@ -41,56 +41,91 @@ #ifndef GGLOBAL_H #define GGLOBAL_H +#include + #ifdef __STDC__ - /* Some version of Standard C */ -#if defined (__STDC_VERSION__) && __STDC_VERSION__>=199901L - /* C99 */ + /* Some version of Standard C */ + #if defined (__STDC_VERSION__) && __STDC_VERSION__>=199901L + /* C99 */ -#include -/* #pragma STDC FP_CONTRACT ON|OFF|DEFAULT */ + #include + /* #pragma STDC FP_CONTRACT ON|OFF|DEFAULT */ -#else - /* C89 with or without Amendment 1, see H&S p.53 */ + #else + /* C89 with or without Amendment 1, see H&S p.53 */ -#define restrict /*nothing*/ -#define inline /*nothing*/ + #define restrict /*nothing*/ + //#define inline /*nothing*/ -#define fmax(a,b) ( (a) > (b) ? (a) : (b) ) -#define fmin(a,b) ( (a) < (b) ? (a) : (b) ) + /* + #define fmax(a,b) ( (a) > (b) ? (a) : (b) ) + #define fmin(a,b) ( (a) < (b) ? (a) : (b) ) + */ -typedef int bool; -#define false 0 -#define true 1 + /* C++ provides a full support for booleans, but LAPACK->C interface accepts integers. + For compatibility with previous code, provide an integer_boolean + */ + #ifndef __cplusplus + typedef int bool; + #define false 0 + #define true 1 + #endif -#endif + #endif #else /* __STDC__ not defined */ - /* Not Standard C */ - -#define inline /*nothing*/ + /* Not Standard C */ - /* in case compiler does not support type qualifiers - * see Harbison and Steele p. 89*/ -#define const /*nothing*/ -#define volatile /*nothing*/ -#define restrict /*nothing*/ + //#define inline /*nothing*/ -#define fmax(a,b) ( (a) > (b) ? (a) : (b) ) -#define fmin(a,b) ( (a) < (b) ? (a) : (b) ) - -typedef int bool; -#define false 0 -#define true 1 - - /* need also remove all // style comments */ - /* inline ... */ + /* in case compiler does not support type qualifiers + * see Harbison and Steele p. 89*/ + #define const /*nothing*/ + #define volatile /*nothing*/ + #define restrict /*nothing*/ + /* + #define fmax(a,b) ( (a) > (b) ? (a) : (b) ) + #define fmin(a,b) ( (a) < (b) ? (a) : (b) ) + */ + typedef int bool_; + #define false 0 + #define true 1 + /* need also remove all // style comments */ + /* inline ... */ #endif /* see H&S p.70 to include C++ in the list above */ +//#define restrict /**/ /* max/min/ceil for integers */ #define imax(a,b) ( (a) > (b) ? (a) : (b) ) #define imin(a,b) ( (a) < (b) ? (a) : (b) ) #define iceil(a,b) ( (((a)%(b))!=0) ? (((a)/(b))+1) : ((a)/(b)) ) +namespace pmrrr { + + template + struct float_traits; + + template<> + struct float_traits { + static MPI_Datatype mpi_type() + { + return MPI_FLOAT; + } + }; + + template<> + struct float_traits { + static MPI_Datatype mpi_type() + { + return MPI_DOUBLE; + } + }; + +} + +@pmrrr_defines@ + /* End of header file */ #endif + diff --git a/external/pmrrr/include/pmrrr.h b/external/pmrrr/include/pmrrr.h deleted file mode 100644 index 1b83d813d1..0000000000 --- a/external/pmrrr/include/pmrrr.h +++ /dev/null @@ -1,313 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015 Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ - -#ifndef PPMRRR_H -#define PPMRRR_H - -#include -#include -#include -#include -#include -#include -#include -#include "mpi.h" -#include "pmrrr/global.h" - -/* Parallel computation of all or a subset of eigenvalues and - * optionally eigenvectors of a symmetric tridiagonal matrix based on - * the algorithm of Multiple Relatively Robust Representations (MRRR). - * The routine targets hybrid architectures consisting of multiple SMP - * nodes. It also runs in fully distributed mode, with each node - * having only one processor, and fully SMP mode, in which case no - * message passing is required. The implementation is based on - * LAPACK's routine 'dstemr'. - * - * Function prototype: */ - -int pmrrr(char *jobz, char *range, int *n, double *D, - double *E, double *vl, double *vu, int *il, int *iu, - int *tryrac, MPI_Comm comm, int *nz, int *offset, - double *W, double *Z, int *ldz, int *Zsupp); - -/* Arguments: - * ---------- - * - * INPUTS: - * ------- - * jobz "N" or "n" - compute only eigenvalues - * "V" or "v" - compute also eigenvectors - * "C" or "c" - count the maximal number of - * locally computed eigenvectors - * range "A" or "a" - all - * "V" or "v" - by interval: (VL,VU] - * "I" or "i" - by index: IL-IU - * n matrix size - * ldz must be set on input to the leading dimension - * of of eigenvector matrix Z; this is often equal - * to matrix size n (not changed on output) - * - * INPUT + OUTPUT: - * --------------- - * D (double[n]) Diagonal elements of tridiagonal T. - * (On output the array will be overwritten). - * E (double[n]) Off-diagonal elements of tridiagonal T. - * First n-1 elements contain off-diagonals, - * the last element can have an abitrary value. - * (On output the array will be overwritten.) - * vl If range="V", lower bound of interval - * (vl,vu], on output refined. - * If range="A" or "I" not referenced as input. - * On output the interval (vl,vu] contains ALL - * the computed eigenvalues. - * vu If range="V", upper bound of interval - * (vl,vu], on output refined. - * If range="A" or "I" not referenced as input. - * On output the interval (vl,vu] contains ALL - * the computed eigenvalues. - * il If range="I", lower index (1-based indexing) of - * the subset 'il' to 'iu'. - * If range="A" or "V" not referenced as input. - * On output the eigenvalues with index il to iu are - * computed by ALL processes. - * iu If range="I", upper index (1-based indexing) of - * the subset 'il' to 'iu'. - * If range="A" or "V" not referenced as input. - * On output the eigenvalues with index il to iu are - * computed by ALL processes. - * tryrac 0 - do not try to achieve high relative accuracy. - * NOTE: this should be the default in context of - * dense eigenproblems. - * 1 - relative accuracy will be attempted; - * on output it is set to zero if high relative - * accuracy is not achieved. - * comm MPI communicator; commonly: MPI_COMM_WORLD. - * - * OUTPUT: - * ------- - * nz Number of eigenvalues and eigenvectors computed - * locally. - * If jobz="C", 'nz' will be set to the maximal - * number of locally computed eigenvectors such - * that double[n*nz] will provide enough memory - * for the local eigenvectors; this is only - * important in case of range="V" since - * '#eigenpairs' are not known in advance - * offset Index, relative to the computed eigenvalues, of - * the smallest eigenvalue computed locally - * (0-based indexing). - * W (double[n]) Locally computed eigenvalues; - * The first nz entries contain the eigenvalues - * computed locally; the first entry contains the - * 'offset + 1'-th computed eigenvalue, which is the - * 'offset + il'-th eigenvalue of the input matrix - * (1-based indexing in both cases). - * In some situations it is desirable to have all - * computed eigenvalues in W, instead of only - * those computed locally. In this case, call - * routine 'PMR_comm_eigvals' after - * 'pmrrr' returns (see example and interface below). - * Z Locally computed eigenvectors. - * (double[n*nz]) Enough space must be provided to store the - * vectors. 'nz' should be bigger or equal - * to ceil('#eigenpairs'/'#processes'), where - * '#eigenpairs' is 'n' in case of range="A" and - * 'iu-il+1' in case of range="I". Alternatively, - * and for range="V" 'nz' can be obtained - * by running the routine with jobz="C". - * Zsupp Support of eigenvectors, which is given by - * (double[2*n]) i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector - * (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and - * in Fotran Z(i1:i2) are non-zero). - * - * RETURN VALUE: - * ------------- - * 0 - success - * 1 - wrong input parameter - * 2 - misc errors - * - * The Fortran interface takes an additinal integer argument INFO - * to retrieve the return value. - * An example call in Fortran looks therefore like - * - * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, - * MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO) - * - * - * EXAMPLE CALL: - * ------------- - * char *jobz, *range; - * int n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp; - * double *D, *E, *W, *Z, vl, vu; - * - * // allocate space for D, E, W, Z - * // initialize D, E - * // set jobz, range, ldz, and if necessary, il, iu or vl, vu - * - * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu, - * &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W, - * Z, &ldz , Zsupp); - * - * // optional: - * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W); - * - */ - - - -/* Set the number of threads in case PMR_NUM_THREADS is not - * specified */ -#define DEFAULT_NUM_THREADS 1 - -/* Call LAPACK's dstemr in every process to compute all desiered - * eigenpairs redundantly (and discard the once that would usually - * not be computed by the process) if n < DSTEMR_IF_SMALLER; - * default: 4 */ -#define DSTEMR_IF_SMALLER 4 - -/* Make sure that eigenpairs are sorted globally; if set to false - * they are in most cases sorted, but it is not double checked and - * can therefore not be guaranteed; default: true */ -#define ASSERT_SORTED_EIGENPAIRS false - -/* Set flag if Rayleigh Quotient Correction should be used, - * which is usually faster; default: true */ -#define TRY_RQC true - -/* Maximum numver of iterations of inverse iteration; - * default: 10 */ -#define MAXITER 10 - -/* Set the min. relative gap for an eigenvalue to be considered - * well separated, that is a singleton; this is a very important - * parameter of the computation; default: 10e-3 */ -#define MIN_RELGAP 1e-3 - -/* Set the maximal allowed element growth for being accepted as - * an RRR, that is if max. pivot < MAX_GROWTH * 'spectral diameter' - * the RRR is accepted; default: 64.0 */ -#define MAX_GROWTH 64.0 - -/* Set how many iterations should be executed to find the root - * representation; default: 6 */ -#define MAX_TRY_RRR 10 - - - -/* - * Routine to communicate eigenvalues such that every process has - * all computed eigenvalues (iu-il+1) in W; this routine is designed - * to be called right after 'pmrrr'. - */ -int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *ifirst, double *W); -/* Arguments: - * ---------- - * - * INPUTS: - * ------- - * comm MPI communicator; commonly: MPI_COMM_WORLD. - * nz Number of eigenvalues local in W as returned - * from 'pmrrr'. - * offset Index, relative to the computed eigenvalues, of - * the smallest eigenvalue computed locally - * (0-based indexing). - * - * INPUT + OUTPUT: - * --------------- - * W (double[n]) Eigenvalues. - * On input the first nz elements of W contain - * the eigenvalues computed locally. - * On output the first 'iu-il+1' of W contain - * all computed eigenvalues. - * - * The Fortran interface takes an additinal integer argument INFO - * to retrieve the return value. - * An example call in Fortran looks therefore like - * - * CALL PMR_COMM_EIGVALS(MPI_COMM_WORLD, NZ, OFFSET, W, INFO) - * - */ - - -/* LAPACK and BLAS function prototypes - * Note: type specifier 'extern' does not matter in declaration - * so here used to mark routines from LAPACK and BLAS libraries */ -extern int odscal(int*, double*, double*, int*); -extern double odnst(char*, int*, double*, double*); -extern void odrrr(int*, double*, double*, int*); -extern void odrra(int*, double*, double*, double*, double*, - double*, int*, int*, int*); -extern void odrrc(char*, int*, double*, double*, double*, double*, - double*, int*, int*, int*, int*); -extern void odrrd(char*, char*, int*, double*, double*, int*, - int*, double*, double*, double*, double*, - double*, double*, int*, int*, int*, double*, - double*, double*, double*, int*, int*, double*, - int*, int*); -extern void odrrb(int*, double*, double*, int*, int*, double*, - double*, int*, double*, double*, double*, double*, - int*, double*, double*, int*, int*); -extern void odrrk(int*, int*, double*, double*, double*, double*, - double*, double*, double*, double*, int*); -extern void odebz(int*, int*, int*, int*, int*, int*, double*, - double*, double*, double*, double*, double*, - int*, double*, double*, int*, int*, double*, - int*, int*); -extern void odrnv(int*, int*, int*, double*); -extern void odrrf(int*, double*, double*, double*, int*, int*, - double*, double*, double*, double*, double*, - double*, double*, double*, double*, double*, - double*, int*); -extern void odr1v(int*, int*, int*, double*, double*, double*, - double*, double*, double*, double*, double*, - bool*, int*, double*, double*, int*, int*, - double*, double*, double*, double*); -extern void odrrj(int*, double*, double*, int*, int*, double*, - int*, double*, double*, double*, int*, double*, - double*, int*); -extern void odstmr(char*, char*, int*, double*, double*, double*, - double*, int*, int*, int*, double*, double*, - int*, int*, int*, int*, double*, int*, int*, - int*, int*); - -#endif /* End of header file */ diff --git a/external/pmrrr/include/pmrrr/blas/odcpy.hpp b/external/pmrrr/include/pmrrr/blas/odcpy.hpp new file mode 100644 index 0000000000..2473c56776 --- /dev/null +++ b/external/pmrrr/include/pmrrr/blas/odcpy.hpp @@ -0,0 +1,115 @@ +/* odcpy.f -- translated by f2c (version 20061008) */ + + +#ifndef __ODCPY_HPP__ +#define __ODCPY_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace blas { + + /* Subroutine */ + template + int odcpy(int *n, FloatingType *dx, int *incx, + FloatingType *dy, int *incy) + { + /* System generated locals */ + int i__1; + + /* Local variables */ + int i__, m, ix, iy, mp1; + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* copies a vector, x, to a vector, y. */ + /* uses unrolled loops for increments equal to one. */ + /* jack dongarra, linpack, 3/11/78. */ + /* modified 12/3/93, array(1) declarations changed to array(*) */ + + + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + + /* code for unequal increments or equal increments */ + /* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; + /* L10: */ + } + return 0; + + /* code for both increments equal to 1 */ + + + /* clean-up loop */ + + L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; + /* L30: */ + } + if (*n < 7) { + return 0; + } + L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; + /* L50: */ + } + return 0; + } /* odcpy_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/blas/odscal.hpp b/external/pmrrr/include/pmrrr/blas/odscal.hpp new file mode 100644 index 0000000000..6ef6be31ab --- /dev/null +++ b/external/pmrrr/include/pmrrr/blas/odscal.hpp @@ -0,0 +1,107 @@ +/** + C++ template version of BLAS routine odscal. + + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSCAL_HPP__ +#define __ODSCAL_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace blas { + + /* Subroutine */ + template + int odscal(int *n, FloatingType *da, FloatingType *dx, + int *incx) + { + /* System generated locals */ + int i__1, i__2; + + /* Local variables */ + int i__, m, mp1, nincx; + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + /* * */ + /* scales a vector by a constant. */ + /* uses unrolled loops for increment equal to one. */ + /* jack dongarra, linpack, 3/11/78. */ + /* modified 3/93 to return if incx .le. 0. */ + /* modified 12/3/93, array(1) declarations changed to array(*) */ + + + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + + /* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; + /* L10: */ + } + return 0; + + /* code for increment equal to 1 */ + + + /* clean-up loop */ + + L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; + /* L30: */ + } + if (*n < 5) { + return 0; + } + L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; + /* L50: */ + } + return 0; + } + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/blas/odswap.hpp b/external/pmrrr/include/pmrrr/blas/odswap.hpp new file mode 100644 index 0000000000..5c49d3518a --- /dev/null +++ b/external/pmrrr/include/pmrrr/blas/odswap.hpp @@ -0,0 +1,121 @@ +/* dswap.f -- translated by f2c (version 20061008) */ + +#ifndef __ODSWAP_HPP__ +#define __ODSWAP_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace blas { + + /* Subroutine */ + template + int odswap(int *n, FloatingType *dx, int *incx, + FloatingType *dy, int *incy) + { + /* System generated locals */ + int i__1; + + /* Local variables */ + int i__, m, ix, iy, mp1; + FloatingType dtemp; + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* interchanges two vectors. */ + /* uses unrolled loops for increments equal one. */ + /* jack dongarra, linpack, 3/11/78. */ + /* modified 12/3/93, array(1) declarations changed to array(*) */ + + + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + + /* code for unequal increments or equal increments not equal */ + /* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; + /* L10: */ + } + return 0; + + /* code for both increments equal to 1 */ + + + /* clean-up loop */ + + L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + /* L30: */ + } + if (*n < 3) { + return 0; + } + L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; + /* L50: */ + } + return 0; + } /* dswap_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/rrr.h b/external/pmrrr/include/pmrrr/definitions/counter.h similarity index 66% rename from external/pmrrr/include/pmrrr/rrr.h rename to external/pmrrr/include/pmrrr/definitions/counter.h index df78e4984c..a560b52d14 100644 --- a/external/pmrrr/include/pmrrr/rrr.h +++ b/external/pmrrr/include/pmrrr/definitions/counter.h @@ -41,46 +41,40 @@ * */ -#ifndef RRR_H -#define RRR_H +#ifndef CCOUNTER_H +#define CCOUNTER_H #ifndef DISABLE_PTHREADS #include #endif -#include "global.h" - -typedef struct { - double *restrict D; - double *restrict L; - double *restrict DL; - double *restrict DLL; - int size; - int depth; - bool parent_processed; - bool copied_parent_rrr; - int ndepend; -#ifndef DISABLE_PTHREADS - pthread_mutex_t mutex; -#endif -} rrr_t; +#include -rrr_t *PMR_create_rrr(double *restrict D, double *restrict L, - double *restrict DL, double *restrict DLL, - int size, int depth); +namespace pmrrr { namespace detail { -rrr_t *PMR_reset_rrr (rrr_t *restrict RRR, double *restrict D, - double *restrict L, double *restrict DL, - double *restrict DLL, int size, int depth); + typedef struct { + int value; + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + pthread_mutex_t lock; + #else + pthread_spinlock_t lock; + #endif + #endif + } counter_t; -int PMR_increment_rrr_dependencies(rrr_t *RRR); -int PMR_set_parent_processed_flag (rrr_t *RRR); -int PMR_set_copied_parent_rrr_flag(rrr_t *RRR, bool val); -int PMR_try_destroy_rrr(rrr_t *RRR); + counter_t *PMR_create_counter(int init_value); + int PMR_get_counter_value(counter_t *counter); + int PMR_set_counter_value(counter_t *counter, int value); + int PMR_decrement_counter(counter_t *counter, int amount); + int PMR_increment_counter(counter_t *counter, int amount); + void PMR_destroy_counter(counter_t *counter); -int PMR_rrr_init_lock(rrr_t *RRR); -void PMR_rrr_destroy_lock(rrr_t *RRR); -int PMR_rrr_lock(rrr_t *RRR); -int PMR_rrr_unlock(rrr_t *RRR); + int PMR_counter_init_lock(counter_t *counter); + void PMR_counter_destroy_lock(counter_t *counter); + int PMR_counter_lock(counter_t *counter); + int PMR_counter_unlock(counter_t *counter); +} } + #endif diff --git a/external/pmrrr/include/pmrrr/plarre.h b/external/pmrrr/include/pmrrr/definitions/plarre.h similarity index 93% rename from external/pmrrr/include/pmrrr/plarre.h rename to external/pmrrr/include/pmrrr/definitions/plarre.h index 66a47070c2..5db7be4fc9 100644 --- a/external/pmrrr/include/pmrrr/plarre.h +++ b/external/pmrrr/include/pmrrr/definitions/plarre.h @@ -41,8 +41,8 @@ #ifndef PPLARRE_H #define PPLARRE_H -#include "global.h" -#include "structs.h" +#include +#include /* * Computing the eigenvalues and of a symmetric tridiagonal matrix @@ -51,8 +51,6 @@ * Note: this implementation is here is not really optimized in * terms of performance and memory usage. */ -int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct, - val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp); /* Perturb the initial root representation by "1 + eps*RAND_FACTOR*rand"; * default: 8.0 */ diff --git a/external/pmrrr/include/pmrrr/plarrv.h b/external/pmrrr/include/pmrrr/definitions/plarrv.h similarity index 92% rename from external/pmrrr/include/pmrrr/plarrv.h rename to external/pmrrr/include/pmrrr/definitions/plarrv.h index 8d921aa270..9e0a289a61 100644 --- a/external/pmrrr/include/pmrrr/plarrv.h +++ b/external/pmrrr/include/pmrrr/definitions/plarrv.h @@ -41,16 +41,12 @@ #ifndef PPLARRV_H #define PPLARRV_H -#include "global.h" -#include "structs.h" +#include +#include /* * Computation of eigenvectors of a symmetric tridiagonal */ -int plarrv(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, - vec_t *Zstruct, tol_t *tolstruct, int *nzp, - int *myfirstp); - #define COMM_COMPLETE 0 #define COMM_INCOMPLETE 1 #define C_TASK_PROCESSED 0 diff --git a/external/pmrrr/include/pmrrr/definitions/pmrrr.h b/external/pmrrr/include/pmrrr/definitions/pmrrr.h new file mode 100644 index 0000000000..4a514602be --- /dev/null +++ b/external/pmrrr/include/pmrrr/definitions/pmrrr.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015 Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef PPMRRR_H +#define PPMRRR_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Parallel computation of all or a subset of eigenvalues and + * optionally eigenvectors of a symmetric tridiagonal matrix based on + * the algorithm of Multiple Relatively Robust Representations (MRRR). + * The routine targets hybrid architectures consisting of multiple SMP + * nodes. It also runs in fully distributed mode, with each node + * having only one processor, and fully SMP mode, in which case no + * message passing is required. The implementation is based on + * LAPACK's routine 'dstemr'. + */ + +/* Set the number of threads in case PMR_NUM_THREADS is not + * specified */ +#define DEFAULT_NUM_THREADS 1 + +/* Call LAPACK's dstemr in every process to compute all desiered + * eigenpairs redundantly (and discard the once that would usually + * not be computed by the process) if n < DSTEMR_IF_SMALLER; + * default: 4 */ +#define DSTEMR_IF_SMALLER 4 + +/* Make sure that eigenpairs are sorted globally; if set to false + * they are in most cases sorted, but it is not double checked and + * can therefore not be guaranteed; default: true */ +#define ASSERT_SORTED_EIGENPAIRS false + +/* Set flag if Rayleigh Quotient Correction should be used, + * which is usually faster; default: true */ +#define TRY_RQC true + +/* Maximum numver of iterations of inverse iteration; + * default: 10 */ +#define MAXITER 10 + +/* Set the min. relative gap for an eigenvalue to be considered + * well separated, that is a singleton; this is a very important + * parameter of the computation; default: 10e-3 */ +#define MIN_RELGAP 1e-3 + +/* Set the maximal allowed element growth for being accepted as + * an RRR, that is if max. pivot < MAX_GROWTH * 'spectral diameter' + * the RRR is accepted; default: 64.0 */ +#define MAX_GROWTH 64.0 + +/* Set how many iterations should be executed to find the root + * representation; default: 6 */ +#define MAX_TRY_RRR 10 + +#endif /* End of header file */ diff --git a/external/pmrrr/include/pmrrr/queue.h b/external/pmrrr/include/pmrrr/definitions/queue.h similarity index 64% rename from external/pmrrr/include/pmrrr/queue.h rename to external/pmrrr/include/pmrrr/definitions/queue.h index 266737caa9..6e6ec4ba7d 100644 --- a/external/pmrrr/include/pmrrr/queue.h +++ b/external/pmrrr/include/pmrrr/definitions/queue.h @@ -47,42 +47,41 @@ #ifndef DISABLE_PTHREADS #include #endif -#include "global.h" -typedef struct task_aux task_t; -struct task_aux { - void *data; /* ptr to data, has to be casted */ - int flag; /* flag specifying the task */ - task_t *next; /* ptr to next task; NULL if non-existing; */ - task_t *prev; /* ptr to prev. task; NULL if non-existing; */ -}; +#include -typedef struct { - int num_tasks; - task_t *head; - task_t *back; -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - pthread_mutex_t lock; - #else - pthread_spinlock_t lock; - #endif -#endif -} queue_t; +namespace pmrrr { namespace detail { + + typedef struct task_aux task_t; + struct task_aux { + void *data; /* ptr to data, has to be casted */ + int flag; /* flag specifying the task */ + task_t *next; /* ptr to next task; NULL if non-existing; */ + task_t *prev; /* ptr to prev. task; NULL if non-existing; */ + }; + typedef struct { + int num_tasks; + task_t *head; + task_t *back; + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + pthread_mutex_t lock; + #else + pthread_spinlock_t lock; + #endif + #endif + } queue_t; -/* functionality of the queue */ -queue_t *PMR_create_empty_queue (void); -int PMR_insert_task_at_front (queue_t *queue, task_t *task); -int PMR_insert_task_at_back (queue_t *queue, task_t *task); -task_t *PMR_remove_task_at_front(queue_t *queue); -task_t *PMR_remove_task_at_back (queue_t *queue); -int PMR_get_num_tasks(queue_t *queue); -void PMR_destroy_queue(queue_t *queue); + /* functionality of the queue */ + queue_t *PMR_create_empty_queue (void); + int PMR_insert_task_at_front (queue_t *queue, task_t *task); + int PMR_insert_task_at_back (queue_t *queue, task_t *task); + task_t *PMR_remove_task_at_front(queue_t *queue); + task_t *PMR_remove_task_at_back (queue_t *queue); + int PMR_get_num_tasks(queue_t *queue); + void PMR_destroy_queue(queue_t *queue); -int PMR_queue_init_lock(queue_t *queue); -void PMR_queue_destroy_lock(queue_t *queue); -int PMR_queue_lock(queue_t *queue); -int PMR_queue_unlock(queue_t *queue); +} } #endif diff --git a/external/pmrrr/include/pmrrr/counter.h b/external/pmrrr/include/pmrrr/definitions/rrr.h similarity index 73% rename from external/pmrrr/include/pmrrr/counter.h rename to external/pmrrr/include/pmrrr/definitions/rrr.h index e31d50e45a..d98c77e0d1 100644 --- a/external/pmrrr/include/pmrrr/counter.h +++ b/external/pmrrr/include/pmrrr/definitions/rrr.h @@ -41,35 +41,33 @@ * */ -#ifndef CCOUNTER_H -#define CCOUNTER_H +#ifndef RRR_H +#define RRR_H #ifndef DISABLE_PTHREADS #include #endif -#include "global.h" -typedef struct { - int value; -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - pthread_mutex_t lock; - #else - pthread_spinlock_t lock; - #endif -#endif -} counter_t; +#include + +namespace pmrrr { namespace detail { -counter_t *PMR_create_counter(int init_value); -int PMR_get_counter_value(counter_t *counter); -int PMR_set_counter_value(counter_t *counter, int value); -int PMR_decrement_counter(counter_t *counter, int amount); -int PMR_increment_counter(counter_t *counter, int amount); -void PMR_destroy_counter(counter_t *counter); + template + struct rrr_t { + FloatingType *restrict D; + FloatingType *restrict L; + FloatingType *restrict DL; + FloatingType *restrict DLL; + int size; + int depth; + bool parent_processed; + bool copied_parent_rrr; + int ndepend; + #ifndef DISABLE_PTHREADS + pthread_mutex_t mutex; + #endif + }; -int PMR_counter_init_lock(counter_t *counter); -void PMR_counter_destroy_lock(counter_t *counter); -int PMR_counter_lock(counter_t *counter); -int PMR_counter_unlock(counter_t *counter); +}} #endif diff --git a/external/pmrrr/include/pmrrr/definitions/structs.h b/external/pmrrr/include/pmrrr/definitions/structs.h new file mode 100644 index 0000000000..6cf43f4c72 --- /dev/null +++ b/external/pmrrr/include/pmrrr/definitions/structs.h @@ -0,0 +1,176 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef SSTRUCTS_H +#define SSTRUCTS_H + +#include + +#include +#include +#include + +namespace pmrrr { namespace detail { + + template + struct in_t { + int n; + FloatingType *restrict D; + FloatingType *restrict E; + int nsplit; + int *restrict isplit ; + FloatingType spdiam; + }; + + template + struct val_t { + int n; + FloatingType *vl; + FloatingType *vu; + int *il; + int *iu; + FloatingType *restrict W; + FloatingType *restrict Werr; + FloatingType *restrict Wgap; + int *restrict Windex; + int *restrict iblock; + int *restrict iproc; + FloatingType *restrict Wshifted; + FloatingType *restrict gersch; + }; + + template + struct vec_t { + int ldz; + int nz; + FloatingType *restrict Z; + int *restrict Zsupp; + int *restrict Zindex; + }; + + struct proc_t { + int pid; + int nproc; + MPI_Comm comm; + int nthreads; + int thread_support; + }; + + template + struct tol_t { + FloatingType split; + FloatingType rtol1; + FloatingType rtol2; + FloatingType pivmin; + }; + + typedef struct { + int num_messages; + MPI_Request *requests; + MPI_Status *stats; + } comm_t; + + typedef struct { + queue_t *r_queue; + queue_t *s_queue; + queue_t *c_queue; + } workQ_t; + + template + struct sort_struct_t{ + FloatingType lambda; + int local_ind; + int block_ind; + int ind; + }; + + template + struct auxarg1_t { + int n; + FloatingType *D; + FloatingType *E; + FloatingType *E2; + int il; + int iu; + int my_il; + int my_iu; + int nsplit; + int *isplit; + FloatingType bsrtol; + FloatingType pivmin; + FloatingType *gersch; + FloatingType *W; + FloatingType *Werr; + int *Windex; + int *iblock; + }; + + template + struct auxarg2_t { + int bl_size; + FloatingType *D; + FloatingType *DE2; + int rf_begin; + int rf_end; + FloatingType *W; + FloatingType *Werr; + FloatingType *Wgap; + int *Windex; + FloatingType rtol1; + FloatingType rtol2; + FloatingType pivmin; + FloatingType bl_spdiam; + }; + + template + struct auxarg3_t { + int tid; + proc_t *procinfo; + val_t *Wstruct; + vec_t *Zstruct; + tol_t *tolstruct; + workQ_t *workQ; + counter_t *num_left; + }; + +} // namespace detail +} // namespace pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/tasks.h b/external/pmrrr/include/pmrrr/definitions/tasks.h similarity index 54% rename from external/pmrrr/include/pmrrr/tasks.h rename to external/pmrrr/include/pmrrr/definitions/tasks.h index 3a6d47a7f1..56e951b284 100644 --- a/external/pmrrr/include/pmrrr/tasks.h +++ b/external/pmrrr/include/pmrrr/definitions/tasks.h @@ -3,7 +3,7 @@ * * Copyright (c) 2015, Jack Poulson * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: @@ -47,79 +47,65 @@ #ifndef DISABLE_PTHREADS #include #endif -#include "global.h" -#include "queue.h" -#include "rrr.h" -#include "structs.h" + +#include +#include +#include +#include #define SINGLETON_TASK_FLAG 0 #define CLUSTER_TASK_FLAG 1 #define REFINE_TASK_FLAG 2 -typedef struct { - int begin; - int end; - int depth; - int bl_begin; - int bl_end; - double bl_spdiam; - double lgap; - rrr_t *RRR; -} singleton_t; - -typedef struct { - int begin; - int end; - int depth; - int bl_begin; /* In priciple not needed since info */ - int bl_end; /* also contained in iblock+isplit */ - double bl_spdiam; - double lgap; - int proc_W_begin; - int proc_W_end; - int left_pid; - int right_pid; - rrr_t *RRR; - bool wait_until_refined; - comm_t *messages; -} cluster_t; - - -typedef struct { - int begin; - int end; - double *D; - double *DLL; - int p; - int q; - int bl_size; - double bl_spdiam; - int producer_tid; // not longer needed -#ifndef DISABLE_PTHREADS - sem_t *sem; /* since semt_t is a handle could also store it - instead of pointer to it, but pointer is all - that is needed */ -#endif -} refine_t; - - -task_t *PMR_create_s_task(int first, int last, int depth, - int bl_begin, int bl_end, double spdiam, - double lgap, rrr_t *RRR); +namespace pmrrr { namespace detail { -task_t *PMR_create_c_task(int first, int last, int depth, - int bl_begin, int bl_end, double spdiam, - double lgap, int proc_W_begin, - int proc_W_end, int left_pid, int right_pid, - rrr_t *RRR); + template + struct singleton_t { + int begin; + int end; + int depth; + int bl_begin; + int bl_end; + FloatingType bl_spdiam; + FloatingType lgap; + rrr_t *RRR; + }; -task_t *PMR_create_r_task(int begin, int end, double *D, - double *DLL, int p, int q, int bl_size, - double bl_spdiam, int tid); + template + struct cluster_t { + int begin; + int end; + int depth; + int bl_begin; /* In priciple not needed since info */ + int bl_end; /* also contained in iblock+isplit */ + FloatingType bl_spdiam; + FloatingType lgap; + int proc_W_begin; + int proc_W_end; + int left_pid; + int right_pid; + rrr_t *RRR; + bool wait_until_refined; + comm_t *messages; + }; -int PMR_refine_sem_init(refine_t *refine); -int PMR_refine_sem_destroy(refine_t *refine); -int PMR_refine_sem_post(refine_t *refine); -int PMR_refine_sem_wait(refine_t *refine); + template + struct refine_t{ + int begin; + int end; + FloatingType *D; + FloatingType *DLL; + int p; + int q; + int bl_size; + FloatingType bl_spdiam; + int producer_tid; // not longer needed + #ifndef DISABLE_PTHREADS + sem_t *sem; /* since semt_t is a handle could also store it + instead of pointer to it, but pointer is all + that is needed */ + #endif + }; +} } #endif diff --git a/external/pmrrr/include/pmrrr/process_task.h b/external/pmrrr/include/pmrrr/lapack/lapack.hpp similarity index 67% rename from external/pmrrr/include/pmrrr/process_task.h rename to external/pmrrr/include/pmrrr/lapack/lapack.hpp index 5add95c779..bfad85b239 100644 --- a/external/pmrrr/include/pmrrr/process_task.h +++ b/external/pmrrr/include/pmrrr/lapack/lapack.hpp @@ -1,4 +1,7 @@ /* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2016, Marcin Copik * All rights reserved. * * Redistribution and use in source and binary forms, with or @@ -38,33 +41,17 @@ * */ -#ifndef PPROCESS_TASK_H -#define PPROCESS_TASK_H - -#include "global.h" -#include "structs.h" -#include "tasks.h" -#include "counter.h" -#include "queue.h" +#ifndef __LAPACK_HPP___ +#define __LAPACK_HPP___ -/* Function prototypes */ -int PMR_process_c_task(cluster_t *cl, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, workQ_t *workQ, - counter_t *num_left, double *work, int *iwork); +namespace pmrrr { namespace lapack { -int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, counter_t *num_left, - double *work, int *iwork); + /* Subroutine */ + int oerbla(const char *srname, int *info); + + int olsame(const char *ca, const char *cb); -int PMR_process_r_task(refine_t *rf, proc_t *procinfo, val_t *Wstruct, - tol_t *tolstruct, double *work, int *iwork); +} } -void PMR_process_r_queue(int tid, proc_t *procinfo, val_t *Wstruct, - vec_t *Zstruct, tol_t *tolstruct, - workQ_t *workQ, - counter_t *num_left, double *work, - int *iwork); -#endif /* End of header file */ +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/ode2.hpp b/external/pmrrr/include/pmrrr/lapack/ode2.hpp new file mode 100644 index 0000000000..378c0e2964 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/ode2.hpp @@ -0,0 +1,152 @@ +/** + C++ template version of LAPACK routine dlae2. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODE2_HPP__ +#define __ODE2_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int ode2(FloatingType *a, FloatingType *b, FloatingType *c__, + FloatingType *rt1, FloatingType *rt2) + { + /* System generated locals */ + FloatingType d__1; + + /* Builtin functions */ + // double sqrt(double); + + /* Local variables */ + FloatingType ab, df, tb, sm, rt, adf, acmn, acmx; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODE2 computes the eigenvalues of a 2-by-2 symmetric matrix */ + /* [ A B ] */ + /* [ B C ]. */ + /* On return, RT1 is the eigenvalue of larger absolute value, and RT2 */ + /* is the eigenvalue of smaller absolute value. */ + + /* Arguments */ + /* ========= */ + + /* A (input) DOUBLE PRECISION */ + /* The (1,1) element of the 2-by-2 matrix. */ + + /* B (input) DOUBLE PRECISION */ + /* The (1,2) and (2,1) elements of the 2-by-2 matrix. */ + + /* C (input) DOUBLE PRECISION */ + /* The (2,2) element of the 2-by-2 matrix. */ + + /* RT1 (output) DOUBLE PRECISION */ + /* The eigenvalue of larger absolute value. */ + + /* RT2 (output) DOUBLE PRECISION */ + /* The eigenvalue of smaller absolute value. */ + + /* Further Details */ + /* =============== */ + + /* RT1 is accurate to a few ulps barring over/underflow. */ + + /* RT2 may be inaccurate if there is massive cancellation in the */ + /* determinant A*C-B*B; higher precision or correctly rounded or */ + /* correctly truncated arithmetic would be needed to compute RT2 */ + /* accurately in all cases. */ + + /* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ + /* Underflow is harmless if the input data is 0 or exceeds */ + /* underflow_threshold / macheps. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = fabs(df); + tb = *b + *b; + ab = fabs(tb); + if (fabs(*a) > fabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { + /* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { + /* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + + /* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + + /* Order of execution important. */ + /* To get fully accurate smaller eigenvalue, */ + /* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + + /* Order of execution important. */ + /* To get fully accurate smaller eigenvalue, */ + /* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + + /* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + + /* End of ODE2 */ + + } /* ode2_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odebz.hpp b/external/pmrrr/include/pmrrr/lapack/odebz.hpp new file mode 100644 index 0000000000..90335ab79f --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odebz.hpp @@ -0,0 +1,637 @@ +/** + C++ template version of LAPACK routine dlaebz. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODEBZ_HPP__ +#define __ODEBZ_HPP__ + +#include +#include +#include +#include +#include +#include + +#define imax(a,b) ( (a) > (b) ? (a) : (b) ) +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odebz(int *ijob, int *nitmax, int *n, + int *mmax, int *minp, int *nbmin, FloatingType *abstol, + FloatingType *reltol, FloatingType *pivmin, FloatingType *d__, FloatingType * + e, FloatingType *e2, int *nval, FloatingType *ab, FloatingType *c__, + int *mout, int *nab, FloatingType *work, int *iwork, + int *info) + { + /* System generated locals */ + int nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, + i__5, i__6; + FloatingType d__1, d__2, d__3, d__4; + + /* Local variables */ + int j, kf, ji, kl, jp, jit; + FloatingType tmp1, tmp2; + int itmp1, itmp2, kfnew, klnew; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODEBZ contains the iteration loops which compute and use the */ + /* function N(w), which is the count of eigenvalues of a symmetric */ + /* tridiagonal matrix T less than or equal to its argument w. It */ + /* performs a choice of two types of loops: */ + + /* IJOB=1, followed by */ + /* IJOB=2: It takes as input a list of intervals and returns a list of */ + /* sufficiently small intervals whose union contains the same */ + /* eigenvalues as the union of the original intervals. */ + /* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */ + /* The output interval (AB(j,1),AB(j,2)] will contain */ + /* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */ + + /* IJOB=3: It performs a binary search in each input interval */ + /* (AB(j,1),AB(j,2)] for a point w(j) such that */ + /* N(w(j))=NVAL(j), and uses C(j) as the starting point of */ + /* the search. If such a w(j) is found, then on output */ + /* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output */ + /* (AB(j,1),AB(j,2)] will be a small interval containing the */ + /* point where N(w) jumps through NVAL(j), unless that point */ + /* lies outside the initial interval. */ + + /* Note that the intervals are in all cases half-open intervals, */ + /* i.e., of the form (a,b] , which includes b but not a . */ + + /* To avoid underflow, the matrix should be scaled so that its largest */ + /* element is no greater than overflow**(1/2) * underflow**(1/4) */ + /* in absolute value. To assure the most accurate computation */ + /* of small eigenvalues, the matrix should be scaled to be */ + /* not much smaller than that, either. */ + + /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ + /* Matrix", Report CS41, Computer Science Dept., Stanford */ + /* University, July 21, 1966 */ + + /* Note: the arguments are, in general, *not* checked for unreasonable */ + /* values. */ + + /* Arguments */ + /* ========= */ + + /* IJOB (input) INT */ + /* Specifies what is to be done: */ + /* = 1: Compute NAB for the initial intervals. */ + /* = 2: Perform bisection iteration to find eigenvalues of T. */ + /* = 3: Perform bisection iteration to invert N(w), i.e., */ + /* to find a point which has a specified number of */ + /* eigenvalues of T to its left. */ + /* Other values will cause ODEBZ to return with INFO=-1. */ + + /* NITMAX (input) INT */ + /* The maximum number of "levels" of bisection to be */ + /* performed, i.e., an interval of width W will not be made */ + /* smaller than 2^(-NITMAX) * W. If not all intervals */ + /* have converged after NITMAX iterations, then INFO is set */ + /* to the number of non-converged intervals. */ + + /* N (input) INT */ + /* The dimension n of the tridiagonal matrix T. It must be at */ + /* least 1. */ + + /* MMAX (input) INT */ + /* The maximum number of intervals. If more than MMAX intervals */ + /* are generated, then ODEBZ will quit with INFO=MMAX+1. */ + + /* MINP (input) INT */ + /* The initial number of intervals. It may not be greater than */ + /* MMAX. */ + + /* NBMIN (input) INT */ + /* The smallest number of intervals that should be processed */ + /* using a vector loop. If zero, then only the scalar loop */ + /* will be used. */ + + /* ABSTOL (input) DOUBLE PRECISION */ + /* The minimum (absolute) width of an interval. When an */ + /* interval is narrower than ABSTOL, or than RELTOL times the */ + /* larger (in magnitude) endpoint, then it is considered to be */ + /* sufficiently small, i.e., converged. This must be at least */ + /* zero. */ + + /* RELTOL (input) DOUBLE PRECISION */ + /* The minimum relative width of an interval. When an interval */ + /* is narrower than ABSTOL, or than RELTOL times the larger (in */ + /* magnitude) endpoint, then it is considered to be */ + /* sufficiently small, i.e., converged. Note: this should */ + /* always be at least radix*machine epsilon. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum absolute value of a "pivot" in the Sturm */ + /* sequence loop. This *must* be at least max |e(j)**2| * */ + /* safe_min and at least safe_min, where safe_min is at least */ + /* the smallest number that can divide one without overflow. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The diagonal elements of the tridiagonal matrix T. */ + + /* E (input) DOUBLE PRECISION array, dimension (N) */ + /* The offdiagonal elements of the tridiagonal matrix T in */ + /* positions 1 through N-1. E(N) is arbitrary. */ + + /* E2 (input) DOUBLE PRECISION array, dimension (N) */ + /* The squares of the offdiagonal elements of the tridiagonal */ + /* matrix T. E2(N) is ignored. */ + + /* NVAL (input/output) INT array, dimension (MINP) */ + /* If IJOB=1 or 2, not referenced. */ + /* If IJOB=3, the desired values of N(w). The elements of NVAL */ + /* will be reordered to correspond with the intervals in AB. */ + /* Thus, NVAL(j) on output will not, in general be the same as */ + /* NVAL(j) on input, but it will correspond with the interval */ + /* (AB(j,1),AB(j,2)] on output. */ + + /* AB (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */ + /* The endpoints of the intervals. AB(j,1) is a(j), the left */ + /* endpoint of the j-th interval, and AB(j,2) is b(j), the */ + /* right endpoint of the j-th interval. The input intervals */ + /* will, in general, be modified, split, and reordered by the */ + /* calculation. */ + + /* C (input/output) DOUBLE PRECISION array, dimension (MMAX) */ + /* If IJOB=1, ignored. */ + /* If IJOB=2, workspace. */ + /* If IJOB=3, then on input C(j) should be initialized to the */ + /* first search point in the binary search. */ + + /* MOUT (output) INT */ + /* If IJOB=1, the number of eigenvalues in the intervals. */ + /* If IJOB=2 or 3, the number of intervals output. */ + /* If IJOB=3, MOUT will equal MINP. */ + + /* NAB (input/output) INT array, dimension (MMAX,2) */ + /* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */ + /* If IJOB=2, then on input, NAB(i,j) should be set. It must */ + /* satisfy the condition: */ + /* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */ + /* which means that in interval i only eigenvalues */ + /* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, */ + /* NAB(i,j)=N(AB(i,j)), from a previous call to ODEBZ with */ + /* IJOB=1. */ + /* On output, NAB(i,j) will contain */ + /* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */ + /* the input interval that the output interval */ + /* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */ + /* the input values of NAB(k,1) and NAB(k,2). */ + /* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */ + /* unless N(w) > NVAL(i) for all search points w , in which */ + /* case NAB(i,1) will not be modified, i.e., the output */ + /* value will be the same as the input value (modulo */ + /* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */ + /* for all search points w , in which case NAB(i,2) will */ + /* not be modified. Normally, NAB should be set to some */ + /* distinctive value(s) before ODEBZ is called. */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (MMAX) */ + /* Workspace. */ + + /* IWORK (workspace) INT array, dimension (MMAX) */ + /* Workspace. */ + + /* INFO (output) INT */ + /* = 0: All intervals converged. */ + /* = 1--MMAX: The last INFO intervals did not converge. */ + /* = MMAX+1: More than MMAX intervals were generated. */ + + /* Further Details */ + /* =============== */ + + /* This routine is intended to be called only by other LAPACK */ + /* routines, thus the interface is less user-friendly. It is intended */ + /* for two purposes: */ + + /* (a) finding eigenvalues. In this case, ODEBZ should have one or */ + /* more initial intervals set up in AB, and ODEBZ should be called */ + /* with IJOB=1. This sets up NAB, and also counts the eigenvalues. */ + /* Intervals with no eigenvalues would usually be thrown out at */ + /* this point. Also, if not all the eigenvalues in an interval i */ + /* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */ + /* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */ + /* eigenvalue. ODEBZ is then called with IJOB=2 and MMAX */ + /* no smaller than the value of MOUT returned by the call with */ + /* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */ + /* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */ + /* tolerance specified by ABSTOL and RELTOL. */ + + /* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */ + /* In this case, start with a Gershgorin interval (a,b). Set up */ + /* AB to contain 2 search intervals, both initially (a,b). One */ + /* NVAL element should contain f-1 and the other should contain l */ + /* , while C should contain a and b, resp. NAB(i,1) should be -1 */ + /* and NAB(i,2) should be N+1, to flag an error if the desired */ + /* interval does not lie in (a,b). ODEBZ is then called with */ + /* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- */ + /* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */ + /* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */ + /* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and */ + /* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and */ + /* w(l-r)=...=w(l+k) are handled similarly. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Check for Errors */ + + /* Parameter adjustments */ + nab_dim1 = *mmax; + nab_offset = 1 + nab_dim1; + nab -= nab_offset; + ab_dim1 = *mmax; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --d__; + --e; + --e2; + --nval; + --c__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + if (*ijob < 1 || *ijob > 3) { + *info = -1; + return 0; + } + + /* Initialize NAB */ + + if (*ijob == 1) { + + /* Compute the number of eigenvalues in the initial intervals. */ + + *mout = 0; + i__1 = *minp; + for (ji = 1; ji <= i__1; ++ji) { + for (jp = 1; jp <= 2; ++jp) { + tmp1 = d__[1] - ab[ji + jp * ab_dim1]; + if (fabs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + nab[ji + jp * nab_dim1] = 0; + if (tmp1 <= 0.) { + nab[ji + jp * nab_dim1] = 1; + } + + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1]; + if (fabs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) { + ++nab[ji + jp * nab_dim1]; + } + /* L10: */ + } + /* L20: */ + } + *mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1]; + /* L30: */ + } + return 0; + } + + /* Initialize for loop */ + + /* KF and KL have the following meaning: */ + /* Intervals 1,...,KF-1 have converged. */ + /* Intervals KF,...,KL still need to be refined. */ + + kf = 1; + kl = *minp; + + /* If IJOB=2, initialize C. */ + /* If IJOB=3, use the user-supplied starting point. */ + + if (*ijob == 2) { + i__1 = *minp; + for (ji = 1; ji <= i__1; ++ji) { + c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; + /* L40: */ + } + } + + /* Iteration loop */ + + i__1 = *nitmax; + for (jit = 1; jit <= i__1; ++jit) { + + /* Loop over intervals */ + + if (kl - kf + 1 >= *nbmin && *nbmin > 0) { + + /* Begin of Parallel Version of the loop */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + + /* Compute N(c), the number of eigenvalues less than c */ + + work[ji] = d__[1] - c__[ji]; + iwork[ji] = 0; + if (work[ji] <= *pivmin) { + iwork[ji] = 1; + /* Computing MIN */ + d__1 = work[ji], d__2 = -(*pivmin); + work[ji] = fmin(d__1,d__2); + } + + i__3 = *n; + for (j = 2; j <= i__3; ++j) { + work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji]; + if (work[ji] <= *pivmin) { + ++iwork[ji]; + /* Computing MIN */ + d__1 = work[ji], d__2 = -(*pivmin); + work[ji] = fmin(d__1,d__2); + } + /* L50: */ + } + /* L60: */ + } + + if (*ijob <= 2) { + + /* IJOB=2: Choose all intervals containing eigenvalues. */ + + klnew = kl; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + + /* Insure that N(w) is monotone */ + + /* Computing MIN */ + /* Computing MAX */ + i__5 = nab[ji + nab_dim1], i__6 = iwork[ji]; + i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,i__6); + iwork[ji] = imin(i__3,i__4); + + /* Update the Queue -- add intervals if both halves */ + /* contain eigenvalues. */ + + if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) { + + /* No eigenvalue in the upper interval: */ + /* just use the lower interval. */ + + ab[ji + (ab_dim1 << 1)] = c__[ji]; + + } else if (iwork[ji] == nab[ji + nab_dim1]) { + + /* No eigenvalue in the lower interval: */ + /* just use the upper interval. */ + + ab[ji + ab_dim1] = c__[ji]; + } else { + ++klnew; + if (klnew <= *mmax) { + + /* Eigenvalue in both intervals -- add upper to */ + /* queue. */ + + ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << + 1)]; + nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 + << 1)]; + ab[klnew + ab_dim1] = c__[ji]; + nab[klnew + nab_dim1] = iwork[ji]; + ab[ji + (ab_dim1 << 1)] = c__[ji]; + nab[ji + (nab_dim1 << 1)] = iwork[ji]; + } else { + *info = *mmax + 1; + } + } + /* L70: */ + } + if (*info != 0) { + return 0; + } + kl = klnew; + } else { + + /* IJOB=3: Binary search. Keep only the interval containing */ + /* w s.t. N(w) = NVAL */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + if (iwork[ji] <= nval[ji]) { + ab[ji + ab_dim1] = c__[ji]; + nab[ji + nab_dim1] = iwork[ji]; + } + if (iwork[ji] >= nval[ji]) { + ab[ji + (ab_dim1 << 1)] = c__[ji]; + nab[ji + (nab_dim1 << 1)] = iwork[ji]; + } + /* L80: */ + } + } + + } else { + + /* End of Parallel Version of the loop */ + + /* Begin of Serial Version of the loop */ + + klnew = kl; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + + /* Compute N(w), the number of eigenvalues less than w */ + + tmp1 = c__[ji]; + tmp2 = d__[1] - tmp1; + itmp1 = 0; + if (tmp2 <= *pivmin) { + itmp1 = 1; + /* Computing MIN */ + d__1 = tmp2, d__2 = -(*pivmin); + tmp2 = fmin(d__1,d__2); + } + + i__3 = *n; + for (j = 2; j <= i__3; ++j) { + tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1; + if (tmp2 <= *pivmin) { + ++itmp1; + /* Computing MIN */ + d__1 = tmp2, d__2 = -(*pivmin); + tmp2 = fmin(d__1,d__2); + } + /* L90: */ + } + + if (*ijob <= 2) { + + /* IJOB=2: Choose all intervals containing eigenvalues. */ + + /* Insure that N(w) is monotone */ + + /* Computing MIN */ + /* Computing MAX */ + i__5 = nab[ji + nab_dim1]; + i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,itmp1); + itmp1 = imin(i__3,i__4); + + /* Update the Queue -- add intervals if both halves */ + /* contain eigenvalues. */ + + if (itmp1 == nab[ji + (nab_dim1 << 1)]) { + + /* No eigenvalue in the upper interval: */ + /* just use the lower interval. */ + + ab[ji + (ab_dim1 << 1)] = tmp1; + + } else if (itmp1 == nab[ji + nab_dim1]) { + + /* No eigenvalue in the lower interval: */ + /* just use the upper interval. */ + + ab[ji + ab_dim1] = tmp1; + } else if (klnew < *mmax) { + + /* Eigenvalue in both intervals -- add upper to queue. */ + + ++klnew; + ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)]; + nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << + 1)]; + ab[klnew + ab_dim1] = tmp1; + nab[klnew + nab_dim1] = itmp1; + ab[ji + (ab_dim1 << 1)] = tmp1; + nab[ji + (nab_dim1 << 1)] = itmp1; + } else { + *info = *mmax + 1; + return 0; + } + } else { + + /* IJOB=3: Binary search. Keep only the interval */ + /* containing w s.t. N(w) = NVAL */ + + if (itmp1 <= nval[ji]) { + ab[ji + ab_dim1] = tmp1; + nab[ji + nab_dim1] = itmp1; + } + if (itmp1 >= nval[ji]) { + ab[ji + (ab_dim1 << 1)] = tmp1; + nab[ji + (nab_dim1 << 1)] = itmp1; + } + } + /* L100: */ + } + kl = klnew; + + /* End of Serial Version of the loop */ + + } + + /* Check for convergence */ + + kfnew = kf; + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], fabs( + d__1)); + /* Computing MAX */ + d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], fabs(d__1)), d__4 = (d__2 = + ab[ji + ab_dim1], fabs(d__2)); + tmp2 = fmax(d__3,d__4); + /* Computing MAX */ + d__1 = fmax(*abstol,*pivmin), d__2 = *reltol * tmp2; + if (tmp1 < fmax(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + ( + nab_dim1 << 1)]) { + + /* Converged -- Swap with position KFNEW, */ + /* then increment KFNEW */ + + if (ji > kfnew) { + tmp1 = ab[ji + ab_dim1]; + tmp2 = ab[ji + (ab_dim1 << 1)]; + itmp1 = nab[ji + nab_dim1]; + itmp2 = nab[ji + (nab_dim1 << 1)]; + ab[ji + ab_dim1] = ab[kfnew + ab_dim1]; + ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)]; + nab[ji + nab_dim1] = nab[kfnew + nab_dim1]; + nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)]; + ab[kfnew + ab_dim1] = tmp1; + ab[kfnew + (ab_dim1 << 1)] = tmp2; + nab[kfnew + nab_dim1] = itmp1; + nab[kfnew + (nab_dim1 << 1)] = itmp2; + if (*ijob == 3) { + itmp1 = nval[ji]; + nval[ji] = nval[kfnew]; + nval[kfnew] = itmp1; + } + } + ++kfnew; + } + /* L110: */ + } + kf = kfnew; + + /* Choose Midpoints */ + + i__2 = kl; + for (ji = kf; ji <= i__2; ++ji) { + c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; + /* L120: */ + } + + /* If no more intervals to refine, quit. */ + + if (kf > kl) { + goto L140; + } + /* L130: */ + } + + /* Converged */ + + L140: + /* Computing MAX */ + i__1 = kl + 1 - kf; + *info = imax(i__1,0); + *mout = kl; + + return 0; + + /* End of ODEBZ */ + + } /* odebz_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odev2.hpp b/external/pmrrr/include/pmrrr/lapack/odev2.hpp new file mode 100644 index 0000000000..79078a0aa9 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odev2.hpp @@ -0,0 +1,198 @@ +/** + C++ template version of LAPACK routine dlaev2. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODEV2_HPP__ +#define __ODEV2_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odev2(FloatingType *a, FloatingType *b, FloatingType *c__, + FloatingType *rt1, FloatingType *rt2, FloatingType *cs1, FloatingType *sn1) + { + /* System generated locals */ + FloatingType d__1; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + FloatingType ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + int sgn1, sgn2; + FloatingType acmn, acmx; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */ + /* [ A B ] */ + /* [ B C ]. */ + /* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */ + /* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */ + /* eigenvector for RT1, giving the decomposition */ + + /* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] */ + /* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. */ + + /* Arguments */ + /* ========= */ + + /* A (input) DOUBLE PRECISION */ + /* The (1,1) element of the 2-by-2 matrix. */ + + /* B (input) DOUBLE PRECISION */ + /* The (1,2) element and the conjugate of the (2,1) element of */ + /* the 2-by-2 matrix. */ + + /* C (input) DOUBLE PRECISION */ + /* The (2,2) element of the 2-by-2 matrix. */ + + /* RT1 (output) DOUBLE PRECISION */ + /* The eigenvalue of larger absolute value. */ + + /* RT2 (output) DOUBLE PRECISION */ + /* The eigenvalue of smaller absolute value. */ + + /* CS1 (output) DOUBLE PRECISION */ + /* SN1 (output) DOUBLE PRECISION */ + /* The vector (CS1, SN1) is a unit right eigenvector for RT1. */ + + /* Further Details */ + /* =============== */ + + /* RT1 is accurate to a few ulps barring over/underflow. */ + + /* RT2 may be inaccurate if there is massive cancellation in the */ + /* determinant A*C-B*B; higher precision or correctly rounded or */ + /* correctly truncated arithmetic would be needed to compute RT2 */ + /* accurately in all cases. */ + + /* CS1 and SN1 are accurate to a few ulps barring over/underflow. */ + + /* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ + /* Underflow is harmless if the input data is 0 or exceeds */ + /* underflow_threshold / macheps. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = fabs(df); + tb = *b + *b; + ab = fabs(tb); + if (fabs(*a) > fabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { + /* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { + /* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + + /* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + sgn1 = -1; + + /* Order of execution important. */ + /* To get fully accurate smaller eigenvalue, */ + /* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + + /* Order of execution important. */ + /* To get fully accurate smaller eigenvalue, */ + /* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + + /* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + + /* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = fabs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + + /* End of ODEV2 */ + + } /* odev2_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odnan.hpp b/external/pmrrr/include/pmrrr/lapack/odnan.hpp new file mode 100644 index 0000000000..2b3cb50990 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odnan.hpp @@ -0,0 +1,63 @@ +/** + C++ template version of LAPACK routine disnan. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODNAN_HPP__ +#define __ODNAN_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +namespace pmrrr { namespace lapack { + + template + int odnan(FloatingType *din) + { + /* System generated locals */ + int ret_val; + + /* Local variables */ + //extern int odsnan_(FloatingType *, FloatingType *); + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ + /* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ + /* future. */ + + /* Arguments */ + /* ========= */ + + /* DIN (input) DOUBLE PRECISION */ + /* Input to test for NaN. */ + + /* ===================================================================== */ + + /* .. External Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + ret_val = odsnan(din, din); + return ret_val; + } /* odnan_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odneg.hpp b/external/pmrrr/include/pmrrr/lapack/odneg.hpp new file mode 100644 index 0000000000..ae5dc62b49 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odneg.hpp @@ -0,0 +1,232 @@ +/** + C++ template version of LAPACK routine dlaneg. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODNEG_HPP__ +#define __ODNEG_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +#define imax(a,b) ( (a) > (b) ? (a) : (b) ) +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) + +namespace pmrrr { namespace lapack { + + template + int odneg(int *n, FloatingType *d__, FloatingType *lld, FloatingType * + sigma, FloatingType *pivmin, int *r__) + { + /* System generated locals */ + int ret_val, i__1, i__2, i__3, i__4; + + /* Local variables */ + int j; + FloatingType p, t; + int bj; + FloatingType tmp; + int neg1, neg2; + FloatingType bsav, gamma, dplus; + //extern int odnan_(FloatingType *); + int negcnt; + int sawnan; + FloatingType dminus; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODNEG computes the Sturm count, the number of negative pivots */ + /* encountered while factoring tridiagonal T - sigma I = L D L^T. */ + /* This implementation works directly on the factors without forming */ + /* the tridiagonal matrix T. The Sturm count is also the number of */ + /* eigenvalues of T less than sigma. */ + + /* This routine is called from DLARRB. */ + + /* The current routine does not use the PIVMIN parameter but rather */ + /* requires IEEE-754 propagation of Infinities and NaNs. This */ + /* routine also has no input range restrictions but does require */ + /* default exception handling such that x/0 produces Inf when x is */ + /* non-zero, and Inf/Inf produces NaN. For more information, see: */ + + /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */ + /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */ + /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */ + /* (Tech report version in LAWN 172 with the same title.) */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of the diagonal matrix D. */ + + /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (N-1) elements L(i)*L(i)*D(i). */ + + /* SIGMA (input) DOUBLE PRECISION */ + /* Shift amount in T - sigma I = L D L^T. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence. May be used */ + /* when zero pivots are encountered on non-IEEE-754 */ + /* architectures. */ + + /* R (input) INT */ + /* The twist index for the twisted factorization that is used */ + /* for the negcount. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + /* Jason Riedy, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* Some architectures propagate Infinities and NaNs very slowly, so */ + /* the code computes counts in BLKLEN chunks. Then a NaN can */ + /* propagate at most BLKLEN columns before being detected. This is */ + /* not a general tuning parameter; it needs only to be just large */ + /* enough that the overhead is tiny in common cases. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + /* Parameter adjustments */ + --lld; + --d__; + + /* Function Body */ + negcnt = 0; + /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */ + t = -(*sigma); + i__1 = *r__ - 1; + for (bj = 1; bj <= i__1; bj += 128) { + neg1 = 0; + bsav = t; + /* Computing MIN */ + i__3 = bj + 127, i__4 = *r__ - 1; + i__2 = imin(i__3,i__4); + for (j = bj; j <= i__2; ++j) { + dplus = d__[j] + t; + if (dplus < 0.) { + ++neg1; + } + tmp = t / dplus; + t = tmp * lld[j] - *sigma; + /* L21: */ + } + sawnan = odnan(&t); + /* Run a slower version of the above loop if a NaN is detected. */ + /* A NaN should occur only with a zero pivot after an infinite */ + /* pivot. In that case, substituting 1 for T/DPLUS is the */ + /* correct limit. */ + if (sawnan) { + neg1 = 0; + t = bsav; + /* Computing MIN */ + i__3 = bj + 127, i__4 = *r__ - 1; + i__2 = imin(i__3,i__4); + for (j = bj; j <= i__2; ++j) { + dplus = d__[j] + t; + if (dplus < 0.) { + ++neg1; + } + tmp = t / dplus; + if (odnan(&tmp)) { + tmp = 1.; + } + t = tmp * lld[j] - *sigma; + /* L22: */ + } + } + negcnt += neg1; + /* L210: */ + } + + /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */ + p = d__[*n] - *sigma; + i__1 = *r__; + for (bj = *n - 1; bj >= i__1; bj += -128) { + neg2 = 0; + bsav = p; + /* Computing MAX */ + i__3 = bj - 127; + i__2 = imax(i__3,*r__); + for (j = bj; j >= i__2; --j) { + dminus = lld[j] + p; + if (dminus < 0.) { + ++neg2; + } + tmp = p / dminus; + p = tmp * d__[j] - *sigma; + /* L23: */ + } + sawnan = odnan(&p); + /* As above, run a slower version that substitutes 1 for Inf/Inf. */ + + if (sawnan) { + neg2 = 0; + p = bsav; + /* Computing MAX */ + i__3 = bj - 127; + i__2 = imax(i__3,*r__); + for (j = bj; j >= i__2; --j) { + dminus = lld[j] + p; + if (dminus < 0.) { + ++neg2; + } + tmp = p / dminus; + if (odnan(&tmp)) { + tmp = 1.; + } + p = tmp * d__[j] - *sigma; + /* L24: */ + } + } + negcnt += neg2; + /* L230: */ + } + + /* III) Twist index */ + /* T was shifted by SIGMA initially. */ + gamma = t + *sigma + p; + if (gamma < 0.) { + ++negcnt; + } + ret_val = negcnt; + return ret_val; + } /* odneg_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odnst.hpp b/external/pmrrr/include/pmrrr/lapack/odnst.hpp new file mode 100644 index 0000000000..a1d8cc1817 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odnst.hpp @@ -0,0 +1,177 @@ +/** + C++ template version of LAPACK routine dlanst. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODNST_HPP__ +#define __ODNST_HPP__ + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace pmrrr { namespace lapack { + + template + FloatingType odnst(const char *norm, int *n, FloatingType *d__, FloatingType *e) + { + /* Table of constant values */ + static int c__1 = 1; + + /* System generated locals */ + int i__1; + FloatingType ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__; + FloatingType sum, scale; + //extern int olsame(char *, char *); + FloatingType anorm; + //extern /* Subroutine */ int odssq_(int *, FloatingType *, int *, + //FloatingType *, FloatingType *); + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODNST returns the value of the one norm, or the Frobenius norm, or */ + /* the infinity norm, or the element of largest absolute value of a */ + /* real symmetric tridiagonal matrix A. */ + + /* Description */ + /* =========== */ + + /* ODNST returns the value */ + + /* ODNST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ + /* ( */ + /* ( norm1(A), NORM = '1', 'O' or 'o' */ + /* ( */ + /* ( normI(A), NORM = 'I' or 'i' */ + /* ( */ + /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ + + /* where norm1 denotes the one norm of a matrix (maximum column sum), */ + /* normI denotes the infinity norm of a matrix (maximum row sum) and */ + /* normF denotes the Frobenius norm of a matrix (square root of sum of */ + /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ + + /* Arguments */ + /* ========= */ + + /* NORM (input) CHARACTER*1 */ + /* Specifies the value to be returned in ODNST as described */ + /* above. */ + + /* N (input) INT */ + /* The order of the matrix A. N >= 0. When N = 0, ODNST is */ + /* set to zero. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The diagonal elements of A. */ + + /* E (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (n-1) sub-diagonal or super-diagonal elements of A. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (olsame(norm, "M")) { + + /* Find max(abs(A(i,j))). */ + + anorm = (d__1 = d__[*n], fabs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + /* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], fabs(d__1)); + anorm = fmax(d__2,d__3); + /* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = e[i__], fabs(d__1)); + anorm = fmax(d__2,d__3); + /* L10: */ + } + } else if (olsame(norm, "O") || *(unsigned char *) + norm == '1' || olsame(norm, "I")) { + + /* Find norm1(A). */ + + if (*n == 1) { + anorm = fabs(d__[1]); + } else { + /* Computing MAX */ + d__3 = fabs(d__[1]) + fabs(e[1]), d__4 = (d__1 = e[*n - 1], fabs( + d__1)) + (d__2 = d__[*n], fabs(d__2)); + anorm = fmax(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { + /* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], fabs(d__1)) + (d__2 = e[ + i__], fabs(d__2)) + (d__3 = e[i__ - 1], fabs(d__3)); + anorm = fmax(d__4,d__5); + /* L20: */ + } + } + } else if (olsame(norm, "F") || olsame(norm, "E")) { + + /* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + odssq(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + odssq(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + + /* End of ODNST */ + + } /* odnst_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odr1v.hpp b/external/pmrrr/include/pmrrr/lapack/odr1v.hpp new file mode 100644 index 0000000000..462702c6e4 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odr1v.hpp @@ -0,0 +1,457 @@ +/** + C++ template version of LAPACK routine odr1v. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODR1V_HPP__ +#define __ODR1V_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define TRUE_ (1) +#define FALSE_ (0) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odr1v(int *n, int *b1, int *bn, FloatingType + *lambda, FloatingType *d__, FloatingType *l, FloatingType *ld, FloatingType * + lld, FloatingType *pivmin, FloatingType *gaptol, FloatingType *z__, int + *wantnc, int *negcnt, FloatingType *ztz, FloatingType *mingma, + int *r__, int *isuppz, FloatingType *nrminv, FloatingType *resid, + FloatingType *rqcorr, FloatingType *work) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2, d__3; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__; + FloatingType s; + int r1, r2; + FloatingType eps, tmp; + int neg1, neg2, indp, inds; + FloatingType dplus; + // extern FloatingType odmch_(char *); + //extern int odnan_(FloatingType *); + int indlpl, indumn; + FloatingType dminus; + int sawnan1, sawnan2; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODR1V computes the (scaled) r-th column of the inverse of */ + /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */ + /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */ + /* computed vector is an accurate eigenvector. Usually, r corresponds */ + /* to the index where the eigenvector is largest in magnitude. */ + /* The following steps accomplish this computation : */ + /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */ + /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */ + /* (c) Computation of the diagonal elements of the inverse of */ + /* L D L^T - sigma I by combining the above transforms, and choosing */ + /* r as the index where the diagonal of the inverse is (one of the) */ + /* largest in magnitude. */ + /* (d) Computation of the (scaled) r-th column of the inverse using the */ + /* twisted factorization obtained by combining the top part of the */ + /* the stationary and the bottom part of the progressive transform. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix L D L^T. */ + + /* B1 (input) INT */ + /* First index of the submatrix of L D L^T. */ + + /* BN (input) INT */ + /* Last index of the submatrix of L D L^T. */ + + /* LAMBDA (input) DOUBLE PRECISION */ + /* The shift. In order to compute an accurate eigenvector, */ + /* LAMBDA should be a good approximation to an eigenvalue */ + /* of L D L^T. */ + + /* L (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */ + /* L, in elements 1 to N-1. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The n diagonal elements of the diagonal matrix D. */ + + /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The n-1 elements L(i)*D(i). */ + + /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The n-1 elements L(i)*L(i)*D(i). */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence. */ + + /* GAPTOL (input) DOUBLE PRECISION */ + /* Tolerance that indicates when eigenvector entries are negligible */ + /* w.r.t. their contribution to the residual. */ + + /* Z (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On input, all entries of Z must be set to 0. */ + /* On output, Z contains the (scaled) r-th column of the */ + /* inverse. The scaling is such that Z(R) equals 1. */ + + /* WANTNC (input) INT */ + /* Specifies whether NEGCNT has to be computed. */ + + /* NEGCNT (output) INT */ + /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */ + /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */ + + /* ZTZ (output) DOUBLE PRECISION */ + /* The square of the 2-norm of Z. */ + + /* MINGMA (output) DOUBLE PRECISION */ + /* The reciprocal of the largest (in magnitude) diagonal */ + /* element of the inverse of L D L^T - sigma I. */ + + /* R (input/output) INT */ + /* The twist index for the twisted factorization used to */ + /* compute Z. */ + /* On input, 0 <= R <= N. If R is input as 0, R is set to */ + /* the index where (L D L^T - sigma I)^{-1} is largest */ + /* in magnitude. If 1 <= R <= N, R is unchanged. */ + /* On output, R contains the twist index used to compute Z. */ + /* Ideally, R designates the position of the maximum entry in the */ + /* eigenvector. */ + + /* ISUPPZ (output) INT array, dimension (2) */ + /* The support of the vector in Z, i.e., the vector Z is */ + /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */ + + /* NRMINV (output) DOUBLE PRECISION */ + /* NRMINV = 1/SQRT( ZTZ ) */ + + /* RESID (output) DOUBLE PRECISION */ + /* The residual of the FP vector. */ + /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */ + + /* RQCORR (output) DOUBLE PRECISION */ + /* The Rayleigh Quotient correction to LAMBDA. */ + /* RQCORR = MINGMA*TMP */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --isuppz; + --z__; + --lld; + --ld; + --l; + --d__; + + /* Function Body */ + eps = std::numeric_limits::epsilon(); // eps = odmch_("Precision"); + if (*r__ == 0) { + r1 = *b1; + r2 = *bn; + } else { + r1 = *r__; + r2 = *r__; + } + /* Storage for LPLUS */ + indlpl = 0; + /* Storage for UMINUS */ + indumn = *n; + inds = (*n << 1) + 1; + indp = *n * 3 + 1; + if (*b1 == 1) { + work[inds] = 0.; + } else { + work[inds + *b1 - 1] = lld[*b1 - 1]; + } + + /* Compute the stationary transform (using the differential form) */ + /* until the index R2. */ + + sawnan1 = FALSE_; + neg1 = 0; + s = work[inds + *b1 - 1] - *lambda; + i__1 = r1 - 1; + for (i__ = *b1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + work[indlpl + i__] = ld[i__] / dplus; + if (dplus < 0.) { + ++neg1; + } + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + s = work[inds + i__] - *lambda; + /* L50: */ + } + sawnan1 = odnan(&s); + if (sawnan1) { + goto L60; + } + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + work[indlpl + i__] = ld[i__] / dplus; + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + s = work[inds + i__] - *lambda; + /* L51: */ + } + sawnan1 = odnan(&s); + + L60: + if (sawnan1) { + /* Runs a slower version of the above loop if a NaN is detected */ + neg1 = 0; + s = work[inds + *b1 - 1] - *lambda; + i__1 = r1 - 1; + for (i__ = *b1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + if (fabs(dplus) < *pivmin) { + dplus = -(*pivmin); + } + work[indlpl + i__] = ld[i__] / dplus; + if (dplus < 0.) { + ++neg1; + } + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + if (work[indlpl + i__] == 0.) { + work[inds + i__] = lld[i__]; + } + s = work[inds + i__] - *lambda; + /* L70: */ + } + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + dplus = d__[i__] + s; + if (fabs(dplus) < *pivmin) { + dplus = -(*pivmin); + } + work[indlpl + i__] = ld[i__] / dplus; + work[inds + i__] = s * work[indlpl + i__] * l[i__]; + if (work[indlpl + i__] == 0.) { + work[inds + i__] = lld[i__]; + } + s = work[inds + i__] - *lambda; + /* L71: */ + } + } + + /* Compute the progressive transform (using the differential form) */ + /* until the index R1 */ + + sawnan2 = FALSE_; + neg2 = 0; + work[indp + *bn - 1] = d__[*bn] - *lambda; + i__1 = r1; + for (i__ = *bn - 1; i__ >= i__1; --i__) { + dminus = lld[i__] + work[indp + i__]; + tmp = d__[i__] / dminus; + if (dminus < 0.) { + ++neg2; + } + work[indumn + i__] = l[i__] * tmp; + work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; + /* L80: */ + } + tmp = work[indp + r1 - 1]; + sawnan2 = odnan(&tmp); + if (sawnan2) { + /* Runs a slower version of the above loop if a NaN is detected */ + neg2 = 0; + i__1 = r1; + for (i__ = *bn - 1; i__ >= i__1; --i__) { + dminus = lld[i__] + work[indp + i__]; + if (fabs(dminus) < *pivmin) { + dminus = -(*pivmin); + } + tmp = d__[i__] / dminus; + if (dminus < 0.) { + ++neg2; + } + work[indumn + i__] = l[i__] * tmp; + work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; + if (tmp == 0.) { + work[indp + i__ - 1] = d__[i__] - *lambda; + } + /* L100: */ + } + } + + /* Find the index (from R1 to R2) of the largest (in magnitude) */ + /* diagonal element of the inverse */ + + *mingma = work[inds + r1 - 1] + work[indp + r1 - 1]; + if (*mingma < 0.) { + ++neg1; + } + if (*wantnc) { + *negcnt = neg1 + neg2; + } else { + *negcnt = -1; + } + if (fabs(*mingma) == 0.) { + *mingma = eps * work[inds + r1 - 1]; + } + *r__ = r1; + i__1 = r2 - 1; + for (i__ = r1; i__ <= i__1; ++i__) { + tmp = work[inds + i__] + work[indp + i__]; + if (tmp == 0.) { + tmp = eps * work[inds + i__]; + } + if (fabs(tmp) <= fabs(*mingma)) { + *mingma = tmp; + *r__ = i__ + 1; + } + /* L110: */ + } + + /* Compute the FP vector: solve N^T v = e_r */ + + isuppz[1] = *b1; + isuppz[2] = *bn; + z__[*r__] = 1.; + *ztz = 1.; + + /* Compute the FP vector upwards from R */ + + if (! sawnan1 && ! sawnan2) { + i__1 = *b1; + for (i__ = *r__ - 1; i__ >= i__1; --i__) { + z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); + if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( + d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { + z__[i__] = 0.; + isuppz[1] = i__ + 1; + goto L220; + } + *ztz += z__[i__] * z__[i__]; + /* L210: */ + } + L220: + ; + } else { + /* Run slower loop if NaN occurred. */ + i__1 = *b1; + for (i__ = *r__ - 1; i__ >= i__1; --i__) { + if (z__[i__ + 1] == 0.) { + z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2]; + } else { + z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); + } + if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( + d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { + z__[i__] = 0.; + isuppz[1] = i__ + 1; + goto L240; + } + *ztz += z__[i__] * z__[i__]; + /* L230: */ + } + L240: + ; + } + /* Compute the FP vector downwards from R in blocks of size BLKSIZ */ + if (! sawnan1 && ! sawnan2) { + i__1 = *bn - 1; + for (i__ = *r__; i__ <= i__1; ++i__) { + z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); + if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( + d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { + z__[i__ + 1] = 0.; + isuppz[2] = i__; + goto L260; + } + *ztz += z__[i__ + 1] * z__[i__ + 1]; + /* L250: */ + } + L260: + ; + } else { + /* Run slower loop if NaN occurred. */ + i__1 = *bn - 1; + for (i__ = *r__; i__ <= i__1; ++i__) { + if (z__[i__] == 0.) { + z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1]; + } else { + z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); + } + if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( + d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { + z__[i__ + 1] = 0.; + isuppz[2] = i__; + goto L280; + } + *ztz += z__[i__ + 1] * z__[i__ + 1]; + /* L270: */ + } + L280: + ; + } + + /* Compute quantities for convergence test */ + + tmp = 1. / *ztz; + *nrminv = sqrt(tmp); + *resid = fabs(*mingma) * *nrminv; + *rqcorr = *mingma * tmp; + + + return 0; + + /* End of ODR1V */ + + } /* odr1v_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrnv.hpp b/external/pmrrr/include/pmrrr/lapack/odrnv.hpp new file mode 100644 index 0000000000..b271e06553 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrnv.hpp @@ -0,0 +1,160 @@ +/** + C++ template version of LAPACK routine dlarnv. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRNV_HPP__ +#define __ODRNV_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrnv(int *idist, int *iseed, int *n, + FloatingType *x) + { + /* System generated locals */ + int i__1, i__2, i__3; + + /* Builtin functions */ + // FloatingType log(FloatingType), sqrt(FloatingType), cos(FloatingType); + + /* Local variables */ + int i__; + FloatingType u[128]; + int il, iv, il2; + //extern int odruv_(int *, int *, FloatingType *); + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODRNV returns a vector of n random real numbers from a uniform or */ + /* normal distribution. */ + + /* Arguments */ + /* ========= */ + + /* IDIST (input) INT */ + /* Specifies the distribution of the random numbers: */ + /* = 1: uniform (0,1) */ + /* = 2: uniform (-1,1) */ + /* = 3: normal (0,1) */ + + /* ISEED (input/output) INT array, dimension (4) */ + /* On entry, the seed of the random number generator; the array */ + /* elements must be between 0 and 4095, and ISEED(4) must be */ + /* odd. */ + /* On exit, the seed is updated. */ + + /* N (input) INT */ + /* The number of random numbers to be generated. */ + + /* X (output) DOUBLE PRECISION array, dimension (N) */ + /* The generated random numbers. */ + + /* Further Details */ + /* =============== */ + + /* This routine calls the auxiliary routine ODRUV to generate random */ + /* real numbers from a uniform (0,1) distribution, in batches of up to */ + /* 128 using vectorisable code. The Box-Muller method is used to */ + /* transform numbers from a uniform to a normal distribution. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Local Arrays .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + --iseed; + + /* Function Body */ + i__1 = *n; + for (iv = 1; iv <= i__1; iv += 64) { + /* Computing MIN */ + i__2 = 64, i__3 = *n - iv + 1; + il = imin(i__2,i__3); + if (*idist == 3) { + il2 = il << 1; + } else { + il2 = il; + } + + /* Call ODRUV to generate IL2 numbers from a uniform (0,1) */ + /* distribution (IL2 <= LV) */ + + odruv(&iseed[1], &il2, u); + + if (*idist == 1) { + + /* Copy generated numbers */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = u[i__ - 1]; + /* L10: */ + } + } else if (*idist == 2) { + + /* Convert generated numbers to uniform (-1,1) distribution */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.; + /* L20: */ + } + } else if (*idist == 3) { + + /* Convert generated numbers to normal (0,1) distribution */ + + i__2 = il; + for (i__ = 1; i__ <= i__2; ++i__) { + x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[( + i__ << 1) - 1] * 6.2831853071795864769252867663); + /* L30: */ + } + } + /* L40: */ + } + return 0; + + /* End of ODRNV */ + + } /* odrnv_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrra.hpp b/external/pmrrr/include/pmrrr/lapack/odrra.hpp new file mode 100644 index 0000000000..5028cc3a04 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrra.hpp @@ -0,0 +1,165 @@ +/** + C++ template version of LAPACK routine dlarra. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRA_HPP__ +#define __ODRRA_HPP__ + +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrra(int *n, FloatingType *d__, FloatingType *e, + FloatingType *e2, FloatingType *spltol, FloatingType *tnrm, int *nsplit, + int *isplit, int *info) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__; + FloatingType tmp1, eabs; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Compute the splitting points with threshold SPLTOL. */ + /* ODRRA sets any "small" off-diagonal elements to zero. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. N > 0. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the N diagonal elements of the tridiagonal */ + /* matrix T. */ + + /* E (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the first (N-1) entries contain the subdiagonal */ + /* elements of the tridiagonal matrix T; E(N) need not be set. */ + /* On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */ + /* are set to zero, the other entries of E are untouched. */ + + /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the first (N-1) entries contain the SQUARES of the */ + /* subdiagonal elements of the tridiagonal matrix T; */ + /* E2(N) need not be set. */ + /* On exit, the entries E2( ISPLIT( I ) ), */ + /* 1 <= I <= NSPLIT, have been set to zero */ + + /* SPLTOL (input) DOUBLE PRECISION */ + /* The threshold for splitting. Two criteria can be used: */ + /* SPLTOL<0 : criterion based on absolute off-diagonal value */ + /* SPLTOL>0 : criterion that preserves relative accuracy */ + + /* TNRM (input) DOUBLE PRECISION */ + /* The norm of the matrix. */ + + /* NSPLIT (output) INT */ + /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ + + /* ISPLIT (output) INT array, dimension (N) */ + /* The splitting points, at which T breaks up into blocks. */ + /* The first block consists of rows/columns 1 to ISPLIT(1), */ + /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ + /* etc., and the NSPLIT-th consists of rows/columns */ + /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ + + + /* INFO (output) INT */ + /* = 0: successful exit */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --isplit; + --e2; + --e; + --d__; + + /* Function Body */ + *info = 0; + /* Compute splitting points */ + *nsplit = 1; + if (*spltol < 0.) { + /* Criterion based on absolute off-diagonal value */ + tmp1 = fabs(*spltol) * *tnrm; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + eabs = (d__1 = e[i__], fabs(d__1)); + if (eabs <= tmp1) { + e[i__] = 0.; + e2[i__] = 0.; + isplit[*nsplit] = i__; + ++(*nsplit); + } + /* L9: */ + } + } else { + /* Criterion that guarantees relative accuracy */ + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + eabs = (d__1 = e[i__], fabs(d__1)); + if (eabs <= *spltol * sqrt((d__1 = d__[i__], fabs(d__1))) * sqrt(( + d__2 = d__[i__ + 1], fabs(d__2)))) { + e[i__] = 0.; + e2[i__] = 0.; + isplit[*nsplit] = i__; + ++(*nsplit); + } + /* L10: */ + } + } + isplit[*nsplit] = *n; + return 0; + + /* End of ODRRA */ + + } /* odrra_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrb.hpp b/external/pmrrr/include/pmrrr/lapack/odrrb.hpp new file mode 100644 index 0000000000..f01ff3d1d2 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrb.hpp @@ -0,0 +1,361 @@ +/** + C++ template version of LAPACK routine dlarrb. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRB_HPP__ +#define __ODRRB_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrb(int *n, FloatingType *d__, FloatingType *lld, + int *ifirst, int *ilast, FloatingType *rtol1, FloatingType *rtol2, + int *offset, FloatingType *w, FloatingType *wgap, FloatingType *werr, + FloatingType *work, int *iwork, FloatingType *pivmin, FloatingType * + spdiam, int *twist, int *info) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType log(FloatingType); + + /* Local variables */ + int i__, k, r__, i1, ii, ip; + FloatingType gap, mid, tmp, back, lgap, rgap, left; + int iter, nint, prev, next; + FloatingType cvrgd, right, width; + //extern int odneg_(int *, FloatingType *, FloatingType *, FloatingType * + //, FloatingType *, int *); + int negcnt; + FloatingType mnwdth; + int olnint, maxitr; + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Given the relatively robust representation(RRR) L D L^T, ODRRB */ + /* does "limited" bisection to refine the eigenvalues of L D L^T, */ + /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ + /* guesses for these eigenvalues are input in W, the corresponding estimate */ + /* of the error in these guesses and their gaps are input in WERR */ + /* and WGAP, respectively. During bisection, intervals */ + /* [left, right] are maintained by storing their mid-points and */ + /* semi-widths in the arrays W and WERR respectively. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of the diagonal matrix D. */ + + /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (N-1) elements L(i)*L(i)*D(i). */ + + /* IFIRST (input) INT */ + /* The index of the first eigenvalue to be computed. */ + + /* ILAST (input) INT */ + /* The index of the last eigenvalue to be computed. */ + + /* RTOL1 (input) DOUBLE PRECISION */ + /* RTOL2 (input) DOUBLE PRECISION */ + /* Tolerance for the convergence of the bisection intervals. */ + /* An interval [LEFT,RIGHT] has converged if */ + /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ + /* where GAP is the (estimated) distance to the nearest */ + /* eigenvalue. */ + + /* OFFSET (input) INT */ + /* Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */ + /* through ILAST-OFFSET elements of these arrays are to be used. */ + + /* W (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ + /* estimates of the eigenvalues of L D L^T indexed IFIRST throug */ + /* ILAST. */ + /* On output, these estimates are refined. */ + + /* WGAP (input/output) DOUBLE PRECISION array, dimension (N-1) */ + /* On input, the (estimated) gaps between consecutive */ + /* eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */ + /* eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */ + /* then WGAP(IFIRST-OFFSET) must be set to ZERO. */ + /* On output, these gaps are refined. */ + + /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ + /* the errors in the estimates of the corresponding elements in W. */ + /* On output, these errors are refined. */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + /* Workspace. */ + + /* IWORK (workspace) INT array, dimension (2*N) */ + /* Workspace. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence. */ + + /* SPDIAM (input) DOUBLE PRECISION */ + /* The spectral diameter of the matrix. */ + + /* TWIST (input) INT */ + /* The twist index for the twisted factorization that is used */ + /* for the negcount. */ + /* TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */ + /* TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */ + /* TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */ + + /* INFO (output) INT */ + /* Error flag. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --werr; + --wgap; + --w; + --lld; + --d__; + + /* Function Body */ + *info = 0; + + maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + + 2; + mnwdth = *pivmin * 2.; + + r__ = *twist; + if (r__ < 1 || r__ > *n) { + r__ = *n; + } + + /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ + /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ + /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */ + /* for an unconverged interval is set to the index of the next unconverged */ + /* interval, and is -1 or 0 for a converged interval. Thus a linked */ + /* list of unconverged intervals is set up. */ + + i1 = *ifirst; + /* The number of unconverged intervals */ + nint = 0; + /* The last unconverged interval found */ + prev = 0; + rgap = wgap[i1 - *offset]; + i__1 = *ilast; + for (i__ = i1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + left = w[ii] - werr[ii]; + right = w[ii] + werr[ii]; + lgap = rgap; + rgap = wgap[ii]; + gap = fmin(lgap,rgap); + /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ + /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */ + + /* Do while( NEGCNT(LEFT).GT.I-1 ) */ + + back = werr[ii]; + L20: + negcnt = odneg(n, &d__[1], &lld[1], &left, pivmin, &r__); + if (negcnt > i__ - 1) { + left -= back; + back *= 2.; + goto L20; + } + + /* Do while( NEGCNT(RIGHT).LT.I ) */ + /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */ + + back = werr[ii]; + L50: + negcnt = odneg(n, &d__[1], &lld[1], &right, pivmin, &r__); + if (negcnt < i__) { + right += back; + back *= 2.; + goto L50; + } + width = (d__1 = left - right, fabs(d__1)) * .5; + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + tmp = fmax(d__1,d__2); + /* Computing MAX */ + d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; + cvrgd = fmax(d__1,d__2); + if (width <= cvrgd || width <= mnwdth) { + /* This interval has already converged and does not need refinement. */ + /* (Note that the gaps might change through refining the */ + /* eigenvalues, however, they can only get bigger.) */ + /* Remove it from the list. */ + iwork[k - 1] = -1; + /* Make sure that I1 always points to the first unconverged interval */ + if (i__ == i1 && i__ < *ilast) { + i1 = i__ + 1; + } + if (prev >= i1 && i__ <= *ilast) { + iwork[(prev << 1) - 1] = i__ + 1; + } + } else { + /* unconverged interval found */ + prev = i__; + ++nint; + iwork[k - 1] = i__ + 1; + iwork[k] = negcnt; + } + work[k - 1] = left; + work[k] = right; + /* L75: */ + } + + /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ + /* and while (ITER.LT.MAXITR) */ + + iter = 0; + L80: + prev = i1 - 1; + i__ = i1; + olnint = nint; + i__1 = olnint; + for (ip = 1; ip <= i__1; ++ip) { + k = i__ << 1; + ii = i__ - *offset; + rgap = wgap[ii]; + lgap = rgap; + if (ii > 1) { + lgap = wgap[ii - 1]; + } + gap = fmin(lgap,rgap); + next = iwork[k - 1]; + left = work[k - 1]; + right = work[k]; + mid = (left + right) * .5; + /* semiwidth of interval */ + width = right - mid; + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + tmp = fmax(d__1,d__2); + /* Computing MAX */ + d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; + cvrgd = fmax(d__1,d__2); + if (width <= cvrgd || width <= mnwdth || iter == maxitr) { + /* reduce number of unconverged intervals */ + --nint; + /* Mark interval as converged. */ + iwork[k - 1] = 0; + if (i1 == i__) { + i1 = next; + } else { + /* Prev holds the last unconverged interval previously examined */ + if (prev >= i1) { + iwork[(prev << 1) - 1] = next; + } + } + i__ = next; + goto L100; + } + prev = i__; + + /* Perform one bisection step */ + + negcnt = odneg(n, &d__[1], &lld[1], &mid, pivmin, &r__); + if (negcnt <= i__ - 1) { + work[k - 1] = mid; + } else { + work[k] = mid; + } + i__ = next; + L100: + ; + } + ++iter; + /* do another loop if there are still unconverged intervals */ + /* However, in the last iteration, all intervals are accepted */ + /* since this is the best we can do. */ + if (nint > 0 && iter <= maxitr) { + goto L80; + } + + + /* At this point, all the intervals have converged */ + i__1 = *ilast; + for (i__ = *ifirst; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + /* All intervals marked by '0' have been refined. */ + if (iwork[k - 1] == 0) { + w[ii] = (work[k - 1] + work[k]) * .5; + werr[ii] = work[k] - w[ii]; + } + /* L110: */ + } + + i__1 = *ilast; + for (i__ = *ifirst + 1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + /* Computing MAX */ + d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1]; + wgap[ii - 1] = fmax(d__1,d__2); + /* L111: */ + } + return 0; + + /* End of ODRRB */ + + } /* odrrb_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrc.hpp b/external/pmrrr/include/pmrrr/lapack/odrrc.hpp new file mode 100644 index 0000000000..26bafcde38 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrc.hpp @@ -0,0 +1,194 @@ +/** + C++ template version of LAPACK routine dlarrc. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRC_HPP__ +#define __ODRRC_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrc(const char *jobt, int *n, FloatingType *vl, + FloatingType *vu, FloatingType *d__, FloatingType *e, FloatingType *pivmin, + int *eigcnt, int *lcnt, int *rcnt, int *info) + { + /* System generated locals */ + int i__1; + FloatingType d__1; + + /* Local variables */ + int i__; + FloatingType sl, su, tmp, tmp2; + int matt; + FloatingType lpivot, rpivot; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Find the number of eigenvalues of the symmetric tridiagonal matrix T */ + /* that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */ + /* if JOBT = 'L'. */ + + /* Arguments */ + /* ========= */ + + /* JOBT (input) CHARACTER*1 */ + /* = 'T': Compute Sturm count for matrix T. */ + /* = 'L': Compute Sturm count for matrix L D L^T. */ + + /* N (input) INT */ + /* The order of the matrix. N > 0. */ + + /* VL (input) DOUBLE PRECISION */ + /* VU (input) DOUBLE PRECISION */ + /* The lower and upper bounds for the eigenvalues. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */ + /* JOBT = 'L': The N diagonal elements of the diagonal matrix D. */ + + /* E (input) DOUBLE PRECISION array, dimension (N) */ + /* JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */ + /* JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence for T. */ + + /* EIGCNT (output) INT */ + /* The number of eigenvalues of the symmetric tridiagonal matrix T */ + /* that are in the interval (VL,VU] */ + + /* LCNT (output) INT */ + /* RCNT (output) INT */ + /* The left and right negcounts of the interval. */ + + /* INFO (output) INT */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + *lcnt = 0; + *rcnt = 0; + *eigcnt = 0; + matt = olsame(jobt, "T"); + if (matt) { + /* Sturm sequence count on T */ + lpivot = d__[1] - *vl; + rpivot = d__[1] - *vu; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + /* Computing 2nd power */ + d__1 = e[i__]; + tmp = d__1 * d__1; + lpivot = d__[i__ + 1] - *vl - tmp / lpivot; + rpivot = d__[i__ + 1] - *vu - tmp / rpivot; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + /* L10: */ + } + } else { + /* Sturm sequence count on L D L^T */ + sl = -(*vl); + su = -(*vu); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + lpivot = d__[i__] + sl; + rpivot = d__[i__] + su; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + tmp = e[i__] * d__[i__] * e[i__]; + + tmp2 = tmp / lpivot; + if (tmp2 == 0.) { + sl = tmp - *vl; + } else { + sl = sl * tmp2 - *vl; + } + + tmp2 = tmp / rpivot; + if (tmp2 == 0.) { + su = tmp - *vu; + } else { + su = su * tmp2 - *vu; + } + /* L20: */ + } + lpivot = d__[*n] + sl; + rpivot = d__[*n] + su; + if (lpivot <= 0.) { + ++(*lcnt); + } + if (rpivot <= 0.) { + ++(*rcnt); + } + } + *eigcnt = *rcnt - *lcnt; + return 0; + + /* end of ODRRC */ + + } /* odrrc_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrd.hpp b/external/pmrrr/include/pmrrr/lapack/odrrd.hpp new file mode 100644 index 0000000000..a803eb6c67 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrd.hpp @@ -0,0 +1,804 @@ +/** + C++ template version of LAPACK routine dlarrd. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRD_HPP__ +#define __ODRRD_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define imax(a,b) ( (a) > (b) ? (a) : (b) ) +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) +#define TRUE_ (1) +#define FALSE_ (0) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrd(const char *range, const char *order, int *n, FloatingType *vl, + FloatingType *vu, int *il, int *iu, FloatingType *gers, + FloatingType *reltol, FloatingType *d__, FloatingType *e, FloatingType *e2, + FloatingType *pivmin, int *nsplit, int *isplit, int *m, + FloatingType *w, FloatingType *werr, FloatingType *wl, FloatingType *wu, + int *iblock, int *indexw, FloatingType *work, int *iwork, + int *info) + { + /* Table of constant values */ + static int c__1 = 1; + static int c_n1 = -1; + static int c__3 = 3; + static int c__2 = 2; + static int c__0 = 0; + /* System generated locals */ + int i__1, i__2, i__3; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType log(FloatingType); + + /* Local variables */ + int i__, j, ib, ie, je, nb; + FloatingType gl; + int im, in; + FloatingType gu; + int iw, jee; + FloatingType eps; + int nwl; + FloatingType wlu, wul; + int nwu; + FloatingType tmp1, tmp2; + int iend, jblk, ioff, iout, itmp1, itmp2, jdisc; + int iinfo; + FloatingType atoli; + int iwoff, itmax; + FloatingType wkill, rtoli, uflow, tnorm; + // extern FloatingType dlamch_(char *); + int ibegin; + int irange, idiscl, idumma[1]; + /* extern int ilaenv_(int *, char *, char *, int *, int *, */ + /* int *, int *); */ + int idiscu; + int ncnvrg, toofew; + + + /* -- LAPACK auxiliary routine (version 3.2.1) -- */ + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + /* -- April 2009 -- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODRRD computes the eigenvalues of a symmetric tridiagonal */ + /* matrix T to suitable accuracy. This is an auxiliary code to be */ + /* called from DSTEMR. */ + /* The user may ask for all eigenvalues, all eigenvalues */ + /* in the half-open interval (VL, VU], or the IL-th through IU-th */ + /* eigenvalues. */ + + /* To avoid overflow, the matrix must be scaled so that its */ + /* largest element is no greater than overflow**(1/2) * */ + /* underflow**(1/4) in absolute value, and for greatest */ + /* accuracy, it should not be much smaller than that. */ + + /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ + /* Matrix", Report CS41, Computer Science Dept., Stanford */ + /* University, July 21, 1966. */ + + /* Arguments */ + /* ========= */ + + /* RANGE (input) CHARACTER */ + /* = 'A': ("All") all eigenvalues will be found. */ + /* = 'V': ("Value") all eigenvalues in the half-open interval */ + /* (VL, VU] will be found. */ + /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ + /* entire matrix) will be found. */ + + /* ORDER (input) CHARACTER */ + /* = 'B': ("By Block") the eigenvalues will be grouped by */ + /* split-off block (see IBLOCK, ISPLIT) and */ + /* ordered from smallest to largest within */ + /* the block. */ + /* = 'E': ("Entire matrix") */ + /* the eigenvalues for the entire matrix */ + /* will be ordered from smallest to */ + /* largest. */ + + /* N (input) INT */ + /* The order of the tridiagonal matrix T. N >= 0. */ + + /* VL (input) DOUBLE PRECISION */ + /* VU (input) DOUBLE PRECISION */ + /* If RANGE='V', the lower and upper bounds of the interval to */ + /* be searched for eigenvalues. Eigenvalues less than or equal */ + /* to VL, or greater than VU, will not be returned. VL < VU. */ + /* Not referenced if RANGE = 'A' or 'I'. */ + + /* IL (input) INT */ + /* IU (input) INT */ + /* If RANGE='I', the indices (in ascending order) of the */ + /* smallest and largest eigenvalues to be returned. */ + /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ + /* Not referenced if RANGE = 'A' or 'V'. */ + + /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ + /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ + /* is (GERS(2*i-1), GERS(2*i)). */ + + /* RELTOL (input) DOUBLE PRECISION */ + /* The minimum relative width of an interval. When an interval */ + /* is narrower than RELTOL times the larger (in */ + /* magnitude) endpoint, then it is considered to be */ + /* sufficiently small, i.e., converged. Note: this should */ + /* always be at least radix*machine epsilon. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The n diagonal elements of the tridiagonal matrix T. */ + + /* E (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ + + /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot allowed in the Sturm sequence for T. */ + + /* NSPLIT (input) INT */ + /* The number of diagonal blocks in the matrix T. */ + /* 1 <= NSPLIT <= N. */ + + /* ISPLIT (input) INT array, dimension (N) */ + /* The splitting points, at which T breaks up into submatrices. */ + /* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ + /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ + /* etc., and the NSPLIT-th consists of rows/columns */ + /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ + /* (Only the first NSPLIT elements will actually be used, but */ + /* since the user cannot know a priori what value NSPLIT will */ + /* have, N words must be reserved for ISPLIT.) */ + + /* M (output) INT */ + /* The actual number of eigenvalues found. 0 <= M <= N. */ + /* (See also the description of INFO=2,3.) */ + + /* W (output) DOUBLE PRECISION array, dimension (N) */ + /* On exit, the first M elements of W will contain the */ + /* eigenvalue approximations. ODRRD computes an interval */ + /* I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */ + /* approximation is given as the interval midpoint */ + /* W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */ + /* WERR(j) = abs( a_j - b_j)/2 */ + + /* WERR (output) DOUBLE PRECISION array, dimension (N) */ + /* The error bound on the corresponding eigenvalue approximation */ + /* in W. */ + + /* WL (output) DOUBLE PRECISION */ + /* WU (output) DOUBLE PRECISION */ + /* The interval (WL, WU] contains all the wanted eigenvalues. */ + /* If RANGE='V', then WL=VL and WU=VU. */ + /* If RANGE='A', then WL and WU are the global Gerschgorin bounds */ + /* on the spectrum. */ + /* If RANGE='I', then WL and WU are computed by ODEBZ from the */ + /* index range specified. */ + + /* IBLOCK (output) INT array, dimension (N) */ + /* At each row/column j where E(j) is zero or small, the */ + /* matrix T is considered to split into a block diagonal */ + /* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ + /* block (from 1 to the number of blocks) the eigenvalue W(i) */ + /* belongs. (ODRRD may use the remaining N-M elements as */ + /* workspace.) */ + + /* INDEXW (output) INT array, dimension (N) */ + /* The indices of the eigenvalues within each block (submatrix); */ + /* for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */ + /* i-th eigenvalue W(i) is the j-th eigenvalue in block k. */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ + + /* IWORK (workspace) INT array, dimension (3*N) */ + + /* INFO (output) INT */ + /* = 0: successful exit */ + /* < 0: if INFO = -i, the i-th argument had an illegal value */ + /* > 0: some or all of the eigenvalues failed to converge or */ + /* were not computed: */ + /* =1 or 3: Bisection failed to converge for some */ + /* eigenvalues; these eigenvalues are flagged by a */ + /* negative block number. The effect is that the */ + /* eigenvalues may not be as accurate as the */ + /* absolute and relative tolerances. This is */ + /* generally caused by unexpectedly inaccurate */ + /* arithmetic. */ + /* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ + /* IL:IU were found. */ + /* Effect: M < IU+1-IL */ + /* Cause: non-monotonic arithmetic, causing the */ + /* Sturm sequence to be non-monotonic. */ + /* Cure: recalculate, using RANGE='A', and pick */ + /* out eigenvalues IL:IU. In some cases, */ + /* increasing the PARAMETER "FUDGE" may */ + /* make things work. */ + /* = 4: RANGE='I', and the Gershgorin interval */ + /* initially used was too small. No eigenvalues */ + /* were computed. */ + /* Probable cause: your machine has sloppy */ + /* floating-point arithmetic. */ + /* Cure: Increase the PARAMETER "FUDGE", */ + /* recompile, and try again. */ + + /* Internal Parameters */ + /* =================== */ + + /* FUDGE DOUBLE PRECISION, default = 2 */ + /* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ + /* a value of 1 should work, but on machines with sloppy */ + /* arithmetic, this needs to be larger. The default for */ + /* publicly released versions should be large enough to handle */ + /* the worst machine around. Note that this has no effect */ + /* on accuracy of the solution. */ + + /* Based on contributions by */ + /* W. Kahan, University of California, Berkeley, USA */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Local Arrays .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --indexw; + --iblock; + --werr; + --w; + --isplit; + --e2; + --e; + --d__; + --gers; + + /* Function Body */ + *info = 0; + + /* Decode RANGE */ + + if (olsame(range, "A")) { + irange = 1; + } else if (olsame(range, "V")) { + irange = 2; + } else if (olsame(range, "I")) { + irange = 3; + } else { + irange = 0; + } + + /* Check for Errors */ + + if (irange <= 0) { + *info = -1; + } else if (! (olsame(order, "B") || olsame(order, + "E"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (irange == 2) { + if (*vl >= *vu) { + *info = -5; + } + } else if (irange == 3 && (*il < 1 || *il > imax(1,*n))) { + *info = -6; + } else if (irange == 3 && (*iu < imin(*n,*il) || *iu > *n)) { + *info = -7; + } + + if (*info != 0) { + return 0; + } + /* Initialize error flags */ + *info = 0; + ncnvrg = FALSE_; + toofew = FALSE_; + /* Quick return if possible */ + *m = 0; + if (*n == 0) { + return 0; + } + /* Simplification: */ + if (irange == 3 && *il == 1 && *iu == *n) { + irange = 1; + } + /* Get machine constants */ + eps = std::numeric_limits::epsilon(); // odmch_("P"); + uflow = std::numeric_limits::min(); // odmch_("U"); + /* Special Case when N=1 */ + /* Treat case of 1x1 matrix for quick return */ + if (*n == 1) { + if (irange == 1 || (irange == 2 && d__[1] > *vl && d__[1] <= *vu) || + (irange == 3 && *il == 1 && *iu == 1)) { + *m = 1; + w[1] = d__[1]; + /* The computation error of the eigenvalue is zero */ + werr[1] = 0.; + iblock[1] = 1; + indexw[1] = 1; + } + return 0; + } + /* NB is the minimum vector length for vector bisection, or 0 */ + /* if only scalar is to be done. */ + nb = 1; // ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); + if (nb <= 1) { + nb = 0; + } + /* Find global spectral radius */ + gl = d__[1]; + gu = d__[1]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + /* Computing MIN */ + d__1 = gl, d__2 = gers[(i__ << 1) - 1]; + gl = fmin(d__1,d__2); + /* Computing MAX */ + d__1 = gu, d__2 = gers[i__ * 2]; + gu = fmax(d__1,d__2); + /* L5: */ + } + /* Compute global Gerschgorin bounds and spectral diameter */ + /* Computing MAX */ + d__1 = fabs(gl), d__2 = fabs(gu); + tnorm = fmax(d__1,d__2); + gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.; + gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.; + /* [JAN/28/2009] remove the line below since SPDIAM variable not use */ + /* SPDIAM = GU - GL */ + /* Input arguments for ODEBZ: */ + /* The relative tolerance. An interval (a,b] lies within */ + /* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), */ + rtoli = *reltol; + /* Set the absolute tolerance for interval convergence to zero to force */ + /* interval convergence based on relative size of the interval. */ + /* This is dangerous because intervals might not converge when RELTOL is */ + /* small. But at least a very small number should be selected so that for */ + /* strongly graded matrices, the code can get relatively accurate */ + /* eigenvalues. */ + atoli = uflow * 4. + *pivmin * 4.; + if (irange == 3) { + /* RANGE='I': Compute an interval containing eigenvalues */ + /* IL through IU. The initial interval [GL,GU] from the global */ + /* Gerschgorin bounds GL and GU is refined by ODEBZ. */ + itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + + 2; + work[*n + 1] = gl; + work[*n + 2] = gl; + work[*n + 3] = gu; + work[*n + 4] = gu; + work[*n + 5] = gl; + work[*n + 6] = gu; + iwork[1] = -1; + iwork[2] = -1; + iwork[3] = *n + 1; + iwork[4] = *n + 1; + iwork[5] = *il - 1; + iwork[6] = *iu; + + odebz(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, & + d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5] + , &iout, &iwork[1], &w[1], &iblock[1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + /* On exit, output intervals may not be ordered by ascending negcount */ + if (iwork[6] == *iu) { + *wl = work[*n + 1]; + wlu = work[*n + 3]; + nwl = iwork[1]; + *wu = work[*n + 4]; + wul = work[*n + 2]; + nwu = iwork[4]; + } else { + *wl = work[*n + 2]; + wlu = work[*n + 4]; + nwl = iwork[2]; + *wu = work[*n + 3]; + wul = work[*n + 1]; + nwu = iwork[3]; + } + /* On exit, the interval [WL, WLU] contains a value with negcount NWL, */ + /* and [WUL, WU] contains a value with negcount NWU. */ + if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { + *info = 4; + return 0; + } + } else if (irange == 2) { + *wl = *vl; + *wu = *vu; + } else if (irange == 1) { + *wl = gl; + *wu = gu; + } + /* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */ + /* NWL accumulates the number of eigenvalues .le. WL, */ + /* NWU accumulates the number of eigenvalues .le. WU */ + *m = 0; + iend = 0; + *info = 0; + nwl = 0; + nwu = 0; + + i__1 = *nsplit; + for (jblk = 1; jblk <= i__1; ++jblk) { + ioff = iend; + ibegin = ioff + 1; + iend = isplit[jblk]; + in = iend - ioff; + + if (in == 1) { + /* 1x1 block */ + if (*wl >= d__[ibegin] - *pivmin) { + ++nwl; + } + if (*wu >= d__[ibegin] - *pivmin) { + ++nwu; + } + if (irange == 1 || (*wl < d__[ibegin] - *pivmin && *wu >= d__[ + ibegin] - *pivmin)) { + ++(*m); + w[*m] = d__[ibegin]; + werr[*m] = 0.; + /* The gap for a single block doesn't matter for the later */ + /* algorithm and is assigned an arbitrary large value */ + iblock[*m] = jblk; + indexw[*m] = 1; + } + /* Disabled 2x2 case because of a failure on the following matrix */ + /* RANGE = 'I', IL = IU = 4 */ + /* Original Tridiagonal, d = [ */ + /* -0.150102010615740E+00 */ + /* -0.849897989384260E+00 */ + /* -0.128208148052635E-15 */ + /* 0.128257718286320E-15 */ + /* ]; */ + /* e = [ */ + /* -0.357171383266986E+00 */ + /* -0.180411241501588E-15 */ + /* -0.175152352710251E-15 */ + /* ]; */ + + /* ELSE IF( IN.EQ.2 ) THEN */ + /* * 2x2 block */ + /* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */ + /* TMP1 = HALF*(D(IBEGIN)+D(IEND)) */ + /* L1 = TMP1 - DISC */ + /* IF( WL.GE. L1-PIVMIN ) */ + /* $ NWL = NWL + 1 */ + /* IF( WU.GE. L1-PIVMIN ) */ + /* $ NWU = NWU + 1 */ + /* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */ + /* $ L1-PIVMIN ) ) THEN */ + /* M = M + 1 */ + /* W( M ) = L1 */ + /* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ + /* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ + /* IBLOCK( M ) = JBLK */ + /* INDEXW( M ) = 1 */ + /* ENDIF */ + /* L2 = TMP1 + DISC */ + /* IF( WL.GE. L2-PIVMIN ) */ + /* $ NWL = NWL + 1 */ + /* IF( WU.GE. L2-PIVMIN ) */ + /* $ NWU = NWU + 1 */ + /* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */ + /* $ L2-PIVMIN ) ) THEN */ + /* M = M + 1 */ + /* W( M ) = L2 */ + /* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ + /* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ + /* IBLOCK( M ) = JBLK */ + /* INDEXW( M ) = 2 */ + /* ENDIF */ + } else { + /* General Case - block of size IN >= 2 */ + /* Compute local Gerschgorin interval and use it as the initial */ + /* interval for ODEBZ */ + gu = d__[ibegin]; + gl = d__[ibegin]; + tmp1 = 0.; + i__2 = iend; + for (j = ibegin; j <= i__2; ++j) { + /* Computing MIN */ + d__1 = gl, d__2 = gers[(j << 1) - 1]; + gl = fmin(d__1,d__2); + /* Computing MAX */ + d__1 = gu, d__2 = gers[j * 2]; + gu = fmax(d__1,d__2); + /* L40: */ + } + /* [JAN/28/2009] */ + /* change SPDIAM by TNORM in lines 2 and 3 thereafter */ + /* line 1: remove computation of SPDIAM (not useful anymore) */ + /* SPDIAM = GU - GL */ + /* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */ + /* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */ + gl = gl - tnorm * 2. * eps * in - *pivmin * 2.; + gu = gu + tnorm * 2. * eps * in + *pivmin * 2.; + + if (irange > 1) { + if (gu < *wl) { + /* the local block contains none of the wanted eigenvalues */ + nwl += in; + nwu += in; + goto L70; + } + /* refine search interval if possible, only range (WL,WU] matters */ + gl = fmax(gl,*wl); + gu = fmin(gu,*wu); + if (gl >= gu) { + goto L70; + } + } + /* Find negcount of initial interval boundaries GL and GU */ + work[*n + 1] = gl; + work[*n + in + 1] = gu; + odebz(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, + pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & + w[*m + 1], &iblock[*m + 1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + + nwl += iwork[1]; + nwu += iwork[in + 1]; + iwoff = *m - iwork[1]; + /* Compute Eigenvalues */ + itmax = (int) ((log(gu - gl + *pivmin) - log(*pivmin)) / log( + 2.)) + 2; + odebz(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, + pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & + work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], + &w[*m + 1], &iblock[*m + 1], &iinfo); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + + /* Copy eigenvalues into W and IBLOCK */ + /* Use -JBLK for block number for unconverged eigenvalues. */ + /* Loop over the number of output intervals from ODEBZ */ + i__2 = iout; + for (j = 1; j <= i__2; ++j) { + /* eigenvalue approximation is middle point of interval */ + tmp1 = (work[j + *n] + work[j + in + *n]) * .5; + /* semi length of error interval */ + tmp2 = (d__1 = work[j + *n] - work[j + in + *n], fabs(d__1)) * + .5; + if (j > iout - iinfo) { + /* Flag non-convergence. */ + ncnvrg = TRUE_; + ib = -jblk; + } else { + ib = jblk; + } + i__3 = iwork[j + in] + iwoff; + for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { + w[je] = tmp1; + werr[je] = tmp2; + indexw[je] = je - iwoff; + iblock[je] = ib; + /* L50: */ + } + /* L60: */ + } + + *m += im; + } + L70: + ; + } + /* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ + /* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */ + if (irange == 3) { + idiscl = *il - 1 - nwl; + idiscu = nwu - *iu; + + if (idiscl > 0) { + im = 0; + i__1 = *m; + for (je = 1; je <= i__1; ++je) { + /* Remove some of the smallest eigenvalues from the left so that */ + /* at the end IDISCL =0. Move all eigenvalues up to the left. */ + if (w[je] <= wlu && idiscl > 0) { + --idiscl; + } else { + ++im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } + /* L80: */ + } + *m = im; + } + if (idiscu > 0) { + /* Remove some of the largest eigenvalues from the right so that */ + /* at the end IDISCU =0. Move all eigenvalues up to the left. */ + im = *m + 1; + for (je = *m; je >= 1; --je) { + if (w[je] >= wul && idiscu > 0) { + --idiscu; + } else { + --im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } + /* L81: */ + } + jee = 0; + i__1 = *m; + for (je = im; je <= i__1; ++je) { + ++jee; + w[jee] = w[je]; + werr[jee] = werr[je]; + indexw[jee] = indexw[je]; + iblock[jee] = iblock[je]; + /* L82: */ + } + *m = *m - im + 1; + } + if (idiscl > 0 || idiscu > 0) { + /* Code to deal with effects of bad arithmetic. (If N(w) is */ + /* monotone non-decreasing, this should never happen.) */ + /* Some low eigenvalues to be discarded are not in (WL,WLU], */ + /* or high eigenvalues to be discarded are not in (WUL,WU] */ + /* so just kill off the smallest IDISCL/largest IDISCU */ + /* eigenvalues, by marking the corresponding IBLOCK = 0 */ + if (idiscl > 0) { + wkill = *wu; + i__1 = idiscl; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } + /* L90: */ + } + iblock[iw] = 0; + /* L100: */ + } + } + if (idiscu > 0) { + wkill = *wl; + i__1 = idiscu; + for (jdisc = 1; jdisc <= i__1; ++jdisc) { + iw = 0; + i__2 = *m; + for (je = 1; je <= i__2; ++je) { + if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) { + iw = je; + wkill = w[je]; + } + /* L110: */ + } + iblock[iw] = 0; + /* L120: */ + } + } + /* Now erase all eigenvalues with IBLOCK set to zero */ + im = 0; + i__1 = *m; + for (je = 1; je <= i__1; ++je) { + if (iblock[je] != 0) { + ++im; + w[im] = w[je]; + werr[im] = werr[je]; + indexw[im] = indexw[je]; + iblock[im] = iblock[je]; + } + /* L130: */ + } + *m = im; + } + if (idiscl < 0 || idiscu < 0) { + toofew = TRUE_; + } + } + + if ((irange == 1 && *m != *n) || (irange == 3 && *m != *iu - *il + 1)) { + toofew = TRUE_; + } + /* If ORDER='B', do nothing the eigenvalues are already sorted by */ + /* block. */ + /* If ORDER='E', sort the eigenvalues from smallest to largest */ + if (olsame(order, "E") && *nsplit > 1) { + i__1 = *m - 1; + for (je = 1; je <= i__1; ++je) { + ie = 0; + tmp1 = w[je]; + i__2 = *m; + for (j = je + 1; j <= i__2; ++j) { + if (w[j] < tmp1) { + ie = j; + tmp1 = w[j]; + } + /* L140: */ + } + if (ie != 0) { + tmp2 = werr[ie]; + itmp1 = iblock[ie]; + itmp2 = indexw[ie]; + w[ie] = w[je]; + werr[ie] = werr[je]; + iblock[ie] = iblock[je]; + indexw[ie] = indexw[je]; + w[je] = tmp1; + werr[je] = tmp2; + iblock[je] = itmp1; + indexw[je] = itmp2; + } + /* L150: */ + } + } + + *info = 0; + if (ncnvrg) { + ++(*info); + } + if (toofew) { + *info += 2; + } + return 0; + + /* End of ODRRD */ + + } /* odrrd_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrre.hpp b/external/pmrrr/include/pmrrr/lapack/odrre.hpp new file mode 100644 index 0000000000..d48f393975 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrre.hpp @@ -0,0 +1,869 @@ +/** + C++ template version of LAPACK routine dlarre. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRE_HPP__ +#define __ODRRE_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define TRUE_ (1) +#define FALSE_ (0) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrre(const char *range, int *n, FloatingType *vl, + FloatingType *vu, int *il, int *iu, FloatingType *d__, FloatingType *e, + FloatingType *e2, FloatingType *rtol1, FloatingType *rtol2, FloatingType *spltol, + int *nsplit, int *isplit, int *m, FloatingType *w, + FloatingType *werr, FloatingType *wgap, int *iblock, int *indexw, + FloatingType *gers, FloatingType *pivmin, FloatingType *work, int * + iwork, int *info) + { + /* Table of constant values */ + static int c__1 = 1; + static int c__2 = 2; + + /* System generated locals */ + int i__1, i__2; + FloatingType d__1, d__2, d__3; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType), log(FloatingType); + + /* Local variables */ + int i__, j; + FloatingType s1, s2; + int mb; + FloatingType gl; + int in, mm; + FloatingType gu; + int cnt; + FloatingType eps, tau, tmp, rtl; + int cnt1, cnt2; + FloatingType tmp1, eabs; + int iend, jblk; + FloatingType eold; + int indl; + FloatingType dmax__, emax; + int wend, idum, indu; + FloatingType rtol; + int iseed[4]; + FloatingType avgap, sigma; + //extern int olsame_(char *, char *); + int iinfo; + // extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, + // FloatingType *, int *); + FloatingType norep; + //extern /* Subroutine */ int odsq2_(int *, FloatingType *, int *); + // extern FloatingType odmch_(char *); + int ibegin; + FloatingType forceb; + int irange; + FloatingType sgndef; + + int wbegin; + + FloatingType safmin, spdiam; + + FloatingType usedqd; + FloatingType clwdth, isleft; + + FloatingType isrght, bsrtol, dpivot; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* To find the desired eigenvalues of a given real symmetric */ + /* tridiagonal matrix T, ODRRE sets any "small" off-diagonal */ + /* elements to zero, and for each unreduced block T_i, it finds */ + /* (a) a suitable shift at one end of the block's spectrum, */ + /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ + /* (c) eigenvalues of each L_i D_i L_i^T. */ + /* The representations and eigenvalues found are then used by */ + /* DSTEMR to compute the eigenvectors of T. */ + /* The accuracy varies depending on whether bisection is used to */ + /* find a few eigenvalues or the dqds algorithm (subroutine ODSQ2) to */ + /* conpute all and then discard any unwanted one. */ + /* As an added benefit, ODRRE also outputs the n */ + /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ + + /* Arguments */ + /* ========= */ + + /* RANGE (input) CHARACTER */ + /* = 'A': ("All") all eigenvalues will be found. */ + /* = 'V': ("Value") all eigenvalues in the half-open interval */ + /* (VL, VU] will be found. */ + /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ + /* entire matrix) will be found. */ + + /* N (input) INT */ + /* The order of the matrix. N > 0. */ + + /* VL (input/output) DOUBLE PRECISION */ + /* VU (input/output) DOUBLE PRECISION */ + /* If RANGE='V', the lower and upper bounds for the eigenvalues. */ + /* Eigenvalues less than or equal to VL, or greater than VU, */ + /* will not be returned. VL < VU. */ + /* If RANGE='I' or ='A', ODRRE computes bounds on the desired */ + /* part of the spectrum. */ + + /* IL (input) INT */ + /* IU (input) INT */ + /* If RANGE='I', the indices (in ascending order) of the */ + /* smallest and largest eigenvalues to be returned. */ + /* 1 <= IL <= IU <= N. */ + + /* D (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the N diagonal elements of the tridiagonal */ + /* matrix T. */ + /* On exit, the N diagonal elements of the diagonal */ + /* matrices D_i. */ + + /* E (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the first (N-1) entries contain the subdiagonal */ + /* elements of the tridiagonal matrix T; E(N) need not be set. */ + /* On exit, E contains the subdiagonal elements of the unit */ + /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ + /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ + + /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the first (N-1) entries contain the SQUARES of the */ + /* subdiagonal elements of the tridiagonal matrix T; */ + /* E2(N) need not be set. */ + /* On exit, the entries E2( ISPLIT( I ) ), */ + /* 1 <= I <= NSPLIT, have been set to zero */ + + /* RTOL1 (input) DOUBLE PRECISION */ + /* RTOL2 (input) DOUBLE PRECISION */ + /* Parameters for bisection. */ + /* An interval [LEFT,RIGHT] has converged if */ + /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ + + /* SPLTOL (input) DOUBLE PRECISION */ + /* The threshold for splitting. */ + + /* NSPLIT (output) INT */ + /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ + + /* ISPLIT (output) INT array, dimension (N) */ + /* The splitting points, at which T breaks up into blocks. */ + /* The first block consists of rows/columns 1 to ISPLIT(1), */ + /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ + /* etc., and the NSPLIT-th consists of rows/columns */ + /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ + + /* M (output) INT */ + /* The total number of eigenvalues (of all L_i D_i L_i^T) */ + /* found. */ + + /* W (output) DOUBLE PRECISION array, dimension (N) */ + /* The first M elements contain the eigenvalues. The */ + /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ + /* sorted in ascending order ( ODRRE may use the */ + /* remaining N-M elements as workspace). */ + + /* WERR (output) DOUBLE PRECISION array, dimension (N) */ + /* The error bound on the corresponding eigenvalue in W. */ + + /* WGAP (output) DOUBLE PRECISION array, dimension (N) */ + /* The separation from the right neighbor eigenvalue in W. */ + /* The gap is only with respect to the eigenvalues of the same block */ + /* as each block has its own representation tree. */ + /* Exception: at the right end of a block we store the left gap */ + + /* IBLOCK (output) INT array, dimension (N) */ + /* The indices of the blocks (submatrices) associated with the */ + /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ + /* W(i) belongs to the first block from the top, =2 if W(i) */ + /* belongs to the second block, etc. */ + + /* INDEXW (output) INT array, dimension (N) */ + /* The indices of the eigenvalues within each block (submatrix); */ + /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ + /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ + + /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ + /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ + /* is (GERS(2*i-1), GERS(2*i)). */ + + /* PIVMIN (output) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence for T. */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ + /* Workspace. */ + + /* IWORK (workspace) INT array, dimension (5*N) */ + /* Workspace. */ + + /* INFO (output) INT */ + /* = 0: successful exit */ + /* > 0: A problem occured in ODRRE. */ + /* < 0: One of the called subroutines signaled an internal problem. */ + /* Needs inspection of the corresponding parameter IINFO */ + /* for further information. */ + + /* =-1: Problem in ODRRD. */ + /* = 2: No base representation could be found in MAXTRY iterations. */ + /* Increasing MAXTRY and recompilation might be a remedy. */ + /* =-3: Problem in ODRRB when computing the refined root */ + /* representation for ODSQ2. */ + /* =-4: Problem in ODRRB when preforming bisection on the */ + /* desired part of the spectrum. */ + /* =-5: Problem in ODSQ2. */ + /* =-6: Problem in ODSQ2. */ + + /* Further Details */ + /* The base representations are required to suffer very little */ + /* element growth and consequently define all their eigenvalues to */ + /* high relative accuracy. */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Local Arrays .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --gers; + --indexw; + --iblock; + --wgap; + --werr; + --w; + --isplit; + --e2; + --e; + --d__; + + /* Function Body */ + *info = 0; + + /* Decode RANGE */ + + if (olsame(range, "A")) { + irange = 1; + } else if (olsame(range, "V")) { + irange = 3; + } else if (olsame(range, "I")) { + irange = 2; + } + *m = 0; + /* Get machine constants */ + safmin = std::numeric_limits::min(); // odmch_("S"); + eps = std::numeric_limits::epsilon(); // odmch_("P"); + /* Set parameters */ + rtl = sqrt(eps); + bsrtol = sqrt(eps); + /* Treat case of 1x1 matrix for quick return */ + if (*n == 1) { + if (irange == 1 || (irange == 3 && d__[1] > *vl && d__[1] <= *vu) || + (irange == 2 && *il == 1 && *iu == 1) ) { + *m = 1; + w[1] = d__[1]; + /* The computation error of the eigenvalue is zero */ + werr[1] = 0.; + wgap[1] = 0.; + iblock[1] = 1; + indexw[1] = 1; + gers[1] = d__[1]; + gers[2] = d__[1]; + } + /* store the shift for the initial RRR, which is zero in this case */ + e[1] = 0.; + return 0; + } + /* General case: tridiagonal matrix of order > 1 */ + + /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ + /* Compute maximum off-diagonal entry and pivmin. */ + gl = d__[1]; + gu = d__[1]; + eold = 0.; + emax = 0.; + e[*n] = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + werr[i__] = 0.; + wgap[i__] = 0.; + eabs = (d__1 = e[i__], fabs(d__1)); + if (eabs >= emax) { + emax = eabs; + } + tmp1 = eabs + eold; + gers[(i__ << 1) - 1] = d__[i__] - tmp1; + /* Computing MIN */ + d__1 = gl, d__2 = gers[(i__ << 1) - 1]; + gl = fmin(d__1,d__2); + gers[i__ * 2] = d__[i__] + tmp1; + /* Computing MAX */ + d__1 = gu, d__2 = gers[i__ * 2]; + gu = fmax(d__1,d__2); + eold = eabs; + /* L5: */ + } + /* The minimum pivot allowed in the Sturm sequence for T */ + /* Computing MAX */ + /* Computing 2nd power */ + d__3 = emax; + d__1 = 1., d__2 = d__3 * d__3; + *pivmin = safmin * fmax(d__1,d__2); + /* Compute spectral diameter. The Gerschgorin bounds give an */ + /* estimate that is wrong by at most a factor of SQRT(2) */ + spdiam = gu - gl; + /* Compute splitting points */ + odrra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & + iinfo); + /* Can force use of bisection instead of faster DQDS. */ + /* Option left in the code for future multisection work. */ + forceb = FALSE_; + /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ + /* explicitly wants bisection. */ + usedqd = irange == 1 && ! forceb; + if (irange == 1 && ! forceb) { + /* Set interval [VL,VU] that contains all eigenvalues */ + *vl = gl; + *vu = gu; + } else { + /* We call ODRRD to find crude approximations to the eigenvalues */ + /* in the desired range. In case IRANGE = INDRNG, we also obtain the */ + /* interval (VL,VU] that contains all the wanted eigenvalues. */ + /* An interval [LEFT,RIGHT] has converged if */ + /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ + /* ODRRD needs a WORK of size 4*N, IWORK of size 3*N */ + odrrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ + 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], + vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } + /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ + i__1 = *n; + for (i__ = mm + 1; i__ <= i__1; ++i__) { + w[i__] = 0.; + werr[i__] = 0.; + iblock[i__] = 0; + indexw[i__] = 0; + /* L14: */ + } + } + /* ** */ + /* Loop over unreduced blocks */ + ibegin = 1; + wbegin = 1; + i__1 = *nsplit; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = isplit[jblk]; + in = iend - ibegin + 1; + /* 1 X 1 block */ + if (in == 1) { + if (irange == 1 || (irange == 3 && d__[ibegin] > *vl && d__[ibegin] + <= *vu) || (irange == 2 && iblock[wbegin] == jblk) ) { + ++(*m); + w[*m] = d__[ibegin]; + werr[*m] = 0.; + /* The gap for a single block doesn't matter for the later */ + /* algorithm and is assigned an arbitrary large value */ + wgap[*m] = 0.; + iblock[*m] = jblk; + indexw[*m] = 1; + ++wbegin; + } + /* E( IEND ) holds the shift for the initial RRR */ + e[iend] = 0.; + ibegin = iend + 1; + goto L170; + } + + /* Blocks of size larger than 1x1 */ + + /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ + e[iend] = 0.; + + /* Find local outer bounds GL,GU for the block */ + gl = d__[ibegin]; + gu = d__[ibegin]; + i__2 = iend; + for (i__ = ibegin; i__ <= i__2; ++i__) { + /* Computing MIN */ + d__1 = gers[(i__ << 1) - 1]; + gl = fmin(d__1,gl); + /* Computing MAX */ + d__1 = gers[i__ * 2]; + gu = fmax(d__1,gu); + /* L15: */ + } + spdiam = gu - gl; + if (! (irange == 1 && ! forceb)) { + /* Count the number of eigenvalues in the current block. */ + mb = 0; + i__2 = mm; + for (i__ = wbegin; i__ <= i__2; ++i__) { + if (iblock[i__] == jblk) { + ++mb; + } else { + goto L21; + } + /* L20: */ + } + L21: + if (mb == 0) { + /* No eigenvalue in the current block lies in the desired range */ + /* E( IEND ) holds the shift for the initial RRR */ + e[iend] = 0.; + ibegin = iend + 1; + goto L170; + } else { + /* Decide whether dqds or bisection is more efficient */ + usedqd = (FloatingType) mb > in * .5 && ! forceb; + wend = wbegin + mb - 1; + /* Calculate gaps for the current block */ + /* In later stages, when representations for individual */ + /* eigenvalues are different, we use SIGMA = E( IEND ). */ + sigma = 0.; + i__2 = wend - 1; + for (i__ = wbegin; i__ <= i__2; ++i__) { + /* Computing MAX */ + d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + + werr[i__]); + wgap[i__] = fmax(d__1,d__2); + /* L30: */ + } + /* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); + wgap[wend] = fmax(d__1,d__2); + /* Find local index of the first and last desired evalue. */ + indl = indexw[wbegin]; + indu = indexw[wend]; + } + } + if ((irange == 1 && ! forceb) || usedqd) { + /* Case of DQDS */ + /* Find approximations to the extremal eigenvalues of the block */ + odrrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & + rtl, &tmp, &tmp1, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } + /* Computing MAX */ + d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, + fabs(d__1)); + isleft = fmax(d__2,d__3); + odrrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & + rtl, &tmp, &tmp1, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } + /* Computing MIN */ + d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, + fabs(d__1)); + isrght = fmin(d__2,d__3); + /* Improve the estimate of the spectral diameter */ + spdiam = isrght - isleft; + } else { + /* Case of bisection */ + /* Find approximations to the wanted extremal eigenvalues */ + /* Computing MAX */ + d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = + w[wbegin] - werr[wbegin], fabs(d__1)); + isleft = fmax(d__2,d__3); + /* Computing MIN */ + d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ + wend] + werr[wend], fabs(d__1)); + isrght = fmin(d__2,d__3); + } + /* Decide whether the base representation for the current block */ + /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ + /* should be on the left or the right end of the current block. */ + /* The strategy is to shift to the end which is "more populated" */ + /* Furthermore, decide whether to use DQDS for the computation of */ + /* the eigenvalue approximations at the end of ODRRE or bisection. */ + /* dqds is chosen if all eigenvalues are desired or the number of */ + /* eigenvalues to be computed is large compared to the blocksize. */ + if (irange == 1 && ! forceb) { + /* If all the eigenvalues have to be computed, we use dqd */ + usedqd = TRUE_; + /* INDL is the local index of the first eigenvalue to compute */ + indl = 1; + indu = in; + /* MB = number of eigenvalues to compute */ + mb = in; + wend = wbegin + mb - 1; + /* Define 1/4 and 3/4 points of the spectrum */ + s1 = isleft + spdiam * .25; + s2 = isrght - spdiam * .25; + } else { + /* ODRRD has computed IBLOCK and INDEXW for each eigenvalue */ + /* approximation. */ + /* choose sigma */ + if (usedqd) { + s1 = isleft + spdiam * .25; + s2 = isrght - spdiam * .25; + } else { + tmp = fmin(isrght,*vu) - fmax(isleft,*vl); + s1 = fmax(isleft,*vl) + tmp * .25; + s2 = fmin(isrght,*vu) - tmp * .25; + } + } + /* Compute the negcount at the 1/4 and 3/4 points */ + if (mb > 1) { + odrrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & + cnt, &cnt1, &cnt2, &iinfo); + } + if (mb == 1) { + sigma = gl; + sgndef = 1.; + } else if (cnt1 - indl >= indu - cnt2) { + if (irange == 1 && ! forceb) { + sigma = fmax(isleft,gl); + } else if (usedqd) { + /* use Gerschgorin bound as shift to get pos def matrix */ + /* for dqds */ + sigma = isleft; + } else { + /* use approximation of the first desired eigenvalue of the */ + /* block as shift */ + sigma = fmax(isleft,*vl); + } + sgndef = 1.; + } else { + if (irange == 1 && ! forceb) { + sigma = fmin(isrght,gu); + } else if (usedqd) { + /* use Gerschgorin bound as shift to get neg def matrix */ + /* for dqds */ + sigma = isrght; + } else { + /* use approximation of the first desired eigenvalue of the */ + /* block as shift */ + sigma = fmin(isrght,*vu); + } + sgndef = -1.; + } + /* An initial SIGMA has been chosen that will be used for computing */ + /* T - SIGMA I = L D L^T */ + /* Define the increment TAU of the shift in case the initial shift */ + /* needs to be refined to obtain a factorization with not too much */ + /* element growth. */ + if (usedqd) { + /* The initial SIGMA was to the outer end of the spectrum */ + /* the matrix is definite and we need not retreat. */ + tau = spdiam * eps * *n + *pivmin * 2.; + tau = fmax(tau, 2 * eps * fabs(sigma)); + } else { + if (mb > 1) { + clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; + avgap = (d__1 = clwdth / (FloatingType) (wend - wbegin), fabs( + d__1)); + if (sgndef == 1.) { + /* Computing MAX */ + d__1 = wgap[wbegin]; + tau = fmax(d__1,avgap) * .5; + /* Computing MAX */ + d__1 = tau, d__2 = werr[wbegin]; + tau = fmax(d__1,d__2); + } else { + /* Computing MAX */ + d__1 = wgap[wend - 1]; + tau = fmax(d__1,avgap) * .5; + /* Computing MAX */ + d__1 = tau, d__2 = werr[wend]; + tau = fmax(d__1,d__2); + } + } else { + tau = werr[wbegin]; + } + } + + for (idum = 1; idum <= 6; ++idum) { + /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ + /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ + /* pivots in WORK(2*IN+1:3*IN) */ + dpivot = d__[ibegin] - sigma; + work[1] = dpivot; + dmax__ = fabs(work[1]); + j = ibegin; + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[(in << 1) + i__] = 1. / work[i__]; + tmp = e[j] * work[(in << 1) + i__]; + work[in + i__] = tmp; + dpivot = d__[j + 1] - sigma - tmp * e[j]; + work[i__ + 1] = dpivot; + /* Computing MAX */ + d__1 = dmax__, d__2 = fabs(dpivot); + dmax__ = fmax(d__1,d__2); + ++j; + /* L70: */ + } + /* check for element growth */ + if (dmax__ > spdiam * 64.) { + norep = TRUE_; + } else { + norep = FALSE_; + } + if (usedqd && ! norep) { + /* Ensure the definiteness of the representation */ + /* All entries of D (of L D L^T) must have the same sign */ + i__2 = in; + for (i__ = 1; i__ <= i__2; ++i__) { + tmp = sgndef * work[i__]; + if (tmp < 0.) { + norep = TRUE_; + } + /* L71: */ + } + } + if (norep) { + /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ + /* shift which makes the matrix definite. So we should end up */ + /* here really only in the case of IRANGE = VALRNG or INDRNG. */ + if (idum == 5) { + if (sgndef == 1.) { + /* The fudged Gerschgorin shift should succeed */ + sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; + } else { + sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; + } + } else { + sigma -= sgndef * tau; + tau *= 2.; + } + } else { + /* an initial RRR is found */ + goto L83; + } + /* L80: */ + } + /* if the program reaches this point, no base representation could be */ + /* found in MAXTRY iterations. */ + *info = 2; + return 0; + L83: + /* At this point, we have found an initial base representation */ + /* T - SIGMA I = L D L^T with not too much element growth. */ + /* Store the shift. */ + e[iend] = sigma; + /* Store D and L. */ + blas::odcpy(&in, &work[1], &c__1, &d__[ibegin], &c__1); + i__2 = in - 1; + blas::odcpy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); + if (mb > 1) { + + /* Perturb each entry of the base representation by a small */ + /* (but random) relative amount to overcome difficulties with */ + /* glued matrices. */ + + for (i__ = 1; i__ <= 4; ++i__) { + iseed[i__ - 1] = 1; + /* L122: */ + } + i__2 = (in << 1) - 1; + odrnv(&c__2, iseed, &i__2, &work[1]); + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; + e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; + /* L125: */ + } + d__[iend] *= eps * 4. * work[in] + 1.; + + } + + /* Don't update the Gerschgorin intervals because keeping track */ + /* of the updates would be too much work in DLARRV. */ + /* We update W instead and use it to locate the proper Gerschgorin */ + /* intervals. */ + /* Compute the required eigenvalues of L D L' by bisection or dqds */ + if (! usedqd) { + /* If ODRRD has been used, shift the eigenvalue approximations */ + /* according to their representation. This is necessary for */ + /* a uniform DLARRV since dqds computes eigenvalues of the */ + /* shifted representation. In DLARRV, W will always hold the */ + /* UNshifted eigenvalue approximation. */ + i__2 = wend; + for (j = wbegin; j <= i__2; ++j) { + w[j] -= sigma; + werr[j] += (d__1 = w[j], fabs(d__1)) * eps; + /* L134: */ + } + /* call ODRRB to reduce eigenvalue error of the approximations */ + /* from ODRRD */ + i__2 = iend - 1; + for (i__ = ibegin; i__ <= i__2; ++i__) { + /* Computing 2nd power */ + d__1 = e[i__]; + work[i__] = d__[i__] * (d__1 * d__1); + /* L135: */ + } + /* use bisection to find EV from INDL to INDU */ + i__2 = indl - 1; + odrrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, + rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & + work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & + iinfo); + if (iinfo != 0) { + *info = -4; + return 0; + } + /* ODRRB computes all gaps correctly except for the last one */ + /* Record distance to VU/GU */ + /* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); + wgap[wend] = fmax(d__1,d__2); + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + iblock[*m] = jblk; + indexw[*m] = i__; + /* L138: */ + } + } else { + /* Call dqds to get all eigs (and then possibly delete unwanted */ + /* eigenvalues). */ + /* Note that dqds finds the eigenvalues of the L D L^T representation */ + /* of T to high relative accuracy. High relative accuracy */ + /* might be lost when the shift of the RRR is subtracted to obtain */ + /* the eigenvalues of T. However, T is not guaranteed to define its */ + /* eigenvalues to high relative accuracy anyway. */ + /* Set RTOL to the order of the tolerance used in ODSQ2 */ + /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ + /* which is usually too large and requires unnecessary work to be */ + /* done by bisection when computing the eigenvectors */ + rtol = log((FloatingType) in) * 4. * eps; + j = ibegin; + i__2 = in - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[(i__ << 1) - 1] = (d__1 = d__[j], fabs(d__1)); + work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; + ++j; + /* L140: */ + } + work[(in << 1) - 1] = (d__1 = d__[iend], fabs(d__1)); + work[in * 2] = 0.; + odsq2(&in, &work[1], &iinfo); + if (iinfo != 0) { + /* If IINFO = -5 then an index is part of a tight cluster */ + /* and should be changed. The index is in IWORK(1) and the */ + /* gap is in WORK(N+1) */ + *info = -5; + return 0; + } else { + /* Test that all eigenvalues are positive as expected */ + i__2 = in; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] < 0.) { + *info = -6; + return 0; + } + /* L149: */ + } + } + if (sgndef > 0.) { + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + w[*m] = work[in - i__ + 1]; + iblock[*m] = jblk; + indexw[*m] = i__; + /* L150: */ + } + } else { + i__2 = indu; + for (i__ = indl; i__ <= i__2; ++i__) { + ++(*m); + w[*m] = -work[i__]; + iblock[*m] = jblk; + indexw[*m] = i__; + /* L160: */ + } + } + i__2 = *m; + for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { + /* the value of RTOL below should be the tolerance in ODSQ2 */ + werr[i__] = rtol * (d__1 = w[i__], fabs(d__1)); + /* L165: */ + } + i__2 = *m - 1; + for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { + /* compute the right gap between the intervals */ + /* Computing MAX */ + d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ + i__]); + wgap[i__] = fmax(d__1,d__2); + /* L166: */ + } + /* Computing MAX */ + d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); + wgap[*m] = fmax(d__1,d__2); + } + /* proceed with next block */ + ibegin = iend + 1; + wbegin = wend + 1; + L170: + ; + } + + return 0; + + /* end of ODRRE */ + + } /* odrre_ */ + +} //namespace lapack + +} //namespace pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrf.hpp b/external/pmrrr/include/pmrrr/lapack/odrrf.hpp new file mode 100644 index 0000000000..f32e9860b8 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrf.hpp @@ -0,0 +1,439 @@ +/** + C++ template version of LAPACK routine dlarrf. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRF_HPP__ +#define __ODRRF_HPP__ + +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +namespace pmrrr { namespace lapack { + + + /* Subroutine */ + template + int odrrf(int *n, FloatingType *d__, FloatingType *l, + FloatingType *ld, int *clstrt, int *clend, FloatingType *w, + FloatingType *wgap, FloatingType *werr, FloatingType *spdiam, FloatingType * + clgapl, FloatingType *clgapr, FloatingType *pivmin, FloatingType *sigma, + FloatingType *dplus, FloatingType *lplus, FloatingType *work, int *info) + { + /* Table of constant values */ + #define TRUE_ (1) + #define FALSE_ (0) + static int c__1 = 1; + /* System generated locals */ + int i__1; + FloatingType d__1, d__2, d__3; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__; + FloatingType s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, + znm2, growthbound, fail, fact, oldp; + int indx; + FloatingType prod; + int ktry; + FloatingType fail2, avgap, ldmax, rdmax; + int shift; + // extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, + // FloatingType *, int *); + int dorrr1; + // extern FloatingType odmch_(char *); + FloatingType ldelta; + int nofail; + FloatingType mingap, lsigma, rdelta; + //extern int odnan_(FloatingType *); + int forcer; + FloatingType rsigma, clwdth; + int sawnan1, sawnan2, tryrrr1; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + /* * */ + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Given the initial representation L D L^T and its cluster of close */ + /* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */ + /* W( CLEND ), ODRRF finds a new relatively robust representation */ + /* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */ + /* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix (subblock, if the matrix splitted). */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of the diagonal matrix D. */ + + /* L (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (N-1) subdiagonal elements of the unit bidiagonal */ + /* matrix L. */ + + /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (N-1) elements L(i)*D(i). */ + + /* CLSTRT (input) INT */ + /* The index of the first eigenvalue in the cluster. */ + + /* CLEND (input) INT */ + /* The index of the last eigenvalue in the cluster. */ + + /* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ + /* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */ + /* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */ + /* close eigenalues. */ + + /* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ + /* The separation from the right neighbor eigenvalue in W. */ + + /* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ + /* WERR contain the semiwidth of the uncertainty */ + /* interval of the corresponding eigenvalue APPROXIMATION in W */ + + /* SPDIAM (input) estimate of the spectral diameter obtained from the */ + /* Gerschgorin intervals */ + + /* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */ + /* Set by the calling routine to protect against shifts too close */ + /* to eigenvalues outside the cluster. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot allowed in the Sturm sequence. */ + + /* SIGMA (output) DOUBLE PRECISION */ + /* The shift used to form L(+) D(+) L(+)^T. */ + + /* DPLUS (output) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of the diagonal matrix D(+). */ + + /* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */ + /* The first (N-1) elements of LPLUS contain the subdiagonal */ + /* elements of the unit bidiagonal matrix L(+). */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + /* Workspace. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --lplus; + --dplus; + --werr; + --wgap; + --w; + --ld; + --l; + --d__; + + /* Function Body */ + *info = 0; + fact = 2.; + eps = std::numeric_limits::epsilon(); // eps = odmch_("Precision"); + shift = 0; + forcer = FALSE_; + /* Note that we cannot guarantee that for any of the shifts tried, */ + /* the factorization has a small or even moderate element growth. */ + /* There could be Ritz values at both ends of the cluster and despite */ + /* backing off, there are examples where all factorizations tried */ + /* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */ + /* element growth. */ + /* For this reason, we should use PIVMIN in this subroutine so that at */ + /* least the L D L^T factorization exists. It can be checked afterwards */ + /* whether the element growth caused bad residuals/orthogonality. */ + /* Decide whether the code should accept the best among all */ + /* representations despite large element growth or signal INFO=1 */ + nofail = TRUE_; + + /* Compute the average gap length of the cluster */ + clwdth = (d__1 = w[*clend] - w[*clstrt], fabs(d__1)) + werr[*clend] + werr[ + *clstrt]; + avgap = clwdth / (FloatingType) (*clend - *clstrt); + mingap = fmin(*clgapl,*clgapr); + /* Initial values for shifts to both ends of cluster */ + /* Computing MIN */ + d__1 = w[*clstrt], d__2 = w[*clend]; + lsigma = fmin(d__1,d__2) - werr[*clstrt]; + /* Computing MAX */ + d__1 = w[*clstrt], d__2 = w[*clend]; + rsigma = fmax(d__1,d__2) + werr[*clend]; + /* Use a small fudge to make sure that we really shift to the outside */ + lsigma -= fabs(lsigma) * 4. * eps; + rsigma += fabs(rsigma) * 4. * eps; + /* Compute upper bounds for how much to back off the initial shifts */ + ldmax = mingap * .25 + *pivmin * 2.; + rdmax = mingap * .25 + *pivmin * 2.; + /* Computing MAX */ + d__1 = avgap, d__2 = wgap[*clstrt]; + ldelta = fmax(d__1,d__2) / fact; + /* Computing MAX */ + d__1 = avgap, d__2 = wgap[*clend - 1]; + rdelta = fmax(d__1,d__2) / fact; + + /* Initialize the record of the best representation found */ + + // s = std::numeric_limits::min(); // s = odmch_("S"); + smlgrowth = std::numeric_limits::max(); + fail = (FloatingType) (*n - 1) * mingap / (*spdiam * eps); + fail2 = (FloatingType) (*n - 1) * mingap / (*spdiam * sqrt(eps)); + bestshift = lsigma; + + /* while (KTRY <= KTRYMAX) */ + ktry = 0; + growthbound = *spdiam * 8.; + L5: + sawnan1 = FALSE_; + sawnan2 = FALSE_; + /* Ensure that we do not back off too much of the initial shifts */ + ldelta = fmin(ldmax,ldelta); + rdelta = fmin(rdmax,rdelta); + /* Compute the element growth when shifting to both ends of the cluster */ + /* accept the shift if there is no element growth at one of the two ends */ + /* Left end */ + s = -lsigma; + dplus[1] = d__[1] + s; + if (fabs(dplus[1]) < *pivmin) { + dplus[1] = -(*pivmin); + /* Need to set SAWNAN1 because refined RRR test should not be used */ + /* in this case */ + sawnan1 = TRUE_; + } + max1 = fabs(dplus[1]); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + lplus[i__] = ld[i__] / dplus[i__]; + s = s * lplus[i__] * l[i__] - lsigma; + dplus[i__ + 1] = d__[i__ + 1] + s; + if ((d__1 = dplus[i__ + 1], fabs(d__1)) < *pivmin) { + dplus[i__ + 1] = -(*pivmin); + /* Need to set SAWNAN1 because refined RRR test should not be used */ + /* in this case */ + sawnan1 = TRUE_; + } + /* Computing MAX */ + d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], fabs(d__1)); + max1 = fmax(d__2,d__3); + /* L6: */ + } + sawnan1 = sawnan1 || odnan(&max1); + if (forcer || (max1 <= growthbound && ! sawnan1)) { + *sigma = lsigma; + shift = 1; + goto L100; + } + /* Right end */ + s = -rsigma; + work[1] = d__[1] + s; + if (fabs(work[1]) < *pivmin) { + work[1] = -(*pivmin); + /* Need to set SAWNAN2 because refined RRR test should not be used */ + /* in this case */ + sawnan2 = TRUE_; + } + max2 = fabs(work[1]); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + work[*n + i__] = ld[i__] / work[i__]; + s = s * work[*n + i__] * l[i__] - rsigma; + work[i__ + 1] = d__[i__ + 1] + s; + if ((d__1 = work[i__ + 1], fabs(d__1)) < *pivmin) { + work[i__ + 1] = -(*pivmin); + /* Need to set SAWNAN2 because refined RRR test should not be used */ + /* in this case */ + sawnan2 = TRUE_; + } + /* Computing MAX */ + d__2 = max2, d__3 = (d__1 = work[i__ + 1], fabs(d__1)); + max2 = fmax(d__2,d__3); + /* L7: */ + } + sawnan2 = sawnan2 || odnan(&max2); + if (forcer || (max2 <= growthbound && ! sawnan2)) { + *sigma = rsigma; + shift = 2; + goto L100; + } + /* If we are at this point, both shifts led to too much element growth */ + /* Record the better of the two shifts (provided it didn't lead to NaN) */ + if (sawnan1 && sawnan2) { + /* both MAX1 and MAX2 are NaN */ + goto L50; + } else { + if (! sawnan1) { + indx = 1; + if (max1 <= smlgrowth) { + smlgrowth = max1; + bestshift = lsigma; + } + } + if (! sawnan2) { + if (sawnan1 || max2 <= max1) { + indx = 2; + } + if (max2 <= smlgrowth) { + smlgrowth = max2; + bestshift = rsigma; + } + } + } + /* If we are here, both the left and the right shift led to */ + /* element growth. If the element growth is moderate, then */ + /* we may still accept the representation, if it passes a */ + /* refined test for RRR. This test supposes that no NaN occurred. */ + /* Moreover, we use the refined RRR test only for isolated clusters. */ + if (clwdth < mingap / 128. && fmin(max1,max2) < fail2 && ! sawnan1 && ! + sawnan2) { + dorrr1 = TRUE_; + } else { + dorrr1 = FALSE_; + } + tryrrr1 = TRUE_; + if (tryrrr1 && dorrr1) { + if (indx == 1) { + tmp = (d__1 = dplus[*n], fabs(d__1)); + znm2 = 1.; + prod = 1.; + oldp = 1.; + for (i__ = *n - 1; i__ >= 1; --i__) { + if (prod <= eps) { + prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] * + work[*n + i__]) * oldp; + } else { + prod *= (d__1 = work[*n + i__], fabs(d__1)); + } + oldp = prod; + /* Computing 2nd power */ + d__1 = prod; + znm2 += d__1 * d__1; + /* Computing MAX */ + d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, fabs(d__1)); + tmp = fmax(d__2,d__3); + /* L15: */ + } + rrr1 = tmp / (*spdiam * sqrt(znm2)); + if (rrr1 <= 8.) { + *sigma = lsigma; + shift = 1; + goto L100; + } + } else if (indx == 2) { + tmp = (d__1 = work[*n], fabs(d__1)); + znm2 = 1.; + prod = 1.; + oldp = 1.; + for (i__ = *n - 1; i__ >= 1; --i__) { + if (prod <= eps) { + prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * + lplus[i__]) * oldp; + } else { + prod *= (d__1 = lplus[i__], fabs(d__1)); + } + oldp = prod; + /* Computing 2nd power */ + d__1 = prod; + znm2 += d__1 * d__1; + /* Computing MAX */ + d__2 = tmp, d__3 = (d__1 = work[i__] * prod, fabs(d__1)); + tmp = fmax(d__2,d__3); + /* L16: */ + } + rrr2 = tmp / (*spdiam * sqrt(znm2)); + if (rrr2 <= 8.) { + *sigma = rsigma; + shift = 2; + goto L100; + } + } + } + L50: + if (ktry < 1) { + /* If we are here, both shifts failed also the RRR test. */ + /* Back off to the outside */ + /* Computing MAX */ + d__1 = lsigma - ldelta, d__2 = lsigma - ldmax; + lsigma = fmax(d__1,d__2); + /* Computing MIN */ + d__1 = rsigma + rdelta, d__2 = rsigma + rdmax; + rsigma = fmin(d__1,d__2); + ldelta *= 2.; + rdelta *= 2.; + ++ktry; + goto L5; + } else { + /* None of the representations investigated satisfied our */ + /* criteria. Take the best one we found. */ + if (smlgrowth < fail || nofail) { + lsigma = bestshift; + rsigma = bestshift; + forcer = TRUE_; + goto L5; + } else { + *info = 1; + return 0; + } + } + L100: + if (shift == 1) { + } else if (shift == 2) { + /* store new L and D back into DPLUS, LPLUS */ + blas::odcpy(n, &work[1], &c__1, &dplus[1], &c__1); + i__1 = *n - 1; + blas::odcpy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1); + } + return 0; + + /* End of ODRRF */ + + } /* odrrf_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrj.hpp b/external/pmrrr/include/pmrrr/lapack/odrrj.hpp new file mode 100644 index 0000000000..f636e85f86 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrj.hpp @@ -0,0 +1,348 @@ +/** + C++ template version of LAPACK routine dlarrj. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRJ_HPP__ +#define __ODRRJ_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrj(int *n, FloatingType *d__, FloatingType *e2, + int *ifirst, int *ilast, FloatingType *rtol, int *offset, + FloatingType *w, FloatingType *werr, FloatingType *work, int *iwork, + FloatingType *pivmin, FloatingType *spdiam, int *info) + { + /* System generated locals */ + int i__1, i__2; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType log(FloatingType); + + /* Local variables */ + int i__, j, k, p; + FloatingType s; + int i1, i2, ii; + FloatingType fac, mid; + int cnt; + FloatingType tmp, left; + int iter, nint, prev, next, savi1; + FloatingType right, width, dplus; + int olnint, maxitr; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Given the initial eigenvalue approximations of T, ODRRJ */ + /* does bisection to refine the eigenvalues of T, */ + /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ + /* guesses for these eigenvalues are input in W, the corresponding estimate */ + /* of the error in these guesses in WERR. During bisection, intervals */ + /* [left, right] are maintained by storing their mid-points and */ + /* semi-widths in the arrays W and WERR respectively. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of T. */ + + /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The Squares of the (N-1) subdiagonal elements of T. */ + + /* IFIRST (input) INT */ + /* The index of the first eigenvalue to be computed. */ + + /* ILAST (input) INT */ + /* The index of the last eigenvalue to be computed. */ + + /* RTOL (input) DOUBLE PRECISION */ + /* Tolerance for the convergence of the bisection intervals. */ + /* An interval [LEFT,RIGHT] has converged if */ + /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */ + + /* OFFSET (input) INT */ + /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ + /* through ILAST-OFFSET elements of these arrays are to be used. */ + + /* W (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ + /* estimates of the eigenvalues of L D L^T indexed IFIRST through */ + /* ILAST. */ + /* On output, these estimates are refined. */ + + /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ + /* the errors in the estimates of the corresponding elements in W. */ + /* On output, these errors are refined. */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ + /* Workspace. */ + + /* IWORK (workspace) INT array, dimension (2*N) */ + /* Workspace. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot in the Sturm sequence for T. */ + + /* SPDIAM (input) DOUBLE PRECISION */ + /* The spectral diameter of T. */ + + /* INFO (output) INT */ + /* Error flag. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --iwork; + --work; + --werr; + --w; + --e2; + --d__; + + /* Function Body */ + *info = 0; + + maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + + 2; + + /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ + /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ + /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */ + /* for an unconverged interval is set to the index of the next unconverged */ + /* interval, and is -1 or 0 for a converged interval. Thus a linked */ + /* list of unconverged intervals is set up. */ + + i1 = *ifirst; + i2 = *ilast; + /* The number of unconverged intervals */ + nint = 0; + /* The last unconverged interval found */ + prev = 0; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + left = w[ii] - werr[ii]; + mid = w[ii]; + right = w[ii] + werr[ii]; + width = right - mid; + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + tmp = fmax(d__1,d__2); + /* The following test prevents the test of converged intervals */ + if (width < *rtol * tmp) { + /* This interval has already converged and does not need refinement. */ + /* (Note that the gaps might change through refining the */ + /* eigenvalues, however, they can only get bigger.) */ + /* Remove it from the list. */ + iwork[k - 1] = -1; + /* Make sure that I1 always points to the first unconverged interval */ + if (i__ == i1 && i__ < i2) { + i1 = i__ + 1; + } + if (prev >= i1 && i__ <= i2) { + iwork[(prev << 1) - 1] = i__ + 1; + } + } else { + /* unconverged interval found */ + prev = i__; + /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ + + /* Do while( CNT(LEFT).GT.I-1 ) */ + + fac = 1.; + L20: + cnt = 0; + s = left; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) { + ++cnt; + } + /* L30: */ + } + if (cnt > i__ - 1) { + left -= werr[ii] * fac; + fac *= 2.; + goto L20; + } + + /* Do while( CNT(RIGHT).LT.I ) */ + + fac = 1.; + L50: + cnt = 0; + s = right; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) { + ++cnt; + } + /* L60: */ + } + if (cnt < i__) { + right += werr[ii] * fac; + fac *= 2.; + goto L50; + } + ++nint; + iwork[k - 1] = i__ + 1; + iwork[k] = cnt; + } + work[k - 1] = left; + work[k] = right; + /* L75: */ + } + savi1 = i1; + + /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ + /* and while (ITER.LT.MAXITR) */ + + iter = 0; + L80: + prev = i1 - 1; + i__ = i1; + olnint = nint; + i__1 = olnint; + for (p = 1; p <= i__1; ++p) { + k = i__ << 1; + ii = i__ - *offset; + next = iwork[k - 1]; + left = work[k - 1]; + right = work[k]; + mid = (left + right) * .5; + /* semiwidth of interval */ + width = right - mid; + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + tmp = fmax(d__1,d__2); + if (width < *rtol * tmp || iter == maxitr) { + /* reduce number of unconverged intervals */ + --nint; + /* Mark interval as converged. */ + iwork[k - 1] = 0; + if (i1 == i__) { + i1 = next; + } else { + /* Prev holds the last unconverged interval previously examined */ + if (prev >= i1) { + iwork[(prev << 1) - 1] = next; + } + } + i__ = next; + goto L100; + } + prev = i__; + + /* Perform one bisection step */ + + cnt = 0; + s = mid; + dplus = d__[1] - s; + if (dplus < 0.) { + ++cnt; + } + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + dplus = d__[j] - s - e2[j - 1] / dplus; + if (dplus < 0.) { + ++cnt; + } + /* L90: */ + } + if (cnt <= i__ - 1) { + work[k - 1] = mid; + } else { + work[k] = mid; + } + i__ = next; + L100: + ; + } + ++iter; + /* do another loop if there are still unconverged intervals */ + /* However, in the last iteration, all intervals are accepted */ + /* since this is the best we can do. */ + if (nint > 0 && iter <= maxitr) { + goto L80; + } + + + /* At this point, all the intervals have converged */ + i__1 = *ilast; + for (i__ = savi1; i__ <= i__1; ++i__) { + k = i__ << 1; + ii = i__ - *offset; + /* All intervals marked by '0' have been refined. */ + if (iwork[k - 1] == 0) { + w[ii] = (work[k - 1] + work[k]) * .5; + werr[ii] = work[k] - w[ii]; + } + /* L110: */ + } + + return 0; + + /* End of ODRRJ */ + + } /* odrrj_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrk.hpp b/external/pmrrr/include/pmrrr/lapack/odrrk.hpp new file mode 100644 index 0000000000..11e6f8a01c --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrk.hpp @@ -0,0 +1,204 @@ +/** + C++ template version of LAPACK routine dlarrk. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRK_HPP__ +#define __ODRRK_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrk(int *n, int *iw, FloatingType *gl, + FloatingType *gu, FloatingType *d__, FloatingType *e2, FloatingType *pivmin, + FloatingType *reltol, FloatingType *w, FloatingType *werr, int *info) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType log(FloatingType); + + /* Local variables */ + int i__, it; + FloatingType mid, eps, tmp1, tmp2, left, atoli, right; + int itmax; + FloatingType rtoli, tnorm; + // extern FloatingType odmch_(char *); + int negcnt; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODRRK computes one eigenvalue of a symmetric tridiagonal */ + /* matrix T to suitable accuracy. This is an auxiliary code to be */ + /* called from DSTEMR. */ + + /* To avoid overflow, the matrix must be scaled so that its */ + /* largest element is no greater than overflow**(1/2) * */ + /* underflow**(1/4) in absolute value, and for greatest */ + /* accuracy, it should not be much smaller than that. */ + + /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ + /* Matrix", Report CS41, Computer Science Dept., Stanford */ + /* University, July 21, 1966. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the tridiagonal matrix T. N >= 0. */ + + /* IW (input) INT */ + /* The index of the eigenvalues to be returned. */ + + /* GL (input) DOUBLE PRECISION */ + /* GU (input) DOUBLE PRECISION */ + /* An upper and a lower bound on the eigenvalue. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The n diagonal elements of the tridiagonal matrix T. */ + + /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ + /* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ + + /* PIVMIN (input) DOUBLE PRECISION */ + /* The minimum pivot allowed in the Sturm sequence for T. */ + + /* RELTOL (input) DOUBLE PRECISION */ + /* The minimum relative width of an interval. When an interval */ + /* is narrower than RELTOL times the larger (in */ + /* magnitude) endpoint, then it is considered to be */ + /* sufficiently small, i.e., converged. Note: this should */ + /* always be at least radix*machine epsilon. */ + + /* W (output) DOUBLE PRECISION */ + + /* WERR (output) DOUBLE PRECISION */ + /* The error bound on the corresponding eigenvalue approximation */ + /* in W. */ + + /* INFO (output) INT */ + /* = 0: Eigenvalue converged */ + /* = -1: Eigenvalue did NOT converge */ + + /* Internal Parameters */ + /* =================== */ + + /* FUDGE DOUBLE PRECISION, default = 2 */ + /* A "fudge factor" to widen the Gershgorin intervals. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Get machine constants */ + /* Parameter adjustments */ + --e2; + --d__; + + /* Function Body */ + eps = std::numeric_limits::epsilon(); // odmch_("P"); + /* Computing MAX */ + d__1 = fabs(*gl), d__2 = fabs(*gu); + tnorm = fmax(d__1,d__2); + rtoli = *reltol; + atoli = *pivmin * 4.; + itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2; + *info = -1; + left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.; + right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.; + it = 0; + L10: + + /* Check if interval converged or maximum number of iterations reached */ + + tmp1 = (d__1 = right - left, fabs(d__1)); + /* Computing MAX */ + d__1 = fabs(right), d__2 = fabs(left); + tmp2 = fmax(d__1,d__2); + /* Computing MAX */ + d__1 = fmax(atoli,*pivmin), d__2 = rtoli * tmp2; + if (tmp1 < fmax(d__1,d__2)) { + *info = 0; + goto L30; + } + if (it > itmax) { + goto L30; + } + + /* Count number of negative pivots for mid-point */ + + ++it; + mid = (left + right) * .5; + negcnt = 0; + tmp1 = d__[1] - mid; + if (fabs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) { + ++negcnt; + } + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid; + if (fabs(tmp1) < *pivmin) { + tmp1 = -(*pivmin); + } + if (tmp1 <= 0.) { + ++negcnt; + } + /* L20: */ + } + if (negcnt >= *iw) { + right = mid; + } else { + left = mid; + } + goto L10; + L30: + + /* Converged or maximum number of iterations reached */ + + *w = (left + right) * .5; + *werr = (d__1 = right - left, fabs(d__1)) * .5; + return 0; + + /* End of ODRRK */ + + } /* odrrk_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrr.hpp b/external/pmrrr/include/pmrrr/lapack/odrrr.hpp new file mode 100644 index 0000000000..ce9f0d287b --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrr.hpp @@ -0,0 +1,190 @@ +/** + C++ template version of LAPACK routine dlarrr. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRR_HPP__ +#define __ODRRR_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#define TRUE_ (1) +#define FALSE_ (0) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrr(int *n, FloatingType *d__, FloatingType *e, + int *info) + { + /* System generated locals */ + int i__1; + FloatingType d__1; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__; + FloatingType eps, tmp, tmp2, rmin; + // extern FloatingType odmch_(char *); + FloatingType offdig, safmin; + int yesrel; + FloatingType smlnum, offdig2; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + + /* Purpose */ + /* ======= */ + + /* Perform tests to decide whether the symmetric tridiagonal matrix T */ + /* warrants expensive computations which guarantee high relative accuracy */ + /* in the eigenvalues. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. N > 0. */ + + /* D (input) DOUBLE PRECISION array, dimension (N) */ + /* The N diagonal elements of the tridiagonal matrix T. */ + + /* E (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the first (N-1) entries contain the subdiagonal */ + /* elements of the tridiagonal matrix T; E(N) is set to ZERO. */ + + /* INFO (output) INT */ + /* INFO = 0(default) : the matrix warrants computations preserving */ + /* relative accuracy. */ + /* INFO = 1 : the matrix warrants computations guaranteeing */ + /* only absolute accuracy. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* As a default, do NOT go for relative-accuracy preserving computations. */ + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 1; + safmin = std::numeric_limits::min(); // safmin = odmch_("Safe minimum"); + eps = std::numeric_limits::epsilon(); // eps = odmch_("Precision"); + smlnum = safmin / eps; + rmin = sqrt(smlnum); + /* Tests for relative accuracy */ + + /* Test for scaled diagonal dominance */ + /* Scale the diagonal entries to one and check whether the sum of the */ + /* off-diagonals is less than one */ + + /* The sdd relative error bounds have a 1/(1- 2*x) factor in them, */ + /* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */ + /* accuracy is promised. In the notation of the code fragment below, */ + /* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */ + /* We don't think it is worth going into "sdd mode" unless the relative */ + /* condition number is reasonable, not 1/macheps. */ + /* The threshold should be compatible with other thresholds used in the */ + /* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */ + /* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */ + /* instead of the current OFFDIG + OFFDIG2 < 1 */ + + yesrel = TRUE_; + offdig = 0.; + tmp = sqrt((fabs(d__[1]))); + if (tmp < rmin) { + yesrel = FALSE_; + } + if (! yesrel) { + goto L11; + } + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + tmp2 = sqrt((d__1 = d__[i__], fabs(d__1))); + if (tmp2 < rmin) { + yesrel = FALSE_; + } + if (! yesrel) { + goto L11; + } + offdig2 = (d__1 = e[i__ - 1], fabs(d__1)) / (tmp * tmp2); + if (offdig + offdig2 >= .999) { + yesrel = FALSE_; + } + if (! yesrel) { + goto L11; + } + tmp = tmp2; + offdig = offdig2; + /* L10: */ + } + L11: + if (yesrel) { + *info = 0; + return 0; + } else { + } + + + /* *** MORE TO BE IMPLEMENTED *** */ + + + /* Test if the lower bidiagonal matrix L from T = L D L^T */ + /* (zero shift facto) is well conditioned */ + + + /* Test if the upper bidiagonal matrix U from T = U D U^T */ + /* (zero shift facto) is well conditioned. */ + /* In this case, the matrix needs to be flipped and, at the end */ + /* of the eigenvector computation, the flip needs to be applied */ + /* to the computed eigenvectors (and the support) */ + + + return 0; + + /* END OF ODRRR */ + + } /* odrrr_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odrrv.hpp b/external/pmrrr/include/pmrrr/lapack/odrrv.hpp new file mode 100644 index 0000000000..63695dc78c --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odrrv.hpp @@ -0,0 +1,1012 @@ +/** + C++ template version of LAPACK routine dlarrv. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRRV_HPP__ +#define __ODRRV_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define imax(a,b) ( (a) > (b) ? (a) : (b) ) +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) +#define TRUE_ (1) +#define FALSE_ (0) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odrrv(int *n, FloatingType *vl, FloatingType *vu, + FloatingType *d__, FloatingType *l, FloatingType *pivmin, int *isplit, + int *m, int *dol, int *dou, FloatingType *minrgp, + FloatingType *rtol1, FloatingType *rtol2, FloatingType *w, FloatingType *werr, + FloatingType *wgap, int *iblock, int *indexw, FloatingType *gers, + FloatingType *z__, int *ldz, int *isuppz, FloatingType *work, + int *iwork, int *info) + { + + /* Table of constant values */ + //TODO + static FloatingType c_b5 = 0.; + static int c__1 = 1; + static int c__2 = 2; + + /* System generated locals */ + int z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + FloatingType d__1, d__2; + int L__1; + + /* Builtin functions */ + // FloatingType log(FloatingType); + + /* Local variables */ + int minwsize, i__, j, k, p, q, miniwsize, ii; + FloatingType gl; + int im, in; + FloatingType gu, gap, eps, tau, tol, tmp; + int zto; + FloatingType ztz; + int iend, jblk; + FloatingType lgap; + int done; + FloatingType rgap, left; + int wend, iter; + FloatingType bstw; + int itmp1; + // extern /* Subroutine */ int odscl_(int *, FloatingType *, FloatingType *, + // int *); + int indld; + FloatingType fudge; + int idone; + FloatingType sigma; + int iinfo, iindr; + FloatingType resid; + int eskip; + FloatingType right; + // extern /* Subroutine */ int odcpy_(int *, FloatingType *, int *, + // FloatingType *, int *); + int nclus, zfrom; + FloatingType rqtol; + int iindc1, iindc2; + //extern /* Subroutine */ int odr1v_(int *, int *, int *, + /*FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, + int *, FloatingType *, FloatingType *, int *, int *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *);*/ + int stp2ii; + FloatingType lambda; + // extern FloatingType odmch_(char *); + int ibegin, indeig; + int needbs; + int indlld; + FloatingType sgndef, mingma; + //extern /* Subroutine */ int odrrb_(int *, FloatingType *, FloatingType *, + /* int *, int *, FloatingType *, FloatingType *, int *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, + FloatingType *, FloatingType *, int *, int *);*/ + int oldien, oldncl, wbegin; + FloatingType spdiam; + int negcnt; + //extern /* Subroutine */ int odrrf_(int *, FloatingType *, FloatingType *, + /* FloatingType *, int *, int *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, int *);*/ + int oldcls; + FloatingType savgap; + int ndepth; + FloatingType ssigma; + //extern /* Subroutine */ int odset_(char *, int *, int *, + //FloatingType *, FloatingType *, FloatingType *, int *); + int usedbs; + int iindwk, offset; + FloatingType gaptol; + int newcls, oldfst, indwrk, windex, oldlst; + int usedrq; + int newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; + FloatingType bstres; + int newsiz, zusedu, zusedw; + FloatingType nrminv, rqcorr; + int tryrqc; + int isupmx; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODRRV computes the eigenvectors of the tridiagonal matrix */ + /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ + /* The input eigenvalues should have been computed by DLARRE. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The order of the matrix. N >= 0. */ + + /* VL (input) DOUBLE PRECISION */ + /* VU (input) DOUBLE PRECISION */ + /* Lower and upper bounds of the interval that contains the desired */ + /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ + /* end of the extremal eigenvalues in the desired RANGE. */ + + /* D (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the N diagonal elements of the diagonal matrix D. */ + /* On exit, D may be overwritten. */ + + /* L (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the (N-1) subdiagonal elements of the unit */ + /* bidiagonal matrix L are in elements 1 to N-1 of L */ + /* (if the matrix is not splitted.) At the end of each block */ + /* is stored the corresponding shift as given by DLARRE. */ + /* On exit, L is overwritten. */ + + /* PIVMIN (in) DOUBLE PRECISION */ + /* The minimum pivot allowed in the Sturm sequence. */ + + /* ISPLIT (input) INT array, dimension (N) */ + /* The splitting points, at which T breaks up into blocks. */ + /* The first block consists of rows/columns 1 to */ + /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ + /* through ISPLIT( 2 ), etc. */ + + /* M (input) INT */ + /* The total number of input eigenvalues. 0 <= M <= N. */ + + /* DOL (input) INT */ + /* DOU (input) INT */ + /* If the user wants to compute only selected eigenvectors from all */ + /* the eigenvalues supplied, he can specify an index range DOL:DOU. */ + /* Or else the setting DOL=1, DOU=M should be applied. */ + /* Note that DOL and DOU refer to the order in which the eigenvalues */ + /* are stored in W. */ + /* If the user wants to compute only selected eigenpairs, then */ + /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ + /* computed eigenvectors. All other columns of Z are set to zero. */ + + /* MINRGP (input) DOUBLE PRECISION */ + + /* RTOL1 (input) DOUBLE PRECISION */ + /* RTOL2 (input) DOUBLE PRECISION */ + /* Parameters for bisection. */ + /* An interval [LEFT,RIGHT] has converged if */ + /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ + + /* W (input/output) DOUBLE PRECISION array, dimension (N) */ + /* The first M elements of W contain the APPROXIMATE eigenvalues for */ + /* which eigenvectors are to be computed. The eigenvalues */ + /* should be grouped by split-off block and ordered from */ + /* smallest to largest within the block ( The output array */ + /* W from DLARRE is expected here ). Furthermore, they are with */ + /* respect to the shift of the corresponding root representation */ + /* for their block. On exit, W holds the eigenvalues of the */ + /* UNshifted matrix. */ + + /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ + /* The first M elements contain the semiwidth of the uncertainty */ + /* interval of the corresponding eigenvalue in W */ + + /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ + /* The separation from the right neighbor eigenvalue in W. */ + + /* IBLOCK (input) INT array, dimension (N) */ + /* The indices of the blocks (submatrices) associated with the */ + /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ + /* W(i) belongs to the first block from the top, =2 if W(i) */ + /* belongs to the second block, etc. */ + + /* INDEXW (input) INT array, dimension (N) */ + /* The indices of the eigenvalues within each block (submatrix); */ + /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ + /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */ + + /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ + /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ + /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ + /* be computed from the original UNshifted matrix. */ + + /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ + /* If INFO = 0, the first M columns of Z contain the */ + /* orthonormal eigenvectors of the matrix T */ + /* corresponding to the input eigenvalues, with the i-th */ + /* column of Z holding the eigenvector associated with W(i). */ + /* Note: the user must ensure that at least max(1,M) columns are */ + /* supplied in the array Z. */ + + /* LDZ (input) INT */ + /* The leading dimension of the array Z. LDZ >= 1, and if */ + /* JOBZ = 'V', LDZ >= max(1,N). */ + + /* ISUPPZ (output) INT array, dimension ( 2*max(1,M) ) */ + /* The support of the eigenvectors in Z, i.e., the indices */ + /* indicating the nonzero elements in Z. The I-th eigenvector */ + /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ + /* ISUPPZ( 2*I ). */ + + /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ + + /* IWORK (workspace) INT array, dimension (7*N) */ + + /* INFO (output) INT */ + /* = 0: successful exit */ + + /* > 0: A problem occured in ODRRV. */ + /* < 0: One of the called subroutines signaled an internal problem. */ + /* Needs inspection of the corresponding parameter IINFO */ + /* for further information. */ + + /* =-1: Problem in ODRRB when refining a child's eigenvalues. */ + /* =-2: Problem in ODRRF when computing the RRR of a child. */ + /* When a child is inside a tight cluster, it can be difficult */ + /* to find an RRR. A partial remedy from the user's point of */ + /* view is to make the parameter MINRGP smaller and recompile. */ + /* However, as the orthogonality of the computed vectors is */ + /* proportional to 1/MINRGP, the user should be aware that */ + /* he might be trading in precision when he decreases MINRGP. */ + /* =-3: Problem in ODRRB when refining a single eigenvalue */ + /* after the Rayleigh correction was rejected. */ + /* = 5: The Rayleigh Quotient Iteration failed to converge to */ + /* full accuracy in MAXITR steps. */ + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + /* .. */ + /* The first N entries of WORK are reserved for the eigenvalues */ + /* Parameter adjustments */ + --d__; + --l; + --isplit; + --w; + --werr; + --wgap; + --iblock; + --indexw; + --gers; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + indld = *n + 1; + indlld = (*n << 1) + 1; + indwrk = *n * 3 + 1; + minwsize = *n * 12; + i__1 = minwsize; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; + /* L5: */ + } + /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ + /* factorization used to compute the FP vector */ + iindr = 0; + /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ + /* layer and the one above. */ + iindc1 = *n; + iindc2 = *n << 1; + iindwk = *n * 3 + 1; + miniwsize = *n * 7; + i__1 = miniwsize; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; + /* L10: */ + } + zusedl = 1; + if (*dol > 1) { + /* Set lower bound for use of Z */ + zusedl = *dol - 1; + } + zusedu = *m; + if (*dou < *m) { + /* Set lower bound for use of Z */ + zusedu = *dou + 1; + } + /* The width of the part of Z that is used */ + zusedw = zusedu - zusedl + 1; + odset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); + eps = std::numeric_limits::epsilon(); // eps = odmch_("Precision"); + rqtol = eps * 2.; + + /* Set expert flags for standard code. */ + tryrqc = TRUE_; + if (*dol == 1 && *dou == *m) { + } else { + /* Only selected eigenpairs are computed. Since the other evalues */ + /* are not refined by RQ iteration, bisection has to compute to full */ + /* accuracy. */ + *rtol1 = eps * 4.; + *rtol2 = eps * 4.; + } + /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ + /* desired eigenvalues. The support of the nonzero eigenvector */ + /* entries is contained in the interval IBEGIN:IEND. */ + /* Remark that if k eigenpairs are desired, then the eigenvectors */ + /* are stored in k contiguous columns of Z. */ + /* DONE is the number of eigenvectors already computed */ + done = 0; + ibegin = 1; + wbegin = 1; + i__1 = iblock[*m]; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = isplit[jblk]; + sigma = l[iend]; + /* Find the eigenvectors of the submatrix indexed IBEGIN */ + /* through IEND. */ + wend = wbegin - 1; + L15: + if (wend < *m) { + if (iblock[wend + 1] == jblk) { + ++wend; + goto L15; + } + } + if (wend < wbegin) { + ibegin = iend + 1; + goto L170; + } else if (wend < *dol || wbegin > *dou) { + ibegin = iend + 1; + wbegin = wend + 1; + goto L170; + } + /* Find local spectral diameter of the block */ + gl = gers[(ibegin << 1) - 1]; + gu = gers[ibegin * 2]; + i__2 = iend; + for (i__ = ibegin + 1; i__ <= i__2; ++i__) { + /* Computing MIN */ + d__1 = gers[(i__ << 1) - 1]; + gl = fmin(d__1,gl); + /* Computing MAX */ + d__1 = gers[i__ * 2]; + gu = fmax(d__1,gu); + /* L20: */ + } + spdiam = gu - gl; + /* OLDIEN is the last index of the previous block */ + oldien = ibegin - 1; + /* Calculate the size of the current block */ + in = iend - ibegin + 1; + /* The number of eigenvalues in the current block */ + im = wend - wbegin + 1; + /* This is for a 1x1 block */ + if (ibegin == iend) { + ++done; + z__[ibegin + wbegin * z_dim1] = 1.; + isuppz[(wbegin << 1) - 1] = ibegin; + isuppz[wbegin * 2] = ibegin; + w[wbegin] += sigma; + work[wbegin] = w[wbegin]; + ibegin = iend + 1; + ++wbegin; + goto L170; + } + /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ + /* Note that these can be approximations, in this case, the corresp. */ + /* entries of WERR give the size of the uncertainty interval. */ + /* The eigenvalue approximations will be refined when necessary as */ + /* high relative accuracy is required for the computation of the */ + /* corresponding eigenvectors. */ + blas::odcpy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); + /* We store in W the eigenvalue approximations w.r.t. the original */ + /* matrix T. */ + i__2 = im; + for (i__ = 1; i__ <= i__2; ++i__) { + w[wbegin + i__ - 1] += sigma; + /* L30: */ + } + /* NDEPTH is the current depth of the representation tree */ + ndepth = 0; + /* PARITY is either 1 or 0 */ + parity = 1; + /* NCLUS is the number of clusters for the next level of the */ + /* representation tree, we start with NCLUS = 1 for the root */ + nclus = 1; + iwork[iindc1 + 1] = 1; + iwork[iindc1 + 2] = im; + /* IDONE is the number of eigenvectors already computed in the current */ + /* block */ + idone = 0; + /* loop while( IDONE.LT.IM ) */ + /* generate the representation tree for the current block and */ + /* compute the eigenvectors */ + L40: + if (idone < im) { + /* This is a crude protection against infinitely deep trees */ + if (ndepth > *m) { + *info = -2; + return 0; + } + /* breadth first processing of the current level of the representation */ + /* tree: OLDNCL = number of clusters on current level */ + oldncl = nclus; + /* reset NCLUS to count the number of child clusters */ + nclus = 0; + + parity = 1 - parity; + if (parity == 0) { + oldcls = iindc1; + newcls = iindc2; + } else { + oldcls = iindc2; + newcls = iindc1; + } + /* Process the clusters on the current level */ + i__2 = oldncl; + for (i__ = 1; i__ <= i__2; ++i__) { + j = oldcls + (i__ << 1); + /* OLDFST, OLDLST = first, last index of current cluster. */ + /* cluster indices start with 1 and are relative */ + /* to WBEGIN when accessing W, WGAP, WERR, Z */ + oldfst = iwork[j - 1]; + oldlst = iwork[j]; + if (ndepth > 0) { + /* Retrieve relatively robust representation (RRR) of cluster */ + /* that has been computed at the previous level */ + /* The RRR is stored in Z and overwritten once the eigenvectors */ + /* have been computed or when the cluster is refined */ + if (*dol == 1 && *dou == *m) { + /* Get representation from location of the leftmost evalue */ + /* of the cluster */ + j = wbegin + oldfst - 1; + } else { + if (wbegin + oldfst - 1 < *dol) { + /* Get representation from the left end of Z array */ + j = *dol - 1; + } else if (wbegin + oldfst - 1 > *dou) { + /* Get representation from the right end of Z array */ + j = *dou; + } else { + j = wbegin + oldfst - 1; + } + } + blas::odcpy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] + , &c__1); + i__3 = in - 1; + blas::odcpy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ + ibegin], &c__1); + sigma = z__[iend + (j + 1) * z_dim1]; + /* Set the corresponding entries in Z to zero */ + odset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j + * z_dim1], ldz); + } + /* Compute DL and DLL of current RRR */ + i__3 = iend - 1; + for (j = ibegin; j <= i__3; ++j) { + tmp = d__[j] * l[j]; + work[indld - 1 + j] = tmp; + work[indlld - 1 + j] = tmp * l[j]; + /* L50: */ + } + if (ndepth > 0) { + /* P and Q are index of the first and last eigenvalue to compute */ + /* within the current block */ + p = indexw[wbegin - 1 + oldfst]; + q = indexw[wbegin - 1 + oldlst]; + /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ + /* thru' Q-OFFSET elements of these arrays are to be used. */ + /* OFFSET = P-OLDFST */ + offset = indexw[wbegin] - 1; + /* perform limited bisection (if necessary) to get approximate */ + /* eigenvalues to the precision needed. */ + odrrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, + &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ + wbegin], &werr[wbegin], &work[indwrk], &iwork[ + iindwk], pivmin, &spdiam, &in, &iinfo); + if (iinfo != 0) { + *info = -1; + return 0; + } + /* We also recompute the extremal gaps. W holds all eigenvalues */ + /* of the unshifted matrix and must be used for computation */ + /* of WGAP, the entries of WORK might stem from RRRs with */ + /* different shifts. The gaps from WBEGIN-1+OLDFST to */ + /* WBEGIN-1+OLDLST are correctly computed in ODRRB. */ + /* However, we only allow the gaps to become greater since */ + /* this is what should happen when we decrease WERR */ + if (oldfst > 1) { + /* Computing MAX */ + d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + + oldfst - 1] - werr[wbegin + oldfst - 1] - w[ + wbegin + oldfst - 2] - werr[wbegin + oldfst - + 2]; + wgap[wbegin + oldfst - 2] = fmax(d__1,d__2); + } + if (wbegin + oldlst - 1 < wend) { + /* Computing MAX */ + d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + + oldlst] - werr[wbegin + oldlst] - w[wbegin + + oldlst - 1] - werr[wbegin + oldlst - 1]; + wgap[wbegin + oldlst - 1] = fmax(d__1,d__2); + } + /* Each time the eigenvalues in WORK get refined, we store */ + /* the newly found approximation with all shifts applied in W */ + i__3 = oldlst; + for (j = oldfst; j <= i__3; ++j) { + w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; + /* L53: */ + } + } + /* Process the current node. */ + newfst = oldfst; + i__3 = oldlst; + for (j = oldfst; j <= i__3; ++j) { + if (j == oldlst) { + /* we are at the right end of the cluster, this is also the */ + /* boundary of the child cluster */ + newlst = j; + } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ + wbegin + j - 1], fabs(d__1))) { + /* the right relative gap is big enough, the child cluster */ + /* (NEWFST,..,NEWLST) is well separated from the following */ + newlst = j; + } else { + /* inside a child cluster, the relative gap is not */ + /* big enough. */ + goto L140; + } + /* Compute size of child cluster found */ + newsiz = newlst - newfst + 1; + /* NEWFTT is the place in Z where the new RRR or the computed */ + /* eigenvector is to be stored */ + if (*dol == 1 && *dou == *m) { + /* Store representation at location of the leftmost evalue */ + /* of the cluster */ + newftt = wbegin + newfst - 1; + } else { + if (wbegin + newfst - 1 < *dol) { + /* Store representation at the left end of Z array */ + newftt = *dol - 1; + } else if (wbegin + newfst - 1 > *dou) { + /* Store representation at the right end of Z array */ + newftt = *dou; + } else { + newftt = wbegin + newfst - 1; + } + } + if (newsiz > 1) { + + /* Current child is not a singleton but a cluster. */ + /* Compute and store new representation of child. */ + + + /* Compute left and right cluster gap. */ + + /* LGAP and RGAP are not computed from WORK because */ + /* the eigenvalue approximations may stem from RRRs */ + /* different shifts. However, W hold all eigenvalues */ + /* of the unshifted matrix. Still, the entries in WGAP */ + /* have to be computed from WORK since the entries */ + /* in W might be of the same order so that gaps are not */ + /* exhibited correctly for very close eigenvalues. */ + if (newfst == 1) { + /* Computing MAX */ + d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; + lgap = fmax(d__1,d__2); + } else { + lgap = wgap[wbegin + newfst - 2]; + } + rgap = wgap[wbegin + newlst - 1]; + + /* Compute left- and rightmost eigenvalue of child */ + /* to high precision in order to shift as close */ + /* as possible and obtain as large relative gaps */ + /* as possible */ + + for (k = 1; k <= 2; ++k) { + if (k == 1) { + p = indexw[wbegin - 1 + newfst]; + } else { + p = indexw[wbegin - 1 + newlst]; + } + offset = indexw[wbegin] - 1; + odrrb(&in, &d__[ibegin], &work[indlld + ibegin + - 1], &p, &p, &rqtol, &rqtol, &offset, & + work[wbegin], &wgap[wbegin], &werr[wbegin] + , &work[indwrk], &iwork[iindwk], pivmin, & + spdiam, &in, &iinfo); + /* L55: */ + } + + if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 + > *dou) { + /* if the cluster contains no desired eigenvalues */ + /* skip the computation of that branch of the rep. tree */ + + /* We could skip before the refinement of the extremal */ + /* eigenvalues of the child, but then the representation */ + /* tree could be different from the one when nothing is */ + /* skipped. For this reason we skip at this place. */ + idone = idone + newlst - newfst + 1; + goto L139; + } + + /* Compute RRR of child cluster. */ + /* Note that the new RRR is stored in Z */ + + /* ODRRF needs LWORK = 2*N */ + odrrf(&in, &d__[ibegin], &l[ibegin], &work[indld + + ibegin - 1], &newfst, &newlst, &work[wbegin], + &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, + &rgap, pivmin, &tau, &z__[ibegin + newftt * + z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], + &work[indwrk], &iinfo); + if (iinfo == 0) { + /* a new RRR for the cluster was found by ODRRF */ + /* update shift and store it */ + ssigma = sigma + tau; + z__[iend + (newftt + 1) * z_dim1] = ssigma; + /* WORK() are the midpoints and WERR() the semi-width */ + /* Note that the entries in W are unchanged. */ + i__4 = newlst; + for (k = newfst; k <= i__4; ++k) { + fudge = eps * 3. * (d__1 = work[wbegin + k - + 1], fabs(d__1)); + work[wbegin + k - 1] -= tau; + fudge += eps * 4. * (d__1 = work[wbegin + k - + 1], fabs(d__1)); + /* Fudge errors */ + werr[wbegin + k - 1] += fudge; + /* Gaps are not fudged. Provided that WERR is small */ + /* when eigenvalues are close, a zero gap indicates */ + /* that a new representation is needed for resolving */ + /* the cluster. A fudge could lead to a wrong decision */ + /* of judging eigenvalues 'separated' which in */ + /* reality are not. This could have a negative impact */ + /* on the orthogonality of the computed eigenvectors. */ + /* L116: */ + } + ++nclus; + k = newcls + (nclus << 1); + iwork[k - 1] = newfst; + iwork[k] = newlst; + } else { + *info = -2; + return 0; + } + } else { + + /* Compute eigenvector of singleton */ + + iter = 0; + + tol = log((FloatingType) in) * 4. * eps; + + k = newfst; + windex = wbegin + k - 1; + /* Computing MAX */ + i__4 = windex - 1; + windmn = imax(i__4,1); + /* Computing MIN */ + i__4 = windex + 1; + windpl = imin(i__4,*m); + lambda = work[windex]; + ++done; + /* Check if eigenvector computation is to be skipped */ + if (windex < *dol || windex > *dou) { + eskip = TRUE_; + goto L125; + } else { + eskip = FALSE_; + } + left = work[windex] - werr[windex]; + right = work[windex] + werr[windex]; + indeig = indexw[windex]; + /* Note that since we compute the eigenpairs for a child, */ + /* all eigenvalue approximations are w.r.t the same shift. */ + /* In this case, the entries in WORK should be used for */ + /* computing the gaps since they exhibit even very small */ + /* differences in the eigenvalues, as opposed to the */ + /* entries in W which might "look" the same. */ + if (k == 1) { + /* In the case RANGE='I' and with not much initial */ + /* accuracy in LAMBDA and VL, the formula */ + /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ + /* can lead to an overestimation of the left gap and */ + /* thus to inadequately early RQI 'convergence'. */ + /* Prevent this by forcing a small left gap. */ + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + lgap = eps * fmax(d__1,d__2); + } else { + lgap = wgap[windmn]; + } + if (k == im) { + /* In the case RANGE='I' and with not much initial */ + /* accuracy in LAMBDA and VU, the formula */ + /* can lead to an overestimation of the right gap and */ + /* thus to inadequately early RQI 'convergence'. */ + /* Prevent this by forcing a small right gap. */ + /* Computing MAX */ + d__1 = fabs(left), d__2 = fabs(right); + rgap = eps * fmax(d__1,d__2); + } else { + rgap = wgap[windex]; + } + gap = fmin(lgap,rgap); + if (k == 1 || k == im) { + /* The eigenvector support can become wrong */ + /* because significant entries could be cut off due to a */ + /* large GAPTOL parameter in LAR1V. Prevent this. */ + gaptol = 0.; + } else { + gaptol = gap * eps; + } + isupmn = in; + isupmx = 1; + /* Update WGAP so that it holds the minimum gap */ + /* to the left or the right. This is crucial in the */ + /* case where bisection is used to ensure that the */ + /* eigenvalue is refined up to the required precision. */ + /* The correct value is restored afterwards. */ + savgap = wgap[windex]; + wgap[windex] = gap; + /* We want to use the Rayleigh Quotient Correction */ + /* as often as possible since it converges quadratically */ + /* when we are close enough to the desired eigenvalue. */ + /* However, the Rayleigh Quotient can have the wrong sign */ + /* and lead us away from the desired eigenvalue. In this */ + /* case, the best we can do is to use bisection. */ + usedbs = FALSE_; + usedrq = FALSE_; + /* Bisection is initially turned off unless it is forced */ + needbs = ! tryrqc; + L120: + /* Check if bisection should be used to refine eigenvalue */ + if (needbs) { + /* Take the bisection as new iterate */ + usedbs = TRUE_; + itmp1 = iwork[iindr + windex]; + offset = indexw[wbegin] - 1; + d__1 = eps * 2.; + odrrb(&in, &d__[ibegin], &work[indlld + ibegin + - 1], &indeig, &indeig, &c_b5, &d__1, & + offset, &work[wbegin], &wgap[wbegin], & + werr[wbegin], &work[indwrk], &iwork[ + iindwk], pivmin, &spdiam, &itmp1, &iinfo); + if (iinfo != 0) { + *info = -3; + return 0; + } + lambda = work[windex]; + /* Reset twist index from inaccurate LAMBDA to */ + /* force computation of true MINGMA */ + iwork[iindr + windex] = 0; + } + /* Given LAMBDA, compute the eigenvector. */ + L__1 = ! usedbs; + odr1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ + ibegin], &work[indld + ibegin - 1], &work[ + indlld + ibegin - 1], pivmin, &gaptol, &z__[ + ibegin + windex * z_dim1], &L__1, &negcnt, & + ztz, &mingma, &iwork[iindr + windex], &isuppz[ + (windex << 1) - 1], &nrminv, &resid, &rqcorr, + &work[indwrk]); + if (iter == 0) { + bstres = resid; + bstw = lambda; + } else if (resid < bstres) { + bstres = resid; + bstw = lambda; + } + /* Computing MIN */ + i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; + isupmn = imin(i__4,i__5); + /* Computing MAX */ + i__4 = isupmx, i__5 = isuppz[windex * 2]; + isupmx = imax(i__4,i__5); + ++iter; + /* sin alpha <= |resid|/gap */ + /* Note that both the residual and the gap are */ + /* proportional to the matrix, so ||T|| doesn't play */ + /* a role in the quotient */ + + /* Convergence test for Rayleigh-Quotient iteration */ + /* (omitted when Bisection has been used) */ + + if (resid > tol * gap && fabs(rqcorr) > rqtol * fabs( + lambda) && ! usedbs) { + /* We need to check that the RQCORR update doesn't */ + /* move the eigenvalue away from the desired one and */ + /* towards a neighbor. -> protection with bisection */ + if (indeig <= negcnt) { + /* The wanted eigenvalue lies to the left */ + sgndef = -1.; + } else { + /* The wanted eigenvalue lies to the right */ + sgndef = 1.; + } + /* We only use the RQCORR if it improves the */ + /* the iterate reasonably. */ + if (rqcorr * sgndef >= 0. && lambda + rqcorr <= + right && lambda + rqcorr >= left) { + usedrq = TRUE_; + /* Store new midpoint of bisection interval in WORK */ + if (sgndef == 1.) { + /* The current LAMBDA is on the left of the true */ + /* eigenvalue */ + left = lambda; + /* We prefer to assume that the error estimate */ + /* is correct. We could make the interval not */ + /* as a bracket but to be modified if the RQCORR */ + /* chooses to. In this case, the RIGHT side should */ + /* be modified as follows: */ + /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ + } else { + /* The current LAMBDA is on the right of the true */ + /* eigenvalue */ + right = lambda; + /* See comment about assuming the error estimate is */ + /* correct above. */ + /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ + } + work[windex] = (right + left) * .5; + /* Take RQCORR since it has the correct sign and */ + /* improves the iterate reasonably */ + lambda += rqcorr; + /* Update width of error interval */ + werr[windex] = (right - left) * .5; + } else { + needbs = TRUE_; + } + if (right - left < rqtol * fabs(lambda)) { + /* The eigenvalue is computed to bisection accuracy */ + /* compute eigenvector and stop */ + usedbs = TRUE_; + goto L120; + } else if (iter < 10) { + goto L120; + } else if (iter == 10) { + needbs = TRUE_; + goto L120; + } else { + *info = 5; + return 0; + } + } else { + stp2ii = FALSE_; + if (usedrq && usedbs && bstres <= resid) { + lambda = bstw; + stp2ii = TRUE_; + } + if (stp2ii) { + /* improve error angle by second step */ + L__1 = ! usedbs; + odr1v(&in, &c__1, &in, &lambda, &d__[ibegin] + , &l[ibegin], &work[indld + ibegin - + 1], &work[indlld + ibegin - 1], + pivmin, &gaptol, &z__[ibegin + windex + * z_dim1], &L__1, &negcnt, &ztz, & + mingma, &iwork[iindr + windex], & + isuppz[(windex << 1) - 1], &nrminv, & + resid, &rqcorr, &work[indwrk]); + } + work[windex] = lambda; + } + + /* Compute FP-vector support w.r.t. whole matrix */ + + isuppz[(windex << 1) - 1] += oldien; + isuppz[windex * 2] += oldien; + zfrom = isuppz[(windex << 1) - 1]; + zto = isuppz[windex * 2]; + isupmn += oldien; + isupmx += oldien; + /* Ensure vector is ok if support in the RQI has changed */ + if (isupmn < zfrom) { + i__4 = zfrom - 1; + for (ii = isupmn; ii <= i__4; ++ii) { + z__[ii + windex * z_dim1] = 0.; + /* L122: */ + } + } + if (isupmx > zto) { + i__4 = isupmx; + for (ii = zto + 1; ii <= i__4; ++ii) { + z__[ii + windex * z_dim1] = 0.; + /* L123: */ + } + } + i__4 = zto - zfrom + 1; + blas::odscal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], + &c__1); + L125: + /* Update W */ + w[windex] = lambda + sigma; + /* Recompute the gaps on the left and right */ + /* But only allow them to become larger and not */ + /* smaller (which can only happen through "bad" */ + /* cancellation and doesn't reflect the theory */ + /* where the initial gaps are underestimated due */ + /* to WERR being too crude.) */ + if (! eskip) { + if (k > 1) { + /* Computing MAX */ + d__1 = wgap[windmn], d__2 = w[windex] - werr[ + windex] - w[windmn] - werr[windmn]; + wgap[windmn] = fmax(d__1,d__2); + } + if (windex < wend) { + /* Computing MAX */ + d__1 = savgap, d__2 = w[windpl] - werr[windpl] + - w[windex] - werr[windex]; + wgap[windex] = fmax(d__1,d__2); + } + } + ++idone; + } + /* here ends the code for the current child */ + + L139: + /* Proceed to any remaining child nodes */ + newfst = j + 1; + L140: + ; + } + /* L150: */ + } + ++ndepth; + goto L40; + } + ibegin = iend + 1; + wbegin = wend + 1; + L170: + ; + } + + return 0; + + /* End of ODRRV */ + + } /* odrrv_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odruv.hpp b/external/pmrrr/include/pmrrr/lapack/odruv.hpp new file mode 100644 index 0000000000..b461dd4ec2 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odruv.hpp @@ -0,0 +1,205 @@ +/** + C++ template version of LAPACK routine dlaruv. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODRUV_HPP__ +#define __ODRUV_HPP__ + +#include +#include +#include +#include +#include +#include + +#define imax(a,b) ( (a) > (b) ? (a) : (b) ) +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odruv(int *iseed, int *n, FloatingType *x) + { + /* Initialized data */ + + static int mm[512] /* was [128][4] */ = { 494,2637,255,2008,1253, + 3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016, + 154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657, + 3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797, + 1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287, + 2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094, + 1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119, + 3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090, + 3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364, + 1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573, + 1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46, + 3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019, + 1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640, + 2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336, + 1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168, + 1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270, + 2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631, + 1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948, + 1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716, + 1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966, + 758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078, + 3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125, + 2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466, + 4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449, + 1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922, + 2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039, + 1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76, + 3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888, + 1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549, + 1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673, + 541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157, + 1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85, + 3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941, + 929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997, + 1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909, + 2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141, + 249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825, + 157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821, + 3537,517,3017,2141,1537 }; + + /* System generated locals */ + int i__1; + + /* Local variables */ + int i__, i1, i2, i3, i4, it1, it2, it3, it4; + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODRUV returns a vector of n random real numbers from a uniform (0,1) */ + /* distribution (n <= 128). */ + + /* This is an auxiliary routine called by DLARNV and ZLARNV. */ + + /* Arguments */ + /* ========= */ + + /* ISEED (input/output) INT array, dimension (4) */ + /* On entry, the seed of the random number generator; the array */ + /* elements must be between 0 and 4095, and ISEED(4) must be */ + /* odd. */ + /* On exit, the seed is updated. */ + + /* N (input) INT */ + /* The number of random numbers to be generated. N <= 128. */ + + /* X (output) DOUBLE PRECISION array, dimension (N) */ + /* The generated random numbers. */ + + /* Further Details */ + /* =============== */ + + /* This routine uses a multiplicative congruential method with modulus */ + /* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ + /* 'Multiplicative congruential random number generators with modulus */ + /* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ + /* b = 48', Math. Comp. 189, pp 331-344, 1990). */ + + /* 48-bit ints are stored in 4 int array elements with 12 bits */ + /* per element. Hence the routine is portable across machines with */ + /* ints of 32 bits or more. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Local Arrays .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Data statements .. */ + /* Parameter adjustments */ + --iseed; + --x; + + /* Function Body */ + /* .. */ + /* .. Executable Statements .. */ + + i1 = iseed[1]; + i2 = iseed[2]; + i3 = iseed[3]; + i4 = iseed[4]; + + i__1 = imin(*n,128); + for (i__ = 1; i__ <= i__1; ++i__) { + + L20: + + /* Multiply the seed by i-th power of the multiplier modulo 2**48 */ + + it4 = i4 * mm[i__ + 383]; + it3 = it4 / 4096; + it4 -= it3 << 12; + it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255]; + it2 = it3 / 4096; + it3 -= it2 << 12; + it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + + 127]; + it1 = it2 / 4096; + it2 -= it1 << 12; + it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + + 127] + i4 * mm[i__ - 1]; + it1 %= 4096; + + /* Convert 48-bit int to a real number in the interval (0,1) */ + + x[i__] = ((FloatingType) it1 + ((FloatingType) it2 + ((FloatingType) it3 + ( + FloatingType) it4 * 2.44140625e-4) * 2.44140625e-4) * + 2.44140625e-4) * 2.44140625e-4; + + if (x[i__] == 1.) { + /* If a real number has n bits of precision, and the first */ + /* n bits of the 48-bit int above happen to be all 1 (which */ + /* will occur about once every 2**n calls), then X( I ) will */ + /* be rounded to exactly 1.0. */ + /* Since X( I ) is not supposed to return exactly 0.0 or 1.0, */ + /* the statistically correct thing to do in this situation is */ + /* simply to iterate again. */ + /* N.B. the case X( I ) = 0.0 should not be possible. */ + i1 += 2; + i2 += 2; + i3 += 2; + i4 += 2; + goto L20; + } + + /* L10: */ + } + + /* Return final value of seed */ + + iseed[1] = it1; + iseed[2] = it2; + iseed[3] = it3; + iseed[4] = it4; + return 0; + + /* End of ODRUV */ + + } /* odruv_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odset.hpp b/external/pmrrr/include/pmrrr/lapack/odset.hpp new file mode 100644 index 0000000000..b81d684649 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odset.hpp @@ -0,0 +1,166 @@ +/** + C++ template version of LAPACK routine dlaset. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSET_HPP__ +#define __ODSET_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +#define imin(a,b) ( (a) < (b) ? (a) : (b) ) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odset(const char *uplo, int *m, int *n, FloatingType *alpha, + FloatingType *beta, FloatingType *a, int *lda) + { + /* System generated locals */ + int a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + int i__, j; + //extern int olsame(char *, char *); + + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSET initializes an m-by-n matrix A to BETA on the diagonal and */ + /* ALPHA on the offdiagonals. */ + + /* Arguments */ + /* ========= */ + + /* UPLO (input) CHARACTER*1 */ + /* Specifies the part of the matrix A to be set. */ + /* = 'U': Upper triangular part is set; the strictly lower */ + /* triangular part of A is not changed. */ + /* = 'L': Lower triangular part is set; the strictly upper */ + /* triangular part of A is not changed. */ + /* Otherwise: All of the matrix A is set. */ + + /* M (input) INT */ + /* The number of rows of the matrix A. M >= 0. */ + + /* N (input) INT */ + /* The number of columns of the matrix A. N >= 0. */ + + /* ALPHA (input) DOUBLE PRECISION */ + /* The constant to which the offdiagonal elements are to be set. */ + + /* BETA (input) DOUBLE PRECISION */ + /* The constant to which the diagonal elements are to be set. */ + + /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ + /* On exit, the leading m-by-n submatrix of A is set as follows: */ + + /* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ + /* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ + /* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ + + /* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ + + /* LDA (input) INT */ + /* The leading dimension of the array A. LDA >= max(1,M). */ + + /* ===================================================================== */ + + /* .. Local Scalars .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (olsame(uplo, "U")) { + + /* Set the strictly upper triangular or trapezoidal part of the */ + /* array to ALPHA. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + /* Computing MIN */ + i__3 = j - 1; + i__2 = imin(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; + /* L10: */ + } + /* L20: */ + } + + } else if (olsame(uplo, "L")) { + + /* Set the strictly lower triangular or trapezoidal part of the */ + /* array to ALPHA. */ + + i__1 = imin(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; + /* L30: */ + } + /* L40: */ + } + + } else { + + /* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; + /* L50: */ + } + /* L60: */ + } + } + + /* Set the first imin(M,N) diagonal elements to BETA. */ + + i__1 = imin(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; + /* L70: */ + } + + return 0; + + /* End of ODSET */ + + } /* odset_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsnan.hpp b/external/pmrrr/include/pmrrr/lapack/odsnan.hpp new file mode 100644 index 0000000000..8a9bc0c22e --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsnan.hpp @@ -0,0 +1,67 @@ +/** + C++ template version of LAPACK routine dlaisnan. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSNAN_HPP__ +#define __ODSNAN_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + template + int odsnan(FloatingType *din1, FloatingType *din2) + { + /* System generated locals */ + int ret_val; + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* This routine is not for general use. It exists solely to avoid */ + /* over-optimization in DISNAN. */ + + /* ODSNAN checks for NaNs by comparing its two arguments for */ + /* inequality. NaN is the only floating-point value where NaN != NaN */ + /* returns .TRUE. To check for NaNs, pass the same variable as both */ + /* arguments. */ + + /* A compiler must assume that the two arguments are */ + /* not the same variable, and the test will not be optimized away. */ + /* Interprocedural or whole-program optimization may delete this */ + /* test. The ISNAN functions will be replaced by the correct */ + /* Fortran 03 intrinsic once the intrinsic is widely available. */ + + /* Arguments */ + /* ========= */ + + /* DIN1 (input) DOUBLE PRECISION */ + /* DIN2 (input) DOUBLE PRECISION */ + /* Two numbers to compare for inequality. */ + + /* ===================================================================== */ + + /* .. Executable Statements .. */ + ret_val = *din1 != *din2; + return ret_val; + } /* odsnan_ */ + +} + +} + +#endif + diff --git a/external/pmrrr/include/pmrrr/lapack/odsq2.hpp b/external/pmrrr/include/pmrrr/lapack/odsq2.hpp new file mode 100644 index 0000000000..934c5b4efb --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsq2.hpp @@ -0,0 +1,613 @@ +/** + C++ template version of LAPACK routine dlasq2. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSQ2_HPP__ +#define __ODSQ2_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsq2(int *n, FloatingType *z__, int *info) + { + /* Table of constant values */ + static int c__1 = 1; + static int c__2 = 2; + static int c__10 = 10; + static int c__3 = 3; + static int c__4 = 4; + static int c__11 = 11; + /* System generated locals */ + int i__1, i__2, i__3; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + FloatingType d__, e, g; + int k; + FloatingType s, t; + int i0, i4, n0; + FloatingType dn; + int pp; + FloatingType dn1, dn2, dee, eps, tau, tol; + int ipn4; + FloatingType tol2; + int ieee; + int nbig; + FloatingType dmin__, emin, emax; + int kmin, ndiv, iter; + FloatingType qmin, temp, qmax, zmax; + int splt; + FloatingType dmin1, dmin2; + int nfail; + FloatingType desig, trace, sigma; + int iinfo, ttype; + /* Subroutine */ /*extern int odsq3_(int *, int *, FloatingType *, + int *, FloatingType *, FloatingType *, FloatingType *, FloatingType *, + int *, int *, int *, int *, int *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *);*/ + // extern FloatingType odmch_(char *); + FloatingType deemin; + int iwhila, iwhilb; + FloatingType oldemn, safmin; + //extern /* Subroutine */ int oerbla_(char *, int *); + /* extern int oienv_(int *, char *, char *, int *, int *, */ + /* int *, int *); */ + //extern /* Subroutine */ int odsrt_(char *, int *, FloatingType *, + //int *); + + + /* -- LAPACK routine (version 3.2) -- */ + + /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ + /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ + /* -- Berkeley -- */ + /* -- November 2008 -- */ + + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSQ2 computes all the eigenvalues of the symmetric positive */ + /* definite tridiagonal matrix associated with the qd array Z to high */ + /* relative accuracy are computed to high relative accuracy, in the */ + /* absence of denormalization, underflow and overflow. */ + + /* To see the relation of Z to the tridiagonal matrix, let L be a */ + /* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */ + /* let U be an upper bidiagonal matrix with 1's above and diagonal */ + /* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */ + /* symmetric tridiagonal to which it is similar. */ + + /* Note : ODSQ2 defines a int variable, IEEE, which is true */ + /* on machines which follow ieee-754 floating-point standard in their */ + /* handling of infinities and NaNs, and false otherwise. This variable */ + /* is passed to ODSQ3. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The number of rows and columns in the matrix. N >= 0. */ + + /* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */ + /* On entry Z holds the qd array. On exit, entries 1 to N hold */ + /* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */ + /* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */ + /* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */ + /* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */ + /* shifts that failed. */ + + /* INFO (output) INT */ + /* = 0: successful exit */ + /* < 0: if the i-th argument is a scalar and had an illegal */ + /* value, then INFO = -i, if the i-th argument is an */ + /* array and the j-entry had an illegal value, then */ + /* INFO = -(i*100+j) */ + /* > 0: the algorithm failed */ + /* = 1, a split was marked by a positive value in E */ + /* = 2, current block of Z not diagonalized after 30*N */ + /* iterations (in inner while loop) */ + /* = 3, termination criterion of outer while loop not met */ + /* (program created more than N unreduced blocks) */ + + /* Further Details */ + /* =============== */ + /* Local Variables: I0:N0 defines a current unreduced segment of Z. */ + /* The shifts are accumulated in SIGMA. Iteration count is in ITER. */ + /* Ping-pong is controlled by PP (alternates between 0 and 1). */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Test the input arguments. */ + /* (in case ODSQ2 is not called by DLASQ1) */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = std::numeric_limits::epsilon(); // odmch_("Precision"); + safmin = std::numeric_limits::min(); // odmch_("Safe minimum"); + tol = eps * 100.; + /* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + oerbla("ODSQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + + /* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + oerbla("ODSQ2", &c__2); + } + return 0; + } else if (*n == 2) { + + /* 2-by-2 case. */ + + if (z__[2] < 0. || z__[3] < 0.) { + *info = -2; + oerbla("ODSQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + + /* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = (*n - 1) << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + oerbla("ODSQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + oerbla("ODSQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; + /* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = fmax(d__1,d__2); + /* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = fmin(d__1,d__2); + /* Computing MAX */ + d__1 = fmax(qmax,zmax), d__2 = z__[k + 1]; + zmax = fmax(d__1,d__2); + /* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + oerbla("ODSQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; + /* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = fmax(d__1,d__2); + zmax = fmax(qmax,zmax); + + /* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; + /* L20: */ + } + odsrt("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + + /* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + + /* Check whether the machine is IEEE conformable. */ + /* ieee = oienv_(&c__10, "ODSQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && oienv_(&c__11, "ODSQ2", "N", &c__1, &c__2, */ + /* &c__3, &c__4) == 1; */ + ieee = 1; + + /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; + /* L30: */ + } + + i0 = 1; + n0 = *n; + + /* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = (i0 + n0) << 2; + i__1 = (i0 + n0 - 1) << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + /* L40: */ + } + } + + /* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = ((n0 - 1) << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } + /* L50: */ + } + + /* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = ((n0 - 1) << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } + /* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = fmin(d__1,d__2); + /* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + + /* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { + /* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = fmax(d__1,d__2); + /* L70: */ + } + + /* Prepare for the next iteration on K. */ + + pp = 1 - pp; + /* L80: */ + } + + /* Initialise variables to pass to ODSQ3. */ + + ttype = 0; + dmin1 = 0.; + dmin2 = 0.; + dn = 0.; + dn1 = 0.; + dn2 = 0.; + g = 0.; + tau = 0.; + + iter = 2; + nfail = 0; + ndiv = (n0 - i0) << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L170; + } + + /* While array unfinished do */ + + /* E(N0) holds the value of SIGMA when submatrix in I0:N0 */ + /* splits from the rest of the array, but is negated. */ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + + /* Find last unreduced submatrix's top index I0, find QMAX and */ + /* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], fabs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) { + /* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = fmin(d__1,d__2); + /* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = fmax(d__1,d__2); + } + /* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = fmax(d__1,d__2); + /* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = fmin(d__1,d__2); + /* L90: */ + } + i4 = 4; + + L100: + i0 = i4 / 4; + pp = 0; + + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } + /* L110: */ + } + if ((kmin - i0) << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5) { + ipn4 = (i0 + n0) << 2; + pp = 2; + i__2 = (i0 + n0 - 1) << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; + /* L120: */ + } + } + } + + /* Put -(initial shift) into DMIN. */ + + /* Computing MAX */ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -fmax(d__1,d__2); + + /* Now I0:N0 is unreduced. */ + /* PP = 0 for ping, PP = 1 for pong. */ + /* PP = 2 indicates that flipping was applied to the Z array and */ + /* and that the tests for deflation upon entry in ODSQ3 */ + /* should not be performed. */ + + nbig = (n0 - i0 + 1) * 100; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L150; + } + + /* While submatrix unfinished take a good dqds step. */ + + odsq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); + + pp = 1 - pp; + + /* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = (n0 - 3) << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { + /* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = fmax(d__1,d__2); + /* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = fmin(d__1,d__2); + /* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = fmin(d__1,d__2); + } + /* L130: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + + /* L140: */ + } + + *info = 2; + return 0; + + /* end IWHILB */ + + L150: + + /* L160: */ + ; + } + + *info = 3; + return 0; + + /* end IWHILA */ + + L170: + + /* Move q's to the front. */ + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; + /* L180: */ + } + + /* Sort and compute sum of eigenvalues. */ + odsrt("D", n, &z__[1], &iinfo); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; + /* L190: */ + } + + /* Store trace, sum(eigenvalues) and information on performance. */ + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (FloatingType) iter; + /* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (FloatingType) ndiv / (FloatingType) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. / (FloatingType) iter; + return 0; + + /* End of ODSQ2 */ + + } /* odsq2_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsq3.hpp b/external/pmrrr/include/pmrrr/lapack/odsq3.hpp new file mode 100644 index 0000000000..cd5b7865f4 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsq3.hpp @@ -0,0 +1,365 @@ +/** + C++ template version of LAPACK routine dlasq3. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSQ3_HPP__ +#define __ODSQ3_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsq3(int *i0, int *n0, FloatingType *z__, + int *pp, FloatingType *dmin__, FloatingType *sigma, FloatingType *desig, + FloatingType *qmax, int *nfail, int *iter, int *ndiv, + int *ieee, int *ttype, FloatingType *dmin1, FloatingType *dmin2, + FloatingType *dn, FloatingType *dn1, FloatingType *dn2, FloatingType *g, + FloatingType *tau) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + FloatingType s, t; + int j4, nn; + FloatingType eps, tol; + int n0in, ipn4; + FloatingType tol2, temp; + /* Subroutine */ /*extern int odsq4_(int *, int *, FloatingType *, + int *, int *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, int *, + FloatingType *), odsq5_(int *, int *, FloatingType *, + int *, FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, int *), odsq6_( + int *, int *, FloatingType *, int *, FloatingType *, + FloatingType *, FloatingType *, FloatingType *, FloatingType *, + FloatingType *);*/ + // extern FloatingType odmch_(char *); + + + /* -- LAPACK routine (version 3.2) -- */ + + /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ + /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ + /* -- Berkeley -- */ + /* -- November 2008 -- */ + + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ + /* In case of failure it changes shifts, and tries again until output */ + /* is positive. */ + + /* Arguments */ + /* ========= */ + + /* I0 (input) INT */ + /* First index. */ + + /* N0 (input) INT */ + /* Last index. */ + + /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ + /* Z holds the qd array. */ + + /* PP (input/output) INT */ + /* PP=0 for ping, PP=1 for pong. */ + /* PP=2 indicates that flipping was applied to the Z array */ + /* and that the initial tests for deflation should not be */ + /* performed. */ + + /* DMIN (output) DOUBLE PRECISION */ + /* Minimum value of d. */ + + /* SIGMA (output) DOUBLE PRECISION */ + /* Sum of shifts used in current segment. */ + + /* DESIG (input/output) DOUBLE PRECISION */ + /* Lower order part of SIGMA */ + + /* QMAX (input) DOUBLE PRECISION */ + /* Maximum value of q. */ + + /* NFAIL (output) INT */ + /* Number of times shift was too big. */ + + /* ITER (output) INT */ + /* Number of iterations. */ + + /* NDIV (output) INT */ + /* Number of divisions. */ + + /* IEEE (input) INT */ + /* Flag for IEEE or non IEEE arithmetic (passed to ODSQ5). */ + + /* TTYPE (input/output) INT */ + /* Shift type. */ + + /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */ + /* These are passed as arguments in order to save their values */ + /* between calls to ODSQ3. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. External Function .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + n0in = *n0; + eps = std::numeric_limits::epsilon(); // odmch_("Precision"); + tol = eps * 100.; + /* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + /* Check for deflation. */ + + L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + + /* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + + L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + + /* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + + L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + + L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + + L50: + if (*pp == 2) { + *pp = 0; + } + + /* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = (*i0 + *n0) << 2; + i__1 = (*i0 + *n0 - 1) << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; + /* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } + /* Computing MIN */ + d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = fmin(d__1,d__2); + /* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = fmin(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = fmin(d__1,d__2); + /* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + fmin(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = fmin(d__1,d__2); + /* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = fmax(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = fmax(d__1,d__2); + *dmin__ = -0.; + } + } + + /* Choose a shift. */ + + odsq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); + + /* Call dqds until DMIN > 0. */ + + L70: + + odsq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + + /* Check status. */ + + if (*dmin__ >= 0. && *dmin1 > 0.) { + + /* Success. */ + + goto L90; + + } else if (*dmin__ < 0. && *dmin1 > 0. && z__[((*n0 - 1) << 2) - *pp] < tol + * (*sigma + *dn1) && fabs(*dn) < tol * *sigma) { + + /* Convergence hidden by negative DN. */ + + z__[((*n0 - 1) << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L90; + } else if (*dmin__ < 0.) { + + /* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (*ttype < -22) { + + /* Failed twice. Play it safe. */ + + *tau = 0.; + } else if (*dmin1 > 0.) { + + /* Late failure. Gives excellent shift. */ + + *tau = (*tau + *dmin__) * (1. - eps * 2.); + *ttype += -11; + } else { + + /* Early failure. Divide by 4. */ + + *tau *= .25; + *ttype += -12; + } + goto L70; + } else if (odnan(dmin__)) { + + /* NaN. */ + + if (*tau == 0.) { + goto L80; + } else { + *tau = 0.; + goto L70; + } + } else { + + /* Possible underflow. Play it safe. */ + + goto L80; + } + + /* Risk of underflow. */ + + L80: + odsq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + *tau = 0.; + + L90: + if (*tau < *sigma) { + *desig += *tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; + } + *sigma = t; + + return 0; + + /* End of ODSQ3 */ + + } /* odsq3_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsq4.hpp b/external/pmrrr/include/pmrrr/lapack/odsq4.hpp new file mode 100644 index 0000000000..d7f19cec0c --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsq4.hpp @@ -0,0 +1,412 @@ +/** + C++ template version of LAPACK routine dlasq4. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSQ4_HPP__ +#define __ODSQ4_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsq4(int *i0, int *n0, FloatingType *z__, + int *pp, int *n0in, FloatingType *dmin__, FloatingType *dmin1, + FloatingType *dmin2, FloatingType *dn, FloatingType *dn1, FloatingType *dn2, + FloatingType *tau, int *ttype, FloatingType *g) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + FloatingType s, a2, b1, b2; + int i4, nn, np; + FloatingType gam, gap1, gap2; + + + /* -- LAPACK routine (version 3.2) -- */ + + /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ + /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ + /* -- Berkeley -- */ + /* -- November 2008 -- */ + + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSQ4 computes an approximation TAU to the smallest eigenvalue */ + /* using values of d from the previous transform. */ + + /* I0 (input) INT */ + /* First index. */ + + /* N0 (input) INT */ + /* Last index. */ + + /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ + /* Z holds the qd array. */ + + /* PP (input) INT */ + /* PP=0 for ping, PP=1 for pong. */ + + /* NOIN (input) INT */ + /* The value of N0 at start of EIGTEST. */ + + /* DMIN (input) DOUBLE PRECISION */ + /* Minimum value of d. */ + + /* DMIN1 (input) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ). */ + + /* DMIN2 (input) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + + /* DN (input) DOUBLE PRECISION */ + /* d(N) */ + + /* DN1 (input) DOUBLE PRECISION */ + /* d(N-1) */ + + /* DN2 (input) DOUBLE PRECISION */ + /* d(N-2) */ + + /* TAU (output) DOUBLE PRECISION */ + /* This is the shift. */ + + /* TTYPE (output) INT */ + /* Shift type. */ + + /* G (input/output) REAL */ + /* G is passed as an argument in order to save its value between */ + /* calls to ODSQ4. */ + + /* Further Details */ + /* =============== */ + /* CNST1 = 9/16 */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* A negative DMIN forces the shift to take that absolute value */ + /* TTYPE records the type of shift. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + + /* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + + /* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. && gap1 > b1) { + /* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = fmax(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { + /* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = fmin(d__1,d__2); + } + /* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = fmax(d__1,d__2); + *ttype = -3; + } + } else { + + /* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + + /* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (fmax(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } + /* L10: */ + } + L20: + a2 *= 1.05; + + /* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + + /* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + + /* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + + /* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (fmax(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } + /* L30: */ + } + L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + + /* Case 6, no information to guide us. */ + + if (*ttype == -6) { + *g += (1. - *g) * .333; + } else if (*ttype == -18) { + *g = .083250000000000005; + } else { + *g = .25; + } + s = *g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + + /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + + /* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (fmax(b1,a2) * 100. < b2) { + goto L60; + } + /* L50: */ + } + L60: + b2 = sqrt(b2 * 1.05); + /* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { + /* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = fmax(d__1,d__2); + } else { + /* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = fmax(d__1,d__2); + *ttype = -8; + } + } else { + + /* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + + /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ + + /* Cases 10 and 11. */ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } + /* L70: */ + } + L80: + b2 = sqrt(b2 * 1.05); + /* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { + /* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = fmax(d__1,d__2); + } else { + /* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = fmax(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + + /* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + + /* End of ODSQ4 */ + + } /* odsq4_ */ +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsq5.hpp b/external/pmrrr/include/pmrrr/lapack/odsq5.hpp new file mode 100644 index 0000000000..e7361978f6 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsq5.hpp @@ -0,0 +1,250 @@ +/** + C++ template version of LAPACK routine dlasq5. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSQ5_HPP__ +#define __ODSQ5_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsq5(int *i0, int *n0, FloatingType *z__, + int *pp, FloatingType *tau, FloatingType *dmin__, FloatingType *dmin1, + FloatingType *dmin2, FloatingType *dn, FloatingType *dnm1, FloatingType *dnm2, + int *ieee) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Local variables */ + FloatingType d__; + int j4, j4p2; + FloatingType emin, temp; + + + /* -- LAPACK routine (version 3.2) -- */ + + /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ + /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ + /* -- Berkeley -- */ + /* -- November 2008 -- */ + + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSQ5 computes one dqds transform in ping-pong form, one */ + /* version for IEEE machines another for non IEEE machines. */ + + /* Arguments */ + /* ========= */ + + /* I0 (input) INT */ + /* First index. */ + + /* N0 (input) INT */ + /* Last index. */ + + /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ + /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ + /* an extra argument. */ + + /* PP (input) INT */ + /* PP=0 for ping, PP=1 for pong. */ + + /* TAU (input) DOUBLE PRECISION */ + /* This is the shift. */ + + /* DMIN (output) DOUBLE PRECISION */ + /* Minimum value of d. */ + + /* DMIN1 (output) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ). */ + + /* DMIN2 (output) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + + /* DN (output) DOUBLE PRECISION */ + /* d(N0), the last value of d. */ + + /* DNM1 (output) DOUBLE PRECISION */ + /* d(N0-1). */ + + /* DNM2 (output) DOUBLE PRECISION */ + /* d(N0-2). */ + + /* IEEE (input) INT */ + /* Flag for IEEE or non IEEE arithmetic. */ + + /* ===================================================================== */ + + /* .. Parameter .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + + /* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = fmin(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; + /* Computing MIN */ + d__1 = z__[j4]; + emin = fmin(d__1,emin); + /* L10: */ + } + } else { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = fmin(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; + /* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = fmin(d__1,emin); + /* L20: */ + } + } + + /* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = ((*n0 - 2) << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = fmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = fmin(*dmin__,*dn); + + } else { + + /* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = fmin(*dmin__,d__); + /* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = fmin(d__1,d__2); + /* L30: */ + } + } else { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = fmin(*dmin__,d__); + /* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = fmin(d__1,d__2); + /* L40: */ + } + } + + /* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = ((*n0 - 2) << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = fmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = fmin(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + + /* End of ODSQ5 */ + + } /* odsq5_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsq6.hpp b/external/pmrrr/include/pmrrr/lapack/odsq6.hpp new file mode 100644 index 0000000000..f613a90c88 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsq6.hpp @@ -0,0 +1,223 @@ +/** + C++ template version of LAPACK routine dlasq6. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSQ6_HPP__ +#define __ODSQ6_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsq6(int *i0, int *n0, FloatingType *z__, + int *pp, FloatingType *dmin__, FloatingType *dmin1, FloatingType *dmin2, + FloatingType *dn, FloatingType *dnm1, FloatingType *dnm2) + { + /* System generated locals */ + int i__1; + FloatingType d__1, d__2; + + /* Local variables */ + FloatingType d__; + int j4, j4p2; + FloatingType emin, temp; + // extern FloatingType odmch_(char *); + FloatingType safmin; + + + /* -- LAPACK routine (version 3.2) -- */ + + /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ + /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ + /* -- Berkeley -- */ + /* -- November 2008 -- */ + + /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ + /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSQ6 computes one dqd (shift equal to zero) transform in */ + /* ping-pong form, with protection against underflow and overflow. */ + + /* Arguments */ + /* ========= */ + + /* I0 (input) INT */ + /* First index. */ + + /* N0 (input) INT */ + /* Last index. */ + + /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ + /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ + /* an extra argument. */ + + /* PP (input) INT */ + /* PP=0 for ping, PP=1 for pong. */ + + /* DMIN (output) DOUBLE PRECISION */ + /* Minimum value of d. */ + + /* DMIN1 (output) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ). */ + + /* DMIN2 (output) DOUBLE PRECISION */ + /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ + + /* DN (output) DOUBLE PRECISION */ + /* d(N0), the last value of d. */ + + /* DNM1 (output) DOUBLE PRECISION */ + /* d(N0-1). */ + + /* DNM2 (output) DOUBLE PRECISION */ + /* d(N0-2). */ + + /* ===================================================================== */ + + /* .. Parameter .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. External Function .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = std::numeric_limits::min(); // odmch_("Safe minimum"); + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = fmin(*dmin__,d__); + /* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = fmin(d__1,d__2); + /* L10: */ + } + } else { + i__1 = (*n0 - 3) << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = fmin(*dmin__,d__); + /* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = fmin(d__1,d__2); + /* L20: */ + } + } + + /* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = ((*n0 - 2) << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = fmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = fmin(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + + /* End of ODSQ6 */ + + } /* odsq6_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odsrt.hpp b/external/pmrrr/include/pmrrr/lapack/odsrt.hpp new file mode 100644 index 0000000000..869a6da155 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odsrt.hpp @@ -0,0 +1,298 @@ +/** + C++ template version of LAPACK routine dlasrt. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSRT_HPP__ +#define __ODSRT_HPP__ + +#include +#include +#include +#include +#include +#include + +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odsrt(const char *id, int *n, FloatingType *d__, int * + info) + { + /* System generated locals */ + int i__1, i__2; + + /* Local variables */ + int i__, j; + FloatingType d1, d2, d3; + int dir; + FloatingType tmp; + int endd; + //extern int olsame(char *, char *); + int stack[64] /* was [2][32] */; + FloatingType dmnmx; + int start; + //extern /* Subroutine */ int oerbla_(char *, int *); + int stkpnt; + + + /* -- LAPACK routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* Sort the numbers in D in increasing order (if ID = 'I') or */ + /* in decreasing order (if ID = 'D' ). */ + + /* Use Quick Sort, reverting to Insertion sort on arrays of */ + /* size <= 20. Dimension of STACK limits N to about 2**32. */ + + /* Arguments */ + /* ========= */ + + /* ID (input) CHARACTER*1 */ + /* = 'I': sort D in increasing order; */ + /* = 'D': sort D in decreasing order. */ + + /* N (input) INT */ + /* The length of the array D. */ + + /* D (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the array to be sorted. */ + /* On exit, D has been sorted into increasing order */ + /* (D(1) <= ... <= D(N) ) or into decreasing order */ + /* (D(1) >= ... >= D(N) ), depending on ID. */ + + /* INFO (output) INT */ + /* = 0: successful exit */ + /* < 0: if INFO = -i, the i-th argument had an illegal value */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Local Arrays .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Test the input paramters. */ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (olsame(id, "D")) { + dir = 0; + } else if (olsame(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + oerbla("ODSRT", &i__1); + return 0; + } + + /* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; + L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + + /* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + + /* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } + /* L20: */ + } + L30: + ; + } + + } else { + + /* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } + /* L40: */ + } + L50: + ; + } + + } + + } else if (endd - start > 20) { + + /* Partition D( START:ENDD ) and stack parts, largest one first */ + + /* Choose partition entry as median of 3 */ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + + /* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; + L60: + L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } + L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + + /* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; + L90: + L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } + L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + + /* End of ODSRT */ + + } /* odsrt_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odssq.hpp b/external/pmrrr/include/pmrrr/lapack/odssq.hpp new file mode 100644 index 0000000000..d60f7f3445 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odssq.hpp @@ -0,0 +1,125 @@ +/** + C++ template version of LAPACK routine dlassq. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSSQ_HPP__ +#define __ODSSQ_HPP__ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odssq(int *n, FloatingType *x, int *incx, + FloatingType *scale, FloatingType *sumsq) + { + /* System generated locals */ + int i__1, i__2; + FloatingType d__1; + + /* Local variables */ + int ix; + FloatingType absxi; + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSSQ returns the values scl and smsq such that */ + + /* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ + + /* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ + /* assumed to be non-negative and scl returns the value */ + + /* scl = max( scale, abs( x( i ) ) ). */ + + /* scale and sumsq must be supplied in SCALE and SUMSQ and */ + /* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ + + /* The routine makes only one pass through the vector x. */ + + /* Arguments */ + /* ========= */ + + /* N (input) INT */ + /* The number of elements to be used from the vector X. */ + + /* X (input) DOUBLE PRECISION array, dimension (N) */ + /* The vector for which a scaled sum of squares is computed. */ + /* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ + + /* INCX (input) INT */ + /* The increment between successive values of the vector X. */ + /* INCX > 0. */ + + /* SCALE (input/output) DOUBLE PRECISION */ + /* On entry, the value scale in the equation above. */ + /* On exit, SCALE is overwritten with scl , the scaling factor */ + /* for the sum of squares. */ + + /* SUMSQ (input/output) DOUBLE PRECISION */ + /* On entry, the value sumsq in the equation above. */ + /* On exit, SUMSQ is overwritten with smsq , the basic sum of */ + /* squares from which scl has been factored out. */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], fabs(d__1)); + if (*scale < absxi) { + /* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { + /* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } + /* L10: */ + } + } + return 0; + + /* End of ODSSQ */ + + } /* odssq_ */ + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/lapack/odstmr.hpp b/external/pmrrr/include/pmrrr/lapack/odstmr.hpp new file mode 100644 index 0000000000..17d2f5d1c5 --- /dev/null +++ b/external/pmrrr/include/pmrrr/lapack/odstmr.hpp @@ -0,0 +1,723 @@ +/** + C++ template version of LAPACK routine dstemr. + Based on C code translated by f2c (version 20061008). +*/ + +#ifndef __ODSTMR_HPP__ +#define __ODSTMR_HPP__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Table of constant values */ +#define TRUE_ (1) +#define FALSE_ (0) +#define iabs(a) ( (a) > (0) ? (a) : (-a) ) + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + template + int odstmr(const char *jobz, const char *range, int *n, FloatingType *d__, + FloatingType *e, FloatingType *vl, FloatingType *vu, int *il, + int *iu, int *m, FloatingType *w, FloatingType *z__, int *ldz, + int *nzc, int *isuppz, int *tryrac, FloatingType *work, + int *lwork, int *iwork, int *liwork, int *info) + { + /* Moved here to include floating type type */ + static FloatingType c_b18 = .001; + static int c__1 = 1; + /* System generated locals */ + int z_dim1, z_offset, i__1, i__2; + FloatingType d__1, d__2; + + /* Builtin functions */ + // FloatingType sqrt(FloatingType); + + /* Local variables */ + int i__, j; + FloatingType r1, r2; + int jj; + FloatingType cs; + int in; + FloatingType sn, wl, wu; + int iil, iiu; + FloatingType eps, tmp; + int indd, iend, jblk, wend; + FloatingType rmin, rmax; + int itmp; + FloatingType tnrm; + int inde2, itmp2; + FloatingType rtol1, rtol2; + FloatingType scale; + int indgp; + int iinfo, iindw, ilast; + int lwmin; + int wantz; + int alleig; + int ibegin; + int indeig; + int iindbl; + int valeig; + int wbegin; + FloatingType safmin; + FloatingType bignum; + int inderr, iindwk, indgrs, offset; + FloatingType thresh; + int iinspl, ifirst, indwrk, liwmin, nzcmin; + FloatingType pivmin; + int nsplit; + FloatingType smlnum; + int lquery, zquery; + + + /* -- LAPACK computational routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + /* .. Array Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* ODSTMR computes selected eigenvalues and, optionally, eigenvectors */ + /* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ + /* a well defined set of pairwise different real eigenvalues, the corresponding */ + /* real eigenvectors are pairwise orthogonal. */ + + /* The spectrum may be computed either completely or partially by specifying */ + /* either an interval (VL,VU] or a range of indices IL:IU for the desired */ + /* eigenvalues. */ + + /* Depending on the number of desired eigenvalues, these are computed either */ + /* by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */ + /* computed by the use of various suitable L D L^T factorizations near clusters */ + /* of close eigenvalues (referred to as RRRs, Relatively Robust */ + /* Representations). An informal sketch of the algorithm follows. */ + + /* For each unreduced block (submatrix) of T, */ + /* (a) Compute T - sigma I = L D L^T, so that L and D */ + /* define all the wanted eigenvalues to high relative accuracy. */ + /* This means that small relative changes in the entries of D and L */ + /* cause only small relative changes in the eigenvalues and */ + /* eigenvectors. The standard (unfactored) representation of the */ + /* tridiagonal matrix T does not have this property in general. */ + /* (b) Compute the eigenvalues to suitable accuracy. */ + /* If the eigenvectors are desired, the algorithm attains full */ + /* accuracy of the computed eigenvalues only right before */ + /* the corresponding vectors have to be computed, see steps c) and d). */ + /* (c) For each cluster of close eigenvalues, select a new */ + /* shift close to the cluster, find a new factorization, and refine */ + /* the shifted eigenvalues to suitable accuracy. */ + /* (d) For each eigenvalue with a large enough relative separation compute */ + /* the corresponding eigenvector by forming a rank revealing twisted */ + /* factorization. Go back to (c) for any clusters that remain. */ + + /* For more details, see: */ + /* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ + /* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ + /* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ + /* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ + /* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ + /* 2004. Also LAPACK Working Note 154. */ + /* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ + /* tridiagonal eigenvalue/eigenvector problem", */ + /* Computer Science Division Technical Report No. UCB/CSD-97-971, */ + /* UC Berkeley, May 1997. */ + + /* Notes: */ + /* 1.ODSTMR works only on machines which follow IEEE-754 */ + /* floating-point standard in their handling of infinities and NaNs. */ + /* This permits the use of efficient inner loops avoiding a check for */ + /* zero divisors. */ + + /* Arguments */ + /* ========= */ + + /* JOBZ (input) CHARACTER*1 */ + /* = 'N': Compute eigenvalues only; */ + /* = 'V': Compute eigenvalues and eigenvectors. */ + + /* RANGE (input) CHARACTER*1 */ + /* = 'A': all eigenvalues will be found. */ + /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ + /* will be found. */ + /* = 'I': the IL-th through IU-th eigenvalues will be found. */ + + /* N (input) INT */ + /* The order of the matrix. N >= 0. */ + + /* D (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the N diagonal elements of the tridiagonal matrix */ + /* T. On exit, D is overwritten. */ + + /* E (input/output) DOUBLE PRECISION array, dimension (N) */ + /* On entry, the (N-1) subdiagonal elements of the tridiagonal */ + /* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ + /* input, but is used internally as workspace. */ + /* On exit, E is overwritten. */ + + /* VL (input) DOUBLE PRECISION */ + /* VU (input) DOUBLE PRECISION */ + /* If RANGE='V', the lower and upper bounds of the interval to */ + /* be searched for eigenvalues. VL < VU. */ + /* Not referenced if RANGE = 'A' or 'I'. */ + + /* IL (input) INT */ + /* IU (input) INT */ + /* If RANGE='I', the indices (in ascending order) of the */ + /* smallest and largest eigenvalues to be returned. */ + /* 1 <= IL <= IU <= N, if N > 0. */ + /* Not referenced if RANGE = 'A' or 'V'. */ + + /* M (output) INT */ + /* The total number of eigenvalues found. 0 <= M <= N. */ + /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ + + /* W (output) DOUBLE PRECISION array, dimension (N) */ + /* The first M elements contain the selected eigenvalues in */ + /* ascending order. */ + + /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ + /* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ + /* contain the orthonormal eigenvectors of the matrix T */ + /* corresponding to the selected eigenvalues, with the i-th */ + /* column of Z holding the eigenvector associated with W(i). */ + /* If JOBZ = 'N', then Z is not referenced. */ + /* Note: the user must ensure that at least max(1,M) columns are */ + /* supplied in the array Z; if RANGE = 'V', the exact value of M */ + /* is not known in advance and can be computed with a workspace */ + /* query by setting NZC = -1, see below. */ + + /* LDZ (input) INT */ + /* The leading dimension of the array Z. LDZ >= 1, and if */ + /* JOBZ = 'V', then LDZ >= max(1,N). */ + + /* NZC (input) INT */ + /* The number of eigenvectors to be held in the array Z. */ + /* If RANGE = 'A', then NZC >= max(1,N). */ + /* If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */ + /* If RANGE = 'I', then NZC >= IU-IL+1. */ + /* If NZC = -1, then a workspace query is assumed; the */ + /* routine calculates the number of columns of the array Z that */ + /* are needed to hold the eigenvectors. */ + /* This value is returned as the first entry of the Z array, and */ + /* no error message related to NZC is issued by OERBLA. */ + + /* ISUPPZ (output) INT ARRAY, dimension ( 2*max(1,M) ) */ + /* The support of the eigenvectors in Z, i.e., the indices */ + /* indicating the nonzero elements in Z. The i-th computed eigenvector */ + /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ + /* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ + /* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ + + /* TRYRAC (input/output) INT */ + /* If TRYRAC.EQ..TRUE., indicates that the code should check whether */ + /* the tridiagonal matrix defines its eigenvalues to high relative */ + /* accuracy. If so, the code uses relative-accuracy preserving */ + /* algorithms that might be (a bit) slower depending on the matrix. */ + /* If the matrix does not define its eigenvalues to high relative */ + /* accuracy, the code can uses possibly faster algorithms. */ + /* If TRYRAC.EQ..FALSE., the code is not required to guarantee */ + /* relatively accurate eigenvalues and can use the fastest possible */ + /* techniques. */ + /* On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */ + /* does not define its eigenvalues to high relative accuracy. */ + + /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ + /* On exit, if INFO = 0, WORK(1) returns the optimal */ + /* (and minimal) LWORK. */ + + /* LWORK (input) INT */ + /* The dimension of the array WORK. LWORK >= max(1,18*N) */ + /* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ + /* If LWORK = -1, then a workspace query is assumed; the routine */ + /* only calculates the optimal size of the WORK array, returns */ + /* this value as the first entry of the WORK array, and no error */ + /* message related to LWORK is issued by OERBLA. */ + + /* IWORK (workspace/output) INT array, dimension (LIWORK) */ + /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ + + /* LIWORK (input) INT */ + /* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ + /* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ + /* if only the eigenvalues are to be computed. */ + /* If LIWORK = -1, then a workspace query is assumed; the */ + /* routine only calculates the optimal size of the IWORK array, */ + /* returns this value as the first entry of the IWORK array, and */ + /* no error message related to LIWORK is issued by OERBLA. */ + + /* INFO (output) INT */ + /* On exit, INFO */ + /* = 0: successful exit */ + /* < 0: if INFO = -i, the i-th argument had an illegal value */ + /* > 0: if INFO = 1X, internal error in ODRRE, */ + /* if INFO = 2X, internal error in ODRRV. */ + /* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ + /* the nonzero error code returned by ODRRE or */ + /* ODRRV, respectively. */ + + + /* Further Details */ + /* =============== */ + + /* Based on contributions by */ + /* Beresford Parlett, University of California, Berkeley, USA */ + /* Jim Demmel, University of California, Berkeley, USA */ + /* Inderjit Dhillon, University of Texas, Austin, USA */ + /* Osni Marques, LBNL/NERSC, USA */ + /* Christof Voemel, University of California, Berkeley, USA */ + + /* ===================================================================== */ + + /* .. Parameters .. */ + /* .. */ + /* .. Local Scalars .. */ + /* .. */ + /* .. */ + /* .. External Functions .. */ + /* .. */ + /* .. External Subroutines .. */ + /* .. */ + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + /* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --isuppz; + --work; + --iwork; + + /* Function Body */ + wantz = olsame(jobz, "V"); + alleig = olsame(range, "A"); + valeig = olsame(range, "V"); + indeig = olsame(range, "I"); + + lquery = *lwork == -1 || *liwork == -1; + zquery = *nzc == -1; + /* ODSTMR needs WORK of size 6*N, IWORK of size 3*N. */ + /* In addition, ODRRE needs WORK of size 6*N, IWORK of size 5*N. */ + /* Furthermore, ODRRV needs WORK of size 12*N, IWORK of size 7*N. */ + if (wantz) { + lwmin = *n * 18; + liwmin = *n * 10; + } else { + /* need less workspace if only the eigenvalues are wanted */ + lwmin = *n * 12; + liwmin = *n << 3; + } + wl = 0.; + wu = 0.; + iil = 0; + iiu = 0; + if (valeig) { + /* We do not reference VL, VU in the cases RANGE = 'I','A' */ + /* The interval (WL, WU] contains all the wanted eigenvalues. */ + /* It is either given by the user or computed in ODRRE. */ + wl = *vl; + wu = *vu; + } else if (indeig) { + /* We do not reference IL, IU in the cases RANGE = 'V','A' */ + iil = *il; + iiu = *iu; + } + + *info = 0; + if (! (wantz || olsame(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (valeig && *n > 0 && wu <= wl) { + *info = -7; + } else if (indeig && (iil < 1 || iil > *n)) { + *info = -8; + } else if (indeig && (iiu < iil || iiu > *n)) { + *info = -9; + } else if (*ldz < 1 || (wantz && *ldz < *n)) { + *info = -13; + } else if (*lwork < lwmin && ! lquery) { + *info = -17; + } else if (*liwork < liwmin && ! lquery) { + *info = -19; + } + + /* Get machine constants. */ + + safmin = std::numeric_limits::min(); // safmin = odmch_("Safe minimum"); + eps = std::numeric_limits::epsilon(); // eps = odmch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + /* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = fmin(d__1,d__2); + + if (*info == 0) { + work[1] = (FloatingType) lwmin; + iwork[1] = liwmin; + + if (wantz && alleig) { + nzcmin = *n; + } else if (wantz && valeig) { + odrrc("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, & + itmp2, info); + } else if (wantz && indeig) { + nzcmin = iiu - iil + 1; + } else { + /* WANTZ .EQ. FALSE. */ + nzcmin = 0; + } + if (zquery && *info == 0) { + z__[z_dim1 + 1] = (FloatingType) nzcmin; + } else if (*nzc < nzcmin && ! zquery) { + *info = -14; + } + } + if (*info != 0) { + + i__1 = -(*info); + oerbla("ODSTMR", &i__1); + + return 0; + } else if (lquery || zquery) { + return 0; + } + + /* Handle N = 0, 1, and 2 cases immediately */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + w[1] = d__[1]; + } else { + if (wl < d__[1] && wu >= d__[1]) { + *m = 1; + w[1] = d__[1]; + } + } + if (wantz && ! zquery) { + z__[z_dim1 + 1] = 1.; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + + if (*n == 2) { + if (! wantz) { + ode2(&d__[1], &e[1], &d__[2], &r1, &r2); + } else if (wantz && ! zquery) { + odev2(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn); + } + if (alleig || (valeig && r2 > wl && r2 <= wu) || (indeig && iil == 1)) { + ++(*m); + w[*m] = r2; + if (wantz && ! zquery) { + z__[*m * z_dim1 + 1] = -sn; + z__[*m * z_dim1 + 2] = cs; + /* Note: At most one of SN and CS can be zero. */ + if (sn != 0.) { + if (cs != 0.) { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 2; + } else { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 1; + } + } else { + isuppz[(*m << 1) - 1] = 2; + isuppz[*m * 2] = 2; + } + } + } + if (alleig || (valeig && r1 > wl && r1 <= wu) || (indeig && iiu == 2)) { + ++(*m); + w[*m] = r1; + if (wantz && ! zquery) { + z__[*m * z_dim1 + 1] = cs; + z__[*m * z_dim1 + 2] = sn; + /* Note: At most one of SN and CS can be zero. */ + if (sn != 0.) { + if (cs != 0.) { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 2; + } else { + isuppz[(*m << 1) - 1] = 1; + isuppz[(*m << 1) - 1] = 1; + } + } else { + isuppz[(*m << 1) - 1] = 2; + isuppz[*m * 2] = 2; + } + } + } + return 0; + } + /* Continue with general N */ + indgrs = 1; + inderr = (*n << 1) + 1; + indgp = *n * 3 + 1; + indd = (*n << 2) + 1; + inde2 = *n * 5 + 1; + indwrk = *n * 6 + 1; + + iinspl = 1; + iindbl = *n + 1; + iindw = (*n << 1) + 1; + iindwk = *n * 3 + 1; + + /* Scale matrix to allowable range, if necessary. */ + /* The allowable range is related to the PIVMIN parameter; see the */ + /* comments in DLARRD. The preference for scaling small values */ + /* up is heuristic; we expect users' matrices not to be close to the */ + /* RMAX threshold. */ + + scale = 1.; + tnrm = odnst("M", n, &d__[1], &e[1]); + if (tnrm > 0. && tnrm < rmin) { + scale = rmin / tnrm; + } else if (tnrm > rmax) { + scale = rmax / tnrm; + } + if (scale != 1.) { + blas::odscal(n, &scale, &d__[1], &c__1); + i__1 = *n - 1; + blas::odscal(&i__1, &scale, &e[1], &c__1); + tnrm *= scale; + if (valeig) { + /* If eigenvalues in interval have to be found, */ + /* scale (WL, WU] accordingly */ + wl *= scale; + wu *= scale; + } + } + + /* Compute the desired eigenvalues of the tridiagonal after splitting */ + /* into smaller subblocks if the corresponding off-diagonal elements */ + /* are small */ + /* THRESH is the splitting parameter for ODRRE */ + /* A negative THRESH forces the old splitting criterion based on the */ + /* size of the off-diagonal. A positive THRESH switches to splitting */ + /* which preserves relative accuracy. */ + + if (*tryrac) { + /* Test whether the matrix warrants the more expensive relative approach. */ + odrrr(n, &d__[1], &e[1], &iinfo); + } else { + /* The user does not care about relative accurately eigenvalues */ + iinfo = -1; + } + /* Set the splitting criterion */ + if (iinfo == 0) { + thresh = eps; + } else { + thresh = -eps; + /* relative accuracy is desired but T does not guarantee it */ + *tryrac = FALSE_; + } + + if (*tryrac) { + /* Copy original diagonal, needed to guarantee relative accuracy */ + blas::odcpy(n, &d__[1], &c__1, &work[indd], &c__1); + } + /* Store the squares of the offdiagonal values of T */ + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + /* Computing 2nd power */ + d__1 = e[j]; + work[inde2 + j - 1] = d__1 * d__1; + /* L5: */ + } + /* Set the tolerance parameters for bisection */ + if (! wantz) { + /* ODRRE computes the eigenvalues to full precision. */ + rtol1 = eps * 4.; + rtol2 = eps * 4.; + } else { + /* ODRRE computes the eigenvalues to less than full precision. */ + /* ODRRV will refine the eigenvalue approximations, and we can */ + /* need less accurate initial bisection in ODRRE. */ + /* Note: these settings do only affect the subset case and ODRRE */ + rtol1 = sqrt(eps); + /* Computing MAX */ + d__1 = sqrt(eps) * .005, d__2 = eps * 4.; + rtol2 = fmax(d__1,d__2); + } + odrre(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], & + rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[ + inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[ + indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo); + if (iinfo != 0) { + *info = iabs(iinfo) + 10; + return 0; + } + /* Note that if RANGE .NE. 'V', ODRRE computes bounds on the desired */ + /* part of the spectrum. All desired eigenvalues are contained in */ + /* (WL,WU] */ + if (wantz) { + + /* Compute the desired eigenvectors corresponding to the computed */ + /* eigenvalues */ + + odrrv(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, & + c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[ + indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[ + z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], & + iinfo); + if (iinfo != 0) { + *info = iabs(iinfo) + 20; + return 0; + } + } else { + /* ODRRE computes eigenvalues of the (shifted) root representation */ + /* ODRRV returns the eigenvalues of the unshifted matrix. */ + /* However, if the eigenvectors are not desired by the user, we need */ + /* to apply the corresponding shifts from ODRRE to obtain the */ + /* eigenvalues of the original matrix. */ + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + itmp = iwork[iindbl + j - 1]; + w[j] += e[iwork[iinspl + itmp - 1]]; + /* L20: */ + } + } + + if (*tryrac) { + /* Refine computed eigenvalues so that they are relatively accurate */ + /* with respect to the original matrix T. */ + ibegin = 1; + wbegin = 1; + i__1 = iwork[iindbl + *m - 1]; + for (jblk = 1; jblk <= i__1; ++jblk) { + iend = iwork[iinspl + jblk - 1]; + in = iend - ibegin + 1; + wend = wbegin - 1; + /* check if any eigenvalues have to be refined in this block */ + L36: + if (wend < *m) { + if (iwork[iindbl + wend] == jblk) { + ++wend; + goto L36; + } + } + if (wend < wbegin) { + ibegin = iend + 1; + goto L39; + } + offset = iwork[iindw + wbegin - 1] - 1; + ifirst = iwork[iindw + wbegin - 1]; + ilast = iwork[iindw + wend - 1]; + rtol2 = eps * 4.; + odrrj(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], + &ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[ + inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], & + pivmin, &tnrm, &iinfo); + ibegin = iend + 1; + wbegin = wend + 1; + L39: + ; + } + } + + /* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (scale != 1.) { + d__1 = 1. / scale; + blas::odscal(m, &d__1, &w[1], &c__1); + } + + /* If eigenvalues are not in increasing order, then sort them, */ + /* possibly along with eigenvectors. */ + + if (nsplit > 1) { + if (! wantz) { + odsrt("I", m, &w[1], &iinfo); + if (iinfo != 0) { + *info = 3; + return 0; + } + } else { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp) { + i__ = jj; + tmp = w[jj]; + } + /* L50: */ + } + if (i__ != 0) { + w[i__] = w[j]; + w[j] = tmp; + if (wantz) { + blas::odswap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * + z_dim1 + 1], &c__1); + itmp = isuppz[(i__ << 1) - 1]; + isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1]; + isuppz[(j << 1) - 1] = itmp; + itmp = isuppz[i__ * 2]; + isuppz[i__ * 2] = isuppz[j * 2]; + isuppz[j * 2] = itmp; + } + } + /* L60: */ + } + } + } + + + work[1] = (FloatingType) lwmin; + iwork[1] = liwmin; + return 0; + + /* End of ODSTMR */ + + } /* odstmr_ */ + +} //namespace lapack + +} //namespace pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/plarre.hpp b/external/pmrrr/include/pmrrr/plarre.hpp new file mode 100644 index 0000000000..5ac09a9d63 --- /dev/null +++ b/external/pmrrr/include/pmrrr/plarre.hpp @@ -0,0 +1,1224 @@ +/* Parallel computation of eigenvalues and symmetric tridiagonal + * matrix T, given by its diagonal elements D and its super-/sub- + * diagonal elements E. + * + * Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.7 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __PLARRE_HPP__ +#define __PLARRE_HPP__ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define ONE 1.0 +#define HUNDRED 100.0 +#define HALF 0.5 +#define FOURTH 0.25 + +namespace pmrrr { + +namespace detail{ + + /* + * Hide functions relevant only to plarre in an anonymous namespace. + */ + namespace { + + template + static void *eigval_subset_thread_a(void *argin); + + template + static void *eigval_subset_thread_r(void *argin); + + template + static void clean_up_plarre(FloatingType*, FloatingType*, int*, int*, int*); + + template + int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork); + + template + int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork); + + template + int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork); + + template + auxarg1_t *create_auxarg1(int, FloatingType*, FloatingType*, FloatingType*, int, int, + int, int, int, int*, FloatingType, FloatingType, + FloatingType*, FloatingType*, FloatingType*, int*, int*); + + template + void retrieve_auxarg1(auxarg1_t*, int*, FloatingType**, FloatingType**, FloatingType**, + int*, int*, int*, int*, int*, int**, FloatingType*, + FloatingType*, FloatingType**, FloatingType**, FloatingType**, int**, + int**); + + template + auxarg2_t *create_auxarg2(int, FloatingType*, FloatingType*, int, int, FloatingType*, + FloatingType*,FloatingType*,int*,FloatingType, FloatingType, FloatingType, FloatingType); + + template + void retrieve_auxarg2(auxarg2_t*, int*, FloatingType**, FloatingType**, int*, + int*, FloatingType**, FloatingType**, FloatingType**, int**, FloatingType*, FloatingType*, FloatingType*, + FloatingType*); + + } + + /* Routine to compute eigenvalues */ + template + int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct, + val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp) + { + /* input variables */ + int nproc = procinfo->nproc; + bool wantZ = (jobz[0] == 'V' || jobz[0] == 'v'); + bool cntval = (jobz[0] == 'C' || jobz[0] == 'c'); + int n = Dstruct->n; + FloatingType *restrict D = Dstruct->D; + FloatingType *restrict E = Dstruct->E; + int *restrict isplit = Dstruct->isplit; + FloatingType *vl = Wstruct->vl; + FloatingType *vu = Wstruct->vu; + int *il = Wstruct->il; + int *iu = Wstruct->iu; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict Windex = Wstruct->Windex; + int *restrict iblock = Wstruct->iblock; + FloatingType *restrict gersch = Wstruct->gersch; + + /* constants */ + int IZERO = 0, IONE = 1; + FloatingType DZERO = 0.0; + + /* work space */ + FloatingType *E2; + FloatingType *work; + int *iwork; + + /* compute geschgorin disks and spectral diameter */ + FloatingType gl, gu, eold, emax, eabs; + + /* compute splitting points */ + int bl_begin, bl_end, bl_size; + + /* distribute work among processes */ + int ifirst, ilast, ifirst_tmp, ilast_tmp; + int chunk, isize, iil, iiu; + + /* gather results */ + int *rcount, *rdispl; + + /* others */ + int info, i, j, jbl, idummy; + FloatingType tmp1, dummy; + bool sorted; + enum range_enum {allrng=1, valrng=2, indrng=3} irange; + FloatingType intervals[2]; + int negcounts[2]; + FloatingType sigma; + + if (range[0] == 'A' || range[0] == 'a') { + irange = allrng; + } else if (range[0] == 'V' || range[0] == 'v') { + irange = valrng; + } else if (range[0] == 'I' || range[0] == 'i') { + irange = indrng; + } else { + return 1; + } + + /* allocate work space */ + E2 = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(E2 != NULL); + work = (FloatingType *) malloc( 4*n * sizeof(FloatingType) ); + assert(work != NULL); + iwork = (int *) malloc( 3*n * sizeof(int) ); + assert(iwork != NULL); + rcount = (int *) malloc( nproc * sizeof(int) ); + assert(rcount != NULL); + rdispl = (int *) malloc( nproc * sizeof(int) ); + assert(rdispl != NULL); + + /* Compute square of off-diagonal elements */ + for (i=0; i= emax) emax = eabs; + tmp1 = eabs + eold; + gersch[2*i] = D[i] - tmp1; + gl = fmin(gl, gersch[2*i]); + gersch[2*i+1] = D[i] + tmp1; + gu = fmax(gu, gersch[2*i+1]); + eold = eabs; + } + /* min. pivot allowed in the Sturm sequence of T */ + tolstruct->pivmin = std::numeric_limits::min() * fmax(1.0, emax*emax); + /* estimate of spectral diameter */ + Dstruct->spdiam = gu - gl; + + /* compute splitting points with threshold "split" */ + lapack::odrra(&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam, + &Dstruct->nsplit, isplit, &info); + assert(info == 0); + + if (irange == allrng || irange == indrng) { + *vl = gl; + *vu = gu; + } + + /* set eigenvalue indices in case of all or subset by value has + * to be computed; thereby convert all problem to subset by index + * computation */ + if (irange == allrng) { + *il = 1; + *iu = n; + } else if (irange == valrng) { + intervals[0] = *vl; intervals[1] = *vu; + + /* find negcount at boundaries 'vl' and 'vu'; + * needs work of dim(n) and iwork of dim(n) */ + lapack::odebz(&IONE, &IZERO, &n, &IONE, &IONE, &IZERO, + &DZERO, &DZERO, &tolstruct->pivmin, D, E, E2, &idummy, + intervals, &dummy, &idummy, negcounts, work, + iwork, &info); + assert(info == 0); + + /* update negcounts of whole matrix with negcounts found for block */ + *il = negcounts[0] + 1; + *iu = negcounts[1]; + } + + if (cntval && irange == valrng) { + /* clean up and return */ + *nzp = iceil(*iu-*il+1, nproc); + clean_up_plarre(E2, work, iwork, rcount, rdispl); + return 0; + } + + + /* loop over unreduced blocks */ + bl_begin = 0; + + for (jbl=0; jblnsplit; jbl++) { + + bl_end = isplit[jbl] - 1; + bl_size = bl_end - bl_begin + 1; + + /* deal with 1x1 block immediately */ + if (bl_size == 1) { + E[bl_end] = 0.0; + W[bl_begin] = D[bl_begin]; + Werr[bl_begin] = 0.0; + Werr[bl_begin] = 0.0; + iblock[bl_begin] = jbl + 1; + Windex[bl_begin] = 1; + bl_begin = bl_end + 1; + continue; + } + + /* Indix range of block */ + iil = 1; + iiu = bl_size; + + /* each process computes a subset of the eigenvalues of the block */ + ifirst_tmp = iil; + for (i=0; ipid) { + ifirst = ifirst_tmp; + ilast = ilast_tmp; + isize = ilast - ifirst + 1; + *offsetp = ifirst - iil; + *nzp = isize; + } + rcount[i] = ilast_tmp - ifirst_tmp + 1; + rdispl[i] = ifirst_tmp - iil; + ifirst_tmp = ilast_tmp + 1; + ifirst_tmp = imin(ifirst_tmp, iiu + 1); + } + + /* approximate eigenvalues of input assigned to process */ + if (isize != 0) { + info = eigval_approx_proc(procinfo, ifirst, ilast, + bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], + &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], + tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], + work, iwork); + assert(info == 0); + } + + /* compute root representation of block */ + info = eigval_root_proc(procinfo, ifirst, ilast, + bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], + &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], + tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], + work, iwork); + assert(info == 0); + + /* refine eigenvalues assigned to process w.r.t root */ + if (isize != 0) { + info = eigval_refine_proc(procinfo, ifirst, ilast, + bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], + &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], + tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], + work, iwork); + assert(info == 0); + } + + memcpy(work, &W[bl_begin], isize * sizeof(FloatingType) ); + MPI_Allgatherv(work, isize, float_traits::mpi_type(), &W[bl_begin], rcount, rdispl, + float_traits::mpi_type(), procinfo->comm); + + memcpy(work, &Werr[bl_begin], isize * sizeof(FloatingType) ); + MPI_Allgatherv(work, isize, float_traits::mpi_type(), &Werr[bl_begin], rcount, rdispl, + float_traits::mpi_type(), procinfo->comm); + + memcpy(iwork, &Windex[bl_begin], isize * sizeof(int) ); + MPI_Allgatherv(iwork, isize, MPI_INT, &Windex[bl_begin], rcount, rdispl, + MPI_INT, procinfo->comm); + + /* Ensure that within block eigenvalues sorted */ + sorted = false; + while (sorted == false) { + sorted = true; + for (j=bl_begin; j < bl_end; j++) { + if (W[j+1] < W[j]) { + sorted = false; + tmp1 = W[j]; + W[j] = W[j+1]; + W[j+1] = tmp1; + tmp1 = Werr[j]; + Werr[j] = Werr[j+1]; + Werr[j+1] = tmp1; + } + } + } + + /* Set indices index correctly */ + for (j=bl_begin; j <= bl_end; j++) + iblock[j] = jbl + 1; + + /* Recompute gaps within the blocks */ + for (j = bl_begin; j < bl_end; j++) { + Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) ); + } + sigma = E[bl_end]; + Wgap[bl_end] = fmax(0.0, (gu - sigma) - (W[bl_end] + Werr[bl_end]) ); + + /* Compute UNSHIFTED eigenvalues */ + if (!wantZ) { + sigma = E[bl_end]; + for (i=bl_begin; i<=bl_end; i++) { + W[i] += sigma; + } + } + + /* Proceed with next block */ + bl_begin = bl_end + 1; + } + /* end of loop over unreduced blocks */ + + /* free memory */ + clean_up_plarre(E2, work, iwork, rcount, rdispl); + + return 0; + } + + namespace { + + /* + * Free's on allocated memory of plarre routine + */ + template + void clean_up_plarre(FloatingType *E2, FloatingType *work, int *iwork, + int *rcount, int *rdispl) + { + free(E2); + free(work); + free(iwork); + free(rcount); + free(rdispl); + } + + #ifndef DISABLE_PTHREADS + template + int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork) + { + /* Input parameter */ + int isize = ilast-ifirst+1; + FloatingType pivmin = tolstruct->pivmin; + + + /* /\* Multithreading *\/ */ + int max_nthreads = procinfo->nthreads; + int iifirst, iilast, chunk; + pthread_t *threads; + pthread_attr_t attr; + auxarg1_t *auxarg1; + + /* Others */ + int info, m, i, j; + FloatingType dummy; + + /* Allocate workspace */ + int *isplit = (int *) malloc( n * sizeof(int) ); + assert(isplit != NULL); + threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) ); + assert(threads != NULL); + + /* This is an unreduced block */ + int nsplit = 1; + isplit[0] = n; + + if (max_nthreads > 1) { + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + } + + /* Set tolerance parameters */ + FloatingType bsrtol = sqrt(std::numeric_limits::epsilon()); + + + /* APPROXIMATE EIGENVALUES */ + + /* compute approximations of the eigenvalues with muliple threads */ + /* equivalent to: */ + /* dlarrd_("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, */ + /* &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, */ + /* &wl, &wu, iblock, Windex, work, iwork, &info); */ + /* assert(info == 0); */ + /* assert(m == ilast-ifirst+1); */ + + int nthreads = max_nthreads; + while (nthreads > 1 && isize / nthreads < 2) + nthreads--; + + if (nthreads > 1) { + + /* each threads computes W[iifirst:iilast] and places them in + * work[0:n-1]; the corresponding errors in work[n:2*n-1]; + * the blocks they belong in iwork[0:n-1]; and their indices in + * iwork[n:2*n-1]; */ + + iifirst = ifirst; + chunk = isize / nthreads; + for (i=1; i, + (void *) auxarg1); + assert(info == 0); + + iifirst = iilast + 1; + } + iilast = ilast; + + auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast, + nsplit, isplit, bsrtol, pivmin, gersch, + &work[0], &work[n], &iwork[n], &iwork[0]); + + void * status = eigval_subset_thread_a( (void *) auxarg1 ); + assert(status == NULL); + + /* join threads */ + for (i=1; i 1) { + pthread_attr_destroy(&attr); + } + + return 0; + } + #else + template + int eigval_approx_proc + (proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork) + { + /* Input parameter */ + FloatingType pivmin = tolstruct->pivmin; + + /* Allocate workspace */ + int *isplit = (int *) malloc( n * sizeof(int) ); + assert(isplit != NULL); + + /* This is an unreduced block */ + int nsplit = 1; + isplit[0] = n; + + /* Set tolerance parameters */ + FloatingType bsrtol = sqrt(std::numeric_limits::epsilon()); + + /* APPROXIMATE EIGENVALUES */ + int m, info; + FloatingType wl, wu, dummy; + lapack::odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, + &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, + &wl, &wu, iblock, Windex, work, iwork, &info); + assert(info == 0); + assert(m == ilast-ifirst+1); + + /* clean up */ + free(isplit); + + return 0; + } + #endif + + template + int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork) + { + /* Input parameter */ + FloatingType pivmin = tolstruct->pivmin; + + /* Create random vector to perturb rrr, same seed */ + int two_n = 2*n; + int iseed[4] = {1,1,1,1}; + + FloatingType Dpivot, Dmax; + bool noREP; + + int info, i, j; + int IONE = 1, ITWO = 2; + FloatingType tmp, tmp1, tmp2; + + /* Set tolerance parameters (need to be same as in refine function) */ + FloatingType rtl = sqrt(std::numeric_limits::epsilon()); + + /* Allocate workspace */ + FloatingType *randvec = (FloatingType *) malloc( 2*n * sizeof(FloatingType) ); + assert(randvec != NULL); + + /* create random vector to perturb rrr and broadcast it */ + lapack::odrnv(&ITWO, iseed, &two_n, randvec); + + /* store shift of initial RRR, here set to zero */ + E[n-1] = 0.0; + + /* find outer bounds GL, GU for block and spectral diameter */ + FloatingType gl = D[0]; + FloatingType gu = D[0]; + for (i = 0; i < n; i++) { + gl = fmin(gl, gersch[2*i] ); + gu = fmax(gu, gersch[2*i+1]); + } + FloatingType spdiam = gu - gl; + + /* find approximation of extremal eigenvalues of the block + * odrrk computes one eigenvalue of tridiagonal matrix T + * tmp1 and tmp2 one hold the eigenvalue and error, respectively */ + lapack::odrrk(&n, &IONE, &gl, &gu, D, E2, + &pivmin, &rtl, &tmp1, &tmp2, &info); + assert(info == 0); /* if info=-1 => eigenvalue did not converge */ + + FloatingType isleft = fmax(gl, tmp1-tmp2 - HUNDRED*std::numeric_limits::epsilon()*fabs(tmp1-tmp2) ); + + lapack::odrrk(&n, &n, &gl, &gu, D, E2, + &pivmin, &rtl, &tmp1, &tmp2, &info); + assert(info == 0); /* if info=-1 => eigenvalue did not converge */ + + FloatingType isright = fmin(gu, tmp1+tmp2 + HUNDRED*std::numeric_limits::epsilon()*fabs(tmp1+tmp2) ); + + spdiam = isright - isleft; + + /* compute negcount at points s1 and s2 */ + FloatingType s1 = isleft + HALF * spdiam; + FloatingType s2 = isright - FOURTH * spdiam; /* not needed currently */ + + /* compute negcount at points s1 and s2 */ + /* cnt = number of eigenvalues in (s1,s2] = count_right - count_left + * negcnt_lft = number of eigenvalues smaller equals than s1 + * negcnt_rgt = number of eigenvalues smaller equals than s2 */ + int cnt, negcnt_lft, negcnt_rgt; + lapack::odrrc("T", &n, &s1, &s2, D, E, &pivmin, + &cnt, &negcnt_lft, &negcnt_rgt, &info); + assert(info == 0); + + /* if more of the desired eigenvectors are in the left part shift left + * and the other way around */ + int sgndef; + FloatingType sigma; + if ( negcnt_lft >= n - negcnt_lft ) { + /* shift left */ + sigma = isleft; + sgndef = ONE; + } else { + /* shift right */ + sigma = isright; + sgndef = -ONE; + } + + /* define increment to perturb initial shift to find RRR + * with not too much element growth */ + FloatingType tau = spdiam*std::numeric_limits::epsilon()*n + 2.0*pivmin; + + + /* try to find initial RRR of block: + * need work space of 3*n here to store D, L, D^-1 of possible + * representation: + * D_try = work[0 : n-1] + * L_try = work[n :2*n-1] + * inv(D_try) = work[2*n:3*n-1] */ + + int off_L = n; + int off_invD = 2*n; + + int jtry; + for (jtry = 0; jtry < MAX_TRY_RRR; jtry++) { + + Dpivot = D[0] - sigma; + work[0] = Dpivot; + Dmax = fabs( work[0] ); + j = 0; + + for (i = 0; i < n-1; i++) { + work[i+off_invD] = 1.0 / work[i]; + tmp = E[j] * work[i+off_invD]; + work[i+off_L] = tmp; + Dpivot = (D[j+1] - sigma) - tmp*E[j]; + work[i+1] = Dpivot; + Dmax = fmax(Dmax, fabs(Dpivot) ); + j++; + } + + /* except representation only if not too much element growth */ + if (Dmax > MAX_GROWTH*spdiam) { + noREP = true; + } else { + noREP = false; + } + + if (noREP == true) { + /* if all eigenvalues are desired shift is made definite to use DQDS + * so we should not end here */ + if (jtry == MAX_TRY_RRR-2) { + if (sgndef == ONE) { /* floating point comparison okay here */ + sigma = gl - FUDGE_FACTOR*spdiam*std::numeric_limits::epsilon()*n + - FUDGE_FACTOR*2.0*pivmin; + } else { + sigma = gu + FUDGE_FACTOR*spdiam*std::numeric_limits::epsilon()*n + + FUDGE_FACTOR*2.0*pivmin; + } + } else if (jtry == MAX_TRY_RRR-1) { + fprintf(stderr,"No initial representation could be found.\n"); + exit(3); + } else { + sigma -= sgndef*tau; + tau *= 2.0; + continue; + } + } else { /* found representation */ + break; + } + } + /* end trying to find initial RRR of block */ + + /* save initial RRR and corresponding shift */ + memcpy(D, &work[0], n * sizeof(FloatingType) ); + memcpy(E, &work[n], (n-1) * sizeof(FloatingType) ); + E[n-1] = sigma; + /* work[0:4*n-1] can now be used again for anything */ + + /* perturb root rrr by small relative amount, first make sure + * that at least two values are actually disturbed enough, + * which might not be necessary */ + while( fabs(randvec[0])*RAND_FACTOR < 1.0 ) + randvec[0] *= 2.0; + while( fabs(randvec[n-1]) *RAND_FACTOR < 1.0 ) + randvec[n-1] *= 2.0; + + for (i=0; i::epsilon()*RAND_FACTOR*randvec[i]; + E[i] *= 1.0 + std::numeric_limits::epsilon()*RAND_FACTOR*randvec[i+n]; + } + D[n-1] *= 1.0 + std::numeric_limits::epsilon()*RAND_FACTOR*randvec[n-1]; + + /* clean up */ + free(randvec); + + return 0; + } + + #ifndef DISABLE_PTHREADS + template + int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, + int *iwork) + { + /* Input parameter */ + int isize = ilast-ifirst+1; + FloatingType pivmin = tolstruct->pivmin; + + /* Multithreading */ + int nthreads; + int max_nthreads = procinfo->nthreads; + int chunk; + pthread_t *threads; + pthread_attr_t attr; + auxarg2_t *auxarg2; + + int info, i; + + /* Allocate space */ + threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) ); + assert(threads != NULL); + int *isplit = (int *) malloc( n * sizeof(int) ); + assert(isplit != NULL); + + /* This is an unreduced block */ + isplit[0] = n; + + /* Prepare multi-threading */ + if (max_nthreads > 1) { + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + } + + /* find outer bounds GL, GU for block and spectral diameter */ + FloatingType gl = D[0]; + FloatingType gu = D[0]; + for (i = 0; i < n; i++) { + gl = fmin(gl, gersch[2*i] ); + gu = fmax(gu, gersch[2*i+1]); + } + FloatingType spdiam = gu - gl; + + /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */ + + int i_low = Windex[0]; + int i_upp = Windex[isize-1]; + FloatingType sigma = E[n-1]; + + /* calculate gaps */ + for (i=0; i::epsilon(); + } + + /* work for sequential odrrb = work[0:2*n-1] + * iwork for sequential odrrb = iwork[0:2*n-1] + * DE2 = work[2*n:3*n-1] strting at bl_begin */ + int off_DE2 = 2*n; + + /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */ + for (i=0; i 1 && isize/nthreads < 2) { + nthreads--; + } + + if (nthreads > 1) { + + int rf_begin = 0, rf_end; + chunk = isize / nthreads; + for (i=1; irtol1, tolstruct->rtol2, + pivmin, spdiam); + + info = pthread_create(&threads[i], &attr, + eigval_subset_thread_r, + (void *) auxarg2); + assert(info == 0); + + rf_begin = rf_end + 1; + } + rf_end = isize-1; + + auxarg2 = create_auxarg2(n, D, + &work[off_DE2], + rf_begin, rf_end, W, Werr, Wgap, Windex, + tolstruct->rtol1, tolstruct->rtol2, + pivmin, spdiam); + + void * status = eigval_subset_thread_r( (void *) auxarg2 ); + assert(status == NULL); + + /* join threads */ + for (i=1; irtol1, &tolstruct->rtol2, &offset, W, Wgap, + Werr, work, iwork, &pivmin, &spdiam, &n, &info); + assert(info == 0); + /* needs work of dim(2*n) and iwork of dim(2*n) */ + } + /* odrrb computes gaps correctly, but not last one; + * this is ignored since the gaps are recomputed anyway */ + + /* clean up */ + free(threads); + free(isplit); + + if (max_nthreads > 1) { + pthread_attr_destroy(&attr); + } + + return 0; + } + #else + template + int eigval_refine_proc + (proc_t *procinfo, int ifirst, int ilast, + int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int *Windex, int *iblock, FloatingType *gersch, tol_t *tolstruct, + FloatingType *W, FloatingType *Werr, FloatingType *Wgap, FloatingType *work, int *iwork) + { + /* Input parameter */ + int isize = ilast-ifirst+1; + FloatingType pivmin = tolstruct->pivmin; + + /* Allocate space */ + int *isplit = (int *) malloc( n * sizeof(int) ); + assert(isplit != NULL); + + /* This is an unreduced block (nsplit=1) */ + isplit[0] = n; + + /* find outer bounds GL, GU for block and spectral diameter */ + FloatingType gl = D[0]; + FloatingType gu = D[0]; + int i; + for (i = 0; i < n; i++) { + gl = fmin(gl, gersch[2*i] ); + gu = fmax(gu, gersch[2*i+1]); + } + FloatingType spdiam = gu - gl; + + /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */ + + int i_low = Windex[0]; + int i_upp = Windex[isize-1]; + FloatingType sigma = E[n-1]; + + /* calculate gaps */ + for (i=0; i::epsilon(); + } + + /* work for sequential odrrb = work[0:2*n-1] + * iwork for sequential odrrb = iwork[0:2*n-1] + * DE2 = work[2*n:3*n-1] strting at bl_begin */ + int off_DE2 = 2*n; + + /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */ + for (i=0; irtol1, &tolstruct->rtol2, &offset, W, Wgap, + Werr, work, iwork, &pivmin, &spdiam, &n, &info); + assert(info == 0); + /* needs work of dim(2*n) and iwork of dim(2*n) */ + + /* odrrb computes gaps correctly, but not last one; + * this is ignored since the gaps are recomputed anyway */ + + /* clean up */ + free(isplit); + + return 0; + } + #endif + + template + void *eigval_subset_thread_a(void *argin) + { + /* from input argument */ + int n, il, iu, my_il, my_iu; + FloatingType *D, *E, *E2, *gersch; + FloatingType bsrtol, pivmin; + int nsplit, *isplit; + + /* others */ + int info; + FloatingType dummy1, dummy2; + int num_vals; + FloatingType *W_tmp, *Werr_tmp, *W, *Werr; + int *iblock_tmp, *Windex_tmp, *iblock, *Windex; + FloatingType *work; + int *iwork; + + retrieve_auxarg1((auxarg1_t *) argin, &n, &D, &E, &E2, + &il, &iu, &my_il, &my_iu, &nsplit, + &isplit, &bsrtol, &pivmin, &gersch, + &W, &Werr, &Windex, &iblock); + + /* allocate memory */ + W_tmp = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(W_tmp != NULL); + + Werr_tmp = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(Werr_tmp != NULL); + + Windex_tmp = (int *) malloc( n * sizeof(int) ); + assert(Windex_tmp != NULL); + + iblock_tmp = (int *) malloc( n * sizeof(int) ); + assert(iblock_tmp != NULL); + + work = (FloatingType *) malloc( 4*n * sizeof(FloatingType) ); + assert (work != NULL); + + iwork = (int *) malloc( 3*n * sizeof(int) ); + assert (iwork != NULL); + + /* compute eigenvalues 'my_il' to 'my_iu', put into temporary arrays */ + lapack::odrrd("I", "B", &n, &dummy1, &dummy2, &my_il, &my_iu, gersch, + &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &num_vals, + W_tmp, Werr_tmp, &dummy1, &dummy2, iblock_tmp, Windex_tmp, + work, iwork, &info); + + assert(info == 0); + + /* copy computed values in W, Werr, Windex, iblock (which are work space) */ + memcpy(&W[my_il-il], W_tmp, num_vals * sizeof(FloatingType) ); + memcpy(&Werr[my_il-il], Werr_tmp, num_vals * sizeof(FloatingType) ); + memcpy(&Windex[my_il-il], Windex_tmp, num_vals * sizeof(int) ); + memcpy(&iblock[my_il-il], iblock_tmp, num_vals * sizeof(int) ); + + free(W_tmp); + free(Werr_tmp); + free(Windex_tmp); + free(iblock_tmp); + free(work); + free(iwork); + + return NULL; + } + + template + auxarg1_t *create_auxarg1(int n, FloatingType *D, FloatingType *E, FloatingType *E2, + int il, int iu, int my_il, int my_iu, + int nsplit, int *isplit, FloatingType bsrtol, + FloatingType pivmin, FloatingType *gersch, FloatingType *W, + FloatingType *Werr, int *Windex, int *iblock) + { + auxarg1_t *arg; + + arg = (auxarg1_t *) malloc( sizeof(auxarg1_t) ); + assert(arg != NULL); + + arg->n = n; + arg->D = D; + arg->E = E; + arg->E2 = E2; + arg->il = il; + arg->iu = iu; + arg->my_il = my_il; + arg->my_iu = my_iu; + arg->nsplit = nsplit; + arg->isplit = isplit; + arg->bsrtol = bsrtol; + arg->pivmin = pivmin; + arg->gersch = gersch; + arg->W = W; + arg->Werr = Werr; + arg->Windex = Windex; + arg->iblock = iblock; + + return arg; + } + + template + void retrieve_auxarg1(auxarg1_t *arg, int *n, FloatingType **D, FloatingType **E, + FloatingType **E2, int *il, int *iu, int *my_il, + int *my_iu, int *nsplit, int **isplit, + FloatingType *bsrtol, FloatingType *pivmin, FloatingType **gersch, + FloatingType **W, FloatingType **Werr, int **Windex, + int **iblock) + { + *n = arg->n; + *D = arg->D; + *E = arg->E; + *E2 = arg->E2; + *il = arg->il; + *iu = arg->iu; + *my_il = arg->my_il; + *my_iu = arg->my_iu; + *nsplit = arg->nsplit; + *isplit = arg->isplit; + *bsrtol = arg->bsrtol; + *pivmin = arg->pivmin; + *gersch = arg->gersch; + *W = arg->W; + *Werr = arg->Werr; + *Windex = arg->Windex; + *iblock = arg->iblock; + + free(arg); + } + + template + void *eigval_subset_thread_r(void *argin) + { + /* from input argument */ + int bl_size, rf_begin, rf_end; + FloatingType *D, *DE2; + FloatingType rtol1, rtol2, pivmin; + FloatingType bl_spdiam; + + /* others */ + int info, offset; + FloatingType *W, *Werr, *Wgap; + int *Windex; + FloatingType *work; + int *iwork; + + retrieve_auxarg2((auxarg2_t *) argin, &bl_size, &D, &DE2, + &rf_begin, &rf_end, &W, &Werr, &Wgap, &Windex, &rtol1, &rtol2, + &pivmin, &bl_spdiam); + + /* malloc work space */ + work = (FloatingType *) malloc( 2*bl_size * sizeof(FloatingType) ); + assert(work != NULL); + + iwork = (int *) malloc( 2*bl_size * sizeof(int) ); + assert(iwork != NULL); + + /* special case of only one eigenvalue */ + if (rf_begin == rf_end) + Wgap[rf_begin] = 0.0; + + offset = Windex[rf_begin] - 1; + + /* call bisection routine to refine the eigenvalues */ + lapack::odrrb(&bl_size, D, DE2, &Windex[rf_begin], &Windex[rf_end], + &rtol1, &rtol2, &offset, &W[rf_begin], &Wgap[rf_begin], + &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, + &bl_size, &info); + assert(info == 0); + + /* clean up */ + free(work); + free(iwork); + + return NULL; + } + + template + auxarg2_t *create_auxarg2(int bl_size, FloatingType *D, FloatingType *DE2, + int rf_begin, int rf_end, FloatingType *W, FloatingType *Werr, + FloatingType *Wgap, int *Windex, + FloatingType rtol1, FloatingType rtol2, FloatingType pivmin, + FloatingType bl_spdiam) + { + auxarg2_t *arg; + + arg = (auxarg2_t *) malloc( sizeof(auxarg2_t) ); + assert(arg != NULL); + + arg->bl_size = bl_size; + arg->D = D; + arg->DE2 = DE2; + arg->rf_begin = rf_begin; + arg->rf_end = rf_end; + arg->W = W; + arg->Werr = Werr; + arg->Wgap = Wgap; + arg->Windex = Windex; + arg->rtol1 = rtol1; + arg->rtol2 = rtol2; + arg->pivmin = pivmin; + arg->bl_spdiam = bl_spdiam; + + return arg; + } + + template + void retrieve_auxarg2(auxarg2_t *arg, int *bl_size, FloatingType **D, + FloatingType **DE2, int *rf_begin, int *rf_end, + FloatingType **W, FloatingType **Werr, FloatingType **Wgap, int **Windex, + FloatingType *rtol1, FloatingType *rtol2, + FloatingType *pivmin, FloatingType *bl_spdiam) + { + *bl_size = arg->bl_size; + *D = arg->D; + *DE2 = arg->DE2; + *rf_begin = arg->rf_begin; + *rf_end = arg->rf_end; + *W = arg->W; + *Werr = arg->Werr; + *Wgap = arg->Wgap; + *Windex = arg->Windex; + *rtol1 = arg->rtol1; + *rtol2 = arg->rtol2; + *pivmin = arg->pivmin; + *bl_spdiam = arg->bl_spdiam; + + free(arg); + } + } // anonymous + +} // detail + +} // pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/plarrv.hpp b/external/pmrrr/include/pmrrr/plarrv.hpp new file mode 100644 index 0000000000..1077c358de --- /dev/null +++ b/external/pmrrr/include/pmrrr/plarrv.hpp @@ -0,0 +1,744 @@ +/* Parallel computation of eigenvectors and symmetric tridiagonal + * matrix T, which is preprocessed by the routine 'plarre'. + * + * Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __PLARRV_HPP__ +#define __PLARRV_HPP__ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using std::sort; + +namespace pmrrr { namespace detail{ + + namespace { + + template + int assign_to_proc(proc_t *procinfo, in_t *Dstruct, + val_t *Wstruct, vec_t *Zstruct, int *nzp, + int *myfirstp); + + template + int init_workQ(proc_t *procinfo, in_t *Dstruct, + val_t *Wstruct, int *nzp, + workQ_t *workQ); + + template + void *empty_workQ(void*); + + static workQ_t *create_workQ(); + + static void destroy_workQ(workQ_t*); + + template + auxarg3_t *create_auxarg3(int, proc_t*, val_t*, vec_t*, + tol_t*, workQ_t*, counter_t*); + + template + void retrieve_auxarg3(auxarg3_t*, int*, proc_t**, val_t**, + vec_t**, tol_t**, workQ_t**, + counter_t**); + + template + bool cmp(const sort_struct_t & arg1, const sort_struct_t & arg2); + + } + + +/* + * Computation of eigenvectors of a symmetric tridiagonal + */ +#ifndef DISABLE_PTHREADS + template + int plarrv(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, + vec_t *Zstruct, tol_t *tolstruct, int *nzp, + int *myfirstp) + { + /* Input variables */ + int nthreads = procinfo->nthreads; + int n = Dstruct->n; + FloatingType *W = Wstruct->W; + + /* Allocate work space and copy eigenvalues */ + FloatingType *Wshifted = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(Wshifted != NULL); + + memcpy(Wshifted, W, n*sizeof(FloatingType)); + Wstruct->Wshifted = Wshifted; + + /* Multi-threading */ + pthread_t *threads = (pthread_t *) malloc(nthreads * sizeof(pthread_t)); + assert(threads != NULL); + + /* Assign eigenvectors to processes */ + assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, + myfirstp); + + /* Create work queue Q, counter, threads to empty Q */ + workQ_t *workQ = create_workQ(); + counter_t *num_left = PMR_create_counter(*nzp); + + threads[0] = pthread_self(); + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + + int i; + for (i=1; i *auxarg = + create_auxarg3(i, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left); + int info = pthread_create(&threads[i], &attr, empty_workQ, (void *) auxarg); + assert(info == 0); + } + + /* Initialize work queue of process */ + int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ); + assert(info == 0); + + /* Empty the work queue */ + auxarg3_t * auxarg = + create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left); + void * status = empty_workQ((void *) auxarg); + assert(status == NULL); + + /* Join all the worker thread */ + for (i=1; i + int plarrv(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, + vec_t *Zstruct, tol_t *tolstruct, int *nzp, + int *myfirstp) + { + int n = Dstruct->n; + FloatingType *W = Wstruct->W; + + /* Allocate work space and copy eigenvalues */ + FloatingType *Wshifted = (FloatingType*)malloc(n*sizeof(FloatingType)); + assert(Wshifted != NULL); + + memcpy(Wshifted, W, n*sizeof(FloatingType)); + Wstruct->Wshifted = Wshifted; + + /* Assign eigenvectors to processes */ + assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp); + + /* Create work queue Q, counter, threads to empty Q */ + workQ_t *workQ = create_workQ(); + counter_t *num_left = PMR_create_counter(*nzp); + + /* Initialize work queue of process */ + int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ); + assert(info == 0); + + /* Empty the work queue */ + auxarg3_t *auxarg = + create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left); + void *status = empty_workQ((void*)auxarg); + assert(status == NULL); + + /* Clean up */ + free(Wshifted); + destroy_workQ(workQ); + PMR_destroy_counter(num_left); + + return 0; + } +#endif + + namespace { + + /* + * Assign the computation of eigenvectors to the processes + */ + template + int assign_to_proc(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, + vec_t *Zstruct, int *nzp, int *myfirstp) + { + /* From inputs */ + int pid = procinfo->pid; + int nproc = procinfo->nproc; + FloatingType *restrict L = Dstruct->E; + int *restrict isplit = Dstruct->isplit; + int n = Wstruct->n; + int il = *(Wstruct->il); + int iu = *(Wstruct->iu); + FloatingType *restrict W = Wstruct->W; + int *restrict Windex = Wstruct->Windex; + int *restrict iblock = Wstruct->iblock; + int *restrict iproc = Wstruct->iproc; + int *restrict Zindex = Zstruct->Zindex; + + sort_struct_t *array = + (sort_struct_t *) malloc(n*sizeof(sort_struct_t)); + + int i; + for (i=0; i); + + /* Mark eigenvectors that do not need to be computed */ + int j; + for (j = 0; j < il-1; j++ ) { + iproc[array[j].ind] = -1; + Zindex[array[j].ind] = -1; + } + + int isize = iu - il + 1; + + int ibegin = il - 1, iend; + int id; + for (id=0; idnz = *nzp; + } + + ibegin = iend + 1; + ibegin = imin(ibegin, iu); + } /* end id */ + + for (j = iend+1; j < n; j++ ) { + iproc[array[j].ind] = -1; + Zindex[array[j].ind] = -1; + } + + free(array); + return 0; + } + + /* + * Compare function for using qsort() on an array of + * sort_structs + */ + template + bool cmp(const sort_struct_t & arg1, const sort_struct_t & arg2) + { + /* Within block local index decides */ + if (arg1.block_ind == arg2.block_ind) { + // Was: (arg1.local_ind - arg2.local_ind) + // Now: true iff arg1.local_ind is lower, false for equal or greater + return arg1.local_ind < arg2.local_ind; + } else { + if (arg1.lambda < arg2.lambda) { + return true; + } else if (arg1.lambda > arg2.lambda) { + return false; + } else { + if (arg1.local_ind < arg2.local_ind) + return true; + else + return false; + } + } + } + + + /* + * Initialize work queue by putting all tasks for the process + * into the work queue. + */ + template + int init_workQ(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, + int *nzp, workQ_t *workQ) + { + /* Input arguments */ + int pid = procinfo->pid; + int nproc = procinfo->nproc; + int nthreads = procinfo->nthreads; + FloatingType *restrict D = Dstruct->D; + FloatingType *restrict L = Dstruct->E; + int nsplit = Dstruct->nsplit; + int *restrict isplit = Dstruct->isplit; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict iproc = Wstruct->iproc; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + FloatingType *restrict gersch = Wstruct->gersch; + int nz = *nzp; + + /* Loop over blocks */ + int i, j, k, l; + int ibegin = 0; + for ( j=0; j *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0); + PMR_increment_rrr_dependencies(RRR); + + /* In W apply shift of current block to eigenvalues + * to get unshifted values w.r.t. T */ + for (i=ibegin; i<=iend; i++) { + W[i] += sigma; + } + + /* Split eigenvalues of block into singletons and clusters + * and add them to process work queue */ + int max_size = imax(1, nz/nthreads); + bool task_inserted = false; + int new_first=ibegin, new_last; + int sn_first, sn_last, sn_size; + for (i=ibegin; i<=iend; i++) { + + if (i == iend) + new_last = i; + else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i])) + new_last = i; + else + continue; + + /* Skip rest if no eigenvalues of process */ + if (new_first > iWend || new_last < iWbegin) { + new_first = i + 1; + continue; + } + + int new_size = new_last - new_first + 1; + + if (new_size == 1) { + /* Singleton was found */ + + if (new_first < iWbegin || new_first > iWend) { + new_first = i + 1; + continue; + } else { + if (new_first==iWbegin || task_inserted==true) { + /* Initialize new singleton task */ + sn_first = new_first; + sn_last = new_first; + sn_size = 1; + } else { + /* Extend singleton task by one */ + sn_last++; + sn_size++; + } + } + + /* Insert task if ... */ + if (i==iWend || sn_size>=max_size || + Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { + + FloatingType lgap; + if (sn_first == ibegin) { + lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); + } else { + lgap = Wgap[sn_first-1]; + } + + PMR_increment_rrr_dependencies(RRR); + + task_t *task = PMR_create_s_task(sn_first, sn_last, 1, ibegin, + iend, spdiam, lgap, RRR); + + PMR_insert_task_at_back(workQ->s_queue, task); + + task_inserted = true; + } else { + task_inserted = false; + } + + } else { + /* Cluster was found */ + + int cl_first = new_first; + int cl_last = new_last; + int cl_size = new_size; + + /* Split cluster into clusters by absolut criterion */ + if (cl_size > 3) { + + /* Split cluster to smaller clusters [cl_first:cl_last] */ + for (k=new_first+1; k 0.8*avggap) + cl_last = k; + else + continue; + + /* Skip cluster if no eigenvalues of process in it */ + if (cl_last < iWbegin || cl_first > iWend) { + cl_first = k + 1; + continue; + } + + /* Record left gap of cluster */ + FloatingType lgap; + if (cl_first == ibegin) { + lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl); + } else { + lgap = Wgap[cl_first-1]; + } + + /* Determine processes involved in processing the cluster */ + int left_pid = nproc-1; + int right_pid = 0; + for (l=cl_first; l<=cl_last; l++) { + if (iproc[l] != -1) { + left_pid = imin(left_pid, iproc[l]); + right_pid = imax(right_pid, iproc[l]); + } + } + + /* + * We have to explicitly specify the type because neither NULL nor nullptr + * can't be used for template type deduction. + */ + rrr_t *RRR_parent = PMR_create_rrr(&D[ibegin], &L[ibegin], + NULL, NULL, isize, 0); + + task_t *task = PMR_create_c_task(cl_first, cl_last, 1, ibegin, + iend, spdiam, lgap, iWbegin, + iWend, left_pid, right_pid, + RRR_parent); + + /* Insert task into queue, depending if cluster need + * communication with other processes */ + if (left_pid != right_pid) + PMR_insert_task_at_back(workQ->r_queue, task); + else + PMR_insert_task_at_back(workQ->c_queue, task); + + cl_first = k + 1; + } /* end k */ + + } else { + /* Cluster is too small to split, so insert it to queue */ + + /* Record left gap of cluster */ + FloatingType lgap; + if (cl_first == ibegin) { + lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); + } else { + lgap = Wgap[cl_first-1]; + } + + /* Determine processes involved */ + int left_pid = nproc-1; + int right_pid = 0; + for (l=cl_first; l<=cl_last; l++) { + if (iproc[l] != -1) { + left_pid = imin(left_pid, iproc[l]); + right_pid = imax(right_pid, iproc[l]); + } + } + + /* + * We have to explicitly specify the type because neither NULL nor nullptr + * can't be used for template type deduction. + */ + rrr_t *RRR_parent = PMR_create_rrr(&D[ibegin], &L[ibegin], + NULL, NULL, isize, 0); + + task_t *task = PMR_create_c_task(cl_first, cl_last, 1, ibegin, + iend, spdiam, lgap, iWbegin, iWend, + left_pid, right_pid, RRR_parent); + + /* Insert task into queue, depending if cluster need + * communication with other processes */ + if (left_pid != right_pid) + PMR_insert_task_at_back(workQ->r_queue, task); + else + PMR_insert_task_at_back(workQ->c_queue, task); + + } + task_inserted = true; + + } /* end new_size */ + + new_first = i + 1; + } /* end of splitting eigenvalues into tasks */ + + /* Set flag in RRR that last singleton is created */ + PMR_set_parent_processed_flag(RRR); + PMR_try_destroy_rrr(RRR); + + ibegin = iend + 1; + } /* end loop over blocks */ + + return 0; + } + + /* + * Processes all the tasks put in the work queue. + */ + template + void *empty_workQ(void *argin) + { + int tid; + proc_t *procinfo; + val_t *Wstruct; + vec_t *Zstruct; + tol_t *tolstruct; + workQ_t *workQ; + counter_t *num_left; + retrieve_auxarg3((auxarg3_t *) argin, &tid, &procinfo, &Wstruct, + &Zstruct, &tolstruct, &workQ, &num_left); + + int n = Wstruct->n; + + /* max. needed double precision work space: odr1v */ + FloatingType *work = (FloatingType *) malloc(4*n * sizeof(FloatingType)); + assert(work != NULL); + + /* max. needed double precision work space: odrrb */ + int *iwork = (int *) malloc(2*n * sizeof(int) ); + assert(iwork != NULL); + + + /* while loop to empty the work queue */ + while (PMR_get_counter_value(num_left) > 0) { + + /* empty r-queue before processing other tasks */ + PMR_process_r_queue(tid, procinfo, Wstruct, Zstruct, tolstruct, + workQ, num_left, work, iwork); + + task_t *task = PMR_remove_task_at_front(workQ->s_queue); + if ( task != NULL ) { + assert(task->flag == SINGLETON_TASK_FLAG); + + PMR_process_s_task((singleton_t *) task->data, tid, procinfo, + Wstruct, Zstruct, tolstruct, num_left, + work, iwork); + free(task); + continue; + } + + task = PMR_remove_task_at_front(workQ->c_queue); + if ( task != NULL ) { + assert(task->flag == CLUSTER_TASK_FLAG); + + PMR_process_c_task((cluster_t *) task->data, tid, procinfo, + Wstruct, Zstruct, tolstruct, workQ, + num_left, work, iwork); + free(task); + continue; + } + + } /* end while */ + + free(work); + free(iwork); + + return NULL; + } + + static workQ_t *create_workQ() + { + workQ_t *wq; + + wq = (workQ_t *) malloc(sizeof(workQ_t)); + + wq->r_queue = PMR_create_empty_queue(); + wq->s_queue = PMR_create_empty_queue(); + wq->c_queue = PMR_create_empty_queue(); + + return(wq); + } + + static void destroy_workQ(workQ_t *wq) + { + PMR_destroy_queue(wq->r_queue); + PMR_destroy_queue(wq->s_queue); + PMR_destroy_queue(wq->c_queue); + free(wq); + } + + template + auxarg3_t* + create_auxarg3(int tid, proc_t *procinfo, val_t *Wstruct, + vec_t *Zstruct, tol_t *tolstruct, + workQ_t *workQ, counter_t *num_left) + { + auxarg3_t *arg = + (auxarg3_t *) malloc( sizeof(auxarg3_t) ); + assert(arg != NULL); + + arg->tid = tid; + arg->procinfo = procinfo; + arg->Wstruct = Wstruct; + arg->Zstruct = Zstruct; + arg->tolstruct = tolstruct; + arg->workQ = workQ; + arg->num_left = num_left; + + return arg; + } + + + + template + void + retrieve_auxarg3(auxarg3_t *arg, int *tid, proc_t **procinfo, + val_t **Wstruct, vec_t **Zstruct, + tol_t **tolstruct, workQ_t **workQ, + counter_t **num_left) + { + *tid = arg->tid; + *procinfo = arg->procinfo; + *Wstruct = arg->Wstruct; + *Zstruct = arg->Zstruct; + *tolstruct = arg->tolstruct; + *workQ = arg->workQ; + *num_left = arg->num_left; + + free(arg); + } + } + +} + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/pmrrr.hpp b/external/pmrrr/include/pmrrr/pmrrr.hpp new file mode 100644 index 0000000000..1125fb45d2 --- /dev/null +++ b/external/pmrrr/include/pmrrr/pmrrr.hpp @@ -0,0 +1,1223 @@ +/* Computation of eigenvalues and eigenvectors of a symmetric + * tridiagonal matrix T, given by its diagonal elements D + * and its super-/subdiagonal elements E. + * + * See INCLUDE/pmrrr.h for more information. + * + * Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __PMRRR_HPP__ +#define __PMRRR_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::sort; + +/* + * Computation of eigenvalues and eigenvectors of a symmetric + * tridiagonal matrix T, given by its diagonal elements D + * and its super-/subdiagonal elements E. + * See README or 'pmrrr.h' for details. + */ +namespace pmrrr { + + namespace detail{ + template + FloatingType scale_matrix(in_t *Dstruct, val_t *Wstruct, bool valeig); + + template + int handle_small_cases(char *jobz, char *range, int *np, FloatingType *D, + FloatingType *E, FloatingType *vlp, FloatingType *vup, int *ilp, + int *iup, int *tryracp, MPI_Comm comm, int *nzp, + int *myfirstp, FloatingType *W, FloatingType *Z, int *ldzp, + int *Zsupp); + + template + bool cmp(const FloatingType & a1, const FloatingType & a2); + + /* + * Template template parameter required because of the sort function not catching overload properly. + */ + template + bool cmp_sort_struct(const sort_struct_t & a1, const sort_struct_t & a2); + + + template + void clean_up(MPI_Comm comm, FloatingType *Werr, FloatingType *Wgap, + FloatingType *gersch, int *iblock, int *iproc, + int *Windex, int *isplit, int *Zindex, + proc_t *procinfo, in_t *Dstruct, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct); + + template + int sort_eigenpairs(proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct); + + template + int refine_to_highrac(proc_t *procinfo, char *jobz, FloatingType *D, + FloatingType *E2, in_t *Dstruct, int *nzp, + val_t *Wstruct, tol_t *tolstruct); + + template + void invscale_eigenvalues(val_t *Wstruct, FloatingType scale, + int size); + } + + /* Arguments: + * ---------- + * + * INPUTS: + * ------- + * jobz "N" or "n" - compute only eigenvalues + * "V" or "v" - compute also eigenvectors + * "C" or "c" - count the maximal number of + * locally computed eigenvectors + * range "A" or "a" - all + * "V" or "v" - by interval: (VL,VU] + * "I" or "i" - by index: IL-IU + * n matrix size + * ldz must be set on input to the leading dimension + * of of eigenvector matrix Z; this is often equal + * to matrix size n (not changed on output) + * + * INPUT + OUTPUT: + * --------------- + * D (double[n]) Diagonal elements of tridiagonal T. + * (On output the array will be overwritten). + * E (double[n]) Off-diagonal elements of tridiagonal T. + * First n-1 elements contain off-diagonals, + * the last element can have an abitrary value. + * (On output the array will be overwritten.) + * vl If range="V", lower bound of interval + * (vl,vu], on output refined. + * If range="A" or "I" not referenced as input. + * On output the interval (vl,vu] contains ALL + * the computed eigenvalues. + * vu If range="V", upper bound of interval + * (vl,vu], on output refined. + * If range="A" or "I" not referenced as input. + * On output the interval (vl,vu] contains ALL + * the computed eigenvalues. + * il If range="I", lower index (1-based indexing) of + * the subset 'il' to 'iu'. + * If range="A" or "V" not referenced as input. + * On output the eigenvalues with index il to iu are + * computed by ALL processes. + * iu If range="I", upper index (1-based indexing) of + * the subset 'il' to 'iu'. + * If range="A" or "V" not referenced as input. + * On output the eigenvalues with index il to iu are + * computed by ALL processes. + * tryrac 0 - do not try to achieve high relative accuracy. + * NOTE: this should be the default in context of + * dense eigenproblems. + * 1 - relative accuracy will be attempted; + * on output it is set to zero if high relative + * accuracy is not achieved. + * comm MPI communicator; commonly: MPI_COMM_WORLD. + * + * OUTPUT: + * ------- + * nz Number of eigenvalues and eigenvectors computed + * locally. + * If jobz="C", 'nz' will be set to the maximal + * number of locally computed eigenvectors such + * that double[n*nz] will provide enough memory + * for the local eigenvectors; this is only + * important in case of range="V" since + * '#eigenpairs' are not known in advance + * offset Index, relative to the computed eigenvalues, of + * the smallest eigenvalue computed locally + * (0-based indexing). + * W (double[n]) Locally computed eigenvalues; + * The first nz entries contain the eigenvalues + * computed locally; the first entry contains the + * 'offset + 1'-th computed eigenvalue, which is the + * 'offset + il'-th eigenvalue of the input matrix + * (1-based indexing in both cases). + * In some situations it is desirable to have all + * computed eigenvalues in W, instead of only + * those computed locally. In this case, call + * routine 'PMR_comm_eigvals' after + * 'pmrrr' returns (see example and interface below). + * Z Locally computed eigenvectors. + * (double[n*nz]) Enough space must be provided to store the + * vectors. 'nz' should be bigger or equal + * to ceil('#eigenpairs'/'#processes'), where + * '#eigenpairs' is 'n' in case of range="A" and + * 'iu-il+1' in case of range="I". Alternatively, + * and for range="V" 'nz' can be obtained + * by running the routine with jobz="C". + * Zsupp Support of eigenvectors, which is given by + * (double[2*n]) i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector + * (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and + * in Fotran Z(i1:i2) are non-zero). + * + * RETURN VALUE: + * ------------- + * 0 - success + * 1 - wrong input parameter + * 2 - misc errors + * + * The Fortran interface takes an additinal integer argument INFO + * to retrieve the return value. + * An example call in Fortran looks therefore like + * + * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, + * MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO) + * + * + * EXAMPLE CALL: + * ------------- + * char *jobz, *range; + * int n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp; + * double *D, *E, *W, *Z, vl, vu; + * + * // allocate space for D, E, W, Z + * // initialize D, E + * // set jobz, range, ldz, and if necessary, il, iu or vl, vu + * + * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu, + * &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W, + * Z, &ldz , Zsupp); + * + * // optional: + * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W); + * + */ + template + int pmrrr(char *jobz, char *range, int *np, FloatingType *D, + FloatingType *E, FloatingType *vl, FloatingType *vu, int *il, + int *iu, int *tryracp, MPI_Comm comm, int *nzp, + int *offsetp, FloatingType *W, FloatingType *Z, int *ldz, + int *Zsupp) + { + /* Input parameter */ + int n = *np; + bool onlyW = toupper(jobz[0]) == 'N'; + bool wantZ = toupper(jobz[0]) == 'V'; + bool cntval = toupper(jobz[0]) == 'C'; + bool alleig = toupper(range[0]) == 'A'; + bool valeig = toupper(range[0]) == 'V'; + bool indeig = toupper(range[0]) == 'I'; + + /* Check input parameters */ + if(!(onlyW || wantZ || cntval)) return 1; + if(!(alleig || valeig || indeig)) return 1; + if(n <= 0) return 1; + if (valeig) { + if(*vu<=*vl) return 1; + } else if (indeig) { + if (*il<1 || *il>n || *iu<*il || *iu>n) return 1; + } + + /* MPI & multithreading info */ + int is_init, is_final; + MPI_Initialized(&is_init); + MPI_Finalized(&is_final); + if (is_init!=1 || is_final==1) { + fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n", + is_init, is_final); + return 1; + } + MPI_Comm comm_dup; + MPI_Comm_dup(comm, &comm_dup); + int nproc, pid, thread_support; + MPI_Comm_size(comm_dup, &nproc); + MPI_Comm_rank(comm_dup, &pid); + MPI_Query_thread(&thread_support); + + int nthreads; + if ( !(thread_support == MPI_THREAD_MULTIPLE || + thread_support == MPI_THREAD_FUNNELED) ) { + /* Disable multithreading; note: to support multithreading with + * MPI_THREAD_SERIALIZED the code must be changed slightly; this + * is not supported at the moment */ + nthreads = 1; + } else { + char *ompvar = getenv("PMR_NUM_THREADS"); + if (ompvar == NULL) { + nthreads = DEFAULT_NUM_THREADS; + } else { + nthreads = atoi(ompvar); + } + } + + #if defined(MVAPICH2_VERSION) + if (nthreads>1) { + int mv2_affinity=1; + char *mv2_string = getenv("MV2_ENABLE_AFFINITY"); + if (mv2_string != NULL) { + mv2_affinity = atoi(mv2_string); + } + if (mv2_affinity!=0) { + nthreads = 1; + if (pid==0) { + fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n"); + fflush(stderr); + } + } + } + #endif + + /* If only maximal number of local eigenvectors are queried + * return if possible here */ + *nzp = 0; + *offsetp = 0; + if (cntval) { + if ( alleig || n < DSTEMR_IF_SMALLER ) { + *nzp = iceil(n,nproc); + MPI_Comm_free(&comm_dup); + return 0; + } else if (indeig) { + *nzp = iceil(*iu-*il+1,nproc); + MPI_Comm_free(&comm_dup); + return 0; + } + } + + /* Check if computation should be done by multiple processes */ + int info; + if (n < DSTEMR_IF_SMALLER) { + info = detail::handle_small_cases(jobz, range, np, D, E, vl, vu, il, + iu, tryracp, comm, nzp, offsetp, W, + Z, ldz, Zsupp); + MPI_Comm_free(&comm_dup); + return info; + } + + /* Allocate memory */ + FloatingType *Werr = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(Werr != NULL); + FloatingType *Wgap = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(Wgap != NULL); + FloatingType *gersch = (FloatingType *) malloc( 2*n*sizeof(FloatingType) ); + assert(gersch != NULL); + int *iblock = (int *) calloc( n , sizeof(int) ); + assert(iblock != NULL); + int *iproc = (int *) malloc( n * sizeof(int) ); + assert(iproc != NULL); + int *Windex = (int *) malloc( n * sizeof(int) ); + assert(Windex != NULL); + int *isplit = (int *) malloc( n * sizeof(int) ); + assert(isplit != NULL); + int *Zindex = (int *) malloc( n * sizeof(int) ); + assert(Zindex != NULL); + detail::proc_t *procinfo = (detail::proc_t *) malloc( sizeof(detail::proc_t) ); + assert(procinfo != NULL); + detail::in_t *Dstruct = (detail::in_t *) malloc( sizeof(detail::in_t) ); + assert(Dstruct != NULL); + detail::val_t *Wstruct = (detail::val_t *) malloc( sizeof(detail::val_t) ); + assert(Wstruct != NULL); + detail::vec_t *Zstruct = (detail::vec_t *) malloc( sizeof(detail::vec_t) ); + assert(Zstruct != NULL); + detail::tol_t *tolstruct = (detail::tol_t *) malloc( sizeof(detail::tol_t) ); + assert(tolstruct != NULL); + + /* Bundle variables into a structures */ + procinfo->pid = pid; + procinfo->nproc = nproc; + procinfo->comm = comm_dup; + procinfo->nthreads = nthreads; + procinfo->thread_support = thread_support; + + Dstruct->n = n; + Dstruct->D = D; + Dstruct->E = E; + Dstruct->isplit = isplit; + + Wstruct->n = n; + Wstruct->vl = vl; + Wstruct->vu = vu; + Wstruct->il = il; + Wstruct->iu = iu; + Wstruct->W = W; + Wstruct->Werr = Werr; + Wstruct->Wgap = Wgap; + Wstruct->Windex = Windex; + Wstruct->iblock = iblock; + Wstruct->iproc = iproc; + Wstruct->gersch = gersch; + + Zstruct->ldz = *ldz; + Zstruct->nz = 0; + Zstruct->Z = Z; + Zstruct->Zsupp = Zsupp; + Zstruct->Zindex = Zindex; + + /* Scale matrix to allowable range, returns 1.0 if not scaled */ + FloatingType scale = detail::scale_matrix(Dstruct, Wstruct, valeig); + + /* Test if matrix warrants more expensive computations which + * guarantees high relative accuracy */ + if (*tryracp) + lapack::odrrr(&n, D, E, &info); /* 0 - rel acc */ + else info = -1; + + int i; + FloatingType *Dcopy, *E2copy; + if (info == 0) { + /* This case is extremely rare in practice */ + tolstruct->split = std::numeric_limits::epsilon(); + /* Copy original data needed for refinement later */ + Dcopy = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(Dcopy != NULL); + memcpy(Dcopy, D, n*sizeof(FloatingType)); + E2copy = (FloatingType *) malloc( n * sizeof(FloatingType) ); + assert(E2copy != NULL); + for (i=0; isplit = -std::numeric_limits::epsilon(); + *tryracp = 0; + } + + if (!wantZ) { + /* Compute eigenvalues to full precision */ + tolstruct->rtol1 = 4.0 * std::numeric_limits::epsilon(); + tolstruct->rtol2 = 4.0 * std::numeric_limits::epsilon(); + } else { + /* Do not compute to full accuracy first, but refine later */ + tolstruct->rtol1 = sqrt(std::numeric_limits::epsilon()); + tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1); + tolstruct->rtol2 = sqrt(std::numeric_limits::epsilon())*5.0E-3; + tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2); + tolstruct->rtol2 = fmax(4.0 * std::numeric_limits::epsilon(), tolstruct->rtol2); + } + + /* Compute all eigenvalues: sorted by block */ + // TODO: change later the casting + info = detail::plarre(procinfo, jobz, range, Dstruct, Wstruct, tolstruct, nzp, offsetp); + assert(info == 0); + + /* If just number of local eigenvectors are queried */ + if (cntval & valeig) { + detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, + isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, + tolstruct); + return 0; + } + + /* If only eigenvalues are to be computed */ + if (!wantZ) { + + /* Refine to high relative with respect to input T */ + if (*tryracp) { + info = detail::refine_to_highrac(procinfo, jobz, Dcopy, E2copy, + Dstruct, nzp, Wstruct, tolstruct); + assert(info == 0); + } + + /* Sort eigenvalues */ + sort(W, W + n, detail::cmp); + + /* Only keep subset ifirst:ilast */ + int ifirst, ilast, isize; + int iil = *il; + int iiu = *iu; + int ifirst_tmp = iil; + for (i=0; i 0) { + memmove(W, &W[ifirst-1], *nzp * sizeof(FloatingType)); + } + + /* If matrix was scaled, rescale eigenvalues */ + detail::invscale_eigenvalues(Wstruct, scale, *nzp); + + detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, + isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, + tolstruct); + + return 0; + } /* end of only eigenvalues to compute */ + + /* Compute eigenvectors */ + info = detail::plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, + nzp, offsetp); + assert(info == 0); + + /* Refine to high relative with respect to input matrix */ + if (*tryracp) { + info = detail::refine_to_highrac(procinfo, jobz, Dcopy, E2copy, + Dstruct, nzp, Wstruct, tolstruct); + assert(info == 0); + } + + /* If matrix was scaled, rescale eigenvalues */ + detail::invscale_eigenvalues(Wstruct, scale, n); + + /* Sort eigenvalues and eigenvectors of process */ + detail::sort_eigenpairs(procinfo, Wstruct, Zstruct); + + detail::clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, + isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, + tolstruct); + if (*tryracp) { + free(Dcopy); + free(E2copy); + } + + return 0; + } /* end pmrrr */ + + + /* + * Routine to communicate eigenvalues such that every process has + * all computed eigenvalues (iu-il+1) in W; this routine is designed + * to be called right after 'pmrrr'. + * + * Arguments: + * ---------- + * + * INPUTS: + * ------- + * jobz "N" or "n" - compute only eigenvalues + * "V" or "v" - compute also eigenvectors + * "C" or "c" - count the maximal number of + * locally computed eigenvectors + * range "A" or "a" - all + * "V" or "v" - by interval: (VL,VU] + * "I" or "i" - by index: IL-IU + * n matrix size + * ldz must be set on input to the leading dimension + * of of eigenvector matrix Z; this is often equal + * to matrix size n (not changed on output) + * + * INPUT + OUTPUT: + * --------------- + * D (double[n]) Diagonal elements of tridiagonal T. + * (On output the array will be overwritten). + * E (double[n]) Off-diagonal elements of tridiagonal T. + * First n-1 elements contain off-diagonals, + * the last element can have an abitrary value. + * (On output the array will be overwritten.) + * vl If range="V", lower bound of interval + * (vl,vu], on output refined. + * If range="A" or "I" not referenced as input. + * On output the interval (vl,vu] contains ALL + * the computed eigenvalues. + * vu If range="V", upper bound of interval + * (vl,vu], on output refined. + * If range="A" or "I" not referenced as input. + * On output the interval (vl,vu] contains ALL + * the computed eigenvalues. + * il If range="I", lower index (1-based indexing) of + * the subset 'il' to 'iu'. + * If range="A" or "V" not referenced as input. + * On output the eigenvalues with index il to iu are + * computed by ALL processes. + * iu If range="I", upper index (1-based indexing) of + * the subset 'il' to 'iu'. + * If range="A" or "V" not referenced as input. + * On output the eigenvalues with index il to iu are + * computed by ALL processes. + * tryrac 0 - do not try to achieve high relative accuracy. + * NOTE: this should be the default in context of + * dense eigenproblems. + * 1 - relative accuracy will be attempted; + * on output it is set to zero if high relative + * accuracy is not achieved. + * comm MPI communicator; commonly: MPI_COMM_WORLD. + * + * OUTPUT: + * ------- + * nz Number of eigenvalues and eigenvectors computed + * locally. + * If jobz="C", 'nz' will be set to the maximal + * number of locally computed eigenvectors such + * that double[n*nz] will provide enough memory + * for the local eigenvectors; this is only + * important in case of range="V" since + * '#eigenpairs' are not known in advance + * offset Index, relative to the computed eigenvalues, of + * the smallest eigenvalue computed locally + * (0-based indexing). + * W (double[n]) Locally computed eigenvalues; + * The first nz entries contain the eigenvalues + * computed locally; the first entry contains the + * 'offset + 1'-th computed eigenvalue, which is the + * 'offset + il'-th eigenvalue of the input matrix + * (1-based indexing in both cases). + * In some situations it is desirable to have all + * computed eigenvalues in W, instead of only + * those computed locally. In this case, call + * routine 'PMR_comm_eigvals' after + * 'pmrrr' returns (see example and interface below). + * Z Locally computed eigenvectors. + * (double[n*nz]) Enough space must be provided to store the + * vectors. 'nz' should be bigger or equal + * to ceil('#eigenpairs'/'#processes'), where + * '#eigenpairs' is 'n' in case of range="A" and + * 'iu-il+1' in case of range="I". Alternatively, + * and for range="V" 'nz' can be obtained + * by running the routine with jobz="C". + * Zsupp Support of eigenvectors, which is given by + * (double[2*n]) i1=Zsupp[2*i] to i2=Zsupp[2*i+1] for the i-th local eigenvector + * (returns 1-based indexing; e.g. in C Z[i1-1:i2-1] are non-zero and + * in Fotran Z(i1:i2) are non-zero). + * + * RETURN VALUE: + * ------------- + * 0 - success + * 1 - wrong input parameter + * 2 - misc errors + * + * The Fortran interface takes an additinal integer argument INFO + * to retrieve the return value. + * An example call in Fortran looks therefore like + * + * CALL PMRRR('V', 'A', N, D, E, VL, VU, IL, IU, TRYRAC, + * MPI_COMM_WORLD, NZ, MYFIRST, W, Z, LDZ, ZSUPP, INFO) + * + * + * EXAMPLE CALL: + * ------------- + * char *jobz, *range; + * int n, il, iu, tryRAC=0, nz, offset, ldz, *Zsupp; + * double *D, *E, *W, *Z, vl, vu; + * + * // allocate space for D, E, W, Z + * // initialize D, E + * // set jobz, range, ldz, and if necessary, il, iu or vl, vu + * + * info = pmrrr(jobz, range, &n, D, E, &vl, &vu, &il, &iu, + * &tryRAC, MPI_COMM_WORLD, &nz, &myfirst, W, + * Z, &ldz , Zsupp); + * + * // optional: + * PMR_comm_eigvals(MPI_COMM_WORLD, &nz, &myfirst, W); + * + */ + template + int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, FloatingType *W) + { + MPI_Comm comm_dup; + MPI_Comm_dup(comm, &comm_dup); + int nproc; + MPI_Comm_size(comm_dup, &nproc); + + int *rcount = (int *) malloc( nproc * sizeof(int) ); + assert(rcount != NULL); + int *rdispl = (int *) malloc( nproc * sizeof(int) ); + assert(rdispl != NULL); + FloatingType *work = (FloatingType *) malloc((*nz+1) * sizeof(FloatingType)); + assert(work != NULL); + + if (*nz > 0) + memcpy(work, W, (*nz) * sizeof(FloatingType) ); + + MPI_Allgather(nz, 1, MPI_INT, rcount, 1, MPI_INT, comm_dup); + + MPI_Allgather(myfirstp, 1, MPI_INT, rdispl, 1, MPI_INT, comm_dup); + + MPI_Allgatherv(work, *nz, float_traits::mpi_type(), W, rcount, rdispl, + float_traits::mpi_type(), comm_dup); + + MPI_Comm_free(&comm_dup); + free(rcount); + free(rdispl); + free(work); + + return 0; + } + + /** + Helper methods + **/ + + namespace detail{ + + /* + * Free's on allocated memory of pmrrr routine + */ + template + void clean_up(MPI_Comm comm, FloatingType *Werr, FloatingType *Wgap, + FloatingType *gersch, int *iblock, int *iproc, + int *Windex, int *isplit, int *Zindex, + proc_t *procinfo, in_t *Dstruct, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct) + { + MPI_Comm_free(&comm); + free(Werr); + free(Wgap); + free(gersch); + free(iblock); + free(iproc); + free(Windex); + free(isplit); + free(Zindex); + free(procinfo); + free(Dstruct); + free(Wstruct); + free(Zstruct); + free(tolstruct); + } + + /* + * Wrapper to call LAPACKs DSTEMR for small matrices + */ + template + int handle_small_cases(char *jobz, char *range, int *np, FloatingType *D, + FloatingType *E, FloatingType *vlp, FloatingType *vup, int *ilp, + int *iup, int *tryracp, MPI_Comm comm, int *nzp, + int *myfirstp, FloatingType *W, FloatingType *Z, int *ldzp, + int *Zsupp) + { + bool cntval = (jobz[0] == 'C' || jobz[0] == 'c'); + bool onlyW = (jobz[0] == 'N' || jobz[0] == 'n'); + bool wantZ = (jobz[0] == 'V' || jobz[0] == 'v'); + bool indeig = (range[0] == 'I' || range[0] == 'i'); + int n = *np; + int ldz_tmp = *np; + int ldz = *ldzp; + + int nproc, pid; + MPI_Comm_size(comm, &nproc); + MPI_Comm_rank(comm, &pid); + + int lwork, liwork; + FloatingType *Z_tmp; + if (onlyW) { + lwork = 12*n; + liwork = 8*n; + } else if (cntval) { + lwork = 18*n; + liwork = 10*n; + } else if (wantZ) { + lwork = 18*n; + liwork = 10*n; + int itmp; + if (indeig) itmp = *iup-*ilp+1; + else itmp = n; + Z_tmp = (FloatingType *) malloc(n*itmp * sizeof(FloatingType)); + assert(Z_tmp != NULL); + } else { + return 1; + } + + FloatingType *work = (FloatingType *) malloc( lwork * sizeof(FloatingType)); + assert(work != NULL); + int *iwork = (int *) malloc( liwork * sizeof(int)); + assert(iwork != NULL); + + if (cntval) { + /* Note: at the moment, jobz="C" should never get here, since + * it is blocked before. */ + int m, info, MINUSONE = -1; + FloatingType cnt; + lapack::odstmr("V", "V", np, D, E, vlp, vup, ilp, iup, &m, W, &cnt, + &ldz_tmp, &MINUSONE, Zsupp, tryracp, work, &lwork, iwork, + &liwork, &info); + assert(info == 0); + + *nzp = (int) ceil(cnt/nproc); + free(work); free(iwork); + return 0; + } + + int m, info; + lapack::odstmr(jobz, range, np, D, E, vlp, vup, ilp, iup, &m, W, Z_tmp, + &ldz_tmp, np, Zsupp, tryracp, work, &lwork, iwork, + &liwork, &info); + assert(info == 0); + + int chunk = iceil(m,nproc); + int myfirst = imin(pid * chunk, m); + int mylast = imin((pid+1)*chunk - 1, m - 1); + int mysize = mylast - myfirst + 1; + + if (mysize > 0) { + memmove(W, &W[myfirst], mysize*sizeof(FloatingType)); + if (wantZ) { + if (ldz == ldz_tmp) { + /* copy everything in one chunk */ + memcpy(Z, &Z_tmp[myfirst*ldz_tmp], n*mysize*sizeof(FloatingType)); + } else { + int i; + /* copy each vector seperately */ + for (i=0; i + FloatingType scale_matrix(in_t *Dstruct, val_t *Wstruct, bool valeig) + { + int n = Dstruct->n; + FloatingType *restrict D = Dstruct->D; + FloatingType *restrict E = Dstruct->E; + FloatingType *vl = Wstruct->vl; + FloatingType *vu = Wstruct->vu; + + /* Set some machine dependent constants */ + FloatingType smlnum = std::numeric_limits::min() / std::numeric_limits::epsilon(); + FloatingType bignum = 1.0 / smlnum; + FloatingType rmin = sqrt(smlnum); + FloatingType rmax = fmin(sqrt(bignum), 1.0 / sqrt(sqrt(std::numeric_limits::min()))); + + /* Scale matrix to allowable range */ + FloatingType scale = 1.0; + FloatingType T_norm = lapack::odnst("M", &n, D, E); /* returns max(|T(i,j)|) */ + if (T_norm > 0 && T_norm < rmin) { + scale = rmin / T_norm; + } else if (T_norm > rmax) { + scale = rmax / T_norm; + } + + if (scale != 1.0) { /* FP cmp okay */ + /* Scale matrix and matrix norm */ + int itmp = n-1; + int IONE = 1; + blas::odscal(&n, &scale, D, &IONE); + blas::odscal(&itmp, &scale, E, &IONE); + if (valeig == true) { + /* Scale eigenvalue bounds */ + *vl *= scale; + *vu *= scale; + } + } /* end scaling */ + + return scale; + } + + /* + * If matrix scaled, rescale eigenvalues + */ + template + void invscale_eigenvalues(val_t *Wstruct, FloatingType scale, + int size) + { + if (scale != 1.0) { /* FP cmp okay */ + FloatingType *vl = Wstruct->vl; + FloatingType *vu = Wstruct->vu; + FloatingType *W = Wstruct->W; + + FloatingType invscale = 1.0 / scale; + int IONE = 1; + *vl *= invscale; + *vu *= invscale; + blas::odscal(&size, &invscale, W, &IONE); + } + + } + + template + int sort_eigenpairs_local(proc_t *procinfo, int m, val_t *Wstruct, vec_t *Zstruct) + { + int pid = procinfo->pid; + int n = Wstruct->n; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict work = Wstruct->gersch; + int ldz = Zstruct->ldz; + FloatingType *restrict Z = Zstruct->Z; + int *restrict Zsupp = Zstruct->Zsupp; + + bool sorted; + int j; + FloatingType tmp; + int itmp1, itmp2; + + /* Make sure that sorted correctly; ineffective implementation, + * but usually no or very little swapping should be done here */ + sorted = false; + while (sorted == false) { + sorted = true; + for (j=0; j W[j+1]) { + sorted = false; + /* swap eigenvalue */ + tmp = W[j]; + W[j] = W[j+1]; + W[j+1] = tmp; + /* swap eigenvalue support */ + itmp1 = Zsupp[2*j]; + Zsupp[2*j] = Zsupp[2*(j+1)]; + Zsupp[2*(j+1)] = itmp1; + itmp2 = Zsupp[2*j + 1]; + Zsupp[2*j + 1] = Zsupp[2*(j+1) + 1]; + Zsupp[2*(j+1) +1 ] = itmp2; + /* swap eigenvector */ + memcpy(work, &Z[j*ldz], n*sizeof(FloatingType)); + memcpy(&Z[j*ldz], &Z[(j+1)*ldz], n*sizeof(FloatingType)); + memcpy(&Z[(j+1)*ldz], work, n*sizeof(FloatingType)); + } + } + } /* end while */ + + return(0); + } + + template + int sort_eigenpairs_global(proc_t *procinfo, int m, val_t *Wstruct, + vec_t *Zstruct) + { + int pid = procinfo->pid; + int nproc = procinfo->nproc; + int n = Wstruct->n; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict work = Wstruct->gersch; + int ldz = Zstruct->ldz; + FloatingType *restrict Z = Zstruct->Z; + int *restrict Zsupp = Zstruct->Zsupp; + + FloatingType *minW, *maxW, *minmax; + int i, p, lp, itmp[2]; + bool sorted; + MPI_Status status; + FloatingType nan_value = 0.0/0.0; + + minW = (FloatingType *) malloc( nproc*sizeof(FloatingType)); + assert(minW != NULL); + maxW = (FloatingType *) malloc( nproc*sizeof(FloatingType)); + assert(maxW != NULL); + minmax = (FloatingType *) malloc(2*nproc*sizeof(FloatingType)); + assert(minmax != NULL); + + if (m == 0) { + MPI_Allgather(&nan_value, 1, float_traits::mpi_type(), minW, 1, float_traits::mpi_type(), + procinfo->comm); + MPI_Allgather(&nan_value, 1, float_traits::mpi_type(), maxW, 1, float_traits::mpi_type(), + procinfo->comm); + } else { + MPI_Allgather(&W[0], 1, float_traits::mpi_type(), minW, 1, float_traits::mpi_type(), + procinfo->comm); + MPI_Allgather(&W[m-1], 1, float_traits::mpi_type(), maxW, 1, float_traits::mpi_type(), + procinfo->comm); + } + + for (i=0; i minmax[i+1]) sorted = false; + } + + /* Make sure that sorted correctly; ineffective implementation, + * but usually no or very little swapping should be done here */ + while (sorted == false) { + + sorted = true; + + for (p=1; p::mpi_type(), p, lp, + work, n, float_traits::mpi_type(), p, p, + procinfo->comm, &status); + memcpy(&Z[(m-1)*ldz], work, n*sizeof(FloatingType)); + } + if (pid == p) { + W[0] = maxW[p-1]; + MPI_Sendrecv(&Z[0], n, float_traits::mpi_type(), lp, p, + work, n, float_traits::mpi_type(), lp, lp, + procinfo->comm, &status); + memcpy(&Z[0], work, n*sizeof(FloatingType)); + } + } + + /* swap eigenvector support as well; + * (would better be recomputed here though) */ + if ((pid == lp || pid == p) && minW[p] < maxW[lp]) { + if (pid == lp) { + MPI_Sendrecv(&Zsupp[2*(m-1)], 2, MPI_INT, p, lp, + itmp, 2, MPI_INT, p, p, + procinfo->comm, &status); + Zsupp[2*(m-1)] = itmp[0]; + Zsupp[2*(m-1) + 1] = itmp[1]; + } + if (pid == p) { + MPI_Sendrecv(&Zsupp[0], 2, MPI_INT, lp, p, + itmp, 2, MPI_INT, lp, lp, + procinfo->comm, &status); + Zsupp[0] = itmp[0]; + Zsupp[1] = itmp[1]; + } + } + } + + /* sort local again */ + sort_eigenpairs_local(procinfo, m, Wstruct, Zstruct); + + /* check again if globally sorted */ + if (m == 0) { + MPI_Allgather(&nan_value, 1, float_traits::mpi_type(), minW, 1, float_traits::mpi_type(), + procinfo->comm); + MPI_Allgather(&nan_value, 1, float_traits::mpi_type(), maxW, 1, float_traits::mpi_type(), + procinfo->comm); + } else { + MPI_Allgather(&W[0], 1, float_traits::mpi_type(), minW, 1, float_traits::mpi_type(), + procinfo->comm); + MPI_Allgather(&W[m-1], 1, float_traits::mpi_type(), maxW, 1, float_traits::mpi_type(), + procinfo->comm); + } + + for (i=0; i minmax[i+1]) sorted = false; + } + + } /* end while not sorted */ + + free(minW); + free(maxW); + free(minmax); + + return 0; + } + + /* Routine to sort the eigenpairs */ + template + int sort_eigenpairs(proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct) + { + /* From inputs */ + int pid = procinfo->pid; + int n = Wstruct->n; + FloatingType *restrict W = Wstruct->W; + int *restrict Windex = Wstruct->Windex; + int *restrict iproc = Wstruct->iproc; + int *restrict Zindex = Zstruct->Zindex; + + /* Others */ + int im, j; + sort_struct_t *sort_array; + + /* Make the first nz elements of W contains the eigenvalues + * associated to the process */ + im = 0; + for (j=0; j *) malloc(im*sizeof(sort_struct_t)); + + for (j=0; j); + + for (j=0; j(procinfo, im, Wstruct, Zstruct); + + /* Make sure eigenpairs are sorted globally; this is a very + * inefficient way sorting, but in general no or very little + * swapping of eigenpairs is expected here */ + if (ASSERT_SORTED_EIGENPAIRS == true) + sort_eigenpairs_global(procinfo, im, Wstruct, Zstruct); + + free(sort_array); + + return(0); + } + + /* + * Refines the eigenvalue to high relative accuracy with + * respect to the input matrix; + * Note: In principle this part could be fully parallel too, + * but it will only rarely be called and not much work + * is involved, if the eigenvalues are not small in magnitude + * even no work at all is not uncommon. + */ + template + int refine_to_highrac(proc_t *procinfo, char *jobz, FloatingType *D, + FloatingType *E2, in_t *Dstruct, int *nzp, + val_t *Wstruct, tol_t *tolstruct) + { + int n = Dstruct->n; + int nsplit = Dstruct->nsplit; + int *restrict isplit = Dstruct->isplit; + FloatingType spdiam = Dstruct->spdiam; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + + FloatingType *work = (FloatingType *) malloc( 2*n * sizeof(FloatingType) ); + assert (work != NULL); + int *iwork = (int *) malloc( 2*n * sizeof(int) ); + assert (iwork != NULL); + + int j, ibegin = 0; + for (j=0; jpivmin; + FloatingType tol = 4 * std::numeric_limits::epsilon(); + lapack::odrrj(&isize, &D[ibegin], &E2[ibegin], &ifirst, &ilast, &tol, + &offset, &W[ibegin], &Werr[ibegin], work, iwork, &pivmin, + &spdiam, &info); + assert(info == 0); + + ibegin = iend + 1; + } /* end j */ + + free(work); + free(iwork); + return 0; + } + + /* + * Compare function for using qsort() on an array + * of FloatingTypes + */ + template + bool cmp(const FloatingType & arg1, const FloatingType & arg2) + { + return arg1 < arg2; + } + + /* + * Compare function for using qsort() on an array of + * sort_structs + */ + template + bool cmp_sort_struct(const sort_struct_t & arg1, const sort_struct_t & arg2) + { + /* Within block local index decides */ + return arg1.ind < arg2.ind; + } + } +/* Fortran function prototype */ +/*void pmrrr_(char *jobz, char *range, int *n, FloatingType *D, + FloatingType *E, FloatingType *vl, FloatingType *vu, int *il, int *iu, + int *tryracp, MPI_Fint *comm, int *nz, int *myfirst, + FloatingType *W, FloatingType *Z, int *ldz, int *Zsupp, int* info) +{ + MPI_Comm c_comm = MPI_Comm_f2c(*comm); + + *info = pmrrr(jobz, range, n, D, E, vl, vu, il, iu, tryracp, + c_comm, nz, myfirst, W, Z, ldz, Zsupp); +} + +void pmr_comm_eigvals_(MPI_Fint *comm, int *nz, int *myfirstp, + FloatingType *W, int *info) +{ + MPI_Comm c_comm = MPI_Comm_f2c(*comm); + + *info = PMR_comm_eigvals(c_comm, nz, myfirstp, W); +}*/ + +} + +#endif diff --git a/external/pmrrr/include/pmrrr/process_c_task.hpp b/external/pmrrr/include/pmrrr/process_c_task.hpp new file mode 100644 index 0000000000..7128f97e9b --- /dev/null +++ b/external/pmrrr/include/pmrrr/process_c_task.hpp @@ -0,0 +1,854 @@ + +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + + +#ifndef __PROCESS_C_TASK_HPP__ +#define __PROCESS_C_TASK_HPP__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +#define THREE 3.0 +#define FOUR 4.0 + +namespace pmrrr { namespace detail { + + namespace { + + template + rrr_t* compute_new_rrr(cluster_t *cl, int tid, proc_t *procinfo, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, FloatingType *work, int *iwork); + + template + inline int refine_eigvals(cluster_t *cl, int rf_begin, int rf_end, + int tid, proc_t *procinfo, + rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, counter_t *num_left, + workQ_t *workQ, FloatingType *work, + int *iwork); + + template + inline int communicate_refined_eigvals(cluster_t *cl, proc_t *procinfo, + int tid, val_t *Wstruct, rrr_t *RRR); + + template + inline int test_comm_status(cluster_t *cl, val_t *Wstruct); + + template + inline int create_subtasks(cluster_t *cl, int tid, proc_t *procinfo, + rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, + workQ_t *workQ, + counter_t *num_left); + + } + + template + int PMR_process_c_task(cluster_t *cl, int tid, proc_t *procinfo, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, workQ_t *workQ, + counter_t *num_left, FloatingType *work, int *iwork) + { + /* From inputs */ + int depth = cl->depth; + int left_pid = cl->left_pid; + int right_pid = cl->right_pid; + int pid = procinfo->pid; + int n = Wstruct->n; + + /* Protection against infinitely deep trees */ + assert(depth < n); + + /* Check if task only need to be split into subtasks */ + int status; + if (cl->wait_until_refined == true) { + status = test_comm_status(cl, Wstruct); + if (status == COMM_COMPLETE) { + create_subtasks(cl, tid, procinfo, cl->RRR, Wstruct, Zstruct, + workQ, num_left); + return C_TASK_PROCESSED; + } else { + return C_TASK_NOT_PROCESSED; + } + } + + /* Otherwise: compute new rrr, refine part own cluster, + * communicate the refined eigenvalues if necessary, + * and create subtasks if possible */ + + rrr_t *RRR = compute_new_rrr(cl, tid, procinfo, Wstruct, Zstruct, + tolstruct, work, iwork); + + /* Refine eigenvalues 'rf_begin' to 'rf_end' */ + int rf_begin, rf_end; + if (left_pid != right_pid) { + rf_begin = imax(cl->begin, cl->proc_W_begin); + rf_end = imin(cl->end, cl->proc_W_end); + } + if (pid == left_pid ) rf_begin = cl->begin; + if (pid == right_pid) rf_end = cl->end; + + refine_eigvals(cl, rf_begin, rf_end, tid, procinfo, RRR, + Wstruct, Zstruct, tolstruct, num_left, + workQ, work, iwork); + + /* Communicate results: non-blocking */ + status = COMM_COMPLETE; + if (left_pid != right_pid) { + + status = communicate_refined_eigvals(cl, procinfo, tid, Wstruct, RRR); + /* status = COMM_INCOMPLETE if communication not finished */ + } + + if (status == COMM_COMPLETE) { + + create_subtasks(cl, tid, procinfo, RRR, Wstruct, Zstruct, + workQ, num_left); + + return C_TASK_PROCESSED; + } else { + return C_TASK_NOT_PROCESSED; + } + + } /* end process_c_task */ + + namespace { + + template + rrr_t* compute_new_rrr(cluster_t *cl, int tid, proc_t *procinfo, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, FloatingType *work, int *iwork) + { + /* From inputs */ + int cl_begin = cl->begin; + int cl_end = cl->end; + int cl_size = cl_end - cl_begin + 1; + int depth = cl->depth; + int bl_begin = cl->bl_begin; + int bl_end = cl->bl_end; + int bl_size = bl_end - bl_begin + 1; + FloatingType bl_spdiam = cl->bl_spdiam; + rrr_t *RRR_parent = cl->RRR; + + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict Windex = Wstruct->Windex; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + + /* Allocate memory for new representation for cluster */ + FloatingType *D = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + FloatingType *L = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + FloatingType *DL = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + FloatingType *DLL = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + assert(D != NULL); + assert(L != NULL); + assert(DL != NULL); + assert(DLL != NULL); + + /* Recompute DL and DLL */ + int i; + FloatingType tmp; + FloatingType *D_parent = RRR_parent->D; + FloatingType *L_parent = RRR_parent->L; + for (i=0; i::epsilon(); + FloatingType pivmin = tolstruct->pivmin; + + /* to shift as close as possible refine extremal eigenvalues */ + int k, p; + FloatingType savegap; + for (k=0; k<2; k++) { + if (k == 0) { + p = Windex[cl_begin]; + savegap = Wgap[cl_begin]; + Wgap[cl_begin] = 0.0; + } else { + p = Windex[cl_end ]; + savegap = Wgap[cl_end]; + Wgap[cl_end] = 0.0; + } + + int info; + int offset = Windex[cl_begin] - 1; + + lapack::odrrb(&bl_size, D_parent, DLL_parent, &p, &p, &RQtol, + &RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin], + &Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam, + &bl_size, &info); + assert( info == 0 ); + + if (k == 0) { + Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1]) + - (Wshifted[cl_begin]+Werr[cl_begin]) ); + } else { + Wgap[cl_end] = savegap; + } + } /* end k */ + + FloatingType left_gap = cl->lgap; + FloatingType right_gap = Wgap[cl_end]; + + /* Compute new RRR and store it in D and L */ + int info; + int IONE=1; + FloatingType tau; + lapack::odrrf(&bl_size, D_parent, L_parent, DL_parent, + &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin], + &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap, + &pivmin, &tau, D, L, work, &info); + assert(info == 0); + + /* Update shift and store it */ + tmp = L_parent[bl_size-1] + tau; + L[bl_size-1] = tmp; + + /* Compute D*L and D*L*L */ + for (i=0; icopied_parent_rrr == true) { + free(RRR_parent->D); + free(RRR_parent->L); + } + rrr_t *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1); + + /* Update shifted eigenvalues */ + for (k=cl_begin; k<=cl_end; k++) { + FloatingType fudge = THREE * std::numeric_limits::epsilon() * fabs( Wshifted[k] ); + Wshifted[k] -= tau; + fudge += FOUR * std::numeric_limits::epsilon() * fabs( Wshifted[k] ); + Werr[k] += fudge; + } + + /* Assure that structure is not freed while it is processed */ + PMR_increment_rrr_dependencies(RRR); + + return RRR; + } /* end compute_new_rrr */ + + /* + * Refine eigenvalues with respect to new rrr + */ + template + int refine_eigvals(cluster_t *cl, int rf_begin, int rf_end, + int tid, proc_t *procinfo, rrr_t *RRR, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, counter_t *num_left, + workQ_t *workQ, FloatingType *work, + int *iwork) + { + typedef refine_t* data_t; + + /* From inputs */ + int rf_size = rf_end-rf_begin+1; + int bl_begin = cl->bl_begin; + int bl_end = cl->bl_end; + int bl_size = bl_end - bl_begin + 1; + FloatingType bl_spdiam = cl->bl_spdiam; + + FloatingType *restrict D = RRR->D; + FloatingType *restrict L = RRR->L; + FloatingType *restrict DLL = RRR->DLL; + + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict Windex = Wstruct->Windex; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + + FloatingType pivmin = tolstruct->pivmin; + FloatingType rtol1 = tolstruct->rtol1; + FloatingType rtol2 = tolstruct->rtol2; + + /* Determine if refinement should be split into tasks */ + int left = PMR_get_counter_value(num_left); + int nz = Zstruct->nz; + int nthreads = procinfo->nthreads; + int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads)); + int own_part = (int)fmax(ceil((FloatingType)left/nthreads),MIN_REFINE_CHUNK); + + int offset, i, p, q; + FloatingType savegap; + task_t *task; + if (own_part < rf_size) { + + int others_part = rf_size - own_part; + int num_tasks = iceil(rf_size, own_part) - 1; /* >1 */ + int chunk = others_part/num_tasks; /* floor */ + + int ts_begin = rf_begin, ts_end; + p = Windex[rf_begin]; + for (i=0; ir_queue, task); + else + /* TODO: remove casting if void* pointers are gone from task_t */ + PMR_refine_sem_post(static_cast(task->data)); /* case chunk=0 */ + + ts_begin = ts_end + 1; + p = q + 1; + } + ts_end = rf_end; + q = Windex[rf_end]; + offset = Windex[ts_begin] - 1; + + /* Call bisection routine to refine the values */ + if (ts_begin <= ts_end) { + int info; + lapack::odrrb(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, + &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], + work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); + assert( info == 0 ); + } + + /* Empty "all" r-queue refine tasks before waiting */ + int num_iter = PMR_get_num_tasks(workQ->r_queue); + for (i=0; ir_queue); + if (task != NULL) { + if (task->flag == REFINE_TASK_FLAG) { + PMR_process_r_task((refine_t *) task->data, procinfo, + Wstruct, tolstruct, work, iwork); + free(task); + } else { + PMR_insert_task_at_back(workQ->r_queue, task); + } + } /* if task */ + } /* end for i */ + + /* Barrier: wait until all created tasks finished */ + int count = num_tasks; + while (count > 0) { + /* TODO: remove casting if void* pointers are gone from task_t */ + while ( PMR_refine_sem_wait(static_cast(task->data)) != 0 ) { }; + count--; + } + /* TODO: remove casting if void* pointers are gone from task_t */ + PMR_refine_sem_destroy(static_cast(task->data)); + + /* Edit right gap at splitting point */ + ts_begin = rf_begin; + for (i=0; i + int communicate_refined_eigvals(cluster_t *cl, proc_t *procinfo, + int tid, val_t *Wstruct, rrr_t *RRR) + { + /* From inputs */ + int cl_begin = cl->begin; + int cl_end = cl->end; + int bl_begin = cl->bl_begin; + int bl_end = cl->bl_end; + int proc_W_begin = cl->proc_W_begin; + int proc_W_end = cl->proc_W_end; + int left_pid = cl->left_pid; + int right_pid = cl->right_pid; + int pid = procinfo->pid; + + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + int *restrict iproc = Wstruct->iproc; + + int my_begin = imax(cl_begin, proc_W_begin); + int my_end = imin(cl_end, proc_W_end); + if (pid == left_pid ) my_begin = cl_begin; + if (pid == right_pid) my_end = cl_end; + int my_size = my_end - my_begin + 1; + + int i, k; + int num_messages = 0; + for (i=left_pid; i<=right_pid; i++) { + for (k=cl_begin; k<=cl_end; k++) { + if (iproc[k] == i) { + num_messages += 4; + break; + } + } + } + + MPI_Request *requests = (MPI_Request *) malloc( num_messages * + sizeof(MPI_Request) ); + MPI_Status *stats = (MPI_Status *) malloc( num_messages * + sizeof(MPI_Status) ); + + int p; + int i_msg = 0; + int other_begin, other_end, other_size; + for (p=left_pid; p<=right_pid; p++) { + + bool proc_involved = false; + for (k=cl_begin; k<=cl_end; k++) { + if (iproc[k] == p) { + proc_involved = true; + break; + } + } + + int u; + if (p != pid && proc_involved == true) { + + /* send message to process p (non-blocking) */ + MPI_Isend(&Wshifted[my_begin], my_size, float_traits::mpi_type(), p, + my_begin, procinfo->comm, &requests[4*i_msg]); + + MPI_Isend(&Werr[my_begin], my_size, float_traits::mpi_type(), p, + my_begin, procinfo->comm, &requests[4*i_msg+1]); + + /* Find eigenvalues in of process p */ + other_size = 0; + for (k=cl_begin; k<=cl_end; k++) { + if (other_size == 0 && iproc[k] == p) { + other_begin = k; + other_end = k; + other_size++; + u = k+1; + while (u <=cl_end && iproc[u] == p) { + other_end++; + other_size++; + u++; + } + } + } + if (p == left_pid) { + other_begin = cl_begin; + u = cl_begin; + while (iproc[u] == -1) { + other_size++; + u++; + } + } + if (p == right_pid) { + other_end = cl_end; + u = cl_end; + while (iproc[u] == -1) { + other_size++; + u--; + } + } + + /* receive message from process p (non-blocking) */ + MPI_Irecv(&Wshifted[other_begin], other_size, float_traits::mpi_type(), p, + other_begin, procinfo->comm, &requests[4*i_msg+2]); + + MPI_Irecv(&Werr[other_begin], other_size, float_traits::mpi_type(), p, + other_begin, procinfo->comm, &requests[4*i_msg+3]); + + i_msg++; + } + + } /* end for p */ + num_messages = 4*i_msg; /* messages actually send */ + + int communication_done; + int status = MPI_Testall(num_messages, requests, + &communication_done, stats); + assert(status == MPI_SUCCESS); + + if (communication_done == true) { + + FloatingType sigma = RRR->L[bl_end-bl_begin]; + for (k=cl_begin; knum_messages = num_messages; + comm->requests = requests; + comm->stats = stats; + cl->wait_until_refined = true; + cl->messages = comm; + + status = COMM_INCOMPLETE; + } + + return status; + } /* end communicate_refined_eigvals */ + + + template + int test_comm_status(cluster_t *cl, val_t *Wstruct) + { + int cl_begin = cl->begin; + int cl_end = cl->end; + int bl_begin = cl->bl_begin; + int bl_end = cl->bl_end; + rrr_t *RRR = cl->RRR; + comm_t *comm = cl->messages; + int num_messages = comm->num_messages; + MPI_Request *requests = comm->requests; + MPI_Status *stats = comm->stats; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + + /* Test if communication complete */ + int communication_done; + int status = MPI_Testall(num_messages, requests, + &communication_done, stats); + assert(status == MPI_SUCCESS); + + if (communication_done == true) { + + cl->wait_until_refined = false; + + int k; + FloatingType sigma = RRR->L[bl_end-bl_begin]; + for (k=cl_begin; k + int create_subtasks(cluster_t *cl, int tid, proc_t *procinfo, + rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, + workQ_t *workQ, counter_t *num_left) + { + /* From inputs */ + int cl_begin = cl->begin; + int cl_end = cl->end; + int depth = cl->depth; + int bl_begin = cl->bl_begin; + int bl_end = cl->bl_end; + int bl_size = bl_end - bl_begin + 1; + FloatingType bl_spdiam = cl->bl_spdiam; + FloatingType lgap; + + int pid = procinfo->pid; + int nproc = procinfo->nproc; + int nthreads = procinfo->nthreads; + bool proc_involved=true; + + FloatingType *restrict Wgap = Wstruct->Wgap; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + int *restrict iproc = Wstruct->iproc; + + int ldz = Zstruct->ldz; + FloatingType *restrict Z = Zstruct->Z; + int *restrict Zindex = Zstruct->Zindex; + + /* others */ + int i, l, k; + int max_size; + task_t *task; + bool task_inserted; + int new_first, new_last, new_size, new_ftt1, new_ftt2; + int sn_first, sn_last, sn_size; + rrr_t *RRR_parent; + int new_lpid, new_rpid; + FloatingType *restrict D_parent; + FloatingType *restrict L_parent; + int my_first, my_last; + bool copy_parent_rrr; + + + max_size = fmax(1, PMR_get_counter_value(num_left) / + (fmin(depth+1,4)*nthreads) ); + task_inserted = true; + new_first = cl_begin; + for (i=cl_begin; i<=cl_end; i++) { + + if ( i == cl_end ) + new_last = i; + else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) ) + new_last = i; + else + continue; + + new_size = new_last - new_first + 1; + + if (new_size == 1) { + /* singleton was found */ + + if (new_first==cl_begin || task_inserted==true) { + /* initialize new singleton task */ + sn_first = new_first; + sn_last = new_first; + sn_size = 1; + } else { + /* extend singleton task by one */ + sn_last++; + sn_size++; + } + + /* insert task if ... */ + if (i==cl_end || sn_size>=max_size || + Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { + + /* Check if process involved in s-task */ + proc_involved = false; + for (k=sn_first; k<=sn_last; k++) { + if (iproc[k] == pid) { + proc_involved = true; + break; + } + } + if (proc_involved == false) { + task_inserted = true; + new_first = i + 1; + continue; + } + + /* Insert task as process is involved */ + if (sn_first == cl_begin) { + lgap = cl->lgap; + } else { + lgap = Wgap[sn_first-1]; + } + + PMR_increment_rrr_dependencies(RRR); + + task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin, + bl_end, bl_spdiam, lgap, RRR); + + PMR_insert_task_at_back(workQ->s_queue, task); + + task_inserted = true; + } else { + task_inserted = false; + } + + } else { + /* cluster was found */ + + /* check if process involved in processing the new cluster */ + new_lpid = nproc-1; + new_rpid = -1; + for (l=new_first; l<=new_last; l++) { + if (iproc[l] != -1) { + new_lpid = imin(new_lpid, iproc[l]); + new_rpid = imax(new_rpid, iproc[l]); + } + } + if (new_lpid > pid || new_rpid < pid) { + task_inserted = true; + new_first = i + 1; + continue; + } + + /* find gap to the left */ + if (new_first == cl_begin) { + lgap = cl->lgap; + } else { + lgap = Wgap[new_first - 1]; + } + + /* determine where to store the parent rrr needed by the + * cluster to find its new rrr */ + my_first = imax(new_first, cl->proc_W_begin); + my_last = imin(new_last, cl->proc_W_end); + if ( my_first == my_last ) { + /* only one eigenvalue of cluster belongs to process */ + copy_parent_rrr = true; + } else { + /* store parent rrr in Z at column new_ftt */ + copy_parent_rrr = false; + } + new_ftt1 = Zindex[my_first ]; + new_ftt2 = Zindex[my_first + 1]; + + if (copy_parent_rrr == true) { + /* Copy parent RRR into alloceted arrays and mark them + * for freeing later */ + D_parent = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + assert(D_parent != NULL); + + L_parent = (FloatingType *) malloc(bl_size * sizeof(FloatingType)); + assert(L_parent != NULL); + + memcpy(D_parent, RRR->D, bl_size*sizeof(FloatingType)); + memcpy(L_parent, RRR->L, bl_size*sizeof(FloatingType)); + + /* + * We have to explicitly specify the type because neither NULL nor nullptr + * can't be used for template type deduction. + */ + RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, + NULL, bl_size, depth); + PMR_set_copied_parent_rrr_flag(RRR_parent, true); + + } else { + /* copy parent RRR into Z to make cluster task independent */ + memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, + bl_size*sizeof(FloatingType)); + memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, + bl_size*sizeof(FloatingType)); + /* + * We have to explicitly specify the type because neither NULL nor nullptr + * can't be used for template type deduction. + */ + RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin], + &Z[new_ftt2*ldz + bl_begin], + NULL, NULL, bl_size, depth); + } + + /* Create the task for the cluster and put it in the queue */ + task = PMR_create_c_task(new_first, new_last, depth+1, + bl_begin, bl_end, bl_spdiam, lgap, + cl->proc_W_begin, cl->proc_W_end, + new_lpid, new_rpid, RRR_parent); + + if (new_lpid != new_rpid) + PMR_insert_task_at_back(workQ->r_queue, task); + else + PMR_insert_task_at_back(workQ->c_queue, task); + + task_inserted = true; + + } /* if singleton or cluster found */ + + new_first = i + 1; + } /* end i */ + + /* set flag in RRR that last singleton is created */ + PMR_set_parent_processed_flag(RRR); + + /* clean up */ + PMR_try_destroy_rrr(RRR); + free(cl); + + return(0); + } /* end create_subtasks */ + } + +} //namespace detail + +} //namespace pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/process_r_task.hpp b/external/pmrrr/include/pmrrr/process_r_task.hpp new file mode 100644 index 0000000000..b9bda64d9b --- /dev/null +++ b/external/pmrrr/include/pmrrr/process_r_task.hpp @@ -0,0 +1,163 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __PROCESS_R_TASK_HPP__ +#define __PROCESS_R_TASK_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace pmrrr { namespace detail { + + /* + * Executes all tasks which are in the r-queue at the moment of the + * call. This routine is called to make sure that all tasks in the + * queue are dequeued before continueing with other tasks. + */ + template + void PMR_process_r_queue(int tid, proc_t *procinfo, val_t *Wstruct, + vec_t *Zstruct, tol_t *tolstruct, + workQ_t *workQ, counter_t *num_left, + FloatingType *work, int *iwork) + { + int thread_support = procinfo->thread_support; + int num_tasks = PMR_get_num_tasks(workQ->r_queue); + + int t; + for (t=0; tr_queue); + + if ( task != NULL ) { + + if (task->flag == CLUSTER_TASK_FLAG) { + + if (thread_support != MPI_THREAD_FUNNELED || tid == 0) { + /* if MPI_THREAD_FUNNELED only tid==0 should process + * these tasks, otherwise any thread can do it */ + int status = PMR_process_c_task((cluster_t *) task->data, + tid, procinfo, Wstruct, + Zstruct, tolstruct, workQ, + num_left, work, iwork); + + if (status == C_TASK_PROCESSED) { + free(task); + } else { + PMR_insert_task_at_back(workQ->r_queue, task); + } + } else { + PMR_insert_task_at_back(workQ->r_queue, task); + } + + } /* end if cluster task */ + else if (task->flag == REFINE_TASK_FLAG) { + PMR_process_r_task((refine_t *) task->data, procinfo, + Wstruct, tolstruct, work, iwork); + free(task); + } + } /* end if task removed */ + } /* end for t */ + } /* end process_entire_r_queue */ + + /* + * Process the task of refining a subset of eigenvalues. + */ + template + int PMR_process_r_task(refine_t *rf, proc_t *procinfo, + val_t *Wstruct, tol_t *tolstruct, + FloatingType *work, int *iwork) + { + /* From inputs */ + int ts_begin = rf->begin; + FloatingType *restrict D = rf->D; + FloatingType *restrict DLL = rf->DLL; + int p = rf->p; + int q = rf->q; + int bl_size = rf->bl_size; + FloatingType bl_spdiam = rf->bl_spdiam; + + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict Windex = Wstruct->Windex; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + + FloatingType savegap; + + if (p == q) { + savegap = Wgap[ts_begin]; + Wgap[ts_begin] = 0.0; + } + + int info; + int offset = Windex[ts_begin]-1; + FloatingType rtol1 = tolstruct->rtol1; + FloatingType rtol2 = tolstruct->rtol2; + FloatingType pivmin = tolstruct->pivmin; + lapack::odrrb(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, + &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], + work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); + assert(info == 0); + + if (p == q) { + Wgap[ts_begin] = savegap; + } + + PMR_refine_sem_post(rf); + free(rf); + + return 0; + } + +} //namespace detail + +} //namespace pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/process_s_task.hpp b/external/pmrrr/include/pmrrr/process_s_task.hpp new file mode 100644 index 0000000000..febec9484c --- /dev/null +++ b/external/pmrrr/include/pmrrr/process_s_task.hpp @@ -0,0 +1,328 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __PROCESS_S_TASK_HPP__ +#define __PROCESS_S_TASK_HPP__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace pmrrr { namespace detail { + + template + int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo, + val_t *Wstruct, vec_t *Zstruct, + tol_t *tolstruct, counter_t *num_left, + FloatingType *work, int *iwork) + { + /* Inputs */ + int begin = sng->begin; + int end = sng->end; + int bl_begin = sng->bl_begin; + int bl_end = sng->bl_end; + int bl_size = bl_end - bl_begin + 1; + FloatingType bl_spdiam = sng->bl_spdiam; + rrr_t *RRR = sng->RRR; + FloatingType *restrict D = RRR->D; + FloatingType *restrict L = RRR->L; + FloatingType *restrict DL = RRR->DL; + FloatingType *restrict DLL = RRR->DLL; + + int pid = procinfo->pid; + int n = Wstruct->n; + FloatingType *restrict W = Wstruct->W; + FloatingType *restrict Werr = Wstruct->Werr; + FloatingType *restrict Wgap = Wstruct->Wgap; + int *restrict Windex = Wstruct->Windex; + int *restrict iproc = Wstruct->iproc; + FloatingType *restrict Wshifted = Wstruct->Wshifted; + int ldz = Zstruct->ldz; + FloatingType *restrict Z = Zstruct->Z; + int *restrict isuppZ = Zstruct->Zsupp;; + int *restrict Zindex = Zstruct->Zindex; + FloatingType pivmin = tolstruct->pivmin; + + /* others */ + int info, i, k, itmp, num_decrement=0; + int IONE = 1; + FloatingType DZERO = 0.0; + FloatingType tol, lambda, left, right; + int i_local, zind; + FloatingType gap, lgap, rgap, gaptol, savedgap, tmp; + int usedBS, usedRQ, needBS, wantNC, step2II; + int r, offset; + FloatingType twoeps = 2*std::numeric_limits::epsilon(), RQtol = 2*std::numeric_limits::epsilon(); + FloatingType residual, bstres, bstw; + int i_supmn, i_supmx; + FloatingType RQcorr; + int negcount; + int sgndef, suppsize; + FloatingType sigma; + int i_Zfrom, i_Zto; + FloatingType ztz, norminv, mingma; + + + /* set tolerance parameter */ + tol = 4.0 * log( (FloatingType) bl_size ) * std::numeric_limits::epsilon(); + + /* loop over all singletons in the task */ + for (i=begin; i<=end; i++) { + + /* check if eigenvector is supposed to be computed by + * the process */ + if (iproc[i] != pid) + continue; + num_decrement++; + + if (bl_size == 1) { + /* set eigenvector to column of identity matrix */ + zind = Zindex[i]; + memset(&Z[zind*ldz], 0.0, n*sizeof(FloatingType) ); + Z[zind*ldz + bl_begin] = 1.0; + isuppZ[2*zind ] = bl_begin + 1; + isuppZ[2*zind + 1] = bl_begin + 1; + continue; + } + + lambda = Wshifted[i]; + left = Wshifted[i] - Werr[i]; + right = Wshifted[i] + Werr[i]; + i_local = Windex[i]; + r = 0; + + /* compute left and right gap */ + if (i == bl_begin) + lgap = std::numeric_limits::epsilon() * fmax( fabs(left), fabs(right) ); + else if (i == begin) + lgap = sng->lgap; + else + lgap = Wgap[i-1]; + + if (i == bl_end) { + rgap = std::numeric_limits::epsilon() * fmax( fabs(left), fabs(right) ); + } else { + rgap = Wgap[i]; + } + + gap = fmin(lgap, rgap); + + if ( i == bl_begin || i == bl_end ) { + gaptol = 0.0; + } else { + gaptol = gap * std::numeric_limits::epsilon(); + } + + /* initialize lower and upper value of support */ + i_supmn = bl_size; + i_supmx = 1; + + /* update Wgap so that it holds minimum gap and save the + * old value */ + savedgap = Wgap[i]; + Wgap[i] = gap; + + /* initialize flags indicating if bisection or Rayleigh-Quotient + * correction was used */ + usedBS = false; + usedRQ = false; + + /* the need for bisection is initially turned off */ + needBS = !TRY_RQC; + + /* IEEE floating point is assumed, so that all 0 bits are 0.0 */ + zind = Zindex[i]; + memset(&Z[zind*ldz], 0.0, n*sizeof(FloatingType)); + + /* inverse iteration with twisted factorization */ + for (k=1; k<=MAXITER; k++) { + + if (needBS == true) { + usedBS = true; + itmp = r; + + offset = Windex[i] - 1; + tmp = Wgap[i]; + Wgap[i] = 0.0; + + lapack::odrrb(&bl_size, D, DLL, &i_local, &i_local, &DZERO, + &twoeps, &offset, &Wshifted[i], &Wgap[i], + &Werr[i], work, iwork, &pivmin, &bl_spdiam, + &itmp, &info); + assert(info == 0); + + Wgap[i] = tmp; + lambda = Wshifted[i]; + r = 0; + } + wantNC = (usedBS == true) ? false : true; + + /* compute the eigenvector corresponding to lambda */ + lapack::odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, + &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, + &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], + &norminv, &residual, &RQcorr, work); + + if (k == 1) { + bstres = residual; + bstw = lambda; + } else if (residual < bstres) { + bstres = residual; + bstw = lambda; + } + + /* update support held */ + i_supmn = imin(i_supmn, isuppZ[2*zind ]); + i_supmx = imax(i_supmx, isuppZ[2*zind + 1]); + + /* Convergence test for Rayleigh Quotient Iteration + * not done if bisection was used */ + if ( !usedBS && residual > tol*gap + && fabs(RQcorr) > RQtol*fabs(lambda) ) { + + if (i_local <= negcount) { + sgndef = -1; /* wanted eigenvalue lies to the left */ + } else { + sgndef = 1; /* wanted eigenvalue lies to the right */ + } + + if ( RQcorr*sgndef >= 0.0 + && lambda+RQcorr <= right + && lambda+RQcorr >= left ) { + usedRQ = true; + if ( sgndef == 1 ) + left = lambda; + else + right = lambda; + Wshifted[i] = 0.5*(left + right); + lambda += RQcorr; + } else { /* bisection is needed */ + needBS = true; + } + + if ( right-left < RQtol*fabs(lambda) ) { + /* eigenvalue computed to bisection accuracy + * => compute eigenvector */ + usedBS = true; + } else if ( k == MAXITER-1 ) { + /* for last iteration use bisection */ + needBS = true; + } + } else { + /* go to next iteration */ + break; + } + + } /* end k */ + + /* if necessary call odr1v to improve error angle by 2nd step */ + step2II = false; + if ( usedRQ && usedBS && (bstres <= residual) ) { + lambda = bstw; + step2II = true; + } + if ( step2II == true ) { + lapack::odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, + &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, + &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], + &norminv, &residual, &RQcorr, work); + } + Wshifted[i] = lambda; + + /* compute support w.r.t. whole matrix + * block beginning is offset for each support */ + isuppZ[2*zind ] += bl_begin; + isuppZ[2*zind + 1] += bl_begin; + + /* ensure vector is okay if support changed in RQI + * minus ones because of indices starting from zero */ + i_Zfrom = isuppZ[2*zind ] - 1; + i_Zto = isuppZ[2*zind + 1] - 1; + i_supmn += bl_begin - 1; + i_supmx += bl_begin - 1; + if ( i_supmn < i_Zfrom ) { + for ( k=i_supmn; k < i_Zfrom; k++ ) { + Z[k + zind*ldz] = 0.0; + } + } + if ( i_supmx > i_Zto ) { + for ( k=i_Zto+1; k <= i_supmx; k++ ) { + Z[k + zind*ldz] = 0.0; + } + } + + /* normalize eigenvector */ + suppsize = i_Zto - i_Zfrom + 1; + blas::odscal(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE); + + sigma = L[bl_size-1]; + W[i] = lambda + sigma; + + if (i < end) + Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]); + + } /* end i */ + + /* decrement counter */ + PMR_decrement_counter(num_left, num_decrement); + + /* clean up */ + free(sng); + PMR_try_destroy_rrr(RRR); + + return 0; + } + +} // detail + +} // pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/rrr.hpp b/external/pmrrr/include/pmrrr/rrr.hpp new file mode 100644 index 0000000000..be4fc7b6b9 --- /dev/null +++ b/external/pmrrr/include/pmrrr/rrr.hpp @@ -0,0 +1,227 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __RRR_HPP__ +#define __RRR_HPP__ + +#include +#include +#include + +#include +#include + +#ifndef DISABLE_PTHREADS +# include +#endif + +namespace pmrrr { namespace detail { + + template + int PMR_rrr_init_lock(rrr_t *RRR) + { + #ifndef DISABLE_PTHREADS + int info = pthread_mutex_init(&RRR->mutex, NULL); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + void PMR_rrr_destroy_lock(rrr_t *RRR) + { + #ifndef DISABLE_PTHREADS + pthread_mutex_destroy(&RRR->mutex); + #endif + } + + template + int PMR_rrr_lock(rrr_t *RRR) + { + #ifndef DISABLE_PTHREADS + int info = pthread_mutex_lock(&RRR->mutex); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_lock returned %d\n",info); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + int PMR_rrr_unlock(rrr_t *RRR) + { + #ifndef DISABLE_PTHREADS + int info = pthread_mutex_unlock(&RRR->mutex); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + rrr_t *PMR_create_rrr(FloatingType *restrict D, FloatingType *restrict L, + FloatingType *restrict DL, FloatingType *restrict DLL, + int size, int depth) + { + rrr_t *RRR = (rrr_t *) malloc( sizeof(rrr_t) ); + assert(RRR != NULL); + + RRR->D = D; + RRR->L = L; + RRR->DL = DL; + RRR->DLL = DLL; + RRR->size = size; + RRR->depth = depth; + RRR->parent_processed = false; + RRR->copied_parent_rrr = false; + RRR->ndepend = 0; + + int info = PMR_rrr_init_lock(RRR); + + return RRR; + } + + template + rrr_t *PMR_reset_rrr(rrr_t *RRR, FloatingType *restrict D, + FloatingType *restrict L, FloatingType *restrict DL, + FloatingType *restrict DLL, int size, int depth) + { + RRR->D = D; + RRR->L = L; + RRR->DL = DL; + RRR->DLL = DLL; + RRR->size = size; + RRR->depth = depth; + RRR->parent_processed = false; + + return RRR; + } + + template + int PMR_increment_rrr_dependencies(rrr_t *RRR) + { + /* returns number of dependencies */ + int info = PMR_rrr_lock(RRR); + RRR->ndepend++; + int i = RRR->ndepend; + info |= PMR_rrr_unlock(RRR); + return i; + } + + template + int PMR_set_parent_processed_flag(rrr_t *RRR) + { + int info = PMR_rrr_lock(RRR); + RRR->parent_processed = true; + info |= PMR_rrr_unlock(RRR); + return info; + } + + template + int PMR_set_copied_parent_rrr_flag(rrr_t *RRR, bool val) + { + int info = PMR_rrr_lock(RRR); + RRR->copied_parent_rrr = val; + info |= PMR_rrr_unlock(RRR); + return info; + } + + template + int PMR_try_destroy_rrr(rrr_t *RRR) + { + /* return 0 on success, otherwise 1 */ + int info = PMR_rrr_lock(RRR); + + RRR->ndepend--; + int tmp = 0; + if (RRR->ndepend == 0 && RRR->parent_processed == true) { + if (RRR->depth >0) { + free(RRR->D); + free(RRR->L); + } + if (RRR->depth >=0) { + free(RRR->DL); + free(RRR->DLL); + } + tmp = 1; + } + + info |= PMR_rrr_unlock(RRR); + + if (tmp == 1) { + PMR_rrr_destroy_lock(RRR); + free(RRR); + return 0; + } else { + return 1; + } + } + +} // detail + +} // pmrrr + +#endif diff --git a/external/pmrrr/include/pmrrr/structs.h b/external/pmrrr/include/pmrrr/structs.h deleted file mode 100644 index e40ef3533f..0000000000 --- a/external/pmrrr/include/pmrrr/structs.h +++ /dev/null @@ -1,162 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ - -#ifndef SSTRUCTS_H -#define SSTRUCTS_H - -#include "mpi.h" -#include "global.h" -#include "counter.h" -#include "queue.h" - -typedef struct { - int n; - double *restrict D; - double *restrict E; - int nsplit; - int *restrict isplit ; - double spdiam; -} in_t; - -typedef struct { - int n; - double *vl; - double *vu; - int *il; - int *iu; - double *restrict W; - double *restrict Werr; - double *restrict Wgap; - int *restrict Windex; - int *restrict iblock; - int *restrict iproc; - double *restrict Wshifted; - double *restrict gersch; -} val_t; - -typedef struct { - int ldz; - int nz; - double *restrict Z; - int *restrict Zsupp; - int *restrict Zindex; -} vec_t; - -typedef struct { - int pid; - int nproc; - MPI_Comm comm; - int nthreads; - int thread_support; -} proc_t; - -typedef struct { - double split; - double rtol1; - double rtol2; - double pivmin; -} tol_t; - -typedef struct { - int num_messages; - MPI_Request *requests; - MPI_Status *stats; -} comm_t; - -typedef struct { - queue_t *r_queue; - queue_t *s_queue; - queue_t *c_queue; -} workQ_t; - -typedef struct { - double lambda; - int local_ind; - int block_ind; - int ind; -} sort_struct_t; - -typedef struct { - int n; - double *D; - double *E; - double *E2; - int il; - int iu; - int my_il; - int my_iu; - int nsplit; - int *isplit; - double bsrtol; - double pivmin; - double *gersch; - double *W; - double *Werr; - int *Windex; - int *iblock; -} auxarg1_t; - -typedef struct { - int bl_size; - double *D; - double *DE2; - int rf_begin; - int rf_end; - double *W; - double *Werr; - double *Wgap; - int *Windex; - double rtol1; - double rtol2; - double pivmin; - double bl_spdiam; -} auxarg2_t; - -typedef struct { - int tid; - proc_t *procinfo; - val_t *Wstruct; - vec_t *Zstruct; - tol_t *tolstruct; - workQ_t *workQ; - counter_t *num_left; -} auxarg3_t; - -#endif diff --git a/external/pmrrr/include/pmrrr/tasks.hpp b/external/pmrrr/include/pmrrr/tasks.hpp new file mode 100644 index 0000000000..66941b8da3 --- /dev/null +++ b/external/pmrrr/include/pmrrr/tasks.hpp @@ -0,0 +1,200 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#ifndef __TASKS_HPP__ +#define __TASKS_HPP__ + +#include +#include +#include + +#include +#include +#include + +namespace pmrrr { namespace detail { + + template + task_t *PMR_create_s_task(int first, int last, int depth, + int bl_begin, int bl_end, + FloatingType spdiam, FloatingType lgap, rrr_t *RRR) + { + task_t *t = (task_t *) malloc(sizeof(task_t)); + singleton_t *s = (singleton_t *) malloc( sizeof(singleton_t) ); + assert(t != NULL); + assert(s != NULL); + + s->begin = first; + s->end = last; + s->depth = depth; + s->bl_begin = bl_begin; + s->bl_end = bl_end; + s->bl_spdiam = spdiam; + s->lgap = lgap; + s->RRR = RRR; + + t->data = (void *) s; + t->flag = SINGLETON_TASK_FLAG; + t->next = NULL; + t->prev = NULL; + + return t; + } + + template + task_t *PMR_create_c_task(int first, int last, int depth, + int bl_begin, int bl_end, FloatingType spdiam, + FloatingType lgap, int proc_W_begin, + int proc_W_end, int lpid, int rpid, + rrr_t *RRR) + { + task_t *t = (task_t *) malloc(sizeof(task_t)); + cluster_t *c = (cluster_t *) malloc( sizeof(cluster_t) ); + assert(t != NULL); + assert(c != NULL); + + c->begin = first; + c->end = last; + c->depth = depth; + c->bl_begin = bl_begin; + c->bl_end = bl_end; + c->bl_spdiam = spdiam; + c->lgap = lgap; + c->proc_W_begin = proc_W_begin; + c->proc_W_end = proc_W_end; + c->left_pid = lpid; + c->right_pid = rpid; + c->RRR = RRR; + c->wait_until_refined = false; + + t->data = (void *) c; + t->flag = CLUSTER_TASK_FLAG; + t->next = NULL; + t->prev = NULL; + + return t; + } + + template + int PMR_refine_sem_init(refine_t *refine) + { + #ifndef DISABLE_PTHREADS + int info = sem_init(refine->sem, 0, 0); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + int PMR_refine_sem_destroy(refine_t *refine) + { + #ifndef DISABLE_PTHREADS + int info = sem_destroy(refine->sem); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + int PMR_refine_sem_wait(refine_t *refine) + { + #ifndef DISABLE_PTHREADS + int info = sem_wait(refine->sem); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + int PMR_refine_sem_post(refine_t *refine) + { + #ifndef DISABLE_PTHREADS + int info = sem_post(refine->sem); + assert(info == 0); + return info; + #else + return 0; + #endif + } + + template + task_t *PMR_create_r_task(int begin, int end, FloatingType *D, + FloatingType *DLL, int p, int q, int bl_size, + FloatingType bl_spdiam, int tid) + { + task_t *t= (task_t *) malloc(sizeof(task_t)); + refine_t *r = (refine_t *) malloc( sizeof(refine_t) ); + assert(t != NULL); + assert(r != NULL); + + r->begin = begin; + r->end = end; + r->D = D; + r->DLL = DLL; + r->p = p; + r->q = q; + r->bl_size = bl_size; + r->bl_spdiam = bl_spdiam; + r->producer_tid = tid; + + PMR_refine_sem_init(r); + + t->data = (void *) r; + t->flag = REFINE_TASK_FLAG; + t->next = NULL; + t->prev = NULL; + + return t; + } + +} // detail + +} // pmrrr + +#endif diff --git a/external/pmrrr/src/blas/LICENSE b/external/pmrrr/src/blas/LICENSE deleted file mode 100644 index aad36d6e12..0000000000 --- a/external/pmrrr/src/blas/LICENSE +++ /dev/null @@ -1,36 +0,0 @@ -Copyright (c) 1992-2009 The University of Tennessee. All rights reserved. - -$COPYRIGHT$ - -Additional copyrights may follow - -$HEADER$ - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer listed - in this license in the documentation and/or other materials - provided with the distribution. - -- Neither the name of the copyright holders nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/external/pmrrr/src/blas/odcpy.c b/external/pmrrr/src/blas/odcpy.c deleted file mode 100644 index ec1e53628d..0000000000 --- a/external/pmrrr/src/blas/odcpy.c +++ /dev/null @@ -1,101 +0,0 @@ -/* odcpy.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odcpy(int *n, double *dx, int *incx, double *dy, int *incy) -{ - /* System generated locals */ - int i__1; - - /* Local variables */ - int i__, m, ix, iy, mp1; - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* copies a vector, x, to a vector, y. */ -/* uses unrolled loops for increments equal to one. */ -/* jack dongarra, linpack, 3/11/78. */ -/* modified 12/3/93, array(1) declarations changed to array(*) */ - - -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* code for unequal increments or equal increments */ -/* not equal to 1 */ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] = dx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - - -/* clean-up loop */ - -L20: - m = *n % 7; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] = dx[i__]; -/* L30: */ - } - if (*n < 7) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - dy[i__] = dx[i__]; - dy[i__ + 1] = dx[i__ + 1]; - dy[i__ + 2] = dx[i__ + 2]; - dy[i__ + 3] = dx[i__ + 3]; - dy[i__ + 4] = dx[i__ + 4]; - dy[i__ + 5] = dx[i__ + 5]; - dy[i__ + 6] = dx[i__ + 6]; -/* L50: */ - } - return 0; -} diff --git a/external/pmrrr/src/blas/odscal.c b/external/pmrrr/src/blas/odscal.c deleted file mode 100644 index c02b91602e..0000000000 --- a/external/pmrrr/src/blas/odscal.c +++ /dev/null @@ -1,90 +0,0 @@ -/* dscal.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odscal(int *n, double *da, double *dx, int *incx) -{ - /* System generated locals */ - int i__1, i__2; - - /* Local variables */ - int i__, m, mp1, nincx; - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ -/* * */ -/* scales a vector by a constant. */ -/* uses unrolled loops for increment equal to one. */ -/* jack dongarra, linpack, 3/11/78. */ -/* modified 3/93 to return if incx .le. 0. */ -/* modified 12/3/93, array(1) declarations changed to array(*) */ - - -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ - /* Parameter adjustments */ - --dx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - dx[i__] = *da * dx[i__]; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - - -/* clean-up loop */ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - dx[i__] = *da * dx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - dx[i__] = *da * dx[i__]; - dx[i__ + 1] = *da * dx[i__ + 1]; - dx[i__ + 2] = *da * dx[i__ + 2]; - dx[i__ + 3] = *da * dx[i__ + 3]; - dx[i__ + 4] = *da * dx[i__ + 4]; -/* L50: */ - } - return 0; -} diff --git a/external/pmrrr/src/blas/odswap.c b/external/pmrrr/src/blas/odswap.c deleted file mode 100644 index e3db133564..0000000000 --- a/external/pmrrr/src/blas/odswap.c +++ /dev/null @@ -1,108 +0,0 @@ -/* dswap.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odswap(int *n, double *dx, int *incx, double *dy, int *incy) -{ - /* System generated locals */ - int i__1; - - /* Local variables */ - int i__, m, ix, iy, mp1; - double dtemp; - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* interchanges two vectors. */ -/* uses unrolled loops for increments equal one. */ -/* jack dongarra, linpack, 3/11/78. */ -/* modified 12/3/93, array(1) declarations changed to array(*) */ - - -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* code for unequal increments or equal increments not equal */ -/* to 1 */ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[ix]; - dx[ix] = dy[iy]; - dy[iy] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - - -/* clean-up loop */ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; - dtemp = dx[i__ + 1]; - dx[i__ + 1] = dy[i__ + 1]; - dy[i__ + 1] = dtemp; - dtemp = dx[i__ + 2]; - dx[i__ + 2] = dy[i__ + 2]; - dy[i__ + 2] = dtemp; -/* L50: */ - } - return 0; -} diff --git a/external/pmrrr/src/core/counter.c b/external/pmrrr/src/core/counter.c deleted file mode 100644 index 055d473a17..0000000000 --- a/external/pmrrr/src/core/counter.c +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include -#include -#include -#include "pmrrr/global.h" -#include "pmrrr/counter.h" - -#ifndef DISABLE_PTHREADS -# include -#endif - -int PMR_counter_init_lock(counter_t *counter) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_init(&counter->lock, NULL); - #else - int info = pthread_spin_init(&counter->lock, PTHREAD_PROCESS_PRIVATE); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -void PMR_counter_destroy_lock(counter_t *counter) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - pthread_mutex_destroy(&counter->lock); - #else - pthread_spin_destroy(&counter->lock); - #endif -#endif -} - -int PMR_counter_lock(counter_t *counter) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_lock(&counter->lock); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_lock returned %d\n",info); - #else - int info = pthread_spin_lock(&counter->lock); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_counter_unlock(counter_t *counter) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_unlock(&counter->lock); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); - #else - int info = pthread_spin_unlock(&counter->lock); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -counter_t *PMR_create_counter(int init_value) -{ - counter_t *counter = (counter_t *) malloc( sizeof(counter_t) ); - counter->value = init_value; - int info = PMR_counter_init_lock(counter); - return counter; -} - -void PMR_destroy_counter(counter_t *counter) -{ - PMR_counter_destroy_lock(counter); - free(counter); -} - -int PMR_get_counter_value(counter_t *counter) -{ - int info = PMR_counter_lock(counter); - int value = counter->value; - info |= PMR_counter_unlock(counter); - return value; -} - -inline int PMR_set_counter_value(counter_t *counter, int value) -{ - int info = PMR_counter_lock(counter); - counter->value = value; - info |= PMR_counter_unlock(counter); - return value; -} - -int PMR_decrement_counter(counter_t *counter, int amount) -{ - int info = PMR_counter_lock(counter); - counter->value -= amount; - int value = counter->value; - info |= PMR_counter_unlock(counter); - return value; -} - -int PMR_increment_counter(counter_t *counter, int amount) -{ - int info = PMR_counter_lock(counter); - counter->value += amount; - int value = counter->value; - info |= PMR_counter_unlock(counter); - return value; -} diff --git a/external/pmrrr/src/core/queue.c b/external/pmrrr/src/core/queue.c deleted file mode 100644 index 622cadc345..0000000000 --- a/external/pmrrr/src/core/queue.c +++ /dev/null @@ -1,234 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include -#include -#include -#include "pmrrr/global.h" -#include "pmrrr/queue.h" - -#ifndef DISABLE_PTHREADS -# include -#endif - -int PMR_queue_init_lock(queue_t *queue) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_init(&queue->lock, NULL); - #else - int info = pthread_spin_init(&queue->lock, PTHREAD_PROCESS_PRIVATE); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -void PMR_queue_destroy_lock(queue_t *queue) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - pthread_mutex_destroy(&queue->lock); - #else - pthread_spin_destroy(&queue->lock); - #endif -#endif -} - -int PMR_queue_lock(queue_t *queue) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_lock(&queue->lock); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_lock returned %d\n",info); - #else - int info = pthread_spin_lock(&queue->lock); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_queue_unlock(queue_t *queue) -{ -#ifndef DISABLE_PTHREADS - #ifdef NOSPINLOCKS - int info = pthread_mutex_unlock(&queue->lock); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); - #else - int info = pthread_spin_unlock(&queue->lock); - #endif - assert(info == 0); - return info; -#else - return 0; -#endif -} - -queue_t *PMR_create_empty_queue(void) -{ - queue_t* queue = (queue_t*)malloc(sizeof(queue_t)); assert(queue!=NULL); - - queue->num_tasks = 0; - queue->head = NULL; - queue->back = NULL; - - PMR_queue_init_lock(queue); - return queue; -} - -void PMR_destroy_queue(queue_t *queue) -{ - PMR_queue_destroy_lock(queue); - free(queue); -} - -int PMR_insert_task_at_front(queue_t *queue, task_t *task) -{ - int info = PMR_queue_lock(queue); - - queue->num_tasks++; - task->next = queue->head; - if (queue->head == NULL) - queue->back = task; - else - queue->head->prev = task; - queue->head = task; - - info |= PMR_queue_unlock(queue); - return info; -} - -int PMR_insert_task_at_back(queue_t *queue, task_t *task) -{ - int info = PMR_queue_lock(queue); - - queue->num_tasks++; - task->prev = queue->back; - task->next = NULL; - if (queue->head == NULL) - queue->head = task; - else - queue->back->next = task; - queue->back = task; - - info |= PMR_queue_unlock(queue); - return info; -} - -/* returns NULL when empty */ -task_t *PMR_remove_task_at_front(queue_t *queue) -{ - int info = PMR_queue_lock(queue); - - task_t *task = queue->head; - if (queue->head != NULL) { - /* at least one element */ - queue->num_tasks--; - if (queue->head->next == NULL) { - /* last task removed */ - queue->head = NULL; - queue->back = NULL; - } else { - /* at least two tasks */ - queue->head->next->prev = NULL; - queue->head = queue->head->next; - } - } - - info |= PMR_queue_unlock(queue); - return task; -} - -/* returns NULL when empty */ -task_t *PMR_remove_task_at_back (queue_t *queue) -{ - int info = PMR_queue_lock(queue); - - task_t *task = queue->back; - if (queue->back != NULL) { - /* at least one element */ - queue->num_tasks--; - if (queue->back->prev == NULL) { - /* last task removed */ - queue->head = NULL; - queue->back = NULL; - } else { - /* at least two tasks */ - queue->back->prev->next = NULL; - queue->back = queue->back->prev; - } - } - - info |= PMR_queue_unlock(queue); - return task; -} - -int PMR_get_num_tasks(queue_t *queue) -{ - int info = PMR_queue_lock(queue); - int num_tasks = queue->num_tasks; - info |= PMR_queue_unlock(queue); - return num_tasks; -} diff --git a/external/pmrrr/src/core/rrr.c b/external/pmrrr/src/core/rrr.c deleted file mode 100644 index f9df579515..0000000000 --- a/external/pmrrr/src/core/rrr.c +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include -#include -#include -#include "pmrrr/rrr.h" -#include "pmrrr/global.h" - -#ifndef DISABLE_PTHREADS -# include -#endif - -int PMR_rrr_init_lock(rrr_t *RRR) -{ -#ifndef DISABLE_PTHREADS - int info = pthread_mutex_init(&RRR->mutex, NULL); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -void PMR_rrr_destroy_lock(rrr_t *RRR) -{ -#ifndef DISABLE_PTHREADS - pthread_mutex_destroy(&RRR->mutex); -#endif -} - -int PMR_rrr_lock(rrr_t *RRR) -{ -#ifndef DISABLE_PTHREADS - int info = pthread_mutex_lock(&RRR->mutex); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_lock returned %d\n",info); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_rrr_unlock(rrr_t *RRR) -{ -#ifndef DISABLE_PTHREADS - int info = pthread_mutex_unlock(&RRR->mutex); - if( info == EINVAL ) - fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); - else if( info == EAGAIN ) - fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); - else if( info == EDEADLK ) - fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); - else if( info == EPERM ) - fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); - else - fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -rrr_t *PMR_create_rrr -(double *restrict D, double *restrict L, - double *restrict DL, double *restrict DLL, int size, int depth) -{ - rrr_t* RRR = (rrr_t*)malloc(sizeof(rrr_t)); assert(RRR!=NULL); - - RRR->D = D; - RRR->L = L; - RRR->DL = DL; - RRR->DLL = DLL; - RRR->size = size; - RRR->depth = depth; - RRR->parent_processed = false; - RRR->copied_parent_rrr = false; - RRR->ndepend = 0; - - int info = PMR_rrr_init_lock(RRR); - return RRR; -} - -rrr_t *PMR_reset_rrr -(rrr_t *RRR, double *restrict D, - double *restrict L, double *restrict DL, - double *restrict DLL, int size, int depth) -{ - RRR->D = D; - RRR->L = L; - RRR->DL = DL; - RRR->DLL = DLL; - RRR->size = size; - RRR->depth = depth; - RRR->parent_processed = false; - - return RRR; -} - -int PMR_increment_rrr_dependencies(rrr_t *RRR) -{ - int info = PMR_rrr_lock(RRR); - RRR->ndepend++; - int i = RRR->ndepend; - info |= PMR_rrr_unlock(RRR); - return i; -} - -int PMR_set_parent_processed_flag(rrr_t *RRR) -{ - int info = PMR_rrr_lock(RRR); - RRR->parent_processed = true; - info |= PMR_rrr_unlock(RRR); - return info; -} - -int PMR_set_copied_parent_rrr_flag(rrr_t *RRR, bool val) -{ - int info = PMR_rrr_lock(RRR); - RRR->copied_parent_rrr = val; - info |= PMR_rrr_unlock(RRR); - return info; -} - -/* return 0 on success, otherwise 1 */ -int PMR_try_destroy_rrr(rrr_t *RRR) -{ - int info = PMR_rrr_lock(RRR); - - RRR->ndepend--; - int tmp=0; - if (RRR->ndepend == 0 && RRR->parent_processed == true) { - if (RRR->depth >0) { - free(RRR->D); - free(RRR->L); - } - if (RRR->depth >=0) { - free(RRR->DL); - free(RRR->DLL); - } - tmp = 1; - } - - info |= PMR_rrr_unlock(RRR); - - if (tmp == 1) { - PMR_rrr_destroy_lock(RRR); - free(RRR); - return 0; - } else { - return 1; - } -} diff --git a/external/pmrrr/src/core/tasks.c b/external/pmrrr/src/core/tasks.c deleted file mode 100644 index 1258d87dfa..0000000000 --- a/external/pmrrr/src/core/tasks.c +++ /dev/null @@ -1,172 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include -#include -#include -#include "pmrrr/global.h" -#include "pmrrr/tasks.h" -#include "pmrrr/rrr.h" - -task_t *PMR_create_s_task -(int first, int last, int depth, int bl_begin, int bl_end, - double spdiam, double lgap, rrr_t *RRR) -{ - task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL); - singleton_t *s = (singleton_t*)malloc(sizeof(singleton_t)); assert(s!=NULL); - - s->begin = first; - s->end = last; - s->depth = depth; - s->bl_begin = bl_begin; - s->bl_end = bl_end; - s->bl_spdiam = spdiam; - s->lgap = lgap; - s->RRR = RRR; - - t->data = (void*)s; - t->flag = SINGLETON_TASK_FLAG; - t->next = NULL; - t->prev = NULL; - - return t; -} - -task_t *PMR_create_c_task -(int first, int last, int depth, int bl_begin, int bl_end, double spdiam, - double lgap, int proc_W_begin, int proc_W_end, int lpid, int rpid, rrr_t *RRR) -{ - task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL); - cluster_t *c = (cluster_t*)malloc(sizeof(cluster_t)); assert(c!=NULL); - - c->begin = first; - c->end = last; - c->depth = depth; - c->bl_begin = bl_begin; - c->bl_end = bl_end; - c->bl_spdiam = spdiam; - c->lgap = lgap; - c->proc_W_begin = proc_W_begin; - c->proc_W_end = proc_W_end; - c->left_pid = lpid; - c->right_pid = rpid; - c->RRR = RRR; - c->wait_until_refined = false; - - t->data = (void*)c; - t->flag = CLUSTER_TASK_FLAG; - t->next = NULL; - t->prev = NULL; - - return t; -} - -int PMR_refine_sem_init(refine_t *refine) -{ -#ifndef DISABLE_PTHREADS - int info = sem_init(refine->sem, 0, 0); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_refine_sem_destroy(refine_t *refine) -{ -#ifndef DISABLE_PTHREADS - int info = sem_destroy(refine->sem); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_refine_sem_wait(refine_t *refine) -{ -#ifndef DISABLE_PTHREADS - int info = sem_wait(refine->sem); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -int PMR_refine_sem_post(refine_t *refine) -{ -#ifndef DISABLE_PTHREADS - int info = sem_post(refine->sem); - assert(info == 0); - return info; -#else - return 0; -#endif -} - -task_t *PMR_create_r_task -(int begin, int end, double *D, double *DLL, - int p, int q, int bl_size, double bl_spdiam, int tid) -{ - task_t *t = (task_t*)malloc(sizeof(task_t)); assert(t!=NULL); - refine_t *r = (refine_t*)malloc(sizeof(refine_t)); assert(r!=NULL); - - r->begin = begin; - r->end = end; - r->D = D; - r->DLL = DLL; - r->p = p; - r->q = q; - r->bl_size = bl_size; - r->bl_spdiam = bl_spdiam; - r->producer_tid = tid; - - PMR_refine_sem_init(r); - - t->data = (void*)r; - t->flag = REFINE_TASK_FLAG; - t->next = NULL; - t->prev = NULL; - - return t; -} diff --git a/external/pmrrr/src/counter.cpp b/external/pmrrr/src/counter.cpp new file mode 100644 index 0000000000..d3a0ee1ea3 --- /dev/null +++ b/external/pmrrr/src/counter.cpp @@ -0,0 +1,183 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#include +#include + +#include +#include + +#ifndef DISABLE_PTHREADS +# include +#endif + +namespace pmrrr { namespace detail { + + int PMR_counter_init_lock(counter_t *counter) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_init(&counter->lock, NULL); + #else + int info = pthread_spin_init(&counter->lock, PTHREAD_PROCESS_PRIVATE); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + void PMR_counter_destroy_lock(counter_t *counter) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + pthread_mutex_destroy(&counter->lock); + #else + pthread_spin_destroy(&counter->lock); + #endif + #endif + } + + int PMR_counter_lock(counter_t *counter) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_lock(&counter->lock); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_lock returned %d\n",info); + #else + int info = pthread_spin_lock(&counter->lock); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + int PMR_counter_unlock(counter_t *counter) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_unlock(&counter->lock); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); + #else + int info = pthread_spin_unlock(&counter->lock); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + counter_t *PMR_create_counter(int init_value) + { + counter_t *counter = (counter_t *) malloc( sizeof(counter_t) ); + counter->value = init_value; + int info = PMR_counter_init_lock(counter); + return counter; + } + + void PMR_destroy_counter(counter_t *counter) + { + PMR_counter_destroy_lock(counter); + free(counter); + } + + int PMR_get_counter_value(counter_t *counter) + { + int info = PMR_counter_lock(counter); + int value = counter->value; + info |= PMR_counter_unlock(counter); + return value; + } + + inline int PMR_set_counter_value(counter_t *counter, int value) + { + int info = PMR_counter_lock(counter); + counter->value = value; + info |= PMR_counter_unlock(counter); + return value; + } + + int PMR_decrement_counter(counter_t *counter, int amount) + { + int info = PMR_counter_lock(counter); + counter->value -= amount; + int value = counter->value; + info |= PMR_counter_unlock(counter); + return value; + } + + int PMR_increment_counter(counter_t *counter, int amount) + { + int info = PMR_counter_lock(counter); + counter->value += amount; + int value = counter->value; + info |= PMR_counter_unlock(counter); + return value; + } + +} // namespace detail + +} // namespace pmrrr + diff --git a/external/pmrrr/src/lapack/LICENSE b/external/pmrrr/src/lapack/LICENSE deleted file mode 100644 index aad36d6e12..0000000000 --- a/external/pmrrr/src/lapack/LICENSE +++ /dev/null @@ -1,36 +0,0 @@ -Copyright (c) 1992-2009 The University of Tennessee. All rights reserved. - -$COPYRIGHT$ - -Additional copyrights may follow - -$HEADER$ - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer listed - in this license in the documentation and/or other materials - provided with the distribution. - -- Neither the name of the copyright holders nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/external/pmrrr/src/lapack/ode2.c b/external/pmrrr/src/lapack/ode2.c deleted file mode 100644 index 80cfef8226..0000000000 --- a/external/pmrrr/src/lapack/ode2.c +++ /dev/null @@ -1,136 +0,0 @@ -/* dlae2.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int ode2(double *a, double *b, double *c__, double *rt1, double *rt2) -{ - /* System generated locals */ - double d__1; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - double ab, df, tb, sm, rt, adf, acmn, acmx; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODE2 computes the eigenvalues of a 2-by-2 symmetric matrix */ -/* [ A B ] */ -/* [ B C ]. */ -/* On return, RT1 is the eigenvalue of larger absolute value, and RT2 */ -/* is the eigenvalue of smaller absolute value. */ - -/* Arguments */ -/* ========= */ - -/* A (input) DOUBLE PRECISION */ -/* The (1,1) element of the 2-by-2 matrix. */ - -/* B (input) DOUBLE PRECISION */ -/* The (1,2) and (2,1) elements of the 2-by-2 matrix. */ - -/* C (input) DOUBLE PRECISION */ -/* The (2,2) element of the 2-by-2 matrix. */ - -/* RT1 (output) DOUBLE PRECISION */ -/* The eigenvalue of larger absolute value. */ - -/* RT2 (output) DOUBLE PRECISION */ -/* The eigenvalue of smaller absolute value. */ - -/* Further Details */ -/* =============== */ - -/* RT1 is accurate to a few ulps barring over/underflow. */ - -/* RT2 may be inaccurate if there is massive cancellation in the */ -/* determinant A*C-B*B; higher precision or correctly rounded or */ -/* correctly truncated arithmetic would be needed to compute RT2 */ -/* accurately in all cases. */ - -/* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ -/* Underflow is harmless if the input data is 0 or exceeds */ -/* underflow_threshold / macheps. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Compute the eigenvalues */ - - sm = *a + *c__; - df = *a - *c__; - adf = fabs(df); - tb = *b + *b; - ab = fabs(tb); - if (fabs(*a) > fabs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - -/* Order of execution important. */ -/* To get fully accurate smaller eigenvalue, */ -/* next line needs to be executed in higher precision. */ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - -/* Order of execution important. */ -/* To get fully accurate smaller eigenvalue, */ -/* next line needs to be executed in higher precision. */ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - } - return 0; - -/* End of ODE2 */ - -} /* ode2_ */ diff --git a/external/pmrrr/src/lapack/odebz.c b/external/pmrrr/src/lapack/odebz.c deleted file mode 100644 index 5aadbc24bc..0000000000 --- a/external/pmrrr/src/lapack/odebz.c +++ /dev/null @@ -1,622 +0,0 @@ -/* dlaebz.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imax(a,b) ( (a) > (b) ? (a) : (b) ) -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) - -/* Subroutine */ -int odebz(int *ijob, int *nitmax, int *n, - int *mmax, int *minp, int *nbmin, double *abstol, - double *reltol, double *pivmin, double *d__, double * - e, double *e2, int *nval, double *ab, double *c__, - int *mout, int *nab, double *work, int *iwork, - int *info) -{ - /* System generated locals */ - int nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, - i__5, i__6; - double d__1, d__2, d__3, d__4; - - /* Local variables */ - int j, kf, ji, kl, jp, jit; - double tmp1, tmp2; - int itmp1, itmp2, kfnew, klnew; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODEBZ contains the iteration loops which compute and use the */ -/* function N(w), which is the count of eigenvalues of a symmetric */ -/* tridiagonal matrix T less than or equal to its argument w. It */ -/* performs a choice of two types of loops: */ - -/* IJOB=1, followed by */ -/* IJOB=2: It takes as input a list of intervals and returns a list of */ -/* sufficiently small intervals whose union contains the same */ -/* eigenvalues as the union of the original intervals. */ -/* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */ -/* The output interval (AB(j,1),AB(j,2)] will contain */ -/* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */ - -/* IJOB=3: It performs a binary search in each input interval */ -/* (AB(j,1),AB(j,2)] for a point w(j) such that */ -/* N(w(j))=NVAL(j), and uses C(j) as the starting point of */ -/* the search. If such a w(j) is found, then on output */ -/* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output */ -/* (AB(j,1),AB(j,2)] will be a small interval containing the */ -/* point where N(w) jumps through NVAL(j), unless that point */ -/* lies outside the initial interval. */ - -/* Note that the intervals are in all cases half-open intervals, */ -/* i.e., of the form (a,b] , which includes b but not a . */ - -/* To avoid underflow, the matrix should be scaled so that its largest */ -/* element is no greater than overflow**(1/2) * underflow**(1/4) */ -/* in absolute value. To assure the most accurate computation */ -/* of small eigenvalues, the matrix should be scaled to be */ -/* not much smaller than that, either. */ - -/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ -/* Matrix", Report CS41, Computer Science Dept., Stanford */ -/* University, July 21, 1966 */ - -/* Note: the arguments are, in general, *not* checked for unreasonable */ -/* values. */ - -/* Arguments */ -/* ========= */ - -/* IJOB (input) INT */ -/* Specifies what is to be done: */ -/* = 1: Compute NAB for the initial intervals. */ -/* = 2: Perform bisection iteration to find eigenvalues of T. */ -/* = 3: Perform bisection iteration to invert N(w), i.e., */ -/* to find a point which has a specified number of */ -/* eigenvalues of T to its left. */ -/* Other values will cause ODEBZ to return with INFO=-1. */ - -/* NITMAX (input) INT */ -/* The maximum number of "levels" of bisection to be */ -/* performed, i.e., an interval of width W will not be made */ -/* smaller than 2^(-NITMAX) * W. If not all intervals */ -/* have converged after NITMAX iterations, then INFO is set */ -/* to the number of non-converged intervals. */ - -/* N (input) INT */ -/* The dimension n of the tridiagonal matrix T. It must be at */ -/* least 1. */ - -/* MMAX (input) INT */ -/* The maximum number of intervals. If more than MMAX intervals */ -/* are generated, then ODEBZ will quit with INFO=MMAX+1. */ - -/* MINP (input) INT */ -/* The initial number of intervals. It may not be greater than */ -/* MMAX. */ - -/* NBMIN (input) INT */ -/* The smallest number of intervals that should be processed */ -/* using a vector loop. If zero, then only the scalar loop */ -/* will be used. */ - -/* ABSTOL (input) DOUBLE PRECISION */ -/* The minimum (absolute) width of an interval. When an */ -/* interval is narrower than ABSTOL, or than RELTOL times the */ -/* larger (in magnitude) endpoint, then it is considered to be */ -/* sufficiently small, i.e., converged. This must be at least */ -/* zero. */ - -/* RELTOL (input) DOUBLE PRECISION */ -/* The minimum relative width of an interval. When an interval */ -/* is narrower than ABSTOL, or than RELTOL times the larger (in */ -/* magnitude) endpoint, then it is considered to be */ -/* sufficiently small, i.e., converged. Note: this should */ -/* always be at least radix*machine epsilon. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum absolute value of a "pivot" in the Sturm */ -/* sequence loop. This *must* be at least max |e(j)**2| * */ -/* safe_min and at least safe_min, where safe_min is at least */ -/* the smallest number that can divide one without overflow. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The diagonal elements of the tridiagonal matrix T. */ - -/* E (input) DOUBLE PRECISION array, dimension (N) */ -/* The offdiagonal elements of the tridiagonal matrix T in */ -/* positions 1 through N-1. E(N) is arbitrary. */ - -/* E2 (input) DOUBLE PRECISION array, dimension (N) */ -/* The squares of the offdiagonal elements of the tridiagonal */ -/* matrix T. E2(N) is ignored. */ - -/* NVAL (input/output) INT array, dimension (MINP) */ -/* If IJOB=1 or 2, not referenced. */ -/* If IJOB=3, the desired values of N(w). The elements of NVAL */ -/* will be reordered to correspond with the intervals in AB. */ -/* Thus, NVAL(j) on output will not, in general be the same as */ -/* NVAL(j) on input, but it will correspond with the interval */ -/* (AB(j,1),AB(j,2)] on output. */ - -/* AB (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */ -/* The endpoints of the intervals. AB(j,1) is a(j), the left */ -/* endpoint of the j-th interval, and AB(j,2) is b(j), the */ -/* right endpoint of the j-th interval. The input intervals */ -/* will, in general, be modified, split, and reordered by the */ -/* calculation. */ - -/* C (input/output) DOUBLE PRECISION array, dimension (MMAX) */ -/* If IJOB=1, ignored. */ -/* If IJOB=2, workspace. */ -/* If IJOB=3, then on input C(j) should be initialized to the */ -/* first search point in the binary search. */ - -/* MOUT (output) INT */ -/* If IJOB=1, the number of eigenvalues in the intervals. */ -/* If IJOB=2 or 3, the number of intervals output. */ -/* If IJOB=3, MOUT will equal MINP. */ - -/* NAB (input/output) INT array, dimension (MMAX,2) */ -/* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */ -/* If IJOB=2, then on input, NAB(i,j) should be set. It must */ -/* satisfy the condition: */ -/* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */ -/* which means that in interval i only eigenvalues */ -/* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, */ -/* NAB(i,j)=N(AB(i,j)), from a previous call to ODEBZ with */ -/* IJOB=1. */ -/* On output, NAB(i,j) will contain */ -/* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */ -/* the input interval that the output interval */ -/* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */ -/* the input values of NAB(k,1) and NAB(k,2). */ -/* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */ -/* unless N(w) > NVAL(i) for all search points w , in which */ -/* case NAB(i,1) will not be modified, i.e., the output */ -/* value will be the same as the input value (modulo */ -/* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */ -/* for all search points w , in which case NAB(i,2) will */ -/* not be modified. Normally, NAB should be set to some */ -/* distinctive value(s) before ODEBZ is called. */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (MMAX) */ -/* Workspace. */ - -/* IWORK (workspace) INT array, dimension (MMAX) */ -/* Workspace. */ - -/* INFO (output) INT */ -/* = 0: All intervals converged. */ -/* = 1--MMAX: The last INFO intervals did not converge. */ -/* = MMAX+1: More than MMAX intervals were generated. */ - -/* Further Details */ -/* =============== */ - -/* This routine is intended to be called only by other LAPACK */ -/* routines, thus the interface is less user-friendly. It is intended */ -/* for two purposes: */ - -/* (a) finding eigenvalues. In this case, ODEBZ should have one or */ -/* more initial intervals set up in AB, and ODEBZ should be called */ -/* with IJOB=1. This sets up NAB, and also counts the eigenvalues. */ -/* Intervals with no eigenvalues would usually be thrown out at */ -/* this point. Also, if not all the eigenvalues in an interval i */ -/* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */ -/* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */ -/* eigenvalue. ODEBZ is then called with IJOB=2 and MMAX */ -/* no smaller than the value of MOUT returned by the call with */ -/* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */ -/* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */ -/* tolerance specified by ABSTOL and RELTOL. */ - -/* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */ -/* In this case, start with a Gershgorin interval (a,b). Set up */ -/* AB to contain 2 search intervals, both initially (a,b). One */ -/* NVAL element should contain f-1 and the other should contain l */ -/* , while C should contain a and b, resp. NAB(i,1) should be -1 */ -/* and NAB(i,2) should be N+1, to flag an error if the desired */ -/* interval does not lie in (a,b). ODEBZ is then called with */ -/* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- */ -/* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */ -/* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */ -/* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and */ -/* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and */ -/* w(l-r)=...=w(l+k) are handled similarly. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Check for Errors */ - - /* Parameter adjustments */ - nab_dim1 = *mmax; - nab_offset = 1 + nab_dim1; - nab -= nab_offset; - ab_dim1 = *mmax; - ab_offset = 1 + ab_dim1; - ab -= ab_offset; - --d__; - --e; - --e2; - --nval; - --c__; - --work; - --iwork; - - /* Function Body */ - *info = 0; - if (*ijob < 1 || *ijob > 3) { - *info = -1; - return 0; - } - -/* Initialize NAB */ - - if (*ijob == 1) { - -/* Compute the number of eigenvalues in the initial intervals. */ - - *mout = 0; - i__1 = *minp; - for (ji = 1; ji <= i__1; ++ji) { - for (jp = 1; jp <= 2; ++jp) { - tmp1 = d__[1] - ab[ji + jp * ab_dim1]; - if (fabs(tmp1) < *pivmin) { - tmp1 = -(*pivmin); - } - nab[ji + jp * nab_dim1] = 0; - if (tmp1 <= 0.) { - nab[ji + jp * nab_dim1] = 1; - } - - i__2 = *n; - for (j = 2; j <= i__2; ++j) { - tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1]; - if (fabs(tmp1) < *pivmin) { - tmp1 = -(*pivmin); - } - if (tmp1 <= 0.) { - ++nab[ji + jp * nab_dim1]; - } -/* L10: */ - } -/* L20: */ - } - *mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1]; -/* L30: */ - } - return 0; - } - -/* Initialize for loop */ - -/* KF and KL have the following meaning: */ -/* Intervals 1,...,KF-1 have converged. */ -/* Intervals KF,...,KL still need to be refined. */ - - kf = 1; - kl = *minp; - -/* If IJOB=2, initialize C. */ -/* If IJOB=3, use the user-supplied starting point. */ - - if (*ijob == 2) { - i__1 = *minp; - for (ji = 1; ji <= i__1; ++ji) { - c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; -/* L40: */ - } - } - -/* Iteration loop */ - - i__1 = *nitmax; - for (jit = 1; jit <= i__1; ++jit) { - -/* Loop over intervals */ - - if (kl - kf + 1 >= *nbmin && *nbmin > 0) { - -/* Begin of Parallel Version of the loop */ - - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - -/* Compute N(c), the number of eigenvalues less than c */ - - work[ji] = d__[1] - c__[ji]; - iwork[ji] = 0; - if (work[ji] <= *pivmin) { - iwork[ji] = 1; -/* Computing MIN */ - d__1 = work[ji], d__2 = -(*pivmin); - work[ji] = fmin(d__1,d__2); - } - - i__3 = *n; - for (j = 2; j <= i__3; ++j) { - work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji]; - if (work[ji] <= *pivmin) { - ++iwork[ji]; -/* Computing MIN */ - d__1 = work[ji], d__2 = -(*pivmin); - work[ji] = fmin(d__1,d__2); - } -/* L50: */ - } -/* L60: */ - } - - if (*ijob <= 2) { - -/* IJOB=2: Choose all intervals containing eigenvalues. */ - - klnew = kl; - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - -/* Insure that N(w) is monotone */ - -/* Computing MIN */ -/* Computing MAX */ - i__5 = nab[ji + nab_dim1], i__6 = iwork[ji]; - i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,i__6); - iwork[ji] = imin(i__3,i__4); - -/* Update the Queue -- add intervals if both halves */ -/* contain eigenvalues. */ - - if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) { - -/* No eigenvalue in the upper interval: */ -/* just use the lower interval. */ - - ab[ji + (ab_dim1 << 1)] = c__[ji]; - - } else if (iwork[ji] == nab[ji + nab_dim1]) { - -/* No eigenvalue in the lower interval: */ -/* just use the upper interval. */ - - ab[ji + ab_dim1] = c__[ji]; - } else { - ++klnew; - if (klnew <= *mmax) { - -/* Eigenvalue in both intervals -- add upper to */ -/* queue. */ - - ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << - 1)]; - nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 - << 1)]; - ab[klnew + ab_dim1] = c__[ji]; - nab[klnew + nab_dim1] = iwork[ji]; - ab[ji + (ab_dim1 << 1)] = c__[ji]; - nab[ji + (nab_dim1 << 1)] = iwork[ji]; - } else { - *info = *mmax + 1; - } - } -/* L70: */ - } - if (*info != 0) { - return 0; - } - kl = klnew; - } else { - -/* IJOB=3: Binary search. Keep only the interval containing */ -/* w s.t. N(w) = NVAL */ - - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - if (iwork[ji] <= nval[ji]) { - ab[ji + ab_dim1] = c__[ji]; - nab[ji + nab_dim1] = iwork[ji]; - } - if (iwork[ji] >= nval[ji]) { - ab[ji + (ab_dim1 << 1)] = c__[ji]; - nab[ji + (nab_dim1 << 1)] = iwork[ji]; - } -/* L80: */ - } - } - - } else { - -/* End of Parallel Version of the loop */ - -/* Begin of Serial Version of the loop */ - - klnew = kl; - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - -/* Compute N(w), the number of eigenvalues less than w */ - - tmp1 = c__[ji]; - tmp2 = d__[1] - tmp1; - itmp1 = 0; - if (tmp2 <= *pivmin) { - itmp1 = 1; -/* Computing MIN */ - d__1 = tmp2, d__2 = -(*pivmin); - tmp2 = fmin(d__1,d__2); - } - - i__3 = *n; - for (j = 2; j <= i__3; ++j) { - tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1; - if (tmp2 <= *pivmin) { - ++itmp1; -/* Computing MIN */ - d__1 = tmp2, d__2 = -(*pivmin); - tmp2 = fmin(d__1,d__2); - } -/* L90: */ - } - - if (*ijob <= 2) { - -/* IJOB=2: Choose all intervals containing eigenvalues. */ - -/* Insure that N(w) is monotone */ - -/* Computing MIN */ -/* Computing MAX */ - i__5 = nab[ji + nab_dim1]; - i__3 = nab[ji + (nab_dim1 << 1)], i__4 = imax(i__5,itmp1); - itmp1 = imin(i__3,i__4); - -/* Update the Queue -- add intervals if both halves */ -/* contain eigenvalues. */ - - if (itmp1 == nab[ji + (nab_dim1 << 1)]) { - -/* No eigenvalue in the upper interval: */ -/* just use the lower interval. */ - - ab[ji + (ab_dim1 << 1)] = tmp1; - - } else if (itmp1 == nab[ji + nab_dim1]) { - -/* No eigenvalue in the lower interval: */ -/* just use the upper interval. */ - - ab[ji + ab_dim1] = tmp1; - } else if (klnew < *mmax) { - -/* Eigenvalue in both intervals -- add upper to queue. */ - - ++klnew; - ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)]; - nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << - 1)]; - ab[klnew + ab_dim1] = tmp1; - nab[klnew + nab_dim1] = itmp1; - ab[ji + (ab_dim1 << 1)] = tmp1; - nab[ji + (nab_dim1 << 1)] = itmp1; - } else { - *info = *mmax + 1; - return 0; - } - } else { - -/* IJOB=3: Binary search. Keep only the interval */ -/* containing w s.t. N(w) = NVAL */ - - if (itmp1 <= nval[ji]) { - ab[ji + ab_dim1] = tmp1; - nab[ji + nab_dim1] = itmp1; - } - if (itmp1 >= nval[ji]) { - ab[ji + (ab_dim1 << 1)] = tmp1; - nab[ji + (nab_dim1 << 1)] = itmp1; - } - } -/* L100: */ - } - kl = klnew; - -/* End of Serial Version of the loop */ - - } - -/* Check for convergence */ - - kfnew = kf; - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], fabs( - d__1)); -/* Computing MAX */ - d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], fabs(d__1)), d__4 = (d__2 = - ab[ji + ab_dim1], fabs(d__2)); - tmp2 = fmax(d__3,d__4); -/* Computing MAX */ - d__1 = fmax(*abstol,*pivmin), d__2 = *reltol * tmp2; - if (tmp1 < fmax(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + ( - nab_dim1 << 1)]) { - -/* Converged -- Swap with position KFNEW, */ -/* then increment KFNEW */ - - if (ji > kfnew) { - tmp1 = ab[ji + ab_dim1]; - tmp2 = ab[ji + (ab_dim1 << 1)]; - itmp1 = nab[ji + nab_dim1]; - itmp2 = nab[ji + (nab_dim1 << 1)]; - ab[ji + ab_dim1] = ab[kfnew + ab_dim1]; - ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)]; - nab[ji + nab_dim1] = nab[kfnew + nab_dim1]; - nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)]; - ab[kfnew + ab_dim1] = tmp1; - ab[kfnew + (ab_dim1 << 1)] = tmp2; - nab[kfnew + nab_dim1] = itmp1; - nab[kfnew + (nab_dim1 << 1)] = itmp2; - if (*ijob == 3) { - itmp1 = nval[ji]; - nval[ji] = nval[kfnew]; - nval[kfnew] = itmp1; - } - } - ++kfnew; - } -/* L110: */ - } - kf = kfnew; - -/* Choose Midpoints */ - - i__2 = kl; - for (ji = kf; ji <= i__2; ++ji) { - c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; -/* L120: */ - } - -/* If no more intervals to refine, quit. */ - - if (kf > kl) { - goto L140; - } -/* L130: */ - } - -/* Converged */ - -L140: -/* Computing MAX */ - i__1 = kl + 1 - kf; - *info = imax(i__1,0); - *mout = kl; - - return 0; - -/* End of ODEBZ */ - -} /* odebz_ */ diff --git a/external/pmrrr/src/lapack/odev2.c b/external/pmrrr/src/lapack/odev2.c deleted file mode 100644 index b70b228676..0000000000 --- a/external/pmrrr/src/lapack/odev2.c +++ /dev/null @@ -1,183 +0,0 @@ -/* dlaev2.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odev2(double *a, double *b, double *c__, - double *rt1, double *rt2, double *cs1, double *sn1) -{ - /* System generated locals */ - double d__1; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - double ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - int sgn1, sgn2; - double acmn, acmx; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */ -/* [ A B ] */ -/* [ B C ]. */ -/* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */ -/* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */ -/* eigenvector for RT1, giving the decomposition */ - -/* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] */ -/* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. */ - -/* Arguments */ -/* ========= */ - -/* A (input) DOUBLE PRECISION */ -/* The (1,1) element of the 2-by-2 matrix. */ - -/* B (input) DOUBLE PRECISION */ -/* The (1,2) element and the conjugate of the (2,1) element of */ -/* the 2-by-2 matrix. */ - -/* C (input) DOUBLE PRECISION */ -/* The (2,2) element of the 2-by-2 matrix. */ - -/* RT1 (output) DOUBLE PRECISION */ -/* The eigenvalue of larger absolute value. */ - -/* RT2 (output) DOUBLE PRECISION */ -/* The eigenvalue of smaller absolute value. */ - -/* CS1 (output) DOUBLE PRECISION */ -/* SN1 (output) DOUBLE PRECISION */ -/* The vector (CS1, SN1) is a unit right eigenvector for RT1. */ - -/* Further Details */ -/* =============== */ - -/* RT1 is accurate to a few ulps barring over/underflow. */ - -/* RT2 may be inaccurate if there is massive cancellation in the */ -/* determinant A*C-B*B; higher precision or correctly rounded or */ -/* correctly truncated arithmetic would be needed to compute RT2 */ -/* accurately in all cases. */ - -/* CS1 and SN1 are accurate to a few ulps barring over/underflow. */ - -/* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ -/* Underflow is harmless if the input data is 0 or exceeds */ -/* underflow_threshold / macheps. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Compute the eigenvalues */ - - sm = *a + *c__; - df = *a - *c__; - adf = fabs(df); - tb = *b + *b; - ab = fabs(tb); - if (fabs(*a) > fabs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - sgn1 = -1; - -/* Order of execution important. */ -/* To get fully accurate smaller eigenvalue, */ -/* next line needs to be executed in higher precision. */ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - sgn1 = 1; - -/* Order of execution important. */ -/* To get fully accurate smaller eigenvalue, */ -/* next line needs to be executed in higher precision. */ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - sgn1 = 1; - } - -/* Compute the eigenvector */ - - if (df >= 0.) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; - } - acs = fabs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1. / sqrt(ct * ct + 1.); - *cs1 = ct * *sn1; - } else { - if (ab == 0.) { - *cs1 = 1.; - *sn1 = 0.; - } else { - tn = -cs / tb; - *cs1 = 1. / sqrt(tn * tn + 1.); - *sn1 = tn * *cs1; - } - } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } - return 0; - -/* End of ODEV2 */ - -} /* odev2_ */ diff --git a/external/pmrrr/src/lapack/odnan.c b/external/pmrrr/src/lapack/odnan.c deleted file mode 100644 index ce7abb4bcd..0000000000 --- a/external/pmrrr/src/lapack/odnan.c +++ /dev/null @@ -1,46 +0,0 @@ -/* disnan.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -int odnan(double *din) -{ - /* System generated locals */ - int ret_val; - - /* Local variables */ - extern int odsnan(double *, double *); - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ -/* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ -/* future. */ - -/* Arguments */ -/* ========= */ - -/* DIN (input) DOUBLE PRECISION */ -/* Input to test for NaN. */ - -/* ===================================================================== */ - -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - ret_val = odsnan(din, din); - return ret_val; -} /* odnan_ */ diff --git a/external/pmrrr/src/lapack/odneg.c b/external/pmrrr/src/lapack/odneg.c deleted file mode 100644 index 4a3836797c..0000000000 --- a/external/pmrrr/src/lapack/odneg.c +++ /dev/null @@ -1,215 +0,0 @@ -/* dlaneg.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imax(a,b) ( (a) > (b) ? (a) : (b) ) -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) - -int odneg(int *n, double *d__, double *lld, double * - sigma, double *pivmin, int *r__) -{ - /* System generated locals */ - int ret_val, i__1, i__2, i__3, i__4; - - /* Local variables */ - int j; - double p, t; - int bj; - double tmp; - int neg1, neg2; - double bsav, gamma, dplus; - extern int odnan(double *); - int negcnt; - int sawnan; - double dminus; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODNEG computes the Sturm count, the number of negative pivots */ -/* encountered while factoring tridiagonal T - sigma I = L D L^T. */ -/* This implementation works directly on the factors without forming */ -/* the tridiagonal matrix T. The Sturm count is also the number of */ -/* eigenvalues of T less than sigma. */ - -/* This routine is called from DLARRB. */ - -/* The current routine does not use the PIVMIN parameter but rather */ -/* requires IEEE-754 propagation of Infinities and NaNs. This */ -/* routine also has no input range restrictions but does require */ -/* default exception handling such that x/0 produces Inf when x is */ -/* non-zero, and Inf/Inf produces NaN. For more information, see: */ - -/* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */ -/* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */ -/* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */ -/* (Tech report version in LAWN 172 with the same title.) */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of the diagonal matrix D. */ - -/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (N-1) elements L(i)*L(i)*D(i). */ - -/* SIGMA (input) DOUBLE PRECISION */ -/* Shift amount in T - sigma I = L D L^T. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence. May be used */ -/* when zero pivots are encountered on non-IEEE-754 */ -/* architectures. */ - -/* R (input) INT */ -/* The twist index for the twisted factorization that is used */ -/* for the negcount. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ -/* Jason Riedy, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* Some architectures propagate Infinities and NaNs very slowly, so */ -/* the code computes counts in BLKLEN chunks. Then a NaN can */ -/* propagate at most BLKLEN columns before being detected. This is */ -/* not a general tuning parameter; it needs only to be just large */ -/* enough that the overhead is tiny in common cases. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - /* Parameter adjustments */ - --lld; - --d__; - - /* Function Body */ - negcnt = 0; -/* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */ - t = -(*sigma); - i__1 = *r__ - 1; - for (bj = 1; bj <= i__1; bj += 128) { - neg1 = 0; - bsav = t; -/* Computing MIN */ - i__3 = bj + 127, i__4 = *r__ - 1; - i__2 = imin(i__3,i__4); - for (j = bj; j <= i__2; ++j) { - dplus = d__[j] + t; - if (dplus < 0.) { - ++neg1; - } - tmp = t / dplus; - t = tmp * lld[j] - *sigma; -/* L21: */ - } - sawnan = odnan(&t); -/* Run a slower version of the above loop if a NaN is detected. */ -/* A NaN should occur only with a zero pivot after an infinite */ -/* pivot. In that case, substituting 1 for T/DPLUS is the */ -/* correct limit. */ - if (sawnan) { - neg1 = 0; - t = bsav; -/* Computing MIN */ - i__3 = bj + 127, i__4 = *r__ - 1; - i__2 = imin(i__3,i__4); - for (j = bj; j <= i__2; ++j) { - dplus = d__[j] + t; - if (dplus < 0.) { - ++neg1; - } - tmp = t / dplus; - if (odnan(&tmp)) { - tmp = 1.; - } - t = tmp * lld[j] - *sigma; -/* L22: */ - } - } - negcnt += neg1; -/* L210: */ - } - -/* II) lower part: L D L^T - SIGMA I = U- D- U-^T */ - p = d__[*n] - *sigma; - i__1 = *r__; - for (bj = *n - 1; bj >= i__1; bj += -128) { - neg2 = 0; - bsav = p; -/* Computing MAX */ - i__3 = bj - 127; - i__2 = imax(i__3,*r__); - for (j = bj; j >= i__2; --j) { - dminus = lld[j] + p; - if (dminus < 0.) { - ++neg2; - } - tmp = p / dminus; - p = tmp * d__[j] - *sigma; -/* L23: */ - } - sawnan = odnan(&p); -/* As above, run a slower version that substitutes 1 for Inf/Inf. */ - - if (sawnan) { - neg2 = 0; - p = bsav; -/* Computing MAX */ - i__3 = bj - 127; - i__2 = imax(i__3,*r__); - for (j = bj; j >= i__2; --j) { - dminus = lld[j] + p; - if (dminus < 0.) { - ++neg2; - } - tmp = p / dminus; - if (odnan(&tmp)) { - tmp = 1.; - } - p = tmp * d__[j] - *sigma; -/* L24: */ - } - } - negcnt += neg2; -/* L230: */ - } - -/* III) Twist index */ -/* T was shifted by SIGMA initially. */ - gamma = t + *sigma + p; - if (gamma < 0.) { - ++negcnt; - } - ret_val = negcnt; - return ret_val; -} /* odneg_ */ diff --git a/external/pmrrr/src/lapack/odnst.c b/external/pmrrr/src/lapack/odnst.c deleted file mode 100644 index f12ab24f46..0000000000 --- a/external/pmrrr/src/lapack/odnst.c +++ /dev/null @@ -1,159 +0,0 @@ -/* dlanst.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Table of constant values */ -static int c__1 = 1; - -double odnst(char *norm, int *n, double *d__, double *e) -{ - /* System generated locals */ - int i__1; - double ret_val, d__1, d__2, d__3, d__4, d__5; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__; - double sum, scale; - extern int olsame(char *, char *); - double anorm; - extern /* Subroutine */ int odssq(int *, double *, int *, - double *, double *); - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODNST returns the value of the one norm, or the Frobenius norm, or */ -/* the infinity norm, or the element of largest absolute value of a */ -/* real symmetric tridiagonal matrix A. */ - -/* Description */ -/* =========== */ - -/* ODNST returns the value */ - -/* ODNST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ -/* ( */ -/* ( norm1(A), NORM = '1', 'O' or 'o' */ -/* ( */ -/* ( normI(A), NORM = 'I' or 'i' */ -/* ( */ -/* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ - -/* where norm1 denotes the one norm of a matrix (maximum column sum), */ -/* normI denotes the infinity norm of a matrix (maximum row sum) and */ -/* normF denotes the Frobenius norm of a matrix (square root of sum of */ -/* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ - -/* Arguments */ -/* ========= */ - -/* NORM (input) CHARACTER*1 */ -/* Specifies the value to be returned in ODNST as described */ -/* above. */ - -/* N (input) INT */ -/* The order of the matrix A. N >= 0. When N = 0, ODNST is */ -/* set to zero. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The diagonal elements of A. */ - -/* E (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (n-1) sub-diagonal or super-diagonal elements of A. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - if (*n <= 0) { - anorm = 0.; - } else if (olsame(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - anorm = (d__1 = d__[*n], fabs(d__1)); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = d__[i__], fabs(d__1)); - anorm = fmax(d__2,d__3); -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = e[i__], fabs(d__1)); - anorm = fmax(d__2,d__3); -/* L10: */ - } - } else if (olsame(norm, "O") || *(unsigned char *) - norm == '1' || olsame(norm, "I")) { - -/* Find norm1(A). */ - - if (*n == 1) { - anorm = fabs(d__[1]); - } else { -/* Computing MAX */ - d__3 = fabs(d__[1]) + fabs(e[1]), d__4 = (d__1 = e[*n - 1], fabs( - d__1)) + (d__2 = d__[*n], fabs(d__2)); - anorm = fmax(d__3,d__4); - i__1 = *n - 1; - for (i__ = 2; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__4 = anorm, d__5 = (d__1 = d__[i__], fabs(d__1)) + (d__2 = e[ - i__], fabs(d__2)) + (d__3 = e[i__ - 1], fabs(d__3)); - anorm = fmax(d__4,d__5); -/* L20: */ - } - } - } else if (olsame(norm, "F") || olsame(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - if (*n > 1) { - i__1 = *n - 1; - odssq(&i__1, &e[1], &c__1, &scale, &sum); - sum *= 2; - } - odssq(n, &d__[1], &c__1, &scale, &sum); - anorm = scale * sqrt(sum); - } - - ret_val = anorm; - return ret_val; - -/* End of ODNST */ - -} /* odnst_ */ diff --git a/external/pmrrr/src/lapack/odr1v.c b/external/pmrrr/src/lapack/odr1v.c deleted file mode 100644 index 1c30a80835..0000000000 --- a/external/pmrrr/src/lapack/odr1v.c +++ /dev/null @@ -1,439 +0,0 @@ -/* odr1v.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define TRUE_ (1) -#define FALSE_ (0) - -/* Subroutine */ -int odr1v(int *n, int *b1, int *bn, double - *lambda, double *d__, double *l, double *ld, double * - lld, double *pivmin, double *gaptol, double *z__, int - *wantnc, int *negcnt, double *ztz, double *mingma, - int *r__, int *isuppz, double *nrminv, double *resid, - double *rqcorr, double *work) -{ - /* System generated locals */ - int i__1; - double d__1, d__2, d__3; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__; - double s; - int r1, r2; - double eps, tmp; - int neg1, neg2, indp, inds; - double dplus; - // extern double odmch(char *); - extern int odnan(double *); - int indlpl, indumn; - double dminus; - int sawnan1, sawnan2; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODR1V computes the (scaled) r-th column of the inverse of */ -/* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */ -/* L D L^T - sigma I. When sigma is close to an eigenvalue, the */ -/* computed vector is an accurate eigenvector. Usually, r corresponds */ -/* to the index where the eigenvector is largest in magnitude. */ -/* The following steps accomplish this computation : */ -/* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */ -/* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */ -/* (c) Computation of the diagonal elements of the inverse of */ -/* L D L^T - sigma I by combining the above transforms, and choosing */ -/* r as the index where the diagonal of the inverse is (one of the) */ -/* largest in magnitude. */ -/* (d) Computation of the (scaled) r-th column of the inverse using the */ -/* twisted factorization obtained by combining the top part of the */ -/* the stationary and the bottom part of the progressive transform. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix L D L^T. */ - -/* B1 (input) INT */ -/* First index of the submatrix of L D L^T. */ - -/* BN (input) INT */ -/* Last index of the submatrix of L D L^T. */ - -/* LAMBDA (input) DOUBLE PRECISION */ -/* The shift. In order to compute an accurate eigenvector, */ -/* LAMBDA should be a good approximation to an eigenvalue */ -/* of L D L^T. */ - -/* L (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (n-1) subdiagonal elements of the unit bidiagonal matrix */ -/* L, in elements 1 to N-1. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The n diagonal elements of the diagonal matrix D. */ - -/* LD (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The n-1 elements L(i)*D(i). */ - -/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The n-1 elements L(i)*L(i)*D(i). */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence. */ - -/* GAPTOL (input) DOUBLE PRECISION */ -/* Tolerance that indicates when eigenvector entries are negligible */ -/* w.r.t. their contribution to the residual. */ - -/* Z (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On input, all entries of Z must be set to 0. */ -/* On output, Z contains the (scaled) r-th column of the */ -/* inverse. The scaling is such that Z(R) equals 1. */ - -/* WANTNC (input) INT */ -/* Specifies whether NEGCNT has to be computed. */ - -/* NEGCNT (output) INT */ -/* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */ -/* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */ - -/* ZTZ (output) DOUBLE PRECISION */ -/* The square of the 2-norm of Z. */ - -/* MINGMA (output) DOUBLE PRECISION */ -/* The reciprocal of the largest (in magnitude) diagonal */ -/* element of the inverse of L D L^T - sigma I. */ - -/* R (input/output) INT */ -/* The twist index for the twisted factorization used to */ -/* compute Z. */ -/* On input, 0 <= R <= N. If R is input as 0, R is set to */ -/* the index where (L D L^T - sigma I)^{-1} is largest */ -/* in magnitude. If 1 <= R <= N, R is unchanged. */ -/* On output, R contains the twist index used to compute Z. */ -/* Ideally, R designates the position of the maximum entry in the */ -/* eigenvector. */ - -/* ISUPPZ (output) INT array, dimension (2) */ -/* The support of the vector in Z, i.e., the vector Z is */ -/* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */ - -/* NRMINV (output) DOUBLE PRECISION */ -/* NRMINV = 1/SQRT( ZTZ ) */ - -/* RESID (output) DOUBLE PRECISION */ -/* The residual of the FP vector. */ -/* RESID = ABS( MINGMA )/SQRT( ZTZ ) */ - -/* RQCORR (output) DOUBLE PRECISION */ -/* The Rayleigh Quotient correction to LAMBDA. */ -/* RQCORR = MINGMA*TMP */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --work; - --isuppz; - --z__; - --lld; - --ld; - --l; - --d__; - - /* Function Body */ - eps = DBL_EPSILON; // eps = odmch("Precision"); - if (*r__ == 0) { - r1 = *b1; - r2 = *bn; - } else { - r1 = *r__; - r2 = *r__; - } -/* Storage for LPLUS */ - indlpl = 0; -/* Storage for UMINUS */ - indumn = *n; - inds = (*n << 1) + 1; - indp = *n * 3 + 1; - if (*b1 == 1) { - work[inds] = 0.; - } else { - work[inds + *b1 - 1] = lld[*b1 - 1]; - } - -/* Compute the stationary transform (using the differential form) */ -/* until the index R2. */ - - sawnan1 = FALSE_; - neg1 = 0; - s = work[inds + *b1 - 1] - *lambda; - i__1 = r1 - 1; - for (i__ = *b1; i__ <= i__1; ++i__) { - dplus = d__[i__] + s; - work[indlpl + i__] = ld[i__] / dplus; - if (dplus < 0.) { - ++neg1; - } - work[inds + i__] = s * work[indlpl + i__] * l[i__]; - s = work[inds + i__] - *lambda; -/* L50: */ - } - sawnan1 = odnan(&s); - if (sawnan1) { - goto L60; - } - i__1 = r2 - 1; - for (i__ = r1; i__ <= i__1; ++i__) { - dplus = d__[i__] + s; - work[indlpl + i__] = ld[i__] / dplus; - work[inds + i__] = s * work[indlpl + i__] * l[i__]; - s = work[inds + i__] - *lambda; -/* L51: */ - } - sawnan1 = odnan(&s); - -L60: - if (sawnan1) { -/* Runs a slower version of the above loop if a NaN is detected */ - neg1 = 0; - s = work[inds + *b1 - 1] - *lambda; - i__1 = r1 - 1; - for (i__ = *b1; i__ <= i__1; ++i__) { - dplus = d__[i__] + s; - if (fabs(dplus) < *pivmin) { - dplus = -(*pivmin); - } - work[indlpl + i__] = ld[i__] / dplus; - if (dplus < 0.) { - ++neg1; - } - work[inds + i__] = s * work[indlpl + i__] * l[i__]; - if (work[indlpl + i__] == 0.) { - work[inds + i__] = lld[i__]; - } - s = work[inds + i__] - *lambda; -/* L70: */ - } - i__1 = r2 - 1; - for (i__ = r1; i__ <= i__1; ++i__) { - dplus = d__[i__] + s; - if (fabs(dplus) < *pivmin) { - dplus = -(*pivmin); - } - work[indlpl + i__] = ld[i__] / dplus; - work[inds + i__] = s * work[indlpl + i__] * l[i__]; - if (work[indlpl + i__] == 0.) { - work[inds + i__] = lld[i__]; - } - s = work[inds + i__] - *lambda; -/* L71: */ - } - } - -/* Compute the progressive transform (using the differential form) */ -/* until the index R1 */ - - sawnan2 = FALSE_; - neg2 = 0; - work[indp + *bn - 1] = d__[*bn] - *lambda; - i__1 = r1; - for (i__ = *bn - 1; i__ >= i__1; --i__) { - dminus = lld[i__] + work[indp + i__]; - tmp = d__[i__] / dminus; - if (dminus < 0.) { - ++neg2; - } - work[indumn + i__] = l[i__] * tmp; - work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; -/* L80: */ - } - tmp = work[indp + r1 - 1]; - sawnan2 = odnan(&tmp); - if (sawnan2) { -/* Runs a slower version of the above loop if a NaN is detected */ - neg2 = 0; - i__1 = r1; - for (i__ = *bn - 1; i__ >= i__1; --i__) { - dminus = lld[i__] + work[indp + i__]; - if (fabs(dminus) < *pivmin) { - dminus = -(*pivmin); - } - tmp = d__[i__] / dminus; - if (dminus < 0.) { - ++neg2; - } - work[indumn + i__] = l[i__] * tmp; - work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; - if (tmp == 0.) { - work[indp + i__ - 1] = d__[i__] - *lambda; - } -/* L100: */ - } - } - -/* Find the index (from R1 to R2) of the largest (in magnitude) */ -/* diagonal element of the inverse */ - - *mingma = work[inds + r1 - 1] + work[indp + r1 - 1]; - if (*mingma < 0.) { - ++neg1; - } - if (*wantnc) { - *negcnt = neg1 + neg2; - } else { - *negcnt = -1; - } - if (fabs(*mingma) == 0.) { - *mingma = eps * work[inds + r1 - 1]; - } - *r__ = r1; - i__1 = r2 - 1; - for (i__ = r1; i__ <= i__1; ++i__) { - tmp = work[inds + i__] + work[indp + i__]; - if (tmp == 0.) { - tmp = eps * work[inds + i__]; - } - if (fabs(tmp) <= fabs(*mingma)) { - *mingma = tmp; - *r__ = i__ + 1; - } -/* L110: */ - } - -/* Compute the FP vector: solve N^T v = e_r */ - - isuppz[1] = *b1; - isuppz[2] = *bn; - z__[*r__] = 1.; - *ztz = 1.; - -/* Compute the FP vector upwards from R */ - - if (! sawnan1 && ! sawnan2) { - i__1 = *b1; - for (i__ = *r__ - 1; i__ >= i__1; --i__) { - z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); - if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( - d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { - z__[i__] = 0.; - isuppz[1] = i__ + 1; - goto L220; - } - *ztz += z__[i__] * z__[i__]; -/* L210: */ - } -L220: - ; - } else { -/* Run slower loop if NaN occurred. */ - i__1 = *b1; - for (i__ = *r__ - 1; i__ >= i__1; --i__) { - if (z__[i__ + 1] == 0.) { - z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2]; - } else { - z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); - } - if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( - d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { - z__[i__] = 0.; - isuppz[1] = i__ + 1; - goto L240; - } - *ztz += z__[i__] * z__[i__]; -/* L230: */ - } -L240: - ; - } -/* Compute the FP vector downwards from R in blocks of size BLKSIZ */ - if (! sawnan1 && ! sawnan2) { - i__1 = *bn - 1; - for (i__ = *r__; i__ <= i__1; ++i__) { - z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); - if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( - d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { - z__[i__ + 1] = 0.; - isuppz[2] = i__; - goto L260; - } - *ztz += z__[i__ + 1] * z__[i__ + 1]; -/* L250: */ - } -L260: - ; - } else { -/* Run slower loop if NaN occurred. */ - i__1 = *bn - 1; - for (i__ = *r__; i__ <= i__1; ++i__) { - if (z__[i__] == 0.) { - z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1]; - } else { - z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); - } - if (((d__1 = z__[i__], fabs(d__1)) + (d__2 = z__[i__ + 1], fabs( - d__2))) * (d__3 = ld[i__], fabs(d__3)) < *gaptol) { - z__[i__ + 1] = 0.; - isuppz[2] = i__; - goto L280; - } - *ztz += z__[i__ + 1] * z__[i__ + 1]; -/* L270: */ - } -L280: - ; - } - -/* Compute quantities for convergence test */ - - tmp = 1. / *ztz; - *nrminv = sqrt(tmp); - *resid = fabs(*mingma) * *nrminv; - *rqcorr = *mingma * tmp; - - - return 0; - -/* End of ODR1V */ - -} /* odr1v_ */ diff --git a/external/pmrrr/src/lapack/odrnv.c b/external/pmrrr/src/lapack/odrnv.c deleted file mode 100644 index f0330d1f78..0000000000 --- a/external/pmrrr/src/lapack/odrnv.c +++ /dev/null @@ -1,142 +0,0 @@ -/* dlarnv.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) - -/* Subroutine */ -int odrnv(int *idist, int *iseed, int *n, double *x) -{ - /* System generated locals */ - int i__1, i__2, i__3; - - /* Builtin functions */ - // double log(double), sqrt(double), cos(double); - - /* Local variables */ - int i__; - double u[128]; - int il, iv, il2; - extern int odruv(int *, int *, double *); - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODRNV returns a vector of n random real numbers from a uniform or */ -/* normal distribution. */ - -/* Arguments */ -/* ========= */ - -/* IDIST (input) INT */ -/* Specifies the distribution of the random numbers: */ -/* = 1: uniform (0,1) */ -/* = 2: uniform (-1,1) */ -/* = 3: normal (0,1) */ - -/* ISEED (input/output) INT array, dimension (4) */ -/* On entry, the seed of the random number generator; the array */ -/* elements must be between 0 and 4095, and ISEED(4) must be */ -/* odd. */ -/* On exit, the seed is updated. */ - -/* N (input) INT */ -/* The number of random numbers to be generated. */ - -/* X (output) DOUBLE PRECISION array, dimension (N) */ -/* The generated random numbers. */ - -/* Further Details */ -/* =============== */ - -/* This routine calls the auxiliary routine ODRUV to generate random */ -/* real numbers from a uniform (0,1) distribution, in batches of up to */ -/* 128 using vectorisable code. The Box-Muller method is used to */ -/* transform numbers from a uniform to a normal distribution. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Local Arrays .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --x; - --iseed; - - /* Function Body */ - i__1 = *n; - for (iv = 1; iv <= i__1; iv += 64) { -/* Computing MIN */ - i__2 = 64, i__3 = *n - iv + 1; - il = imin(i__2,i__3); - if (*idist == 3) { - il2 = il << 1; - } else { - il2 = il; - } - -/* Call ODRUV to generate IL2 numbers from a uniform (0,1) */ -/* distribution (IL2 <= LV) */ - - odruv(&iseed[1], &il2, u); - - if (*idist == 1) { - -/* Copy generated numbers */ - - i__2 = il; - for (i__ = 1; i__ <= i__2; ++i__) { - x[iv + i__ - 1] = u[i__ - 1]; -/* L10: */ - } - } else if (*idist == 2) { - -/* Convert generated numbers to uniform (-1,1) distribution */ - - i__2 = il; - for (i__ = 1; i__ <= i__2; ++i__) { - x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.; -/* L20: */ - } - } else if (*idist == 3) { - -/* Convert generated numbers to normal (0,1) distribution */ - - i__2 = il; - for (i__ = 1; i__ <= i__2; ++i__) { - x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[( - i__ << 1) - 1] * 6.2831853071795864769252867663); -/* L30: */ - } - } -/* L40: */ - } - return 0; - -/* End of ODRNV */ - -} /* odrnv_ */ diff --git a/external/pmrrr/src/lapack/odrra.c b/external/pmrrr/src/lapack/odrra.c deleted file mode 100644 index f0d802d1a7..0000000000 --- a/external/pmrrr/src/lapack/odrra.c +++ /dev/null @@ -1,151 +0,0 @@ -/* dlarra.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odrra(int *n, double *d__, double *e, - double *e2, double *spltol, double *tnrm, int *nsplit, - int *isplit, int *info) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__; - double tmp1, eabs; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Compute the splitting points with threshold SPLTOL. */ -/* ODRRA sets any "small" off-diagonal elements to zero. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. N > 0. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the N diagonal elements of the tridiagonal */ -/* matrix T. */ - -/* E (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the first (N-1) entries contain the subdiagonal */ -/* elements of the tridiagonal matrix T; E(N) need not be set. */ -/* On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */ -/* are set to zero, the other entries of E are untouched. */ - -/* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the first (N-1) entries contain the SQUARES of the */ -/* subdiagonal elements of the tridiagonal matrix T; */ -/* E2(N) need not be set. */ -/* On exit, the entries E2( ISPLIT( I ) ), */ -/* 1 <= I <= NSPLIT, have been set to zero */ - -/* SPLTOL (input) DOUBLE PRECISION */ -/* The threshold for splitting. Two criteria can be used: */ -/* SPLTOL<0 : criterion based on absolute off-diagonal value */ -/* SPLTOL>0 : criterion that preserves relative accuracy */ - -/* TNRM (input) DOUBLE PRECISION */ -/* The norm of the matrix. */ - -/* NSPLIT (output) INT */ -/* The number of blocks T splits into. 1 <= NSPLIT <= N. */ - -/* ISPLIT (output) INT array, dimension (N) */ -/* The splitting points, at which T breaks up into blocks. */ -/* The first block consists of rows/columns 1 to ISPLIT(1), */ -/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ -/* etc., and the NSPLIT-th consists of rows/columns */ -/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ - - -/* INFO (output) INT */ -/* = 0: successful exit */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --isplit; - --e2; - --e; - --d__; - - /* Function Body */ - *info = 0; -/* Compute splitting points */ - *nsplit = 1; - if (*spltol < 0.) { -/* Criterion based on absolute off-diagonal value */ - tmp1 = fabs(*spltol) * *tnrm; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - eabs = (d__1 = e[i__], fabs(d__1)); - if (eabs <= tmp1) { - e[i__] = 0.; - e2[i__] = 0.; - isplit[*nsplit] = i__; - ++(*nsplit); - } -/* L9: */ - } - } else { -/* Criterion that guarantees relative accuracy */ - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - eabs = (d__1 = e[i__], fabs(d__1)); - if (eabs <= *spltol * sqrt((d__1 = d__[i__], fabs(d__1))) * sqrt(( - d__2 = d__[i__ + 1], fabs(d__2)))) { - e[i__] = 0.; - e2[i__] = 0.; - isplit[*nsplit] = i__; - ++(*nsplit); - } -/* L10: */ - } - } - isplit[*nsplit] = *n; - return 0; - -/* End of ODRRA */ - -} /* odrra_ */ diff --git a/external/pmrrr/src/lapack/odrrb.c b/external/pmrrr/src/lapack/odrrb.c deleted file mode 100644 index 4a1a8600f6..0000000000 --- a/external/pmrrr/src/lapack/odrrb.c +++ /dev/null @@ -1,344 +0,0 @@ -/* dlarrb.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odrrb(int *n, double *d__, double *lld, - int *ifirst, int *ilast, double *rtol1, double *rtol2, - int *offset, double *w, double *wgap, double *werr, - double *work, int *iwork, double *pivmin, double * - spdiam, int *twist, int *info) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Builtin functions */ - // double log(double); - - /* Local variables */ - int i__, k, r__, i1, ii, ip; - double gap, mid, tmp, back, lgap, rgap, left; - int iter, nint, prev, next; - double cvrgd, right, width; - extern int odneg(int *, double *, double *, double * -, double *, int *); - int negcnt; - double mnwdth; - int olnint, maxitr; - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Given the relatively robust representation(RRR) L D L^T, ODRRB */ -/* does "limited" bisection to refine the eigenvalues of L D L^T, */ -/* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ -/* guesses for these eigenvalues are input in W, the corresponding estimate */ -/* of the error in these guesses and their gaps are input in WERR */ -/* and WGAP, respectively. During bisection, intervals */ -/* [left, right] are maintained by storing their mid-points and */ -/* semi-widths in the arrays W and WERR respectively. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of the diagonal matrix D. */ - -/* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (N-1) elements L(i)*L(i)*D(i). */ - -/* IFIRST (input) INT */ -/* The index of the first eigenvalue to be computed. */ - -/* ILAST (input) INT */ -/* The index of the last eigenvalue to be computed. */ - -/* RTOL1 (input) DOUBLE PRECISION */ -/* RTOL2 (input) DOUBLE PRECISION */ -/* Tolerance for the convergence of the bisection intervals. */ -/* An interval [LEFT,RIGHT] has converged if */ -/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ -/* where GAP is the (estimated) distance to the nearest */ -/* eigenvalue. */ - -/* OFFSET (input) INT */ -/* Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */ -/* through ILAST-OFFSET elements of these arrays are to be used. */ - -/* W (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ -/* estimates of the eigenvalues of L D L^T indexed IFIRST throug */ -/* ILAST. */ -/* On output, these estimates are refined. */ - -/* WGAP (input/output) DOUBLE PRECISION array, dimension (N-1) */ -/* On input, the (estimated) gaps between consecutive */ -/* eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */ -/* eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */ -/* then WGAP(IFIRST-OFFSET) must be set to ZERO. */ -/* On output, these gaps are refined. */ - -/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ -/* the errors in the estimates of the corresponding elements in W. */ -/* On output, these errors are refined. */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ -/* Workspace. */ - -/* IWORK (workspace) INT array, dimension (2*N) */ -/* Workspace. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence. */ - -/* SPDIAM (input) DOUBLE PRECISION */ -/* The spectral diameter of the matrix. */ - -/* TWIST (input) INT */ -/* The twist index for the twisted factorization that is used */ -/* for the negcount. */ -/* TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */ -/* TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */ -/* TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */ - -/* INFO (output) INT */ -/* Error flag. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ - -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --iwork; - --work; - --werr; - --wgap; - --w; - --lld; - --d__; - - /* Function Body */ - *info = 0; - - maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + - 2; - mnwdth = *pivmin * 2.; - - r__ = *twist; - if (r__ < 1 || r__ > *n) { - r__ = *n; - } - -/* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ -/* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ -/* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */ -/* for an unconverged interval is set to the index of the next unconverged */ -/* interval, and is -1 or 0 for a converged interval. Thus a linked */ -/* list of unconverged intervals is set up. */ - - i1 = *ifirst; -/* The number of unconverged intervals */ - nint = 0; -/* The last unconverged interval found */ - prev = 0; - rgap = wgap[i1 - *offset]; - i__1 = *ilast; - for (i__ = i1; i__ <= i__1; ++i__) { - k = i__ << 1; - ii = i__ - *offset; - left = w[ii] - werr[ii]; - right = w[ii] + werr[ii]; - lgap = rgap; - rgap = wgap[ii]; - gap = fmin(lgap,rgap); -/* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ -/* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */ - -/* Do while( NEGCNT(LEFT).GT.I-1 ) */ - - back = werr[ii]; -L20: - negcnt = odneg(n, &d__[1], &lld[1], &left, pivmin, &r__); - if (negcnt > i__ - 1) { - left -= back; - back *= 2.; - goto L20; - } - -/* Do while( NEGCNT(RIGHT).LT.I ) */ -/* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */ - - back = werr[ii]; -L50: - negcnt = odneg(n, &d__[1], &lld[1], &right, pivmin, &r__); - if (negcnt < i__) { - right += back; - back *= 2.; - goto L50; - } - width = (d__1 = left - right, fabs(d__1)) * .5; -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - tmp = fmax(d__1,d__2); -/* Computing MAX */ - d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; - cvrgd = fmax(d__1,d__2); - if (width <= cvrgd || width <= mnwdth) { -/* This interval has already converged and does not need refinement. */ -/* (Note that the gaps might change through refining the */ -/* eigenvalues, however, they can only get bigger.) */ -/* Remove it from the list. */ - iwork[k - 1] = -1; -/* Make sure that I1 always points to the first unconverged interval */ - if (i__ == i1 && i__ < *ilast) { - i1 = i__ + 1; - } - if (prev >= i1 && i__ <= *ilast) { - iwork[(prev << 1) - 1] = i__ + 1; - } - } else { -/* unconverged interval found */ - prev = i__; - ++nint; - iwork[k - 1] = i__ + 1; - iwork[k] = negcnt; - } - work[k - 1] = left; - work[k] = right; -/* L75: */ - } - -/* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ -/* and while (ITER.LT.MAXITR) */ - - iter = 0; -L80: - prev = i1 - 1; - i__ = i1; - olnint = nint; - i__1 = olnint; - for (ip = 1; ip <= i__1; ++ip) { - k = i__ << 1; - ii = i__ - *offset; - rgap = wgap[ii]; - lgap = rgap; - if (ii > 1) { - lgap = wgap[ii - 1]; - } - gap = fmin(lgap,rgap); - next = iwork[k - 1]; - left = work[k - 1]; - right = work[k]; - mid = (left + right) * .5; -/* semiwidth of interval */ - width = right - mid; -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - tmp = fmax(d__1,d__2); -/* Computing MAX */ - d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; - cvrgd = fmax(d__1,d__2); - if (width <= cvrgd || width <= mnwdth || iter == maxitr) { -/* reduce number of unconverged intervals */ - --nint; -/* Mark interval as converged. */ - iwork[k - 1] = 0; - if (i1 == i__) { - i1 = next; - } else { -/* Prev holds the last unconverged interval previously examined */ - if (prev >= i1) { - iwork[(prev << 1) - 1] = next; - } - } - i__ = next; - goto L100; - } - prev = i__; - -/* Perform one bisection step */ - - negcnt = odneg(n, &d__[1], &lld[1], &mid, pivmin, &r__); - if (negcnt <= i__ - 1) { - work[k - 1] = mid; - } else { - work[k] = mid; - } - i__ = next; -L100: - ; - } - ++iter; -/* do another loop if there are still unconverged intervals */ -/* However, in the last iteration, all intervals are accepted */ -/* since this is the best we can do. */ - if (nint > 0 && iter <= maxitr) { - goto L80; - } - - -/* At this point, all the intervals have converged */ - i__1 = *ilast; - for (i__ = *ifirst; i__ <= i__1; ++i__) { - k = i__ << 1; - ii = i__ - *offset; -/* All intervals marked by '0' have been refined. */ - if (iwork[k - 1] == 0) { - w[ii] = (work[k - 1] + work[k]) * .5; - werr[ii] = work[k] - w[ii]; - } -/* L110: */ - } - - i__1 = *ilast; - for (i__ = *ifirst + 1; i__ <= i__1; ++i__) { - k = i__ << 1; - ii = i__ - *offset; -/* Computing MAX */ - d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1]; - wgap[ii - 1] = fmax(d__1,d__2); -/* L111: */ - } - return 0; - -/* End of ODRRB */ - -} /* odrrb_ */ diff --git a/external/pmrrr/src/lapack/odrrc.c b/external/pmrrr/src/lapack/odrrc.c deleted file mode 100644 index eab853b372..0000000000 --- a/external/pmrrr/src/lapack/odrrc.c +++ /dev/null @@ -1,179 +0,0 @@ -/* dlarrc.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - - -/* Subroutine */ -int odrrc(char *jobt, int *n, double *vl, - double *vu, double *d__, double *e, double *pivmin, - int *eigcnt, int *lcnt, int *rcnt, int *info) -{ - /* System generated locals */ - int i__1; - double d__1; - - /* Local variables */ - int i__; - double sl, su, tmp, tmp2; - int matt; - extern int olsame(char *, char *); - double lpivot, rpivot; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Find the number of eigenvalues of the symmetric tridiagonal matrix T */ -/* that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */ -/* if JOBT = 'L'. */ - -/* Arguments */ -/* ========= */ - -/* JOBT (input) CHARACTER*1 */ -/* = 'T': Compute Sturm count for matrix T. */ -/* = 'L': Compute Sturm count for matrix L D L^T. */ - -/* N (input) INT */ -/* The order of the matrix. N > 0. */ - -/* VL (input) DOUBLE PRECISION */ -/* VU (input) DOUBLE PRECISION */ -/* The lower and upper bounds for the eigenvalues. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */ -/* JOBT = 'L': The N diagonal elements of the diagonal matrix D. */ - -/* E (input) DOUBLE PRECISION array, dimension (N) */ -/* JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */ -/* JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence for T. */ - -/* EIGCNT (output) INT */ -/* The number of eigenvalues of the symmetric tridiagonal matrix T */ -/* that are in the interval (VL,VU] */ - -/* LCNT (output) INT */ -/* RCNT (output) INT */ -/* The left and right negcounts of the interval. */ - -/* INFO (output) INT */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - *info = 0; - *lcnt = 0; - *rcnt = 0; - *eigcnt = 0; - matt = olsame(jobt, "T"); - if (matt) { -/* Sturm sequence count on T */ - lpivot = d__[1] - *vl; - rpivot = d__[1] - *vu; - if (lpivot <= 0.) { - ++(*lcnt); - } - if (rpivot <= 0.) { - ++(*rcnt); - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - d__1 = e[i__]; - tmp = d__1 * d__1; - lpivot = d__[i__ + 1] - *vl - tmp / lpivot; - rpivot = d__[i__ + 1] - *vu - tmp / rpivot; - if (lpivot <= 0.) { - ++(*lcnt); - } - if (rpivot <= 0.) { - ++(*rcnt); - } -/* L10: */ - } - } else { -/* Sturm sequence count on L D L^T */ - sl = -(*vl); - su = -(*vu); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - lpivot = d__[i__] + sl; - rpivot = d__[i__] + su; - if (lpivot <= 0.) { - ++(*lcnt); - } - if (rpivot <= 0.) { - ++(*rcnt); - } - tmp = e[i__] * d__[i__] * e[i__]; - - tmp2 = tmp / lpivot; - if (tmp2 == 0.) { - sl = tmp - *vl; - } else { - sl = sl * tmp2 - *vl; - } - - tmp2 = tmp / rpivot; - if (tmp2 == 0.) { - su = tmp - *vu; - } else { - su = su * tmp2 - *vu; - } -/* L20: */ - } - lpivot = d__[*n] + sl; - rpivot = d__[*n] + su; - if (lpivot <= 0.) { - ++(*lcnt); - } - if (rpivot <= 0.) { - ++(*rcnt); - } - } - *eigcnt = *rcnt - *lcnt; - return 0; - -/* end of ODRRC */ - -} /* odrrc_ */ diff --git a/external/pmrrr/src/lapack/odrrd.c b/external/pmrrr/src/lapack/odrrd.c deleted file mode 100644 index 32bef21c97..0000000000 --- a/external/pmrrr/src/lapack/odrrd.c +++ /dev/null @@ -1,793 +0,0 @@ -/* dlarrd.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imax(a,b) ( (a) > (b) ? (a) : (b) ) -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) -#define TRUE_ (1) -#define FALSE_ (0) - -/* Table of constant values */ -static int c__1 = 1; -/* -static int c_n1 = -1; -*/ -static int c__3 = 3; -static int c__2 = 2; -static int c__0 = 0; - -/* Subroutine */ -int odrrd(char *range, char *order, int *n, double *vl, - double *vu, int *il, int *iu, double *gers, - double *reltol, double *d__, double *e, double *e2, - double *pivmin, int *nsplit, int *isplit, int *m, - double *w, double *werr, double *wl, double *wu, - int *iblock, int *indexw, double *work, int *iwork, - int *info) -{ - /* System generated locals */ - int i__1, i__2, i__3; - double d__1, d__2; - - /* Builtin functions */ - // double log(double); - - /* Local variables */ - int i__, j, ib, ie, je, nb; - double gl; - int im, in; - double gu; - int iw, jee; - double eps; - int nwl; - double wlu, wul; - int nwu; - double tmp1, tmp2; - int iend, jblk, ioff, iout, itmp1, itmp2, jdisc; - extern int olsame(char *, char *); - int iinfo; - double atoli; - int iwoff, itmax; - double wkill, rtoli, uflow, tnorm; - // extern double dlamch(char *); - int ibegin; - extern /* Subroutine */ int odebz(int *, int *, int *, - int *, int *, int *, double *, double *, - double *, double *, double *, double *, int *, - double *, double *, int *, int *, double *, - int *, int *); - int irange, idiscl, idumma[1]; - /* extern int ilaenv(int *, char *, char *, int *, int *, */ - /* int *, int *); */ - int idiscu; - int ncnvrg, toofew; - - -/* -- LAPACK auxiliary routine (version 3.2.1) -- */ -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ -/* -- April 2009 -- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODRRD computes the eigenvalues of a symmetric tridiagonal */ -/* matrix T to suitable accuracy. This is an auxiliary code to be */ -/* called from DSTEMR. */ -/* The user may ask for all eigenvalues, all eigenvalues */ -/* in the half-open interval (VL, VU], or the IL-th through IU-th */ -/* eigenvalues. */ - -/* To avoid overflow, the matrix must be scaled so that its */ -/* largest element is no greater than overflow**(1/2) * */ -/* underflow**(1/4) in absolute value, and for greatest */ -/* accuracy, it should not be much smaller than that. */ - -/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ -/* Matrix", Report CS41, Computer Science Dept., Stanford */ -/* University, July 21, 1966. */ - -/* Arguments */ -/* ========= */ - -/* RANGE (input) CHARACTER */ -/* = 'A': ("All") all eigenvalues will be found. */ -/* = 'V': ("Value") all eigenvalues in the half-open interval */ -/* (VL, VU] will be found. */ -/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ -/* entire matrix) will be found. */ - -/* ORDER (input) CHARACTER */ -/* = 'B': ("By Block") the eigenvalues will be grouped by */ -/* split-off block (see IBLOCK, ISPLIT) and */ -/* ordered from smallest to largest within */ -/* the block. */ -/* = 'E': ("Entire matrix") */ -/* the eigenvalues for the entire matrix */ -/* will be ordered from smallest to */ -/* largest. */ - -/* N (input) INT */ -/* The order of the tridiagonal matrix T. N >= 0. */ - -/* VL (input) DOUBLE PRECISION */ -/* VU (input) DOUBLE PRECISION */ -/* If RANGE='V', the lower and upper bounds of the interval to */ -/* be searched for eigenvalues. Eigenvalues less than or equal */ -/* to VL, or greater than VU, will not be returned. VL < VU. */ -/* Not referenced if RANGE = 'A' or 'I'. */ - -/* IL (input) INT */ -/* IU (input) INT */ -/* If RANGE='I', the indices (in ascending order) of the */ -/* smallest and largest eigenvalues to be returned. */ -/* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ -/* Not referenced if RANGE = 'A' or 'V'. */ - -/* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ -/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ -/* is (GERS(2*i-1), GERS(2*i)). */ - -/* RELTOL (input) DOUBLE PRECISION */ -/* The minimum relative width of an interval. When an interval */ -/* is narrower than RELTOL times the larger (in */ -/* magnitude) endpoint, then it is considered to be */ -/* sufficiently small, i.e., converged. Note: this should */ -/* always be at least radix*machine epsilon. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The n diagonal elements of the tridiagonal matrix T. */ - -/* E (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ - -/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot allowed in the Sturm sequence for T. */ - -/* NSPLIT (input) INT */ -/* The number of diagonal blocks in the matrix T. */ -/* 1 <= NSPLIT <= N. */ - -/* ISPLIT (input) INT array, dimension (N) */ -/* The splitting points, at which T breaks up into submatrices. */ -/* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ -/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ -/* etc., and the NSPLIT-th consists of rows/columns */ -/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ -/* (Only the first NSPLIT elements will actually be used, but */ -/* since the user cannot know a priori what value NSPLIT will */ -/* have, N words must be reserved for ISPLIT.) */ - -/* M (output) INT */ -/* The actual number of eigenvalues found. 0 <= M <= N. */ -/* (See also the description of INFO=2,3.) */ - -/* W (output) DOUBLE PRECISION array, dimension (N) */ -/* On exit, the first M elements of W will contain the */ -/* eigenvalue approximations. ODRRD computes an interval */ -/* I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */ -/* approximation is given as the interval midpoint */ -/* W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */ -/* WERR(j) = abs( a_j - b_j)/2 */ - -/* WERR (output) DOUBLE PRECISION array, dimension (N) */ -/* The error bound on the corresponding eigenvalue approximation */ -/* in W. */ - -/* WL (output) DOUBLE PRECISION */ -/* WU (output) DOUBLE PRECISION */ -/* The interval (WL, WU] contains all the wanted eigenvalues. */ -/* If RANGE='V', then WL=VL and WU=VU. */ -/* If RANGE='A', then WL and WU are the global Gerschgorin bounds */ -/* on the spectrum. */ -/* If RANGE='I', then WL and WU are computed by ODEBZ from the */ -/* index range specified. */ - -/* IBLOCK (output) INT array, dimension (N) */ -/* At each row/column j where E(j) is zero or small, the */ -/* matrix T is considered to split into a block diagonal */ -/* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ -/* block (from 1 to the number of blocks) the eigenvalue W(i) */ -/* belongs. (ODRRD may use the remaining N-M elements as */ -/* workspace.) */ - -/* INDEXW (output) INT array, dimension (N) */ -/* The indices of the eigenvalues within each block (submatrix); */ -/* for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */ -/* i-th eigenvalue W(i) is the j-th eigenvalue in block k. */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ - -/* IWORK (workspace) INT array, dimension (3*N) */ - -/* INFO (output) INT */ -/* = 0: successful exit */ -/* < 0: if INFO = -i, the i-th argument had an illegal value */ -/* > 0: some or all of the eigenvalues failed to converge or */ -/* were not computed: */ -/* =1 or 3: Bisection failed to converge for some */ -/* eigenvalues; these eigenvalues are flagged by a */ -/* negative block number. The effect is that the */ -/* eigenvalues may not be as accurate as the */ -/* absolute and relative tolerances. This is */ -/* generally caused by unexpectedly inaccurate */ -/* arithmetic. */ -/* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ -/* IL:IU were found. */ -/* Effect: M < IU+1-IL */ -/* Cause: non-monotonic arithmetic, causing the */ -/* Sturm sequence to be non-monotonic. */ -/* Cure: recalculate, using RANGE='A', and pick */ -/* out eigenvalues IL:IU. In some cases, */ -/* increasing the PARAMETER "FUDGE" may */ -/* make things work. */ -/* = 4: RANGE='I', and the Gershgorin interval */ -/* initially used was too small. No eigenvalues */ -/* were computed. */ -/* Probable cause: your machine has sloppy */ -/* floating-point arithmetic. */ -/* Cure: Increase the PARAMETER "FUDGE", */ -/* recompile, and try again. */ - -/* Internal Parameters */ -/* =================== */ - -/* FUDGE DOUBLE PRECISION, default = 2 */ -/* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ -/* a value of 1 should work, but on machines with sloppy */ -/* arithmetic, this needs to be larger. The default for */ -/* publicly released versions should be large enough to handle */ -/* the worst machine around. Note that this has no effect */ -/* on accuracy of the solution. */ - -/* Based on contributions by */ -/* W. Kahan, University of California, Berkeley, USA */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Local Arrays .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --iwork; - --work; - --indexw; - --iblock; - --werr; - --w; - --isplit; - --e2; - --e; - --d__; - --gers; - - /* Function Body */ - *info = 0; - -/* Decode RANGE */ - - if (olsame(range, "A")) { - irange = 1; - } else if (olsame(range, "V")) { - irange = 2; - } else if (olsame(range, "I")) { - irange = 3; - } else { - irange = 0; - } - -/* Check for Errors */ - - if (irange <= 0) { - *info = -1; - } else if (! (olsame(order, "B") || olsame(order, "E"))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (irange == 2) { - if (*vl >= *vu) { - *info = -5; - } - } else if (irange == 3 && (*il < 1 || *il > imax(1,*n))) { - *info = -6; - } else if (irange == 3 && (*iu < imin(*n,*il) || *iu > *n)) { - *info = -7; - } - - if (*info != 0) { - return 0; - } -/* Initialize error flags */ - *info = 0; - ncnvrg = FALSE_; - toofew = FALSE_; -/* Quick return if possible */ - *m = 0; - if (*n == 0) { - return 0; - } -/* Simplification: */ - if (irange == 3 && *il == 1 && *iu == *n) { - irange = 1; - } -/* Get machine constants */ - eps = DBL_EPSILON; // odmch("P"); - uflow = DBL_MIN; // odmch("U"); -/* Special Case when N=1 */ -/* Treat case of 1x1 matrix for quick return */ - if (*n == 1) { - if (irange == 1 || irange == 2 && d__[1] > *vl && d__[1] <= *vu || - irange == 3 && *il == 1 && *iu == 1) { - *m = 1; - w[1] = d__[1]; -/* The computation error of the eigenvalue is zero */ - werr[1] = 0.; - iblock[1] = 1; - indexw[1] = 1; - } - return 0; - } -/* NB is the minimum vector length for vector bisection, or 0 */ -/* if only scalar is to be done. */ - nb = 1; // ilaenv(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); - if (nb <= 1) { - nb = 0; - } -/* Find global spectral radius */ - gl = d__[1]; - gu = d__[1]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MIN */ - d__1 = gl, d__2 = gers[(i__ << 1) - 1]; - gl = fmin(d__1,d__2); -/* Computing MAX */ - d__1 = gu, d__2 = gers[i__ * 2]; - gu = fmax(d__1,d__2); -/* L5: */ - } -/* Compute global Gerschgorin bounds and spectral diameter */ -/* Computing MAX */ - d__1 = fabs(gl), d__2 = fabs(gu); - tnorm = fmax(d__1,d__2); - gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.; - gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.; -/* [JAN/28/2009] remove the line below since SPDIAM variable not use */ -/* SPDIAM = GU - GL */ -/* Input arguments for ODEBZ: */ -/* The relative tolerance. An interval (a,b] lies within */ -/* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), */ - rtoli = *reltol; -/* Set the absolute tolerance for interval convergence to zero to force */ -/* interval convergence based on relative size of the interval. */ -/* This is dangerous because intervals might not converge when RELTOL is */ -/* small. But at least a very small number should be selected so that for */ -/* strongly graded matrices, the code can get relatively accurate */ -/* eigenvalues. */ - atoli = uflow * 4. + *pivmin * 4.; - if (irange == 3) { -/* RANGE='I': Compute an interval containing eigenvalues */ -/* IL through IU. The initial interval [GL,GU] from the global */ -/* Gerschgorin bounds GL and GU is refined by ODEBZ. */ - itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + - 2; - work[*n + 1] = gl; - work[*n + 2] = gl; - work[*n + 3] = gu; - work[*n + 4] = gu; - work[*n + 5] = gl; - work[*n + 6] = gu; - iwork[1] = -1; - iwork[2] = -1; - iwork[3] = *n + 1; - iwork[4] = *n + 1; - iwork[5] = *il - 1; - iwork[6] = *iu; - - odebz(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, & - d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5] -, &iout, &iwork[1], &w[1], &iblock[1], &iinfo); - if (iinfo != 0) { - *info = iinfo; - return 0; - } -/* On exit, output intervals may not be ordered by ascending negcount */ - if (iwork[6] == *iu) { - *wl = work[*n + 1]; - wlu = work[*n + 3]; - nwl = iwork[1]; - *wu = work[*n + 4]; - wul = work[*n + 2]; - nwu = iwork[4]; - } else { - *wl = work[*n + 2]; - wlu = work[*n + 4]; - nwl = iwork[2]; - *wu = work[*n + 3]; - wul = work[*n + 1]; - nwu = iwork[3]; - } -/* On exit, the interval [WL, WLU] contains a value with negcount NWL, */ -/* and [WUL, WU] contains a value with negcount NWU. */ - if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { - *info = 4; - return 0; - } - } else if (irange == 2) { - *wl = *vl; - *wu = *vu; - } else if (irange == 1) { - *wl = gl; - *wu = gu; - } -/* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */ -/* NWL accumulates the number of eigenvalues .le. WL, */ -/* NWU accumulates the number of eigenvalues .le. WU */ - *m = 0; - iend = 0; - *info = 0; - nwl = 0; - nwu = 0; - - i__1 = *nsplit; - for (jblk = 1; jblk <= i__1; ++jblk) { - ioff = iend; - ibegin = ioff + 1; - iend = isplit[jblk]; - in = iend - ioff; - - if (in == 1) { -/* 1x1 block */ - if (*wl >= d__[ibegin] - *pivmin) { - ++nwl; - } - if (*wu >= d__[ibegin] - *pivmin) { - ++nwu; - } - if (irange == 1 || *wl < d__[ibegin] - *pivmin && *wu >= d__[ - ibegin] - *pivmin) { - ++(*m); - w[*m] = d__[ibegin]; - werr[*m] = 0.; -/* The gap for a single block doesn't matter for the later */ -/* algorithm and is assigned an arbitrary large value */ - iblock[*m] = jblk; - indexw[*m] = 1; - } -/* Disabled 2x2 case because of a failure on the following matrix */ -/* RANGE = 'I', IL = IU = 4 */ -/* Original Tridiagonal, d = [ */ -/* -0.150102010615740E+00 */ -/* -0.849897989384260E+00 */ -/* -0.128208148052635E-15 */ -/* 0.128257718286320E-15 */ -/* ]; */ -/* e = [ */ -/* -0.357171383266986E+00 */ -/* -0.180411241501588E-15 */ -/* -0.175152352710251E-15 */ -/* ]; */ - -/* ELSE IF( IN.EQ.2 ) THEN */ -/* * 2x2 block */ -/* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */ -/* TMP1 = HALF*(D(IBEGIN)+D(IEND)) */ -/* L1 = TMP1 - DISC */ -/* IF( WL.GE. L1-PIVMIN ) */ -/* $ NWL = NWL + 1 */ -/* IF( WU.GE. L1-PIVMIN ) */ -/* $ NWU = NWU + 1 */ -/* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */ -/* $ L1-PIVMIN ) ) THEN */ -/* M = M + 1 */ -/* W( M ) = L1 */ -/* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ -/* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ -/* IBLOCK( M ) = JBLK */ -/* INDEXW( M ) = 1 */ -/* ENDIF */ -/* L2 = TMP1 + DISC */ -/* IF( WL.GE. L2-PIVMIN ) */ -/* $ NWL = NWL + 1 */ -/* IF( WU.GE. L2-PIVMIN ) */ -/* $ NWU = NWU + 1 */ -/* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */ -/* $ L2-PIVMIN ) ) THEN */ -/* M = M + 1 */ -/* W( M ) = L2 */ -/* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ -/* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ -/* IBLOCK( M ) = JBLK */ -/* INDEXW( M ) = 2 */ -/* ENDIF */ - } else { -/* General Case - block of size IN >= 2 */ -/* Compute local Gerschgorin interval and use it as the initial */ -/* interval for ODEBZ */ - gu = d__[ibegin]; - gl = d__[ibegin]; - tmp1 = 0.; - i__2 = iend; - for (j = ibegin; j <= i__2; ++j) { -/* Computing MIN */ - d__1 = gl, d__2 = gers[(j << 1) - 1]; - gl = fmin(d__1,d__2); -/* Computing MAX */ - d__1 = gu, d__2 = gers[j * 2]; - gu = fmax(d__1,d__2); -/* L40: */ - } -/* [JAN/28/2009] */ -/* change SPDIAM by TNORM in lines 2 and 3 thereafter */ -/* line 1: remove computation of SPDIAM (not useful anymore) */ -/* SPDIAM = GU - GL */ -/* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */ -/* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */ - gl = gl - tnorm * 2. * eps * in - *pivmin * 2.; - gu = gu + tnorm * 2. * eps * in + *pivmin * 2.; - - if (irange > 1) { - if (gu < *wl) { -/* the local block contains none of the wanted eigenvalues */ - nwl += in; - nwu += in; - goto L70; - } -/* refine search interval if possible, only range (WL,WU] matters */ - gl = fmax(gl,*wl); - gu = fmin(gu,*wu); - if (gl >= gu) { - goto L70; - } - } -/* Find negcount of initial interval boundaries GL and GU */ - work[*n + 1] = gl; - work[*n + in + 1] = gu; - odebz(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, - pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & - work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & - w[*m + 1], &iblock[*m + 1], &iinfo); - if (iinfo != 0) { - *info = iinfo; - return 0; - } - - nwl += iwork[1]; - nwu += iwork[in + 1]; - iwoff = *m - iwork[1]; -/* Compute Eigenvalues */ - itmax = (int) ((log(gu - gl + *pivmin) - log(*pivmin)) / log( - 2.)) + 2; - odebz(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, - pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & - work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], - &w[*m + 1], &iblock[*m + 1], &iinfo); - if (iinfo != 0) { - *info = iinfo; - return 0; - } - -/* Copy eigenvalues into W and IBLOCK */ -/* Use -JBLK for block number for unconverged eigenvalues. */ -/* Loop over the number of output intervals from ODEBZ */ - i__2 = iout; - for (j = 1; j <= i__2; ++j) { -/* eigenvalue approximation is middle point of interval */ - tmp1 = (work[j + *n] + work[j + in + *n]) * .5; -/* semi length of error interval */ - tmp2 = (d__1 = work[j + *n] - work[j + in + *n], fabs(d__1)) * - .5; - if (j > iout - iinfo) { -/* Flag non-convergence. */ - ncnvrg = TRUE_; - ib = -jblk; - } else { - ib = jblk; - } - i__3 = iwork[j + in] + iwoff; - for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { - w[je] = tmp1; - werr[je] = tmp2; - indexw[je] = je - iwoff; - iblock[je] = ib; -/* L50: */ - } -/* L60: */ - } - - *m += im; - } -L70: - ; - } -/* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ -/* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */ - if (irange == 3) { - idiscl = *il - 1 - nwl; - idiscu = nwu - *iu; - - if (idiscl > 0) { - im = 0; - i__1 = *m; - for (je = 1; je <= i__1; ++je) { -/* Remove some of the smallest eigenvalues from the left so that */ -/* at the end IDISCL =0. Move all eigenvalues up to the left. */ - if (w[je] <= wlu && idiscl > 0) { - --idiscl; - } else { - ++im; - w[im] = w[je]; - werr[im] = werr[je]; - indexw[im] = indexw[je]; - iblock[im] = iblock[je]; - } -/* L80: */ - } - *m = im; - } - if (idiscu > 0) { -/* Remove some of the largest eigenvalues from the right so that */ -/* at the end IDISCU =0. Move all eigenvalues up to the left. */ - im = *m + 1; - for (je = *m; je >= 1; --je) { - if (w[je] >= wul && idiscu > 0) { - --idiscu; - } else { - --im; - w[im] = w[je]; - werr[im] = werr[je]; - indexw[im] = indexw[je]; - iblock[im] = iblock[je]; - } -/* L81: */ - } - jee = 0; - i__1 = *m; - for (je = im; je <= i__1; ++je) { - ++jee; - w[jee] = w[je]; - werr[jee] = werr[je]; - indexw[jee] = indexw[je]; - iblock[jee] = iblock[je]; -/* L82: */ - } - *m = *m - im + 1; - } - if (idiscl > 0 || idiscu > 0) { -/* Code to deal with effects of bad arithmetic. (If N(w) is */ -/* monotone non-decreasing, this should never happen.) */ -/* Some low eigenvalues to be discarded are not in (WL,WLU], */ -/* or high eigenvalues to be discarded are not in (WUL,WU] */ -/* so just kill off the smallest IDISCL/largest IDISCU */ -/* eigenvalues, by marking the corresponding IBLOCK = 0 */ - if (idiscl > 0) { - wkill = *wu; - i__1 = idiscl; - for (jdisc = 1; jdisc <= i__1; ++jdisc) { - iw = 0; - i__2 = *m; - for (je = 1; je <= i__2; ++je) { - if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { - iw = je; - wkill = w[je]; - } -/* L90: */ - } - iblock[iw] = 0; -/* L100: */ - } - } - if (idiscu > 0) { - wkill = *wl; - i__1 = idiscu; - for (jdisc = 1; jdisc <= i__1; ++jdisc) { - iw = 0; - i__2 = *m; - for (je = 1; je <= i__2; ++je) { - if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) { - iw = je; - wkill = w[je]; - } -/* L110: */ - } - iblock[iw] = 0; -/* L120: */ - } - } -/* Now erase all eigenvalues with IBLOCK set to zero */ - im = 0; - i__1 = *m; - for (je = 1; je <= i__1; ++je) { - if (iblock[je] != 0) { - ++im; - w[im] = w[je]; - werr[im] = werr[je]; - indexw[im] = indexw[je]; - iblock[im] = iblock[je]; - } -/* L130: */ - } - *m = im; - } - if (idiscl < 0 || idiscu < 0) { - toofew = TRUE_; - } - } - - if (irange == 1 && *m != *n || irange == 3 && *m != *iu - *il + 1) { - toofew = TRUE_; - } -/* If ORDER='B', do nothing the eigenvalues are already sorted by */ -/* block. */ -/* If ORDER='E', sort the eigenvalues from smallest to largest */ - if (olsame(order, "E") && *nsplit > 1) { - i__1 = *m - 1; - for (je = 1; je <= i__1; ++je) { - ie = 0; - tmp1 = w[je]; - i__2 = *m; - for (j = je + 1; j <= i__2; ++j) { - if (w[j] < tmp1) { - ie = j; - tmp1 = w[j]; - } -/* L140: */ - } - if (ie != 0) { - tmp2 = werr[ie]; - itmp1 = iblock[ie]; - itmp2 = indexw[ie]; - w[ie] = w[je]; - werr[ie] = werr[je]; - iblock[ie] = iblock[je]; - indexw[ie] = indexw[je]; - w[je] = tmp1; - werr[je] = tmp2; - iblock[je] = itmp1; - indexw[je] = itmp2; - } -/* L150: */ - } - } - - *info = 0; - if (ncnvrg) { - ++(*info); - } - if (toofew) { - *info += 2; - } - return 0; - -/* End of ODRRD */ - -} /* odrrd_ */ diff --git a/external/pmrrr/src/lapack/odrre.c b/external/pmrrr/src/lapack/odrre.c deleted file mode 100644 index 05e74171bf..0000000000 --- a/external/pmrrr/src/lapack/odrre.c +++ /dev/null @@ -1,858 +0,0 @@ -/* dlarre.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Table of constant values */ -static int c__1 = 1; -static int c__2 = 2; -#define TRUE_ (1) -#define FALSE_ (0) - -/* Subroutine */ -int odrre(char *range, int *n, double *vl, - double *vu, int *il, int *iu, double *d__, double *e, - double *e2, double *rtol1, double *rtol2, double *spltol, - int *nsplit, int *isplit, int *m, double *w, - double *werr, double *wgap, int *iblock, int *indexw, - double *gers, double *pivmin, double *work, int * - iwork, int *info) -{ - /* System generated locals */ - int i__1, i__2; - double d__1, d__2, d__3; - - /* Builtin functions */ - // double sqrt(double), log(double); - - /* Local variables */ - int i__, j; - double s1, s2; - int mb; - double gl; - int in, mm; - double gu; - int cnt; - double eps, tau, tmp, rtl; - int cnt1, cnt2; - double tmp1, eabs; - int iend, jblk; - double eold; - int indl; - double dmax__, emax; - int wend, idum, indu; - double rtol; - int iseed[4]; - double avgap, sigma; - extern int olsame(char *, char *); - int iinfo; - extern /* Subroutine */ int odcpy(int *, double *, int *, - double *, int *); - long double norep; - extern /* Subroutine */ int odsq2(int *, double *, int *); - // extern double odmch(char *); - int ibegin; - long double forceb; - int irange; - double sgndef; - extern /* Subroutine */ int odrra(int *, double *, double *, - double *, double *, double *, int *, int *, - int *), odrrb(int *, double *, double *, - int *, int *, double *, double *, int *, - double *, double *, double *, double *, int *, - double *, double *, int *, int *), odrrc(char * -, int *, double *, double *, double *, double - *, double *, int *, int *, int *, int *); - int wbegin; - extern /* Subroutine */ int odrrd(char *, char *, int *, double - *, double *, int *, int *, double *, double *, - double *, double *, double *, double *, int * -, int *, int *, double *, double *, double *, - double *, int *, int *, double *, int *, - int *); - double safmin, spdiam; - extern /* Subroutine */ int odrrk(int *, int *, double *, - double *, double *, double *, double *, - double *, double *, double *, int *); - long double usedqd; - double clwdth, isleft; - extern /* Subroutine */ int odrnv(int *, int *, int *, - double *); - double isrght, bsrtol, dpivot; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* To find the desired eigenvalues of a given real symmetric */ -/* tridiagonal matrix T, ODRRE sets any "small" off-diagonal */ -/* elements to zero, and for each unreduced block T_i, it finds */ -/* (a) a suitable shift at one end of the block's spectrum, */ -/* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ -/* (c) eigenvalues of each L_i D_i L_i^T. */ -/* The representations and eigenvalues found are then used by */ -/* DSTEMR to compute the eigenvectors of T. */ -/* The accuracy varies depending on whether bisection is used to */ -/* find a few eigenvalues or the dqds algorithm (subroutine ODSQ2) to */ -/* conpute all and then discard any unwanted one. */ -/* As an added benefit, ODRRE also outputs the n */ -/* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ - -/* Arguments */ -/* ========= */ - -/* RANGE (input) CHARACTER */ -/* = 'A': ("All") all eigenvalues will be found. */ -/* = 'V': ("Value") all eigenvalues in the half-open interval */ -/* (VL, VU] will be found. */ -/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ -/* entire matrix) will be found. */ - -/* N (input) INT */ -/* The order of the matrix. N > 0. */ - -/* VL (input/output) DOUBLE PRECISION */ -/* VU (input/output) DOUBLE PRECISION */ -/* If RANGE='V', the lower and upper bounds for the eigenvalues. */ -/* Eigenvalues less than or equal to VL, or greater than VU, */ -/* will not be returned. VL < VU. */ -/* If RANGE='I' or ='A', ODRRE computes bounds on the desired */ -/* part of the spectrum. */ - -/* IL (input) INT */ -/* IU (input) INT */ -/* If RANGE='I', the indices (in ascending order) of the */ -/* smallest and largest eigenvalues to be returned. */ -/* 1 <= IL <= IU <= N. */ - -/* D (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the N diagonal elements of the tridiagonal */ -/* matrix T. */ -/* On exit, the N diagonal elements of the diagonal */ -/* matrices D_i. */ - -/* E (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the first (N-1) entries contain the subdiagonal */ -/* elements of the tridiagonal matrix T; E(N) need not be set. */ -/* On exit, E contains the subdiagonal elements of the unit */ -/* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ -/* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ - -/* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the first (N-1) entries contain the SQUARES of the */ -/* subdiagonal elements of the tridiagonal matrix T; */ -/* E2(N) need not be set. */ -/* On exit, the entries E2( ISPLIT( I ) ), */ -/* 1 <= I <= NSPLIT, have been set to zero */ - -/* RTOL1 (input) DOUBLE PRECISION */ -/* RTOL2 (input) DOUBLE PRECISION */ -/* Parameters for bisection. */ -/* An interval [LEFT,RIGHT] has converged if */ -/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ - -/* SPLTOL (input) DOUBLE PRECISION */ -/* The threshold for splitting. */ - -/* NSPLIT (output) INT */ -/* The number of blocks T splits into. 1 <= NSPLIT <= N. */ - -/* ISPLIT (output) INT array, dimension (N) */ -/* The splitting points, at which T breaks up into blocks. */ -/* The first block consists of rows/columns 1 to ISPLIT(1), */ -/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ -/* etc., and the NSPLIT-th consists of rows/columns */ -/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ - -/* M (output) INT */ -/* The total number of eigenvalues (of all L_i D_i L_i^T) */ -/* found. */ - -/* W (output) DOUBLE PRECISION array, dimension (N) */ -/* The first M elements contain the eigenvalues. The */ -/* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ -/* sorted in ascending order ( ODRRE may use the */ -/* remaining N-M elements as workspace). */ - -/* WERR (output) DOUBLE PRECISION array, dimension (N) */ -/* The error bound on the corresponding eigenvalue in W. */ - -/* WGAP (output) DOUBLE PRECISION array, dimension (N) */ -/* The separation from the right neighbor eigenvalue in W. */ -/* The gap is only with respect to the eigenvalues of the same block */ -/* as each block has its own representation tree. */ -/* Exception: at the right end of a block we store the left gap */ - -/* IBLOCK (output) INT array, dimension (N) */ -/* The indices of the blocks (submatrices) associated with the */ -/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ -/* W(i) belongs to the first block from the top, =2 if W(i) */ -/* belongs to the second block, etc. */ - -/* INDEXW (output) INT array, dimension (N) */ -/* The indices of the eigenvalues within each block (submatrix); */ -/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ -/* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ - -/* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ -/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ -/* is (GERS(2*i-1), GERS(2*i)). */ - -/* PIVMIN (output) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence for T. */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ -/* Workspace. */ - -/* IWORK (workspace) INT array, dimension (5*N) */ -/* Workspace. */ - -/* INFO (output) INT */ -/* = 0: successful exit */ -/* > 0: A problem occured in ODRRE. */ -/* < 0: One of the called subroutines signaled an internal problem. */ -/* Needs inspection of the corresponding parameter IINFO */ -/* for further information. */ - -/* =-1: Problem in ODRRD. */ -/* = 2: No base representation could be found in MAXTRY iterations. */ -/* Increasing MAXTRY and recompilation might be a remedy. */ -/* =-3: Problem in ODRRB when computing the refined root */ -/* representation for ODSQ2. */ -/* =-4: Problem in ODRRB when preforming bisection on the */ -/* desired part of the spectrum. */ -/* =-5: Problem in ODSQ2. */ -/* =-6: Problem in ODSQ2. */ - -/* Further Details */ -/* The base representations are required to suffer very little */ -/* element growth and consequently define all their eigenvalues to */ -/* high relative accuracy. */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Local Arrays .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --iwork; - --work; - --gers; - --indexw; - --iblock; - --wgap; - --werr; - --w; - --isplit; - --e2; - --e; - --d__; - - /* Function Body */ - *info = 0; - -/* Decode RANGE */ - - if (olsame(range, "A")) { - irange = 1; - } else if (olsame(range, "V")) { - irange = 3; - } else if (olsame(range, "I")) { - irange = 2; - } - *m = 0; -/* Get machine constants */ - safmin = DBL_MIN; // odmch("S"); - eps = DBL_EPSILON; // odmch("P"); -/* Set parameters */ - rtl = sqrt(eps); - bsrtol = sqrt(eps); -/* Treat case of 1x1 matrix for quick return */ - if (*n == 1) { - if (irange == 1 || irange == 3 && d__[1] > *vl && d__[1] <= *vu || - irange == 2 && *il == 1 && *iu == 1) { - *m = 1; - w[1] = d__[1]; -/* The computation error of the eigenvalue is zero */ - werr[1] = 0.; - wgap[1] = 0.; - iblock[1] = 1; - indexw[1] = 1; - gers[1] = d__[1]; - gers[2] = d__[1]; - } -/* store the shift for the initial RRR, which is zero in this case */ - e[1] = 0.; - return 0; - } -/* General case: tridiagonal matrix of order > 1 */ - -/* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ -/* Compute maximum off-diagonal entry and pivmin. */ - gl = d__[1]; - gu = d__[1]; - eold = 0.; - emax = 0.; - e[*n] = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - werr[i__] = 0.; - wgap[i__] = 0.; - eabs = (d__1 = e[i__], fabs(d__1)); - if (eabs >= emax) { - emax = eabs; - } - tmp1 = eabs + eold; - gers[(i__ << 1) - 1] = d__[i__] - tmp1; -/* Computing MIN */ - d__1 = gl, d__2 = gers[(i__ << 1) - 1]; - gl = fmin(d__1,d__2); - gers[i__ * 2] = d__[i__] + tmp1; -/* Computing MAX */ - d__1 = gu, d__2 = gers[i__ * 2]; - gu = fmax(d__1,d__2); - eold = eabs; -/* L5: */ - } -/* The minimum pivot allowed in the Sturm sequence for T */ -/* Computing MAX */ -/* Computing 2nd power */ - d__3 = emax; - d__1 = 1., d__2 = d__3 * d__3; - *pivmin = safmin * fmax(d__1,d__2); -/* Compute spectral diameter. The Gerschgorin bounds give an */ -/* estimate that is wrong by at most a factor of SQRT(2) */ - spdiam = gu - gl; -/* Compute splitting points */ - odrra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & - iinfo); -/* Can force use of bisection instead of faster DQDS. */ -/* Option left in the code for future multisection work. */ - forceb = FALSE_; -/* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ -/* explicitly wants bisection. */ - usedqd = irange == 1 && ! forceb; - if (irange == 1 && ! forceb) { -/* Set interval [VL,VU] that contains all eigenvalues */ - *vl = gl; - *vu = gu; - } else { -/* We call ODRRD to find crude approximations to the eigenvalues */ -/* in the desired range. In case IRANGE = INDRNG, we also obtain the */ -/* interval (VL,VU] that contains all the wanted eigenvalues. */ -/* An interval [LEFT,RIGHT] has converged if */ -/* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ -/* ODRRD needs a WORK of size 4*N, IWORK of size 3*N */ - odrrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ - 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], - vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); - if (iinfo != 0) { - *info = -1; - return 0; - } -/* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ - i__1 = *n; - for (i__ = mm + 1; i__ <= i__1; ++i__) { - w[i__] = 0.; - werr[i__] = 0.; - iblock[i__] = 0; - indexw[i__] = 0; -/* L14: */ - } - } -/* ** */ -/* Loop over unreduced blocks */ - ibegin = 1; - wbegin = 1; - i__1 = *nsplit; - for (jblk = 1; jblk <= i__1; ++jblk) { - iend = isplit[jblk]; - in = iend - ibegin + 1; -/* 1 X 1 block */ - if (in == 1) { - if (irange == 1 || irange == 3 && d__[ibegin] > *vl && d__[ibegin] - <= *vu || irange == 2 && iblock[wbegin] == jblk) { - ++(*m); - w[*m] = d__[ibegin]; - werr[*m] = 0.; -/* The gap for a single block doesn't matter for the later */ -/* algorithm and is assigned an arbitrary large value */ - wgap[*m] = 0.; - iblock[*m] = jblk; - indexw[*m] = 1; - ++wbegin; - } -/* E( IEND ) holds the shift for the initial RRR */ - e[iend] = 0.; - ibegin = iend + 1; - goto L170; - } - -/* Blocks of size larger than 1x1 */ - -/* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ - e[iend] = 0.; - -/* Find local outer bounds GL,GU for the block */ - gl = d__[ibegin]; - gu = d__[ibegin]; - i__2 = iend; - for (i__ = ibegin; i__ <= i__2; ++i__) { -/* Computing MIN */ - d__1 = gers[(i__ << 1) - 1]; - gl = fmin(d__1,gl); -/* Computing MAX */ - d__1 = gers[i__ * 2]; - gu = fmax(d__1,gu); -/* L15: */ - } - spdiam = gu - gl; - if (! (irange == 1 && ! forceb)) { -/* Count the number of eigenvalues in the current block. */ - mb = 0; - i__2 = mm; - for (i__ = wbegin; i__ <= i__2; ++i__) { - if (iblock[i__] == jblk) { - ++mb; - } else { - goto L21; - } -/* L20: */ - } -L21: - if (mb == 0) { -/* No eigenvalue in the current block lies in the desired range */ -/* E( IEND ) holds the shift for the initial RRR */ - e[iend] = 0.; - ibegin = iend + 1; - goto L170; - } else { -/* Decide whether dqds or bisection is more efficient */ - usedqd = (double) mb > in * .5 && ! forceb; - wend = wbegin + mb - 1; -/* Calculate gaps for the current block */ -/* In later stages, when representations for individual */ -/* eigenvalues are different, we use SIGMA = E( IEND ). */ - sigma = 0.; - i__2 = wend - 1; - for (i__ = wbegin; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + - werr[i__]); - wgap[i__] = fmax(d__1,d__2); -/* L30: */ - } -/* Computing MAX */ - d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); - wgap[wend] = fmax(d__1,d__2); -/* Find local index of the first and last desired evalue. */ - indl = indexw[wbegin]; - indu = indexw[wend]; - } - } - if (irange == 1 && ! forceb || usedqd) { -/* Case of DQDS */ -/* Find approximations to the extremal eigenvalues of the block */ - odrrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & - rtl, &tmp, &tmp1, &iinfo); - if (iinfo != 0) { - *info = -1; - return 0; - } -/* Computing MAX */ - d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, - fabs(d__1)); - isleft = fmax(d__2,d__3); - odrrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & - rtl, &tmp, &tmp1, &iinfo); - if (iinfo != 0) { - *info = -1; - return 0; - } -/* Computing MIN */ - d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, - fabs(d__1)); - isrght = fmin(d__2,d__3); -/* Improve the estimate of the spectral diameter */ - spdiam = isrght - isleft; - } else { -/* Case of bisection */ -/* Find approximations to the wanted extremal eigenvalues */ -/* Computing MAX */ - d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = - w[wbegin] - werr[wbegin], fabs(d__1)); - isleft = fmax(d__2,d__3); -/* Computing MIN */ - d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ - wend] + werr[wend], fabs(d__1)); - isrght = fmin(d__2,d__3); - } -/* Decide whether the base representation for the current block */ -/* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ -/* should be on the left or the right end of the current block. */ -/* The strategy is to shift to the end which is "more populated" */ -/* Furthermore, decide whether to use DQDS for the computation of */ -/* the eigenvalue approximations at the end of ODRRE or bisection. */ -/* dqds is chosen if all eigenvalues are desired or the number of */ -/* eigenvalues to be computed is large compared to the blocksize. */ - if (irange == 1 && ! forceb) { -/* If all the eigenvalues have to be computed, we use dqd */ - usedqd = TRUE_; -/* INDL is the local index of the first eigenvalue to compute */ - indl = 1; - indu = in; -/* MB = number of eigenvalues to compute */ - mb = in; - wend = wbegin + mb - 1; -/* Define 1/4 and 3/4 points of the spectrum */ - s1 = isleft + spdiam * .25; - s2 = isrght - spdiam * .25; - } else { -/* ODRRD has computed IBLOCK and INDEXW for each eigenvalue */ -/* approximation. */ -/* choose sigma */ - if (usedqd) { - s1 = isleft + spdiam * .25; - s2 = isrght - spdiam * .25; - } else { - tmp = fmin(isrght,*vu) - fmax(isleft,*vl); - s1 = fmax(isleft,*vl) + tmp * .25; - s2 = fmin(isrght,*vu) - tmp * .25; - } - } -/* Compute the negcount at the 1/4 and 3/4 points */ - if (mb > 1) { - odrrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & - cnt, &cnt1, &cnt2, &iinfo); - } - if (mb == 1) { - sigma = gl; - sgndef = 1.; - } else if (cnt1 - indl >= indu - cnt2) { - if (irange == 1 && ! forceb) { - sigma = fmax(isleft,gl); - } else if (usedqd) { -/* use Gerschgorin bound as shift to get pos def matrix */ -/* for dqds */ - sigma = isleft; - } else { -/* use approximation of the first desired eigenvalue of the */ -/* block as shift */ - sigma = fmax(isleft,*vl); - } - sgndef = 1.; - } else { - if (irange == 1 && ! forceb) { - sigma = fmin(isrght,gu); - } else if (usedqd) { -/* use Gerschgorin bound as shift to get neg def matrix */ -/* for dqds */ - sigma = isrght; - } else { -/* use approximation of the first desired eigenvalue of the */ -/* block as shift */ - sigma = fmin(isrght,*vu); - } - sgndef = -1.; - } -/* An initial SIGMA has been chosen that will be used for computing */ -/* T - SIGMA I = L D L^T */ -/* Define the increment TAU of the shift in case the initial shift */ -/* needs to be refined to obtain a factorization with not too much */ -/* element growth. */ - if (usedqd) { -/* The initial SIGMA was to the outer end of the spectrum */ -/* the matrix is definite and we need not retreat. */ - tau = spdiam * eps * *n + *pivmin * 2.; - tau = fmax(tau, 2 * eps * fabs(sigma)); - } else { - if (mb > 1) { - clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; - avgap = (d__1 = clwdth / (double) (wend - wbegin), fabs( - d__1)); - if (sgndef == 1.) { -/* Computing MAX */ - d__1 = wgap[wbegin]; - tau = fmax(d__1,avgap) * .5; -/* Computing MAX */ - d__1 = tau, d__2 = werr[wbegin]; - tau = fmax(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = wgap[wend - 1]; - tau = fmax(d__1,avgap) * .5; -/* Computing MAX */ - d__1 = tau, d__2 = werr[wend]; - tau = fmax(d__1,d__2); - } - } else { - tau = werr[wbegin]; - } - } - - for (idum = 1; idum <= 6; ++idum) { -/* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ -/* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ -/* pivots in WORK(2*IN+1:3*IN) */ - dpivot = d__[ibegin] - sigma; - work[1] = dpivot; - dmax__ = fabs(work[1]); - j = ibegin; - i__2 = in - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[(in << 1) + i__] = 1. / work[i__]; - tmp = e[j] * work[(in << 1) + i__]; - work[in + i__] = tmp; - dpivot = d__[j + 1] - sigma - tmp * e[j]; - work[i__ + 1] = dpivot; -/* Computing MAX */ - d__1 = dmax__, d__2 = fabs(dpivot); - dmax__ = fmax(d__1,d__2); - ++j; -/* L70: */ - } -/* check for element growth */ - if (dmax__ > spdiam * 64.) { - norep = TRUE_; - } else { - norep = FALSE_; - } - if (usedqd && ! norep) { -/* Ensure the definiteness of the representation */ -/* All entries of D (of L D L^T) must have the same sign */ - i__2 = in; - for (i__ = 1; i__ <= i__2; ++i__) { - tmp = sgndef * work[i__]; - if (tmp < 0.) { - norep = TRUE_; - } -/* L71: */ - } - } - if (norep) { -/* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ -/* shift which makes the matrix definite. So we should end up */ -/* here really only in the case of IRANGE = VALRNG or INDRNG. */ - if (idum == 5) { - if (sgndef == 1.) { -/* The fudged Gerschgorin shift should succeed */ - sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; - } else { - sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; - } - } else { - sigma -= sgndef * tau; - tau *= 2.; - } - } else { -/* an initial RRR is found */ - goto L83; - } -/* L80: */ - } -/* if the program reaches this point, no base representation could be */ -/* found in MAXTRY iterations. */ - *info = 2; - return 0; -L83: -/* At this point, we have found an initial base representation */ -/* T - SIGMA I = L D L^T with not too much element growth. */ -/* Store the shift. */ - e[iend] = sigma; -/* Store D and L. */ - odcpy(&in, &work[1], &c__1, &d__[ibegin], &c__1); - i__2 = in - 1; - odcpy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); - if (mb > 1) { - -/* Perturb each entry of the base representation by a small */ -/* (but random) relative amount to overcome difficulties with */ -/* glued matrices. */ - - for (i__ = 1; i__ <= 4; ++i__) { - iseed[i__ - 1] = 1; -/* L122: */ - } - i__2 = (in << 1) - 1; - odrnv(&c__2, iseed, &i__2, &work[1]); - i__2 = in - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; - e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; -/* L125: */ - } - d__[iend] *= eps * 4. * work[in] + 1.; - - } - -/* Don't update the Gerschgorin intervals because keeping track */ -/* of the updates would be too much work in DLARRV. */ -/* We update W instead and use it to locate the proper Gerschgorin */ -/* intervals. */ -/* Compute the required eigenvalues of L D L' by bisection or dqds */ - if (! usedqd) { -/* If ODRRD has been used, shift the eigenvalue approximations */ -/* according to their representation. This is necessary for */ -/* a uniform DLARRV since dqds computes eigenvalues of the */ -/* shifted representation. In DLARRV, W will always hold the */ -/* UNshifted eigenvalue approximation. */ - i__2 = wend; - for (j = wbegin; j <= i__2; ++j) { - w[j] -= sigma; - werr[j] += (d__1 = w[j], fabs(d__1)) * eps; -/* L134: */ - } -/* call ODRRB to reduce eigenvalue error of the approximations */ -/* from ODRRD */ - i__2 = iend - 1; - for (i__ = ibegin; i__ <= i__2; ++i__) { -/* Computing 2nd power */ - d__1 = e[i__]; - work[i__] = d__[i__] * (d__1 * d__1); -/* L135: */ - } -/* use bisection to find EV from INDL to INDU */ - i__2 = indl - 1; - odrrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, - rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & - work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & - iinfo); - if (iinfo != 0) { - *info = -4; - return 0; - } -/* ODRRB computes all gaps correctly except for the last one */ -/* Record distance to VU/GU */ -/* Computing MAX */ - d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); - wgap[wend] = fmax(d__1,d__2); - i__2 = indu; - for (i__ = indl; i__ <= i__2; ++i__) { - ++(*m); - iblock[*m] = jblk; - indexw[*m] = i__; -/* L138: */ - } - } else { -/* Call dqds to get all eigs (and then possibly delete unwanted */ -/* eigenvalues). */ -/* Note that dqds finds the eigenvalues of the L D L^T representation */ -/* of T to high relative accuracy. High relative accuracy */ -/* might be lost when the shift of the RRR is subtracted to obtain */ -/* the eigenvalues of T. However, T is not guaranteed to define its */ -/* eigenvalues to high relative accuracy anyway. */ -/* Set RTOL to the order of the tolerance used in ODSQ2 */ -/* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ -/* which is usually too large and requires unnecessary work to be */ -/* done by bisection when computing the eigenvectors */ - rtol = log((double) in) * 4. * eps; - j = ibegin; - i__2 = in - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[(i__ << 1) - 1] = (d__1 = d__[j], fabs(d__1)); - work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; - ++j; -/* L140: */ - } - work[(in << 1) - 1] = (d__1 = d__[iend], fabs(d__1)); - work[in * 2] = 0.; - odsq2(&in, &work[1], &iinfo); - if (iinfo != 0) { -/* If IINFO = -5 then an index is part of a tight cluster */ -/* and should be changed. The index is in IWORK(1) and the */ -/* gap is in WORK(N+1) */ - *info = -5; - return 0; - } else { -/* Test that all eigenvalues are positive as expected */ - i__2 = in; - for (i__ = 1; i__ <= i__2; ++i__) { - if (work[i__] < 0.) { - *info = -6; - return 0; - } -/* L149: */ - } - } - if (sgndef > 0.) { - i__2 = indu; - for (i__ = indl; i__ <= i__2; ++i__) { - ++(*m); - w[*m] = work[in - i__ + 1]; - iblock[*m] = jblk; - indexw[*m] = i__; -/* L150: */ - } - } else { - i__2 = indu; - for (i__ = indl; i__ <= i__2; ++i__) { - ++(*m); - w[*m] = -work[i__]; - iblock[*m] = jblk; - indexw[*m] = i__; -/* L160: */ - } - } - i__2 = *m; - for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { -/* the value of RTOL below should be the tolerance in ODSQ2 */ - werr[i__] = rtol * (d__1 = w[i__], fabs(d__1)); -/* L165: */ - } - i__2 = *m - 1; - for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { -/* compute the right gap between the intervals */ -/* Computing MAX */ - d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ - i__]); - wgap[i__] = fmax(d__1,d__2); -/* L166: */ - } -/* Computing MAX */ - d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); - wgap[*m] = fmax(d__1,d__2); - } -/* proceed with next block */ - ibegin = iend + 1; - wbegin = wend + 1; -L170: - ; - } - - return 0; - -/* end of ODRRE */ - -} /* odrre_ */ diff --git a/external/pmrrr/src/lapack/odrrf.c b/external/pmrrr/src/lapack/odrrf.c deleted file mode 100644 index fb3ad63408..0000000000 --- a/external/pmrrr/src/lapack/odrrf.c +++ /dev/null @@ -1,419 +0,0 @@ -/* dlarrf.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Table of constant values */ -#define TRUE_ (1) -#define FALSE_ (0) -static int c__1 = 1; - -/* Subroutine */ -int odrrf(int *n, double *d__, double *l, - double *ld, int *clstrt, int *clend, double *w, - double *wgap, double *werr, double *spdiam, double * - clgapl, double *clgapr, double *pivmin, double *sigma, - double *dplus, double *lplus, double *work, int *info) -{ - /* System generated locals */ - int i__1; - double d__1, d__2, d__3; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__; - double s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, - znm2, growthbound, fail, fact, oldp; - int indx; - double prod; - int ktry; - double fail2, avgap, ldmax, rdmax; - int shift; - extern /* Subroutine */ int odcpy(int *, double *, int *, - double *, int *); - int dorrr1; - // extern double odmch(char *); - double ldelta; - int nofail; - double mingap, lsigma, rdelta; - extern int odnan(double *); - int forcer; - double rsigma, clwdth; - int sawnan1, sawnan2, tryrrr1; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ -/* * */ -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Given the initial representation L D L^T and its cluster of close */ -/* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */ -/* W( CLEND ), ODRRF finds a new relatively robust representation */ -/* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */ -/* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix (subblock, if the matrix splitted). */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of the diagonal matrix D. */ - -/* L (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (N-1) subdiagonal elements of the unit bidiagonal */ -/* matrix L. */ - -/* LD (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (N-1) elements L(i)*D(i). */ - -/* CLSTRT (input) INT */ -/* The index of the first eigenvalue in the cluster. */ - -/* CLEND (input) INT */ -/* The index of the last eigenvalue in the cluster. */ - -/* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ -/* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */ -/* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */ -/* close eigenalues. */ - -/* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ -/* The separation from the right neighbor eigenvalue in W. */ - -/* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ -/* WERR contain the semiwidth of the uncertainty */ -/* interval of the corresponding eigenvalue APPROXIMATION in W */ - -/* SPDIAM (input) estimate of the spectral diameter obtained from the */ -/* Gerschgorin intervals */ - -/* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */ -/* Set by the calling routine to protect against shifts too close */ -/* to eigenvalues outside the cluster. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot allowed in the Sturm sequence. */ - -/* SIGMA (output) DOUBLE PRECISION */ -/* The shift used to form L(+) D(+) L(+)^T. */ - -/* DPLUS (output) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of the diagonal matrix D(+). */ - -/* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */ -/* The first (N-1) elements of LPLUS contain the subdiagonal */ -/* elements of the unit bidiagonal matrix L(+). */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ -/* Workspace. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --work; - --lplus; - --dplus; - --werr; - --wgap; - --w; - --ld; - --l; - --d__; - - /* Function Body */ - *info = 0; - fact = 2.; - eps = DBL_EPSILON; // eps = odmch("Precision"); - shift = 0; - forcer = FALSE_; -/* Note that we cannot guarantee that for any of the shifts tried, */ -/* the factorization has a small or even moderate element growth. */ -/* There could be Ritz values at both ends of the cluster and despite */ -/* backing off, there are examples where all factorizations tried */ -/* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */ -/* element growth. */ -/* For this reason, we should use PIVMIN in this subroutine so that at */ -/* least the L D L^T factorization exists. It can be checked afterwards */ -/* whether the element growth caused bad residuals/orthogonality. */ -/* Decide whether the code should accept the best among all */ -/* representations despite large element growth or signal INFO=1 */ - nofail = TRUE_; - -/* Compute the average gap length of the cluster */ - clwdth = (d__1 = w[*clend] - w[*clstrt], fabs(d__1)) + werr[*clend] + werr[ - *clstrt]; - avgap = clwdth / (double) (*clend - *clstrt); - mingap = fmin(*clgapl,*clgapr); -/* Initial values for shifts to both ends of cluster */ -/* Computing MIN */ - d__1 = w[*clstrt], d__2 = w[*clend]; - lsigma = fmin(d__1,d__2) - werr[*clstrt]; -/* Computing MAX */ - d__1 = w[*clstrt], d__2 = w[*clend]; - rsigma = fmax(d__1,d__2) + werr[*clend]; -/* Use a small fudge to make sure that we really shift to the outside */ - lsigma -= fabs(lsigma) * 4. * eps; - rsigma += fabs(rsigma) * 4. * eps; -/* Compute upper bounds for how much to back off the initial shifts */ - ldmax = mingap * .25 + *pivmin * 2.; - rdmax = mingap * .25 + *pivmin * 2.; -/* Computing MAX */ - d__1 = avgap, d__2 = wgap[*clstrt]; - ldelta = fmax(d__1,d__2) / fact; -/* Computing MAX */ - d__1 = avgap, d__2 = wgap[*clend - 1]; - rdelta = fmax(d__1,d__2) / fact; - -/* Initialize the record of the best representation found */ - - // s = DBL_MIN; // s = odmch("S"); - smlgrowth = DBL_MAX; - fail = (double) (*n - 1) * mingap / (*spdiam * eps); - fail2 = (double) (*n - 1) * mingap / (*spdiam * sqrt(eps)); - bestshift = lsigma; - -/* while (KTRY <= KTRYMAX) */ - ktry = 0; - growthbound = *spdiam * 8.; -L5: - sawnan1 = FALSE_; - sawnan2 = FALSE_; -/* Ensure that we do not back off too much of the initial shifts */ - ldelta = fmin(ldmax,ldelta); - rdelta = fmin(rdmax,rdelta); -/* Compute the element growth when shifting to both ends of the cluster */ -/* accept the shift if there is no element growth at one of the two ends */ -/* Left end */ - s = -lsigma; - dplus[1] = d__[1] + s; - if (fabs(dplus[1]) < *pivmin) { - dplus[1] = -(*pivmin); -/* Need to set SAWNAN1 because refined RRR test should not be used */ -/* in this case */ - sawnan1 = TRUE_; - } - max1 = fabs(dplus[1]); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - lplus[i__] = ld[i__] / dplus[i__]; - s = s * lplus[i__] * l[i__] - lsigma; - dplus[i__ + 1] = d__[i__ + 1] + s; - if ((d__1 = dplus[i__ + 1], fabs(d__1)) < *pivmin) { - dplus[i__ + 1] = -(*pivmin); -/* Need to set SAWNAN1 because refined RRR test should not be used */ -/* in this case */ - sawnan1 = TRUE_; - } -/* Computing MAX */ - d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], fabs(d__1)); - max1 = fmax(d__2,d__3); -/* L6: */ - } - sawnan1 = sawnan1 || odnan(&max1); - if (forcer || max1 <= growthbound && ! sawnan1) { - *sigma = lsigma; - shift = 1; - goto L100; - } -/* Right end */ - s = -rsigma; - work[1] = d__[1] + s; - if (fabs(work[1]) < *pivmin) { - work[1] = -(*pivmin); -/* Need to set SAWNAN2 because refined RRR test should not be used */ -/* in this case */ - sawnan2 = TRUE_; - } - max2 = fabs(work[1]); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - work[*n + i__] = ld[i__] / work[i__]; - s = s * work[*n + i__] * l[i__] - rsigma; - work[i__ + 1] = d__[i__ + 1] + s; - if ((d__1 = work[i__ + 1], fabs(d__1)) < *pivmin) { - work[i__ + 1] = -(*pivmin); -/* Need to set SAWNAN2 because refined RRR test should not be used */ -/* in this case */ - sawnan2 = TRUE_; - } -/* Computing MAX */ - d__2 = max2, d__3 = (d__1 = work[i__ + 1], fabs(d__1)); - max2 = fmax(d__2,d__3); -/* L7: */ - } - sawnan2 = sawnan2 || odnan(&max2); - if (forcer || max2 <= growthbound && ! sawnan2) { - *sigma = rsigma; - shift = 2; - goto L100; - } -/* If we are at this point, both shifts led to too much element growth */ -/* Record the better of the two shifts (provided it didn't lead to NaN) */ - if (sawnan1 && sawnan2) { -/* both MAX1 and MAX2 are NaN */ - goto L50; - } else { - if (! sawnan1) { - indx = 1; - if (max1 <= smlgrowth) { - smlgrowth = max1; - bestshift = lsigma; - } - } - if (! sawnan2) { - if (sawnan1 || max2 <= max1) { - indx = 2; - } - if (max2 <= smlgrowth) { - smlgrowth = max2; - bestshift = rsigma; - } - } - } -/* If we are here, both the left and the right shift led to */ -/* element growth. If the element growth is moderate, then */ -/* we may still accept the representation, if it passes a */ -/* refined test for RRR. This test supposes that no NaN occurred. */ -/* Moreover, we use the refined RRR test only for isolated clusters. */ - if (clwdth < mingap / 128. && fmin(max1,max2) < fail2 && ! sawnan1 && ! - sawnan2) { - dorrr1 = TRUE_; - } else { - dorrr1 = FALSE_; - } - tryrrr1 = TRUE_; - if (tryrrr1 && dorrr1) { - if (indx == 1) { - tmp = (d__1 = dplus[*n], fabs(d__1)); - znm2 = 1.; - prod = 1.; - oldp = 1.; - for (i__ = *n - 1; i__ >= 1; --i__) { - if (prod <= eps) { - prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] * - work[*n + i__]) * oldp; - } else { - prod *= (d__1 = work[*n + i__], fabs(d__1)); - } - oldp = prod; -/* Computing 2nd power */ - d__1 = prod; - znm2 += d__1 * d__1; -/* Computing MAX */ - d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, fabs(d__1)); - tmp = fmax(d__2,d__3); -/* L15: */ - } - rrr1 = tmp / (*spdiam * sqrt(znm2)); - if (rrr1 <= 8.) { - *sigma = lsigma; - shift = 1; - goto L100; - } - } else if (indx == 2) { - tmp = (d__1 = work[*n], fabs(d__1)); - znm2 = 1.; - prod = 1.; - oldp = 1.; - for (i__ = *n - 1; i__ >= 1; --i__) { - if (prod <= eps) { - prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * - lplus[i__]) * oldp; - } else { - prod *= (d__1 = lplus[i__], fabs(d__1)); - } - oldp = prod; -/* Computing 2nd power */ - d__1 = prod; - znm2 += d__1 * d__1; -/* Computing MAX */ - d__2 = tmp, d__3 = (d__1 = work[i__] * prod, fabs(d__1)); - tmp = fmax(d__2,d__3); -/* L16: */ - } - rrr2 = tmp / (*spdiam * sqrt(znm2)); - if (rrr2 <= 8.) { - *sigma = rsigma; - shift = 2; - goto L100; - } - } - } -L50: - if (ktry < 1) { -/* If we are here, both shifts failed also the RRR test. */ -/* Back off to the outside */ -/* Computing MAX */ - d__1 = lsigma - ldelta, d__2 = lsigma - ldmax; - lsigma = fmax(d__1,d__2); -/* Computing MIN */ - d__1 = rsigma + rdelta, d__2 = rsigma + rdmax; - rsigma = fmin(d__1,d__2); - ldelta *= 2.; - rdelta *= 2.; - ++ktry; - goto L5; - } else { -/* None of the representations investigated satisfied our */ -/* criteria. Take the best one we found. */ - if (smlgrowth < fail || nofail) { - lsigma = bestshift; - rsigma = bestshift; - forcer = TRUE_; - goto L5; - } else { - *info = 1; - return 0; - } - } -L100: - if (shift == 1) { - } else if (shift == 2) { -/* store new L and D back into DPLUS, LPLUS */ - odcpy(n, &work[1], &c__1, &dplus[1], &c__1); - i__1 = *n - 1; - odcpy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1); - } - return 0; - -/* End of ODRRF */ - -} /* odrrf_ */ diff --git a/external/pmrrr/src/lapack/odrrj.c b/external/pmrrr/src/lapack/odrrj.c deleted file mode 100644 index 9dc93ccc7d..0000000000 --- a/external/pmrrr/src/lapack/odrrj.c +++ /dev/null @@ -1,333 +0,0 @@ -/* dlarrj.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odrrj(int *n, double *d__, double *e2, - int *ifirst, int *ilast, double *rtol, int *offset, - double *w, double *werr, double *work, int *iwork, - double *pivmin, double *spdiam, int *info) -{ - /* System generated locals */ - int i__1, i__2; - double d__1, d__2; - - /* Builtin functions */ - // double log(double); - - /* Local variables */ - int i__, j, k, p; - double s; - int i1, i2, ii; - double fac, mid; - int cnt; - double tmp, left; - int iter, nint, prev, next, savi1; - double right, width, dplus; - int olnint, maxitr; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Given the initial eigenvalue approximations of T, ODRRJ */ -/* does bisection to refine the eigenvalues of T, */ -/* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ -/* guesses for these eigenvalues are input in W, the corresponding estimate */ -/* of the error in these guesses in WERR. During bisection, intervals */ -/* [left, right] are maintained by storing their mid-points and */ -/* semi-widths in the arrays W and WERR respectively. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of T. */ - -/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The Squares of the (N-1) subdiagonal elements of T. */ - -/* IFIRST (input) INT */ -/* The index of the first eigenvalue to be computed. */ - -/* ILAST (input) INT */ -/* The index of the last eigenvalue to be computed. */ - -/* RTOL (input) DOUBLE PRECISION */ -/* Tolerance for the convergence of the bisection intervals. */ -/* An interval [LEFT,RIGHT] has converged if */ -/* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */ - -/* OFFSET (input) INT */ -/* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ -/* through ILAST-OFFSET elements of these arrays are to be used. */ - -/* W (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ -/* estimates of the eigenvalues of L D L^T indexed IFIRST through */ -/* ILAST. */ -/* On output, these estimates are refined. */ - -/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ -/* the errors in the estimates of the corresponding elements in W. */ -/* On output, these errors are refined. */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ -/* Workspace. */ - -/* IWORK (workspace) INT array, dimension (2*N) */ -/* Workspace. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot in the Sturm sequence for T. */ - -/* SPDIAM (input) DOUBLE PRECISION */ -/* The spectral diameter of T. */ - -/* INFO (output) INT */ -/* Error flag. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ - -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --iwork; - --work; - --werr; - --w; - --e2; - --d__; - - /* Function Body */ - *info = 0; - - maxitr = (int) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + - 2; - -/* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ -/* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ -/* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The int IWORK( 2*I-1 ) */ -/* for an unconverged interval is set to the index of the next unconverged */ -/* interval, and is -1 or 0 for a converged interval. Thus a linked */ -/* list of unconverged intervals is set up. */ - - i1 = *ifirst; - i2 = *ilast; -/* The number of unconverged intervals */ - nint = 0; -/* The last unconverged interval found */ - prev = 0; - i__1 = i2; - for (i__ = i1; i__ <= i__1; ++i__) { - k = i__ << 1; - ii = i__ - *offset; - left = w[ii] - werr[ii]; - mid = w[ii]; - right = w[ii] + werr[ii]; - width = right - mid; -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - tmp = fmax(d__1,d__2); -/* The following test prevents the test of converged intervals */ - if (width < *rtol * tmp) { -/* This interval has already converged and does not need refinement. */ -/* (Note that the gaps might change through refining the */ -/* eigenvalues, however, they can only get bigger.) */ -/* Remove it from the list. */ - iwork[k - 1] = -1; -/* Make sure that I1 always points to the first unconverged interval */ - if (i__ == i1 && i__ < i2) { - i1 = i__ + 1; - } - if (prev >= i1 && i__ <= i2) { - iwork[(prev << 1) - 1] = i__ + 1; - } - } else { -/* unconverged interval found */ - prev = i__; -/* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ - -/* Do while( CNT(LEFT).GT.I-1 ) */ - - fac = 1.; -L20: - cnt = 0; - s = left; - dplus = d__[1] - s; - if (dplus < 0.) { - ++cnt; - } - i__2 = *n; - for (j = 2; j <= i__2; ++j) { - dplus = d__[j] - s - e2[j - 1] / dplus; - if (dplus < 0.) { - ++cnt; - } -/* L30: */ - } - if (cnt > i__ - 1) { - left -= werr[ii] * fac; - fac *= 2.; - goto L20; - } - -/* Do while( CNT(RIGHT).LT.I ) */ - - fac = 1.; -L50: - cnt = 0; - s = right; - dplus = d__[1] - s; - if (dplus < 0.) { - ++cnt; - } - i__2 = *n; - for (j = 2; j <= i__2; ++j) { - dplus = d__[j] - s - e2[j - 1] / dplus; - if (dplus < 0.) { - ++cnt; - } -/* L60: */ - } - if (cnt < i__) { - right += werr[ii] * fac; - fac *= 2.; - goto L50; - } - ++nint; - iwork[k - 1] = i__ + 1; - iwork[k] = cnt; - } - work[k - 1] = left; - work[k] = right; -/* L75: */ - } - savi1 = i1; - -/* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ -/* and while (ITER.LT.MAXITR) */ - - iter = 0; -L80: - prev = i1 - 1; - i__ = i1; - olnint = nint; - i__1 = olnint; - for (p = 1; p <= i__1; ++p) { - k = i__ << 1; - ii = i__ - *offset; - next = iwork[k - 1]; - left = work[k - 1]; - right = work[k]; - mid = (left + right) * .5; -/* semiwidth of interval */ - width = right - mid; -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - tmp = fmax(d__1,d__2); - if (width < *rtol * tmp || iter == maxitr) { -/* reduce number of unconverged intervals */ - --nint; -/* Mark interval as converged. */ - iwork[k - 1] = 0; - if (i1 == i__) { - i1 = next; - } else { -/* Prev holds the last unconverged interval previously examined */ - if (prev >= i1) { - iwork[(prev << 1) - 1] = next; - } - } - i__ = next; - goto L100; - } - prev = i__; - -/* Perform one bisection step */ - - cnt = 0; - s = mid; - dplus = d__[1] - s; - if (dplus < 0.) { - ++cnt; - } - i__2 = *n; - for (j = 2; j <= i__2; ++j) { - dplus = d__[j] - s - e2[j - 1] / dplus; - if (dplus < 0.) { - ++cnt; - } -/* L90: */ - } - if (cnt <= i__ - 1) { - work[k - 1] = mid; - } else { - work[k] = mid; - } - i__ = next; -L100: - ; - } - ++iter; -/* do another loop if there are still unconverged intervals */ -/* However, in the last iteration, all intervals are accepted */ -/* since this is the best we can do. */ - if (nint > 0 && iter <= maxitr) { - goto L80; - } - - -/* At this point, all the intervals have converged */ - i__1 = *ilast; - for (i__ = savi1; i__ <= i__1; ++i__) { - k = i__ << 1; - ii = i__ - *offset; -/* All intervals marked by '0' have been refined. */ - if (iwork[k - 1] == 0) { - w[ii] = (work[k - 1] + work[k]) * .5; - werr[ii] = work[k] - w[ii]; - } -/* L110: */ - } - - return 0; - -/* End of ODRRJ */ - -} /* odrrj_ */ diff --git a/external/pmrrr/src/lapack/odrrk.c b/external/pmrrr/src/lapack/odrrk.c deleted file mode 100644 index 5fee04e438..0000000000 --- a/external/pmrrr/src/lapack/odrrk.c +++ /dev/null @@ -1,188 +0,0 @@ -/* dlarrk.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odrrk(int *n, int *iw, double *gl, - double *gu, double *d__, double *e2, double *pivmin, - double *reltol, double *w, double *werr, int *info) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Builtin functions */ - // double log(double); - - /* Local variables */ - int i__, it; - double mid, eps, tmp1, tmp2, left, atoli, right; - int itmax; - double rtoli, tnorm; - // extern double odmch(char *); - int negcnt; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODRRK computes one eigenvalue of a symmetric tridiagonal */ -/* matrix T to suitable accuracy. This is an auxiliary code to be */ -/* called from DSTEMR. */ - -/* To avoid overflow, the matrix must be scaled so that its */ -/* largest element is no greater than overflow**(1/2) * */ -/* underflow**(1/4) in absolute value, and for greatest */ -/* accuracy, it should not be much smaller than that. */ - -/* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ -/* Matrix", Report CS41, Computer Science Dept., Stanford */ -/* University, July 21, 1966. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the tridiagonal matrix T. N >= 0. */ - -/* IW (input) INT */ -/* The index of the eigenvalues to be returned. */ - -/* GL (input) DOUBLE PRECISION */ -/* GU (input) DOUBLE PRECISION */ -/* An upper and a lower bound on the eigenvalue. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The n diagonal elements of the tridiagonal matrix T. */ - -/* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ -/* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ - -/* PIVMIN (input) DOUBLE PRECISION */ -/* The minimum pivot allowed in the Sturm sequence for T. */ - -/* RELTOL (input) DOUBLE PRECISION */ -/* The minimum relative width of an interval. When an interval */ -/* is narrower than RELTOL times the larger (in */ -/* magnitude) endpoint, then it is considered to be */ -/* sufficiently small, i.e., converged. Note: this should */ -/* always be at least radix*machine epsilon. */ - -/* W (output) DOUBLE PRECISION */ - -/* WERR (output) DOUBLE PRECISION */ -/* The error bound on the corresponding eigenvalue approximation */ -/* in W. */ - -/* INFO (output) INT */ -/* = 0: Eigenvalue converged */ -/* = -1: Eigenvalue did NOT converge */ - -/* Internal Parameters */ -/* =================== */ - -/* FUDGE DOUBLE PRECISION, default = 2 */ -/* A "fudge factor" to widen the Gershgorin intervals. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Get machine constants */ - /* Parameter adjustments */ - --e2; - --d__; - - /* Function Body */ - eps = DBL_EPSILON; // odmch("P"); -/* Computing MAX */ - d__1 = fabs(*gl), d__2 = fabs(*gu); - tnorm = fmax(d__1,d__2); - rtoli = *reltol; - atoli = *pivmin * 4.; - itmax = (int) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2; - *info = -1; - left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.; - right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.; - it = 0; -L10: - -/* Check if interval converged or maximum number of iterations reached */ - - tmp1 = (d__1 = right - left, fabs(d__1)); -/* Computing MAX */ - d__1 = fabs(right), d__2 = fabs(left); - tmp2 = fmax(d__1,d__2); -/* Computing MAX */ - d__1 = fmax(atoli,*pivmin), d__2 = rtoli * tmp2; - if (tmp1 < fmax(d__1,d__2)) { - *info = 0; - goto L30; - } - if (it > itmax) { - goto L30; - } - -/* Count number of negative pivots for mid-point */ - - ++it; - mid = (left + right) * .5; - negcnt = 0; - tmp1 = d__[1] - mid; - if (fabs(tmp1) < *pivmin) { - tmp1 = -(*pivmin); - } - if (tmp1 <= 0.) { - ++negcnt; - } - - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid; - if (fabs(tmp1) < *pivmin) { - tmp1 = -(*pivmin); - } - if (tmp1 <= 0.) { - ++negcnt; - } -/* L20: */ - } - if (negcnt >= *iw) { - right = mid; - } else { - left = mid; - } - goto L10; -L30: - -/* Converged or maximum number of iterations reached */ - - *w = (left + right) * .5; - *werr = (d__1 = right - left, fabs(d__1)) * .5; - return 0; - -/* End of ODRRK */ - -} /* odrrk_ */ diff --git a/external/pmrrr/src/lapack/odrrr.c b/external/pmrrr/src/lapack/odrrr.c deleted file mode 100644 index 3b510ce27a..0000000000 --- a/external/pmrrr/src/lapack/odrrr.c +++ /dev/null @@ -1,173 +0,0 @@ -/* dlarrr.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define TRUE_ (1) -#define FALSE_ (0) - -/* Subroutine */ -int odrrr(int *n, double *d__, double *e, int *info) -{ - /* System generated locals */ - int i__1; - double d__1; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__; - double eps, tmp, tmp2, rmin; - // extern double odmch(char *); - double offdig, safmin; - int yesrel; - double smlnum, offdig2; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - - -/* Purpose */ -/* ======= */ - -/* Perform tests to decide whether the symmetric tridiagonal matrix T */ -/* warrants expensive computations which guarantee high relative accuracy */ -/* in the eigenvalues. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. N > 0. */ - -/* D (input) DOUBLE PRECISION array, dimension (N) */ -/* The N diagonal elements of the tridiagonal matrix T. */ - -/* E (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the first (N-1) entries contain the subdiagonal */ -/* elements of the tridiagonal matrix T; E(N) is set to ZERO. */ - -/* INFO (output) INT */ -/* INFO = 0(default) : the matrix warrants computations preserving */ -/* relative accuracy. */ -/* INFO = 1 : the matrix warrants computations guaranteeing */ -/* only absolute accuracy. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* As a default, do NOT go for relative-accuracy preserving computations. */ - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - *info = 1; - safmin = DBL_MIN; // safmin = odmch("Safe minimum"); - eps = DBL_EPSILON; // eps = odmch("Precision"); - smlnum = safmin / eps; - rmin = sqrt(smlnum); -/* Tests for relative accuracy */ - -/* Test for scaled diagonal dominance */ -/* Scale the diagonal entries to one and check whether the sum of the */ -/* off-diagonals is less than one */ - -/* The sdd relative error bounds have a 1/(1- 2*x) factor in them, */ -/* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */ -/* accuracy is promised. In the notation of the code fragment below, */ -/* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */ -/* We don't think it is worth going into "sdd mode" unless the relative */ -/* condition number is reasonable, not 1/macheps. */ -/* The threshold should be compatible with other thresholds used in the */ -/* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */ -/* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */ -/* instead of the current OFFDIG + OFFDIG2 < 1 */ - - yesrel = TRUE_; - offdig = 0.; - tmp = sqrt((fabs(d__[1]))); - if (tmp < rmin) { - yesrel = FALSE_; - } - if (! yesrel) { - goto L11; - } - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - tmp2 = sqrt((d__1 = d__[i__], fabs(d__1))); - if (tmp2 < rmin) { - yesrel = FALSE_; - } - if (! yesrel) { - goto L11; - } - offdig2 = (d__1 = e[i__ - 1], fabs(d__1)) / (tmp * tmp2); - if (offdig + offdig2 >= .999) { - yesrel = FALSE_; - } - if (! yesrel) { - goto L11; - } - tmp = tmp2; - offdig = offdig2; -/* L10: */ - } -L11: - if (yesrel) { - *info = 0; - return 0; - } else { - } - - -/* *** MORE TO BE IMPLEMENTED *** */ - - -/* Test if the lower bidiagonal matrix L from T = L D L^T */ -/* (zero shift facto) is well conditioned */ - - -/* Test if the upper bidiagonal matrix U from T = U D U^T */ -/* (zero shift facto) is well conditioned. */ -/* In this case, the matrix needs to be flipped and, at the end */ -/* of the eigenvector computation, the flip needs to be applied */ -/* to the computed eigenvectors (and the support) */ - - - return 0; - -/* END OF ODRRR */ - -} /* odrrr_ */ diff --git a/external/pmrrr/src/lapack/odrrv.c b/external/pmrrr/src/lapack/odrrv.c deleted file mode 100644 index 0b9b2c8054..0000000000 --- a/external/pmrrr/src/lapack/odrrv.c +++ /dev/null @@ -1,986 +0,0 @@ -/* dlarrv.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imax(a,b) ( (a) > (b) ? (a) : (b) ) -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) -#define TRUE_ (1) -#define FALSE_ (0) - -/* Table of constant values */ -static double c_b5 = 0.; -static int c__1 = 1; -static int c__2 = 2; - -/* Subroutine */ int odrrv(int *n, double *vl, double *vu, - double *d__, double *l, double *pivmin, int *isplit, - int *m, int *dol, int *dou, double *minrgp, - double *rtol1, double *rtol2, double *w, double *werr, - double *wgap, int *iblock, int *indexw, double *gers, - double *z__, int *ldz, int *isuppz, double *work, - int *iwork, int *info) -{ - /* System generated locals */ - int z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; - double d__1, d__2; - int L__1; - - /* Builtin functions */ - // double log(double); - - /* Local variables */ - int minwsize, i__, j, k, p, q, miniwsize, ii; - double gl; - int im, in; - double gu, gap, eps, tau, tol, tmp; - int zto; - double ztz; - int iend, jblk; - double lgap; - int done; - double rgap, left; - int wend, iter; - double bstw; - int itmp1; - extern /* Subroutine */ int odscal(int *, double *, double *, - int *); - int indld; - double fudge; - int idone; - double sigma; - int iinfo, iindr; - double resid; - int eskip; - double right; - extern /* Subroutine */ int odcpy(int *, double *, int *, - double *, int *); - int nclus, zfrom; - double rqtol; - int iindc1, iindc2; - extern /* Subroutine */ int odr1v(int *, int *, int *, - double *, double *, double *, double *, - double *, double *, double *, double *, int *, - int *, double *, double *, int *, int *, - double *, double *, double *, double *); - int stp2ii; - double lambda; - // extern double odmch(char *); - int ibegin, indeig; - int needbs; - int indlld; - double sgndef, mingma; - extern /* Subroutine */ int odrrb(int *, double *, double *, - int *, int *, double *, double *, int *, - double *, double *, double *, double *, int *, - double *, double *, int *, int *); - int oldien, oldncl, wbegin; - double spdiam; - int negcnt; - extern /* Subroutine */ int odrrf(int *, double *, double *, - double *, int *, int *, double *, double *, - double *, double *, double *, double *, - double *, double *, double *, double *, - double *, int *); - int oldcls; - double savgap; - int ndepth; - double ssigma; - extern /* Subroutine */ int odset(char *, int *, int *, - double *, double *, double *, int *); - int usedbs; - int iindwk, offset; - double gaptol; - int newcls, oldfst, indwrk, windex, oldlst; - int usedrq; - int newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; - double bstres; - int newsiz, zusedu, zusedw; - double nrminv, rqcorr; - int tryrqc; - int isupmx; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODRRV computes the eigenvectors of the tridiagonal matrix */ -/* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ -/* The input eigenvalues should have been computed by DLARRE. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The order of the matrix. N >= 0. */ - -/* VL (input) DOUBLE PRECISION */ -/* VU (input) DOUBLE PRECISION */ -/* Lower and upper bounds of the interval that contains the desired */ -/* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ -/* end of the extremal eigenvalues in the desired RANGE. */ - -/* D (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the N diagonal elements of the diagonal matrix D. */ -/* On exit, D may be overwritten. */ - -/* L (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the (N-1) subdiagonal elements of the unit */ -/* bidiagonal matrix L are in elements 1 to N-1 of L */ -/* (if the matrix is not splitted.) At the end of each block */ -/* is stored the corresponding shift as given by DLARRE. */ -/* On exit, L is overwritten. */ - -/* PIVMIN (in) DOUBLE PRECISION */ -/* The minimum pivot allowed in the Sturm sequence. */ - -/* ISPLIT (input) INT array, dimension (N) */ -/* The splitting points, at which T breaks up into blocks. */ -/* The first block consists of rows/columns 1 to */ -/* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ -/* through ISPLIT( 2 ), etc. */ - -/* M (input) INT */ -/* The total number of input eigenvalues. 0 <= M <= N. */ - -/* DOL (input) INT */ -/* DOU (input) INT */ -/* If the user wants to compute only selected eigenvectors from all */ -/* the eigenvalues supplied, he can specify an index range DOL:DOU. */ -/* Or else the setting DOL=1, DOU=M should be applied. */ -/* Note that DOL and DOU refer to the order in which the eigenvalues */ -/* are stored in W. */ -/* If the user wants to compute only selected eigenpairs, then */ -/* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ -/* computed eigenvectors. All other columns of Z are set to zero. */ - -/* MINRGP (input) DOUBLE PRECISION */ - -/* RTOL1 (input) DOUBLE PRECISION */ -/* RTOL2 (input) DOUBLE PRECISION */ -/* Parameters for bisection. */ -/* An interval [LEFT,RIGHT] has converged if */ -/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ - -/* W (input/output) DOUBLE PRECISION array, dimension (N) */ -/* The first M elements of W contain the APPROXIMATE eigenvalues for */ -/* which eigenvectors are to be computed. The eigenvalues */ -/* should be grouped by split-off block and ordered from */ -/* smallest to largest within the block ( The output array */ -/* W from DLARRE is expected here ). Furthermore, they are with */ -/* respect to the shift of the corresponding root representation */ -/* for their block. On exit, W holds the eigenvalues of the */ -/* UNshifted matrix. */ - -/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ -/* The first M elements contain the semiwidth of the uncertainty */ -/* interval of the corresponding eigenvalue in W */ - -/* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ -/* The separation from the right neighbor eigenvalue in W. */ - -/* IBLOCK (input) INT array, dimension (N) */ -/* The indices of the blocks (submatrices) associated with the */ -/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ -/* W(i) belongs to the first block from the top, =2 if W(i) */ -/* belongs to the second block, etc. */ - -/* INDEXW (input) INT array, dimension (N) */ -/* The indices of the eigenvalues within each block (submatrix); */ -/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ -/* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */ - -/* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ -/* The N Gerschgorin intervals (the i-th Gerschgorin interval */ -/* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ -/* be computed from the original UNshifted matrix. */ - -/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ -/* If INFO = 0, the first M columns of Z contain the */ -/* orthonormal eigenvectors of the matrix T */ -/* corresponding to the input eigenvalues, with the i-th */ -/* column of Z holding the eigenvector associated with W(i). */ -/* Note: the user must ensure that at least max(1,M) columns are */ -/* supplied in the array Z. */ - -/* LDZ (input) INT */ -/* The leading dimension of the array Z. LDZ >= 1, and if */ -/* JOBZ = 'V', LDZ >= max(1,N). */ - -/* ISUPPZ (output) INT array, dimension ( 2*max(1,M) ) */ -/* The support of the eigenvectors in Z, i.e., the indices */ -/* indicating the nonzero elements in Z. The I-th eigenvector */ -/* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ -/* ISUPPZ( 2*I ). */ - -/* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ - -/* IWORK (workspace) INT array, dimension (7*N) */ - -/* INFO (output) INT */ -/* = 0: successful exit */ - -/* > 0: A problem occured in ODRRV. */ -/* < 0: One of the called subroutines signaled an internal problem. */ -/* Needs inspection of the corresponding parameter IINFO */ -/* for further information. */ - -/* =-1: Problem in ODRRB when refining a child's eigenvalues. */ -/* =-2: Problem in ODRRF when computing the RRR of a child. */ -/* When a child is inside a tight cluster, it can be difficult */ -/* to find an RRR. A partial remedy from the user's point of */ -/* view is to make the parameter MINRGP smaller and recompile. */ -/* However, as the orthogonality of the computed vectors is */ -/* proportional to 1/MINRGP, the user should be aware that */ -/* he might be trading in precision when he decreases MINRGP. */ -/* =-3: Problem in ODRRB when refining a single eigenvalue */ -/* after the Rayleigh correction was rejected. */ -/* = 5: The Rayleigh Quotient Iteration failed to converge to */ -/* full accuracy in MAXITR steps. */ - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ -/* .. */ -/* The first N entries of WORK are reserved for the eigenvalues */ - /* Parameter adjustments */ - --d__; - --l; - --isplit; - --w; - --werr; - --wgap; - --iblock; - --indexw; - --gers; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --isuppz; - --work; - --iwork; - - /* Function Body */ - indld = *n + 1; - indlld = (*n << 1) + 1; - indwrk = *n * 3 + 1; - minwsize = *n * 12; - i__1 = minwsize; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L5: */ - } -/* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ -/* factorization used to compute the FP vector */ - iindr = 0; -/* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ -/* layer and the one above. */ - iindc1 = *n; - iindc2 = *n << 1; - iindwk = *n * 3 + 1; - miniwsize = *n * 7; - i__1 = miniwsize; - for (i__ = 1; i__ <= i__1; ++i__) { - iwork[i__] = 0; -/* L10: */ - } - zusedl = 1; - if (*dol > 1) { -/* Set lower bound for use of Z */ - zusedl = *dol - 1; - } - zusedu = *m; - if (*dou < *m) { -/* Set lower bound for use of Z */ - zusedu = *dou + 1; - } -/* The width of the part of Z that is used */ - zusedw = zusedu - zusedl + 1; - odset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); - eps = DBL_EPSILON; // eps = odmch("Precision"); - rqtol = eps * 2.; - -/* Set expert flags for standard code. */ - tryrqc = TRUE_; - if (*dol == 1 && *dou == *m) { - } else { -/* Only selected eigenpairs are computed. Since the other evalues */ -/* are not refined by RQ iteration, bisection has to compute to full */ -/* accuracy. */ - *rtol1 = eps * 4.; - *rtol2 = eps * 4.; - } -/* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ -/* desired eigenvalues. The support of the nonzero eigenvector */ -/* entries is contained in the interval IBEGIN:IEND. */ -/* Remark that if k eigenpairs are desired, then the eigenvectors */ -/* are stored in k contiguous columns of Z. */ -/* DONE is the number of eigenvectors already computed */ - done = 0; - ibegin = 1; - wbegin = 1; - i__1 = iblock[*m]; - for (jblk = 1; jblk <= i__1; ++jblk) { - iend = isplit[jblk]; - sigma = l[iend]; -/* Find the eigenvectors of the submatrix indexed IBEGIN */ -/* through IEND. */ - wend = wbegin - 1; -L15: - if (wend < *m) { - if (iblock[wend + 1] == jblk) { - ++wend; - goto L15; - } - } - if (wend < wbegin) { - ibegin = iend + 1; - goto L170; - } else if (wend < *dol || wbegin > *dou) { - ibegin = iend + 1; - wbegin = wend + 1; - goto L170; - } -/* Find local spectral diameter of the block */ - gl = gers[(ibegin << 1) - 1]; - gu = gers[ibegin * 2]; - i__2 = iend; - for (i__ = ibegin + 1; i__ <= i__2; ++i__) { -/* Computing MIN */ - d__1 = gers[(i__ << 1) - 1]; - gl = fmin(d__1,gl); -/* Computing MAX */ - d__1 = gers[i__ * 2]; - gu = fmax(d__1,gu); -/* L20: */ - } - spdiam = gu - gl; -/* OLDIEN is the last index of the previous block */ - oldien = ibegin - 1; -/* Calculate the size of the current block */ - in = iend - ibegin + 1; -/* The number of eigenvalues in the current block */ - im = wend - wbegin + 1; -/* This is for a 1x1 block */ - if (ibegin == iend) { - ++done; - z__[ibegin + wbegin * z_dim1] = 1.; - isuppz[(wbegin << 1) - 1] = ibegin; - isuppz[wbegin * 2] = ibegin; - w[wbegin] += sigma; - work[wbegin] = w[wbegin]; - ibegin = iend + 1; - ++wbegin; - goto L170; - } -/* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ -/* Note that these can be approximations, in this case, the corresp. */ -/* entries of WERR give the size of the uncertainty interval. */ -/* The eigenvalue approximations will be refined when necessary as */ -/* high relative accuracy is required for the computation of the */ -/* corresponding eigenvectors. */ - odcpy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); -/* We store in W the eigenvalue approximations w.r.t. the original */ -/* matrix T. */ - i__2 = im; - for (i__ = 1; i__ <= i__2; ++i__) { - w[wbegin + i__ - 1] += sigma; -/* L30: */ - } -/* NDEPTH is the current depth of the representation tree */ - ndepth = 0; -/* PARITY is either 1 or 0 */ - parity = 1; -/* NCLUS is the number of clusters for the next level of the */ -/* representation tree, we start with NCLUS = 1 for the root */ - nclus = 1; - iwork[iindc1 + 1] = 1; - iwork[iindc1 + 2] = im; -/* IDONE is the number of eigenvectors already computed in the current */ -/* block */ - idone = 0; -/* loop while( IDONE.LT.IM ) */ -/* generate the representation tree for the current block and */ -/* compute the eigenvectors */ -L40: - if (idone < im) { -/* This is a crude protection against infinitely deep trees */ - if (ndepth > *m) { - *info = -2; - return 0; - } -/* breadth first processing of the current level of the representation */ -/* tree: OLDNCL = number of clusters on current level */ - oldncl = nclus; -/* reset NCLUS to count the number of child clusters */ - nclus = 0; - - parity = 1 - parity; - if (parity == 0) { - oldcls = iindc1; - newcls = iindc2; - } else { - oldcls = iindc2; - newcls = iindc1; - } -/* Process the clusters on the current level */ - i__2 = oldncl; - for (i__ = 1; i__ <= i__2; ++i__) { - j = oldcls + (i__ << 1); -/* OLDFST, OLDLST = first, last index of current cluster. */ -/* cluster indices start with 1 and are relative */ -/* to WBEGIN when accessing W, WGAP, WERR, Z */ - oldfst = iwork[j - 1]; - oldlst = iwork[j]; - if (ndepth > 0) { -/* Retrieve relatively robust representation (RRR) of cluster */ -/* that has been computed at the previous level */ -/* The RRR is stored in Z and overwritten once the eigenvectors */ -/* have been computed or when the cluster is refined */ - if (*dol == 1 && *dou == *m) { -/* Get representation from location of the leftmost evalue */ -/* of the cluster */ - j = wbegin + oldfst - 1; - } else { - if (wbegin + oldfst - 1 < *dol) { -/* Get representation from the left end of Z array */ - j = *dol - 1; - } else if (wbegin + oldfst - 1 > *dou) { -/* Get representation from the right end of Z array */ - j = *dou; - } else { - j = wbegin + oldfst - 1; - } - } - odcpy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] -, &c__1); - i__3 = in - 1; - odcpy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ - ibegin], &c__1); - sigma = z__[iend + (j + 1) * z_dim1]; -/* Set the corresponding entries in Z to zero */ - odset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j - * z_dim1], ldz); - } -/* Compute DL and DLL of current RRR */ - i__3 = iend - 1; - for (j = ibegin; j <= i__3; ++j) { - tmp = d__[j] * l[j]; - work[indld - 1 + j] = tmp; - work[indlld - 1 + j] = tmp * l[j]; -/* L50: */ - } - if (ndepth > 0) { -/* P and Q are index of the first and last eigenvalue to compute */ -/* within the current block */ - p = indexw[wbegin - 1 + oldfst]; - q = indexw[wbegin - 1 + oldlst]; -/* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ -/* thru' Q-OFFSET elements of these arrays are to be used. */ -/* OFFSET = P-OLDFST */ - offset = indexw[wbegin] - 1; -/* perform limited bisection (if necessary) to get approximate */ -/* eigenvalues to the precision needed. */ - odrrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, - &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ - wbegin], &werr[wbegin], &work[indwrk], &iwork[ - iindwk], pivmin, &spdiam, &in, &iinfo); - if (iinfo != 0) { - *info = -1; - return 0; - } -/* We also recompute the extremal gaps. W holds all eigenvalues */ -/* of the unshifted matrix and must be used for computation */ -/* of WGAP, the entries of WORK might stem from RRRs with */ -/* different shifts. The gaps from WBEGIN-1+OLDFST to */ -/* WBEGIN-1+OLDLST are correctly computed in ODRRB. */ -/* However, we only allow the gaps to become greater since */ -/* this is what should happen when we decrease WERR */ - if (oldfst > 1) { -/* Computing MAX */ - d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + - oldfst - 1] - werr[wbegin + oldfst - 1] - w[ - wbegin + oldfst - 2] - werr[wbegin + oldfst - - 2]; - wgap[wbegin + oldfst - 2] = fmax(d__1,d__2); - } - if (wbegin + oldlst - 1 < wend) { -/* Computing MAX */ - d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + - oldlst] - werr[wbegin + oldlst] - w[wbegin + - oldlst - 1] - werr[wbegin + oldlst - 1]; - wgap[wbegin + oldlst - 1] = fmax(d__1,d__2); - } -/* Each time the eigenvalues in WORK get refined, we store */ -/* the newly found approximation with all shifts applied in W */ - i__3 = oldlst; - for (j = oldfst; j <= i__3; ++j) { - w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; -/* L53: */ - } - } -/* Process the current node. */ - newfst = oldfst; - i__3 = oldlst; - for (j = oldfst; j <= i__3; ++j) { - if (j == oldlst) { -/* we are at the right end of the cluster, this is also the */ -/* boundary of the child cluster */ - newlst = j; - } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ - wbegin + j - 1], fabs(d__1))) { -/* the right relative gap is big enough, the child cluster */ -/* (NEWFST,..,NEWLST) is well separated from the following */ - newlst = j; - } else { -/* inside a child cluster, the relative gap is not */ -/* big enough. */ - goto L140; - } -/* Compute size of child cluster found */ - newsiz = newlst - newfst + 1; -/* NEWFTT is the place in Z where the new RRR or the computed */ -/* eigenvector is to be stored */ - if (*dol == 1 && *dou == *m) { -/* Store representation at location of the leftmost evalue */ -/* of the cluster */ - newftt = wbegin + newfst - 1; - } else { - if (wbegin + newfst - 1 < *dol) { -/* Store representation at the left end of Z array */ - newftt = *dol - 1; - } else if (wbegin + newfst - 1 > *dou) { -/* Store representation at the right end of Z array */ - newftt = *dou; - } else { - newftt = wbegin + newfst - 1; - } - } - if (newsiz > 1) { - -/* Current child is not a singleton but a cluster. */ -/* Compute and store new representation of child. */ - - -/* Compute left and right cluster gap. */ - -/* LGAP and RGAP are not computed from WORK because */ -/* the eigenvalue approximations may stem from RRRs */ -/* different shifts. However, W hold all eigenvalues */ -/* of the unshifted matrix. Still, the entries in WGAP */ -/* have to be computed from WORK since the entries */ -/* in W might be of the same order so that gaps are not */ -/* exhibited correctly for very close eigenvalues. */ - if (newfst == 1) { -/* Computing MAX */ - d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; - lgap = fmax(d__1,d__2); - } else { - lgap = wgap[wbegin + newfst - 2]; - } - rgap = wgap[wbegin + newlst - 1]; - -/* Compute left- and rightmost eigenvalue of child */ -/* to high precision in order to shift as close */ -/* as possible and obtain as large relative gaps */ -/* as possible */ - - for (k = 1; k <= 2; ++k) { - if (k == 1) { - p = indexw[wbegin - 1 + newfst]; - } else { - p = indexw[wbegin - 1 + newlst]; - } - offset = indexw[wbegin] - 1; - odrrb(&in, &d__[ibegin], &work[indlld + ibegin - - 1], &p, &p, &rqtol, &rqtol, &offset, & - work[wbegin], &wgap[wbegin], &werr[wbegin] -, &work[indwrk], &iwork[iindwk], pivmin, & - spdiam, &in, &iinfo); -/* L55: */ - } - - if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 - > *dou) { -/* if the cluster contains no desired eigenvalues */ -/* skip the computation of that branch of the rep. tree */ - -/* We could skip before the refinement of the extremal */ -/* eigenvalues of the child, but then the representation */ -/* tree could be different from the one when nothing is */ -/* skipped. For this reason we skip at this place. */ - idone = idone + newlst - newfst + 1; - goto L139; - } - -/* Compute RRR of child cluster. */ -/* Note that the new RRR is stored in Z */ - -/* ODRRF needs LWORK = 2*N */ - odrrf(&in, &d__[ibegin], &l[ibegin], &work[indld + - ibegin - 1], &newfst, &newlst, &work[wbegin], - &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, - &rgap, pivmin, &tau, &z__[ibegin + newftt * - z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], - &work[indwrk], &iinfo); - if (iinfo == 0) { -/* a new RRR for the cluster was found by ODRRF */ -/* update shift and store it */ - ssigma = sigma + tau; - z__[iend + (newftt + 1) * z_dim1] = ssigma; -/* WORK() are the midpoints and WERR() the semi-width */ -/* Note that the entries in W are unchanged. */ - i__4 = newlst; - for (k = newfst; k <= i__4; ++k) { - fudge = eps * 3. * (d__1 = work[wbegin + k - - 1], fabs(d__1)); - work[wbegin + k - 1] -= tau; - fudge += eps * 4. * (d__1 = work[wbegin + k - - 1], fabs(d__1)); -/* Fudge errors */ - werr[wbegin + k - 1] += fudge; -/* Gaps are not fudged. Provided that WERR is small */ -/* when eigenvalues are close, a zero gap indicates */ -/* that a new representation is needed for resolving */ -/* the cluster. A fudge could lead to a wrong decision */ -/* of judging eigenvalues 'separated' which in */ -/* reality are not. This could have a negative impact */ -/* on the orthogonality of the computed eigenvectors. */ -/* L116: */ - } - ++nclus; - k = newcls + (nclus << 1); - iwork[k - 1] = newfst; - iwork[k] = newlst; - } else { - *info = -2; - return 0; - } - } else { - -/* Compute eigenvector of singleton */ - - iter = 0; - - tol = log((double) in) * 4. * eps; - - k = newfst; - windex = wbegin + k - 1; -/* Computing MAX */ - i__4 = windex - 1; - windmn = imax(i__4,1); -/* Computing MIN */ - i__4 = windex + 1; - windpl = imin(i__4,*m); - lambda = work[windex]; - ++done; -/* Check if eigenvector computation is to be skipped */ - if (windex < *dol || windex > *dou) { - eskip = TRUE_; - goto L125; - } else { - eskip = FALSE_; - } - left = work[windex] - werr[windex]; - right = work[windex] + werr[windex]; - indeig = indexw[windex]; -/* Note that since we compute the eigenpairs for a child, */ -/* all eigenvalue approximations are w.r.t the same shift. */ -/* In this case, the entries in WORK should be used for */ -/* computing the gaps since they exhibit even very small */ -/* differences in the eigenvalues, as opposed to the */ -/* entries in W which might "look" the same. */ - if (k == 1) { -/* In the case RANGE='I' and with not much initial */ -/* accuracy in LAMBDA and VL, the formula */ -/* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ -/* can lead to an overestimation of the left gap and */ -/* thus to inadequately early RQI 'convergence'. */ -/* Prevent this by forcing a small left gap. */ -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - lgap = eps * fmax(d__1,d__2); - } else { - lgap = wgap[windmn]; - } - if (k == im) { -/* In the case RANGE='I' and with not much initial */ -/* accuracy in LAMBDA and VU, the formula */ -/* can lead to an overestimation of the right gap and */ -/* thus to inadequately early RQI 'convergence'. */ -/* Prevent this by forcing a small right gap. */ -/* Computing MAX */ - d__1 = fabs(left), d__2 = fabs(right); - rgap = eps * fmax(d__1,d__2); - } else { - rgap = wgap[windex]; - } - gap = fmin(lgap,rgap); - if (k == 1 || k == im) { -/* The eigenvector support can become wrong */ -/* because significant entries could be cut off due to a */ -/* large GAPTOL parameter in LAR1V. Prevent this. */ - gaptol = 0.; - } else { - gaptol = gap * eps; - } - isupmn = in; - isupmx = 1; -/* Update WGAP so that it holds the minimum gap */ -/* to the left or the right. This is crucial in the */ -/* case where bisection is used to ensure that the */ -/* eigenvalue is refined up to the required precision. */ -/* The correct value is restored afterwards. */ - savgap = wgap[windex]; - wgap[windex] = gap; -/* We want to use the Rayleigh Quotient Correction */ -/* as often as possible since it converges quadratically */ -/* when we are close enough to the desired eigenvalue. */ -/* However, the Rayleigh Quotient can have the wrong sign */ -/* and lead us away from the desired eigenvalue. In this */ -/* case, the best we can do is to use bisection. */ - usedbs = FALSE_; - usedrq = FALSE_; -/* Bisection is initially turned off unless it is forced */ - needbs = ! tryrqc; -L120: -/* Check if bisection should be used to refine eigenvalue */ - if (needbs) { -/* Take the bisection as new iterate */ - usedbs = TRUE_; - itmp1 = iwork[iindr + windex]; - offset = indexw[wbegin] - 1; - d__1 = eps * 2.; - odrrb(&in, &d__[ibegin], &work[indlld + ibegin - - 1], &indeig, &indeig, &c_b5, &d__1, & - offset, &work[wbegin], &wgap[wbegin], & - werr[wbegin], &work[indwrk], &iwork[ - iindwk], pivmin, &spdiam, &itmp1, &iinfo); - if (iinfo != 0) { - *info = -3; - return 0; - } - lambda = work[windex]; -/* Reset twist index from inaccurate LAMBDA to */ -/* force computation of true MINGMA */ - iwork[iindr + windex] = 0; - } -/* Given LAMBDA, compute the eigenvector. */ - L__1 = ! usedbs; - odr1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ - ibegin], &work[indld + ibegin - 1], &work[ - indlld + ibegin - 1], pivmin, &gaptol, &z__[ - ibegin + windex * z_dim1], &L__1, &negcnt, & - ztz, &mingma, &iwork[iindr + windex], &isuppz[ - (windex << 1) - 1], &nrminv, &resid, &rqcorr, - &work[indwrk]); - if (iter == 0) { - bstres = resid; - bstw = lambda; - } else if (resid < bstres) { - bstres = resid; - bstw = lambda; - } -/* Computing MIN */ - i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; - isupmn = imin(i__4,i__5); -/* Computing MAX */ - i__4 = isupmx, i__5 = isuppz[windex * 2]; - isupmx = imax(i__4,i__5); - ++iter; -/* sin alpha <= |resid|/gap */ -/* Note that both the residual and the gap are */ -/* proportional to the matrix, so ||T|| doesn't play */ -/* a role in the quotient */ - -/* Convergence test for Rayleigh-Quotient iteration */ -/* (omitted when Bisection has been used) */ - - if (resid > tol * gap && fabs(rqcorr) > rqtol * fabs( - lambda) && ! usedbs) { -/* We need to check that the RQCORR update doesn't */ -/* move the eigenvalue away from the desired one and */ -/* towards a neighbor. -> protection with bisection */ - if (indeig <= negcnt) { -/* The wanted eigenvalue lies to the left */ - sgndef = -1.; - } else { -/* The wanted eigenvalue lies to the right */ - sgndef = 1.; - } -/* We only use the RQCORR if it improves the */ -/* the iterate reasonably. */ - if (rqcorr * sgndef >= 0. && lambda + rqcorr <= - right && lambda + rqcorr >= left) { - usedrq = TRUE_; -/* Store new midpoint of bisection interval in WORK */ - if (sgndef == 1.) { -/* The current LAMBDA is on the left of the true */ -/* eigenvalue */ - left = lambda; -/* We prefer to assume that the error estimate */ -/* is correct. We could make the interval not */ -/* as a bracket but to be modified if the RQCORR */ -/* chooses to. In this case, the RIGHT side should */ -/* be modified as follows: */ -/* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ - } else { -/* The current LAMBDA is on the right of the true */ -/* eigenvalue */ - right = lambda; -/* See comment about assuming the error estimate is */ -/* correct above. */ -/* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ - } - work[windex] = (right + left) * .5; -/* Take RQCORR since it has the correct sign and */ -/* improves the iterate reasonably */ - lambda += rqcorr; -/* Update width of error interval */ - werr[windex] = (right - left) * .5; - } else { - needbs = TRUE_; - } - if (right - left < rqtol * fabs(lambda)) { -/* The eigenvalue is computed to bisection accuracy */ -/* compute eigenvector and stop */ - usedbs = TRUE_; - goto L120; - } else if (iter < 10) { - goto L120; - } else if (iter == 10) { - needbs = TRUE_; - goto L120; - } else { - *info = 5; - return 0; - } - } else { - stp2ii = FALSE_; - if (usedrq && usedbs && bstres <= resid) { - lambda = bstw; - stp2ii = TRUE_; - } - if (stp2ii) { -/* improve error angle by second step */ - L__1 = ! usedbs; - odr1v(&in, &c__1, &in, &lambda, &d__[ibegin] -, &l[ibegin], &work[indld + ibegin - - 1], &work[indlld + ibegin - 1], - pivmin, &gaptol, &z__[ibegin + windex - * z_dim1], &L__1, &negcnt, &ztz, & - mingma, &iwork[iindr + windex], & - isuppz[(windex << 1) - 1], &nrminv, & - resid, &rqcorr, &work[indwrk]); - } - work[windex] = lambda; - } - -/* Compute FP-vector support w.r.t. whole matrix */ - - isuppz[(windex << 1) - 1] += oldien; - isuppz[windex * 2] += oldien; - zfrom = isuppz[(windex << 1) - 1]; - zto = isuppz[windex * 2]; - isupmn += oldien; - isupmx += oldien; -/* Ensure vector is ok if support in the RQI has changed */ - if (isupmn < zfrom) { - i__4 = zfrom - 1; - for (ii = isupmn; ii <= i__4; ++ii) { - z__[ii + windex * z_dim1] = 0.; -/* L122: */ - } - } - if (isupmx > zto) { - i__4 = isupmx; - for (ii = zto + 1; ii <= i__4; ++ii) { - z__[ii + windex * z_dim1] = 0.; -/* L123: */ - } - } - i__4 = zto - zfrom + 1; - odscal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], - &c__1); -L125: -/* Update W */ - w[windex] = lambda + sigma; -/* Recompute the gaps on the left and right */ -/* But only allow them to become larger and not */ -/* smaller (which can only happen through "bad" */ -/* cancellation and doesn't reflect the theory */ -/* where the initial gaps are underestimated due */ -/* to WERR being too crude.) */ - if (! eskip) { - if (k > 1) { -/* Computing MAX */ - d__1 = wgap[windmn], d__2 = w[windex] - werr[ - windex] - w[windmn] - werr[windmn]; - wgap[windmn] = fmax(d__1,d__2); - } - if (windex < wend) { -/* Computing MAX */ - d__1 = savgap, d__2 = w[windpl] - werr[windpl] - - w[windex] - werr[windex]; - wgap[windex] = fmax(d__1,d__2); - } - } - ++idone; - } -/* here ends the code for the current child */ - -L139: -/* Proceed to any remaining child nodes */ - newfst = j + 1; -L140: - ; - } -/* L150: */ - } - ++ndepth; - goto L40; - } - ibegin = iend + 1; - wbegin = wend + 1; -L170: - ; - } - - return 0; - -/* End of ODRRV */ - -} /* odrrv_ */ diff --git a/external/pmrrr/src/lapack/odruv.c b/external/pmrrr/src/lapack/odruv.c deleted file mode 100644 index 170bf103a6..0000000000 --- a/external/pmrrr/src/lapack/odruv.c +++ /dev/null @@ -1,190 +0,0 @@ -/* dlaruv.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -#define imax(a,b) ( (a) > (b) ? (a) : (b) ) -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) - -/* Subroutine */ -int odruv(int *iseed, int *n, double *x) -{ - /* Initialized data */ - - static int mm[512] /* was [128][4] */ = { 494,2637,255,2008,1253, - 3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016, - 154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657, - 3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797, - 1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287, - 2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094, - 1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119, - 3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090, - 3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364, - 1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573, - 1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46, - 3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019, - 1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640, - 2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336, - 1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168, - 1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270, - 2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631, - 1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948, - 1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716, - 1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966, - 758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078, - 3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125, - 2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466, - 4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449, - 1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922, - 2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039, - 1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76, - 3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888, - 1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549, - 1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673, - 541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157, - 1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85, - 3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941, - 929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997, - 1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909, - 2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141, - 249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825, - 157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821, - 3537,517,3017,2141,1537 }; - - /* System generated locals */ - int i__1; - - /* Local variables */ - int i__, i1, i2, i3, i4, it1, it2, it3, it4; - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODRUV returns a vector of n random real numbers from a uniform (0,1) */ -/* distribution (n <= 128). */ - -/* This is an auxiliary routine called by DLARNV and ZLARNV. */ - -/* Arguments */ -/* ========= */ - -/* ISEED (input/output) INT array, dimension (4) */ -/* On entry, the seed of the random number generator; the array */ -/* elements must be between 0 and 4095, and ISEED(4) must be */ -/* odd. */ -/* On exit, the seed is updated. */ - -/* N (input) INT */ -/* The number of random numbers to be generated. N <= 128. */ - -/* X (output) DOUBLE PRECISION array, dimension (N) */ -/* The generated random numbers. */ - -/* Further Details */ -/* =============== */ - -/* This routine uses a multiplicative congruential method with modulus */ -/* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ -/* 'Multiplicative congruential random number generators with modulus */ -/* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ -/* b = 48', Math. Comp. 189, pp 331-344, 1990). */ - -/* 48-bit ints are stored in 4 int array elements with 12 bits */ -/* per element. Hence the routine is portable across machines with */ -/* ints of 32 bits or more. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Local Arrays .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Data statements .. */ - /* Parameter adjustments */ - --iseed; - --x; - - /* Function Body */ -/* .. */ -/* .. Executable Statements .. */ - - i1 = iseed[1]; - i2 = iseed[2]; - i3 = iseed[3]; - i4 = iseed[4]; - - i__1 = imin(*n,128); - for (i__ = 1; i__ <= i__1; ++i__) { - -L20: - -/* Multiply the seed by i-th power of the multiplier modulo 2**48 */ - - it4 = i4 * mm[i__ + 383]; - it3 = it4 / 4096; - it4 -= it3 << 12; - it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255]; - it2 = it3 / 4096; - it3 -= it2 << 12; - it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + - 127]; - it1 = it2 / 4096; - it2 -= it1 << 12; - it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + - 127] + i4 * mm[i__ - 1]; - it1 %= 4096; - -/* Convert 48-bit int to a real number in the interval (0,1) */ - - x[i__] = ((double) it1 + ((double) it2 + ((double) it3 + ( - double) it4 * 2.44140625e-4) * 2.44140625e-4) * - 2.44140625e-4) * 2.44140625e-4; - - if (x[i__] == 1.) { -/* If a real number has n bits of precision, and the first */ -/* n bits of the 48-bit int above happen to be all 1 (which */ -/* will occur about once every 2**n calls), then X( I ) will */ -/* be rounded to exactly 1.0. */ -/* Since X( I ) is not supposed to return exactly 0.0 or 1.0, */ -/* the statistically correct thing to do in this situation is */ -/* simply to iterate again. */ -/* N.B. the case X( I ) = 0.0 should not be possible. */ - i1 += 2; - i2 += 2; - i3 += 2; - i4 += 2; - goto L20; - } - -/* L10: */ - } - -/* Return final value of seed */ - - iseed[1] = it1; - iseed[2] = it2; - iseed[3] = it3; - iseed[4] = it4; - return 0; - -/* End of ODRUV */ - -} /* odruv_ */ diff --git a/external/pmrrr/src/lapack/odset.c b/external/pmrrr/src/lapack/odset.c deleted file mode 100644 index c9d5e1003e..0000000000 --- a/external/pmrrr/src/lapack/odset.c +++ /dev/null @@ -1,149 +0,0 @@ -/* dlaset.f -- translated by f2c (version 20061008). -*/ - -#include -#include -#include -#include -#include -#include - -#define imin(a,b) ( (a) < (b) ? (a) : (b) ) - -/* Subroutine */ int odset(char *uplo, int *m, int *n, double *alpha, -double *beta, double *a, int *lda) -{ - /* System generated locals */ - int a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - int i__, j; - extern int olsame(char *, char *); - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSET initializes an m-by-n matrix A to BETA on the diagonal and */ -/* ALPHA on the offdiagonals. */ - -/* Arguments */ -/* ========= */ - -/* UPLO (input) CHARACTER*1 */ -/* Specifies the part of the matrix A to be set. */ -/* = 'U': Upper triangular part is set; the strictly lower */ -/* triangular part of A is not changed. */ -/* = 'L': Lower triangular part is set; the strictly upper */ -/* triangular part of A is not changed. */ -/* Otherwise: All of the matrix A is set. */ - -/* M (input) INT */ -/* The number of rows of the matrix A. M >= 0. */ - -/* N (input) INT */ -/* The number of columns of the matrix A. N >= 0. */ - -/* ALPHA (input) DOUBLE PRECISION */ -/* The constant to which the offdiagonal elements are to be set. */ - -/* BETA (input) DOUBLE PRECISION */ -/* The constant to which the diagonal elements are to be set. */ - -/* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ -/* On exit, the leading m-by-n submatrix of A is set as follows: */ - -/* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ -/* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ -/* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ - -/* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ - -/* LDA (input) INT */ -/* The leading dimension of the array A. LDA >= max(1,M). */ - -/* ===================================================================== */ - -/* .. Local Scalars .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (olsame(uplo, "U")) { - -/* Set the strictly upper triangular or trapezoidal part of the */ -/* array to ALPHA. */ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = imin(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L10: */ - } -/* L20: */ - } - - } else if (olsame(uplo, "L")) { - -/* Set the strictly lower triangular or trapezoidal part of the */ -/* array to ALPHA. */ - - i__1 = imin(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L30: */ - } -/* L40: */ - } - - } else { - -/* Set the leading m-by-n submatrix to ALPHA. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L50: */ - } -/* L60: */ - } - } - -/* Set the first imin(M,N) diagonal elements to BETA. */ - - i__1 = imin(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - a[i__ + i__ * a_dim1] = *beta; -/* L70: */ - } - - return 0; - -/* End of ODSET */ - -} /* odset_ */ diff --git a/external/pmrrr/src/lapack/odsnan.c b/external/pmrrr/src/lapack/odsnan.c deleted file mode 100644 index 7def93e524..0000000000 --- a/external/pmrrr/src/lapack/odsnan.c +++ /dev/null @@ -1,52 +0,0 @@ -/* dlaisnan.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -int odsnan(double *din1, double *din2) -{ - /* System generated locals */ - int ret_val; - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* This routine is not for general use. It exists solely to avoid */ -/* over-optimization in DISNAN. */ - -/* ODSNAN checks for NaNs by comparing its two arguments for */ -/* inequality. NaN is the only floating-point value where NaN != NaN */ -/* returns .TRUE. To check for NaNs, pass the same variable as both */ -/* arguments. */ - -/* A compiler must assume that the two arguments are */ -/* not the same variable, and the test will not be optimized away. */ -/* Interprocedural or whole-program optimization may delete this */ -/* test. The ISNAN functions will be replaced by the correct */ -/* Fortran 03 intrinsic once the intrinsic is widely available. */ - -/* Arguments */ -/* ========= */ - -/* DIN1 (input) DOUBLE PRECISION */ -/* DIN2 (input) DOUBLE PRECISION */ -/* Two numbers to compare for inequality. */ - -/* ===================================================================== */ - -/* .. Executable Statements .. */ - ret_val = *din1 != *din2; - return ret_val; -} /* odsnan_ */ - diff --git a/external/pmrrr/src/lapack/odsq2.c b/external/pmrrr/src/lapack/odsq2.c deleted file mode 100644 index 12c823074d..0000000000 --- a/external/pmrrr/src/lapack/odsq2.c +++ /dev/null @@ -1,595 +0,0 @@ -/* dlasq2.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Table of constant values */ -static int c__1 = 1; -static int c__2 = 2; -/* -static int c__10 = 10; -static int c__3 = 3; -static int c__4 = 4; -static int c__11 = 11; -*/ - -/* Subroutine */ -int odsq2(int *n, double *z__, int *info) -{ - /* System generated locals */ - int i__1, i__2, i__3; - double d__1, d__2; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - double d__, e, g; - int k; - double s, t; - int i0, i4, n0; - double dn; - int pp; - double dn1, dn2, dee, eps, tau, tol; - int ipn4; - double tol2; - int ieee; - int nbig; - double dmin__, emin, emax; - int kmin, ndiv, iter; - double qmin, temp, qmax, zmax; - int splt; - double dmin1, dmin2; - int nfail; - double desig, trace, sigma; - int iinfo, ttype; - extern /* Subroutine */ int odsq3(int *, int *, double *, - int *, double *, double *, double *, double *, - int *, int *, int *, int *, int *, - double *, double *, double *, double *, - double *, double *, double *); - // extern double odmch(char *); - double deemin; - int iwhila, iwhilb; - double oldemn, safmin; - extern /* Subroutine */ int oerbla(char *, int *); - /* extern int oienv(int *, char *, char *, int *, int *, */ - /* int *, int *); */ - extern /* Subroutine */ int odsrt(char *, int *, double *, - int *); - - -/* -- LAPACK routine (version 3.2) -- */ - -/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ -/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ -/* -- Berkeley -- */ -/* -- November 2008 -- */ - -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSQ2 computes all the eigenvalues of the symmetric positive */ -/* definite tridiagonal matrix associated with the qd array Z to high */ -/* relative accuracy are computed to high relative accuracy, in the */ -/* absence of denormalization, underflow and overflow. */ - -/* To see the relation of Z to the tridiagonal matrix, let L be a */ -/* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */ -/* let U be an upper bidiagonal matrix with 1's above and diagonal */ -/* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */ -/* symmetric tridiagonal to which it is similar. */ - -/* Note : ODSQ2 defines a int variable, IEEE, which is true */ -/* on machines which follow ieee-754 floating-point standard in their */ -/* handling of infinities and NaNs, and false otherwise. This variable */ -/* is passed to ODSQ3. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The number of rows and columns in the matrix. N >= 0. */ - -/* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */ -/* On entry Z holds the qd array. On exit, entries 1 to N hold */ -/* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */ -/* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */ -/* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */ -/* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */ -/* shifts that failed. */ - -/* INFO (output) INT */ -/* = 0: successful exit */ -/* < 0: if the i-th argument is a scalar and had an illegal */ -/* value, then INFO = -i, if the i-th argument is an */ -/* array and the j-entry had an illegal value, then */ -/* INFO = -(i*100+j) */ -/* > 0: the algorithm failed */ -/* = 1, a split was marked by a positive value in E */ -/* = 2, current block of Z not diagonalized after 30*N */ -/* iterations (in inner while loop) */ -/* = 3, termination criterion of outer while loop not met */ -/* (program created more than N unreduced blocks) */ - -/* Further Details */ -/* =============== */ -/* Local Variables: I0:N0 defines a current unreduced segment of Z. */ -/* The shifts are accumulated in SIGMA. Iteration count is in ITER. */ -/* Ping-pong is controlled by PP (alternates between 0 and 1). */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Test the input arguments. */ -/* (in case ODSQ2 is not called by DLASQ1) */ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - *info = 0; - eps = DBL_EPSILON; // odmch("Precision"); - safmin = DBL_MIN; // odmch("Safe minimum"); - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - - if (*n < 0) { - *info = -1; - oerbla("ODSQ2", &c__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - -/* 1-by-1 case. */ - - if (z__[1] < 0.) { - *info = -201; - oerbla("ODSQ2", &c__2); - } - return 0; - } else if (*n == 2) { - -/* 2-by-2 case. */ - - if (z__[2] < 0. || z__[3] < 0.) { - *info = -2; - oerbla("ODSQ2", &c__2); - return 0; - } else if (z__[3] > z__[1]) { - d__ = z__[3]; - z__[3] = z__[1]; - z__[1] = d__; - } - z__[5] = z__[1] + z__[2] + z__[3]; - if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5; - s = z__[3] * (z__[2] / t); - if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); - } else { - s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[1] + (s + z__[2]); - z__[3] *= z__[1] / t; - z__[1] = t; - } - z__[2] = z__[3]; - z__[6] = z__[2] + z__[1]; - return 0; - } - -/* Check for negative data and compute sums of q's and e's. */ - - z__[*n * 2] = 0.; - emin = z__[2]; - qmax = 0.; - zmax = 0.; - d__ = 0.; - e = 0.; - - i__1 = *n - 1 << 1; - for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.) { - *info = -(k + 200); - oerbla("ODSQ2", &c__2); - return 0; - } else if (z__[k + 1] < 0.) { - *info = -(k + 201); - oerbla("ODSQ2", &c__2); - return 0; - } - d__ += z__[k]; - e += z__[k + 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[k]; - qmax = fmax(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[k + 1]; - emin = fmin(d__1,d__2); -/* Computing MAX */ - d__1 = fmax(qmax,zmax), d__2 = z__[k + 1]; - zmax = fmax(d__1,d__2); -/* L10: */ - } - if (z__[(*n << 1) - 1] < 0.) { - *info = -((*n << 1) + 199); - oerbla("ODSQ2", &c__2); - return 0; - } - d__ += z__[(*n << 1) - 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[(*n << 1) - 1]; - qmax = fmax(d__1,d__2); - zmax = fmax(qmax,zmax); - -/* Check for diagonality. */ - - if (e == 0.) { - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 1) - 1]; -/* L20: */ - } - odsrt("D", n, &z__[1], &iinfo); - z__[(*n << 1) - 1] = d__; - return 0; - } - - trace = d__ + e; - -/* Check for zero data. */ - - if (trace == 0.) { - z__[(*n << 1) - 1] = 0.; - return 0; - } - -/* Check whether the machine is IEEE conformable. */ - /* ieee = oienv(&c__10, "ODSQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && oienv(&c__11, "ODSQ2", "N", &c__1, &c__2, */ - /* &c__3, &c__4) == 1; */ - ieee = 1; - -/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ - - for (k = *n << 1; k >= 2; k += -2) { - z__[k * 2] = 0.; - z__[(k << 1) - 1] = z__[k]; - z__[(k << 1) - 2] = 0.; - z__[(k << 1) - 3] = z__[k - 1]; -/* L30: */ - } - - i0 = 1; - n0 = *n; - -/* Reverse the qd-array, if warranted. */ - - if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { - ipn4 = i0 + n0 << 2; - i__1 = i0 + n0 - 1 << 1; - for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { - temp = z__[i4 - 3]; - z__[i4 - 3] = z__[ipn4 - i4 - 3]; - z__[ipn4 - i4 - 3] = temp; - temp = z__[i4 - 1]; - z__[i4 - 1] = z__[ipn4 - i4 - 5]; - z__[ipn4 - i4 - 5] = temp; -/* L40: */ - } - } - -/* Initial split checking via dqd and Li's test. */ - - pp = 0; - - for (k = 1; k <= 2; ++k) { - - d__ = z__[(n0 << 2) + pp - 3]; - i__1 = (i0 << 2) + pp; - for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - d__ = z__[i4 - 3]; - } else { - d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); - } -/* L50: */ - } - -/* dqd maps Z to ZZ plus Li's test. */ - - emin = z__[(i0 << 2) + pp + 1]; - d__ = z__[(i0 << 2) + pp - 3]; - i__1 = (n0 - 1 << 2) + pp; - for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { - z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - z__[i4 - (pp << 1) - 2] = d__; - z__[i4 - (pp << 1)] = 0.; - d__ = z__[i4 + 1]; - } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && - safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { - temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; - z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; - d__ *= temp; - } else { - z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( - pp << 1) - 2]); - d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); - } -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - (pp << 1)]; - emin = fmin(d__1,d__2); -/* L60: */ - } - z__[(n0 << 2) - pp - 2] = d__; - -/* Now find qmax. */ - - qmax = z__[(i0 << 2) - pp - 2]; - i__1 = (n0 << 2) - pp - 2; - for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4]; - qmax = fmax(d__1,d__2); -/* L70: */ - } - -/* Prepare for the next iteration on K. */ - - pp = 1 - pp; -/* L80: */ - } - -/* Initialise variables to pass to ODSQ3. */ - - ttype = 0; - dmin1 = 0.; - dmin2 = 0.; - dn = 0.; - dn1 = 0.; - dn2 = 0.; - g = 0.; - tau = 0.; - - iter = 2; - nfail = 0; - ndiv = n0 - i0 << 1; - - i__1 = *n + 1; - for (iwhila = 1; iwhila <= i__1; ++iwhila) { - if (n0 < 1) { - goto L170; - } - -/* While array unfinished do */ - -/* E(N0) holds the value of SIGMA when submatrix in I0:N0 */ -/* splits from the rest of the array, but is negated. */ - - desig = 0.; - if (n0 == *n) { - sigma = 0.; - } else { - sigma = -z__[(n0 << 2) - 1]; - } - if (sigma < 0.) { - *info = 1; - return 0; - } - -/* Find last unreduced submatrix's top index I0, find QMAX and */ -/* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ - - emax = 0.; - if (n0 > i0) { - emin = (d__1 = z__[(n0 << 2) - 5], fabs(d__1)); - } else { - emin = 0.; - } - qmin = z__[(n0 << 2) - 3]; - qmax = qmin; - for (i4 = n0 << 2; i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.) { - goto L100; - } - if (qmin >= emax * 4.) { -/* Computing MIN */ - d__1 = qmin, d__2 = z__[i4 - 3]; - qmin = fmin(d__1,d__2); -/* Computing MAX */ - d__1 = emax, d__2 = z__[i4 - 5]; - emax = fmax(d__1,d__2); - } -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = fmax(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 5]; - emin = fmin(d__1,d__2); -/* L90: */ - } - i4 = 4; - -L100: - i0 = i4 / 4; - pp = 0; - - if (n0 - i0 > 1) { - dee = z__[(i0 << 2) - 3]; - deemin = dee; - kmin = i0; - i__2 = (n0 << 2) - 3; - for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { - dee = z__[i4] * (dee / (dee + z__[i4 - 2])); - if (dee <= deemin) { - deemin = dee; - kmin = (i4 + 3) / 4; - } -/* L110: */ - } - if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * - .5) { - ipn4 = i0 + n0 << 2; - pp = 2; - i__2 = i0 + n0 - 1 << 1; - for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { - temp = z__[i4 - 3]; - z__[i4 - 3] = z__[ipn4 - i4 - 3]; - z__[ipn4 - i4 - 3] = temp; - temp = z__[i4 - 2]; - z__[i4 - 2] = z__[ipn4 - i4 - 2]; - z__[ipn4 - i4 - 2] = temp; - temp = z__[i4 - 1]; - z__[i4 - 1] = z__[ipn4 - i4 - 5]; - z__[ipn4 - i4 - 5] = temp; - temp = z__[i4]; - z__[i4] = z__[ipn4 - i4 - 4]; - z__[ipn4 - i4 - 4] = temp; -/* L120: */ - } - } - } - -/* Put -(initial shift) into DMIN. */ - -/* Computing MAX */ - d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); - dmin__ = -fmax(d__1,d__2); - -/* Now I0:N0 is unreduced. */ -/* PP = 0 for ping, PP = 1 for pong. */ -/* PP = 2 indicates that flipping was applied to the Z array and */ -/* and that the tests for deflation upon entry in ODSQ3 */ -/* should not be performed. */ - - nbig = (n0 - i0 + 1) * 100; - i__2 = nbig; - for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { - if (i0 > n0) { - goto L150; - } - -/* While submatrix unfinished take a good dqds step. */ - - odsq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & - dn1, &dn2, &g, &tau); - - pp = 1 - pp; - -/* When EMIN is very small check for splits. */ - - if (pp == 0 && n0 - i0 >= 3) { - if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * - sigma) { - splt = i0 - 1; - qmax = z__[(i0 << 2) - 3]; - emin = z__[(i0 << 2) - 1]; - oldemn = z__[i0 * 4]; - i__3 = n0 - 3 << 2; - for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { - if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= - tol2 * sigma) { - z__[i4 - 1] = -sigma; - splt = i4 / 4; - qmax = 0.; - emin = z__[i4 + 3]; - oldemn = z__[i4 + 4]; - } else { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 + 1]; - qmax = fmax(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 1]; - emin = fmin(d__1,d__2); -/* Computing MIN */ - d__1 = oldemn, d__2 = z__[i4]; - oldemn = fmin(d__1,d__2); - } -/* L130: */ - } - z__[(n0 << 2) - 1] = emin; - z__[n0 * 4] = oldemn; - i0 = splt + 1; - } - } - -/* L140: */ - } - - *info = 2; - return 0; - -/* end IWHILB */ - -L150: - -/* L160: */ - ; - } - - *info = 3; - return 0; - -/* end IWHILA */ - -L170: - -/* Move q's to the front. */ - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 2) - 3]; -/* L180: */ - } - -/* Sort and compute sum of eigenvalues. */ - odsrt("D", n, &z__[1], &iinfo); - - e = 0.; - for (k = *n; k >= 1; --k) { - e += z__[k]; -/* L190: */ - } - -/* Store trace, sum(eigenvalues) and information on performance. */ - z__[(*n << 1) + 1] = trace; - z__[(*n << 1) + 2] = e; - z__[(*n << 1) + 3] = (double) iter; -/* Computing 2nd power */ - i__1 = *n; - z__[(*n << 1) + 4] = (double) ndiv / (double) (i__1 * i__1); - z__[(*n << 1) + 5] = nfail * 100. / (double) iter; - return 0; - -/* End of ODSQ2 */ - -} /* odsq2_ */ diff --git a/external/pmrrr/src/lapack/odsq3.c b/external/pmrrr/src/lapack/odsq3.c deleted file mode 100644 index 624522ddec..0000000000 --- a/external/pmrrr/src/lapack/odsq3.c +++ /dev/null @@ -1,344 +0,0 @@ -/* dlasq3.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ int odsq3(int *i0, int *n0, double *z__, - int *pp, double *dmin__, double *sigma, double *desig, - double *qmax, int *nfail, int *iter, int *ndiv, - int *ieee, int *ttype, double *dmin1, double *dmin2, - double *dn, double *dn1, double *dn2, double *g, - double *tau) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - double s, t; - int j4, nn; - double eps, tol; - int n0in, ipn4; - double tol2, temp; - extern /* Subroutine */ int odsq4(int *, int *, double *, - int *, int *, double *, double *, double *, - double *, double *, double *, double *, int *, - double *), odsq5(int *, int *, double *, - int *, double *, double *, double *, double *, - double *, double *, double *, int *), odsq6( - int *, int *, double *, int *, double *, - double *, double *, double *, double *, - double *); - // extern double odmch(char *); - extern int odnan(double *); - - -/* -- LAPACK routine (version 3.2) -- */ - -/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ -/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ -/* -- Berkeley -- */ -/* -- November 2008 -- */ - -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ -/* In case of failure it changes shifts, and tries again until output */ -/* is positive. */ - -/* Arguments */ -/* ========= */ - -/* I0 (input) INT */ -/* First index. */ - -/* N0 (input) INT */ -/* Last index. */ - -/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ -/* Z holds the qd array. */ - -/* PP (input/output) INT */ -/* PP=0 for ping, PP=1 for pong. */ -/* PP=2 indicates that flipping was applied to the Z array */ -/* and that the initial tests for deflation should not be */ -/* performed. */ - -/* DMIN (output) DOUBLE PRECISION */ -/* Minimum value of d. */ - -/* SIGMA (output) DOUBLE PRECISION */ -/* Sum of shifts used in current segment. */ - -/* DESIG (input/output) DOUBLE PRECISION */ -/* Lower order part of SIGMA */ - -/* QMAX (input) DOUBLE PRECISION */ -/* Maximum value of q. */ - -/* NFAIL (output) INT */ -/* Number of times shift was too big. */ - -/* ITER (output) INT */ -/* Number of iterations. */ - -/* NDIV (output) INT */ -/* Number of divisions. */ - -/* IEEE (input) INT */ -/* Flag for IEEE or non IEEE arithmetic (passed to ODSQ5). */ - -/* TTYPE (input/output) INT */ -/* Shift type. */ - -/* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */ -/* These are passed as arguments in order to save their values */ -/* between calls to ODSQ3. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. External Function .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - n0in = *n0; - eps = DBL_EPSILON; // odmch("Precision"); - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - -/* Check for deflation. */ - -L10: - - if (*n0 < *i0) { - return 0; - } - if (*n0 == *i0) { - goto L20; - } - nn = (*n0 << 2) + *pp; - if (*n0 == *i0 + 1) { - goto L40; - } - -/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ - - if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - - 4] > tol2 * z__[nn - 7]) { - goto L30; - } - -L20: - - z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; - --(*n0); - goto L10; - -/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ - -L30: - - if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ - nn - 11]) { - goto L50; - } - -L40: - - if (z__[nn - 3] > z__[nn - 7]) { - s = z__[nn - 3]; - z__[nn - 3] = z__[nn - 7]; - z__[nn - 7] = s; - } - if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; - s = z__[nn - 3] * (z__[nn - 5] / t); - if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); - } else { - s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[nn - 7] + (s + z__[nn - 5]); - z__[nn - 3] *= z__[nn - 7] / t; - z__[nn - 7] = t; - } - z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; - z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; - *n0 += -2; - goto L10; - -L50: - if (*pp == 2) { - *pp = 0; - } - -/* Reverse the qd-array, if warranted. */ - - if (*dmin__ <= 0. || *n0 < n0in) { - if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { - ipn4 = *i0 + *n0 << 2; - i__1 = *i0 + *n0 - 1 << 1; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - temp = z__[j4 - 3]; - z__[j4 - 3] = z__[ipn4 - j4 - 3]; - z__[ipn4 - j4 - 3] = temp; - temp = z__[j4 - 2]; - z__[j4 - 2] = z__[ipn4 - j4 - 2]; - z__[ipn4 - j4 - 2] = temp; - temp = z__[j4 - 1]; - z__[j4 - 1] = z__[ipn4 - j4 - 5]; - z__[ipn4 - j4 - 5] = temp; - temp = z__[j4]; - z__[j4] = z__[ipn4 - j4 - 4]; - z__[ipn4 - j4 - 4] = temp; -/* L60: */ - } - if (*n0 - *i0 <= 4) { - z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; - z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; - } -/* Computing MIN */ - d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; - *dmin2 = fmin(d__1,d__2); -/* Computing MIN */ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] - , d__1 = fmin(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; - z__[(*n0 << 2) + *pp - 1] = fmin(d__1,d__2); -/* Computing MIN */ - d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = - fmin(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; - z__[(*n0 << 2) - *pp] = fmin(d__1,d__2); -/* Computing MAX */ - d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = fmax(d__1, - d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; - *qmax = fmax(d__1,d__2); - *dmin__ = -0.; - } - } - -/* Choose a shift. */ - - odsq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, - tau, ttype, g); - -/* Call dqds until DMIN > 0. */ - -L70: - - odsq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, - ieee); - - *ndiv += *n0 - *i0 + 2; - ++(*iter); - -/* Check status. */ - - if (*dmin__ >= 0. && *dmin1 > 0.) { - -/* Success. */ - - goto L90; - - } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol - * (*sigma + *dn1) && fabs(*dn) < tol * *sigma) { - -/* Convergence hidden by negative DN. */ - - z__[(*n0 - 1 << 2) - *pp + 2] = 0.; - *dmin__ = 0.; - goto L90; - } else if (*dmin__ < 0.) { - -/* TAU too big. Select new TAU and try again. */ - - ++(*nfail); - if (*ttype < -22) { - -/* Failed twice. Play it safe. */ - - *tau = 0.; - } else if (*dmin1 > 0.) { - -/* Late failure. Gives excellent shift. */ - - *tau = (*tau + *dmin__) * (1. - eps * 2.); - *ttype += -11; - } else { - -/* Early failure. Divide by 4. */ - - *tau *= .25; - *ttype += -12; - } - goto L70; - } else if (odnan(dmin__)) { - -/* NaN. */ - - if (*tau == 0.) { - goto L80; - } else { - *tau = 0.; - goto L70; - } - } else { - -/* Possible underflow. Play it safe. */ - - goto L80; - } - -/* Risk of underflow. */ - -L80: - odsq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); - *ndiv += *n0 - *i0 + 2; - ++(*iter); - *tau = 0.; - -L90: - if (*tau < *sigma) { - *desig += *tau; - t = *sigma + *desig; - *desig -= t - *sigma; - } else { - t = *sigma + *tau; - *desig = *sigma - (t - *tau) + *desig; - } - *sigma = t; - - return 0; - -/* End of ODSQ3 */ - -} /* odsq3_ */ diff --git a/external/pmrrr/src/lapack/odsq4.c b/external/pmrrr/src/lapack/odsq4.c deleted file mode 100644 index 6fef7a1a16..0000000000 --- a/external/pmrrr/src/lapack/odsq4.c +++ /dev/null @@ -1,398 +0,0 @@ -/* dlasq4.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odsq4(int *i0, int *n0, double *z__, - int *pp, int *n0in, double *dmin__, double *dmin1, - double *dmin2, double *dn, double *dn1, double *dn2, - double *tau, int *ttype, double *g) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - double s, a2, b1, b2; - int i4, nn, np; - double gam, gap1, gap2; - - -/* -- LAPACK routine (version 3.2) -- */ - -/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ -/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ -/* -- Berkeley -- */ -/* -- November 2008 -- */ - -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSQ4 computes an approximation TAU to the smallest eigenvalue */ -/* using values of d from the previous transform. */ - -/* I0 (input) INT */ -/* First index. */ - -/* N0 (input) INT */ -/* Last index. */ - -/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ -/* Z holds the qd array. */ - -/* PP (input) INT */ -/* PP=0 for ping, PP=1 for pong. */ - -/* NOIN (input) INT */ -/* The value of N0 at start of EIGTEST. */ - -/* DMIN (input) DOUBLE PRECISION */ -/* Minimum value of d. */ - -/* DMIN1 (input) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ). */ - -/* DMIN2 (input) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ - -/* DN (input) DOUBLE PRECISION */ -/* d(N) */ - -/* DN1 (input) DOUBLE PRECISION */ -/* d(N-1) */ - -/* DN2 (input) DOUBLE PRECISION */ -/* d(N-2) */ - -/* TAU (output) DOUBLE PRECISION */ -/* This is the shift. */ - -/* TTYPE (output) INT */ -/* Shift type. */ - -/* G (input/output) REAL */ -/* G is passed as an argument in order to save its value between */ -/* calls to ODSQ4. */ - -/* Further Details */ -/* =============== */ -/* CNST1 = 9/16 */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* A negative DMIN forces the shift to take that absolute value */ -/* TTYPE records the type of shift. */ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*dmin__ <= 0.) { - *tau = -(*dmin__); - *ttype = -1; - return 0; - } - - nn = (*n0 << 2) + *pp; - if (*n0in == *n0) { - -/* No eigenvalues deflated. */ - - if (*dmin__ == *dn || *dmin__ == *dn1) { - - b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); - b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); - a2 = z__[nn - 7] + z__[nn - 5]; - -/* Cases 2 and 3. */ - - if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25; - if (gap2 > 0. && gap2 > b2) { - gap1 = a2 - *dn - b2 / gap2 * b2; - } else { - gap1 = a2 - *dn - (b1 + b2); - } - if (gap1 > 0. && gap1 > b1) { -/* Computing MAX */ - d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; - s = fmax(d__1,d__2); - *ttype = -2; - } else { - s = 0.; - if (*dn > b1) { - s = *dn - b1; - } - if (a2 > b1 + b2) { -/* Computing MIN */ - d__1 = s, d__2 = a2 - (b1 + b2); - s = fmin(d__1,d__2); - } -/* Computing MAX */ - d__1 = s, d__2 = *dmin__ * .333; - s = fmax(d__1,d__2); - *ttype = -3; - } - } else { - -/* Case 4. */ - - *ttype = -4; - s = *dmin__ * .25; - if (*dmin__ == *dn) { - gam = *dn; - a2 = 0.; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b2 = z__[nn - 5] / z__[nn - 7]; - np = nn - 9; - } else { - np = nn - (*pp << 1); - b2 = z__[np - 2]; - gam = *dn1; - if (z__[np - 4] > z__[np - 2]) { - return 0; - } - a2 = z__[np - 4] / z__[np - 2]; - if (z__[nn - 9] > z__[nn - 11]) { - return 0; - } - b2 = z__[nn - 9] / z__[nn - 11]; - np = nn - 13; - } - -/* Approximate contribution to norm squared from I < NN-1. */ - - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { - goto L20; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (fmax(b2,b1) * 100. < a2 || .563 < a2) { - goto L20; - } -/* L10: */ - } -L20: - a2 *= 1.05; - -/* Rayleigh quotient residual bound. */ - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } - } else if (*dmin__ == *dn2) { - -/* Case 5. */ - - *ttype = -5; - s = *dmin__ * .25; - -/* Compute contribution to norm squared from I > NN-2. */ - - np = nn - (*pp << 1); - b1 = z__[np - 2]; - b2 = z__[np - 6]; - gam = *dn2; - if (z__[np - 8] > b2 || z__[np - 4] > b1) { - return 0; - } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); - -/* Approximate contribution to norm squared from I < NN-2. */ - - if (*n0 - *i0 > 2) { - b2 = z__[nn - 13] / z__[nn - 15]; - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { - goto L40; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (fmax(b2,b1) * 100. < a2 || .563 < a2) { - goto L40; - } -/* L30: */ - } -L40: - a2 *= 1.05; - } - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } else { - -/* Case 6, no information to guide us. */ - - if (*ttype == -6) { - *g += (1. - *g) * .333; - } else if (*ttype == -18) { - *g = .083250000000000005; - } else { - *g = .25; - } - s = *g * *dmin__; - *ttype = -6; - } - - } else if (*n0in == *n0 + 1) { - -/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ - - if (*dmin1 == *dn1 && *dmin2 == *dn2) { - -/* Cases 7 and 8. */ - - *ttype = -7; - s = *dmin1 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L60; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - a2 = b1; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (fmax(b1,a2) * 100. < b2) { - goto L60; - } -/* L50: */ - } -L60: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin1 / (d__1 * d__1 + 1.); - gap2 = *dmin2 * .5 - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = fmax(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = fmax(d__1,d__2); - *ttype = -8; - } - } else { - -/* Case 9. */ - - s = *dmin1 * .25; - if (*dmin1 == *dn1) { - s = *dmin1 * .5; - } - *ttype = -9; - } - - } else if (*n0in == *n0 + 2) { - -/* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ - -/* Cases 10 and 11. */ - - if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { - *ttype = -10; - s = *dmin2 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L80; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (b1 * 100. < b2) { - goto L80; - } -/* L70: */ - } -L80: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin2 / (d__1 * d__1 + 1.); - gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ - nn - 9]) - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = fmax(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = fmax(d__1,d__2); - } - } else { - s = *dmin2 * .25; - *ttype = -11; - } - } else if (*n0in > *n0 + 2) { - -/* Case 12, more than two eigenvalues deflated. No information. */ - - s = 0.; - *ttype = -12; - } - - *tau = s; - return 0; - -/* End of ODSQ4 */ - -} /* odsq4_ */ diff --git a/external/pmrrr/src/lapack/odsq5.c b/external/pmrrr/src/lapack/odsq5.c deleted file mode 100644 index 47dd5a9d9c..0000000000 --- a/external/pmrrr/src/lapack/odsq5.c +++ /dev/null @@ -1,235 +0,0 @@ -/* dlasq5.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odsq5(int *i0, int *n0, double *z__, - int *pp, double *tau, double *dmin__, double *dmin1, - double *dmin2, double *dn, double *dnm1, double *dnm2, - int *ieee) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Local variables */ - double d__; - int j4, j4p2; - double emin, temp; - - -/* -- LAPACK routine (version 3.2) -- */ - -/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ -/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ -/* -- Berkeley -- */ -/* -- November 2008 -- */ - -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSQ5 computes one dqds transform in ping-pong form, one */ -/* version for IEEE machines another for non IEEE machines. */ - -/* Arguments */ -/* ========= */ - -/* I0 (input) INT */ -/* First index. */ - -/* N0 (input) INT */ -/* Last index. */ - -/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ -/* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ -/* an extra argument. */ - -/* PP (input) INT */ -/* PP=0 for ping, PP=1 for pong. */ - -/* TAU (input) DOUBLE PRECISION */ -/* This is the shift. */ - -/* DMIN (output) DOUBLE PRECISION */ -/* Minimum value of d. */ - -/* DMIN1 (output) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ). */ - -/* DMIN2 (output) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ - -/* DN (output) DOUBLE PRECISION */ -/* d(N0), the last value of d. */ - -/* DNM1 (output) DOUBLE PRECISION */ -/* d(N0-1). */ - -/* DNM2 (output) DOUBLE PRECISION */ -/* d(N0-2). */ - -/* IEEE (input) INT */ -/* Flag for IEEE or non IEEE arithmetic. */ - -/* ===================================================================== */ - -/* .. Parameter .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4] - *tau; - *dmin__ = d__; - *dmin1 = -z__[j4]; - - if (*ieee) { - -/* Code for IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - temp = z__[j4 + 1] / z__[j4 - 2]; - d__ = d__ * temp - *tau; - *dmin__ = fmin(*dmin__,d__); - z__[j4] = z__[j4 - 1] * temp; -/* Computing MIN */ - d__1 = z__[j4]; - emin = fmin(d__1,emin); -/* L10: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - temp = z__[j4 + 2] / z__[j4 - 3]; - d__ = d__ * temp - *tau; - *dmin__ = fmin(*dmin__,d__); - z__[j4 - 1] = z__[j4] * temp; -/* Computing MIN */ - d__1 = z__[j4 - 1]; - emin = fmin(d__1,emin); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = fmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = fmin(*dmin__,*dn); - - } else { - -/* Code for non IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.) { - return 0; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; - } - *dmin__ = fmin(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = fmin(d__1,d__2); -/* L30: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.) { - return 0; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; - } - *dmin__ = fmin(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = fmin(d__1,d__2); -/* L40: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - } - *dmin__ = fmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - } - *dmin__ = fmin(*dmin__,*dn); - - } - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of ODSQ5 */ - -} /* odsq5_ */ diff --git a/external/pmrrr/src/lapack/odsq6.c b/external/pmrrr/src/lapack/odsq6.c deleted file mode 100644 index f9822e86ee..0000000000 --- a/external/pmrrr/src/lapack/odsq6.c +++ /dev/null @@ -1,207 +0,0 @@ -/* dlasq6.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odsq6(int *i0, int *n0, double *z__, - int *pp, double *dmin__, double *dmin1, double *dmin2, - double *dn, double *dnm1, double *dnm2) -{ - /* System generated locals */ - int i__1; - double d__1, d__2; - - /* Local variables */ - double d__; - int j4, j4p2; - double emin, temp; - // extern double odmch(char *); - double safmin; - - -/* -- LAPACK routine (version 3.2) -- */ - -/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ -/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ -/* -- Berkeley -- */ -/* -- November 2008 -- */ - -/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ -/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSQ6 computes one dqd (shift equal to zero) transform in */ -/* ping-pong form, with protection against underflow and overflow. */ - -/* Arguments */ -/* ========= */ - -/* I0 (input) INT */ -/* First index. */ - -/* N0 (input) INT */ -/* Last index. */ - -/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ -/* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ -/* an extra argument. */ - -/* PP (input) INT */ -/* PP=0 for ping, PP=1 for pong. */ - -/* DMIN (output) DOUBLE PRECISION */ -/* Minimum value of d. */ - -/* DMIN1 (output) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ). */ - -/* DMIN2 (output) DOUBLE PRECISION */ -/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ - -/* DN (output) DOUBLE PRECISION */ -/* d(N0), the last value of d. */ - -/* DNM1 (output) DOUBLE PRECISION */ -/* d(N0-1). */ - -/* DNM2 (output) DOUBLE PRECISION */ -/* d(N0-2). */ - -/* ===================================================================== */ - -/* .. Parameter .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. External Function .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - safmin = DBL_MIN; // odmch("Safe minimum"); - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4]; - *dmin__ = d__; - - if (*pp == 0) { - i__1 = (*n0-3) << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - d__ = z__[j4 + 1]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - - 2] < z__[j4 + 1]) { - temp = z__[j4 + 1] / z__[j4 - 2]; - z__[j4] = z__[j4 - 1] * temp; - d__ *= temp; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); - } - *dmin__ = fmin(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = fmin(d__1,d__2); -/* L10: */ - } - } else { - i__1 = (*n0-3) << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.) { - z__[j4 - 1] = 0.; - d__ = z__[j4 + 2]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - - 3] < z__[j4 + 2]) { - temp = z__[j4 + 2] / z__[j4 - 3]; - z__[j4 - 1] = z__[j4] * temp; - d__ *= temp; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); - } - *dmin__ = fmin(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = fmin(d__1,d__2); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = ((*n0-2) << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - *dnm1 = z__[j4p2 + 2]; - *dmin__ = *dnm1; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dnm1 = *dnm2 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); - } - *dmin__ = fmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - *dn = z__[j4p2 + 2]; - *dmin__ = *dn; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dn = *dnm1 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); - } - *dmin__ = fmin(*dmin__,*dn); - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of ODSQ6 */ - -} /* odsq6_ */ diff --git a/external/pmrrr/src/lapack/odsrt.c b/external/pmrrr/src/lapack/odsrt.c deleted file mode 100644 index 03adb04dc8..0000000000 --- a/external/pmrrr/src/lapack/odsrt.c +++ /dev/null @@ -1,280 +0,0 @@ -/* dlasrt.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odsrt(char *id, int *n, double *d__, int *info) -{ - /* System generated locals */ - int i__1, i__2; - - /* Local variables */ - int i__, j; - double d1, d2, d3; - int dir; - double tmp; - int endd; - extern int olsame(char *, char *); - int stack[64] /* was [2][32] */; - double dmnmx; - int start; - extern /* Subroutine */ int oerbla(char *, int *); - int stkpnt; - - -/* -- LAPACK routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* Sort the numbers in D in increasing order (if ID = 'I') or */ -/* in decreasing order (if ID = 'D' ). */ - -/* Use Quick Sort, reverting to Insertion sort on arrays of */ -/* size <= 20. Dimension of STACK limits N to about 2**32. */ - -/* Arguments */ -/* ========= */ - -/* ID (input) CHARACTER*1 */ -/* = 'I': sort D in increasing order; */ -/* = 'D': sort D in decreasing order. */ - -/* N (input) INT */ -/* The length of the array D. */ - -/* D (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the array to be sorted. */ -/* On exit, D has been sorted into increasing order */ -/* (D(1) <= ... <= D(N) ) or into decreasing order */ -/* (D(1) >= ... >= D(N) ), depending on ID. */ - -/* INFO (output) INT */ -/* = 0: successful exit */ -/* < 0: if INFO = -i, the i-th argument had an illegal value */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Local Arrays .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Test the input paramters. */ - - /* Parameter adjustments */ - --d__; - - /* Function Body */ - *info = 0; - dir = -1; - if (olsame(id, "D")) { - dir = 0; - } else if (olsame(id, "I")) { - dir = 1; - } - if (dir == -1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } - if (*info != 0) { - i__1 = -(*info); - oerbla("ODSRT", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 1) { - return 0; - } - - stkpnt = 1; - stack[0] = 1; - stack[1] = *n; -L10: - start = stack[(stkpnt << 1) - 2]; - endd = stack[(stkpnt << 1) - 1]; - --stkpnt; - if (endd - start <= 20 && endd - start > 0) { - -/* Do Insertion sort on D( START:ENDD ) */ - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] > d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L30; - } -/* L20: */ - } -L30: - ; - } - - } else { - -/* Sort into increasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] < d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L50; - } -/* L40: */ - } -L50: - ; - } - - } - - } else if (endd - start > 20) { - -/* Partition D( START:ENDD ) and stack parts, largest one first */ - -/* Choose partition entry as median of 3 */ - - d1 = d__[start]; - d2 = d__[endd]; - i__ = (start + endd) / 2; - d3 = d__[i__]; - if (d1 < d2) { - if (d3 < d1) { - dmnmx = d1; - } else if (d3 < d2) { - dmnmx = d3; - } else { - dmnmx = d2; - } - } else { - if (d3 < d2) { - dmnmx = d2; - } else if (d3 < d1) { - dmnmx = d3; - } else { - dmnmx = d1; - } - } - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__ = start - 1; - j = endd + 1; -L60: -L70: - --j; - if (d__[j] < dmnmx) { - goto L70; - } -L80: - ++i__; - if (d__[i__] > dmnmx) { - goto L80; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L60; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } else { - -/* Sort into increasing order */ - - i__ = start - 1; - j = endd + 1; -L90: -L100: - --j; - if (d__[j] > dmnmx) { - goto L100; - } -L110: - ++i__; - if (d__[i__] < dmnmx) { - goto L110; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L90; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } - } - if (stkpnt > 0) { - goto L10; - } - return 0; - -/* End of ODSRT */ - -} /* odsrt_ */ diff --git a/external/pmrrr/src/lapack/odssq.c b/external/pmrrr/src/lapack/odssq.c deleted file mode 100644 index 5da0ce9a7c..0000000000 --- a/external/pmrrr/src/lapack/odssq.c +++ /dev/null @@ -1,109 +0,0 @@ -/* dlassq.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int odssq(int *n, double *x, int *incx, double *scale, double *sumsq) -{ - /* System generated locals */ - int i__1, i__2; - double d__1; - - /* Local variables */ - int ix; - double absxi; - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSSQ returns the values scl and smsq such that */ - -/* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ - -/* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ -/* assumed to be non-negative and scl returns the value */ - -/* scl = max( scale, abs( x( i ) ) ). */ - -/* scale and sumsq must be supplied in SCALE and SUMSQ and */ -/* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ - -/* The routine makes only one pass through the vector x. */ - -/* Arguments */ -/* ========= */ - -/* N (input) INT */ -/* The number of elements to be used from the vector X. */ - -/* X (input) DOUBLE PRECISION array, dimension (N) */ -/* The vector for which a scaled sum of squares is computed. */ -/* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ - -/* INCX (input) INT */ -/* The increment between successive values of the vector X. */ -/* INCX > 0. */ - -/* SCALE (input/output) DOUBLE PRECISION */ -/* On entry, the value scale in the equation above. */ -/* On exit, SCALE is overwritten with scl , the scaling factor */ -/* for the sum of squares. */ - -/* SUMSQ (input/output) DOUBLE PRECISION */ -/* On entry, the value sumsq in the equation above. */ -/* On exit, SUMSQ is overwritten with smsq , the basic sum of */ -/* squares from which scl has been factored out. */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) { - absxi = (d__1 = x[ix], fabs(d__1)); - if (*scale < absxi) { -/* Computing 2nd power */ - d__1 = *scale / absxi; - *sumsq = *sumsq * (d__1 * d__1) + 1; - *scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / *scale; - *sumsq += d__1 * d__1; - } - } -/* L10: */ - } - } - return 0; - -/* End of ODSSQ */ - -} /* odssq_ */ diff --git a/external/pmrrr/src/lapack/odstmr.c b/external/pmrrr/src/lapack/odstmr.c deleted file mode 100644 index 1c159ef7ea..0000000000 --- a/external/pmrrr/src/lapack/odstmr.c +++ /dev/null @@ -1,726 +0,0 @@ -/* dstemr.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Table of constant values */ -static int c__1 = 1; -static double c_b18 = .001; -#define TRUE_ (1) -#define FALSE_ (0) -#define iabs(a) ( (a) > (0) ? (a) : (-a) ) - - -/* Subroutine */ -int odstmr(char *jobz, char *range, int *n, double *d__, - double *e, double *vl, double *vu, int *il, - int *iu, int *m, double *w, double *z__, int *ldz, - int *nzc, int *isuppz, int *tryrac, double *work, - int *lwork, int *iwork, int *liwork, int *info) -{ - /* System generated locals */ - int z_dim1, z_offset, i__1, i__2; - double d__1, d__2; - - /* Builtin functions */ - // double sqrt(double); - - /* Local variables */ - int i__, j; - double r1, r2; - int jj; - double cs; - int in; - double sn, wl, wu; - int iil, iiu; - double eps, tmp; - int indd, iend, jblk, wend; - double rmin, rmax; - int itmp; - double tnrm; - extern /* Subroutine */ int ode2(double *, double *, double - *, double *, double *); - int inde2, itmp2; - double rtol1, rtol2; - extern /* Subroutine */ int odscal(int *, double *, double *, - int *); - double scale; - int indgp; - extern int olsame(char *, char *); - int iinfo, iindw, ilast; - extern /* Subroutine */ int odcpy(int *, double *, int *, - double *, int *), odswap(int *, double *, int - *, double *, int *); - int lwmin; - int wantz; - extern /* Subroutine */ int odev2(double *, double *, - double *, double *, double *, double *, - double *); - // extern double odmch(char *); - int alleig; - int ibegin; - int indeig; - int iindbl; - int valeig; - extern /* Subroutine */ int odrrc(char *, int *, double *, - double *, double *, double *, double *, int *, - int *, int *, int *), odrre(char *, - int *, double *, double *, int *, int *, - double *, double *, double *, double *, - double *, double *, int *, int *, int *, - double *, double *, double *, int *, int *, - double *, double *, double *, int *, int *); - int wbegin; - double safmin; - extern /* Subroutine */ int odrrj(int *, double *, double *, - int *, int *, double *, int *, double *, - double *, double *, int *, double *, double *, - int *), oerbla(char *, int *); - double bignum; - int inderr, iindwk, indgrs, offset; - extern double odnst(char *, int *, double *, double *); - extern /* Subroutine */ int odrrr(int *, double *, double *, - int *), odrrv(int *, double *, double *, - double *, double *, double *, int *, int *, - int *, int *, double *, double *, double *, - double *, double *, double *, int *, int *, - double *, double *, int *, int *, double *, - int *, int *), odsrt(char *, int *, double *, - int *); - double thresh; - int iinspl, ifirst, indwrk, liwmin, nzcmin; - double pivmin; - int nsplit; - double smlnum; - int lquery, zquery; - - -/* -- LAPACK computational routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ -/* .. Array Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* ODSTMR computes selected eigenvalues and, optionally, eigenvectors */ -/* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ -/* a well defined set of pairwise different real eigenvalues, the corresponding */ -/* real eigenvectors are pairwise orthogonal. */ - -/* The spectrum may be computed either completely or partially by specifying */ -/* either an interval (VL,VU] or a range of indices IL:IU for the desired */ -/* eigenvalues. */ - -/* Depending on the number of desired eigenvalues, these are computed either */ -/* by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */ -/* computed by the use of various suitable L D L^T factorizations near clusters */ -/* of close eigenvalues (referred to as RRRs, Relatively Robust */ -/* Representations). An informal sketch of the algorithm follows. */ - -/* For each unreduced block (submatrix) of T, */ -/* (a) Compute T - sigma I = L D L^T, so that L and D */ -/* define all the wanted eigenvalues to high relative accuracy. */ -/* This means that small relative changes in the entries of D and L */ -/* cause only small relative changes in the eigenvalues and */ -/* eigenvectors. The standard (unfactored) representation of the */ -/* tridiagonal matrix T does not have this property in general. */ -/* (b) Compute the eigenvalues to suitable accuracy. */ -/* If the eigenvectors are desired, the algorithm attains full */ -/* accuracy of the computed eigenvalues only right before */ -/* the corresponding vectors have to be computed, see steps c) and d). */ -/* (c) For each cluster of close eigenvalues, select a new */ -/* shift close to the cluster, find a new factorization, and refine */ -/* the shifted eigenvalues to suitable accuracy. */ -/* (d) For each eigenvalue with a large enough relative separation compute */ -/* the corresponding eigenvector by forming a rank revealing twisted */ -/* factorization. Go back to (c) for any clusters that remain. */ - -/* For more details, see: */ -/* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ -/* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ -/* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ -/* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ -/* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ -/* 2004. Also LAPACK Working Note 154. */ -/* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ -/* tridiagonal eigenvalue/eigenvector problem", */ -/* Computer Science Division Technical Report No. UCB/CSD-97-971, */ -/* UC Berkeley, May 1997. */ - -/* Notes: */ -/* 1.ODSTMR works only on machines which follow IEEE-754 */ -/* floating-point standard in their handling of infinities and NaNs. */ -/* This permits the use of efficient inner loops avoiding a check for */ -/* zero divisors. */ - -/* Arguments */ -/* ========= */ - -/* JOBZ (input) CHARACTER*1 */ -/* = 'N': Compute eigenvalues only; */ -/* = 'V': Compute eigenvalues and eigenvectors. */ - -/* RANGE (input) CHARACTER*1 */ -/* = 'A': all eigenvalues will be found. */ -/* = 'V': all eigenvalues in the half-open interval (VL,VU] */ -/* will be found. */ -/* = 'I': the IL-th through IU-th eigenvalues will be found. */ - -/* N (input) INT */ -/* The order of the matrix. N >= 0. */ - -/* D (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the N diagonal elements of the tridiagonal matrix */ -/* T. On exit, D is overwritten. */ - -/* E (input/output) DOUBLE PRECISION array, dimension (N) */ -/* On entry, the (N-1) subdiagonal elements of the tridiagonal */ -/* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ -/* input, but is used internally as workspace. */ -/* On exit, E is overwritten. */ - -/* VL (input) DOUBLE PRECISION */ -/* VU (input) DOUBLE PRECISION */ -/* If RANGE='V', the lower and upper bounds of the interval to */ -/* be searched for eigenvalues. VL < VU. */ -/* Not referenced if RANGE = 'A' or 'I'. */ - -/* IL (input) INT */ -/* IU (input) INT */ -/* If RANGE='I', the indices (in ascending order) of the */ -/* smallest and largest eigenvalues to be returned. */ -/* 1 <= IL <= IU <= N, if N > 0. */ -/* Not referenced if RANGE = 'A' or 'V'. */ - -/* M (output) INT */ -/* The total number of eigenvalues found. 0 <= M <= N. */ -/* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ - -/* W (output) DOUBLE PRECISION array, dimension (N) */ -/* The first M elements contain the selected eigenvalues in */ -/* ascending order. */ - -/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ -/* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ -/* contain the orthonormal eigenvectors of the matrix T */ -/* corresponding to the selected eigenvalues, with the i-th */ -/* column of Z holding the eigenvector associated with W(i). */ -/* If JOBZ = 'N', then Z is not referenced. */ -/* Note: the user must ensure that at least max(1,M) columns are */ -/* supplied in the array Z; if RANGE = 'V', the exact value of M */ -/* is not known in advance and can be computed with a workspace */ -/* query by setting NZC = -1, see below. */ - -/* LDZ (input) INT */ -/* The leading dimension of the array Z. LDZ >= 1, and if */ -/* JOBZ = 'V', then LDZ >= max(1,N). */ - -/* NZC (input) INT */ -/* The number of eigenvectors to be held in the array Z. */ -/* If RANGE = 'A', then NZC >= max(1,N). */ -/* If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */ -/* If RANGE = 'I', then NZC >= IU-IL+1. */ -/* If NZC = -1, then a workspace query is assumed; the */ -/* routine calculates the number of columns of the array Z that */ -/* are needed to hold the eigenvectors. */ -/* This value is returned as the first entry of the Z array, and */ -/* no error message related to NZC is issued by OERBLA. */ - -/* ISUPPZ (output) INT ARRAY, dimension ( 2*max(1,M) ) */ -/* The support of the eigenvectors in Z, i.e., the indices */ -/* indicating the nonzero elements in Z. The i-th computed eigenvector */ -/* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ -/* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ -/* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ - -/* TRYRAC (input/output) INT */ -/* If TRYRAC.EQ..TRUE., indicates that the code should check whether */ -/* the tridiagonal matrix defines its eigenvalues to high relative */ -/* accuracy. If so, the code uses relative-accuracy preserving */ -/* algorithms that might be (a bit) slower depending on the matrix. */ -/* If the matrix does not define its eigenvalues to high relative */ -/* accuracy, the code can uses possibly faster algorithms. */ -/* If TRYRAC.EQ..FALSE., the code is not required to guarantee */ -/* relatively accurate eigenvalues and can use the fastest possible */ -/* techniques. */ -/* On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */ -/* does not define its eigenvalues to high relative accuracy. */ - -/* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ -/* On exit, if INFO = 0, WORK(1) returns the optimal */ -/* (and minimal) LWORK. */ - -/* LWORK (input) INT */ -/* The dimension of the array WORK. LWORK >= max(1,18*N) */ -/* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ -/* If LWORK = -1, then a workspace query is assumed; the routine */ -/* only calculates the optimal size of the WORK array, returns */ -/* this value as the first entry of the WORK array, and no error */ -/* message related to LWORK is issued by OERBLA. */ - -/* IWORK (workspace/output) INT array, dimension (LIWORK) */ -/* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ - -/* LIWORK (input) INT */ -/* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ -/* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ -/* if only the eigenvalues are to be computed. */ -/* If LIWORK = -1, then a workspace query is assumed; the */ -/* routine only calculates the optimal size of the IWORK array, */ -/* returns this value as the first entry of the IWORK array, and */ -/* no error message related to LIWORK is issued by OERBLA. */ - -/* INFO (output) INT */ -/* On exit, INFO */ -/* = 0: successful exit */ -/* < 0: if INFO = -i, the i-th argument had an illegal value */ -/* > 0: if INFO = 1X, internal error in ODRRE, */ -/* if INFO = 2X, internal error in ODRRV. */ -/* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ -/* the nonzero error code returned by ODRRE or */ -/* ODRRV, respectively. */ - - -/* Further Details */ -/* =============== */ - -/* Based on contributions by */ -/* Beresford Parlett, University of California, Berkeley, USA */ -/* Jim Demmel, University of California, Berkeley, USA */ -/* Inderjit Dhillon, University of Texas, Austin, USA */ -/* Osni Marques, LBNL/NERSC, USA */ -/* Christof Voemel, University of California, Berkeley, USA */ - -/* ===================================================================== */ - -/* .. Parameters .. */ -/* .. */ -/* .. Local Scalars .. */ -/* .. */ -/* .. */ -/* .. External Functions .. */ -/* .. */ -/* .. External Subroutines .. */ -/* .. */ -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - -/* Test the input parameters. */ - - /* Parameter adjustments */ - --d__; - --e; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --isuppz; - --work; - --iwork; - - /* Function Body */ - wantz = olsame(jobz, "V"); - alleig = olsame(range, "A"); - valeig = olsame(range, "V"); - indeig = olsame(range, "I"); - - lquery = *lwork == -1 || *liwork == -1; - zquery = *nzc == -1; -/* ODSTMR needs WORK of size 6*N, IWORK of size 3*N. */ -/* In addition, ODRRE needs WORK of size 6*N, IWORK of size 5*N. */ -/* Furthermore, ODRRV needs WORK of size 12*N, IWORK of size 7*N. */ - if (wantz) { - lwmin = *n * 18; - liwmin = *n * 10; - } else { -/* need less workspace if only the eigenvalues are wanted */ - lwmin = *n * 12; - liwmin = *n << 3; - } - wl = 0.; - wu = 0.; - iil = 0; - iiu = 0; - if (valeig) { -/* We do not reference VL, VU in the cases RANGE = 'I','A' */ -/* The interval (WL, WU] contains all the wanted eigenvalues. */ -/* It is either given by the user or computed in ODRRE. */ - wl = *vl; - wu = *vu; - } else if (indeig) { -/* We do not reference IL, IU in the cases RANGE = 'V','A' */ - iil = *il; - iiu = *iu; - } - - *info = 0; - if (! (wantz || olsame(jobz, "N"))) { - *info = -1; - } else if (! (alleig || valeig || indeig)) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (valeig && *n > 0 && wu <= wl) { - *info = -7; - } else if (indeig && (iil < 1 || iil > *n)) { - *info = -8; - } else if (indeig && (iiu < iil || iiu > *n)) { - *info = -9; - } else if (*ldz < 1 || wantz && *ldz < *n) { - *info = -13; - } else if (*lwork < lwmin && ! lquery) { - *info = -17; - } else if (*liwork < liwmin && ! lquery) { - *info = -19; - } - -/* Get machine constants. */ - - safmin = DBL_MIN; // safmin = odmch("Safe minimum"); - eps = DBL_EPSILON; // eps = odmch("Precision"); - smlnum = safmin / eps; - bignum = 1. / smlnum; - rmin = sqrt(smlnum); -/* Computing MIN */ - d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); - rmax = fmin(d__1,d__2); - - if (*info == 0) { - work[1] = (double) lwmin; - iwork[1] = liwmin; - - if (wantz && alleig) { - nzcmin = *n; - } else if (wantz && valeig) { - odrrc("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, & - itmp2, info); - } else if (wantz && indeig) { - nzcmin = iiu - iil + 1; - } else { -/* WANTZ .EQ. FALSE. */ - nzcmin = 0; - } - if (zquery && *info == 0) { - z__[z_dim1 + 1] = (double) nzcmin; - } else if (*nzc < nzcmin && ! zquery) { - *info = -14; - } - } - if (*info != 0) { - - i__1 = -(*info); - oerbla("ODSTMR", &i__1); - - return 0; - } else if (lquery || zquery) { - return 0; - } - -/* Handle N = 0, 1, and 2 cases immediately */ - - *m = 0; - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (alleig || indeig) { - *m = 1; - w[1] = d__[1]; - } else { - if (wl < d__[1] && wu >= d__[1]) { - *m = 1; - w[1] = d__[1]; - } - } - if (wantz && ! zquery) { - z__[z_dim1 + 1] = 1.; - isuppz[1] = 1; - isuppz[2] = 1; - } - return 0; - } - - if (*n == 2) { - if (! wantz) { - ode2(&d__[1], &e[1], &d__[2], &r1, &r2); - } else if (wantz && ! zquery) { - odev2(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn); - } - if (alleig || valeig && r2 > wl && r2 <= wu || indeig && iil == 1) { - ++(*m); - w[*m] = r2; - if (wantz && ! zquery) { - z__[*m * z_dim1 + 1] = -sn; - z__[*m * z_dim1 + 2] = cs; -/* Note: At most one of SN and CS can be zero. */ - if (sn != 0.) { - if (cs != 0.) { - isuppz[(*m << 1) - 1] = 1; - isuppz[(*m << 1) - 1] = 2; - } else { - isuppz[(*m << 1) - 1] = 1; - isuppz[(*m << 1) - 1] = 1; - } - } else { - isuppz[(*m << 1) - 1] = 2; - isuppz[*m * 2] = 2; - } - } - } - if (alleig || valeig && r1 > wl && r1 <= wu || indeig && iiu == 2) { - ++(*m); - w[*m] = r1; - if (wantz && ! zquery) { - z__[*m * z_dim1 + 1] = cs; - z__[*m * z_dim1 + 2] = sn; -/* Note: At most one of SN and CS can be zero. */ - if (sn != 0.) { - if (cs != 0.) { - isuppz[(*m << 1) - 1] = 1; - isuppz[(*m << 1) - 1] = 2; - } else { - isuppz[(*m << 1) - 1] = 1; - isuppz[(*m << 1) - 1] = 1; - } - } else { - isuppz[(*m << 1) - 1] = 2; - isuppz[*m * 2] = 2; - } - } - } - return 0; - } -/* Continue with general N */ - indgrs = 1; - inderr = (*n << 1) + 1; - indgp = *n * 3 + 1; - indd = (*n << 2) + 1; - inde2 = *n * 5 + 1; - indwrk = *n * 6 + 1; - - iinspl = 1; - iindbl = *n + 1; - iindw = (*n << 1) + 1; - iindwk = *n * 3 + 1; - -/* Scale matrix to allowable range, if necessary. */ -/* The allowable range is related to the PIVMIN parameter; see the */ -/* comments in DLARRD. The preference for scaling small values */ -/* up is heuristic; we expect users' matrices not to be close to the */ -/* RMAX threshold. */ - - scale = 1.; - tnrm = odnst("M", n, &d__[1], &e[1]); - if (tnrm > 0. && tnrm < rmin) { - scale = rmin / tnrm; - } else if (tnrm > rmax) { - scale = rmax / tnrm; - } - if (scale != 1.) { - odscal(n, &scale, &d__[1], &c__1); - i__1 = *n - 1; - odscal(&i__1, &scale, &e[1], &c__1); - tnrm *= scale; - if (valeig) { -/* If eigenvalues in interval have to be found, */ -/* scale (WL, WU] accordingly */ - wl *= scale; - wu *= scale; - } - } - -/* Compute the desired eigenvalues of the tridiagonal after splitting */ -/* into smaller subblocks if the corresponding off-diagonal elements */ -/* are small */ -/* THRESH is the splitting parameter for ODRRE */ -/* A negative THRESH forces the old splitting criterion based on the */ -/* size of the off-diagonal. A positive THRESH switches to splitting */ -/* which preserves relative accuracy. */ - - if (*tryrac) { -/* Test whether the matrix warrants the more expensive relative approach. */ - odrrr(n, &d__[1], &e[1], &iinfo); - } else { -/* The user does not care about relative accurately eigenvalues */ - iinfo = -1; - } -/* Set the splitting criterion */ - if (iinfo == 0) { - thresh = eps; - } else { - thresh = -eps; -/* relative accuracy is desired but T does not guarantee it */ - *tryrac = FALSE_; - } - - if (*tryrac) { -/* Copy original diagonal, needed to guarantee relative accuracy */ - odcpy(n, &d__[1], &c__1, &work[indd], &c__1); - } -/* Store the squares of the offdiagonal values of T */ - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { -/* Computing 2nd power */ - d__1 = e[j]; - work[inde2 + j - 1] = d__1 * d__1; -/* L5: */ - } -/* Set the tolerance parameters for bisection */ - if (! wantz) { -/* ODRRE computes the eigenvalues to full precision. */ - rtol1 = eps * 4.; - rtol2 = eps * 4.; - } else { -/* ODRRE computes the eigenvalues to less than full precision. */ -/* ODRRV will refine the eigenvalue approximations, and we can */ -/* need less accurate initial bisection in ODRRE. */ -/* Note: these settings do only affect the subset case and ODRRE */ - rtol1 = sqrt(eps); -/* Computing MAX */ - d__1 = sqrt(eps) * .005, d__2 = eps * 4.; - rtol2 = fmax(d__1,d__2); - } - odrre(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], & - rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[ - inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[ - indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo); - if (iinfo != 0) { - *info = iabs(iinfo) + 10; - return 0; - } -/* Note that if RANGE .NE. 'V', ODRRE computes bounds on the desired */ -/* part of the spectrum. All desired eigenvalues are contained in */ -/* (WL,WU] */ - if (wantz) { - -/* Compute the desired eigenvectors corresponding to the computed */ -/* eigenvalues */ - - odrrv(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, & - c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[ - indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[ - z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], & - iinfo); - if (iinfo != 0) { - *info = iabs(iinfo) + 20; - return 0; - } - } else { -/* ODRRE computes eigenvalues of the (shifted) root representation */ -/* ODRRV returns the eigenvalues of the unshifted matrix. */ -/* However, if the eigenvectors are not desired by the user, we need */ -/* to apply the corresponding shifts from ODRRE to obtain the */ -/* eigenvalues of the original matrix. */ - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - itmp = iwork[iindbl + j - 1]; - w[j] += e[iwork[iinspl + itmp - 1]]; -/* L20: */ - } - } - - if (*tryrac) { -/* Refine computed eigenvalues so that they are relatively accurate */ -/* with respect to the original matrix T. */ - ibegin = 1; - wbegin = 1; - i__1 = iwork[iindbl + *m - 1]; - for (jblk = 1; jblk <= i__1; ++jblk) { - iend = iwork[iinspl + jblk - 1]; - in = iend - ibegin + 1; - wend = wbegin - 1; -/* check if any eigenvalues have to be refined in this block */ -L36: - if (wend < *m) { - if (iwork[iindbl + wend] == jblk) { - ++wend; - goto L36; - } - } - if (wend < wbegin) { - ibegin = iend + 1; - goto L39; - } - offset = iwork[iindw + wbegin - 1] - 1; - ifirst = iwork[iindw + wbegin - 1]; - ilast = iwork[iindw + wend - 1]; - rtol2 = eps * 4.; - odrrj(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], - &ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[ - inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], & - pivmin, &tnrm, &iinfo); - ibegin = iend + 1; - wbegin = wend + 1; -L39: - ; - } - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (scale != 1.) { - d__1 = 1. / scale; - odscal(m, &d__1, &w[1], &c__1); - } - -/* If eigenvalues are not in increasing order, then sort them, */ -/* possibly along with eigenvectors. */ - - if (nsplit > 1) { - if (! wantz) { - odsrt("I", m, &w[1], &iinfo); - if (iinfo != 0) { - *info = 3; - return 0; - } - } else { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - i__ = 0; - tmp = w[j]; - i__2 = *m; - for (jj = j + 1; jj <= i__2; ++jj) { - if (w[jj] < tmp) { - i__ = jj; - tmp = w[jj]; - } -/* L50: */ - } - if (i__ != 0) { - w[i__] = w[j]; - w[j] = tmp; - if (wantz) { - odswap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * - z_dim1 + 1], &c__1); - itmp = isuppz[(i__ << 1) - 1]; - isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1]; - isuppz[(j << 1) - 1] = itmp; - itmp = isuppz[i__ * 2]; - isuppz[i__ * 2] = isuppz[j * 2]; - isuppz[j * 2] = itmp; - } - } -/* L60: */ - } - } - } - - - work[1] = (double) lwmin; - iwork[1] = liwmin; - return 0; - -/* End of ODSTMR */ - -} /* odstmr_ */ diff --git a/external/pmrrr/src/lapack/oerbla.c b/external/pmrrr/src/lapack/oerbla.c deleted file mode 100644 index 2615e37458..0000000000 --- a/external/pmrrr/src/lapack/oerbla.c +++ /dev/null @@ -1,55 +0,0 @@ -/* xerbla.f -- translated by f2c (version 20061008) */ - -#include -#include -#include -#include -#include -#include - -/* Subroutine */ -int oerbla(char *srname, int *info) -{ - - -/* -- LAPACK auxiliary routine (version 3.2) -- */ -/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ -/* November 2006 */ - -/* .. Scalar Arguments .. */ -/* .. */ - -/* Purpose */ -/* ======= */ - -/* OERBLA is an error handler for the LAPACK routines. */ -/* It is called by an LAPACK routine if an input parameter has an */ -/* invalid value. A message is printed and execution stops. */ - -/* Installers may consider modifying the STOP statement in order to */ -/* call system-specific exception-handling facilities. */ - -/* Arguments */ -/* ========= */ - -/* SRNAME (input) CHARACTER*(*) */ -/* The name of the routine which called OERBLA. */ - -/* INFO (input) INT */ -/* The position of the invalid parameter in the parameter list */ -/* of the calling routine. */ - -/* ===================================================================== */ - -/* .. Intrinsic Functions .. */ -/* .. */ -/* .. Executable Statements .. */ - - printf("** On entry to %6s, parameter number %2i had an illegal value\n", - srname, *info); - - -/* End of OERBLA */ - - return 0; -} /* oerbla_ */ diff --git a/external/pmrrr/src/lapack/oerbla.cpp b/external/pmrrr/src/lapack/oerbla.cpp new file mode 100644 index 0000000000..00ea20e504 --- /dev/null +++ b/external/pmrrr/src/lapack/oerbla.cpp @@ -0,0 +1,66 @@ +/** + C++ template version of LAPACK routine xerbla. + Based on C code translated by f2c (version 20061008). +*/ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + /* Subroutine */ + int oerbla(const char *srname, int *info) + { + + /* Table of constant values */ + static int c__1 = 1; + + /* -- LAPACK auxiliary routine (version 3.2) -- */ + /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ + /* November 2006 */ + + /* .. Scalar Arguments .. */ + /* .. */ + + /* Purpose */ + /* ======= */ + + /* OERBLA is an error handler for the LAPACK routines. */ + /* It is called by an LAPACK routine if an input parameter has an */ + /* invalid value. A message is printed and execution stops. */ + + /* Installers may consider modifying the STOP statement in order to */ + /* call system-specific exception-handling facilities. */ + + /* Arguments */ + /* ========= */ + + /* SRNAME (input) CHARACTER*(*) */ + /* The name of the routine which called OERBLA. */ + + /* INFO (input) INT */ + /* The position of the invalid parameter in the parameter list */ + /* of the calling routine. */ + + /* ===================================================================== */ + + /* .. Intrinsic Functions .. */ + /* .. */ + /* .. Executable Statements .. */ + + printf("** On entry to %6s, parameter number %2i had an illegal value\n", + srname, *info); + + + /* End of OERBLA */ + + return 0; + } /* oerbla_ */ + +} + +} diff --git a/external/pmrrr/src/lapack/olsame.c b/external/pmrrr/src/lapack/olsame.c deleted file mode 100644 index e332449f83..0000000000 --- a/external/pmrrr/src/lapack/olsame.c +++ /dev/null @@ -1,112 +0,0 @@ -/* -- translated by f2c (version 19940927) */ - -#include -#include -#include -#include -#include -#include - -int olsame(char *ca, char *cb) -{ - - - /* System generated locals */ - int ret_val; - - /* Local variables */ - static int inta, intb, zcode; - - -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - January 31, 1994 - - - Purpose - ======= - - OLSAME returns .TRUE. if CA is the same letter as CB regardless of - case. - - Arguments - ========= - - CA (input) CHARACTER*1 - CB (input) CHARACTER*1 - CA and CB specify the single characters to be compared. - - ===================================================================== - - - - Test if the characters are equal */ - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - -/* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - -/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime - machines, on which ICHAR returns a value with bit 8 set. - ICHAR('A') on Prime machines returns 193 which is the same as - ICHAR('A') on an EBCDIC machine. */ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if (zcode == 90 || zcode == 122) { - -/* ASCII is assumed - ZCODE is the ASCII code of either lower o -r - upper case 'Z'. */ - - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if (zcode == 233 || zcode == 169) { - -/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower - or - upper case 'Z'. */ - - if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta - >= 162 && inta <= 169) { - inta += 64; - } - if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb - >= 162 && intb <= 169) { - intb += 64; - } - - } else if (zcode == 218 || zcode == 250) { - -/* ASCII is assumed, on Prime machines - ZCODE is the ASCII cod -e - plus 128 of either lower or upper case 'Z'. */ - - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } - } - ret_val = inta == intb; - -/* RETURN - - End of OLSAME */ - - return ret_val; -} /* olsame_ */ - diff --git a/external/pmrrr/src/lapack/olsame.cpp b/external/pmrrr/src/lapack/olsame.cpp new file mode 100644 index 0000000000..fa93ed5011 --- /dev/null +++ b/external/pmrrr/src/lapack/olsame.cpp @@ -0,0 +1,118 @@ +/* -- translated by f2c (version 19940927) */ + +#include +#include +#include +#include +#include +#include + +namespace pmrrr { namespace lapack { + + int olsame(const char *ca, const char *cb) + { + + + /* System generated locals */ + int ret_val; + + /* Local variables */ + static int inta, intb, zcode; + + + /* -- LAPACK auxiliary routine (version 2.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + January 31, 1994 + + + Purpose + ======= + + OLSAME returns .TRUE. if CA is the same letter as CB regardless of + case. + + Arguments + ========= + + CA (input) CHARACTER*1 + CB (input) CHARACTER*1 + CA and CB specify the single characters to be compared. + + ===================================================================== + + + + Test if the characters are equal */ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + + /* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + + /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime + machines, on which ICHAR returns a value with bit 8 set. + ICHAR('A') on Prime machines returns 193 which is the same as + ICHAR('A') on an EBCDIC machine. */ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + + /* ASCII is assumed - ZCODE is the ASCII code of either lower o + r + upper case 'Z'. */ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + + /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower + or + upper case 'Z'. */ + + if ((inta >= 129 && inta <= 137 ) || (inta >= 145 && inta <= 153) || (inta + >= 162 && inta <= 169)) { + inta += 64; + } + if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb + >= 162 && intb <= 169)) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + + /* ASCII is assumed, on Prime machines - ZCODE is the ASCII cod + e + plus 128 of either lower or upper case 'Z'. */ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + + /* RETURN + + End of OLSAME */ + + return ret_val; + } /* olsame_ */ + +} + +} + diff --git a/external/pmrrr/src/plarre.c b/external/pmrrr/src/plarre.c deleted file mode 100644 index 42651ace4c..0000000000 --- a/external/pmrrr/src/plarre.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* Parallel computation of eigenvalues and symmetric tridiagonal - * matrix T, given by its diagonal elements D and its super-/sub- - * diagonal elements E. - * - * Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.7 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/plarre.h" -#include "pmrrr/structs.h" - -#define ONE 1.0 -#define HUNDRED 100.0 -#define HALF 0.5 -#define FOURTH 0.25 - - -static void *eigval_subset_thread_a(void *argin); -static void *eigval_subset_thread_r(void *argin); -static void clean_up_plarre(double*, double*, int*, int*, int*); - - -static -int eigval_approx_proc(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, - int *iwork); - -static -int eigval_root_proc(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, - int *iwork); - -static -int eigval_refine_proc(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, - int *iwork); - -static -auxarg1_t *create_auxarg1(int, double*, double*, double*, int, int, - int, int, int, int*, double, double, - double*, double*, double*, int*, int*); -static -void retrieve_auxarg1(auxarg1_t*, int*, double**, double**, double**, - int*, int*, int*, int*, int*, int**, double*, - double*, double**, double**, double**, int**, - int**); -static -auxarg2_t *create_auxarg2(int, double*, double*, int, int, double*, - double*,double*,int*,double, double, double, double); -static -void retrieve_auxarg2(auxarg2_t*, int*, double**, double**, int*, - int*, double**, double**, double**, int**, double*, double*, double*, - double*); - -static int cmp(const void*, const void*); - - - - -/* Routine to compute eigenvalues */ -int plarre(proc_t *procinfo, char *jobz, char *range, in_t *Dstruct, - val_t *Wstruct, tol_t *tolstruct, int *nzp, int *offsetp) -{ - /* input variables */ - int nproc = procinfo->nproc; - bool wantZ = (jobz[0] == 'V' || jobz[0] == 'v'); - bool cntval = (jobz[0] == 'C' || jobz[0] == 'c'); - int n = Dstruct->n; - double *restrict D = Dstruct->D; - double *restrict E = Dstruct->E; - int *restrict isplit = Dstruct->isplit; - double *vl = Wstruct->vl; - double *vu = Wstruct->vu; - int *il = Wstruct->il; - int *iu = Wstruct->iu; - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict Windex = Wstruct->Windex; - int *restrict iblock = Wstruct->iblock; - double *restrict gersch = Wstruct->gersch; - - /* constants */ - int IZERO = 0, IONE = 1; - double DZERO = 0.0; - - /* work space */ - double *E2; - double *work; - int *iwork; - - /* compute geschgorin disks and spectral diameter */ - double gl, gu, eold, emax, eabs; - - /* compute splitting points */ - int bl_begin, bl_end, bl_size; - - /* distribute work among processes */ - int ifirst, ilast, ifirst_tmp, ilast_tmp; - int chunk, isize, iil, iiu; - - /* gather results */ - int *rcount, *rdispl; - - /* others */ - int info, i, j, jbl, idummy; - double tmp1, dummy; - bool sorted; - enum range_enum {allrng=1, valrng=2, indrng=3} irange; - double intervals[2]; - int negcounts[2]; - double sigma; - - if (range[0] == 'A' || range[0] == 'a') { - irange = allrng; - } else if (range[0] == 'V' || range[0] == 'v') { - irange = valrng; - } else if (range[0] == 'I' || range[0] == 'i') { - irange = indrng; - } else { - return 1; - } - - /* allocate work space */ - E2 = (double *) malloc( n * sizeof(double) ); - assert(E2 != NULL); - work = (double *) malloc( 4*n * sizeof(double) ); - assert(work != NULL); - iwork = (int *) malloc( 3*n * sizeof(int) ); - assert(iwork != NULL); - rcount = (int *) malloc( nproc * sizeof(int) ); - assert(rcount != NULL); - rdispl = (int *) malloc( nproc * sizeof(int) ); - assert(rdispl != NULL); - - /* Compute square of off-diagonal elements */ - for (i=0; i= emax) emax = eabs; - tmp1 = eabs + eold; - gersch[2*i] = D[i] - tmp1; - gl = fmin(gl, gersch[2*i]); - gersch[2*i+1] = D[i] + tmp1; - gu = fmax(gu, gersch[2*i+1]); - eold = eabs; - } - /* min. pivot allowed in the Sturm sequence of T */ - tolstruct->pivmin = DBL_MIN * fmax(1.0, emax*emax); - /* estimate of spectral diameter */ - Dstruct->spdiam = gu - gl; - - /* compute splitting points with threshold "split" */ - odrra(&n, D, E, E2, &tolstruct->split, &Dstruct->spdiam, - &Dstruct->nsplit, isplit, &info); - assert(info == 0); - - if (irange == allrng || irange == indrng) { - *vl = gl; - *vu = gu; - } - - /* set eigenvalue indices in case of all or subset by value has - * to be computed; thereby convert all problem to subset by index - * computation */ - if (irange == allrng) { - *il = 1; - *iu = n; - } else if (irange == valrng) { - intervals[0] = *vl; intervals[1] = *vu; - - /* find negcount at boundaries 'vl' and 'vu'; - * needs work of dim(n) and iwork of dim(n) */ - odebz(&IONE, &IZERO, &n, &IONE, &IONE, &IZERO, - &DZERO, &DZERO, &tolstruct->pivmin, D, E, E2, &idummy, - intervals, &dummy, &idummy, negcounts, work, iwork, &info); - assert(info == 0); - - /* update negcounts of whole matrix with negcounts found for block */ - *il = negcounts[0] + 1; - *iu = negcounts[1]; - } - - if (cntval && irange == valrng) { - /* clean up and return */ - *nzp = iceil(*iu-*il+1, nproc); - clean_up_plarre(E2, work, iwork, rcount, rdispl); - return 0; - } - - - /* loop over unreduced blocks */ - bl_begin = 0; - - for (jbl=0; jblnsplit; jbl++) { - - bl_end = isplit[jbl] - 1; - bl_size = bl_end - bl_begin + 1; - - /* deal with 1x1 block immediately */ - if (bl_size == 1) { - E[bl_end] = 0.0; - W[bl_begin] = D[bl_begin]; - Werr[bl_begin] = 0.0; - Werr[bl_begin] = 0.0; - iblock[bl_begin] = jbl + 1; - Windex[bl_begin] = 1; - bl_begin = bl_end + 1; - continue; - } - - /* Indix range of block */ - iil = 1; - iiu = bl_size; - - /* each process computes a subset of the eigenvalues of the block */ - ifirst_tmp = iil; - for (i=0; ipid) { - ifirst = ifirst_tmp; - ilast = ilast_tmp; - isize = ilast - ifirst + 1; - *offsetp = ifirst - iil; - *nzp = isize; - } - rcount[i] = ilast_tmp - ifirst_tmp + 1; - rdispl[i] = ifirst_tmp - iil; - ifirst_tmp = ilast_tmp + 1; - ifirst_tmp = imin(ifirst_tmp, iiu + 1); - } - - /* approximate eigenvalues of input assigned to process */ - if (isize != 0) { - info = eigval_approx_proc(procinfo, ifirst, ilast, - bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], - &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], - tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], - work, iwork); - assert(info == 0); - } - - /* compute root representation of block */ - info = eigval_root_proc(procinfo, ifirst, ilast, - bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], - &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], - tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], - work, iwork); - assert(info == 0); - - /* refine eigenvalues assigned to process w.r.t root */ - if (isize != 0) { - info = eigval_refine_proc(procinfo, ifirst, ilast, - bl_size, &D[bl_begin], &E[bl_begin], &E2[bl_begin], - &Windex[bl_begin], &iblock[bl_begin], &gersch[2*bl_begin], - tolstruct, &W[bl_begin], &Werr[bl_begin], &Wgap[bl_begin], - work, iwork); - assert(info == 0); - } - - memcpy(work, &W[bl_begin], isize * sizeof(double) ); - MPI_Allgatherv(work, isize, MPI_DOUBLE, &W[bl_begin], rcount, rdispl, - MPI_DOUBLE, procinfo->comm); - - memcpy(work, &Werr[bl_begin], isize * sizeof(double) ); - MPI_Allgatherv(work, isize, MPI_DOUBLE, &Werr[bl_begin], rcount, rdispl, - MPI_DOUBLE, procinfo->comm); - - memcpy(iwork, &Windex[bl_begin], isize * sizeof(int) ); - MPI_Allgatherv(iwork, isize, MPI_INT, &Windex[bl_begin], rcount, rdispl, - MPI_INT, procinfo->comm); - - /* Ensure that within block eigenvalues sorted */ - sorted = false; - while (sorted == false) { - sorted = true; - for (j=bl_begin; j < bl_end; j++) { - if (W[j+1] < W[j]) { - sorted = false; - tmp1 = W[j]; - W[j] = W[j+1]; - W[j+1] = tmp1; - tmp1 = Werr[j]; - Werr[j] = Werr[j+1]; - Werr[j+1] = tmp1; - } - } - } - - /* Set indices index correctly */ - for (j=bl_begin; j <= bl_end; j++) - iblock[j] = jbl + 1; - - /* Recompute gaps within the blocks */ - for (j = bl_begin; j < bl_end; j++) { - Wgap[j] = fmax(0.0, (W[j+1] - Werr[j+1]) - (W[j] + Werr[j]) ); - } - sigma = E[bl_end]; - Wgap[bl_end] = fmax(0.0, (gu - sigma) - (W[bl_end] + Werr[bl_end]) ); - - /* Compute UNSHIFTED eigenvalues */ - if (!wantZ) { - sigma = E[bl_end]; - for (i=bl_begin; i<=bl_end; i++) { - W[i] += sigma; - } - } - - /* Proceed with next block */ - bl_begin = bl_end + 1; - } - /* end of loop over unreduced blocks */ - - /* free memory */ - clean_up_plarre(E2, work, iwork, rcount, rdispl); - - return 0; -} - -/* - * Free's on allocated memory of plarre routine - */ -static -void clean_up_plarre(double *E2, double *work, int *iwork, - int *rcount, int *rdispl) -{ - free(E2); - free(work); - free(iwork); - free(rcount); - free(rdispl); -} - -#ifndef DISABLE_PTHREADS -static -int eigval_approx_proc -(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, int *iwork) -{ - /* Input parameter */ - int isize = ilast-ifirst+1; - double pivmin = tolstruct->pivmin; - - /* /\* Multithreading *\/ */ - int max_nthreads = procinfo->nthreads; - int iifirst, iilast, chunk; - pthread_t *threads; - pthread_attr_t attr; - auxarg1_t *auxarg1; - - /* Others */ - int info, m, i, j; - double dummy; - - /* Allocate workspace */ - int *isplit = (int *) malloc( n * sizeof(int) ); - assert(isplit != NULL); - threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) ); - assert(threads != NULL); - - /* This is an unreduced block */ - int nsplit = 1; - isplit[0] = n; - - if (max_nthreads > 1) { - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); - } - - /* Set tolerance parameters */ - double bsrtol = sqrt(DBL_EPSILON); - - - /* APPROXIMATE EIGENVALUES */ - - /* compute approximations of the eigenvalues with muliple threads */ - /* equivalent to: */ - /* dlarrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, */ - /* &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, */ - /* &wl, &wu, iblock, Windex, work, iwork, &info); */ - /* assert(info == 0); */ - /* assert(m == ilast-ifirst+1); */ - - int nthreads = max_nthreads; - while (nthreads > 1 && isize / nthreads < 2) - nthreads--; - - if (nthreads > 1) { - - /* each threads computes W[iifirst:iilast] and places them in - * work[0:n-1]; the corresponding errors in work[n:2*n-1]; - * the blocks they belong in iwork[0:n-1]; and their indices in - * iwork[n:2*n-1]; */ - - iifirst = ifirst; - chunk = isize / nthreads; - for (i=1; i 1) { - pthread_attr_destroy(&attr); - } - - return 0; -} -#else -static -int eigval_approx_proc -(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, - int *iwork) -{ - /* Input parameter */ - double pivmin = tolstruct->pivmin; - - /* Allocate workspace */ - int *isplit = (int *) malloc( n * sizeof(int) ); - assert(isplit != NULL); - - /* This is an unreduced block */ - int nsplit = 1; - isplit[0] = n; - - /* Set tolerance parameters */ - double bsrtol = sqrt(DBL_EPSILON); - - /* APPROXIMATE EIGENVALUES */ - int m, info; - double wl, wu, dummy; - odrrd("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, - &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr, - &wl, &wu, iblock, Windex, work, iwork, &info); - assert(info == 0); - assert(m == ilast-ifirst+1); - - /* clean up */ - free(isplit); - - return 0; -} -#endif - -static -int eigval_root_proc -(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, int *iwork) -{ - /* Input parameter */ - double pivmin = tolstruct->pivmin; - - /* Create random vector to perturb rrr, same seed */ - int two_n = 2*n; - int iseed[4] = {1,1,1,1}; - - double Dpivot, Dmax; - bool noREP; - - int info, i, j; - int IONE = 1, ITWO = 2; - double tmp, tmp1, tmp2; - - /* Set tolerance parameters (need to be same as in refine function) */ - double rtl = sqrt(DBL_EPSILON); - - /* Allocate workspace */ - double *randvec = (double *) malloc( 2*n * sizeof(double) ); - assert(randvec != NULL); - - /* create random vector to perturb rrr and broadcast it */ - odrnv(&ITWO, iseed, &two_n, randvec); - - /* store shift of initial RRR, here set to zero */ - E[n-1] = 0.0; - - /* find outer bounds GL, GU for block and spectral diameter */ - double gl = D[0]; - double gu = D[0]; - for (i = 0; i < n; i++) { - gl = fmin(gl, gersch[2*i] ); - gu = fmax(gu, gersch[2*i+1]); - } - double spdiam = gu - gl; - - /* find approximation of extremal eigenvalues of the block - * odrrk computes one eigenvalue of tridiagonal matrix T - * tmp1 and tmp2 one hold the eigenvalue and error, respectively */ - odrrk(&n, &IONE, &gl, &gu, D, E2, - &pivmin, &rtl, &tmp1, &tmp2, &info); - assert(info == 0); /* if info=-1 => eigenvalue did not converge */ - - double isleft = fmax(gl, tmp1-tmp2 - HUNDRED*DBL_EPSILON*fabs(tmp1-tmp2) ); - - odrrk(&n, &n, &gl, &gu, D, E2, &pivmin, &rtl, &tmp1, &tmp2, &info); - assert(info == 0); /* if info=-1 => eigenvalue did not converge */ - - double isright = fmin(gu, tmp1+tmp2 + HUNDRED*DBL_EPSILON*fabs(tmp1+tmp2) ); - - spdiam = isright - isleft; - - /* compute negcount at points s1 and s2 */ - double s1 = isleft + HALF * spdiam; - double s2 = isright - FOURTH * spdiam; /* not needed currently */ - - /* compute negcount at points s1 and s2 */ - /* cnt = number of eigenvalues in (s1,s2] = count_right - count_left - * negcnt_lft = number of eigenvalues smaller equals than s1 - * negcnt_rgt = number of eigenvalues smaller equals than s2 */ - int cnt, negcnt_lft, negcnt_rgt; - odrrc("T", &n, &s1, &s2, D, E, &pivmin, - &cnt, &negcnt_lft, &negcnt_rgt, &info); - assert(info == 0); - - /* if more of the desired eigenvectors are in the left part shift left - * and the other way around */ - int sgndef; - double sigma; - if ( negcnt_lft >= n - negcnt_lft ) { - /* shift left */ - sigma = isleft; - sgndef = ONE; - } else { - /* shift right */ - sigma = isright; - sgndef = -ONE; - } - - /* define increment to perturb initial shift to find RRR - * with not too much element growth */ - double tau = spdiam*DBL_EPSILON*n + 2.0*pivmin; - - - /* try to find initial RRR of block: - * need work space of 3*n here to store D, L, D^-1 of possible - * representation: - * D_try = work[0 : n-1] - * L_try = work[n :2*n-1] - * inv(D_try) = work[2*n:3*n-1] */ - - int off_L = n; - int off_invD = 2*n; - - int jtry; - for (jtry = 0; jtry < MAX_TRY_RRR; jtry++) { - - Dpivot = D[0] - sigma; - work[0] = Dpivot; - Dmax = fabs( work[0] ); - j = 0; - - for (i = 0; i < n-1; i++) { - work[i+off_invD] = 1.0 / work[i]; - tmp = E[j] * work[i+off_invD]; - work[i+off_L] = tmp; - Dpivot = (D[j+1] - sigma) - tmp*E[j]; - work[i+1] = Dpivot; - Dmax = fmax(Dmax, fabs(Dpivot) ); - j++; - } - - /* except representation only if not too much element growth */ - if (Dmax > MAX_GROWTH*spdiam) { - noREP = true; - } else { - noREP = false; - } - - if (noREP == true) { - /* if all eigenvalues are desired shift is made definite to use DQDS - * so we should not end here */ - if (jtry == MAX_TRY_RRR-2) { - if (sgndef == ONE) { /* floating point comparison okay here */ - sigma = gl - FUDGE_FACTOR*spdiam*DBL_EPSILON*n - - FUDGE_FACTOR*2.0*pivmin; - } else { - sigma = gu + FUDGE_FACTOR*spdiam*DBL_EPSILON*n - + FUDGE_FACTOR*2.0*pivmin; - } - } else if (jtry == MAX_TRY_RRR-1) { - fprintf(stderr,"No initial representation could be found.\n"); - exit(3); - } else { - sigma -= sgndef*tau; - tau *= 2.0; - continue; - } - } else { /* found representation */ - break; - } - } - /* end trying to find initial RRR of block */ - - /* save initial RRR and corresponding shift */ - memcpy(D, &work[0], n * sizeof(double) ); - memcpy(E, &work[n], (n-1) * sizeof(double) ); - E[n-1] = sigma; - /* work[0:4*n-1] can now be used again for anything */ - - /* perturb root rrr by small relative amount, first make sure - * that at least two values are actually disturbed enough, - * which might not be necessary */ - while( fabs(randvec[0])*RAND_FACTOR < 1.0 ) - randvec[0] *= 2.0; - while( fabs(randvec[n-1]) *RAND_FACTOR < 1.0 ) - randvec[n-1] *= 2.0; - - for (i=0; ipivmin; - - /* Multithreading */ - int nthreads; - int max_nthreads = procinfo->nthreads; - int chunk; - pthread_t *threads; - pthread_attr_t attr; - auxarg2_t *auxarg2; - - int info, i; - - /* Allocate space */ - threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) ); - assert(threads != NULL); - int *isplit = (int *) malloc( n * sizeof(int) ); - assert(isplit != NULL); - - /* This is an unreduced block (nsplit=1) */ - isplit[0] = n; - - /* Prepare multi-threading */ - if (max_nthreads > 1) { - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); - } - - /* find outer bounds GL, GU for block and spectral diameter */ - double gl = D[0]; - double gu = D[0]; - for (i = 0; i < n; i++) { - gl = fmin(gl, gersch[2*i] ); - gu = fmax(gu, gersch[2*i+1]); - } - double spdiam = gu - gl; - - /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */ - - int i_low = Windex[0]; - int i_upp = Windex[isize-1]; - double sigma = E[n-1]; - - /* calculate gaps */ - for (i=0; i 1 && isize/nthreads < 2) { - nthreads--; - } - - if (nthreads > 1) { - - int rf_begin=0, rf_end; - chunk = isize / nthreads; - for (i=1; irtol1, tolstruct->rtol2, - pivmin, spdiam); - - info = pthread_create(&threads[i], &attr, - eigval_subset_thread_r, - (void *) auxarg2); - assert(info == 0); - - rf_begin = rf_end + 1; - } - rf_end = isize-1; - - auxarg2 = create_auxarg2(n, D, - &work[off_DE2], - rf_begin, rf_end, W, Werr, Wgap, Windex, - tolstruct->rtol1, tolstruct->rtol2, - pivmin, spdiam); - - void *status = eigval_subset_thread_r( (void *) auxarg2 ); - assert(status == NULL); - - /* join threads */ - for (i=1; irtol1, &tolstruct->rtol2, &offset, W, Wgap, - Werr, work, iwork, &pivmin, &spdiam, &n, &info); - assert(info == 0); - /* needs work of dim(2*n) and iwork of dim(2*n) */ - } - /* odrrb computes gaps correctly, but not last one; - * this is ignored since the gaps are recomputed anyway */ - - /* clean up */ - free(threads); - free(isplit); - - if (max_nthreads > 1) { - pthread_attr_destroy(&attr); - } - - return 0; -} -#else -static -int eigval_refine_proc -(proc_t *procinfo, int ifirst, int ilast, - int n, double *D, double *E, double *E2, - int *Windex, int *iblock, double *gersch, tol_t *tolstruct, - double *W, double *Werr, double *Wgap, double *work, int *iwork) -{ - /* Input parameter */ - int isize = ilast-ifirst+1; - double pivmin = tolstruct->pivmin; - - /* Allocate space */ - int *isplit = (int *) malloc( n * sizeof(int) ); - assert(isplit != NULL); - - /* This is an unreduced block (nsplit=1) */ - isplit[0] = n; - - /* find outer bounds GL, GU for block and spectral diameter */ - double gl = D[0]; - double gu = D[0]; - int i; - for (i = 0; i < n; i++) { - gl = fmin(gl, gersch[2*i] ); - gu = fmax(gu, gersch[2*i+1]); - } - double spdiam = gu - gl; - - /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */ - - int i_low = Windex[0]; - int i_upp = Windex[isize-1]; - double sigma = E[n-1]; - - /* calculate gaps */ - for (i=0; irtol1, &tolstruct->rtol2, &offset, W, Wgap, - Werr, work, iwork, &pivmin, &spdiam, &n, &info); - assert(info == 0); - /* needs work of dim(2*n) and iwork of dim(2*n) */ - - /* odrrb computes gaps correctly, but not last one; - * this is ignored since the gaps are recomputed anyway */ - - /* clean up */ - free(isplit); - - return 0; -} -#endif - -static -void *eigval_subset_thread_a(void *argin) -{ - /* from input argument */ - int n, il, iu, my_il, my_iu; - double *D, *E, *E2, *gersch; - double bsrtol, pivmin; - int nsplit, *isplit; - - /* others */ - int info; - double dummy1, dummy2; - int num_vals; - double *W_tmp, *Werr_tmp, *W, *Werr; - int *iblock_tmp, *Windex_tmp, *iblock, *Windex; - double *work; - int *iwork; - - retrieve_auxarg1((auxarg1_t *) argin, &n, &D, &E, &E2, - &il, &iu, &my_il, &my_iu, &nsplit, - &isplit, &bsrtol, &pivmin, &gersch, - &W, &Werr, &Windex, &iblock); - - /* allocate memory */ - W_tmp = (double *) malloc( n * sizeof(double) ); - assert(W_tmp != NULL); - - Werr_tmp = (double *) malloc( n * sizeof(double) ); - assert(Werr_tmp != NULL); - - Windex_tmp = (int *) malloc( n * sizeof(int) ); - assert(Windex_tmp != NULL); - - iblock_tmp = (int *) malloc( n * sizeof(int) ); - assert(iblock_tmp != NULL); - - work = (double *) malloc( 4*n * sizeof(double) ); - assert (work != NULL); - - iwork = (int *) malloc( 3*n * sizeof(int) ); - assert (iwork != NULL); - - /* compute eigenvalues 'my_il' to 'my_iu', put into temporary arrays */ - odrrd("I", "B", &n, &dummy1, &dummy2, &my_il, &my_iu, gersch, - &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &num_vals, - W_tmp, Werr_tmp, &dummy1, &dummy2, iblock_tmp, Windex_tmp, - work, iwork, &info); - - assert(info == 0); - - /* copy computed values in W, Werr, Windex, iblock (which are work space) */ - memcpy(&W[my_il-il], W_tmp, num_vals * sizeof(double) ); - memcpy(&Werr[my_il-il], Werr_tmp, num_vals * sizeof(double) ); - memcpy(&Windex[my_il-il], Windex_tmp, num_vals * sizeof(int) ); - memcpy(&iblock[my_il-il], iblock_tmp, num_vals * sizeof(int) ); - - free(W_tmp); - free(Werr_tmp); - free(Windex_tmp); - free(iblock_tmp); - free(work); - free(iwork); - - return NULL; -} - -static -auxarg1_t *create_auxarg1(int n, double *D, double *E, double *E2, - int il, int iu, int my_il, int my_iu, - int nsplit, int *isplit, double bsrtol, - double pivmin, double *gersch, double *W, - double *Werr, int *Windex, int *iblock) -{ - auxarg1_t *arg; - - arg = (auxarg1_t *) malloc( sizeof(auxarg1_t) ); - assert(arg != NULL); - - arg->n = n; - arg->D = D; - arg->E = E; - arg->E2 = E2; - arg->il = il; - arg->iu = iu; - arg->my_il = my_il; - arg->my_iu = my_iu; - arg->nsplit = nsplit; - arg->isplit = isplit; - arg->bsrtol = bsrtol; - arg->pivmin = pivmin; - arg->gersch = gersch; - arg->W = W; - arg->Werr = Werr; - arg->Windex = Windex; - arg->iblock = iblock; - - return arg; -} - -static -void retrieve_auxarg1(auxarg1_t *arg, int *n, double **D, double **E, - double **E2, int *il, int *iu, int *my_il, - int *my_iu, int *nsplit, int **isplit, - double *bsrtol, double *pivmin, double **gersch, - double **W, double **Werr, int **Windex, - int **iblock) -{ - *n = arg->n; - *D = arg->D; - *E = arg->E; - *E2 = arg->E2; - *il = arg->il; - *iu = arg->iu; - *my_il = arg->my_il; - *my_iu = arg->my_iu; - *nsplit = arg->nsplit; - *isplit = arg->isplit; - *bsrtol = arg->bsrtol; - *pivmin = arg->pivmin; - *gersch = arg->gersch; - *W = arg->W; - *Werr = arg->Werr; - *Windex = arg->Windex; - *iblock = arg->iblock; - - free(arg); -} - -static -void *eigval_subset_thread_r(void *argin) -{ - /* from input argument */ - int bl_size, rf_begin, rf_end; - double *D, *DE2; - double rtol1, rtol2, pivmin; - double bl_spdiam; - - /* others */ - int info, offset; - double *W, *Werr, *Wgap; - int *Windex; - double *work; - int *iwork; - - retrieve_auxarg2((auxarg2_t *) argin, &bl_size, &D, &DE2, - &rf_begin, &rf_end, &W, &Werr, &Wgap, &Windex, &rtol1, &rtol2, - &pivmin, &bl_spdiam); - - /* malloc work space */ - work = (double *) malloc( 2*bl_size * sizeof(double) ); - assert(work != NULL); - - iwork = (int *) malloc( 2*bl_size * sizeof(int) ); - assert(iwork != NULL); - - /* special case of only one eigenvalue */ - if (rf_begin == rf_end) - Wgap[rf_begin] = 0.0; - - offset = Windex[rf_begin] - 1; - - /* call bisection routine to refine the eigenvalues */ - odrrb(&bl_size, D, DE2, &Windex[rf_begin], &Windex[rf_end], - &rtol1, &rtol2, &offset, &W[rf_begin], &Wgap[rf_begin], - &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, - &bl_size, &info); - assert(info == 0); - - /* clean up */ - free(work); - free(iwork); - - return NULL; -} - -static -auxarg2_t *create_auxarg2(int bl_size, double *D, double *DE2, - int rf_begin, int rf_end, double *W, double *Werr, - double *Wgap, int *Windex, - double rtol1, double rtol2, double pivmin, - double bl_spdiam) -{ - auxarg2_t *arg; - - arg = (auxarg2_t *) malloc( sizeof(auxarg2_t) ); - assert(arg != NULL); - - arg->bl_size = bl_size; - arg->D = D; - arg->DE2 = DE2; - arg->rf_begin = rf_begin; - arg->rf_end = rf_end; - arg->W = W; - arg->Werr = Werr; - arg->Wgap = Wgap; - arg->Windex = Windex; - arg->rtol1 = rtol1; - arg->rtol2 = rtol2; - arg->pivmin = pivmin; - arg->bl_spdiam = bl_spdiam; - - return arg; -} - -static -void retrieve_auxarg2(auxarg2_t *arg, int *bl_size, double **D, - double **DE2, int *rf_begin, int *rf_end, - double **W, double **Werr, double **Wgap, int **Windex, - double *rtol1, double *rtol2, - double *pivmin, double *bl_spdiam) -{ - *bl_size = arg->bl_size; - *D = arg->D; - *DE2 = arg->DE2; - *rf_begin = arg->rf_begin; - *rf_end = arg->rf_end; - *W = arg->W; - *Werr = arg->Werr; - *Wgap = arg->Wgap; - *Windex = arg->Windex; - *rtol1 = arg->rtol1; - *rtol2 = arg->rtol2; - *pivmin = arg->pivmin; - *bl_spdiam = arg->bl_spdiam; - - free(arg); -} - -/* - * Compare function for using qsort() on an array - * of doubles - */ -static -int cmp(const void *a1, const void *a2) -{ - double arg1 = *(double *)a1; - double arg2 = *(double *)a2; - - if (arg1 < arg2) - return -1; - else - return 1; -} diff --git a/external/pmrrr/src/plarrv.c b/external/pmrrr/src/plarrv.c deleted file mode 100644 index bafb7bdb85..0000000000 --- a/external/pmrrr/src/plarrv.c +++ /dev/null @@ -1,672 +0,0 @@ -/* Parallel computation of eigenvectors and symmetric tridiagonal - * matrix T, which is preprocessed by the routine 'plarre'. - * - * Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/plarrv.h" -#include "pmrrr/process_task.h" -#include "pmrrr/rrr.h" -#include "pmrrr/queue.h" -#include "pmrrr/structs.h" -#include "pmrrr/counter.h" - -static int assign_to_proc -(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, vec_t *Zstruct, - int *nzp, int *myfirstp); -static int cmpa(const void*, const void*); -static int init_workQ -(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ); -static void *empty_workQ(void*); -static workQ_t *create_workQ(); -static void destroy_workQ(workQ_t*); -static auxarg3_t *create_auxarg3 -(int, proc_t*, val_t*, vec_t*, tol_t*, workQ_t*, counter_t*); -static void retrieve_auxarg3 -(auxarg3_t*, int*, proc_t**, val_t**, vec_t**, tol_t**, workQ_t**, counter_t**); - -/* - * Computation of eigenvectors of a symmetric tridiagonal - */ -#ifndef DISABLE_PTHREADS -int plarrv -(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, - vec_t *Zstruct, tol_t *tolstruct, int *nzp, int *myfirstp) -{ - int nthreads = procinfo->nthreads; - int n = Dstruct->n; - double *W = Wstruct->W; - - /* Allocate work space and copy eigenvalues */ - double *Wshifted = (double*)malloc(n*sizeof(double)); - assert(Wshifted != NULL); - - memcpy(Wshifted, W, n*sizeof(double)); - Wstruct->Wshifted = Wshifted; - - pthread_t *threads = (pthread_t*)malloc(nthreads*sizeof(pthread_t)); - assert(threads != NULL); - - /* Assign eigenvectors to processes */ - assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp); - - /* Create work queue Q, counter, threads to empty Q */ - workQ_t *workQ = create_workQ(); - counter_t *num_left = PMR_create_counter(*nzp); - - threads[0] = pthread_self(); - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); - - int i; - for (i=1; in; - double *W = Wstruct->W; - - /* Allocate work space and copy eigenvalues */ - double *Wshifted = (double*)malloc(n*sizeof(double)); - assert(Wshifted != NULL); - - memcpy(Wshifted, W, n*sizeof(double)); - Wstruct->Wshifted = Wshifted; - - /* Assign eigenvectors to processes */ - assign_to_proc(procinfo, Dstruct, Wstruct, Zstruct, nzp, myfirstp); - - /* Create work queue Q, counter, threads to empty Q */ - workQ_t *workQ = create_workQ(); - counter_t *num_left = PMR_create_counter(*nzp); - - /* Initialize work queue of process */ - int info = init_workQ(procinfo, Dstruct, Wstruct, nzp, workQ); - assert(info == 0); - - /* Empty the work queue */ - auxarg3_t *auxarg = - create_auxarg3(0, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left); - void *status = empty_workQ((void*)auxarg); - assert(status == NULL); - - /* Clean up */ - free(Wshifted); - destroy_workQ(workQ); - PMR_destroy_counter(num_left); - - return 0; -} -#endif - -/* - * Assign the computation of eigenvectors to the processes - */ -static -int assign_to_proc -(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, - vec_t *Zstruct, int *nzp, int *myfirstp) -{ - /* From inputs */ - int pid = procinfo->pid; - int nproc = procinfo->nproc; - double *restrict L = Dstruct->E; - int *restrict isplit = Dstruct->isplit; - int n = Wstruct->n; - int il = *(Wstruct->il); - int iu = *(Wstruct->iu); - double *restrict W = Wstruct->W; - int *restrict Windex = Wstruct->Windex; - int *restrict iblock = Wstruct->iblock; - int *restrict iproc = Wstruct->iproc; - int *restrict Zindex = Zstruct->Zindex; - - sort_struct_t *array = (sort_struct_t *) malloc(n*sizeof(sort_struct_t)); - - int i; - for (i=0; inz = *nzp; - } - - ibegin = iend + 1; - ibegin = imin(ibegin, iu); - } /* end id */ - - for (j = iend+1; j < n; j++ ) { - iproc[array[j].ind] = -1; - Zindex[array[j].ind] = -1; - } - - free(array); - return 0; -} - -/* - * Compare function for using qsort() on an array of - * sort_structs - */ -static -int cmpa(const void *a1, const void *a2) -{ - sort_struct_t *arg1, *arg2; - - arg1 = (sort_struct_t *) a1; - arg2 = (sort_struct_t *) a2; - - /* Within block local index decides */ - if (arg1->block_ind == arg2->block_ind) { - return (arg1->local_ind - arg2->local_ind); - } else { - if (arg1->lambda < arg2->lambda) { - return -1; - } else if (arg1->lambda > arg2->lambda) { - return 1; - } else { - if (arg1->local_ind < arg2->local_ind) - return -1; - else - return 1; - } - } -} - -/* - * Initialize work queue by putting all tasks for the process - * into the work queue. - */ -static -int init_workQ -(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ) -{ - int pid = procinfo->pid; - int nproc = procinfo->nproc; - int nthreads = procinfo->nthreads; - double *restrict D = Dstruct->D; - double *restrict L = Dstruct->E; - int nsplit = Dstruct->nsplit; - int *restrict isplit = Dstruct->isplit; - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict iproc = Wstruct->iproc; - double *restrict Wshifted = Wstruct->Wshifted; - double *restrict gersch = Wstruct->gersch; - int nz = *nzp; - - /* Loop over blocks */ - int i, j, k, l; - int ibegin = 0; - for ( j=0; j= MIN_RELGAP*fabs(Wshifted[i])) - new_last = i; - else - continue; - - /* Skip rest if no eigenvalues of process */ - if (new_first > iWend || new_last < iWbegin) { - new_first = i + 1; - continue; - } - - int new_size = new_last - new_first + 1; - - if (new_size == 1) { - /* Singleton was found */ - if (new_first < iWbegin || new_first > iWend) { - new_first = i + 1; - continue; - } else { - if (new_first==iWbegin || task_inserted==true) { - /* Initialize new singleton task */ - sn_first = new_first; - sn_last = new_first; - sn_size = 1; - } else { - /* Extend singleton task by one */ - sn_last++; - sn_size++; - } - } - - /* Insert task if ... */ - if (i==iWend || sn_size>=max_size || - Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { - - double lgap; - if (sn_first == ibegin) { - lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); - } else { - lgap = Wgap[sn_first-1]; - } - - PMR_increment_rrr_dependencies(RRR); - - task_t *task = - PMR_create_s_task - (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR); - - PMR_insert_task_at_back(workQ->s_queue, task); - - task_inserted = true; - } else { - task_inserted = false; - } - } else { - /* Cluster was found */ - int cl_first = new_first; - int cl_last = new_last; - int cl_size = new_size; - - /* Split cluster into clusters by absolut criterion */ - if (cl_size > 3) { - /* Split cluster to smaller clusters [cl_first:cl_last] */ - for (k=new_first+1; k 0.8*avggap) - cl_last = k; - else - continue; - - /* Skip cluster if no eigenvalues of process in it */ - if (cl_last < iWbegin || cl_first > iWend) { - cl_first = k + 1; - continue; - } - - /* Record left gap of cluster */ - double lgap; - if (cl_first == ibegin) { - lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl); - } else { - lgap = Wgap[cl_first-1]; - } - - /* Determine processes involved in processing the cluster */ - int left_pid = nproc-1; - int right_pid = 0; - for (l=cl_first; l<=cl_last; l++) { - if (iproc[l] != -1) { - left_pid = imin(left_pid, iproc[l]); - right_pid = imax(right_pid, iproc[l]); - } - } - - rrr_t *RRR_parent = - PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0); - - task_t *task = - PMR_create_c_task - (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, - iWend, left_pid, right_pid, RRR_parent); - - /* Insert task into queue, depending if cluster need - * communication with other processes */ - if (left_pid != right_pid) - PMR_insert_task_at_back(workQ->r_queue, task); - else - PMR_insert_task_at_back(workQ->c_queue, task); - - cl_first = k + 1; - } /* end k */ - } else { - /* Cluster is too small to split, so insert it to queue */ - - /* Record left gap of cluster */ - double lgap; - if (cl_first == ibegin) { - lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); - } else { - lgap = Wgap[cl_first-1]; - } - - /* Determine processes involved */ - int left_pid = nproc-1; - int right_pid = 0; - for (l=cl_first; l<=cl_last; l++) { - if (iproc[l] != -1) { - left_pid = imin(left_pid, iproc[l]); - right_pid = imax(right_pid, iproc[l]); - } - } - - rrr_t *RRR_parent = - PMR_create_rrr - (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0); - - task_t *task = - PMR_create_c_task - (cl_first, cl_last, 1, ibegin, - iend, spdiam, lgap, iWbegin, iWend, - left_pid, right_pid, RRR_parent); - - /* Insert task into queue, depending if cluster need - * communication with other processes */ - if (left_pid != right_pid) - PMR_insert_task_at_back(workQ->r_queue, task); - else - PMR_insert_task_at_back(workQ->c_queue, task); - } - task_inserted = true; - } /* end new_size */ - - new_first = i + 1; - } /* end of splitting eigenvalues into tasks */ - - /* Set flag in RRR that last singleton is created */ - PMR_set_parent_processed_flag(RRR); - PMR_try_destroy_rrr(RRR); - - ibegin = iend + 1; - } /* end loop over blocks */ - - return 0; -} - -/* - * Processes all the tasks put in the work queue. - */ -static -void *empty_workQ(void *argin) -{ - int tid; - proc_t *procinfo; - val_t *Wstruct; - vec_t *Zstruct; - tol_t *tolstruct; - workQ_t *workQ; - counter_t *num_left; - retrieve_auxarg3 - ((auxarg3_t*)argin, &tid, &procinfo, &Wstruct, - &Zstruct, &tolstruct, &workQ, &num_left); - - int n = Wstruct->n; - - /* max. needed double precision work space: odr1v */ - double *work = (double*)malloc(4*n*sizeof(double)); - assert(work != NULL); - - /* max. needed double precision work space: odrrb */ - int *iwork = (int*)malloc(2*n*sizeof(int)); - assert(iwork != NULL); - - /* while loop to empty the work queue */ - while (PMR_get_counter_value(num_left) > 0) { - /* empty r-queue before processing other tasks */ - PMR_process_r_queue - (tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); - - task_t *task = PMR_remove_task_at_front(workQ->s_queue); - if ( task != NULL ) { - assert(task->flag == SINGLETON_TASK_FLAG); - - PMR_process_s_task - ((singleton_t*)task->data, tid, procinfo, - Wstruct, Zstruct, tolstruct, num_left, work, iwork); - free(task); - continue; - } - - task = PMR_remove_task_at_front(workQ->c_queue); - if ( task != NULL ) { - assert(task->flag == CLUSTER_TASK_FLAG); - - PMR_process_c_task - ((cluster_t*)task->data, tid, procinfo, - Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); - free(task); - continue; - } - } /* end while */ - - free(work); - free(iwork); - - return NULL; -} - -static workQ_t *create_workQ() -{ - workQ_t *wq = (workQ_t*)malloc(sizeof(workQ_t)); - - wq->r_queue = PMR_create_empty_queue(); - wq->s_queue = PMR_create_empty_queue(); - wq->c_queue = PMR_create_empty_queue(); - - return wq; -} - -static void destroy_workQ(workQ_t *wq) -{ - PMR_destroy_queue(wq->r_queue); - PMR_destroy_queue(wq->s_queue); - PMR_destroy_queue(wq->c_queue); - free(wq); -} - -static auxarg3_t* -create_auxarg3 -(int tid, proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, workQ_t *workQ, counter_t *num_left) -{ - auxarg3_t *arg = (auxarg3_t*)malloc(sizeof(auxarg3_t)); - assert(arg != NULL); - - arg->tid = tid; - arg->procinfo = procinfo; - arg->Wstruct = Wstruct; - arg->Zstruct = Zstruct; - arg->tolstruct = tolstruct; - arg->workQ = workQ; - arg->num_left = num_left; - - return arg; -} - -static void -retrieve_auxarg3 -(auxarg3_t *arg, int *tid, proc_t **procinfo, - val_t **Wstruct, vec_t **Zstruct, tol_t **tolstruct, - workQ_t **workQ, counter_t **num_left) -{ - *tid = arg->tid; - *procinfo = arg->procinfo; - *Wstruct = arg->Wstruct; - *Zstruct = arg->Zstruct; - *tolstruct = arg->tolstruct; - *workQ = arg->workQ; - *num_left = arg->num_left; - - free(arg); -} diff --git a/external/pmrrr/src/pmrrr.c b/external/pmrrr/src/pmrrr.c deleted file mode 100644 index 1c8df22eb1..0000000000 --- a/external/pmrrr/src/pmrrr.c +++ /dev/null @@ -1,680 +0,0 @@ -/* Computation of eigenvalues and eigenvectors of a symmetric - * tridiagonal matrix T, given by its diagonal elements D - * and its super-/subdiagonal elements E. - * - * See INCLUDE/pmrrr.h for more information. - * - * Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/plarre.h" -#include "pmrrr/plarrv.h" -#include "pmrrr/structs.h" - -static int handle_small_cases -(char*, char*, int*, double*, double*, - double*, double*, int*, int*, int*, - MPI_Comm, int*, int*, double*, double*,int*, int*); - -static double scale_matrix(in_t*, val_t*, bool); -static void invscale_eigenvalues(val_t*, double, int); -static void clean_up -(MPI_Comm, double*, double*, double*, - int*, int*, int*, int*, int*, proc_t*, - in_t*, val_t*, vec_t*, tol_t*); -static int refine_to_highrac -(proc_t*, char*, double*, double*,in_t*, int*, val_t*, tol_t*); - -static int cmp(const void*, const void*); -static int cmpb(const void*, const void*); - -/* - * Computation of eigenvalues and eigenvectors of a symmetric - * tridiagonal matrix T, given by its diagonal elements D - * and its super-/subdiagonal elements E. - * See README or 'pmrrr.h' for details. - */ - -int pmrrr -(char *jobz, char *range, int *np, double *D, - double *E, double *vl, double *vu, int *il, - int *iu, int *tryracp, MPI_Comm comm, int *nzp, - int *offsetp, double *W, double *Z, int *ldz, int *Zsupp) -{ - /* Input parameter */ - int n = *np; - bool onlyW = toupper(jobz[0]) == 'N'; - bool wantZ = toupper(jobz[0]) == 'V'; - bool cntval = toupper(jobz[0]) == 'C'; - bool alleig = toupper(range[0]) == 'A'; - bool valeig = toupper(range[0]) == 'V'; - bool indeig = toupper(range[0]) == 'I'; - - /* Check input parameters */ - if(!(onlyW || wantZ || cntval)) return 1; - if(!(alleig || valeig || indeig)) return 1; - if(n <= 0) return 1; - if (valeig) { - if(*vu<=*vl) return 1; - } else if (indeig) { - if (*il<1 || *il>n || *iu<*il || *iu>n) return 1; - } - - /* MPI & multithreading info */ - int is_init, is_final; - MPI_Initialized(&is_init); - MPI_Finalized(&is_final); - if (is_init!=1 || is_final==1) { - fprintf(stderr, "ERROR: MPI is not active! (init=%d, final=%d) \n", - is_init, is_final); - return 1; - } - MPI_Comm comm_dup; - MPI_Comm_dup(comm, &comm_dup); - int nproc, pid, thread_support; - MPI_Comm_size(comm_dup, &nproc); - MPI_Comm_rank(comm_dup, &pid); - MPI_Query_thread(&thread_support); - - int nthreads; - if ( !(thread_support == MPI_THREAD_MULTIPLE || - thread_support == MPI_THREAD_FUNNELED) ) { - /* Disable multithreading; note: to support multithreading with - * MPI_THREAD_SERIALIZED the code must be changed slightly; this - * is not supported at the moment */ - nthreads = 1; - } else { - char *ompvar = getenv("PMR_NUM_THREADS"); - if (ompvar == NULL) { - nthreads = DEFAULT_NUM_THREADS; - } else { - nthreads = atoi(ompvar); - } - } - -#if defined(MVAPICH2_VERSION) - if (nthreads>1) { - int mv2_affinity=1; - char *mv2_string = getenv("MV2_ENABLE_AFFINITY"); - if (mv2_string != NULL) - mv2_affinity = atoi(mv2_string); - if (mv2_affinity!=0) { - nthreads = 1; - if (pid==0) { - fprintf(stderr, "WARNING: PMRRR incurs a significant performance penalty when multithreaded with MVAPICH2 with affinity enabled. The number of threads has been reduced to one; please rerun with MV2_ENABLE_AFFINITY=0 or PMR_NUM_THREADS=1 in the future.\n"); - fflush(stderr); - } - } - } -#endif - - /* If only maximal number of local eigenvectors are queried - * return if possible here */ - *nzp = 0; - *offsetp = 0; - if (cntval) { - if ( alleig || n < DSTEMR_IF_SMALLER ) { - *nzp = iceil(n,nproc); - MPI_Comm_free(&comm_dup); - return 0; - } else if (indeig) { - *nzp = iceil(*iu-*il+1,nproc); - MPI_Comm_free(&comm_dup); - return 0; - } - } - - /* Check if computation should be done by multiple processes */ - int info; - if (n < DSTEMR_IF_SMALLER) { - info = handle_small_cases(jobz, range, np, D, E, vl, vu, il, - iu, tryracp, comm, nzp, offsetp, W, - Z, ldz, Zsupp); - MPI_Comm_free(&comm_dup); - return info; - } - - /* Allocate memory */ - double *Werr = (double*)malloc(n*sizeof(double)); assert(Werr!=NULL); - double *Wgap = (double*)malloc(n*sizeof(double)); assert(Wgap!=NULL); - double *gersch = (double*)malloc(2*n*sizeof(double)); assert(gersch!=NULL); - int *iblock = (int*)calloc(n,sizeof(int)); assert(iblock!=NULL); - int *iproc = (int*)malloc(n*sizeof(int)); assert(iproc!=NULL); - int *Windex = (int*)malloc(n*sizeof(int)); assert(Windex!=NULL); - int *isplit = (int*)malloc(n*sizeof(int)); assert(isplit!=NULL); - int *Zindex = (int*)malloc(n*sizeof(int)); assert(Zindex!=NULL); - proc_t *procinfo = (proc_t*)malloc(sizeof(proc_t)); assert(procinfo!=NULL); - in_t *Dstruct = (in_t*)malloc(sizeof(in_t)); assert(Dstruct!=NULL); - val_t *Wstruct = (val_t*)malloc(sizeof(val_t)); assert(Wstruct!=NULL); - vec_t *Zstruct = (vec_t*)malloc(sizeof(vec_t)); assert(Zstruct!=NULL); - tol_t *tolstruct = (tol_t*)malloc(sizeof(tol_t)); assert(tolstruct!=NULL); - - /* Bundle variables into a structures */ - procinfo->pid = pid; - procinfo->nproc = nproc; - procinfo->comm = comm_dup; - procinfo->nthreads = nthreads; - procinfo->thread_support = thread_support; - - Dstruct->n = n; - Dstruct->D = D; - Dstruct->E = E; - Dstruct->isplit = isplit; - - Wstruct->n = n; - Wstruct->vl = vl; - Wstruct->vu = vu; - Wstruct->il = il; - Wstruct->iu = iu; - Wstruct->W = W; - Wstruct->Werr = Werr; - Wstruct->Wgap = Wgap; - Wstruct->Windex = Windex; - Wstruct->iblock = iblock; - Wstruct->iproc = iproc; - Wstruct->gersch = gersch; - - Zstruct->ldz = *ldz; - Zstruct->nz = 0; - Zstruct->Z = Z; - Zstruct->Zsupp = Zsupp; - Zstruct->Zindex = Zindex; - - /* Scale matrix to allowable range, returns 1.0 if not scaled */ - double scale = scale_matrix(Dstruct, Wstruct, valeig); - - /* Test if matrix warrants more expensive computations which - * guarantees high relative accuracy */ - if (*tryracp) - odrrr(&n, D, E, &info); /* 0 - rel acc */ - else info = -1; - - int i; - double *Dcopy, *E2copy; - if (info == 0) { - /* This case is extremely rare in practice */ - tolstruct->split = DBL_EPSILON; - /* Copy original data needed for refinement later */ - Dcopy = (double*)malloc(n*sizeof(double)); assert(Dcopy!=NULL); - memcpy(Dcopy, D, n*sizeof(double)); - E2copy = (double*)malloc(n*sizeof(double)); assert(E2copy!=NULL); - for (i=0; isplit = -DBL_EPSILON; - *tryracp = 0; - } - - if (!wantZ) { - /* Compute eigenvalues to full precision */ - tolstruct->rtol1 = 4.0 * DBL_EPSILON; - tolstruct->rtol2 = 4.0 * DBL_EPSILON; - } else { - /* Do not compute to full accuracy first, but refine later */ - tolstruct->rtol1 = sqrt(DBL_EPSILON); - tolstruct->rtol1 = fmin(1e-2*MIN_RELGAP, tolstruct->rtol1); - tolstruct->rtol2 = sqrt(DBL_EPSILON)*5.0E-3; - tolstruct->rtol2 = fmin(5e-6*MIN_RELGAP, tolstruct->rtol2); - tolstruct->rtol2 = fmax(4.0 * DBL_EPSILON, tolstruct->rtol2); - } - - /* Compute all eigenvalues: sorted by block */ - info = plarre(procinfo,jobz,range,Dstruct,Wstruct,tolstruct,nzp,offsetp); - assert(info == 0); - - /* If just number of local eigenvectors are queried */ - if (cntval & valeig) { - clean_up(comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, - isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, - tolstruct); - return 0; - } - - /* If only eigenvalues are to be computed */ - if (!wantZ) { - - /* Refine to high relative with respect to input T */ - if (*tryracp) { - info = - refine_to_highrac - (procinfo, jobz, Dcopy, E2copy, Dstruct, nzp, Wstruct, tolstruct); - assert(info == 0); - } - - /* Sort eigenvalues */ - qsort(W, n, sizeof(double), cmp); - - /* Only keep subset ifirst:ilast */ - int ifirst, ilast, isize; - int iil = *il; - int iiu = *iu; - int ifirst_tmp=iil; - for (i=0; i 0) { - memmove(W, &W[ifirst-1], *nzp * sizeof(double)); - } - - /* If matrix was scaled, rescale eigenvalues */ - invscale_eigenvalues(Wstruct, scale, *nzp); - - clean_up - (comm_dup, Werr, Wgap, gersch, iblock, iproc, Windex, - isplit, Zindex, procinfo, Dstruct, Wstruct, Zstruct, tolstruct); - - return 0; - } /* end of only eigenvalues to compute */ - - /* Compute eigenvectors */ - info = plarrv(procinfo, Dstruct, Wstruct, Zstruct, tolstruct, - nzp, offsetp); - assert(info == 0); - - /* Refine to high relative with respect to input matrix */ - if (*tryracp) { - info = refine_to_highrac(procinfo, jobz, Dcopy, E2copy, - Dstruct, nzp, Wstruct, tolstruct); - assert(info == 0); - } - - /* If matrix was scaled, rescale eigenvalues */ - invscale_eigenvalues(Wstruct, scale, n); - - /* Make the first nz elements of W contains the eigenvalues - * associated to the process */ - int j, im=0; - for (j=0; jind < arg2->ind) - return -1; - else - return 1; -} - -/* - * Free's on allocated memory of pmrrr routine - */ -static -void clean_up -(MPI_Comm comm, double *Werr, double *Wgap, - double *gersch, int *iblock, int *iproc, - int *Windex, int *isplit, int *Zindex, - proc_t *procinfo, in_t *Dstruct, - val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct) -{ - MPI_Comm_free(&comm); - free(Werr); - free(Wgap); - free(gersch); - free(iblock); - free(iproc); - free(Windex); - free(isplit); - free(Zindex); - free(procinfo); - free(Dstruct); - free(Wstruct); - free(Zstruct); - free(tolstruct); -} - -/* - * Wrapper to call LAPACKs DSTEMR for small matrices - */ -static -int handle_small_cases -(char *jobz, char *range, int *np, double *D, - double *E, double *vlp, double *vup, int *ilp, - int *iup, int *tryracp, MPI_Comm comm, int *nzp, - int *myfirstp, double *W, double *Z, int *ldzp, int *Zsupp) -{ - bool cntval = toupper(jobz[0]) == 'C'; - bool onlyW = toupper(jobz[0]) == 'N'; - bool wantZ = toupper(jobz[0]) == 'V'; - bool indeig = toupper(range[0]) == 'I'; - int n = *np; - int ldz_tmp = *np; - int ldz = *ldzp; - - int nproc, pid; - MPI_Comm_size(comm, &nproc); - MPI_Comm_rank(comm, &pid); - - int lwork, liwork; - double *Z_tmp; - if (onlyW) { - lwork = 12*n; - liwork = 8*n; - } else if (cntval) { - lwork = 18*n; - liwork = 10*n; - } else if (wantZ) { - lwork = 18*n; - liwork = 10*n; - int itmp; - if (indeig) itmp = *iup-*ilp+1; - else itmp = n; - Z_tmp = (double*)malloc(n*itmp*sizeof(double)); assert(Z_tmp!=NULL); - } else { - return 1; - } - - double *work = (double*)malloc(lwork*sizeof(double)); assert(work != NULL); - int *iwork = (int*)malloc(liwork*sizeof(int)); assert(iwork!=NULL); - - if (cntval) { - /* Note: at the moment, jobz="C" should never get here, since - * it is blocked before. */ - int m, info, MINUSONE=-1; - double cnt; - odstmr("V", "V", np, D, E, vlp, vup, ilp, iup, &m, W, &cnt, - &ldz_tmp, &MINUSONE, Zsupp, tryracp, work, &lwork, iwork, - &liwork, &info); - assert(info == 0); - - *nzp = (int) ceil(cnt/nproc); - free(work); free(iwork); - return 0; - } - - int m, info; - odstmr(jobz, range, np, D, E, vlp, vup, ilp, iup, &m, W, Z_tmp, - &ldz_tmp, np, Zsupp, tryracp, work, &lwork, iwork, - &liwork, &info); - assert(info == 0); - - int chunk = iceil(m,nproc); - int myfirst = imin(pid * chunk, m); - int mylast = imin((pid+1)*chunk - 1, m - 1); - int mysize = mylast - myfirst + 1; - - if (mysize > 0) { - memmove(W, &W[myfirst], mysize*sizeof(double)); - if (wantZ) { - if (ldz == ldz_tmp) { - /* copy everything in one chunk */ - memcpy(Z, &Z_tmp[myfirst*ldz_tmp], n*mysize*sizeof(double)); - } else { - /* copy each vector seperately */ - int i; - for (i=0; in; - double *restrict D = Dstruct->D; - double *restrict E = Dstruct->E; - double *vl = Wstruct->vl; - double *vu = Wstruct->vu; - - /* Set some machine dependent constants */ - double smlnum = DBL_MIN / DBL_EPSILON; - double bignum = 1.0 / smlnum; - double rmin = sqrt(smlnum); - double rmax = fmin(sqrt(bignum), 1.0 / sqrt(sqrt(DBL_MIN))); - - /* Scale matrix to allowable range */ - double scale = 1.0; - double T_norm = odnst("M", &n, D, E); /* returns max(|T(i,j)|) */ - if (T_norm > 0 && T_norm < rmin) { - scale = rmin / T_norm; - } else if (T_norm > rmax) { - scale = rmax / T_norm; - } - - if (scale != 1.0) { /* FP cmp okay */ - /* Scale matrix and matrix norm */ - int itmp = n-1; - int IONE = 1; - odscal(&n, &scale, D, &IONE); - odscal(&itmp, &scale, E, &IONE); - if (valeig == true) { - /* Scale eigenvalue bounds */ - *vl *= scale; - *vu *= scale; - } - } /* end scaling */ - - return scale; -} - -/* - * If matrix scaled, rescale eigenvalues - */ -static -void invscale_eigenvalues(val_t *Wstruct, double scale, int size) -{ - if (scale != 1.0) { /* FP cmp okay */ - double *vl = Wstruct->vl; - double *vu = Wstruct->vu; - double *W = Wstruct->W; - - double invscale = 1.0 / scale; - *vl *= invscale; - *vu *= invscale; - int IONE = 1; - odscal(&size, &invscale, W, &IONE); - } -} - -/* - * Refines the eigenvalue to high relative accuracy with - * respect to the input matrix; - * Note: In principle this part could be fully parallel too, - * but it will only rarely be called and not much work - * is involved, if the eigenvalues are not small in magnitude - * even no work at all is not uncommon. - */ -static -int refine_to_highrac -(proc_t *procinfo, char *jobz, double *D, double *E2, in_t *Dstruct, int *nzp, - val_t *Wstruct, tol_t *tolstruct) -{ - int n = Dstruct->n; - int nsplit = Dstruct->nsplit; - int *restrict isplit = Dstruct->isplit; - double spdiam = Dstruct->spdiam; - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - - double *work = (double*)malloc(2*n*sizeof(double)); assert(work!=NULL); - int *iwork = (int*)malloc(2*n*sizeof(int)); assert(iwork!=NULL); - - int j, ibegin=0; - for (j=0; jpivmin; - odrrj(&isize, &D[ibegin], &E2[ibegin], &ifirst, &ilast, &tol, - &offset, &W[ibegin], &Werr[ibegin], work, iwork, &pivmin, - &spdiam, &info); - assert(info == 0); - - ibegin = iend + 1; - } /* end j */ - - free(work); - free(iwork); - return 0; -} - -/* - * Compare function for using qsort() on an array - * of doubles - */ -static -int cmp(const void *a1, const void *a2) -{ - double arg1 = *(double *)a1; - double arg2 = *(double *)a2; - - if (arg1 < arg2) - return -1; - else - return 1; -} - -/* - * Routine to communicate eigenvalues such that every process has - * all computed eigenvalues (iu-il+1) in W; this routine is designed - * to be called right after 'pmrrr'. - */ -int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, double *W) -{ - MPI_Comm comm_dup; - MPI_Comm_dup(comm, &comm_dup); - int nproc; - MPI_Comm_size(comm_dup, &nproc); - - int *rcount = (int*)malloc(nproc*sizeof(int)); assert(rcount!=NULL); - int *rdispl = (int*)malloc(nproc*sizeof(int)); assert(rdispl!=NULL); - double *work = (double*)malloc((*nz+1)*sizeof(double)); assert(work!=NULL); - - if (*nz > 0) - memcpy(work, W, (*nz)*sizeof(double) ); - - MPI_Allgather(nz, 1, MPI_INT, rcount, 1, MPI_INT, comm_dup); - - MPI_Allgather(myfirstp, 1, MPI_INT, rdispl, 1, MPI_INT, comm_dup); - - MPI_Allgatherv - (work, *nz, MPI_DOUBLE, W, rcount, rdispl, MPI_DOUBLE, comm_dup); - - MPI_Comm_free(&comm_dup); - free(rcount); - free(rdispl); - free(work); - - return 0; -} - -void pmr_comm_eigvals_ -(MPI_Fint *comm, int *nz, int *myfirstp, double *W, int *info) -{ - MPI_Comm c_comm = MPI_Comm_f2c(*comm); - *info = PMR_comm_eigvals(c_comm, nz, myfirstp, W); -} diff --git a/external/pmrrr/src/process_c_task.c b/external/pmrrr/src/process_c_task.c deleted file mode 100644 index 984258880e..0000000000 --- a/external/pmrrr/src/process_c_task.c +++ /dev/null @@ -1,800 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/plarrv.h" -#include "pmrrr/queue.h" -#include "pmrrr/counter.h" -#include "pmrrr/rrr.h" -#include "pmrrr/structs.h" -#include "pmrrr/process_task.h" - -#define THREE 3.0 -#define FOUR 4.0 - -static inline -rrr_t* compute_new_rrr -(cluster_t *cl, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, double *work, int *iwork); - -static inline -int refine_eigvals -(cluster_t *cl, int rf_begin, int rf_end, - int tid, proc_t *procinfo, - rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, counter_t *num_left, - workQ_t *workQ, double *work, int *iwork); - -static inline -int communicate_refined_eigvals -(cluster_t *cl, proc_t *procinfo, int tid, val_t *Wstruct, rrr_t *RRR); - -static inline -int test_comm_status(cluster_t *cl, val_t *Wstruct); - -static inline -int create_subtasks -(cluster_t *cl, int tid, proc_t *procinfo, - rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, - workQ_t *workQ, counter_t *num_left); - -int PMR_process_c_task -(cluster_t *cl, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, workQ_t *workQ, - counter_t *num_left, double *work, int *iwork) -{ - int depth = cl->depth; - int left_pid = cl->left_pid; - int right_pid = cl->right_pid; - int pid = procinfo->pid; - int n = Wstruct->n; - - /* Protection against infinitely deep trees */ - assert(depth < n); - - /* Check if task only need to be split into subtasks */ - int status; - if (cl->wait_until_refined == true) { - status = test_comm_status(cl, Wstruct); - if (status == COMM_COMPLETE) { - create_subtasks - (cl, tid, procinfo, cl->RRR, Wstruct, Zstruct, workQ, num_left); - return C_TASK_PROCESSED; - } else { - return C_TASK_NOT_PROCESSED; - } - } - - /* Otherwise: compute new rrr, refine part own cluster, - * communicate the refined eigenvalues if necessary, - * and create subtasks if possible */ - rrr_t *RRR = - compute_new_rrr - (cl, tid, procinfo, Wstruct, Zstruct, tolstruct, work, iwork); - - /* Refine eigenvalues 'rf_begin' to 'rf_end' */ - int rf_begin, rf_end; - if (left_pid != right_pid) { - rf_begin = imax(cl->begin, cl->proc_W_begin); - rf_end = imin(cl->end, cl->proc_W_end); - } - if (pid == left_pid ) rf_begin = cl->begin; - if (pid == right_pid) rf_end = cl->end; - - refine_eigvals - (cl, rf_begin, rf_end, tid, procinfo, RRR, - Wstruct, Zstruct, tolstruct, num_left, workQ, work, iwork); - - /* Communicate results: non-blocking */ - status = COMM_COMPLETE; - if (left_pid != right_pid) { - status = communicate_refined_eigvals(cl, procinfo, tid, Wstruct, RRR); - /* status = COMM_INCOMPLETE if communication not finished */ - } - - if (status == COMM_COMPLETE) { - create_subtasks(cl, tid, procinfo, RRR, Wstruct, Zstruct, workQ, num_left); - return C_TASK_PROCESSED; - } else { - return C_TASK_NOT_PROCESSED; - } -} /* end process_c_task */ - -static inline -rrr_t* compute_new_rrr -(cluster_t *cl, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, double *work, int *iwork) -{ - int cl_begin = cl->begin; - int cl_end = cl->end; - int cl_size = cl_end - cl_begin + 1; - int depth = cl->depth; - int bl_begin = cl->bl_begin; - int bl_end = cl->bl_end; - int bl_size = bl_end - bl_begin + 1; - double bl_spdiam = cl->bl_spdiam; - rrr_t *RRR_parent = cl->RRR; - - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict Windex = Wstruct->Windex; - double *restrict Wshifted = Wstruct->Wshifted; - - /* Allocate memory for new representation for cluster */ - double *D = (double*)malloc(bl_size*sizeof(double)); - double *L = (double*)malloc(bl_size*sizeof(double)); - double *DL = (double*)malloc(bl_size*sizeof(double)); - double *DLL = (double*)malloc(bl_size*sizeof(double)); - assert(D!=NULL); - assert(L!=NULL); - assert(DL!=NULL); - assert(DLL!=NULL); - - /* Recompute DL and DLL */ - int i; - double tmp; - double *D_parent = RRR_parent->D; - double *L_parent = RRR_parent->L; - for (i=0; ipivmin; - - /* to shift as close as possible refine extremal eigenvalues */ - int k, p; - double savegap; - for (k=0; k<2; k++) { - if (k == 0) { - p = Windex[cl_begin]; - savegap = Wgap[cl_begin]; - Wgap[cl_begin] = 0.0; - } else { - p = Windex[cl_end ]; - savegap = Wgap[cl_end]; - Wgap[cl_end] = 0.0; - } - - int info; - int offset = Windex[cl_begin] - 1; - odrrb - (&bl_size, D_parent, DLL_parent, &p, &p, &RQtol, - &RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin], - &Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); - assert( info == 0 ); - - if (k == 0) { - Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1]) - - (Wshifted[cl_begin]+Werr[cl_begin]) ); - } else { - Wgap[cl_end] = savegap; - } - } /* end k */ - - double left_gap = cl->lgap; - double right_gap = Wgap[cl_end]; - - /* Compute new RRR and store it in D and L */ - int info; - int IONE=1; - double tau; - odrrf - (&bl_size, D_parent, L_parent, DL_parent, - &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin], - &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap, - &pivmin, &tau, D, L, work, &info); - assert(info == 0); - - /* Update shift and store it */ - tmp = L_parent[bl_size-1] + tau; - L[bl_size-1] = tmp; - - /* Compute D*L and D*L*L */ - for (i=0; icopied_parent_rrr == true) { - free(RRR_parent->D); - free(RRR_parent->L); - } - rrr_t *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1); - - /* Update shifted eigenvalues */ - for (k=cl_begin; k<=cl_end; k++) { - double fudge = THREE*DBL_EPSILON*fabs(Wshifted[k]); - Wshifted[k] -= tau; - fudge += FOUR*DBL_EPSILON*fabs(Wshifted[k]); - Werr[k] += fudge; - } - - /* Assure that structure is not freed while it is processed */ - PMR_increment_rrr_dependencies(RRR); - - return RRR; -} /* end compute_new_rrr */ - -/* - * Refine eigenvalues with respect to new rrr - */ -static inline -int refine_eigvals -(cluster_t *cl, int rf_begin, int rf_end, - int tid, proc_t *procinfo, rrr_t *RRR, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, counter_t *num_left, - workQ_t *workQ, double *work, int *iwork) -{ - int rf_size = rf_end-rf_begin+1; - int bl_begin = cl->bl_begin; - int bl_end = cl->bl_end; - int bl_size = bl_end - bl_begin + 1; - double bl_spdiam = cl->bl_spdiam; - - double *restrict D = RRR->D; - double *restrict L = RRR->L; - double *restrict DLL = RRR->DLL; - - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict Windex = Wstruct->Windex; - double *restrict Wshifted = Wstruct->Wshifted; - - double pivmin = tolstruct->pivmin; - double rtol1 = tolstruct->rtol1; - double rtol2 = tolstruct->rtol2; - - /* Determine if refinement should be split into tasks */ - int left = PMR_get_counter_value(num_left); - int nz = Zstruct->nz; - int nthreads = procinfo->nthreads; - int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads)); - int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK); - - int offset, i, p, q; - double savegap; - task_t *task; - if (own_part < rf_size) { - int others_part = rf_size - own_part; - int num_tasks = iceil(rf_size, own_part) - 1; /* >1 */ - int chunk = others_part/num_tasks; /* floor */ - - int ts_begin=rf_begin, ts_end; - p = Windex[rf_begin]; - for (i=0; ir_queue, task); - else - PMR_refine_sem_post(task->data); /* case chunk=0 */ - - ts_begin = ts_end + 1; - p = q + 1; - } - ts_end = rf_end; - q = Windex[rf_end]; - offset = Windex[ts_begin] - 1; - - /* Call bisection routine to refine the values */ - if (ts_begin <= ts_end) { - int info; - odrrb - (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, - &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], - work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); - assert( info == 0 ); - } - - /* Empty "all" r-queue refine tasks before waiting */ - int num_iter = PMR_get_num_tasks(workQ->r_queue); - for (i=0; ir_queue); - if (task != NULL) { - if (task->flag == REFINE_TASK_FLAG) { - PMR_process_r_task - ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork); - free(task); - } else { - PMR_insert_task_at_back(workQ->r_queue, task); - } - } /* if task */ - } /* end for i */ - - /* Barrier: wait until all created tasks finished */ - int count = num_tasks; - while (count > 0) { - while ( PMR_refine_sem_wait(task->data) != 0 ) { }; - count--; - } - PMR_refine_sem_destroy(task->data); - - /* Edit right gap at splitting point */ - ts_begin = rf_begin; - for (i=0; ibegin; - int cl_end = cl->end; - int bl_begin = cl->bl_begin; - int bl_end = cl->bl_end; - int proc_W_begin = cl->proc_W_begin; - int proc_W_end = cl->proc_W_end; - int left_pid = cl->left_pid; - int right_pid = cl->right_pid; - int pid = procinfo->pid; - - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - double *restrict Wshifted = Wstruct->Wshifted; - int *restrict iproc = Wstruct->iproc; - - int my_begin = imax(cl_begin, proc_W_begin); - int my_end = imin(cl_end, proc_W_end); - if (pid == left_pid ) my_begin = cl_begin; - if (pid == right_pid) my_end = cl_end; - int my_size = my_end - my_begin + 1; - - int i, k; - int num_messages = 0; - for (i=left_pid; i<=right_pid; i++) { - for (k=cl_begin; k<=cl_end; k++) { - if (iproc[k] == i) { - num_messages += 4; - break; - } - } - } - - MPI_Request *requests=(MPI_Request*)malloc(num_messages*sizeof(MPI_Request)); - MPI_Status *stats = (MPI_Status*)malloc(num_messages*sizeof(MPI_Status)); - - int p; - int i_msg = 0; - int other_begin, other_end, other_size; - for (p=left_pid; p<=right_pid; p++) { - bool proc_involved = false; - for (k=cl_begin; k<=cl_end; k++) { - if (iproc[k] == p) { - proc_involved = true; - break; - } - } - - int u; - if (p != pid && proc_involved == true) { - /* send message to process p (non-blocking) */ - MPI_Isend(&Wshifted[my_begin], my_size, MPI_DOUBLE, p, - my_begin, procinfo->comm, &requests[4*i_msg]); - - MPI_Isend(&Werr[my_begin], my_size, MPI_DOUBLE, p, - my_begin, procinfo->comm, &requests[4*i_msg+1]); - - /* Find eigenvalues in of process p */ - other_size = 0; - for (k=cl_begin; k<=cl_end; k++) { - if (other_size == 0 && iproc[k] == p) { - other_begin = k; - other_end = k; - other_size++; - u = k+1; - while (u <=cl_end && iproc[u] == p) { - other_end++; - other_size++; - u++; - } - } - } - if (p == left_pid) { - other_begin = cl_begin; - u = cl_begin; - while (iproc[u] == -1) { - other_size++; - u++; - } - } - if (p == right_pid) { - other_end = cl_end; - u = cl_end; - while (iproc[u] == -1) { - other_size++; - u--; - } - } - - /* receive message from process p (non-blocking) */ - MPI_Irecv(&Wshifted[other_begin], other_size, MPI_DOUBLE, p, - other_begin, procinfo->comm, &requests[4*i_msg+2]); - - MPI_Irecv(&Werr[other_begin], other_size, MPI_DOUBLE, p, - other_begin, procinfo->comm, &requests[4*i_msg+3]); - - i_msg++; - } - } /* end for p */ - num_messages = 4*i_msg; /* messages actually send */ - - int communication_done; - int status = MPI_Testall(num_messages, requests, &communication_done, stats); - assert(status == MPI_SUCCESS); - - if (communication_done == true) { - double sigma = RRR->L[bl_end-bl_begin]; - for (k=cl_begin; knum_messages = num_messages; - comm->requests = requests; - comm->stats = stats; - cl->wait_until_refined = true; - cl->messages = comm; - - status = COMM_INCOMPLETE; - } - - return status; -} /* end communicate_refined_eigvals */ - -static inline -int test_comm_status(cluster_t *cl, val_t *Wstruct) -{ - int cl_begin = cl->begin; - int cl_end = cl->end; - int bl_begin = cl->bl_begin; - int bl_end = cl->bl_end; - rrr_t *RRR = cl->RRR; - comm_t *comm = cl->messages; - int num_messages = comm->num_messages; - MPI_Request *requests = comm->requests; - MPI_Status *stats = comm->stats; - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - double *restrict Wshifted = Wstruct->Wshifted; - - /* Test if communication complete */ - int communication_done; - int status = MPI_Testall(num_messages, requests, &communication_done, stats); - assert(status == MPI_SUCCESS); - - if (communication_done == true) { - cl->wait_until_refined = false; - - int k; - double sigma = RRR->L[bl_end-bl_begin]; - for (k=cl_begin; kbegin; - int cl_end = cl->end; - int depth = cl->depth; - int bl_begin = cl->bl_begin; - int bl_end = cl->bl_end; - int bl_size = bl_end - bl_begin + 1; - double bl_spdiam = cl->bl_spdiam; - double lgap; - - int pid = procinfo->pid; - int nproc = procinfo->nproc; - int nthreads = procinfo->nthreads; - bool proc_involved=true; - - double *restrict Wgap = Wstruct->Wgap; - double *restrict Wshifted = Wstruct->Wshifted; - int *restrict iproc = Wstruct->iproc; - - int ldz = Zstruct->ldz; - double *restrict Z = Zstruct->Z; - int *restrict Zindex = Zstruct->Zindex; - - /* others */ - int i, l, k; - int max_size; - task_t *task; - bool task_inserted; - int new_first, new_last, new_size, new_ftt1, new_ftt2; - int sn_first, sn_last, sn_size; - rrr_t *RRR_parent; - int new_lpid, new_rpid; - double *restrict D_parent; - double *restrict L_parent; - int my_first, my_last; - bool copy_parent_rrr; - - - max_size = fmax(1, PMR_get_counter_value(num_left) / - (fmin(depth+1,4)*nthreads) ); - task_inserted = true; - new_first = cl_begin; - for (i=cl_begin; i<=cl_end; i++) { - - if ( i == cl_end ) - new_last = i; - else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) ) - new_last = i; - else - continue; - - new_size = new_last - new_first + 1; - - if (new_size == 1) { - /* singleton was found */ - - if (new_first==cl_begin || task_inserted==true) { - /* initialize new singleton task */ - sn_first = new_first; - sn_last = new_first; - sn_size = 1; - } else { - /* extend singleton task by one */ - sn_last++; - sn_size++; - } - - /* insert task if ... */ - if (i==cl_end || sn_size>=max_size || - Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { - - /* Check if process involved in s-task */ - proc_involved = false; - for (k=sn_first; k<=sn_last; k++) { - if (iproc[k] == pid) { - proc_involved = true; - break; - } - } - if (proc_involved == false) { - task_inserted = true; - new_first = i + 1; - continue; - } - - /* Insert task as process is involved */ - if (sn_first == cl_begin) { - lgap = cl->lgap; - } else { - lgap = Wgap[sn_first-1]; - } - - PMR_increment_rrr_dependencies(RRR); - - task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin, - bl_end, bl_spdiam, lgap, RRR); - - PMR_insert_task_at_back(workQ->s_queue, task); - - task_inserted = true; - } else { - task_inserted = false; - } - - } else { - /* cluster was found */ - - /* check if process involved in processing the new cluster */ - new_lpid = nproc-1; - new_rpid = -1; - for (l=new_first; l<=new_last; l++) { - if (iproc[l] != -1) { - new_lpid = imin(new_lpid, iproc[l]); - new_rpid = imax(new_rpid, iproc[l]); - } - } - if (new_lpid > pid || new_rpid < pid) { - task_inserted = true; - new_first = i + 1; - continue; - } - - /* find gap to the left */ - if (new_first == cl_begin) { - lgap = cl->lgap; - } else { - lgap = Wgap[new_first - 1]; - } - - /* determine where to store the parent rrr needed by the - * cluster to find its new rrr */ - my_first = imax(new_first, cl->proc_W_begin); - my_last = imin(new_last, cl->proc_W_end); - if ( my_first == my_last ) { - /* only one eigenvalue of cluster belongs to process */ - copy_parent_rrr = true; - } else { - /* store parent rrr in Z at column new_ftt */ - copy_parent_rrr = false; - } - new_ftt1 = Zindex[my_first ]; - new_ftt2 = Zindex[my_first + 1]; - - if (copy_parent_rrr == true) { - /* Copy parent RRR into alloceted arrays and mark them - * for freeing later */ - D_parent = (double *) malloc(bl_size * sizeof(double)); - assert(D_parent != NULL); - - L_parent = (double *) malloc(bl_size * sizeof(double)); - assert(L_parent != NULL); - - memcpy(D_parent, RRR->D, bl_size*sizeof(double)); - memcpy(L_parent, RRR->L, bl_size*sizeof(double)); - - RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, - NULL, bl_size, depth); - PMR_set_copied_parent_rrr_flag(RRR_parent, true); - - } else { - /* copy parent RRR into Z to make cluster task independent */ - memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, - bl_size*sizeof(double)); - memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, - bl_size*sizeof(double)); - - RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin], - &Z[new_ftt2*ldz + bl_begin], - NULL, NULL, bl_size, depth); - } - - /* Create the task for the cluster and put it in the queue */ - task = PMR_create_c_task(new_first, new_last, depth+1, - bl_begin, bl_end, bl_spdiam, lgap, - cl->proc_W_begin, cl->proc_W_end, - new_lpid, new_rpid, RRR_parent); - - if (new_lpid != new_rpid) - PMR_insert_task_at_back(workQ->r_queue, task); - else - PMR_insert_task_at_back(workQ->c_queue, task); - - task_inserted = true; - - } /* if singleton or cluster found */ - - new_first = i + 1; - } /* end i */ - - /* set flag in RRR that last singleton is created */ - PMR_set_parent_processed_flag(RRR); - - /* clean up */ - PMR_try_destroy_rrr(RRR); - free(cl); - - return 0; -} /* end create_subtasks */ diff --git a/external/pmrrr/src/process_r_task.c b/external/pmrrr/src/process_r_task.c deleted file mode 100644 index e587122e5f..0000000000 --- a/external/pmrrr/src/process_r_task.c +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/plarrv.h" -#include "pmrrr/queue.h" -#include "pmrrr/counter.h" -#include "pmrrr/structs.h" -#include "pmrrr/tasks.h" -#include "pmrrr/process_task.h" - -int PMR_process_r_task(refine_t *rf, proc_t *procinfo, val_t *Wstruct, - tol_t *tolstruct, double *work, int *iwork); - -/* - * Executes all tasks which are in the r-queue at the moment of the - * call. This routine is called to make sure that all tasks in the - * queue are dequeued before continueing with other tasks. - */ -void PMR_process_r_queue -(int tid, proc_t *procinfo, val_t *Wstruct, - vec_t *Zstruct, tol_t *tolstruct, - workQ_t *workQ, counter_t *num_left, - double *work, int *iwork) -{ - int thread_support = procinfo->thread_support; - int num_tasks = PMR_get_num_tasks(workQ->r_queue); - - int t; - for (t=0; tr_queue); - if (task != NULL) { - if (task->flag == CLUSTER_TASK_FLAG) { - if (thread_support != MPI_THREAD_FUNNELED || tid == 0) { - /* if MPI_THREAD_FUNNELED only tid==0 should process - * these tasks, otherwise any thread can do it */ - int status = - PMR_process_c_task - ((cluster_t*)task->data, tid, procinfo, Wstruct, - Zstruct, tolstruct, workQ, num_left, work, iwork); - if (status == C_TASK_PROCESSED) - free(task); - else - PMR_insert_task_at_back(workQ->r_queue, task); - } else { - PMR_insert_task_at_back(workQ->r_queue, task); - } - } /* end if cluster task */ - else if (task->flag == REFINE_TASK_FLAG) { - PMR_process_r_task - ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork); - free(task); - } - } /* end if task removed */ - } /* end for t */ -} /* end process_r_queue */ - -/* - * Process the task of refining a subset of eigenvalues. - */ -int PMR_process_r_task -(refine_t *rf, proc_t *procinfo, - val_t *Wstruct, tol_t *tolstruct, double *work, int *iwork) -{ - int ts_begin = rf->begin; - double *restrict D = rf->D; - double *restrict DLL = rf->DLL; - int p = rf->p; - int q = rf->q; - int bl_size = rf->bl_size; - double bl_spdiam = rf->bl_spdiam; - - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict Windex = Wstruct->Windex; - double *restrict Wshifted = Wstruct->Wshifted; - - double savegap; - if (p == q) { - savegap = Wgap[ts_begin]; - Wgap[ts_begin] = 0.0; - } - - int info; - int offset = Windex[ts_begin]-1; - double rtol1 = tolstruct->rtol1; - double rtol2 = tolstruct->rtol2; - double pivmin = tolstruct->pivmin; - odrrb - (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, - &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], - work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); - assert(info == 0); - - if (p == q) - Wgap[ts_begin] = savegap; - - PMR_refine_sem_post(rf); - free(rf); - - return 0; -} diff --git a/external/pmrrr/src/process_s_task.c b/external/pmrrr/src/process_s_task.c deleted file mode 100644 index 5d6b6660e9..0000000000 --- a/external/pmrrr/src/process_s_task.c +++ /dev/null @@ -1,310 +0,0 @@ -/* Copyright (c) 2010, RWTH Aachen University - * All rights reserved. - * - * Copyright (c) 2015, Jack Poulson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * * Neither the name of the RWTH Aachen University nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH - * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), - * August 2010, Version 0.6 - * - * This code was the result of a collaboration between - * Matthias Petschow and Paolo Bientinesi. When you use this - * code, kindly reference a paper related to this work. - * - */ -#include "pmrrr.h" -#include "pmrrr/rrr.h" -#include "pmrrr/counter.h" -#include "pmrrr/structs.h" -#include "pmrrr/tasks.h" -#include "pmrrr/process_task.h" - -int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo, - val_t *Wstruct, vec_t *Zstruct, - tol_t *tolstruct, counter_t *num_left, - double *work, int *iwork) -{ - /* Inputs */ - int begin = sng->begin; - int end = sng->end; - int bl_begin = sng->bl_begin; - int bl_end = sng->bl_end; - int bl_size = bl_end - bl_begin + 1; - double bl_spdiam = sng->bl_spdiam; - rrr_t *RRR = sng->RRR; - double *restrict D = RRR->D; - double *restrict L = RRR->L; - double *restrict DL = RRR->DL; - double *restrict DLL = RRR->DLL; - - int pid = procinfo->pid; - int n = Wstruct->n; - double *restrict W = Wstruct->W; - double *restrict Werr = Wstruct->Werr; - double *restrict Wgap = Wstruct->Wgap; - int *restrict Windex = Wstruct->Windex; - int *restrict iproc = Wstruct->iproc; - double *restrict Wshifted = Wstruct->Wshifted; - int ldz = Zstruct->ldz; - double *restrict Z = Zstruct->Z; - int *restrict isuppZ = Zstruct->Zsupp;; - int *restrict Zindex = Zstruct->Zindex; - double pivmin = tolstruct->pivmin; - - /* others */ - int info, i, k, itmp, num_decrement=0; - int IONE = 1; - double DZERO = 0.0; - double tol, lambda, left, right; - int i_local, zind; - double gap, lgap, rgap, gaptol, savedgap, tmp; - bool usedBS, usedRQ, needBS, wantNC, step2II; - int r, offset; - double twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON; - double residual, bstres, bstw; - int i_supmn, i_supmx; - double RQcorr; - int negcount; - int sgndef, suppsize; - double sigma; - int i_Zfrom, i_Zto; - double ztz, norminv, mingma; - - - /* set tolerance parameter */ - tol = 4.0 * log( (double) bl_size ) * DBL_EPSILON; - - /* loop over all singletons in the task */ - for (i=begin; i<=end; i++) { - - /* check if eigenvector is supposed to be computed by - * the process */ - if (iproc[i] != pid) - continue; - num_decrement++; - - if (bl_size == 1) { - /* set eigenvector to column of identity matrix */ - zind = Zindex[i]; - memset(&Z[zind*ldz], 0.0, n*sizeof(double) ); - Z[zind*ldz + bl_begin] = 1.0; - isuppZ[2*zind ] = bl_begin + 1; - isuppZ[2*zind + 1] = bl_begin + 1; - continue; - } - - lambda = Wshifted[i]; - left = Wshifted[i] - Werr[i]; - right = Wshifted[i] + Werr[i]; - i_local = Windex[i]; - r = 0; - - /* compute left and right gap */ - if (i == bl_begin) - lgap = DBL_EPSILON * fmax( fabs(left), fabs(right) ); - else if (i == begin) - lgap = sng->lgap; - else - lgap = Wgap[i-1]; - - if (i == bl_end) { - rgap = DBL_EPSILON * fmax( fabs(left), fabs(right) ); - } else { - rgap = Wgap[i]; - } - - gap = fmin(lgap, rgap); - - if ( i == bl_begin || i == bl_end ) { - gaptol = 0.0; - } else { - gaptol = gap * DBL_EPSILON; - } - - /* initialize lower and upper value of support */ - i_supmn = bl_size; - i_supmx = 1; - - /* update Wgap so that it holds minimum gap and save the - * old value */ - savedgap = Wgap[i]; - Wgap[i] = gap; - - /* initialize flags indicating if bisection or Rayleigh-Quotient - * correction was used */ - usedBS = false; - usedRQ = false; - - /* the need for bisection is initially turned off */ - needBS = !TRY_RQC; - - /* IEEE floating point is assumed, so that all 0 bits are 0.0 */ - zind = Zindex[i]; - memset(&Z[zind*ldz], 0.0, n*sizeof(double)); - - /* inverse iteration with twisted factorization */ - for (k=1; k<=MAXITER; k++) { - - if (needBS == true) { - usedBS = true; - itmp = r; - - offset = Windex[i] - 1; - tmp = Wgap[i]; - Wgap[i] = 0.0; - - odrrb(&bl_size, D, DLL, &i_local, &i_local, &DZERO, - &twoeps, &offset, &Wshifted[i], &Wgap[i], - &Werr[i], work, iwork, &pivmin, &bl_spdiam, - &itmp, &info); - assert(info == 0); - - Wgap[i] = tmp; - lambda = Wshifted[i]; - r = 0; - } - wantNC = (usedBS == true) ? false : true; - - /* compute the eigenvector corresponding to lambda */ - odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, - &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, - &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], - &norminv, &residual, &RQcorr, work); - - if (k == 1) { - bstres = residual; - bstw = lambda; - } else if (residual < bstres) { - bstres = residual; - bstw = lambda; - } - - /* update support held */ - i_supmn = imin(i_supmn, isuppZ[2*zind ]); - i_supmx = imax(i_supmx, isuppZ[2*zind + 1]); - - /* Convergence test for Rayleigh Quotient Iteration - * not done if bisection was used */ - if ( !usedBS && residual > tol*gap - && fabs(RQcorr) > RQtol*fabs(lambda) ) { - - if (i_local <= negcount) { - sgndef = -1; /* wanted eigenvalue lies to the left */ - } else { - sgndef = 1; /* wanted eigenvalue lies to the right */ - } - - if ( RQcorr*sgndef >= 0.0 - && lambda+RQcorr <= right - && lambda+RQcorr >= left ) { - usedRQ = true; - if ( sgndef == 1 ) - left = lambda; - else - right = lambda; - Wshifted[i] = 0.5*(left + right); - lambda += RQcorr; - } else { /* bisection is needed */ - needBS = true; - } - - if ( right-left < RQtol*fabs(lambda) ) { - /* eigenvalue computed to bisection accuracy - * => compute eigenvector */ - usedBS = true; - } else if ( k == MAXITER-1 ) { - /* for last iteration use bisection */ - needBS = true; - } - } else { - /* go to next iteration */ - break; - } - - } /* end k */ - - /* if necessary call odr1v to improve error angle by 2nd step */ - step2II = false; - if ( usedRQ && usedBS && (bstres <= residual) ) { - lambda = bstw; - step2II = true; - } - if ( step2II == true ) { - odr1v(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, - &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, - &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], - &norminv, &residual, &RQcorr, work); - } - Wshifted[i] = lambda; - - /* compute support w.r.t. whole matrix - * block beginning is offset for each support */ - isuppZ[2*zind ] += bl_begin; - isuppZ[2*zind + 1] += bl_begin; - - /* ensure vector is okay if support changed in RQI - * minus ones because of indices starting from zero */ - i_Zfrom = isuppZ[2*zind ] - 1; - i_Zto = isuppZ[2*zind + 1] - 1; - i_supmn += bl_begin - 1; - i_supmx += bl_begin - 1; - if ( i_supmn < i_Zfrom ) { - for ( k=i_supmn; k < i_Zfrom; k++ ) { - Z[k + zind*ldz] = 0.0; - } - } - if ( i_supmx > i_Zto ) { - for ( k=i_Zto+1; k <= i_supmx; k++ ) { - Z[k + zind*ldz] = 0.0; - } - } - - /* normalize eigenvector */ - suppsize = i_Zto - i_Zfrom + 1; - odscal(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE); - - sigma = L[bl_size-1]; - W[i] = lambda + sigma; - - if (i < end) - Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]); - - } /* end i */ - - /* decrement counter */ - PMR_decrement_counter(num_left, num_decrement); - - /* clean up */ - free(sng); - PMR_try_destroy_rrr(RRR); - - return 0; -} diff --git a/external/pmrrr/src/queue.cpp b/external/pmrrr/src/queue.cpp new file mode 100644 index 0000000000..5a9ef0db8c --- /dev/null +++ b/external/pmrrr/src/queue.cpp @@ -0,0 +1,251 @@ +/* Copyright (c) 2010, RWTH Aachen University + * All rights reserved. + * + * Copyright (c) 2015, Jack Poulson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the RWTH Aachen University nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RWTH + * AACHEN UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Coded by Matthias Petschow (petschow@aices.rwth-aachen.de), + * August 2010, Version 0.6 + * + * This code was the result of a collaboration between + * Matthias Petschow and Paolo Bientinesi. When you use this + * code, kindly reference a paper related to this work. + * + */ + +#include +#include +#include + +#include +#include + +#ifndef DISABLE_PTHREADS +# include +#endif + +namespace pmrrr { namespace detail { + + int PMR_queue_init_lock(queue_t *queue) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_init(&queue->lock, NULL); + #else + int info = pthread_spin_init(&queue->lock, PTHREAD_PROCESS_PRIVATE); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + void PMR_queue_destroy_lock(queue_t *queue) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + pthread_mutex_destroy(&queue->lock); + #else + pthread_spin_destroy(&queue->lock); + #endif + #endif + } + + int PMR_queue_lock(queue_t *queue) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_lock(&queue->lock); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_lock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_lock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_lock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_lock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_lock returned %d\n",info); + #else + int info = pthread_spin_lock(&queue->lock); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + int PMR_queue_unlock(queue_t *queue) + { + #ifndef DISABLE_PTHREADS + #ifdef NOSPINLOCKS + int info = pthread_mutex_unlock(&queue->lock); + if( info == EINVAL ) + fprintf(stderr,"pthread_mutex_unlock returned EINVAL\n"); + else if( info == EAGAIN ) + fprintf(stderr,"pthread_mutex_unlock returned EAGAIN\n"); + else if( info == EDEADLK ) + fprintf(stderr,"pthread_mutex_unlock returned EDEADLK\n"); + else if( info == EPERM ) + fprintf(stderr,"pthread_mutex_unlock returned EPERM\n"); + else + fprintf(stderr,"pthread_mutex_unlock returned %d\n",info); + #else + int info = pthread_spin_unlock(&queue->lock); + #endif + assert(info == 0); + return info; + #else + return 0; + #endif + } + + queue_t *PMR_create_empty_queue(void) + { + queue_t *queue = (queue_t *) malloc(sizeof(queue_t)); + assert(queue != NULL); + + queue->num_tasks = 0; + queue->head = NULL; + queue->back = NULL; + + PMR_queue_init_lock(queue); + return queue; + } + + void PMR_destroy_queue(queue_t *queue) + { + PMR_queue_destroy_lock(queue); + free(queue); + } + + int PMR_insert_task_at_front(queue_t *queue, task_t *task) + { + + int info = PMR_queue_lock(queue); + + queue->num_tasks++; + task->next = queue->head; + if (queue->head == NULL) + queue->back = task; + else + queue->head->prev = task; + queue->head = task; + + info |= PMR_queue_unlock(queue); + assert(info == 0); + return info; + } + + int PMR_insert_task_at_back(queue_t *queue, task_t *task) + { + int info = PMR_queue_lock(queue); + + queue->num_tasks++; + task->prev = queue->back; + task->next = NULL; + if (queue->head == NULL) + queue->head = task; + else + queue->back->next = task; + queue->back = task; + + info |= PMR_queue_unlock(queue); + assert(info == 0); + return info; + } + + task_t *PMR_remove_task_at_front(queue_t *queue) + { + int info = PMR_queue_lock(queue); + + task_t *task = queue->head; + if (queue->head != NULL) { + /* at least one element */ + queue->num_tasks--; + if (queue->head->next == NULL) { + /* last task removed */ + queue->head = NULL; + queue->back = NULL; + } else { + /* at least two tasks */ + queue->head->next->prev = NULL; + queue->head = queue->head->next; + } + } + + info |= PMR_queue_unlock(queue); + assert(info == 0); + return task; + } + + task_t *PMR_remove_task_at_back (queue_t *queue) + { + int info = PMR_queue_lock(queue); + + task_t *task = queue->back; + if (queue->back != NULL) { + /* at least one element */ + queue->num_tasks--; + if (queue->back->prev == NULL) { + /* last task removed */ + queue->head = NULL; + queue->back = NULL; + } else { + /* at least two tasks */ + queue->back->prev->next = NULL; + queue->back = queue->back->prev; + } + } + + info |= PMR_queue_unlock(queue); + assert(info == 0); + return task; + } + + + + int PMR_get_num_tasks(queue_t *queue) + { + + int info = PMR_queue_lock(queue); + int num_tasks = queue->num_tasks; + info |= PMR_queue_unlock(queue); + assert(info == 0); + return num_tasks; + } + +} // namespace detail + +} // namespace pmrrr + diff --git a/include/El/core/imports/pmrrr.hpp b/include/El/core/imports/pmrrr.hpp index 40538713e7..42aff99b98 100644 --- a/include/El/core/imports/pmrrr.hpp +++ b/include/El/core/imports/pmrrr.hpp @@ -6,9 +6,14 @@ which can be found in the LICENSE file in the root directory, or at http://opensource.org/licenses/BSD-2-Clause */ + #ifndef EL_IMPORTS_PMRRR_HPP #define EL_IMPORTS_PMRRR_HPP +#include + +#include + namespace El { namespace herm_tridiag_eig { @@ -17,11 +22,6 @@ struct Estimate { int numGlobalEigenvalues; }; -// Return an upper bound on the number of (local) eigenvalues in the given range -Estimate EigEstimate -( int n, double* d, double* e, double* w, mpi::Comm comm, - double lowerBound, double upperBound ); - struct Info { int numLocalEigenvalues; int numGlobalEigenvalues; @@ -29,35 +29,204 @@ struct Info { int firstLocalEigenvalue; }; +// Return upper bounds on the number of (local) eigenvalues in the given range, +// (lowerBound,upperBound] +template +Estimate EigEstimate +( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, + FloatingType lowerBound, FloatingType upperBound ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::EigEstimate")) + + Estimate estimate; + char jobz='C'; + char range='V'; + int il, iu; + int highAccuracy=0; + int nz, offset; + int ldz=1; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, + &highAccuracy, comm.comm, &nz, &offset, w, static_cast(nullptr), &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); + + estimate.numLocalEigenvalues = nz; + estimate.numGlobalEigenvalues = mpi::AllReduce( nz, comm ); + return estimate; +} + // Compute all of the eigenvalues -Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm ); +template +Info Eig( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + char jobz='N'; + char range='A'; + FloatingType vl, vu; + int il, iu; + int highAccuracy=0; + int nz, offset; + int ldz=1; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm, + &nz, &offset, w, static_cast(nullptr), &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); + + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + info.numGlobalEigenvalues=n; + return info; +} // Compute all of the eigenpairs +template Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm ); +( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + char jobz='V'; + char range='A'; + FloatingType vl, vu; + int il, iu; + int highAccuracy=0; + int nz, offset; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm, + &nz, &offset, w, Z, &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); + + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + info.numGlobalEigenvalues=n; + return info; +} -// Compute all of the eigenvalues in [lowerBound,upperBound) +// Compute all of the eigenvalues in (lowerBound,upperBound] +template Info Eig -( int n, double* d, double* e, double* w, mpi::Comm comm, - double lowerBound, double upperBound ); +( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, + FloatingType lowerBound, FloatingType upperBound ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + char jobz='N'; + char range='V'; + int il, iu; + int highAccuracy=0; + int nz, offset; + int ldz=1; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, + &highAccuracy, comm.comm, &nz, &offset, w, static_cast(nullptr), &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); -// Compute all of the eigenpairs with eigenvalues in [lowerBound,upperBound) + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm ); + return info; +} + +// Compute all of the eigenpairs with eigenvalues in (lowerBound,upperBound] +template Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, - double lowerBound, double upperBound ); +( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm, + FloatingType lowerBound, FloatingType upperBound ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + char jobz='V'; + char range='V'; + int il, iu; + int highAccuracy=0; + int nz, offset; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, + &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); -// Compute all of the eigenvalues with indices in [lowerBound,upperBound) + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm ); + return info; +} + +// Compute all of the eigenvalues with indices in [lowerBound,upperBound] +template Info Eig -( int n, double* d, double* e, double* w, mpi::Comm comm, - int lowerBound, int upperBound ); +( int n, FloatingType* d, FloatingType* e, FloatingType* w, mpi::Comm comm, + int lowerBound, int upperBound ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + ++lowerBound; + ++upperBound; + char jobz='N'; + char range='I'; + FloatingType vl, vu; + int highAccuracy=0; + int nz, offset; + int ldz=1; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, + &highAccuracy, comm.comm, &nz, &offset, w, static_cast(nullptr), &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); + + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + info.numGlobalEigenvalues=(upperBound-lowerBound)+1; + return info; +} -// Compute all of the eigenpairs with ordered eigenvalue indices in -// [lowerBound,upperBound) +// Compute all of the eigenpairs with eigenvalues indices in +// [lowerBound,upperBound] +template Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, - int lowerBound, int upperBound ); +( int n, FloatingType* d, FloatingType* e, FloatingType* w, FloatingType* Z, int ldz, mpi::Comm comm, + int lowerBound, int upperBound ) +{ + DEBUG_ONLY(CSE cse("herm_tridiag_eig::Eig")) + + Info info; + ++lowerBound; + ++upperBound; + char jobz='V'; + char range='I'; + FloatingType vl, vu; + int highAccuracy=0; + int nz, offset; + vector ZSupport(2*n); + int retval = pmrrr::pmrrr + ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, + &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() ); + if( retval != 0 ) + RuntimeError("pmrrr returned ",retval); + + info.numLocalEigenvalues=nz; + info.firstLocalEigenvalue=offset; + info.numGlobalEigenvalues=(upperBound-lowerBound)+1; + return info; +} } // namespace herm_tridiag_eig } // namespace El -#endif // ifndef EL_IMPORTS_PMRRR_HPP +#endif diff --git a/include/El/core/imports/pmrrr.hpp.old b/include/El/core/imports/pmrrr.hpp.old new file mode 100644 index 0000000000..f7a7de571b --- /dev/null +++ b/include/El/core/imports/pmrrr.hpp.old @@ -0,0 +1,64 @@ +/* + Copyright (c) 2009-2015, Jack Poulson + All rights reserved. + + This file is part of Elemental and is under the BSD 2-Clause License, + which can be found in the LICENSE file in the root directory, or at + http://opensource.org/licenses/BSD-2-Clause +*/ +#pragma once +#ifndef EL_IMPORTS_PMRRR_HPP +#define EL_IMPORTS_PMRRR_HPP + +namespace El { +namespace herm_tridiag_eig { + +struct Estimate { + int numLocalEigenvalues; + int numGlobalEigenvalues; +}; + +// Return an upper bound on the number of (local) eigenvalues in the given range +Estimate EigEstimate +( int n, double* d, double* e, double* w, mpi::Comm comm, + double lowerBound, double upperBound ); + +struct Info { + int numLocalEigenvalues; + int numGlobalEigenvalues; + + int firstLocalEigenvalue; +}; + +// Compute all of the eigenvalues +Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm ); + +// Compute all of the eigenpairs +Info Eig +( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm ); + +// Compute all of the eigenvalues in [lowerBound,upperBound) +Info Eig +( int n, double* d, double* e, double* w, mpi::Comm comm, + double lowerBound, double upperBound ); + +// Compute all of the eigenpairs with eigenvalues in [lowerBound,upperBound) +Info Eig +( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, + double lowerBound, double upperBound ); + +// Compute all of the eigenvalues with indices in [lowerBound,upperBound) +Info Eig +( int n, double* d, double* e, double* w, mpi::Comm comm, + int lowerBound, int upperBound ); + +// Compute all of the eigenpairs with ordered eigenvalue indices in +// [lowerBound,upperBound) +Info Eig +( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, + int lowerBound, int upperBound ); + +} // namespace herm_tridiag_eig +} // namespace El + +#endif // ifndef EL_IMPORTS_PMRRR_HPP diff --git a/src/core/imports/pmrrr.cpp b/src/core/imports/pmrrr.cpp deleted file mode 100644 index a1438a6a99..0000000000 --- a/src/core/imports/pmrrr.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* - Copyright (c) 2009-2016, Jack Poulson - All rights reserved. - - This file is part of Elemental and is under the BSD 2-Clause License, - which can be found in the LICENSE file in the root directory, or at - http://opensource.org/licenses/BSD-2-Clause -*/ -#include - -extern "C" { - -int pmrrr -( const char* jobz, // 'N' ~ only eigenvalues, 'V' ~ also eigenvectors - const char* range, // 'A'~all eigenpairs, 'V'~interval (vl,vu], 'I'~il-iu - const int* n, // size of matrix - double* d, // full diagonal of tridiagonal matrix [length n] - double* e, // full subdiagonal in first n-1 entries [length n] - const double* vl, // if range=='V', compute eigenpairs in (vl,vu] - const double* vu, - const int* il, // if range=='I', compute il-iu eigenpairs - const int* iu, - int* tryrac, // if nonzero, try for high relative accuracy - MPI_Comm comm, - int* nz, // number of locally computed eigenvectors - int* offset, // the first eigenpair computed by our process - double* w, // eigenvalues corresponding to local eigenvectors [length nz] - double* Z, // local eigenvectors [size ldz x nz] - const int* ldz, // leading dimension of Z - int* ZSupp // support of eigenvectors [length 2n] -); - -} // extern "C" - -namespace El { -namespace herm_tridiag_eig { - -// Return upper bounds on the number of (local) eigenvalues in the given range, -// (lowerBound,upperBound] -Estimate EigEstimate -( int n, double* d, double* e, double* w, mpi::Comm comm, - double lowerBound, double upperBound ) -{ - DEBUG_CSE - Estimate estimate; - char jobz='C'; - char range='V'; - int il, iu; - int highAccuracy=0; - int nz, offset; - int ldz=1; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, - &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - estimate.numLocalEigenvalues = nz; - estimate.numGlobalEigenvalues = mpi::AllReduce( nz, comm ); - return estimate; -} - -// Compute all of the eigenvalues -Info Eig( int n, double* d, double* e, double* w, mpi::Comm comm ) -{ - DEBUG_CSE - Info info; - char jobz='N'; - char range='A'; - double vl, vu; - int il, iu; - int highAccuracy=0; - int nz, offset; - int ldz=1; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm, - &nz, &offset, w, 0, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - info.numGlobalEigenvalues=n; - return info; -} - -// Compute all of the eigenpairs -Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm ) -{ - DEBUG_CSE - Info info; - char jobz='V'; - char range='A'; - double vl, vu; - int il, iu; - int highAccuracy=0; - int nz, offset; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &vl, &vu, &il, &iu, &highAccuracy, comm.comm, - &nz, &offset, w, Z, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - info.numGlobalEigenvalues=n; - return info; -} - -// Compute all of the eigenvalues in (lowerBound,upperBound] -Info Eig -( int n, double* d, double* e, double* w, mpi::Comm comm, - double lowerBound, double upperBound ) -{ - DEBUG_CSE - Info info; - char jobz='N'; - char range='V'; - int il, iu; - int highAccuracy=0; - int nz, offset; - int ldz=1; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, - &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm ); - return info; -} - -// Compute all of the eigenpairs with eigenvalues in (lowerBound,upperBound] -Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, - double lowerBound, double upperBound ) -{ - DEBUG_CSE - Info info; - char jobz='V'; - char range='V'; - int il, iu; - int highAccuracy=0; - int nz, offset; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &lowerBound, &upperBound, &il, &iu, - &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - mpi::AllReduce( &nz, &info.numGlobalEigenvalues, 1, mpi::SUM, comm ); - return info; -} - -// Compute all of the eigenvalues with indices in [lowerBound,upperBound] -Info Eig -( int n, double* d, double* e, double* w, mpi::Comm comm, - int lowerBound, int upperBound ) -{ - DEBUG_CSE - Info info; - ++lowerBound; - ++upperBound; - char jobz='N'; - char range='I'; - double vl, vu; - int highAccuracy=0; - int nz, offset; - int ldz=1; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, - &highAccuracy, comm.comm, &nz, &offset, w, 0, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - info.numGlobalEigenvalues=(upperBound-lowerBound)+1; - return info; -} - -// Compute all of the eigenpairs with eigenvalues indices in -// [lowerBound,upperBound] -Info Eig -( int n, double* d, double* e, double* w, double* Z, int ldz, mpi::Comm comm, - int lowerBound, int upperBound ) -{ - DEBUG_CSE - Info info; - ++lowerBound; - ++upperBound; - char jobz='V'; - char range='I'; - double vl, vu; - int highAccuracy=0; - int nz, offset; - vector ZSupport(2*n); - int retval = pmrrr - ( &jobz, &range, &n, d, e, &vl, &vu, &lowerBound, &upperBound, - &highAccuracy, comm.comm, &nz, &offset, w, Z, &ldz, ZSupport.data() ); - if( retval != 0 ) - RuntimeError("pmrrr returned ",retval); - - info.numLocalEigenvalues=nz; - info.firstLocalEigenvalue=offset; - info.numGlobalEigenvalues=(upperBound-lowerBound)+1; - return info; -} - -} // namespace herm_tridiag_eig -} // namespace El diff --git a/src/lapack_like/spectral/HermitianTridiagEig.cpp b/src/lapack_like/spectral/HermitianTridiagEig.cpp index 01da2f6ede..342bbadb63 100644 --- a/src/lapack_like/spectral/HermitianTridiagEig.cpp +++ b/src/lapack_like/spectral/HermitianTridiagEig.cpp @@ -1141,11 +1141,11 @@ MRRRHelper QCtrl.rowAlign = 0; DistMatrixWriteProxy wProx( wPre, wCtrl ); - DistMatrixWriteProxy QProx( QPre, QCtrl ); + DistMatrixWriteProxy QProx( QPre, QCtrl ); auto& w = wProx.Get(); auto& Q = QProx.Get(); - DistMatrix d_STAR_STAR(g), dSub_STAR_STAR(g); + DistMatrix d_STAR_STAR(g), dSub_STAR_STAR(g); Copy( d, d_STAR_STAR ); dSub_STAR_STAR.Resize( n-1, 1, n ); Copy( dSub, dSub_STAR_STAR ); @@ -1153,7 +1153,7 @@ MRRRHelper Int k; if( ctrl.subset.rangeSubset ) { - vector dVector(n), dSubVector(n), wVector(n); + vector dVector(n), dSubVector(n), wVector(n); MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n ); MemCopy( dSubVector.data(), dSub_STAR_STAR.Buffer(), n-1 ); auto estimate = herm_tridiag_eig::EigEstimate @@ -1171,7 +1171,7 @@ MRRRHelper Q.Resize( n, k ); herm_tridiag_eig::Info rangeInfo; - vector wVector(n); + vector wVector(n); if( ctrl.subset.rangeSubset ) rangeInfo = herm_tridiag_eig::Eig ( int(n), d_STAR_STAR.Buffer(), dSub_STAR_STAR.Buffer(), @@ -1215,14 +1215,14 @@ MRRRHelper typedef Complex C; HermitianTridiagEigInfo info; - DistMatrix d_STAR_STAR(g); - DistMatrix,STAR,STAR> dSub_STAR_STAR(g); + DistMatrix d_STAR_STAR(g); + DistMatrix,STAR,STAR> dSub_STAR_STAR(g); Copy( d, d_STAR_STAR ); dSub_STAR_STAR.Resize( n-1, 1, n ); Copy( dSub, dSub_STAR_STAR ); - DistMatrix dSubReal(g); - DistMatrix,STAR,STAR> phase(g); + DistMatrix dSubReal(g); + DistMatrix,STAR,STAR> phase(g); RemovePhase( dSub_STAR_STAR, dSubReal, phase ); ElementalProxyCtrl wCtrl, QCtrl; @@ -1239,7 +1239,7 @@ MRRRHelper Int k; if( ctrl.subset.rangeSubset ) { - vector dVector(n), dSubVector(n), wVector(n); + vector dVector(n), dSubVector(n), wVector(n); MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n ); MemCopy( dSubVector.data(), dSubReal.Buffer(), n-1 ); auto estimate = herm_tridiag_eig::EigEstimate @@ -1254,11 +1254,11 @@ MRRRHelper k = ( n==0 ? 0 : ctrl.subset.upperIndex-ctrl.subset.lowerIndex+1 ); else k = n; - DistMatrix QReal(g); + DistMatrix QReal(g); QReal.Resize( n, k ); herm_tridiag_eig::Info rangeInfo; - vector wVector(n); + vector wVector(n); if( ctrl.subset.rangeSubset ) rangeInfo = herm_tridiag_eig::Eig ( int(n), d_STAR_STAR.Buffer(), dSubReal.Buffer(), @@ -1407,12 +1407,12 @@ Int MRRREstimateHelper { DEBUG_CSE const Int n = d.Height(); - DistMatrix d_STAR_STAR( d.Grid() ); - DistMatrix dSub_STAR_STAR( d.Grid() ); + DistMatrix d_STAR_STAR( d.Grid() ); + DistMatrix dSub_STAR_STAR( d.Grid() ); Copy( d, d_STAR_STAR ); dSub_STAR_STAR.Resize( n-1, 1, n ); Copy( dSub, dSub_STAR_STAR ); - vector dVector(n), dSubVector(n), wVector(n); + vector dVector(n), dSubVector(n), wVector(n); MemCopy( dVector.data(), d_STAR_STAR.Buffer(), n ); MemCopy( dSubVector.data(), dSub_STAR_STAR.Buffer(), n-1 ); auto estimate = herm_tridiag_eig::EigEstimate