Skip to content

Commit

Permalink
Add function nearest_correlaction_matrix
Browse files Browse the repository at this point in the history
- remove function _is_positive_definite()
- add symmetry and diagonal 1.0 to tests
- update usage. replace _nearest_positive_definite() with nearest_correlation_matrix()
- add back correlation matrix diagonal check in iman conover
- add function nearest_correlation_matrix and tests
  • Loading branch information
tommyod committed Dec 2, 2024
1 parent a5f8d0b commit f2cc08b
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 39 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ dependencies = [
"segyio",
"xlrd",
"xtgeo>=2.15",
"cvxpy",
]

[project.urls]
Expand Down
132 changes: 95 additions & 37 deletions src/semeio/fmudesign/create_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from datetime import datetime
from pathlib import Path

import cvxpy as cp
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy
from scipy.stats import qmc
Expand All @@ -17,42 +17,98 @@
from semeio.fmudesign.iman_conover import ImanConover


def _is_positive_definite(b_mat):
"""Returns true when input is positive-definite, via Cholesky"""
try:
_ = la.cholesky(b_mat)
return True
except la.LinAlgError:
return False


def _nearest_positive_definite(a_mat):
"""Implementation taken from:
https://stackoverflow.com/questions/43238173/
python-convert-matrix-to-positive-semi-definite/43244194#43244194
def nearest_correlation_matrix(matrix, *, weights=None, eps=1e-6, verbose=False):
"""Returns the correlation matrix nearest to `matrix`, weighted elementwise
by `weights`.
Parameters
----------
matrix : np.ndarray
The matrix that we want to find the nearest correlation matrix to.
A square 2-dimensional NumPy ndarray.
weights : np.ndarray or None, optional
An elementwise weighting matrix. A square 2-dimensional NumPy ndarray
that must have the same shape as `matrix`. The default is None.
eps : float, optional
Tolerance for the optimization solver. The default is 1e-6.
verbose : bool, optional
Whether to print information from the solver. The default is False.
Returns
-------
np.ndarray
The correlation matrix that is nearest to the input matrix.
Notes
-----
This function implements equation (3) in the paper "An Augmented Lagrangian
Dual Approach for the H-Weighted Nearest Correlation Matrix Problem" by
Houduo Qi and Defeng Sun.
http://www.personal.soton.ac.uk/hdqi/REPORTS/Cor_matrix_H.pdf
Another useful link is:
https://nhigham.com/2020/04/14/what-is-a-correlation-matrix/
Examples
--------
>>> X = np.array([[1, 1, 0],
... [1, 1, 1],
... [0, 1, 1]])
>>> nearest_correlation_matrix(X)
array([[1. , 0.76068..., 0.15729...],
[0.76068..., 1. , 0.76068...],
[0.15729..., 0.76068..., 1. ]])
>>> H = np.array([[1, 0.5, 0.1],
... [0.5, 1, 0.5],
... [0.1, 0.5, 1]])
>>> nearest_correlation_matrix(X, weights=H)
array([[1. , 0.94171..., 0.77365...],
[0.94171..., 1. , 0.94171...],
[0.77365..., 0.94171..., 1. ]])
"""

b_mat = (a_mat + a_mat.T) / 2
_, s_mat, v_mat = la.svd(b_mat)

h_mat = np.dot(v_mat.T, np.dot(np.diag(s_mat), v_mat))

a2_mat = (b_mat + h_mat) / 2

a3_mat = (a2_mat + a2_mat.T) / 2

if _is_positive_definite(a3_mat):
return a3_mat

spacing = np.spacing(la.norm(a_mat))
identity = np.eye(a_mat.shape[0])
kiter = 1
while not _is_positive_definite(a3_mat):
mineig = np.min(np.real(la.eigvals(a3_mat)))
a3_mat += identity * (-mineig * kiter**2 + spacing)
kiter += 1

return a3_mat
if not isinstance(matrix, np.ndarray):
raise TypeError("Input argument `matrix` must be np.ndarray.")
if not matrix.ndim == 2 and matrix.shape[0] == matrix.shape[1]:
raise ValueError("Input argument `matrix` must be square.")

# Switch to notation used in the paper
G = matrix.copy()
H = np.ones_like(G) if weights is None else weights

if not isinstance(H, np.ndarray):
raise TypeError("Input argument `weights` must be np.ndarray.")
if not (H.shape == G.shape):
raise ValueError("Argument `weights` must have same shape as `matrix`.")

# To constrain Y to be Positive Symmetric Definite (PSD), you need to
# either set PSD=True here, or add the special constraint 'Y >> 0'. See:
# https://www.cvxpy.org/tutorial/constraints/index.html#semidefinite-matrices
X = cp.Variable(shape=G.shape, PSD=True)

# Objective and constraints for minimizing the weighted frobenius norm.
# This is equation (3) in the paper. We set (X - eps * I) >> 0 as an extra
# constraint. Mathematically this is not needed, but numerically it helps
# by nudging the solution slightly more, so the minimum eigenvalue is > 0.
objective = cp.norm(cp.multiply(H, X - G), "fro")
eps_identity = (eps / G.shape[0]) * 10
constraints = [cp.diag(X) == 1.0, (X - eps_identity * np.eye(G.shape[0])) >> 0]

# For solver options, see:
# https://www.cvxpy.org/tutorial/solvers/index.html#setting-solver-options
problem = cp.Problem(cp.Minimize(objective), constraints)
problem.solve(solver="SCS", verbose=verbose, eps=eps)
X = X.value.copy() # Copy over solution

# We might get small eigenvalues due to numerics. Attempt to fix this by
# recursively calling the solver with smaller values of epsilon. This is
# an extra fail-safe that is very rarely triggered on actual data.
is_symmetric = np.allclose(X, X.T)
is_PD = np.linalg.eig(X).eigenvalues.min() > 0
if not (is_symmetric and is_PD) and (eps > 1e-14):
if verbose:
print(f"Recursively calling solver with eps := {eps} / 10")
return nearest_correlation_matrix(G, weights=H, eps=eps / 10, verbose=verbose)

return X


class DesignMatrix:
Expand Down Expand Up @@ -758,7 +814,9 @@ def generate(self, realnums, parameters, seedvalues, corrdict, rng):
# Make correlation matrix symmetric by copying lower triangular part
correlations = np.triu(correlations.T, k=1) + np.tril(correlations)

correlations = _nearest_positive_definite(correlations)
correlations = nearest_correlation_matrix(
correlations, weights=None, eps=1e-6, verbose=False
)

sampler = qmc.LatinHypercube(
d=len(multivariate_parameters), seed=rng
Expand Down
4 changes: 2 additions & 2 deletions src/semeio/fmudesign/iman_conover.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def __init__(self, correlation_matrix):
raise ValueError("Correlation matrix must be square.")
if not correlation_matrix.shape[0] == correlation_matrix.shape[1]:
raise ValueError("Correlation matrix must be square.")
# if not np.allclose(np.diag(correlation_matrix), 1.0):
# raise ValueError("Correlation matrix must have 1.0 on diagonal.")
if not np.allclose(np.diag(correlation_matrix), 1.0):
raise ValueError("Correlation matrix must have 1.0 on diagonal.")
if not np.allclose(correlation_matrix.T, correlation_matrix):
raise ValueError("Correlation matrix must be symmetric.")
if not _is_positive_definite(correlation_matrix):
Expand Down
82 changes: 82 additions & 0 deletions tests/fmudesign/test_create_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import semeio
from semeio.fmudesign import DesignMatrix, excel2dict_design
from semeio.fmudesign.create_design import nearest_correlation_matrix

TESTDATA = Path(__file__).parent / "data"

Expand Down Expand Up @@ -349,3 +350,84 @@ def test_generate_background(tmpdir):
0.8,
atol=0.20,
)


class TestNearestCorrelationMatrix:
@pytest.mark.parametrize("variables", range(2, 100, 10))
def test_nearest_correlation_matrix(self, variables):
"""Test that we can cholesky decompose the solution."""

rng = np.random.default_rng(variables)

# Create a correlation matrix
observations = rng.normal(size=(variables * 2, variables))
matrix = np.corrcoef(observations, rowvar=False)

# Taking the cholesky decomposition should work just fine
np.linalg.cholesky(matrix)

# Mess it up
matrix = matrix + rng.normal(size=matrix.shape, scale=0.1)
matrix = matrix - np.identity(variables) * np.mean(np.diag(matrix))

# Now the cholesky decomposition should fail
with pytest.raises(np.linalg.LinAlgError):
np.linalg.cholesky(matrix)

# Adjust the matrix to its nearest correlation matrix
correlation_matrix = nearest_correlation_matrix(matrix)

# Taking the cholesky decomposition should work now
np.linalg.cholesky(correlation_matrix)

# Diagonal entries should be 1.0 and the matrix should be symmetric
assert np.allclose(np.diag(correlation_matrix), 1.0)
assert np.allclose(correlation_matrix, correlation_matrix.T)

def test_nearest_correlation_matrix_on_matlab_example(self):
"""These matrices are from the 'nearcorr' docs:
https://www.mathworks.com/help/stats/nearcorr.html
"""
# The matrix we want to adjust to become a correlation matrix
A = np.array(
[
[1.0, 0.0, 0.0, 0.0, -0.936],
[0.0, 1.0, -0.55, -0.3645, -0.53],
[0.0, -0.55, 1.0, -0.0351, 0.0875],
[0.0, -0.3645, -0.0351, 1.0, 0.4557],
[-0.936, -0.53, 0.0875, 0.4557, 1.0],
]
)

W = np.array(
[
[0.0, 1.0, 0.1, 0.15, 0.25],
[1.0, 0.0, 0.05, 0.025, 0.15],
[0.1, 0.05, 0.0, 0.25, 1.0],
[0.15, 0.025, 0.25, 0.0, 0.25],
[0.25, 0.15, 1.0, 0.25, 0.0],
]
)

matlab_Y = np.array(
[
[1.0, 0.0014, 0.0287, -0.0222, -0.8777],
[0.0014, 1.0, -0.498, -0.7268, -0.4567],
[0.0287, -0.498, 1.0, -0.0358, 0.0878],
[-0.0222, -0.7268, -0.0358, 1.0, 0.4465],
[-0.8777, -0.4567, 0.0878, 0.4465, 1.0],
]
)

# The smallest eigenvalue of A is -0.1244...
# The smallest eigenvalue of Y is 1.088e-06
Y = nearest_correlation_matrix(A, weights=W)

# Matlab output has 4 digits, so atol is set to 1e-4 here
assert np.allclose(Y, matlab_Y, atol=1e-4)


if __name__ == "__main__":
import pytest

pytest.main(args=[__file__, "--doctest-modules", "-v", "-l"])

0 comments on commit f2cc08b

Please sign in to comment.