Skip to content

Commit

Permalink
merged
Browse files Browse the repository at this point in the history
  • Loading branch information
Julien Roussel authored and Julien Roussel committed Feb 13, 2024
2 parents 71fd49d + c779ed1 commit 6b48130
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 15 deletions.
7 changes: 3 additions & 4 deletions qolmat/imputations/imputers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,8 +1784,7 @@ def _fit_element(
D = utils.prepare_data(X, self.period)
Omega = ~np.isnan(D)
# D = utils.linear_interpolation(D)

Q = model.fit_basis(D, Omega)
Q = model.fit_basis(X, Omega)

return Q

Expand Down Expand Up @@ -1818,9 +1817,9 @@ def _transform_element(
self._check_dataframe(df)
if self.method not in ["PCP", "noisy"]:
raise ValueError("Argument method must be `PCP` or `noisy`!")
hyperparams = self.get_hyperparams()
model = self.get_model(**hyperparams)

hyperparams = self.get_hyperparams(col=col)
model = self.get_model(random_state=self._rng, **hyperparams)
X = df.astype(float).values

D = utils.prepare_data(X, self.period)
Expand Down
20 changes: 18 additions & 2 deletions qolmat/imputations/rpca/rpca.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Union
from typing import Union, Tuple
from typing_extensions import Self

import numpy as np
Expand Down Expand Up @@ -38,7 +38,23 @@ def __init__(
self.random_state = random_state
self.verbose = verbose

def fit_basis(self, D: NDArray, Omega: NDArray) -> NDArray:
def fit_basis(self, D: NDArray, Omega: NDArray) -> Self:
"""Fit RPCA model on data
Parameters
----------
D : NDArray
Observations
Omega : NDArray
boolean matrix indicating the observed values
Returns
-------
Self
Model RPCA
"""
D = utils.linear_interpolation(D)

n_rows, n_cols = D.shape
if n_rows == 1 or n_cols == 1:
self.V = np.array([[1]])
Expand Down
23 changes: 16 additions & 7 deletions qolmat/imputations/rpca/rpca_noisy.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ def get_params_scale(self, D: NDArray) -> Dict[str, float]:
}

def decompose_on_basis(
self, D: NDArray, Omega: NDArray, Q: NDArray
self,
D: NDArray,
Omega: NDArray,
Q: NDArray,
) -> Tuple[NDArray, NDArray]:
D = utils.linear_interpolation(D)
params_scale = self.get_params_scale(D)
Expand Down Expand Up @@ -214,12 +217,18 @@ def decompose_rpca(
Anomalies
"""

params_scale = self.get_params_scale(D)
self.params_scale = self.get_params_scale(D)

lam = params_scale["lam"] if self.lam is None else self.lam
rank = params_scale["rank"] if self.rank is None else self.rank
rank = int(rank)
tau = params_scale["tau"] if self.tau is None else self.tau
if self.lam is not None:
self.params_scale["lam"] = self.lam
if self.rank is not None:
self.params_scale["rank"] = self.rank
if self.tau is not None:
self.params_scale["tau"] = self.tau

lam = self.params_scale["lam"]
rank = int(self.params_scale["rank"])
tau = self.params_scale["tau"]
mu = 1e-2 if self.mu is None else self.mu

n_rows, _ = D.shape
Expand Down Expand Up @@ -373,7 +382,7 @@ def decompose_rpca_algorithm(
Y = np.zeros((n_rows, n_cols))
X = D.copy()
A = np.zeros((n_rows, n_cols))
U, S, Vt = np.linalg.svd(X)
U, S, Vt = np.linalg.svd(X, full_matrices=False)

U = U[:, :rank]
S = S[:rank]
Expand Down
2 changes: 1 addition & 1 deletion qolmat/imputations/rpca/rpca_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def approx_rank(
"""
if threshold == 1:
return min(M.shape)
_, values_singular, _ = np.linalg.svd(M, full_matrices=True)
_, values_singular, _ = np.linalg.svd(M, full_matrices=False)

cum_sum = np.cumsum(values_singular) / np.sum(values_singular)
rank = np.argwhere(cum_sum > threshold)[0][0] + 1
Expand Down
8 changes: 8 additions & 0 deletions tests/imputations/rpca/test_rpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
class RPCAMock(RPCA):
def __init__(self):
super().__init__()
self.Q = None

def decompose_rpca(
self, D: NDArray, Omega: NDArray
Expand All @@ -43,3 +44,10 @@ def test_rpca_fit_basis() -> None:
_, n_colsQ = Q.shape
assert n_cols == n_colsQ
assert rpca.call_count == 1


# Test that `transform_with_basis` on the RPCA mock returns a result with the
# same shape as the incomplete input, and that the mock's `call_count` is
# exactly 1 afterwards.
# NOTE(review): `call_count` is presumably incremented by the mock's
# `decompose_rpca`; its definition is not visible in this hunk — confirm.
def test_transform_with_basis() -> None:
rpca = RPCAMock()
X_imputed = rpca.transform_with_basis(X_incomplete)
assert X_imputed.shape == X_incomplete.shape
assert rpca.call_count == 1
13 changes: 12 additions & 1 deletion tests/imputations/test_imputers.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ def test_ImputerRegressor_fit_transform(df: pd.DataFrame) -> None:
@pytest.mark.parametrize("df", [df_timeseries])
def test_ImputerRPCA_fit_transform(df: pd.DataFrame) -> None:
imputer = imputers.ImputerRPCA(columnwise=False, max_iterations=100, tau=1, lam=0.3)
result = imputer.fit_transform(df)
imputer = imputer.fit(df)
result = imputer.transform(df)
expected = pd.DataFrame(
{
"col1": [i for i in range(20)],
Expand All @@ -273,6 +274,16 @@ def test_ImputerRPCA_fit_transform(df: pd.DataFrame) -> None:
result = np.around(result)
np.testing.assert_allclose(result, expected, atol=1e-2)

result = imputer.transform(df.iloc[:10])
expected = pd.DataFrame(
{
"col1": [i for i in range(10)],
"col2": [0, 1, 2, 2, 2] + [i for i in range(5, 10)],
}
)
result = np.around(result)
np.testing.assert_allclose(result, expected, atol=1e-2)


@pytest.mark.parametrize("df", [df_incomplete])
def test_ImputerSoftImpute_fit_transform(df: pd.DataFrame) -> None:
Expand Down

0 comments on commit 6b48130

Please sign in to comment.