Skip to content

Commit

Permalink
rpca pcp decompsoe implemented, not working without finite rank
Browse files Browse the repository at this point in the history
  • Loading branch information
Julien Roussel authored and Julien Roussel committed Feb 23, 2024
1 parent dd22252 commit 5a4eaa8
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 5 deletions.
4 changes: 2 additions & 2 deletions docs/imputers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ In the case of time-series data, we also propose :class:`~qolmat.imputations.dif
References
----------

[1] Candès, Emmanuel J., et al. `Robust principal component analysis? <https://arxiv.org/abs/2001.05484>`_ Journal of the ACM (JACM) 58.3 (2011): 1-37.
[1] Candès, Emmanuel J., et al. `Robust principal component analysis? <https://arxiv.org/pdf/0912.3599.pdf>`_ Journal of the ACM (JACM) 58.3 (2011): 1-37.

[2] Botterman, HL., Roussel, J., Morzadec, T., Jabbari, A., Brunel, N. `Robust PCA for Anomaly Detection and Data Imputation in Seasonal Time Series <https://link.springer.com/chapter/10.1007/978-3-031-25891-6_21>`_ in International Conference on Machine Learning, Optimization, and Data Science. Cham: Springer Nature Switzerland (2022).

Expand All @@ -131,7 +131,7 @@ References

[8] Ho, Jonathan, Ajay Jain, and Pieter Abbeel. `Denoising diffusion probabilistic models. <https://arxiv.org/abs/2006.11239>`_ Advances in neural information processing systems 33 (2020): 6840-6851.

[9] Tashiro, Yusuke, et al. `Csdi: Conditional score-based diffusion models for probabilistic time series imputation. <https://arxiv.org/abs/2107.03502>`_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816.
[9] Tashiro, Yusuke, et al. `CSDI: Conditional score-based diffusion models for probabilistic time series imputation. <https://arxiv.org/abs/2107.03502>`_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816.

[10] Kotelnikov, Akim, et al. `Tabddpm: Modelling tabular data with diffusion models. <https://icml.cc/virtual/2023/poster/24703>`_ International Conference on Machine Learning. PMLR, 2023.

Expand Down
67 changes: 64 additions & 3 deletions qolmat/imputations/rpca/rpca_pcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
from numpy.typing import NDArray
from sklearn import utils as sku
import scipy as scp

from qolmat.imputations.rpca import rpca_utils
from qolmat.imputations.rpca.rpca import RPCA
Expand Down Expand Up @@ -80,13 +81,73 @@ def get_params_scale(self, D: NDArray):
dict_params = {"mu": mu, "lam": lam}
return dict_params

# def decompose_on_basis(
# self, D: NDArray, Omega: NDArray, Q: NDArray
# ) -> Tuple[NDArray, NDArray]:
# n_rows, n_cols = D.shape
# if n_rows == 1 or n_cols == 1:
# return D, np.full_like(D, 0)
# M, A, L, Q = self.decompose_rpca(D, Omega)
# return M, A

def decompose_on_basis(
self, D: NDArray, Omega: NDArray, Q: NDArray
) -> Tuple[NDArray, NDArray]:
n_rows, n_cols = D.shape
rank, _ = Q.shape
if n_rows == 1 or n_cols == 1:
return D, np.full_like(D, 0)
M, A, L, Q = self.decompose_rpca(D, Omega)
params_scale = self.get_params_scale(D)

mu = params_scale["mu"] if self.mu is None else self.mu
lam = params_scale["lam"] if self.lam is None else self.lam

D = utils.linear_interpolation(D)

# D_norm = np.linalg.norm(D, "fro")

A = np.array(np.full_like(D, 0))
L = np.zeros((n_rows, rank))
M = np.array(np.full_like(D, 0))
Y = np.array(np.full_like(D, 0))

# errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)

# M: NDArray = D - A
for _ in range(self.max_iterations):
# A_prev = A.copy()
# L_prev = L.copy()
# L = Y @ Q.T

# A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)

# A[~Omega] = (D - M)[~Omega]

# Y += mu * (D - M - A)

# errors[iteration] = error

A_Omega = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)
A_Omega_C = D - M
A = np.where(Omega, A_Omega, A_Omega_C)

L = scp.linalg.solve(
a=Q @ Q.T,
b=Q @ (D - A + Y / mu),
).T
M = L @ Q

Y += mu * (D - M - A)

# drift_A = np.linalg.norm(A - A_prev, np.inf)
# drift_L = np.linalg.norm(L - L_prev, np.inf)
# error = max([drift_A, drift_L])

# if error < self.tol:
# break

self._check_cost_function_minimized(D, M, A, Omega, lam)

return M, A

def decompose_rpca(
Expand Down Expand Up @@ -127,9 +188,9 @@ def decompose_rpca(

M: NDArray = D - A
for iteration in range(self.max_iterations):
L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, mu)
M = L @ Q
A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)
A[~Omega] = (D - M)[~Omega]

Y += mu * (D - M - A)
Expand Down

0 comments on commit 5a4eaa8

Please sign in to comment.