From 5a4eaa87437466ec45c260e5057447be589e5392 Mon Sep 17 00:00:00 2001 From: Julien Roussel <3178729-JulienRoussel77@users.noreply.gitlab.com> Date: Fri, 23 Feb 2024 11:41:16 +0100 Subject: [PATCH] rpca pcp decompsoe implemented, not working without finite rank --- docs/imputers.rst | 4 +- qolmat/imputations/rpca/rpca_pcp.py | 67 +++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/docs/imputers.rst b/docs/imputers.rst index dafe24bc..40245600 100644 --- a/docs/imputers.rst +++ b/docs/imputers.rst @@ -115,7 +115,7 @@ In the case of time-series data, we also propose :class:`~qolmat.imputations.dif References ---------- -[1] Candès, Emmanuel J., et al. `Robust principal component analysis? `_ Journal of the ACM (JACM) 58.3 (2011): 1-37. +[1] Candès, Emmanuel J., et al. `Robust principal component analysis? `_ Journal of the ACM (JACM) 58.3 (2011): 1-37. [2] Botterman, HL., Roussel, J., Morzadec, T., Jabbari, A., Brunel, N. `Robust PCA for Anomaly Detection and Data Imputation in Seasonal Time Series `_ in International Conference on Machine Learning, Optimization, and Data Science. Cham: Springer Nature Switzerland (2022). @@ -131,7 +131,7 @@ References [8] Ho, Jonathan, Ajay Jain, and Pieter Abbeel. `Denoising diffusion probabilistic models. `_ Advances in neural information processing systems 33 (2020): 6840-6851. -[9] Tashiro, Yusuke, et al. `Csdi: Conditional score-based diffusion models for probabilistic time series imputation. `_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816. +[9] Tashiro, Yusuke, et al. `CSDI: Conditional score-based diffusion models for probabilistic time series imputation. `_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816. [10] Kotelnikov, Akim, et al. `Tabddpm: Modelling tabular data with diffusion models. `_ International Conference on Machine Learning. PMLR, 2023. diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py index f613b322..ebda3549 100644 --- a/qolmat/imputations/rpca/rpca_pcp.py +++ b/qolmat/imputations/rpca/rpca_pcp.py @@ -6,6 +6,7 @@ import numpy as np from numpy.typing import NDArray from sklearn import utils as sku +import scipy as scp from qolmat.imputations.rpca import rpca_utils from qolmat.imputations.rpca.rpca import RPCA @@ -80,13 +81,73 @@ def get_params_scale(self, D: NDArray): dict_params = {"mu": mu, "lam": lam} return dict_params + # def decompose_on_basis( + # self, D: NDArray, Omega: NDArray, Q: NDArray + # ) -> Tuple[NDArray, NDArray]: + # n_rows, n_cols = D.shape + # if n_rows == 1 or n_cols == 1: + # return D, np.full_like(D, 0) + # M, A, L, Q = self.decompose_rpca(D, Omega) + # return M, A + def decompose_on_basis( self, D: NDArray, Omega: NDArray, Q: NDArray ) -> Tuple[NDArray, NDArray]: n_rows, n_cols = D.shape + rank, _ = Q.shape if n_rows == 1 or n_cols == 1: return D, np.full_like(D, 0) - M, A, L, Q = self.decompose_rpca(D, Omega) + params_scale = self.get_params_scale(D) + + mu = params_scale["mu"] if self.mu is None else self.mu + lam = params_scale["lam"] if self.lam is None else self.lam + + D = utils.linear_interpolation(D) + + # D_norm = np.linalg.norm(D, "fro") + + A = np.array(np.full_like(D, 0)) + L = np.zeros((n_rows, rank)) + M = np.array(np.full_like(D, 0)) + Y = np.array(np.full_like(D, 0)) + + # errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan) + + # M: NDArray = D - A + for _ in range(self.max_iterations): + # A_prev = A.copy() + # L_prev = L.copy() + # L = Y @ Q.T + + # A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu) + + # A[~Omega] = (D - M)[~Omega] + + # Y += mu * (D - M - A) + + # errors[iteration] = error + + A_Omega = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu) + A_Omega_C = D - M + A = np.where(Omega, A_Omega, A_Omega_C) + + L = scp.linalg.solve( + a=Q @ Q.T, + b=Q @ (D - A + Y / mu), + ).T + M = L @ Q + + Y += mu * (D - M - A) + + # drift_A = np.linalg.norm(A - A_prev, np.inf) + # drift_L = np.linalg.norm(L - L_prev, np.inf) + # error = max([drift_A, drift_L]) + + # if error < self.tol: + # break + + self._check_cost_function_minimized(D, M, A, Omega, lam) + return M, A def decompose_rpca( @@ -127,9 +188,9 @@ def decompose_rpca( M: NDArray = D - A for iteration in range(self.max_iterations): - L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu) + L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, mu) M = L @ Q - A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu) + A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu) A[~Omega] = (D - M)[~Omega] Y += mu * (D - M - A)