rpca pcp decompsoe implemented, not working without finite rank

scikit-learn-contrib · Feb 23, 2024 · 5a4eaa8 · 5a4eaa8
1 parent dd22252
commit 5a4eaa8
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 5 deletions.
diff --git a/docs/imputers.rst b/docs/imputers.rst
@@ -115,7 +115,7 @@ In the case of time-series data, we also propose :class:`~qolmat.imputations.dif
 References
 ----------
 
-[1] Candès, Emmanuel J., et al. `Robust principal component analysis? <https://arxiv.org/abs/2001.05484>`_ Journal of the ACM (JACM) 58.3 (2011): 1-37.
+[1] Candès, Emmanuel J., et al. `Robust principal component analysis? <https://arxiv.org/pdf/0912.3599.pdf>`_ Journal of the ACM (JACM) 58.3 (2011): 1-37.
 
 [2] Botterman, HL., Roussel, J., Morzadec, T., Jabbari, A., Brunel, N. `Robust PCA for Anomaly Detection and Data Imputation in Seasonal Time Series <https://link.springer.com/chapter/10.1007/978-3-031-25891-6_21>`_ in International Conference on Machine Learning, Optimization, and Data Science. Cham: Springer Nature Switzerland (2022).
 
@@ -131,7 +131,7 @@ References
 
 [8] Ho, Jonathan, Ajay Jain, and Pieter Abbeel. `Denoising diffusion probabilistic models. <https://arxiv.org/abs/2006.11239>`_ Advances in neural information processing systems 33 (2020): 6840-6851.
 
-[9] Tashiro, Yusuke, et al. `Csdi: Conditional score-based diffusion models for probabilistic time series imputation. <https://arxiv.org/abs/2107.03502>`_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816.
+[9] Tashiro, Yusuke, et al. `CSDI: Conditional score-based diffusion models for probabilistic time series imputation. <https://arxiv.org/abs/2107.03502>`_ Advances in Neural Information Processing Systems 34 (2021): 24804-24816.
 
 [10] Kotelnikov, Akim, et al. `Tabddpm: Modelling tabular data with diffusion models. <https://icml.cc/virtual/2023/poster/24703>`_ International Conference on Machine Learning. PMLR, 2023.
 

diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
@@ -6,6 +6,7 @@
 import numpy as np
 from numpy.typing import NDArray
 from sklearn import utils as sku
+import scipy as scp
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
@@ -80,13 +81,73 @@ def get_params_scale(self, D: NDArray):
         dict_params = {"mu": mu, "lam": lam}
         return dict_params
 
+    # def decompose_on_basis(
+    #     self, D: NDArray, Omega: NDArray, Q: NDArray
+    # ) -> Tuple[NDArray, NDArray]:
+    #     n_rows, n_cols = D.shape
+    #     if n_rows == 1 or n_cols == 1:
+    #         return D, np.full_like(D, 0)
+    #     M, A, L, Q = self.decompose_rpca(D, Omega)
+    #     return M, A
+
     def decompose_on_basis(
         self, D: NDArray, Omega: NDArray, Q: NDArray
     ) -> Tuple[NDArray, NDArray]:
         n_rows, n_cols = D.shape
+        rank, _ = Q.shape
         if n_rows == 1 or n_cols == 1:
             return D, np.full_like(D, 0)
-        M, A, L, Q = self.decompose_rpca(D, Omega)
+        params_scale = self.get_params_scale(D)
+
+        mu = params_scale["mu"] if self.mu is None else self.mu
+        lam = params_scale["lam"] if self.lam is None else self.lam
+
+        D = utils.linear_interpolation(D)
+
+        # D_norm = np.linalg.norm(D, "fro")
+
+        A = np.array(np.full_like(D, 0))
+        L = np.zeros((n_rows, rank))
+        M = np.array(np.full_like(D, 0))
+        Y = np.array(np.full_like(D, 0))
+
+        # errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)
+
+        # M: NDArray = D - A
+        for _ in range(self.max_iterations):
+            # A_prev = A.copy()
+            # L_prev = L.copy()
+            # L = Y @ Q.T
+
+            # A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)
+
+            # A[~Omega] = (D - M)[~Omega]
+
+            # Y += mu * (D - M - A)
+
+            # errors[iteration] = error
+
+            A_Omega = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)
+            A_Omega_C = D - M
+            A = np.where(Omega, A_Omega, A_Omega_C)
+
+            L = scp.linalg.solve(
+                a=Q @ Q.T,
+                b=Q @ (D - A + Y / mu),
+            ).T
+            M = L @ Q
+
+            Y += mu * (D - M - A)
+
+            # drift_A = np.linalg.norm(A - A_prev, np.inf)
+            # drift_L = np.linalg.norm(L - L_prev, np.inf)
+            # error = max([drift_A, drift_L])
+
+            # if error < self.tol:
+            #     break
+
+        self._check_cost_function_minimized(D, M, A, Omega, lam)
+
         return M, A
 
     def decompose_rpca(
@@ -127,9 +188,9 @@ def decompose_rpca(
 
         M: NDArray = D - A
         for iteration in range(self.max_iterations):
-            L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
+            L, Q = rpca_utils.svd_thresholding(D - A + Y / mu, mu)
             M = L @ Q
-            A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
+            A = rpca_utils.soft_thresholding(D - M + Y / mu, lam * mu)
             A[~Omega] = (D - M)[~Omega]
 
             Y += mu * (D - M - A)