ENH - Add skglm PDCD with working sets solver #10

Open · wants to merge 15 commits into main
34 changes: 34 additions & 0 deletions datasets/meg.py
@@ -0,0 +1,34 @@
from benchopt import BaseDataset
from benchopt import safe_import_context


with safe_import_context() as import_ctx:
    from sklearn.datasets import fetch_openml
    import numpy as np


class Dataset(BaseDataset):
    name = "MEG"
    install_cmd = "conda"
    requirements = ["scikit-learn"]

    @staticmethod
    def _load_meg_data(condition="Left Auditory"):
        dataset = fetch_openml(data_id=43884)
        all_data = dataset.data.to_numpy()
        # The first 7498 columns form the design matrix (the MEG gain
        # matrix); the remaining columns hold the measurements.
        X = all_data[:, :7498]

        # The offset past the gain matrix selects the measurement column
        # for the requested condition.
        if condition == "Left Auditory":
            idx = 7498 + 27
        else:
            idx = 7498 + 85 + 28
        y = np.ascontiguousarray(all_data[:, idx])
        return X, y

    def get_data(self):
        # Cache the download: only fetch from OpenML on the first call.
        try:
            X, y = self.X, self.y
        except AttributeError:
            X, y = self._load_meg_data()
            self.X, self.y = X, y
        return dict(X=X, y=y)
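
Not part of the diff, but a quick way to sanity-check the loader. A minimal sketch, assuming it is run from the benchmark root so that datasets/meg.py is importable as a module, and that OpenML is reachable; the import path and variable names are illustrative:

    # Hypothetical smoke test for the MEG loader, not part of the PR.
    from datasets.meg import Dataset

    meg = Dataset()
    data = meg.get_data()
    X, y = data["X"], data["y"]
    print(X.shape, y.shape)  # design matrix with 7498 columns, one target vector
    # Repeated calls reuse the cached arrays instead of re-downloading:
    assert meg.get_data()["X"] is X
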
21 changes: 12 additions & 9 deletions datasets/simulated.py
@@ -1,6 +1,7 @@
 import numpy as np

 from benchopt import BaseDataset
+from benchopt.datasets.simulated import make_correlated_data


 class Dataset(BaseDataset):
@@ -11,24 +12,26 @@ class Dataset(BaseDataset):
     # the cross product for each key in the dictionary.
     parameters = {
         'n_samples, n_features': [
-            (100, 50),
-            (1000, 200)
-        ]
+            (100, 10_000),
+            (500, 600),
+            (10_000, 100)
+        ],
+        'rho': [0, 0.6],
     }

-    def __init__(self, n_samples=10, n_features=50, random_state=27):
+    def __init__(self, n_samples=10, n_features=50, rho=0, random_state=27):
         # Store the parameters of the dataset
         self.n_samples = n_samples
         self.n_features = n_features
         self.random_state = random_state
+        self.rho = rho

     def get_data(self):
         rng = np.random.RandomState(self.random_state)
-        coef = rng.randn(self.n_features)
-        X = rng.randn(self.n_samples, self.n_features)
-        y = X @ coef + 0.1 * rng.randn(self.n_samples)
-        y += 100 * rng.randn(self.n_samples)  # add intercept
+        X, y, _ = make_correlated_data(self.n_samples, self.n_features,
+                                       rho=self.rho, random_state=rng)

         data = dict(X=X, y=y)

-        return self.n_features + 1, data
+        return data
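
For context on the new data generation: benchopt's make_correlated_data returns the design matrix, the response, and the true coefficients, with Gaussian features whose correlation decays with column distance (as rho^|i - j|, if it follows the usual Toeplitz design). A minimal, illustrative sketch:

    import numpy as np
    from benchopt.datasets.simulated import make_correlated_data

    X, y, w_true = make_correlated_data(500, 600, rho=0.6, random_state=27)
    print(X.shape, y.shape, w_true.shape)  # (500, 600) (500,) (600,)
    # Adjacent columns should be noticeably correlated with rho=0.6,
    # and essentially uncorrelated with rho=0.
    print(np.corrcoef(X[:, 0], X[:, 1])[0, 1])
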
55 changes: 55 additions & 0 deletions solvers/pdcd_ws.py
@@ -0,0 +1,55 @@
from benchopt import BaseSolver
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
    import numpy as np

    from skglm.penalties import L1
    from skglm.experimental.pdcd_ws import PDCD_WS
    from skglm.experimental.quantile_regression import Pinball


class Solver(BaseSolver):
    name = "PDCD-WS"

    requirements = [
        'pip:git+https://github.com/Badr-MOUFAD/skglm.git@pinball-df'
    ]

    references = [
        'Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel '
        'and M. Massias, '
        '"Beyond L1: Faster and Better Sparse Models with skglm", '
        'https://arxiv.org/abs/2204.07826'
    ]

    stopping_strategy = "iteration"

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        self.X, self.y, self.lmbd = X, y, lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept

        # Rescale the regularization to match skglm's datafit normalization.
        self.penalty = L1(len(y) * lmbd)
        self.datafit = Pinball(self.quantile)

        # Initialize the dual variable with a subgradient of the pinball
        # loss w.r.t. the residual at w = 0: quantile where y > 0,
        # quantile - 1 where y < 0.
        self.solver = PDCD_WS(
            tol=1e-9,
            dual_init=np.sign(y) / 2 + (quantile - 0.5)
        )

        # Cache Numba compilation
        self.run(5)

    def run(self, n_iter):
        if n_iter == 0:
            self.coef = np.zeros(self.X.shape[1])
        else:
            self.solver.max_iter = n_iter
            coef = self.solver.solve(self.X, self.y,
                                     self.datafit, self.penalty)[0]
            self.coef = coef.flatten()

    def get_result(self):
        return self.coef
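
A note on the dual_init choice above: the pinball loss with level quantile has slope quantile on positive residuals and quantile - 1 on negative ones, so at the zero primal point (where the residual equals y) the formula sign(y)/2 + (quantile - 0.5) produces exactly those subgradient values. A small illustrative check, which only restates the formula used in set_objective:

    import numpy as np

    quantile = 0.3
    y = np.array([2.0, -1.0, 0.5, -3.0])
    dual_init = np.sign(y) / 2 + (quantile - 0.5)
    # quantile where y > 0, quantile - 1 where y < 0
    assert np.allclose(dual_init, np.where(y > 0, quantile, quantile - 1))
    print(dual_init)  # [ 0.3 -0.7  0.3 -0.7]

To compare this solver against the others in the benchmark, the usual benchopt invocation should work from the benchmark root, e.g. benchopt run . -s PDCD-WS -d simulated.
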