ENH - Add skglm PDCD with working sets solver #10

Open · wants to merge 15 commits into main
34 changes: 34 additions & 0 deletions datasets/meg.py
@@ -0,0 +1,34 @@
from benchopt import BaseDataset
from benchopt import safe_import_context


with safe_import_context() as import_ctx:
    from sklearn.datasets import fetch_openml
    import numpy as np


class Dataset(BaseDataset):
    name = "MEG"
    install_cmd = "conda"
    requirements = ["scikit-learn"]

    @staticmethod
    def _load_meg_data(condition="Left Auditory"):
        dataset = fetch_openml(data_id=43884)
        all_data = dataset.data.to_numpy()
        # The first 7498 columns form the design matrix (the MEG gain
        # matrix); the remaining columns hold the measurements.
        X = all_data[:, :7498]

        # The offset past the gain matrix selects the measurement column
        # for the requested condition.
        if condition == "Left Auditory":
            idx = 7498 + 27
        else:
            idx = 7498 + 85 + 28
        y = np.ascontiguousarray(all_data[:, idx])
        return X, y

    def get_data(self):
        # Cache the download: only fetch from OpenML on the first call.
        try:
            X, y = self.X, self.y
        except AttributeError:
            X, y = self._load_meg_data()
            self.X, self.y = X, y
        return dict(X=X, y=y)
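
Not part of the diff, but a quick way to sanity-check the loader. A minimal sketch, assuming it is run from the benchmark root so that datasets/meg.py is importable as a module, and that OpenML is reachable; the import path and variable names are illustrative:

    # Hypothetical smoke test for the MEG loader, not part of the PR.
    from datasets.meg import Dataset

    meg = Dataset()
    data = meg.get_data()
    X, y = data["X"], data["y"]
    print(X.shape, y.shape)  # design matrix with 7498 columns, one target vector
    # Repeated calls reuse the cached arrays instead of re-downloading:
    assert meg.get_data()["X"] is X
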
21 changes: 12 additions & 9 deletions datasets/simulated.py
@@ -1,6 +1,7 @@
 import numpy as np

 from benchopt import BaseDataset
+from benchopt.datasets.simulated import make_correlated_data


 class Dataset(BaseDataset):
@@ -11,24 +12,26 @@ class Dataset(BaseDataset):
     # the cross product for each key in the dictionary.
     parameters = {
         'n_samples, n_features': [
-            (100, 50),
-            (1000, 200)
-        ]
+            (100, 10_000),
+            (500, 600),
+            (10_000, 100)
+        ],
+        'rho': [0, 0.6],
     }

-    def __init__(self, n_samples=10, n_features=50, random_state=27):
+    def __init__(self, n_samples=10, n_features=50, rho=0, random_state=27):
         # Store the parameters of the dataset
         self.n_samples = n_samples
         self.n_features = n_features
         self.random_state = random_state
+        self.rho = rho

     def get_data(self):
         rng = np.random.RandomState(self.random_state)
-        coef = rng.randn(self.n_features)
-        X = rng.randn(self.n_samples, self.n_features)
-        y = X @ coef + 0.1 * rng.randn(self.n_samples)
-        y += 100 * rng.randn(self.n_samples)  # add intercept
+        X, y, _ = make_correlated_data(self.n_samples, self.n_features,
+                                       rho=self.rho, random_state=rng)

         data = dict(X=X, y=y)

-        return self.n_features + 1, data
+        return data
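
For context on the new data generation: benchopt's make_correlated_data returns the design matrix, the response, and the true coefficients, with Gaussian features whose correlation decays with column distance (as rho^|i - j|, if it follows the usual Toeplitz design). A minimal, illustrative sketch:

    import numpy as np
    from benchopt.datasets.simulated import make_correlated_data

    X, y, w_true = make_correlated_data(500, 600, rho=0.6, random_state=27)
    print(X.shape, y.shape, w_true.shape)  # (500, 600) (500,) (600,)
    # Adjacent columns should be noticeably correlated with rho=0.6,
    # and essentially uncorrelated with rho=0.
    print(np.corrcoef(X[:, 0], X[:, 1])[0, 1])
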
55 changes: 55 additions & 0 deletions solvers/pdcd_ws.py
@@ -0,0 +1,55 @@
from benchopt import BaseSolver
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
    import numpy as np

    from skglm.penalties import L1
    from skglm.experimental.pdcd_ws import PDCD_WS
    from skglm.experimental.quantile_regression import Pinball


class Solver(BaseSolver):
    name = "PDCD-WS"

    requirements = [
        'pip:git+https://github.com/Badr-MOUFAD/skglm.git@pinball-df'
    ]

    references = [
        'Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel '
        'and M. Massias, '
        '"Beyond L1: Faster and Better Sparse Models with skglm", '
        'https://arxiv.org/abs/2204.07826'
    ]

    stopping_strategy = "iteration"

    def set_objective(self, X, y, lmbd, quantile, fit_intercept):
        self.X, self.y, self.lmbd = X, y, lmbd
        self.quantile = quantile
        self.fit_intercept = fit_intercept

        # Rescale the regularization to match skglm's datafit normalization.
        self.penalty = L1(len(y) * lmbd)
        self.datafit = Pinball(self.quantile)

        # Initialize the dual variable with a subgradient of the pinball
        # loss w.r.t. the residual at w = 0: quantile where y > 0,
        # quantile - 1 where y < 0.
        self.solver = PDCD_WS(
            tol=1e-9,
            dual_init=np.sign(y) / 2 + (quantile - 0.5)
        )

        # Cache Numba compilation
        self.run(5)

    def run(self, n_iter):
        if n_iter == 0:
            self.coef = np.zeros(self.X.shape[1])
        else:
            self.solver.max_iter = n_iter
            coef = self.solver.solve(self.X, self.y,
                                     self.datafit, self.penalty)[0]
            self.coef = coef.flatten()

    def get_result(self):
        return self.coef
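
A note on the dual_init choice above: the pinball loss with level quantile has slope quantile on positive residuals and quantile - 1 on negative ones, so at the zero primal point (where the residual equals y) the formula sign(y)/2 + (quantile - 0.5) produces exactly those subgradient values. A small illustrative check, which only restates the formula used in set_objective:

    import numpy as np

    quantile = 0.3
    y = np.array([2.0, -1.0, 0.5, -3.0])
    dual_init = np.sign(y) / 2 + (quantile - 0.5)
    # quantile where y > 0, quantile - 1 where y < 0
    assert np.allclose(dual_init, np.where(y > 0, quantile, quantile - 1))
    print(dual_init)  # [ 0.3 -0.7  0.3 -0.7]

To compare this solver against the others in the benchmark, the usual benchopt invocation should work from the benchmark root, e.g. benchopt run . -s PDCD-WS -d simulated.
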