Merge pull request #111 from lbluque/main
Doc and README edits
lbluque authored Oct 12, 2023
2 parents 8bbf8d8 + 4896d13 commit 61e4c3d
Showing 8 changed files with 43 additions and 21 deletions.
24 changes: 21 additions & 3 deletions README.md
@@ -23,6 +23,16 @@ Available regression models
- Adaptive versions of Lasso, Group Lasso, Overlap Group Lasso, Sparse Group Lasso & Ridged Group Lasso.
- Best Subset Selection, Ridged Best Subset, L0, L1L0 & L2L0 (all with optional grouping of parameters)

+Installation
+------------
+**sparse-lm** is available on [PyPI](https://pypi.org/project/sparse-lm/), and can be installed via pip:
+
+```bash
+pip install sparse-lm
+```
+
+Additional information on installation can be found in the documentation [here](https://cedergrouphub.github.io/sparse-lm/install.html).
+
Basic usage
-----------
If you already use **scikit-learn**, using **sparse-lm** will be very easy. Just use any
@@ -34,13 +34,21 @@ from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
from sparselm.model import AdaptiveLasso

-X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
+X, y = make_regression(n_samples=100, n_features=80, n_informative=10, random_state=0)
alasso = AdaptiveLasso(fit_intercept=False)
-param_grid = {'alpha': np.logsppace(-7, -2)}
+param_grid = {'alpha': np.logspace(-8, 2, 10)}

cvsearch = GridSearchCV(alasso, param_grid)
cvsearch.fit(X, y)
print(cvsearch.best_params_)
```

-For more details on use and functionality see the [documentation](https://cedergrouphub.github.io/sparse-lm/).
+For more details on use and functionality have a look at the
+[examples](https://cedergrouphub.github.io/sparse-lm/auto_examples/index.html) and
+[API](https://cedergrouphub.github.io/sparse-lm/api.html) sections of the documentation.

Contributing
------------

We welcome any contributions that you think may improve the package! Please have a look at the
[contribution guidelines](https://cedergrouphub.github.io/sparse-lm/contributing.html) in the documentation.
5 changes: 5 additions & 0 deletions src/requirements.txt
@@ -0,0 +1,5 @@
+numpy
+scikit-learn
+cvxpy
+scipy
+joblib
4 changes: 2 additions & 2 deletions src/sparselm/model/_adaptive_lasso.py
@@ -9,7 +9,7 @@
Regressors follow scikit-learn interface, but use cvxpy to set up and solve
optimization problem.
-NOTE: In certain cases these can yield infeasible problems. This can cause
+Note: In certain cases these can yield infeasible problems. This can cause
processes to die and as a result make a calculation hang indefinitely when
using them in a multiprocess model selection tool such as sklearn
GridSearchCV with n_jobs > 1.
@@ -238,7 +238,7 @@ class AdaptiveGroupLasso(AdaptiveLasso, GroupLasso):
.. math::
-\min_{\beta} || X \beta - y ||^2_2 + \alpha * \sum_{G} w_G ||\beta_G||_2
+\min_{\beta} || X \beta - y ||^2_2 + \alpha \sum_{G} w_G ||\beta_G||_2
Where w represents a vector of weights that is iteratively updated.
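Adaptive penalties of this form are typically solved by iteratively reweighted l1 minimization: fit, then set each weight inversely to the magnitude of the current coefficient, and repeat. A minimal sketch of the scalar (non-group) analogue using plain scikit-learn rather than sparse-lm's cvxpy-based solvers — the column-rescaling trick and the weight update `w_j = 1 / (|beta_j| + eps)` are standard, but the specific values here are illustrative assumptions:

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso

X, y = make_regression(n_samples=100, n_features=30, n_informative=5, random_state=0)

# Iteratively reweighted l1: solve a weighted Lasso, then penalize small
# coefficients more strongly via w_j = 1 / (|beta_j| + eps).
eps, weights = 1e-6, np.ones(X.shape[1])
for _ in range(5):
    # Absorb per-feature weights by rescaling columns: with beta_j = c_j / w_j,
    # the weighted l1 penalty on beta becomes a plain l1 penalty on c.
    lasso = Lasso(alpha=0.1, fit_intercept=False).fit(X / weights, y)
    beta = lasso.coef_ / weights
    weights = 1.0 / (np.abs(beta) + eps)
```

The group variant in the formula above replaces `|beta_j|` with the group norm `||beta_G||_2` in the weight update.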
2 changes: 1 addition & 1 deletion src/sparselm/model/_base.py
@@ -469,7 +469,7 @@ def generate_problem(
def add_constraints(self, constraints: list[cp.Constraint]) -> None:
"""Add a constraint to the problem.
-.. warning::
+.. Warning::
Adding constraints will not work with any sklearn class that relies on
cloning the estimator (i.e. GridSearchCV, etc.). This is because a new cvxpy
problem is generated for any cloned estimator.
18 changes: 9 additions & 9 deletions src/sparselm/model/_miqp/_best_subset.py
@@ -75,19 +75,19 @@ class BestSubsetSelection(MIQPl0):
canonicals_ (SimpleNamespace):
Namespace that contains underlying cvxpy objects used to define
the optimization problem. The objects included are the following:
-- objective - the objective function.
-- beta - variable to be optimized (corresponds to the estimated coef_ attribute).
-- parameters - hyper-parameters
-- auxiliaries - auxiliary variables and expressions
-- constraints - solution constraints
+- objective - the objective function.
+- beta - variable to be optimized (corresponds to the estimated coef_ attribute).
+- parameters - hyper-parameters
+- auxiliaries - auxiliary variables and expressions
+- constraints - solution constraints
-Notes:
+Note:
Installation of Gurobi is not a must, but highly recommended. An open source alternative
is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
See the Mixed-integer programs section of the cvxpy docs:
https://www.cvxpy.org/tutorial/advanced/index.html
-WARNING:
+Warning:
Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
problems.
"""
@@ -201,13 +201,13 @@ class RidgedBestSubsetSelection(TikhonovMixin, BestSubsetSelection):
- auxiliaries - auxiliary variables and expressions
- constraints - solution constraints
-Notes:
+Note:
Installation of Gurobi is not a must, but highly recommended. An open source alternative
is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
See the Mixed-integer programs section of the cvxpy docs:
https://www.cvxpy.org/tutorial/advanced/index.html
-WARNING:
+Warning:
Even with gurobi solver, this can take a very long time to converge for large problems and under-determined
problems.
"""
6 changes: 3 additions & 3 deletions src/sparselm/model/_miqp/_regularized_l0.py
@@ -105,7 +105,7 @@ class RegularizedL0(MIQPl0):
- auxiliaries - auxiliary variables and expressions
- constraints - solution constraints
-Notes:
+Note:
Installation of Gurobi is not a must, but highly recommended. An open source alternative
is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
See the Mixed-integer programs section of the cvxpy docs:
@@ -333,7 +333,7 @@ class L1L0(MixedL0):
- auxiliaries - auxiliary variables and expressions
- constraints - solution constraints
-Notes:
+Note:
Installation of Gurobi is not a must, but highly recommended. An open source alternative
is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
See the Mixed-integer programs section of the cvxpy docs:
@@ -492,7 +492,7 @@ class L2L0(TikhonovMixin, MixedL0):
- auxiliaries - auxiliary variables and expressions
- constraints - solution constraints
-Notes:
+Note:
Installation of Gurobi is not a must, but highly recommended. An open source alternative
is SCIP. ECOS_BB also works but can be very slow, and has recurring correctness issues.
See the Mixed-integer programs section of the cvxpy docs:
3 changes: 1 addition & 2 deletions src/sparselm/model_selection.py
@@ -56,8 +56,7 @@ class GridSearchCV(_GridSearchCV):
"max_score", which means to maximize the score. Can also choose
"one_std_score", which means to apply one standard error rule
on scores.
-scoring (str, callable, list, tuple or dict,
-default="neg_root_mean_squared_error"):
+scoring (str, callable, list, tuple or dict, default="neg_root_mean_squared_error"):
Strategy to evaluate the performance of the cross-validated
model on the test set.
If `scoring` represents a single score, one can use:
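These scoring options follow scikit-learn's conventions. A minimal sketch of the single-score string strategy using scikit-learn's own GridSearchCV (the Ridge estimator and grid values are illustrative assumptions, not part of this commit):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

X, y = make_regression(n_samples=60, n_features=10, random_state=0)

# scoring given as a single string; scores are negated RMSE so that
# "higher is better" holds for the search.
search = GridSearchCV(
    Ridge(),
    param_grid={"alpha": np.logspace(-3, 1, 5)},
    scoring="neg_root_mean_squared_error",
)
search.fit(X, y)
print(search.best_params_)
```

sparse-lm's GridSearchCV accepts the same scoring forms, and additionally lets `opt_selection_method` pick the best hyperparameters by the one-standard-error rule.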
2 changes: 1 addition & 1 deletion src/sparselm/stepwise.py
@@ -75,7 +75,7 @@ class StepwiseEstimator(_BaseComposition, RegressorMixin, LinearModel):
with correct hierarchy, groups and other parameters before
wrapping them up with the composite!
-Notes:
+Note:
1. Do not use GridSearchCV or LineSearchCV to search a StepwiseEstimator!
2. No nesting is allowed for StepwiseEstimator, which means no step of a
