Skip to content

Commit

Permalink
TST fix all tests
Browse files Browse the repository at this point in the history
  • Loading branch information
glemaitre committed May 1, 2024
1 parent 360c560 commit eba14ba
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 50 deletions.
4 changes: 2 additions & 2 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ readme = "README.md"

[dependencies]
python = "*"
scikit-learn = "*"
scikit-learn = ">=1.4.2"

[pypi-dependencies]
skltemplate = { path=".", editable=true }
Expand Down
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ authors = [
description = "A template for scikit-learn compatible packages."
readme = "README.md"
dependencies = [
"scikit-learn>=1.0.2",
"scikit-learn>=1.4.2",
]
classifiers = [
"Programming Language :: Python :: 3",
Expand Down Expand Up @@ -69,4 +69,8 @@ ignore=[
# folder.
"examples/*"=["E402"]
"doc/conf.py"=["E402"]
"doc/_templates/numpydoc_docstring.py"=["F821", "W292"]
"doc/_templates/numpydoc_docstring.py"=["F821", "W292"]

[tool.pytest.ini_options]
addopts = "--doctest-modules --color=yes"
doctest_optionflags = "NORMALIZE_WHITESPACE"
143 changes: 116 additions & 27 deletions skltemplate/_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
This is a module to be used as a reference for building other modules
"""
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin, _fit_context
from sklearn.metrics import euclidean_distances
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted


class TemplateEstimator(BaseEstimator):
Expand All @@ -17,7 +17,19 @@ class TemplateEstimator(BaseEstimator):
Parameters
----------
demo_param : str, default='demo_param'
A parameter used for demonstation of how to pass and store paramters.
A parameter used for demonstration of how to pass and store parameters.
Attributes
----------
is_fitted_ : bool
A boolean indicating whether the estimator has been fitted.
n_features_in_ : int
Number of features seen during :term:`fit`.
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
Examples
--------
Expand All @@ -30,16 +42,24 @@ class TemplateEstimator(BaseEstimator):
TemplateEstimator()
"""

# This dictionary defines the accepted type(s) of each parameter.
# It is used to validate the parameters within the `_fit_context` decorator.
_parameter_constraints = {
"demo_param": [str],
}

def __init__(self, demo_param="demo_param"):
self.demo_param = demo_param

@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y):
"""A reference implementation of a fitting function.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
The training input samples.
y : array-like, shape (n_samples,) or (n_samples, n_outputs)
The target values (class labels in classification, real numbers in
regression).
Expand All @@ -49,7 +69,12 @@ def fit(self, X, y):
self : object
Returns self.
"""
X, y = check_X_y(X, y, accept_sparse=True)
# `_validate_data` is defined in the `BaseEstimator` class.
# It allows us to:
# - run different checks on the input data;
# - define some attributes associated with the input data: `n_features_in_` and
# `feature_names_in_`.
X, y = self._validate_data(X, y, accept_sparse=True)
self.is_fitted_ = True
# `fit` should always return `self`
return self
Expand All @@ -67,11 +92,16 @@ def predict(self, X):
y : ndarray, shape (n_samples,)
Returns an array of ones.
"""
X = check_array(X, accept_sparse=True)
check_is_fitted(self, "is_fitted_")
# Check if fit has been called
check_is_fitted(self)
# We need to set reset=False because we don't want to overwrite `n_features_in_`
# and `feature_names_in_` but only check that the shape is consistent.
X = self._validate_data(X, accept_sparse=True, reset=False)
return np.ones(X.shape[0], dtype=np.int64)


# Note that the mixin class should always be on the left of `BaseEstimator` to ensure
# the MRO works as expected.
class TemplateClassifier(ClassifierMixin, BaseEstimator):
"""An example classifier which implements a 1-NN algorithm.
Expand All @@ -87,22 +117,54 @@ class TemplateClassifier(ClassifierMixin, BaseEstimator):
----------
X_ : ndarray, shape (n_samples, n_features)
The input passed during :meth:`fit`.
y_ : ndarray, shape (n_samples,)
The labels passed during :meth:`fit`.
classes_ : ndarray, shape (n_classes,)
The classes seen at :meth:`fit`.
n_features_in_ : int
Number of features seen during :term:`fit`.
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
Examples
--------
>>> from sklearn.datasets import load_iris
>>> from skltemplate import TemplateClassifier
>>> X, y = load_iris(return_X_y=True)
>>> clf = TemplateClassifier().fit(X, y)
>>> clf.predict(X)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
"""

# This dictionary defines the accepted type(s) of each parameter.
# It is used to validate the parameters within the `_fit_context` decorator.
_parameter_constraints = {
"demo_param": [str],
}

def __init__(self, demo_param="demo"):
self.demo_param = demo_param

@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y):
"""A reference implementation of a fitting function for a classifier.
Parameters
----------
X : array-like, shape (n_samples, n_features)
The training input samples.
y : array-like, shape (n_samples,)
The target values. An array of int.
Expand All @@ -111,13 +173,22 @@ def fit(self, X, y):
self : object
Returns self.
"""
# Check that X and y have correct shape
X, y = check_X_y(X, y)
# Store the classes seen during fit
self.classes_ = unique_labels(y)

# `_validate_data` is defined in the `BaseEstimator` class.
# It allows us to:
# - run different checks on the input data;
# - define some attributes associated with the input data: `n_features_in_` and
# `feature_names_in_`.
X, y = self._validate_data(X, y)
# We need to make sure that we have a classification task
check_classification_targets(y)

# A classifier should always store the classes seen during `fit`
self.classes_ = np.unique(y)

# Store the training data to predict later
self.X_ = X
self.y_ = y

# Return the classifier
return self

Expand All @@ -136,15 +207,19 @@ def predict(self, X):
seen during fit.
"""
# Check if fit has been called
check_is_fitted(self, ["X_", "y_"])
check_is_fitted(self)

# Input validation
X = check_array(X)
# We need to set reset=False because we don't want to overwrite `n_features_in_`
# and `feature_names_in_` but only check that the shape is consistent.
X = self._validate_data(X, reset=False)

closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
return self.y_[closest]


# Note that the mixin class should always be on the left of `BaseEstimator` to ensure
# the MRO works as expected.
class TemplateTransformer(TransformerMixin, BaseEstimator):
"""An example transformer that returns the element-wise square root.
Expand All @@ -158,20 +233,32 @@ class TemplateTransformer(TransformerMixin, BaseEstimator):
Attributes
----------
n_features_ : int
The number of features of the data passed to :meth:`fit`.
n_features_in_ : int
Number of features seen during :term:`fit`.
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
"""

# This dictionary defines the accepted type(s) of each parameter.
# It is used to validate the parameters within the `_fit_context` decorator.
_parameter_constraints = {
"demo_param": [str],
}

def __init__(self, demo_param="demo"):
self.demo_param = demo_param

@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y=None):
"""A reference implementation of a fitting function for a transformer.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
The training input samples.
y : None
There is no need of a target in a transformer, yet the pipeline API
requires this parameter.
Expand All @@ -181,9 +268,7 @@ def fit(self, X, y=None):
self : object
Returns self.
"""
X = check_array(X, accept_sparse=True)

self.n_features_ = X.shape[1]
X = self._validate_data(X, accept_sparse=True)

# Return the transformer
return self
Expand All @@ -202,14 +287,18 @@ def transform(self, X):
The array containing the element-wise square roots of the values
in ``X``.
"""
# Check is fit had been called
check_is_fitted(self, "n_features_")
# Since this is a stateless transformer, we should not call `check_is_fitted`.
# The common tests check for this specifically.

# Input validation
X = check_array(X, accept_sparse=True)

# Check that the input is of the same shape as the one passed
# during fit.
if X.shape[1] != self.n_features_:
raise ValueError("Shape of input is different from what was seenin `fit`")
# We need to set reset=False because we don't want to overwrite `n_features_in_`
# and `feature_names_in_` but only check that the shape is consistent.
X = self._validate_data(X, accept_sparse=True, reset=False)
return np.sqrt(X)

def _more_tags(self):
# This is a quick example to show the tags API:
# https://scikit-learn.org/dev/developers/develop.html#estimator-tags
# Here, our transformer does not do any operation in `fit` and only validates
# the parameters. Thus, it is stateless.
return {'stateless': True}
16 changes: 9 additions & 7 deletions skltemplate/tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import pytest
from sklearn.utils.estimator_checks import check_estimator
"""This file shows how to write tests based on the scikit-learn common tests."""

from sklearn.utils.estimator_checks import parametrize_with_checks

from skltemplate import TemplateClassifier, TemplateEstimator, TemplateTransformer


@pytest.mark.parametrize(
"estimator", [TemplateEstimator(), TemplateTransformer(), TemplateClassifier()]
)
def test_all_estimators(estimator):
return check_estimator(estimator)
# parametrize_with_checks generates the individual checks, which is more
# fine-grained than check_estimator
@parametrize_with_checks([TemplateEstimator(), TemplateTransformer(), TemplateClassifier()])
def test_estimators(estimator, check, request):
"""Check the compatibility with scikit-learn API"""
check(estimator)
17 changes: 6 additions & 11 deletions skltemplate/tests/test_template.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""This file will just show how to write tests for the template classes."""
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.datasets import load_iris
from sklearn.utils._testing import assert_allclose, assert_array_equal

from skltemplate import TemplateClassifier, TemplateEstimator, TemplateTransformer

Expand All @@ -12,6 +13,7 @@ def data():


def test_template_estimator(data):
"""Check the internals and behaviour of `TemplateEstimator`."""
est = TemplateEstimator()
assert est.demo_param == "demo_param"

Expand All @@ -23,22 +25,14 @@ def test_template_estimator(data):
assert_array_equal(y_pred, np.ones(X.shape[0], dtype=np.int64))


def test_template_transformer_error(data):
X, y = data
trans = TemplateTransformer()
trans.fit(X)
with pytest.raises(ValueError, match="Shape of input is different"):
X_diff_size = np.ones((10, X.shape[1] + 1))
trans.transform(X_diff_size)


def test_template_transformer(data):
"""Check the internals and behaviour of `TemplateTransformer`."""
X, y = data
trans = TemplateTransformer()
assert trans.demo_param == "demo"

trans.fit(X)
assert trans.n_features_ == X.shape[1]
assert trans.n_features_in_ == X.shape[1]

X_trans = trans.transform(X)
assert_allclose(X_trans, np.sqrt(X))
Expand All @@ -48,6 +42,7 @@ def test_template_transformer(data):


def test_template_classifier(data):
"""Check the internals and behaviour of `TemplateClassifier`."""
X, y = data
clf = TemplateClassifier()
assert clf.demo_param == "demo"
Expand Down

0 comments on commit eba14ba

Please sign in to comment.