Skip to content

Commit

Permalink
Merge pull request #774 from EducationalTestingService/770-ruff
Browse files Browse the repository at this point in the history
Remove pre-commit hooks that are covered by ruff
  • Loading branch information
tamarl08 authored Jul 30, 2024
2 parents 796cb1d + 503753a commit 51c540b
Show file tree
Hide file tree
Showing 38 changed files with 200 additions and 54 deletions.
6 changes: 0 additions & 6 deletions .isort.cfg

This file was deleted.

23 changes: 0 additions & 23 deletions .pep8speaks.yml

This file was deleted.

6 changes: 0 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,10 @@ repos:
rev: '1.0.1'
hooks:
- id: flynt
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
args: [--line-length=100]
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.3.1'
hooks:
- id: ruff
args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D,tests/test_input.py:E501,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.8.0'
hooks:
Expand Down
12 changes: 6 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ How to contribute
$ pre-commit install

[`pre-commit`](https://pre-commit.com/) is used to run pre-commit
hooks, such as [`isort`](https://pycqa.github.io/isort/) and
[`flake8`](https://flake8.pycqa.org/en/latest/). (Check
hooks, such as [`ruff`](https://github.com/astral-sh/ruff) and
[`mypy`](https://github.com/python/mypy). (Check
[here](./.pre-commit-config.yaml) to see a full list of pre-commit
hooks.) If you attempt to make a commit and it fails, you will be
able to see which hooks passed/failed and you will have an
Expand All @@ -52,13 +52,13 @@ How to contribute

$ pre-commit run

To run the `black` hook alone on changed files:
To run the `ruff` hook alone on changed files:

$ pre-commit run black
$ pre-commit run ruff

To run the `black` hook alone on a given file:
To run the `ruff` hook alone on a given file:

$ pre-commit run black <file-path>
$ pre-commit run ruff --files <file-path>

Finally, the `SKIP` environment variable can be used to indicate to
`pre-commit` that certain checks should be skipped. It can be
Expand Down
1 change: 1 addition & 0 deletions examples/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Data generation scripts for the different tutorial examples."""
15 changes: 7 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,17 @@ where = ["."]
exclude = ["tests", "examples"]
namespaces = false

[tool.black]
include = '\.pyi?$'
line-length = 100
target-version = ['py311']

[tool.ruff]
extend-exclude = ["setup.py"]
lint.select = ["D", "E", "F", "I"]
lint.ignore = ["D212"]
line-length = 100
target-version = "py311"
fix = true

[mypy]
exclude = '^setup\.py$'
[tool.ruff.lint.per-file-ignores]
"tests/test*.py" = ["D"]
"tests/test_input.py" = ["E501"]
"skll/data/featureset.py" = ["E501"]
"skll/learner/__init__.py" = ["E501"]
"skll/learner/voting.py" = ["E501"]
"skll/learner/utils.py" = ["E501"]
5 changes: 5 additions & 0 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _find_invalid_options(self) -> Set[str]:
-------
invalid_options : Set[str]
The set of invalid options specified by the user.
"""
# compute a list of all the valid options
valid_options = list(self.defaults().keys()) + self._required_options
Expand Down Expand Up @@ -211,6 +212,7 @@ def _find_ill_specified_options(
the default value for the option does not result in running an
experiment with unexpected settings, this is not really a major
problem.
"""
incorrectly_specified_options = []
multiply_specified_options = []
Expand Down Expand Up @@ -252,6 +254,7 @@ def validate(self) -> None:
KeyError
If any options are not defined in the appropriate sections.
"""
invalid_options = self._find_invalid_options()
if invalid_options:
Expand Down Expand Up @@ -522,6 +525,7 @@ def parse_config_file(
ValueError
If various configuration parameters are incorrectly specified,
or cause conflicts.
"""
# ensure that a path is specified
if not config_path:
Expand Down Expand Up @@ -1114,6 +1118,7 @@ def _setup_config_parser(config_path: PathOrStr, validate=True) -> SKLLConfigPar
------
FileNotFoundError
If the configuration file does not exist.
"""
# initialize config parser with the given defaults
config = SKLLConfigParser()
Expand Down
5 changes: 5 additions & 0 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def fix_json(json_string: str) -> str:
-------
str
The normalized JSON string.
"""
json_string = json_string.replace("True", "true")
json_string = json_string.replace("False", "false")
Expand Down Expand Up @@ -63,6 +64,7 @@ def load_cv_folds(folds_file: PathOrStr, ids_to_floats=False) -> FoldMapping:
------
ValueError
If example IDs cannot be converted to floats and `ids_to_floats` is `True`.
"""
with open(folds_file) as f:
reader = csv.reader(f)
Expand Down Expand Up @@ -106,6 +108,7 @@ def locate_file(file_path: PathOrStr, config_dir: PathOrStr) -> str:
------
FileNotFoundError
If the file does not exist.
"""
if not file_path:
return ""
Expand Down Expand Up @@ -140,6 +143,7 @@ def _munge_featureset_name(name_or_list: Union[Iterable, str]) -> str:
-------
res : str
name components joined with '+' if input is a list or the name itself.
"""
if isinstance(name_or_list, str):
return name_or_list
Expand Down Expand Up @@ -179,6 +183,7 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) ->
ValueError
If "mean_squared_error" is specified as a metric.
"""
# create a logger if one was not passed in
if not logger:
Expand Down
1 change: 1 addition & 0 deletions skll/data/dict_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class DictVectorizer(OldDictVectorizer):
FeatureHasher : performs vectorization using only a hash function.
sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features
encoded as columns of integers.
"""

def __eq__(self, other):
Expand Down
13 changes: 13 additions & 0 deletions skll/data/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class FeatureSet(object):
-----
If ids, labels, and/or features are not None, the number of rows in
each array must be equal.
"""

def __init__(
Expand Down Expand Up @@ -125,6 +126,7 @@ def __contains__(self, value):
----------
value
The value to check.
"""
return value in self.ids

Expand All @@ -146,6 +148,7 @@ def __eq__(self, other):
-----
We consider feature values to be equal if any differences are in the
sixth decimal place or higher.
"""
return (
self.ids.shape == other.ids.shape
Expand Down Expand Up @@ -218,6 +221,7 @@ def __add__(self, other: "FeatureSet") -> "FeatureSet":
ValueError
If there are conflicting labels.
"""
# Check that the sets of IDs are equal
if set(self.ids) != set(other.ids):
Expand Down Expand Up @@ -335,6 +339,7 @@ def filter(
ValueError
If attempting to use features to filter a ``FeatureSet`` that
uses a ``FeatureHasher`` vectorizer.
"""
# Construct mask that indicates which examples to keep
mask = np.ones(len(self), dtype=bool)
Expand Down Expand Up @@ -430,6 +435,7 @@ def filtered_iter(
ValueError
If any of the "labels", "features", or "vectorizer" attribute
is ``None``.
"""
if self.features is not None and not isinstance(self.vectorizer, DictVectorizer):
raise ValueError(
Expand Down Expand Up @@ -477,6 +483,7 @@ def __sub__(self, other: "FeatureSet") -> "FeatureSet":
-------
:class:`skll.data.featureset.FeatureSet`
A copy of ``self`` with all features in ``other`` removed.
"""
new_set = deepcopy(self)
if other.vectorizer:
Expand All @@ -492,6 +499,7 @@ def has_labels(self):
-------
has_labels : bool
Whether or not this FeatureSet has any finite labels.
"""
# make sure that labels is not None or a list of Nones
if self.labels is not None and not all(label is None for label in self.labels):
Expand All @@ -510,6 +518,7 @@ def __str__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return str(self.__dict__)

Expand All @@ -521,6 +530,7 @@ def __repr__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return repr(self.__dict__)

Expand All @@ -542,6 +552,7 @@ def __getitem__(
If `value` is a slice, then return a new ``FeatureSet`` instance
containing a subset of the data. If it's an index, return the
specific example by row number.
"""
# Check if we're slicing
if isinstance(value, slice):
Expand Down Expand Up @@ -597,6 +608,7 @@ def split(
-------
Tuple[:class:`skll.data.featureset.FeatureSet`, :class:`skll.data.featureset.FeatureSet`]
A tuple containing the two featureset instances.
"""
# Note: an alternative way to implement this is to make copies
# of the given FeatureSet instance and then use the `filter()`
Expand Down Expand Up @@ -655,6 +667,7 @@ def from_data_frame(
-------
:class:`skll.data.featureset.FeatureSet`
            A ``FeatureSet`` instance generated from the given data frame.
"""
if labels_column:
feature_columns = [column for column in df.columns if column != labels_column]
Expand Down
Loading

0 comments on commit 51c540b

Please sign in to comment.