Skip to content

Commit

Permalink
Merge pull request #774 from EducationalTestingService/770-ruff
Browse files Browse the repository at this point in the history
Remove pre-commit hooks that are covered by ruff
  • Loading branch information
tamarl08 authored Jul 30, 2024
2 parents 796cb1d + 503753a commit 51c540b
Show file tree
Hide file tree
Showing 38 changed files with 200 additions and 54 deletions.
6 changes: 0 additions & 6 deletions .isort.cfg

This file was deleted.

23 changes: 0 additions & 23 deletions .pep8speaks.yml

This file was deleted.

6 changes: 0 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,10 @@ repos:
rev: '1.0.1'
hooks:
- id: flynt
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
args: [--line-length=100]
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.3.1'
hooks:
- id: ruff
args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D,tests/test_input.py:E501,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.8.0'
hooks:
Expand Down
12 changes: 6 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ How to contribute
$ pre-commit install

[`pre-commit`](https://pre-commit.com/) is used to run pre-commit
hooks, such as [`isort`](https://pycqa.github.io/isort/) and
[`flake8`](https://flake8.pycqa.org/en/latest/). (Check
hooks, such as [`ruff`](https://github.com/astral-sh/ruff) and
[`mypy`](https://github.com/python/mypy). (Check
[here](./.pre-commit-config.yaml) to see a full list of pre-commit
hooks.) If you attempt to make a commit and it fails, you will be
able to see which hooks passed/failed and you will have an
Expand All @@ -52,13 +52,13 @@ How to contribute

$ pre-commit run

To run the `black` hook alone on changed files:
To run the `ruff` hook alone on changed files:

$ pre-commit run black
$ pre-commit run ruff

To run the `black` hook alone on a given file:
To run the `ruff` hook alone on a given file:

$ pre-commit run black <file-path>
$ pre-commit run ruff --files <file-path>

Finally, the `SKIP` environment variable can be used to indicate to
`pre-commit` that certain checks should be skipped. It can be
Expand Down
1 change: 1 addition & 0 deletions examples/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Data generation scripts for the different tutorial examples."""
15 changes: 7 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,17 @@ where = ["."]
exclude = ["tests", "examples"]
namespaces = false

[tool.black]
include = '\.pyi?$'
line-length = 100
target-version = ['py311']

[tool.ruff]
extend-exclude = ["setup.py"]
lint.select = ["D", "E", "F", "I"]
lint.ignore = ["D212"]
line-length = 100
target-version = "py311"
fix = true

[mypy]
exclude = '^setup\.py$'
[tool.ruff.lint.per-file-ignores]
"tests/test*.py" = ["D"]
"tests/test_input.py" = ["E501"]
"skll/data/featureset.py" = ["E501"]
"skll/learner/__init__.py" = ["E501"]
"skll/learner/voting.py" = ["E501"]
"skll/learner/utils.py" = ["E501"]
5 changes: 5 additions & 0 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _find_invalid_options(self) -> Set[str]:
-------
invalid_options : Set[str]
The set of invalid options specified by the user.
"""
# compute a list of all the valid options
valid_options = list(self.defaults().keys()) + self._required_options
Expand Down Expand Up @@ -211,6 +212,7 @@ def _find_ill_specified_options(
the default value for the option does not result in running an
experiment with unexpected settings, this is not really a major
problem.
"""
incorrectly_specified_options = []
multiply_specified_options = []
Expand Down Expand Up @@ -252,6 +254,7 @@ def validate(self) -> None:
KeyError
If any options are not defined in the appropriate sections.
"""
invalid_options = self._find_invalid_options()
if invalid_options:
Expand Down Expand Up @@ -522,6 +525,7 @@ def parse_config_file(
ValueError
If various configuration parameters are incorrectly specified,
or cause conflicts.
"""
# ensure that a path is specified
if not config_path:
Expand Down Expand Up @@ -1114,6 +1118,7 @@ def _setup_config_parser(config_path: PathOrStr, validate=True) -> SKLLConfigPar
------
FileNotFoundError
If the configuration file does not exist.
"""
# initialize config parser with the given defaults
config = SKLLConfigParser()
Expand Down
5 changes: 5 additions & 0 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def fix_json(json_string: str) -> str:
-------
str
The normalized JSON string.
"""
json_string = json_string.replace("True", "true")
json_string = json_string.replace("False", "false")
Expand Down Expand Up @@ -63,6 +64,7 @@ def load_cv_folds(folds_file: PathOrStr, ids_to_floats=False) -> FoldMapping:
------
ValueError
If example IDs cannot be converted to floats and `ids_to_floats` is `True`.
"""
with open(folds_file) as f:
reader = csv.reader(f)
Expand Down Expand Up @@ -106,6 +108,7 @@ def locate_file(file_path: PathOrStr, config_dir: PathOrStr) -> str:
------
FileNotFoundError
If the file does not exist.
"""
if not file_path:
return ""
Expand Down Expand Up @@ -140,6 +143,7 @@ def _munge_featureset_name(name_or_list: Union[Iterable, str]) -> str:
-------
res : str
name components joined with '+' if input is a list or the name itself.
"""
if isinstance(name_or_list, str):
return name_or_list
Expand Down Expand Up @@ -179,6 +183,7 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) ->
ValueError
If "mean_squared_error" is specified as a metric.
"""
# create a logger if one was not passed in
if not logger:
Expand Down
1 change: 1 addition & 0 deletions skll/data/dict_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class DictVectorizer(OldDictVectorizer):
FeatureHasher : performs vectorization using only a hash function.
sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features
encoded as columns of integers.
"""

def __eq__(self, other):
Expand Down
13 changes: 13 additions & 0 deletions skll/data/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class FeatureSet(object):
-----
If ids, labels, and/or features are not None, the number of rows in
each array must be equal.
"""

def __init__(
Expand Down Expand Up @@ -125,6 +126,7 @@ def __contains__(self, value):
----------
value
The value to check.
"""
return value in self.ids

Expand All @@ -146,6 +148,7 @@ def __eq__(self, other):
-----
We consider feature values to be equal if any differences are in the
sixth decimal place or higher.
"""
return (
self.ids.shape == other.ids.shape
Expand Down Expand Up @@ -218,6 +221,7 @@ def __add__(self, other: "FeatureSet") -> "FeatureSet":
ValueError
If there are conflicting labels.
"""
# Check that the sets of IDs are equal
if set(self.ids) != set(other.ids):
Expand Down Expand Up @@ -335,6 +339,7 @@ def filter(
ValueError
If attempting to use features to filter a ``FeatureSet`` that
uses a ``FeatureHasher`` vectorizer.
"""
# Construct mask that indicates which examples to keep
mask = np.ones(len(self), dtype=bool)
Expand Down Expand Up @@ -430,6 +435,7 @@ def filtered_iter(
ValueError
If any of the "labels", "features", or "vectorizer" attribute
is ``None``.
"""
if self.features is not None and not isinstance(self.vectorizer, DictVectorizer):
raise ValueError(
Expand Down Expand Up @@ -477,6 +483,7 @@ def __sub__(self, other: "FeatureSet") -> "FeatureSet":
-------
:class:`skll.data.featureset.FeatureSet`
A copy of ``self`` with all features in ``other`` removed.
"""
new_set = deepcopy(self)
if other.vectorizer:
Expand All @@ -492,6 +499,7 @@ def has_labels(self):
-------
has_labels : bool
Whether or not this FeatureSet has any finite labels.
"""
# make sure that labels is not None or a list of Nones
if self.labels is not None and not all(label is None for label in self.labels):
Expand All @@ -510,6 +518,7 @@ def __str__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return str(self.__dict__)

Expand All @@ -521,6 +530,7 @@ def __repr__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return repr(self.__dict__)

Expand All @@ -542,6 +552,7 @@ def __getitem__(
If `value` is a slice, then return a new ``FeatureSet`` instance
containing a subset of the data. If it's an index, return the
specific example by row number.
"""
# Check if we're slicing
if isinstance(value, slice):
Expand Down Expand Up @@ -597,6 +608,7 @@ def split(
-------
Tuple[:class:`skll.data.featureset.FeatureSet`, :class:`skll.data.featureset.FeatureSet`]
A tuple containing the two featureset instances.
"""
# Note: an alternative way to implement this is to make copies
# of the given FeatureSet instance and then use the `filter()`
Expand Down Expand Up @@ -655,6 +667,7 @@ def from_data_frame(
-------
:class:`skll.data.featureset.FeatureSet`
            A ``FeatureSet`` instance generated from the given data frame.
"""
if labels_column:
feature_columns = [column for column in df.columns if column != labels_column]
Expand Down
Loading

0 comments on commit 51c540b

Please sign in to comment.