From aba1b8e7b966b6e9dc341cac54c3ffbb33eead6f Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Fri, 26 Jul 2024 13:06:57 -0400 Subject: [PATCH 1/7] test precommit --- .pre-commit-config.yaml | 7 +------ .isort.cfg => _.isort.cfg | 0 pyproject.toml | 9 --------- skll/metrics.py | 5 +++++ 4 files changed, 6 insertions(+), 15 deletions(-) rename .isort.cfg => _.isort.cfg (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f5f5b14..b937421e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,16 +22,11 @@ repos: rev: '1.0.1' hooks: - id: flynt - - repo: https://github.com/psf/black - rev: 24.2.0 - hooks: - - id: black - args: [--line-length=100] - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.3.1' hooks: - id: ruff - args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D,tests/test_input.py:E501,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"] + # args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D,tests/test_input.py:E501,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.8.0' hooks: diff --git a/.isort.cfg b/_.isort.cfg similarity index 100% rename from .isort.cfg rename to _.isort.cfg diff --git a/pyproject.toml b/pyproject.toml index 663788e8..68d92f9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,18 +63,9 @@ where = ["."] exclude = ["tests", "examples"] namespaces = false -[tool.black] -include = '\.pyi?$' -line-length = 100 -target-version = ['py311'] - [tool.ruff] -extend-exclude = ["setup.py"] lint.select = ["D", "E", "F", "I"] lint.ignore = ["D212"] line-length = 100 target-version = "py311" fix = true - -[mypy] -exclude = '^setup\.py$' diff --git a/skll/metrics.py b/skll/metrics.py index 68393cbc..c053c30b 100644 --- a/skll/metrics.py +++ b/skll/metrics.py @@ -85,6 +85,7 @@ def kappa( If labels cannot be converted to int. ValueError If invalid weight scheme. + """ # Ensure that the lists are both the same length assert len(y_true) == len(y_pred) @@ -190,6 +191,7 @@ def correlation(y_true: np.ndarray, y_pred: np.ndarray, corr_type: str = "pearso ------- float correlation value if well-defined, else 0.0 + """ # get the correlation function to use based on the given type corr_func = pearsonr @@ -226,6 +228,7 @@ def f1_score_least_frequent(y_true: np.ndarray, y_pred: np.ndarray) -> float: ------- float F1 score of the least frequent label. + """ least_frequent = np.bincount(y_true).argmin() return f1_score(y_true, y_pred, average=None)[least_frequent] @@ -253,6 +256,7 @@ def register_custom_metric(custom_metric_path: PathOrStr, custom_metric_name: st with an already existing attribute in ``skll.metrics`` or if the custom metric name conflicts with a scikit-learn or SKLL metric. + """ if not custom_metric_path: raise ValueError( @@ -332,6 +336,7 @@ def use_score_func(func_name: str, y_true: np.ndarray, y_pred: np.ndarray) -> fl ------- float The scored result from the given scorer. 
+ """ try: scorer = get_scorer(func_name) From 63054e1def10d7c989086be5cd7515f9e31e5d5a Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Fri, 26 Jul 2024 13:27:49 -0400 Subject: [PATCH 2/7] remove redundant ruff configuration --- .pre-commit-config.yaml | 1 - pyproject.toml | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b937421e..79b10dda 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,7 +26,6 @@ repos: rev: 'v0.3.1' hooks: - id: ruff - # args: [--line-length=100, --select, "D,E,F,I", --ignore, "D212", --per-file-ignores, "tests/test*.py:D,tests/test_input.py:E501,skll/data/featureset.py:E501,skll/learner/__init__.py:E501,skll/learner/voting.py:E501,skll/learner/utils.py:E501"] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.8.0' hooks: diff --git a/pyproject.toml b/pyproject.toml index 68d92f9b..0fe8dcd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,3 +69,11 @@ lint.ignore = ["D212"] line-length = 100 target-version = "py311" fix = true + +[tool.ruff.lint.per-file-ignores] +"tests/test*.py" = ["D"] +"tests/test_input.py" = ["E501"] +"skll/data/featureset.py" = ["E501"] +"skll/learner/__init__.py" = ["E501"] +"skll/learner/voting.py" = ["E501"] +"skll/learner/utils.py" = ["E501"] From 492b7820cab53bd7f94734d5f2624e74453389f7 Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Fri, 26 Jul 2024 13:30:02 -0400 Subject: [PATCH 3/7] remove isort config --- _.isort.cfg | 6 ------ .pep8speaks.yml => _.pep8speaks.yml | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 _.isort.cfg rename .pep8speaks.yml => _.pep8speaks.yml (97%) diff --git a/_.isort.cfg b/_.isort.cfg deleted file mode 100644 index 919df1ae..00000000 --- a/_.isort.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[settings] -multi_line_output = 3 -include_trailing_comma = true -use_parentheses = true -ensure_newline_before_comments = true -line_length = 88 diff --git a/.pep8speaks.yml b/_.pep8speaks.yml similarity index 97% rename from .pep8speaks.yml rename to _.pep8speaks.yml index 9489f045..97477e2c 100644 --- a/.pep8speaks.yml +++ b/_.pep8speaks.yml @@ -1,4 +1,4 @@ -# File : .pep8speaks.yml +# File : _.pep8speaks.yml scanner: diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. From b1da5e041b49ca0b2787fa337a66ce28aff350be Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Fri, 26 Jul 2024 14:27:33 -0400 Subject: [PATCH 4/7] remove pep8speaks.yml --- _.pep8speaks.yml | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 _.pep8speaks.yml diff --git a/_.pep8speaks.yml b/_.pep8speaks.yml deleted file mode 100644 index 97477e2c..00000000 --- a/_.pep8speaks.yml +++ /dev/null @@ -1,23 +0,0 @@ -# File : _.pep8speaks.yml - -scanner: - diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned. - linter: flake8 # Other option is pycodestyle - -flake8: # Valid if scanner.linter is flake8 - max-line-length: 100 - ignore: [W503, W504] # Errors and warnings to ignore - -no_blank_comment: False # If True, no comment is made on PR without any errors. -descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file - -message: # Customize the comment made by the bot - opened: # Messages when a new PR is submitted - header: "Hello @{name}! Thanks for opening this PR. 
" - # The keyword {name} is converted into the author's username - footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)" - # The messages can be written as they would over GitHub - updated: # Messages when new commits are added to the PR - header: "Hello @{name}! Thanks for updating this PR. " - footer: "" # Why to comment the link to the style guide everytime? :) - no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :tada: " From 910f33d39d8a4b92340e941377929ff5c8550a0d Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Fri, 26 Jul 2024 14:38:54 -0400 Subject: [PATCH 5/7] update contributing README with current precommit --- CONTRIBUTING.md | 12 ++++++------ skll/utils/testing.py | 13 +++++++++++++ skll/utils/wandb.py | 15 ++++++++++++--- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 68666bb1..8e58bc87 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -33,8 +33,8 @@ How to contribute $ pre-commit install [`pre-commit`](https://pre-commit.com/) is used to run pre-commit - hooks, such as [`isort`](https://pycqa.github.io/isort/) and - [`flake8`](https://flake8.pycqa.org/en/latest/). (Check + hooks, such as [`ruff`](https://github.com/astral-sh/ruff) and + [`mypy`](https://github.com/python/mypy). (Check [here](./.pre-commit-config.yaml) to see a full list of pre-commit hooks.) If you attempt to make a commit and it fails, you will be able to see which hooks passed/failed and you will have an @@ -52,13 +52,13 @@ How to contribute $ pre-commit run - To run the `black` hook alone on changed files: + To run the `ruff` hook alone on changed files: - $ pre-commit run black + $ pre-commit run ruff - To run the `black` hook alone on a given file: + To run the `ruff` hook alone on a given file: - $ pre-commit run black + $ pre-commit run ruff --files Finally, the `SKIP` environment variable can be used to indicate to `pre-commit` that certain checks should be skipped. It can be diff --git a/skll/utils/testing.py b/skll/utils/testing.py index f0824688..1eea2bda 100644 --- a/skll/utils/testing.py +++ b/skll/utils/testing.py @@ -53,6 +53,7 @@ def unlink(file_path: PathOrStr): ---------- file_path : :class:`skll.types.PathOrStr` File path to remove. + """ file_path = Path(file_path) if file_path.exists(): @@ -74,6 +75,7 @@ def fill_in_config_paths(config_template_path: PathOrStr) -> Path: ------- Path The path to the filled configuration file. + """ # convert path to Path object config_template_path = Path(config_template_path) @@ -154,6 +156,7 @@ def fill_in_config_paths_for_single_file( ------- Path The path to the filled configuration file. + """ # convert path to Path object if it's a string config_template_path = Path(config_template_path) @@ -231,6 +234,7 @@ def fill_in_config_options( ------- Path The path to the filled configuration file. + """ # convert path to Path object if it's a string config_template_path = Path(config_template_path) @@ -333,6 +337,7 @@ def fill_in_config_paths_for_fancy_output(config_template_path: PathOrStr) -> Pa ------- Path The path to the filled configuration file. 
+ """ # convert template path to a Path object if string config_template_path = Path(config_template_path) @@ -392,6 +397,7 @@ def fill_in_config_options_for_voting_learners( - the custom seed value used for cross-validation, if any - the number of learning curve cross-validation folds (10 or 20) - the list of learning curve training sizes + """ # setup learner-type specific values based on configuration options custom_learner = "" @@ -569,6 +575,7 @@ def create_jsonlines_feature_files(path: PathOrStr): ---------- path : :class:`skll.types.PathOrStr` Full path under which to save the created feature files. + """ # convert to Path object path = Path(path) @@ -629,6 +636,7 @@ def remove_jsonlines_feature_files(path: PathOrStr): ---------- path : :class:`skll.types.PathOrStr` Path to directory in which jsonlines files reside. + """ for i in range(6): unlink(Path(path) / f"f{i}.jsonlines") @@ -842,6 +850,7 @@ def make_regression_data( 3-tuple containing the generated training featureset, the generated test featureset, and a dictionary containing the oracle feature weights + """ # if we are doing feature hashing and we have asked for more # feature bins than number of total features, we need to @@ -944,6 +953,7 @@ def make_sparse_data(use_feature_hashing=False): Tuple Tuple containing the generated training featureset and the generated test featureset. + """ # Create training data X, y = make_classification( @@ -1040,6 +1050,7 @@ def make_digits_data(num_examples=None, test_size=0.2, use_digit_names=False): ValueError If ``num_examples`` is greater than the number of available examples. + """ # load the digits data digits = load_digits(as_frame=True) @@ -1150,6 +1161,7 @@ def make_california_housing_data(num_examples=None, test_size=0.2): ValueError If ``num_examples`` is greater than the number of available examples. + """ # load the housing data housing = fetch_california_housing( @@ -1238,6 +1250,7 @@ def compute_expected_folds_for_cv_testing(featureset, num_folds=10, stratified=T expected_fold_ids : dict A dictionary mapping each ID in the featureset to a fold ID. Fold IDs range from 0 to ``num_folds``-1. + """ # initialize the return dictionary expected_fold_ids = {} diff --git a/skll/utils/wandb.py b/skll/utils/wandb.py index e26b809d..00bbdb32 100644 --- a/skll/utils/wandb.py +++ b/skll/utils/wandb.py @@ -8,13 +8,13 @@ from typing import Any, Dict, Optional, Union import pandas as pd - import wandb -from skll.config import _setup_config_parser -from skll.types import PathOrStr from wandb.sdk.lib import RunDisabled from wandb.sdk.wandb_run import Run +from skll.config import _setup_config_parser +from skll.types import PathOrStr + class WandbLogger: """Interface for Weights and Biases logging.""" @@ -30,6 +30,7 @@ def __init__(self, wandb_credentials: Optional[Dict[str, str]], config_file_path used to initialize the wandb run. If ``None``, logging to W&B will not be performed. config_file_path : str The path to this experiment's config file + """ self.wandb_run: Optional[Union[Run, RunDisabled]] = None if wandb_credentials: @@ -48,6 +49,7 @@ def log_plot(self, plot_file_path: str) -> None: ---------- plot_file_path : str The full path to the plot file. 
+ """ plot_name = Path(plot_file_path).stem if self.wandb_run: @@ -63,6 +65,7 @@ def log_summary_file(self, summary_file_path: PathOrStr) -> None: ---------- summary_file_path : PathOrStr The path to the summary tsv file + """ if self.wandb_run: summary_df = pd.read_csv(summary_file_path, sep="\t") @@ -87,6 +90,7 @@ def log_evaluation_results(self, task_results: Dict[str, Any]) -> None: task_results : Dict[str,Any] The evaluation results of a single job of "evaluate" task or a single fold of a "cross_validate" task. + """ if self.wandb_run: task_prefix = task_results["job_name"] @@ -136,6 +140,7 @@ def log_train_results(self, task_results: Dict[str, Any]) -> None: ---------- task_results : Dict[str, Any] The train task results. + """ if self.wandb_run: task_prefix = task_results["job_name"] @@ -152,6 +157,7 @@ def log_predict_results(self, task_results: Dict[str, Any]) -> None: ---------- task_results : Dict[str, Any] The predict task results. + """ if self.wandb_run: task_prefix = task_results["job_name"] @@ -176,6 +182,7 @@ def log_conf_matrix_chart(self, task_prefix, confusion_matrix, labels) -> None: the confusion matrix values labels : List[str] label names + """ if self.wandb_run: conf_matrix_data = [] @@ -198,6 +205,7 @@ def log_to_summary(self, task_prefix, metric_name, metric_value) -> None: The metric name metric_value : Any The metric value + """ if self.wandb_run: self.wandb_run.summary[f"{task_prefix}/{metric_name}"] = metric_value @@ -217,6 +225,7 @@ def get_config_dict(config_file_path: str) -> Dict[str, Any]: Dictionary containing all SKLL configuration fields. This also includes default values when for fields that are missing in the file. + """ config_parser = _setup_config_parser(config_file_path, validate=False) return { From cf82ae9525712e47e5d7798fab2e915b66e5cd52 Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Mon, 29 Jul 2024 10:35:22 -0400 Subject: [PATCH 6/7] run ruff on all files and apply changes. --- skll/config/__init__.py | 5 ++++ skll/config/utils.py | 5 ++++ skll/data/dict_vectorizer.py | 1 + skll/data/featureset.py | 13 +++++++++++ skll/data/readers.py | 20 ++++++++++++++++ skll/data/writers.py | 20 ++++++++++++++++ skll/experiments/__init__.py | 2 ++ skll/experiments/input.py | 1 + skll/experiments/output.py | 8 +++++++ skll/experiments/utils.py | 3 +++ skll/learner/__init__.py | 18 +++++++++++++++ skll/learner/utils.py | 23 +++++++++++++++++++ skll/learner/voting.py | 8 +++++++ .../compute_eval_from_predictions.py | 2 ++ skll/utils/commandline/filter_features.py | 1 + .../utils/commandline/generate_predictions.py | 1 + skll/utils/commandline/join_features.py | 1 + .../utils/commandline/plot_learning_curves.py | 1 + skll/utils/commandline/print_model_weights.py | 1 + skll/utils/commandline/run_experiment.py | 1 + skll/utils/commandline/skll_convert.py | 1 + skll/utils/commandline/summarize_results.py | 1 + skll/utils/logging.py | 3 +++ 23 files changed, 140 insertions(+) diff --git a/skll/config/__init__.py b/skll/config/__init__.py index b13adc2f..a3e3407c 100644 --- a/skll/config/__init__.py +++ b/skll/config/__init__.py @@ -164,6 +164,7 @@ def _find_invalid_options(self) -> Set[str]: ------- invalid_options : Set[str] The set of invalid options specified by the user. 
+ """ # compute a list of all the valid options valid_options = list(self.defaults().keys()) + self._required_options @@ -211,6 +212,7 @@ def _find_ill_specified_options( the default value for the option does not result in running an experiment with unexpected settings, this is not really a major problem. + """ incorrectly_specified_options = [] multiply_specified_options = [] @@ -252,6 +254,7 @@ def validate(self) -> None: KeyError If any options are not defined in the appropriate sections. + """ invalid_options = self._find_invalid_options() if invalid_options: @@ -522,6 +525,7 @@ def parse_config_file( ValueError If various configuration parameters are incorrectly specified, or cause conflicts. + """ # ensure that a path is specified if not config_path: @@ -1114,6 +1118,7 @@ def _setup_config_parser(config_path: PathOrStr, validate=True) -> SKLLConfigPar ------ FileNotFoundError If the configuration file does not exist. + """ # initialize config parser with the given defaults config = SKLLConfigParser() diff --git a/skll/config/utils.py b/skll/config/utils.py index 515659a9..7215a6a1 100644 --- a/skll/config/utils.py +++ b/skll/config/utils.py @@ -31,6 +31,7 @@ def fix_json(json_string: str) -> str: ------- str The normalized JSON string. + """ json_string = json_string.replace("True", "true") json_string = json_string.replace("False", "false") @@ -63,6 +64,7 @@ def load_cv_folds(folds_file: PathOrStr, ids_to_floats=False) -> FoldMapping: ------ ValueError If example IDs cannot be converted to floats and `ids_to_floats` is `True`. + """ with open(folds_file) as f: reader = csv.reader(f) @@ -106,6 +108,7 @@ def locate_file(file_path: PathOrStr, config_dir: PathOrStr) -> str: ------ FileNotFoundError If the file does not exist. + """ if not file_path: return "" @@ -140,6 +143,7 @@ def _munge_featureset_name(name_or_list: Union[Iterable, str]) -> str: ------- res : str name components joined with '+' if input is a list or the name itself. + """ if isinstance(name_or_list, str): return name_or_list @@ -179,6 +183,7 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) -> ValueError If "mean_squared_error" is specified as a metric. + """ # create a logger if one was not passed in if not logger: diff --git a/skll/data/dict_vectorizer.py b/skll/data/dict_vectorizer.py index f9255569..d3b4232e 100644 --- a/skll/data/dict_vectorizer.py +++ b/skll/data/dict_vectorizer.py @@ -82,6 +82,7 @@ class DictVectorizer(OldDictVectorizer): FeatureHasher : performs vectorization using only a hash function. sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features encoded as columns of integers. + """ def __eq__(self, other): diff --git a/skll/data/featureset.py b/skll/data/featureset.py index 8c377817..8de94acb 100644 --- a/skll/data/featureset.py +++ b/skll/data/featureset.py @@ -56,6 +56,7 @@ class FeatureSet(object): ----- If ids, labels, and/or features are not None, the number of rows in each array must be equal. + """ def __init__( @@ -125,6 +126,7 @@ def __contains__(self, value): ---------- value The value to check. + """ return value in self.ids @@ -146,6 +148,7 @@ def __eq__(self, other): ----- We consider feature values to be equal if any differences are in the sixth decimal place or higher. + """ return ( self.ids.shape == other.ids.shape @@ -218,6 +221,7 @@ def __add__(self, other: "FeatureSet") -> "FeatureSet": ValueError If there are conflicting labels. 
+ """ # Check that the sets of IDs are equal if set(self.ids) != set(other.ids): @@ -335,6 +339,7 @@ def filter( ValueError If attempting to use features to filter a ``FeatureSet`` that uses a ``FeatureHasher`` vectorizer. + """ # Construct mask that indicates which examples to keep mask = np.ones(len(self), dtype=bool) @@ -430,6 +435,7 @@ def filtered_iter( ValueError If any of the "labels", "features", or "vectorizer" attribute is ``None``. + """ if self.features is not None and not isinstance(self.vectorizer, DictVectorizer): raise ValueError( @@ -477,6 +483,7 @@ def __sub__(self, other: "FeatureSet") -> "FeatureSet": ------- :class:`skll.data.featureset.FeatureSet` A copy of ``self`` with all features in ``other`` removed. + """ new_set = deepcopy(self) if other.vectorizer: @@ -492,6 +499,7 @@ def has_labels(self): ------- has_labels : bool Whether or not this FeatureSet has any finite labels. + """ # make sure that labels is not None or a list of Nones if self.labels is not None and not all(label is None for label in self.labels): @@ -510,6 +518,7 @@ def __str__(self): ------- str: A string representation of ``FeatureSet``. + """ return str(self.__dict__) @@ -521,6 +530,7 @@ def __repr__(self): ------- str: A string representation of ``FeatureSet``. + """ return repr(self.__dict__) @@ -542,6 +552,7 @@ def __getitem__( If `value` is a slice, then return a new ``FeatureSet`` instance containing a subset of the data. If it's an index, return the specific example by row number. + """ # Check if we're slicing if isinstance(value, slice): @@ -597,6 +608,7 @@ def split( ------- Tuple[:class:`skll.data.featureset.FeatureSet`, :class:`skll.data.featureset.FeatureSet`] A tuple containing the two featureset instances. + """ # Note: an alternative way to implement this is to make copies # of the given FeatureSet instance and then use the `filter()` @@ -655,6 +667,7 @@ def from_data_frame( ------- :class:`skll.data.featureset.FeatureSet` A ``FeatureSet`` instance generated from from the given data frame. + """ if labels_column: feature_columns = [column for column in df.columns if column != labels_column] diff --git a/skll/data/readers.py b/skll/data/readers.py index cb307a2e..ac10481b 100644 --- a/skll/data/readers.py +++ b/skll/data/readers.py @@ -118,6 +118,7 @@ class Reader(object): logger : Optional[logging.Logger], default=None A logger instance to use to log messages instead of creating a new one by default. + """ def __init__( @@ -177,6 +178,7 @@ def for_path(cls, path_or_list: Union[PathOrStr, FeatureDictList], **kwargs) -> ------ ValueError If file does not have a valid extension. + """ if not isinstance(path_or_list, (str, Path)): return DictListReader(path_or_list) @@ -211,6 +213,7 @@ def _sub_read(self, file): Raises ------ NotImplementedError + """ raise NotImplementedError @@ -229,6 +232,7 @@ def _print_progress(self, progress_num: Union[int, str], end="\r"): end : str, default='\r' The string to put at the end of the line. "\r" should be used for every update except for the final one. + """ # Print out status if not self.quiet: @@ -270,6 +274,7 @@ def _sub_read_rows(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, Featu ValueError If the example IDs are not unique. + """ # Get labels and IDs ids_list: List[IdType] = [] @@ -365,6 +370,7 @@ def _parse_dataframe( features : :class:`skll.types.FeatureDictList` List of feature dictionaries. 
+ """ if df.empty: raise ValueError("No features found in possibly empty file " f"'{self.path_or_list}'.") @@ -459,6 +465,7 @@ def read(self) -> FeatureSet: ValueError If the example IDs are not unique. + """ self.logger.debug(f"Path: {self.path_or_list}") @@ -550,6 +557,7 @@ class DictListReader(Reader): logger : Optional[logging.Logger], default=None A logger instance to use to log messages instead of creating a new one by default. + """ def read(self) -> FeatureSet: @@ -560,6 +568,7 @@ def read(self) -> FeatureSet: ------- :class:`skll.data.FeatureSet` A ``FeatureSet`` representing the list of dictionaries we read in. + """ # if we are in this method, `self.path_or_list` must be a # list of dictionaries @@ -698,6 +707,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator: ValueError If IDs cannot be converted to floats, and ``ids_to_floats`` is ``True``. + """ for example_num, line in enumerate(file): # Remove extraneous whitespace @@ -789,6 +799,7 @@ class LibSVMReader(Reader): logger : Optional[logging.Logger], default=None A logger instance to use to log messages instead of creating a new one by default. + """ line_regex = re.compile( @@ -827,6 +838,7 @@ def _pair_to_tuple(pair: str, feat_map: Dict[str, str]) -> Tuple[str, Union[floa The name of the feature. value : Union[float, int, str] The value of the example. + """ name, value = pair.split(":") if feat_map is not None: @@ -859,6 +871,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator: ------ ValueError If line does not look like valid libsvm format. + """ feat_map: Optional[Dict[str, str]] for example_num, line in enumerate(file): @@ -952,6 +965,7 @@ class CSVReader(Reader): kwargs : Optional[Dict[str, Any]] Other arguments to the Reader object. + """ def __init__( @@ -990,6 +1004,7 @@ def _sub_read(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, FeatureDic features : :class:`skll.types.FeatureDictList` The list of feature dictionaries for the feature set. + """ df = pd.read_csv(file, sep=self._sep, engine=self._engine, **self._pandas_kwargs) return self._parse_dataframe( @@ -1035,6 +1050,7 @@ class TSVReader(CSVReader): kwargs : Optional[Dict[str, Any]] Other arguments to the Reader object. + """ def __init__( @@ -1072,6 +1088,7 @@ class ARFFReader(Reader): kwargs : Optional[Dict[str, Any]] Other arguments to the Reader object. + """ def __init__(self, path_or_list: Union[PathOrStr, List[Dict[str, Any]]], **kwargs): @@ -1101,6 +1118,7 @@ def split_with_quotes( escape_char : str, default='\\' The escape character. + """ return next( csv.reader([string], delimiter=delimiter, quotechar=quote_char, escapechar=escape_char) @@ -1125,6 +1143,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator: example : :class:`skll.types.FeatureDict` The example features in dictionary format. + """ field_names = [] # Process ARFF header @@ -1235,6 +1254,7 @@ def safe_float( Union[float, int, str] The text value converted to int or float, if possible. Otherwise it's a string. + """ # convert to str to be "Safe"! text = str(text) diff --git a/skll/data/writers.py b/skll/data/writers.py index cfe0faca..1b339bba 100644 --- a/skll/data/writers.py +++ b/skll/data/writers.py @@ -66,6 +66,7 @@ class Writer(object): logger : Optional[logging.Logger], default=None A logger instance to use to log messages instead of creating a new one by default. 
+ """ def __init__( @@ -128,6 +129,7 @@ def for_path(cls, path: PathOrStr, feature_set: FeatureSet, **kwargs) -> "Writer writer : :class:`skll.data.Writer` New instance of the Writer sub-class that is appropriate for the given path. + """ # Get lowercase extension for file extension checking # NOTE: the reason we are doing this complicated gymnastics @@ -174,6 +176,7 @@ def _write_subset( filter_features : Optional[Set[str]], default=None Set of features to include in current feature file. + """ self.logger.debug(f"sub_path: {sub_path}") self.logger.debug(f"feature_set: {self.feat_set.name}") @@ -226,6 +229,7 @@ def _write_header(self, feature_set, output_file, filter_features): filter_features : Ignored Not used. + """ pass @@ -250,6 +254,7 @@ def _write_line(self, id_, label_, feat_dict, output_file): Raises ------ NotImplementedError + """ raise NotImplementedError @@ -271,6 +276,7 @@ def _write_data(self, feature_set, output_file, filter_features): Raises ------ NotImplementedError + """ raise NotImplementedError @@ -297,6 +303,7 @@ def _get_column_names_and_indexes( column_indexes : List[int] A list of the (possibly filtered) column indexes. + """ # if we're not doing filtering, # then just take all the feature names @@ -367,6 +374,7 @@ class CSVWriter(Writer): pandas_kwargs : Optional[Dict[str], Any], default=None Arguments that will be passed directly to the `pandas` I/O reader. + """ def __init__( @@ -418,6 +426,7 @@ def _build_dataframe_with_features( ValueError If ID column is already used as feature. If label column is already used as feature. + """ # if there is no filtering, then just keep all the names (column_names, column_idxs) = self._get_column_names_and_indexes( @@ -476,6 +485,7 @@ def _build_dataframe( ValueError If ID column is already used as feature. If label column is already used as feature. + """ # create the data frame with just the features # from the feature set, at this point @@ -520,6 +530,7 @@ def _write_data( If only writing a subset of the features in the FeatureSet to ``output_file``, these are the features to include in this file. + """ df = self._build_dataframe(feature_set, filter_features=filter_features) df.to_csv(output_file, sep=self._sep, index=self._index, **self._pandas_kwargs) @@ -569,6 +580,7 @@ class TSVWriter(CSVWriter): pandas_kwargs : Optional[Dict[str, Any]], default=None Arguments that will be passed directly to the `pandas` I/O reader. + """ def __init__( @@ -640,6 +652,7 @@ class ARFFWriter(Writer): kwargs : Optional[Dict[str, Any]] The arguments to the ``Writer`` object being instantiated. + """ def __init__( @@ -687,6 +700,7 @@ def _write_header( If only writing a subset of the features in the FeatureSet to ``output_file``, these are the features to include in this file. + """ fieldnames, _ = self._get_column_names_and_indexes(self.feat_set, filter_features) fieldnames.append(self.id_col) @@ -747,6 +761,7 @@ def _write_line( ValueError If ID column name is already used as a feature. + """ # Add class column to feat_dict (unless this is unlabeled data) if self.label_col not in feat_dict: @@ -802,6 +817,7 @@ class NDJWriter(Writer): logger : Optional[logging.Logger], default=None A logger instance to use to log messages instead of creating a new one by default. + """ def __init__( @@ -840,6 +856,7 @@ def _write_line( output_file : IO[str] The file being written to. 
+ """ example_dict: FeatureDict = {} # Don't try to add class column if this is label-less data @@ -900,6 +917,7 @@ class LibSVMWriter(Writer): label_map : Optional[Dict[str, int]], default=None A mapping from label strings to integers. + """ LIBSVM_REPLACE_DICT = { @@ -960,6 +978,7 @@ def _sanitize(name: Union[IdType, LabelType]) -> Union[IdType, LabelType]: ------- Union[:class:`skll.types.IdType`, :class:`skll.types.LabelType`] The sanitized name with special characters replaced. + """ sanitized_name = name if isinstance(sanitized_name, str): @@ -986,6 +1005,7 @@ def _write_line( output_file : IO[str] The file being written to. + """ field_values = ( sorted( diff --git a/skll/experiments/__init__.py b/skll/experiments/__init__.py index 2a52d807..1403fef4 100644 --- a/skll/experiments/__init__.py +++ b/skll/experiments/__init__.py @@ -80,6 +80,7 @@ def _classify_featureset(args: Dict[str, Any]) -> List[Dict[str, Any]]: ------ ValueError If extra unknown arguments are passed to the function. + """ # Extract all the arguments. # (There doesn't seem to be a better way to do this since one can't specify @@ -669,6 +670,7 @@ def run_configuration( If value for ``"ablation"`` is not a positive int or ``None``. OSError If the lenth of the ``FeatureSet`` name > 210. + """ try: # Read configuration diff --git a/skll/experiments/input.py b/skll/experiments/input.py index 06b92ec5..a5ada564 100644 --- a/skll/experiments/input.py +++ b/skll/experiments/input.py @@ -72,6 +72,7 @@ def load_featureset( merged_set : :class:`skll.data.featureset.FeatureSet` A ``FeatureSet`` instance containing the specified labels, IDs, features, and feature vectorizer. + """ # get a logger if one was not provided logger = logger if logger else logging.getLogger(__name__) diff --git a/skll/experiments/output.py b/skll/experiments/output.py index a335d5d1..e5f2e7fe 100644 --- a/skll/experiments/output.py +++ b/skll/experiments/output.py @@ -52,6 +52,7 @@ def _compute_ylimits_for_featureset( ylimits : Dict[str, Tuple[float, float]] A dictionary, with metric names as keys and a tuple of (lower_limit, upper_limit) as values. + """ # set the y-limits of the curves depending on what kind # of values the metric produces @@ -119,6 +120,7 @@ def _generate_learning_curve_score_plots( ------- List[str] A list of paths of the generated plots + """ # convert output dir to a path output_dir = Path(output_dir) @@ -259,6 +261,7 @@ def _generate_learning_curve_time_plots( ------- List[str] A list of paths of the generated plots + """ # convert output dir to a path output_dir = Path(output_dir) @@ -346,6 +349,7 @@ def generate_learning_curve_plots( ------- List[str] A list of paths of the generated plots + """ # convert output_dir to Path object output_dir = Path(output_dir) @@ -462,6 +466,7 @@ def _print_fancy_output( List of result dictionaries. output_file : IO[str], default=sys.stdout The file buffer to print to. + """ if not learner_result_dicts: raise ValueError("Result dictionary list is empty!") @@ -552,6 +557,7 @@ def _write_learning_curve_file(result_json_paths: List[str], output_file: IO[str list of paths to the individual result JSON files. output_file : IO[str] The file buffer to write to. + """ learner_result_dicts = [] @@ -646,6 +652,7 @@ def _write_skll_folds(skll_fold_ids: FoldMapping, skll_fold_ids_file: IO[str]) - Dictionary with ids as keys and test-fold-numbers as values. skll_fold_ids_file : IO[str] An open file handler to write to. 
+ """ f = csv.writer(skll_fold_ids_file) f.writerow(["id", "cv_test_fold"]) @@ -670,6 +677,7 @@ def _write_summary_file(result_json_paths: List[str], output_file: IO[str], abla The file buffer to write to. ablation : int, default=0 The number of features to remove when doing ablation experiment. + """ learner_result_dicts = [] # Map from feature set names to all features in them diff --git a/skll/experiments/utils.py b/skll/experiments/utils.py index f286336f..2682460e 100644 --- a/skll/experiments/utils.py +++ b/skll/experiments/utils.py @@ -64,6 +64,7 @@ def _check_job_results(job_results: List[List[Dict[str, Any]]]) -> None: ---------- job_results : List[List[Dict[str, Any]]] A list of job result dictionaries. + """ logger = get_skll_logger("experiment") logger.info("Checking job results") @@ -101,6 +102,7 @@ def _create_learner_result_dicts( ------- res : List[Dict[str, Any]] The results of the learners, as a list of dictionaries. + """ res = [] @@ -256,6 +258,7 @@ def _get_stat_float(label_result_dict: Dict[str, float], stat: str) -> float: ------- float The value of the stat if it's in the dictionary, and NaN otherwise. + """ if stat in label_result_dict and label_result_dict[stat] is not None: return label_result_dict[stat] diff --git a/skll/learner/__init__.py b/skll/learner/__init__.py index d77cf8b7..579a0b16 100644 --- a/skll/learner/__init__.py +++ b/skll/learner/__init__.py @@ -171,6 +171,7 @@ class Learner(object): Path to module where a custom classifier is defined. logger : Optional[logging.Logger], default=None A logging object. If ``None`` is passed, get logger from ``__name__``. + """ def __init__( @@ -402,6 +403,7 @@ def from_file( ------- :class:`skll.learner.Learner` The ``Learner`` instance loaded from the file. + """ # use the logger that's passed in or if nothing was passed in, # then create a new logger @@ -435,6 +437,7 @@ def load(self, learner_path: PathOrStr) -> None: ---------- learner_path : :class:`skll.types.PathOrStr` The path to a saved learner object file to load. + """ del self.__dict__ self.__dict__ = Learner.from_file(learner_path).__dict__ @@ -460,6 +463,7 @@ def _convert_coef_array_to_feature_names(self, coef: np.ndarray, feature_name_pr ------- Dict[str, Any] A dictionary of labeled weights + """ res = {} vocabulary = {} @@ -509,6 +513,7 @@ def model_params(self) -> Tuple[Dict[str, Any], Dict[str, Any]]: ------ ValueError If the instance does not support model parameters. + """ res = {} intercept = {} @@ -623,6 +628,7 @@ def probability(self, value: bool) -> None: ---------- value : bool Whether learner should return probabilities of all labels. + """ # LinearSVC doesn't support predict_proba self._probability = value @@ -653,6 +659,7 @@ def save(self, learner_path: PathOrStr) -> None: ---------- learner_path : :class:`skll.types.PathOrStr` The path to save the ``Learner`` instance to. + """ _save_learner_to_disk(self, learner_path) @@ -671,6 +678,7 @@ def _create_estimator(self): ------ ValueError If there is no default parameter grid for estimator. + """ estimator = None default_param_grid = None @@ -705,6 +713,7 @@ def get_feature_names_out(self) -> np.ndarray: ValueError If ``self.feat_vectorizer`` is either ``None`` or a :class:`sklearn.feature_extraction.FeatureHasher`. + """ if isinstance(self.feat_vectorizer, DictVectorizer): return self.feat_vectorizer.get_feature_names_out()[self.feat_selector.get_support()] @@ -729,6 +738,7 @@ def _check_input_formatting(self, examples: FeatureSet) -> None: If labels are strings. 
TypeError If any features are strings. + """ # Make sure the labels for a regression task are not strings. if self.model_type._estimator_type == "regressor" and examples.labels is not None: @@ -762,6 +772,7 @@ def _check_max_feature_value(self, feat_array: np.ndarray): ---------- feat_array : numpy.ndarray A numpy array with features. + """ max_feat_abs = np.max(np.abs(feat_array.data)) if max_feat_abs > 1000.0: @@ -780,6 +791,7 @@ def _create_label_dict(self, examples: FeatureSet) -> None: ---------- examples : :class:`skll.data.featureset.FeatureSet` The examples to use for training. + """ # we don't need to do this if we have already done it # or for regression models, so simply return. @@ -818,6 +830,7 @@ def _train_setup(self, examples: FeatureSet) -> None: ---------- examples : :class:`skll.data.featureset.FeatureSet` The ``FeatureSet`` instance to use for training. + """ # Check feature values and labels self._check_input_formatting(examples) @@ -898,6 +911,7 @@ def train( If process runs out of memory converting training data to dense. ValueError If FeatureHasher is used with MultinomialNB. + """ # get the estimator type since we need it in multiple places below estimator_type = self.model_type._estimator_type @@ -1232,6 +1246,7 @@ def evaluate( the per-label PRFs, the model parameters, the grid search objective function score, and the additional evaluation metrics, if any. For regressors, the first two elements in the tuple are ``None``. + """ # are we in a regressor or a classifier estimator_type = self.model_type._estimator_type @@ -1355,6 +1370,7 @@ def predict( RuntimeError If there is a mismatch between the learner vectorizer and the test set vectorizer. + """ example_ids = examples.ids @@ -1617,6 +1633,7 @@ def cross_validate( If classification labels are not properly encoded as strings. ValueError If ``grid_search`` is ``True`` but ``grid_objective`` is ``None``. + """ # Seed the random number generator so that randomized # algorithms are replicable @@ -1814,6 +1831,7 @@ def learning_curve( ------ ValueError If the number of examples is less than 500. + """ # check that the number of training examples is more than the minimum # needed for generating a reliable learning curve diff --git a/skll/learner/utils.py b/skll/learner/utils.py index d8574b71..2500f65d 100644 --- a/skll/learner/utils.py +++ b/skll/learner/utils.py @@ -116,6 +116,7 @@ class FilteredLeaveOneGroupOut(LeaveOneGroupOut): A list of example IDs. logger : Optional[logging.Logger], default=None A logger instance. + """ def __init__( @@ -154,6 +155,7 @@ def split( The training set indices for that split. test_index : numpy.ndarray The testing set indices for that split. + """ for train_index, test_index in super(FilteredLeaveOneGroupOut, self).split(X, y, groups): train_len = len(train_index) @@ -181,6 +183,7 @@ class SelectByMinCount(SelectKBest): ---------- min_count : int, default=1 The minimum feature count to select. + """ def __init__(self, min_count: int = 1): @@ -202,6 +205,7 @@ def fit(self, X, y=None): Returns ------- self + """ # initialize a list of counts of times each feature appears col_counts = [0 for _ in range(X.shape[1])] @@ -229,6 +233,7 @@ def _get_support_mask(self): ------- mask : numpy.ndarray The mask with features to keep set to True. 
+ """ mask = np.zeros(self.scores_.shape, dtype=bool) mask[self.scores_ >= self.min_count] = True @@ -253,6 +258,7 @@ def add_unseen_labels( Dict[:class:`skll.types.LabelType`, int] Dictionary mapping merged labels from both the training and test sets to indices. + """ # get the list of labels that were in the training set train_label_list = list(train_label_dict.keys()) @@ -317,6 +323,7 @@ def compute_evaluation_metrics( per-label PRFs, the grid search objective function score, and the additional evaluation metrics, if any. For regressors, the first two elements are ``None``. + """ # set up the logger logger = logger if logger else logging.getLogger(__name__) @@ -485,6 +492,7 @@ def compute_num_folds_from_example_counts( ValueError If ``cv_folds`` is not an integer or if the training set has fewer than 2 examples associated with a label (for classification). + """ # get a logger if not provided logger = logger if logger else logging.getLogger(__name__) @@ -540,6 +548,7 @@ def contiguous_ints_or_floats(numbers: np.ndarray) -> bool: If ``numbers`` does not contain integers or floating point values. ValueError If ``numbers`` is empty. + """ try: # make sure that number is not empty @@ -580,6 +589,7 @@ def get_acceptable_classification_metrics(label_array: np.ndarray) -> Set[str]: acceptable_metrics : Set[str] A set of metric names that are acceptable for the given classification scenario. + """ # this is a classifier so the acceptable objective # functions definitely include those metrics that @@ -668,6 +678,7 @@ def load_custom_learner( ------ ValueError If the custom learner path does not end in '.py'. + """ if not custom_learner_path: raise ValueError( @@ -722,6 +733,7 @@ def get_predictions( NotImplementedError If the scikit-learn model does not implement ``predict_proba()`` to get the class probabilities. + """ # deferred import to avoid circular dependencies from skll.learner.voting import VotingLearner @@ -787,6 +799,7 @@ def rescaled(cls): ------ ValueError If classifier cannot be rescaled (i.e. is not a regressor). + """ # If this class has already been run through the decorator, return it if hasattr(cls, "rescale"): @@ -819,6 +832,7 @@ def fit(self, X: np.ndarray, y=None): # noqa: D417 Returns ------- self + """ # fit a regular regression model orig_fit(self, X, y=y) @@ -857,6 +871,7 @@ def predict(self, X: np.ndarray) -> np.ndarray: ------- numpy.ndarray The prediction results. + """ # get the unconstrained predictions res = orig_predict(self, X) @@ -896,6 +911,7 @@ def _get_param_names(class_x): ------ RuntimeError If `varargs` exist in the scikit-learn estimator. + """ # initialize the empty list of parameter names args = [] @@ -952,6 +968,7 @@ def init(self, constrain: bool = True, rescale: bool = True, **kwargs): # noqa: Whether to rescale prediction values using z-scores. kwargs : Dict[str, Any] Keyword arguments for base class. + """ # pylint: disable=W0201 self.constrain = constrain @@ -1006,6 +1023,7 @@ def setup_cv_fold_iterator( k-fold iterator Optional[List[str]] List of cross-validation groups + """ # explicitly declare the return types kfold: Union[FilteredLeaveOneGroupOut, KFold, StratifiedKFold] @@ -1059,6 +1077,7 @@ def setup_cv_split_iterator( Iterator over the train/test featuresets int The maximum number of training samples available. + """ # seed the random number generator for replicability random_state = np.random.RandomState(123456789) @@ -1122,6 +1141,7 @@ def train_and_score( float The time taken in seconds to fit the ``learner`` on ``train_examples``. 
+ """ # capture the time before we train the model start_time = time.time() @@ -1198,6 +1218,7 @@ def write_predictions( List of class labels, required if ``probability`` is ``True``. append : bool, default=False Should we append the current predictions to the file if it exists? + """ # create a new file starting with the given prefix prediction_file = f"{file_prefix}_predictions.tsv" @@ -1250,6 +1271,7 @@ def _save_learner_to_disk( A ``Learner`` or ``VotingLearner`` instance to save to disk. filepath : :class:`skll.types.PathOrStr` The path to save the learner instance to. + """ # create the directory if it doesn't exist learner_dir = Path(filepath).parent @@ -1288,6 +1310,7 @@ def _load_learner_from_disk( ------ ValueError If the pickled version of the ``Learner`` instance is out of date. + """ skll_version, learner = joblib.load(filepath) diff --git a/skll/learner/voting.py b/skll/learner/voting.py index 407ff2ee..93604b46 100644 --- a/skll/learner/voting.py +++ b/skll/learner/voting.py @@ -108,6 +108,7 @@ class VotingLearner(object): list and the order of the ``learner_names`` list. logger : Optional[logging.Logger], default=None A logging object. If ``None`` is passed, get logger from ``__name__``. + """ def __init__( @@ -236,6 +237,7 @@ def save(self, learner_path: PathOrStr) -> None: ---------- learner_path : :class:`skll.types.PathOrStr` The path to save the ``VotingLearner`` instance to. + """ _save_learner_to_disk(self, learner_path) @@ -257,6 +259,7 @@ def from_file( ------- learner : skll.learner.voting.VotingLearner The ``VotingLearner`` instance loaded from the file. + """ # use the logger that's passed in or if nothing was passed in, # then create a new logger @@ -322,6 +325,7 @@ def train( the number of grid search folds will be used. shuffle : bool, default=False Shuffle examples (e.g., for grid search CV.) + """ if param_grid_list is None: self._param_grids = [] @@ -443,6 +447,7 @@ def predict( name of each underlying learner as the key and the array of its predictions as the value. The second element is ``None`` if ``individual_predictions`` is set to ``False``. + """ example_ids = examples.ids @@ -551,6 +556,7 @@ def evaluate( The confusion matrix, the overall accuracy, the per-label PRFs, the model parameters, the grid search objective function score, and the additional evaluation metrics, if any. + """ # make the prediction on the test data; note that these # are either class indices or class probabilities @@ -720,6 +726,7 @@ def cross_validate( If classification labels are not properly encoded as strings. ValueError If ``grid_search`` is ``True`` but ``grid_objective`` is ``None``. + """ # Seed the random number generator so that randomized algorithms are # replicable. @@ -912,6 +919,7 @@ def learning_curve( ------ ValueError If the number of examples is less than 500. + """ # check that the number of training examples is more than the minimum # needed for generating a reliable learning curve diff --git a/skll/utils/commandline/compute_eval_from_predictions.py b/skll/utils/commandline/compute_eval_from_predictions.py index e22ea099..d1fe02a1 100755 --- a/skll/utils/commandline/compute_eval_from_predictions.py +++ b/skll/utils/commandline/compute_eval_from_predictions.py @@ -118,6 +118,7 @@ def compute_eval_from_predictions( ValueError If the requested prediction method is ``"expected_value"`` but the class names can't be converted to ints. 
+ """ # convert the examples file and predictions file to a Path examples_file = Path(examples_file) @@ -192,6 +193,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv: Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, then ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/filter_features.py b/skll/utils/commandline/filter_features.py index 3b5a5fb5..78817a27 100755 --- a/skll/utils/commandline/filter_features.py +++ b/skll/utils/commandline/filter_features.py @@ -28,6 +28,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/generate_predictions.py b/skll/utils/commandline/generate_predictions.py index 79a283ac..83f51a63 100755 --- a/skll/utils/commandline/generate_predictions.py +++ b/skll/utils/commandline/generate_predictions.py @@ -32,6 +32,7 @@ def main(argv: Optional[List[str]] = None): argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/join_features.py b/skll/utils/commandline/join_features.py index e3740763..480d9114 100755 --- a/skll/utils/commandline/join_features.py +++ b/skll/utils/commandline/join_features.py @@ -27,6 +27,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/plot_learning_curves.py b/skll/utils/commandline/plot_learning_curves.py index 76d57836..40722ce8 100755 --- a/skll/utils/commandline/plot_learning_curves.py +++ b/skll/utils/commandline/plot_learning_curves.py @@ -34,6 +34,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/print_model_weights.py b/skll/utils/commandline/print_model_weights.py index f35fe06f..758285f2 100755 --- a/skll/utils/commandline/print_model_weights.py +++ b/skll/utils/commandline/print_model_weights.py @@ -32,6 +32,7 @@ def main(argv: Optional[List[str]] = None) -> None: List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. Defaults to ``None``. + """ parser = argparse.ArgumentParser( description="Prints out the weights of a" " given model.", diff --git a/skll/utils/commandline/run_experiment.py b/skll/utils/commandline/run_experiment.py index f6ff49ad..49e19532 100755 --- a/skll/utils/commandline/run_experiment.py +++ b/skll/utils/commandline/run_experiment.py @@ -26,6 +26,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. 
+ """ # Get command line arguments parser = ArgumentParser( diff --git a/skll/utils/commandline/skll_convert.py b/skll/utils/commandline/skll_convert.py index 61d3f339..4999073e 100755 --- a/skll/utils/commandline/skll_convert.py +++ b/skll/utils/commandline/skll_convert.py @@ -43,6 +43,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/commandline/summarize_results.py b/skll/utils/commandline/summarize_results.py index b9f8dcc0..c50bc627 100755 --- a/skll/utils/commandline/summarize_results.py +++ b/skll/utils/commandline/summarize_results.py @@ -25,6 +25,7 @@ def main(argv: Optional[List[str]] = None) -> None: argv : Optional[List[str]], default=None List of arguments, as if specified on the command-line. If ``None``, ``sys.argv[1:]`` is used instead. + """ # Get command line arguments parser = argparse.ArgumentParser( diff --git a/skll/utils/logging.py b/skll/utils/logging.py index fb3ee8fc..3a19d076 100644 --- a/skll/utils/logging.py +++ b/skll/utils/logging.py @@ -33,6 +33,7 @@ def filter(self, record): ---------- record : logging.LogRecord The log record to be filtered. + """ # Check if the log record is from matplotlib.category and contains the specific message if ( @@ -86,6 +87,7 @@ def get_skll_logger( ------- logger: logging.Logger A ``Logger`` instance. + """ # first get the logger instance associated with the # given name if one already exists @@ -121,6 +123,7 @@ def close_and_remove_logger_handlers(logger: logging.Logger) -> None: ---------- logger : logging.Logger Logger instance + """ for handler in logger.handlers[:]: handler.close() From 503753a9d936bd4402ffef142c0866f8d229a494 Mon Sep 17 00:00:00 2001 From: Tamar Lavee Date: Mon, 29 Jul 2024 13:42:43 -0400 Subject: [PATCH 7/7] apply more ruff changes, mostly adding docstrings. --- examples/__init__.py | 1 + skll/utils/__init__.py | 1 + skll/utils/commandline/__init__.py | 1 + tests/other/custom_logistic_wrapper.py | 7 +++++-- tests/other/custom_metrics2.py | 2 ++ tests/other/kappa.py | 2 ++ tests/other/majority_class_learner.py | 5 +++++ 7 files changed, 17 insertions(+), 2 deletions(-) diff --git a/examples/__init__.py b/examples/__init__.py index e69de29b..8a708a68 100644 --- a/examples/__init__.py +++ b/examples/__init__.py @@ -0,0 +1 @@ +"""Data generation scripts for the different tutorial examples.""" diff --git a/skll/utils/__init__.py b/skll/utils/__init__.py index e69de29b..a14e0f8e 100644 --- a/skll/utils/__init__.py +++ b/skll/utils/__init__.py @@ -0,0 +1 @@ +"""Code for different utility scripts, functions, and classes used throughout SKLL.""" diff --git a/skll/utils/commandline/__init__.py b/skll/utils/commandline/__init__.py index e69de29b..08b0c243 100644 --- a/skll/utils/commandline/__init__.py +++ b/skll/utils/commandline/__init__.py @@ -0,0 +1 @@ +"""Command line scripts and utilities.""" diff --git a/tests/other/custom_logistic_wrapper.py b/tests/other/custom_logistic_wrapper.py index 00ad0d87..54ea90c3 100644 --- a/tests/other/custom_logistic_wrapper.py +++ b/tests/other/custom_logistic_wrapper.py @@ -1,7 +1,8 @@ # License: BSD 3 clause """ -A simple wrapper around the existing LogisticRegression class, for testing -custom learners functionality. +A simple wrapper around the existing LogisticRegression class. + +Used for testing custom learners functionality. 
:author: Michael Heilman (mheilman@ets.org) """ @@ -10,4 +11,6 @@ class CustomLogisticRegressionWrapper(LogisticRegression): + """A simple wrapper around the existing LogisticRegression class.""" + pass diff --git a/tests/other/custom_metrics2.py b/tests/other/custom_metrics2.py index 3a2420ab..9c36ffc4 100644 --- a/tests/other/custom_metrics2.py +++ b/tests/other/custom_metrics2.py @@ -1,5 +1,7 @@ +"""Additional custom metrics module for testing purposes.""" from sklearn.metrics import fbeta_score def f06_micro(y_true, y_pred): + """Define a custom metric for testing purposes.""" return fbeta_score(y_true, y_pred, beta=0.6, average="micro") diff --git a/tests/other/kappa.py b/tests/other/kappa.py index 0cd2602f..850d241d 100644 --- a/tests/other/kappa.py +++ b/tests/other/kappa.py @@ -1,2 +1,4 @@ +"""metric definition for testing purposes.""" def dummy_metric(y_true, y_pred): + """Return a fixed score.""" return 1.0 diff --git a/tests/other/majority_class_learner.py b/tests/other/majority_class_learner.py index 699a1a60..8c97877a 100644 --- a/tests/other/majority_class_learner.py +++ b/tests/other/majority_class_learner.py @@ -12,10 +12,14 @@ class MajorityClassLearner(BaseEstimator, ClassifierMixin): + """A simple majority class classifier.""" + def __init__(self): + """Initialize class.""" self.majority_class = None def fit(self, X, y): + """Set the majority class based on the given data.""" counts = Counter(y) max_count = -1 for label, count in counts.items(): @@ -25,4 +29,5 @@ def fit(self, X, y): return self def predict(self, X): + """Return the prediction (majority class) for the given data.""" return np.array([self.majority_class for x in range(X.shape[0])])
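
Addendum (editor's sketch, not part of the seven patches above): most of the
mechanical churn in patches 1 and 6 comes from ruff's pydocstyle ("D") rules,
which the new `lint.select = ["D", "E", "F", "I"]` setting enables wholesale.
In particular, the many small hunks that add a blank line before a closing
`"""` are consistent with the autofix for the blank-line-after-last-section
check (likely D413). Below is a minimal sketch of a numpydoc-style docstring
that should pass this configuration; the module and function names are
illustrative and do not exist in SKLL:

    """Illustrate the docstring layout that ruff's pydocstyle rules enforce."""


    def scaled_value(value: float, factor: float = 2.0) -> float:
        """
        Scale ``value`` by ``factor``.

        A hypothetical example, not SKLL code: it demonstrates the
        second-line summary (D212 is ignored in this configuration),
        the numpydoc sections, and the blank line before the closing
        quotes that the D413 autofix inserts throughout these patches.

        Parameters
        ----------
        value : float
            The number to scale.
        factor : float, default=2.0
            The multiplier to apply.

        Returns
        -------
        float
            The scaled value.

        """
        return value * factor

Running `ruff check .` (or `pre-commit run ruff --all-files`) against such a
file should report no "D" errors under the pyproject.toml settings introduced
in patch 2.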