From ac8cbb3799bdfa31360c87bf1097410326dba0bd Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 20 May 2020 16:04:14 -0400
Subject: [PATCH] [MRG] More deprecation for 0.24 (#17142)

* removed normalize_components from SparsePCA and MiniBatchSparsePCA

* changed default strategy of Dummy to prior and removed outputs_2d_ attribute

* removed usage of None to drop estimator in ensemble and behaviour param of IsolationForest

* remove support for drop=None in Voting

* removed some warning decorators

* remove feature_extraction.extract_patches

* removed VectorizerMixin and copy parameter from TfidfVectorizer

* kernel.set_params now raises AttributeError

* removed fig from plot_partial_dependence

* removed iid parameter of search estimators

* removed brier_scorer

* raise error in split when shuffle is False and random_state is not None

* removed MultiOutputEstimator

* removed base classes of NaiveBayes

* removed drop from pipeline

* removed utils in random_projection

* removed presort and classes_ in trees

* flake8

* fixed some tests

* flake

* fixed docstring

* fixed other one

* some left

* mmmm
---
 sklearn/decomposition/_sparse_pca.py          |  56 +---
 sklearn/decomposition/_truncated_svd.py       |   1 -
 .../decomposition/tests/test_sparse_pca.py    |  20 --
 sklearn/dummy.py                              |  48 +--
 sklearn/ensemble/_base.py                     |  14 +-
 sklearn/ensemble/_iforest.py                  |  29 --
 sklearn/ensemble/_voting.py                   |  22 +-
 sklearn/ensemble/tests/test_bagging.py        |   3 -
 .../ensemble/tests/test_gradient_boosting.py  |   3 -
 sklearn/ensemble/tests/test_iforest.py        |  12 -
 sklearn/ensemble/tests/test_voting.py         |  52 +--
 .../ensemble/tests/test_weight_boosting.py    |   3 -
 sklearn/feature_extraction/image.py           |  43 +--
 .../feature_extraction/tests/test_image.py    |  11 +-
 sklearn/feature_extraction/tests/test_text.py |  24 --
 sklearn/feature_extraction/text.py            |  25 +-
 sklearn/gaussian_process/kernels.py           |  13 +-
 .../gaussian_process/tests/test_kernels.py    |  23 +-
 .../inspection/_plot/partial_dependence.py    |  17 +-
 .../tests/test_plot_partial_dependence.py     |  18 -
 sklearn/linear_model/tests/test_ridge.py      |   6 +-
 sklearn/manifold/_isomap.py                   |  11 -
 sklearn/metrics/_scorer.py                    |  18 +-
 sklearn/metrics/tests/test_score_objects.py   |  11 -
 sklearn/model_selection/_search.py            |  40 +--
 sklearn/model_selection/_split.py             |   6 +-
 sklearn/model_selection/tests/test_search.py  | 309 +++++-------------
 sklearn/model_selection/tests/test_split.py   |   5 +-
 sklearn/multioutput.py                        |   8 -
 sklearn/naive_bayes.py                        |  22 +-
 sklearn/pipeline.py                           |  15 +-
 sklearn/random_projection.py                  |  17 -
 sklearn/tests/test_dummy.py                   |  20 --
 sklearn/tests/test_multioutput.py             |  10 -
 sklearn/tests/test_naive_bayes.py             |  17 -
 sklearn/tests/test_pipeline.py                |  31 +-
 sklearn/tests/test_random_projection.py       |  12 -
 sklearn/tree/_classes.py                      |  39 ---
 sklearn/tree/tests/test_tree.py               |  30 --
 sklearn/utils/estimator_checks.py             |   5 +
 40 files changed, 165 insertions(+), 904 deletions(-)

diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py
index 4419157a3db3f..8f766b734ffab 100644
--- a/sklearn/decomposition/_sparse_pca.py
+++ b/sklearn/decomposition/_sparse_pca.py
@@ -2,8 +2,6 @@
 # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort
 # License: BSD 3 clause
 
-import warnings
-
 import numpy as np
 
 from ..utils import check_random_state, check_array
@@ -14,22 +12,6 @@
 from ._dict_learning import dict_learning, dict_learning_online
 
 
-# FIXME: remove in 0.24
-def _check_normalize_components(normalize_components, estimator_name):
-    if normalize_components != 'deprecated':
-        if normalize_components:
-            warnings.warn(
-                "'normalize_components' has been deprecated in 0.22 and "
-                "will be removed in 0.24. Remove the parameter from the "
-                " constructor.", FutureWarning
-            )
-        else:
-            raise NotImplementedError(
-                "normalize_components=False is not supported starting from "
-                "0.22. Remove this parameter from the constructor."
-            )
-
-
 class SparsePCA(TransformerMixin, BaseEstimator):
     """Sparse Principal Components Analysis (SparsePCA)
 
@@ -85,16 +67,6 @@ class SparsePCA(TransformerMixin, BaseEstimator):
         across multiple function calls.
         See :term:`Glossary <random_state>`.
 
-    normalize_components : 'deprecated'
-        This parameter does not have any effect. The components are always
-        normalized.
-
-        .. versionadded:: 0.20
-
-        .. deprecated:: 0.22
-           ``normalize_components`` is deprecated in 0.22 and will be removed
-           in 0.24.
-
     Attributes
     ----------
     components_ : array, [n_components, n_features]
@@ -140,8 +112,7 @@ class SparsePCA(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,
                  max_iter=1000, tol=1e-8, method='lars', n_jobs=None,
-                 U_init=None, V_init=None, verbose=False, random_state=None,
-                 normalize_components='deprecated'):
+                 U_init=None, V_init=None, verbose=False, random_state=None):
         self.n_components = n_components
         self.alpha = alpha
         self.ridge_alpha = ridge_alpha
@@ -153,7 +124,6 @@ def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,
         self.V_init = V_init
         self.verbose = verbose
         self.random_state = random_state
-        self.normalize_components = normalize_components
 
     def fit(self, X, y=None):
         """Fit the model from data in X.
@@ -174,10 +144,6 @@ def fit(self, X, y=None):
         random_state = check_random_state(self.random_state)
         X = self._validate_data(X)
 
-        _check_normalize_components(
-            self.normalize_components, self.__class__.__name__
-        )
-
         self.mean_ = X.mean(axis=0)
         X = X - self.mean_
 
@@ -304,16 +270,6 @@ class MiniBatchSparsePCA(SparsePCA):
         across multiple function calls.
         See :term:`Glossary <random_state>`.
 
-    normalize_components : 'deprecated'
-        This parameter does not have any effect. The components are always
-        normalized.
-
-        .. versionadded:: 0.20
-
-        .. deprecated:: 0.22
-           ``normalize_components`` is deprecated in 0.22 and will be removed
-           in 0.24.
-
     Attributes
     ----------
     components_ : array, [n_components, n_features]
@@ -357,13 +313,11 @@ class MiniBatchSparsePCA(SparsePCA):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,
                  n_iter=100, callback=None, batch_size=3, verbose=False,
-                 shuffle=True, n_jobs=None, method='lars', random_state=None,
-                 normalize_components='deprecated'):
+                 shuffle=True, n_jobs=None, method='lars', random_state=None):
         super().__init__(
             n_components=n_components, alpha=alpha, verbose=verbose,
             ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,
-            random_state=random_state,
-            normalize_components=normalize_components)
+            random_state=random_state)
         self.n_iter = n_iter
         self.callback = callback
         self.batch_size = batch_size
@@ -388,10 +342,6 @@ def fit(self, X, y=None):
         random_state = check_random_state(self.random_state)
         X = self._validate_data(X)
 
-        _check_normalize_components(
-            self.normalize_components, self.__class__.__name__
-        )
-
         self.mean_ = X.mean(axis=0)
         X = X - self.mean_
 
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index cdc9a9c685400..ef723ac25d4d3 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -88,7 +88,6 @@ class TruncatedSVD(TransformerMixin, BaseEstimator):
     --------
     >>> from sklearn.decomposition import TruncatedSVD
     >>> from scipy.sparse import random as sparse_random
-    >>> from sklearn.random_projection import sparse_random_matrix
     >>> X = sparse_random(100, 100, density=0.01, format='csr',
     ...                   random_state=42)
     >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py
index 9ee0339a192b4..d6ddfa01a49d0 100644
--- a/sklearn/decomposition/tests/test_sparse_pca.py
+++ b/sklearn/decomposition/tests/test_sparse_pca.py
@@ -189,26 +189,6 @@ def test_pca_vs_spca():
     assert_allclose(results_test_pca, results_test_spca)
 
 
-@pytest.mark.parametrize("spca", [SparsePCA, MiniBatchSparsePCA])
-def test_spca_deprecation_warning(spca):
-    rng = np.random.RandomState(0)
-    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
-
-    warn_msg = "'normalize_components' has been deprecated in 0.22"
-    with pytest.warns(FutureWarning, match=warn_msg):
-        spca(normalize_components=True).fit(Y)
-
-
-@pytest.mark.parametrize("spca", [SparsePCA, MiniBatchSparsePCA])
-def test_spca_error_unormalized_components(spca):
-    rng = np.random.RandomState(0)
-    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)
-
-    err_msg = "normalize_components=False is not supported starting "
-    with pytest.raises(NotImplementedError, match=err_msg):
-        spca(normalize_components=False).fit(Y)
-
-
 @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
 @pytest.mark.parametrize("n_components", [None, 3])
 def test_spca_n_components_(SPCA, n_components):
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index 17b2c6cfd2e5d..cee7294ab5afd 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -17,9 +17,9 @@
 from .utils.random import _random_choice_csc
 from .utils.stats import _weighted_percentile
 from .utils.multiclass import class_distribution
-from .utils import deprecated
 from .utils.validation import _deprecate_positional_args
 
+
 class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
     """
     DummyClassifier is a classifier that makes predictions using simple rules.
@@ -33,7 +33,7 @@ class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
 
     Parameters
     ----------
-    strategy : str, default="stratified"
+    strategy : str, default="prior"
         Strategy to use to generate predictions.
 
         * "stratified": generates predictions by respecting the training
@@ -47,14 +47,9 @@ class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
           the user. This is useful for metrics that evaluate a non-majority
           class
 
-          .. versionchanged:: 0.22
-             The default value of `strategy` will change to "prior" in version
-             0.24. Starting from version 0.22, a warning will be raised if
-             `strategy` is not explicitly set.
-
-          .. versionadded:: 0.17
-             Dummy Classifier now supports prior fitting strategy using
-             parameter *prior*.
+          .. versionchanged:: 0.24
+             The default value of `strategy` has changed to "prior" in version
+             0.24.
 
     random_state : int, RandomState instance or None, optional, default=None
         Controls the randomness to generate the predictions when
@@ -99,7 +94,7 @@ class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
     0.75
     """
     @_deprecate_positional_args
-    def __init__(self, *, strategy="warn", random_state=None,
+    def __init__(self, *, strategy="prior", random_state=None,
                  constant=None):
         self.strategy = strategy
         self.random_state = random_state
@@ -126,16 +121,11 @@ def fit(self, X, y, sample_weight=None):
         allowed_strategies = ("most_frequent", "stratified", "uniform",
                               "constant", "prior")
 
-        # TODO: Remove in 0.24
-        if self.strategy == "warn":
-            warnings.warn("The default value of strategy will change from "
-                          "stratified to prior in 0.24.", FutureWarning)
-            self._strategy = "stratified"
-        elif self.strategy not in allowed_strategies:
+        if self.strategy not in allowed_strategies:
             raise ValueError("Unknown strategy type: %s, expected one of %s."
                              % (self.strategy, allowed_strategies))
-        else:
-            self._strategy = self.strategy
+
+        self._strategy = self.strategy
 
         if self._strategy == "uniform" and sp.issparse(y):
             y = y.toarray()
@@ -395,16 +385,6 @@ def score(self, X, y, sample_weight=None):
             X = np.zeros(shape=(len(y), 1))
         return super().score(X, y, sample_weight)
 
-    # mypy error: Decorated property not supported
-    @deprecated(  # type: ignore
-        "The outputs_2d_ attribute is deprecated in version 0.22 "
-        "and will be removed in version 0.24. It is equivalent to "
-        "n_outputs_ > 1."
-    )
-    @property
-    def outputs_2d_(self):
-        return self.n_outputs_ != 1
-
 
 class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     """
@@ -624,13 +604,3 @@ def score(self, X, y, sample_weight=None):
         if X is None:
             X = np.zeros(shape=(len(y), 1))
         return super().score(X, y, sample_weight)
-
-    # mypy error: Decorated property not supported
-    @deprecated(  # type: ignore
-        "The outputs_2d_ attribute is deprecated in version 0.22 "
-        "and will be removed in version 0.24. It is equivalent to "
-        "n_outputs_ > 1."
-    )
-    @property
-    def outputs_2d_(self):
-        return self.n_outputs_ != 1
diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py
index a91f28b0710b2..f360f1833714f 100644
--- a/sklearn/ensemble/_base.py
+++ b/sklearn/ensemble/_base.py
@@ -5,7 +5,6 @@
 
 from abc import ABCMeta, abstractmethod
 import numbers
-import warnings
 from typing import List
 
 import numpy as np
@@ -227,16 +226,7 @@ def _validate_estimators(self):
         # defined by MetaEstimatorMixin
         self._validate_names(names)
 
-        # FIXME: deprecate the usage of None to drop an estimator from the
-        # ensemble. Remove in 0.24
-        if any(est is None for est in estimators):
-            warnings.warn(
-                "Using 'None' to drop an estimator from the ensemble is "
-                "deprecated in 0.22 and support will be dropped in 0.24. "
-                "Use the string 'drop' instead.", FutureWarning
-            )
-
-        has_estimator = any(est not in (None, 'drop') for est in estimators)
+        has_estimator = any(est != 'drop' for est in estimators)
         if not has_estimator:
             raise ValueError(
                 "All estimators are dropped. At least one is required "
@@ -247,7 +237,7 @@ def _validate_estimators(self):
                              else is_regressor)
 
         for est in estimators:
-            if est not in (None, 'drop') and not is_estimator_type(est):
+            if est != 'drop' and not is_estimator_type(est):
                 raise ValueError(
                     "The estimator {} should be a {}.".format(
                         est.__class__.__name__, is_estimator_type.__name__[3:]
diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py
index 0c1bec9ebfb65..18f446d899bf1 100644
--- a/sklearn/ensemble/_iforest.py
+++ b/sklearn/ensemble/_iforest.py
@@ -93,20 +93,6 @@ class IsolationForest(OutlierMixin, BaseBagging):
         :obj:`joblib.parallel_backend` context. ``-1`` means using all
         processors. See :term:`Glossary <n_jobs>` for more details.
 
-    behaviour : str, default='deprecated'
-        This parameter has no effect, is deprecated, and will be removed.
-
-        .. versionadded:: 0.20
-           ``behaviour`` is added in 0.20 for back-compatibility purpose.
-
-        .. deprecated:: 0.20
-           ``behaviour='old'`` is deprecated in 0.20 and will not be possible
-           in 0.22.
-
-        .. deprecated:: 0.22
-           ``behaviour`` parameter is deprecated in 0.22 and removed in
-           0.24.
-
     random_state : int or RandomState, default=None
         Controls the pseudo-randomness of the selection of the feature
         and split values for each branching step and each tree in the forest.
@@ -192,7 +178,6 @@ def __init__(self, *,
                  max_features=1.,
                  bootstrap=False,
                  n_jobs=None,
-                 behaviour='deprecated',
                  random_state=None,
                  verbose=0,
                  warm_start=False):
@@ -212,7 +197,6 @@ def __init__(self, *,
             random_state=random_state,
             verbose=verbose)
 
-        self.behaviour = behaviour
         self.contamination = contamination
 
     def _set_oob_score(self, X, y):
@@ -247,19 +231,6 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted estimator.
         """
-        if self.behaviour != 'deprecated':
-            if self.behaviour == 'new':
-                warn(
-                    "'behaviour' is deprecated in 0.22 and will be removed "
-                    "in 0.24. You should not pass or set this parameter.",
-                    FutureWarning
-                )
-            else:
-                raise NotImplementedError(
-                    "The old behaviour of IsolationForest is not implemented "
-                    "anymore. Remove the 'behaviour' parameter."
-                )
-
         X = check_array(X, accept_sparse=['csc'])
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 5e595ca9eb113..09004d1a21dbb 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -53,7 +53,7 @@ def _weights_not_none(self):
         if self.weights is None:
             return None
         return [w for est, w in zip(self.estimators, self.weights)
-                if est[1] not in (None, 'drop')]
+                if est[1] != 'drop']
 
     def _predict(self, X):
         """Collect results from clf.predict calls."""
@@ -78,15 +78,15 @@ def fit(self, X, y, sample_weight=None):
                         message=self._log_message(names[idx],
                                                   idx + 1, len(clfs))
                 )
-                for idx, clf in enumerate(clfs) if clf not in (None, 'drop')
+                for idx, clf in enumerate(clfs) if clf != 'drop'
             )
 
         self.named_estimators_ = Bunch()
 
-        # Uses None or 'drop' as placeholder for dropped estimators
+        # Uses 'drop' as placeholder for dropped estimators
         est_iter = iter(self.estimators_)
         for name, est in self.estimators:
-            current_est = est if est in (None, 'drop') else next(est_iter)
+            current_est = est if est == 'drop' else next(est_iter)
             self.named_estimators_[name] = current_est
 
         return self
@@ -126,11 +126,8 @@ class VotingClassifier(ClassifierMixin, _BaseVoting):
         using ``set_params``.
 
         .. versionchanged:: 0.21
-            ``'drop'`` is accepted.
-
-        .. deprecated:: 0.22
-           Using ``None`` to drop an estimator is deprecated in 0.22 and
-           support will be dropped in 0.24. Use the string ``'drop'`` instead.
+            ``'drop'`` is accepted. Using None was deprecated in 0.22 and
+            support was removed in 0.24.
 
     voting : {'hard', 'soft'}, default='hard'
         If 'hard', uses predicted class labels for majority rule voting.
@@ -376,11 +373,8 @@ class VotingRegressor(RegressorMixin, _BaseVoting):
         ``set_params``.
 
         .. versionchanged:: 0.21
-            ``'drop'`` is accepted.
-
-        .. deprecated:: 0.22
-           Using ``None`` to drop an estimator is deprecated in 0.22 and
-           support will be dropped in 0.24. Use the string ``'drop'`` instead.
+            ``'drop'`` is accepted. Using None was deprecated in 0.22 and
+            support was removed in 0.24.
 
     weights : array-like of shape (n_regressors,), default=None
         Sequence of weights (`float` or `int`) to weight the occurrences of
diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index 3e8401332aeef..f87c8652a4368 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -16,7 +16,6 @@
 from sklearn.utils._testing import assert_warns
 from sklearn.utils._testing import assert_warns_message
 from sklearn.utils._testing import assert_raise_message
-from sklearn.utils._testing import ignore_warnings
 
 from sklearn.dummy import DummyClassifier, DummyRegressor
 from sklearn.model_selection import GridSearchCV, ParameterGrid
@@ -52,8 +51,6 @@
 diabetes.target = diabetes.target[perm]
 
 
-# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
-@ignore_warnings(category=FutureWarning)
 def test_classification():
     # Check classification for various parameter settings.
     rng = check_random_state(0)
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index c7653ddac959c..5461258887054 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -32,7 +32,6 @@
 from sklearn.utils._testing import assert_warns
 from sklearn.utils._testing import assert_warns_message
 from sklearn.utils._testing import skip_if_32bit
-from sklearn.utils._testing import ignore_warnings
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import NotFittedError
 from sklearn.dummy import DummyClassifier, DummyRegressor
@@ -1295,8 +1294,6 @@ def _make_multiclass():
     return make_classification(n_classes=3, n_clusters_per_class=1)
 
 
-# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
-@ignore_warnings(category=FutureWarning)
 @pytest.mark.parametrize(
     "gb, dataset_maker, init_estimator",
     [(GradientBoostingClassifier, make_classification, DummyClassifier),
diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py
index aeb384ab44503..46174728c421c 100644
--- a/sklearn/ensemble/tests/test_iforest.py
+++ b/sklearn/ensemble/tests/test_iforest.py
@@ -124,11 +124,6 @@ def test_iforest_error():
     # test X_test n_features match X_train one:
     assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])
 
-    # test that behaviour='old' will raise an error
-    msg = "The old behaviour of IsolationForest is not implemented anymore."
-    with pytest.raises(NotImplementedError, match=msg):
-        IsolationForest(behaviour='old').fit(X)
-
 
 def test_recalculate_max_depth():
     """Check max_depth recalculation when max_samples is reset to n_samples"""
@@ -317,13 +312,6 @@ def test_iforest_chunks_works2(
     assert mocked_get_chunk.call_count == n_predict_calls
 
 
-def test_iforest_deprecation():
-    iforest = IsolationForest(behaviour='new')
-    warn_msg = "'behaviour' is deprecated in 0.22 and will be removed in 0.24"
-    with pytest.warns(FutureWarning, match=warn_msg):
-        iforest.fit(iris.data)
-
-
 def test_iforest_with_uniform_data():
     """Test whether iforest predicts inliers when using uniform data"""
 
diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py
index f81b9e59a5f1b..7e8852f2d0f19 100644
--- a/sklearn/ensemble/tests/test_voting.py
+++ b/sklearn/ensemble/tests/test_voting.py
@@ -358,11 +358,8 @@ def test_voting_classifier_set_params():
     assert eclf2.estimators[1][1].get_params() == clf2.get_params()
 
 
-# TODO: Remove parametrization in 0.24 when None is removed in Voting*
-@pytest.mark.parametrize("drop", [None, 'drop'])
-def test_set_estimator_none(drop):
-    """VotingClassifier set_params should be able to set estimators as None or
-    drop"""
+def test_set_estimator_drop():
+    # VotingClassifier set_params should be able to set estimators as drop
     # Test predict
     clf1 = LogisticRegression(random_state=123)
     clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
@@ -375,27 +372,27 @@ def test_set_estimator_none(drop):
                                          ('nb', clf3)],
                              voting='hard', weights=[1, 1, 0.5])
     with pytest.warns(None) as record:
-        eclf2.set_params(rf=drop).fit(X, y)
-    assert record if drop is None else not record
+        eclf2.set_params(rf='drop').fit(X, y)
+    assert not record
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
 
-    assert dict(eclf2.estimators)["rf"] is drop
+    assert dict(eclf2.estimators)["rf"] == 'drop'
     assert len(eclf2.estimators_) == 2
     assert all(isinstance(est, (LogisticRegression, GaussianNB))
                for est in eclf2.estimators_)
-    assert eclf2.get_params()["rf"] is drop
+    assert eclf2.get_params()["rf"] == 'drop'
 
     eclf1.set_params(voting='soft').fit(X, y)
     with pytest.warns(None) as record:
         eclf2.set_params(voting='soft').fit(X, y)
-    assert record if drop is None else not record
+    assert not record
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
     assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
     msg = 'All estimators are dropped. At least one is required'
     with pytest.warns(None) as record:
         with pytest.raises(ValueError, match=msg):
-            eclf2.set_params(lr=drop, rf=drop, nb=drop).fit(X, y)
-    assert record if drop is None else not record
+            eclf2.set_params(lr='drop', rf='drop', nb='drop').fit(X, y)
+    assert not record
 
     # Test soft voting transform
     X1 = np.array([[1], [2]])
@@ -408,8 +405,8 @@ def test_set_estimator_none(drop):
                              voting='soft', weights=[1, 0.5],
                              flatten_transform=False)
     with pytest.warns(None) as record:
-        eclf2.set_params(rf=drop).fit(X1, y1)
-    assert record if drop is None else not record
+        eclf2.set_params(rf='drop').fit(X1, y1)
+    assert not record
     assert_array_almost_equal(eclf1.transform(X1),
                               np.array([[[0.7, 0.3], [0.3, 0.7]],
                                         [[1., 0.], [0., 1.]]]))
@@ -470,7 +467,6 @@ def test_transform():
     )
 
 
-# TODO: Remove drop=None in 0.24 when None is removed in Voting*
 @pytest.mark.parametrize(
     "X, y, voter",
     [(X, y, VotingClassifier(
@@ -480,19 +476,16 @@ def test_transform():
          [('lr', LinearRegression()),
           ('rf', RandomForestRegressor(n_estimators=5))]))]
 )
-@pytest.mark.parametrize("drop", [None, 'drop'])
-def test_none_estimator_with_weights(X, y, voter, drop):
-    # TODO: remove the parametrization on 'drop' when support for None is
-    # removed.
+def test_none_estimator_with_weights(X, y, voter):
     # check that an estimator can be set to 'drop' and passing some weight
     # regression test for
     # https://github.com/scikit-learn/scikit-learn/issues/13777
     voter = clone(voter)
     voter.fit(X, y, sample_weight=np.ones(y.shape))
-    voter.set_params(lr=drop)
+    voter.set_params(lr='drop')
     with pytest.warns(None) as record:
         voter.fit(X, y, sample_weight=np.ones(y.shape))
-    assert record if drop is None else not record
+    assert not record
     y_pred = voter.predict(X)
     assert y_pred.shape == y.shape
 
@@ -555,20 +548,3 @@ def test_voting_verbose(estimator, capsys):
 
     estimator.fit(X, y)
     assert re.match(pattern, capsys.readouterr()[0])
-
-
-# TODO: Remove in 0.24 when None is removed in Voting*
-@pytest.mark.parametrize(
-    "Voter, BaseEstimator",
-    [(VotingClassifier, DecisionTreeClassifier),
-     (VotingRegressor, DecisionTreeRegressor)]
-)
-def test_deprecate_none_transformer(Voter, BaseEstimator):
-    est = Voter(estimators=[('lr', None),
-                            ('tree', BaseEstimator(random_state=0))])
-
-    msg = ("Using 'None' to drop an estimator from the ensemble is "
-           "deprecated in 0.22 and support will be dropped in 0.24. "
-           "Use the string 'drop' instead.")
-    with pytest.warns(FutureWarning, match=msg):
-        est.fit(X, y)
diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py
index 5b64bc6b7c7a0..f7c0bad5af193 100755
--- a/sklearn/ensemble/tests/test_weight_boosting.py
+++ b/sklearn/ensemble/tests/test_weight_boosting.py
@@ -12,7 +12,6 @@
 from sklearn.utils._testing import assert_array_equal, assert_array_less
 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_raises, assert_raises_regexp
-from sklearn.utils._testing import ignore_warnings
 
 from sklearn.base import BaseEstimator
 from sklearn.base import clone
@@ -500,8 +499,6 @@ def test_multidimensional_X():
     boost.predict(X)
 
 
-# TODO: Remove in 0.24 when DummyClassifier's `strategy` default changes
-@ignore_warnings
 @pytest.mark.parametrize("algorithm", ['SAMME', 'SAMME.R'])
 def test_adaboostclassifier_without_sample_weight(algorithm):
     X, y = iris.data, iris.target
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index a58f649946aa6..930559fc55def 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -15,7 +15,7 @@
 from scipy import sparse
 from numpy.lib.stride_tricks import as_strided
 
-from ..utils import check_array, check_random_state, deprecated
+from ..utils import check_array, check_random_state
 from ..utils.validation import _deprecate_positional_args
 from ..base import BaseEstimator
 
@@ -305,47 +305,6 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1):
     return patches
 
 
-@deprecated("The function feature_extraction.image.extract_patches has been "
-            "deprecated in 0.22 and will be removed in 0.24.")
-def extract_patches(arr, patch_shape=8, extraction_step=1):
-    """Extracts patches of any n-dimensional array in place using strides.
-
-    Given an n-dimensional array it will return a 2n-dimensional array with
-    the first n dimensions indexing patch position and the last n indexing
-    the patch content. This operation is immediate (O(1)). A reshape
-    performed on the first n dimensions will cause numpy to copy data, leading
-    to a list of extracted patches.
-
-    Read more in the :ref:`User Guide <image_feature_extraction>`.
-
-    Parameters
-    ----------
-    arr : ndarray
-        n-dimensional array of which patches are to be extracted
-
-    patch_shape : int or tuple of length arr.ndim, default=8
-        Indicates the shape of the patches to be extracted. If an
-        integer is given, the shape will be a hypercube of
-        sidelength given by its value.
-
-    extraction_step : int or tuple of length arr.ndim, default=1
-        Indicates step size at which extraction shall be performed.
-        If integer is given, then the step is uniform in all dimensions.
-
-
-    Returns
-    -------
-    patches : strided ndarray
-        2n-dimensional array indexing patches on first n dimensions and
-        containing patches on the last n dimensions. These dimensions
-        are fake, but this way no data is copied. A simple reshape invokes
-        a copying operation to obtain a list of patches:
-        result.reshape([-1] + list(patch_shape))
-    """
-    return _extract_patches(arr, patch_shape=patch_shape,
-                            extraction_step=extraction_step)
-
-
 @_deprecate_positional_args
 def extract_patches_2d(image, patch_size, *, max_patches=None,
                        random_state=None):
diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py
index 7d744e361c8b8..6dbf73c16ce80 100644
--- a/sklearn/feature_extraction/tests/test_image.py
+++ b/sklearn/feature_extraction/tests/test_image.py
@@ -10,8 +10,7 @@
 
 from sklearn.feature_extraction.image import (
     img_to_graph, grid_to_graph, extract_patches_2d,
-    reconstruct_from_patches_2d, PatchExtractor, _extract_patches,
-    extract_patches)
+    reconstruct_from_patches_2d, PatchExtractor, _extract_patches)
 from sklearn.utils._testing import ignore_warnings
 
 
@@ -334,11 +333,3 @@ def test_width_patch():
         extract_patches_2d(x, (4, 1))
     with pytest.raises(ValueError):
         extract_patches_2d(x, (1, 4))
-
-
-# TODO: Remove in 0.24
-def test_extract_patches_deprecated():
-    msg = ("The function feature_extraction.image.extract_patches has been "
-           "deprecated in 0.22 and will be removed in 0.24.")
-    with pytest.warns(FutureWarning, match=msg):
-        extract_patches(downsampled_face)
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 86ae2fd6c149e..c65f25c2e7329 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -13,7 +13,6 @@
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.feature_extraction.text import TfidfTransformer
 from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.feature_extraction.text import VectorizerMixin
 
 from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
 
@@ -522,18 +521,6 @@ def test_tfidf_vectorizer_setters():
     assert tv._tfidf.sublinear_tf
 
 
-# FIXME Remove copy parameter support in 0.24
-def test_tfidf_vectorizer_deprecationwarning():
-    msg = ("'copy' param is unused and has been deprecated since "
-           "version 0.22. Backward compatibility for 'copy' will "
-           "be removed in 0.24.")
-    with pytest.warns(FutureWarning, match=msg):
-        tv = TfidfVectorizer()
-        train_data = JUNK_FOOD_DOCS
-        tv.fit(train_data)
-        tv.transform(train_data, copy=True)
-
-
 @fails_if_pypy
 def test_hashing_vectorizer():
     v = HashingVectorizer()
@@ -1352,14 +1339,3 @@ def test_n_features_in(Vectorizer, X):
     assert not hasattr(vectorizer, 'n_features_in_')
     vectorizer.fit(X)
     assert not hasattr(vectorizer, 'n_features_in_')
-
-
-# TODO: Remove in 0.24
-def test_vectorizermixin_is_deprecated():
-    class MyVectorizer(VectorizerMixin):
-        pass
-
-    msg = ("VectorizerMixin is deprecated in version 0.22 and will be removed "
-           "in version 0.24.")
-    with pytest.warns(FutureWarning, match=msg):
-        MyVectorizer()
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 661f638b000fc..f8aa3cc786fff 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -30,7 +30,7 @@
 from ._hash import FeatureHasher
 from ._stop_words import ENGLISH_STOP_WORDS
 from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES
-from ..utils import _IS_32BIT, deprecated
+from ..utils import _IS_32BIT
 from ..utils.fixes import _astype_copy_false
 from ..exceptions import NotFittedError
 from ..utils.validation import _deprecate_positional_args
@@ -505,12 +505,6 @@ def _warn_for_unused_params(self):
                               " since 'analyzer' != 'word'")
 
 
-@deprecated("VectorizerMixin is deprecated in version "
-            "0.22 and will be removed in version 0.24.")
-class VectorizerMixin(_VectorizerMixin):
-    pass
-
-
 class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator):
     """Convert a collection of text documents to a matrix of token occurrences
 
@@ -1843,7 +1837,7 @@ def fit_transform(self, raw_documents, y=None):
         # we set copy to False
         return self._tfidf.transform(X, copy=False)
 
-    def transform(self, raw_documents, copy="deprecated"):
+    def transform(self, raw_documents):
         """Transform documents to document-term matrix.
 
         Uses the vocabulary and document frequencies (df) learned by fit (or
@@ -1854,15 +1848,6 @@ def transform(self, raw_documents, copy="deprecated"):
         raw_documents : iterable
             An iterable which yields either str, unicode or file objects.
 
-        copy : bool, default=True
-            Whether to copy X and operate on the copy or perform in-place
-            operations.
-
-            .. deprecated:: 0.22
-               The `copy` parameter is unused and was deprecated in version
-               0.22 and will be removed in 0.24. This parameter will be
-               ignored.
-
         Returns
         -------
         X : sparse matrix of (n_samples, n_features)
@@ -1870,12 +1855,6 @@ def transform(self, raw_documents, copy="deprecated"):
         """
         check_is_fitted(self, msg='The TF-IDF vectorizer is not fitted')
 
-        # FIXME Remove copy parameter support in 0.24
-        if copy != "deprecated":
-            msg = ("'copy' param is unused and has been deprecated since "
-                   "version 0.22. Backward compatibility for 'copy' will "
-                   "be removed in 0.24.")
-            warnings.warn(msg, FutureWarning)
         X = super().transform(raw_documents)
         return self._tfidf.transform(X, copy=False)
 
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index bf48aac36d846..a43b52c8627ec 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -23,7 +23,6 @@
 from collections import namedtuple
 import math
 from inspect import signature
-import warnings
 
 import numpy as np
 from scipy.special import kv, gamma
@@ -159,16 +158,8 @@ def get_params(self, deep=True):
                                " %s doesn't follow this convention."
                                % (cls, ))
         for arg in args:
-            try:
-                value = getattr(self, arg)
-            except AttributeError:
-                warnings.warn('From version 0.24, get_params will raise an '
-                              'AttributeError if a parameter cannot be '
-                              'retrieved as an instance attribute. Previously '
-                              'it would return None.',
-                              FutureWarning)
-                value = None
-            params[arg] = value
+            params[arg] = getattr(self, arg)
+
         return params
 
     def set_params(self, **params):
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index 9e2248a66ee28..4627117677c8b 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -14,7 +14,7 @@
 from sklearn.gaussian_process.kernels \
     import (RBF, Matern, RationalQuadratic, ExpSineSquared, DotProduct,
             ConstantKernel, WhiteKernel, PairwiseKernel, KernelOperator,
-            Exponentiation, Kernel, CompoundKernel)
+            Exponentiation, CompoundKernel)
 from sklearn.base import clone
 
 from sklearn.utils._testing import (assert_almost_equal, assert_array_equal,
@@ -356,27 +356,6 @@ def test_repr_kernels(kernel):
     repr(kernel)
 
 
-def test_warns_on_get_params_non_attribute():
-    class MyKernel(Kernel):
-        def __init__(self, param=5):
-            pass
-
-        def __call__(self, X, Y=None, eval_gradient=False):
-            return X
-
-        def diag(self, X):
-            return np.ones(X.shape[0])
-
-        def is_stationary(self):
-            return False
-
-    est = MyKernel()
-    with pytest.warns(FutureWarning, match='AttributeError'):
-        params = est.get_params()
-
-    assert params['param'] is None
-
-
 def test_rational_quadratic_kernel():
     kernel = RationalQuadratic(length_scale=[1., 1.])
     assert_raise_message(AttributeError,
diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py
index e02717e76dce3..2d0713e890fba 100644
--- a/sklearn/inspection/_plot/partial_dependence.py
+++ b/sklearn/inspection/_plot/partial_dependence.py
@@ -1,7 +1,6 @@
 import numbers
 from itertools import chain
 from itertools import count
-import warnings
 
 import numpy as np
 from scipy import sparse
@@ -20,7 +19,7 @@
 def plot_partial_dependence(estimator, X, features, *, feature_names=None,
                             target=None, response_method='auto', n_cols=3,
                             grid_resolution=100, percentiles=(0.05, 0.95),
-                            method='auto', n_jobs=None, verbose=0, fig=None,
+                            method='auto', n_jobs=None, verbose=0,
                             line_kw=None, contour_kw=None, ax=None):
     """Partial dependence plots.
 
@@ -154,13 +153,6 @@ def plot_partial_dependence(estimator, X, features, *, feature_names=None,
     verbose : int, optional (default=0)
         Verbose output during PD computations.
 
-    fig : Matplotlib figure object, optional (default=None)
-        A figure object onto which the plots will be drawn, after the figure
-        has been cleared. By default, a new one is created.
-
-        .. deprecated:: 0.22
-           ``fig`` will be removed in 0.24.
-
     line_kw : dict, optional
         Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
         For one-way partial dependence plots.
@@ -317,13 +309,6 @@ def convert_feature(fx):
             X_col = _safe_indexing(X, fx, axis=1)
             deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))
 
-    if fig is not None:
-        warnings.warn("The fig parameter is deprecated in version "
-                      "0.22 and will be removed in version 0.24",
-                      FutureWarning)
-        fig.clear()
-        ax = fig.gca()
-
     display = PartialDependenceDisplay(pd_results=pd_results,
                                        features=features,
                                        feature_names=feature_names,
diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
index 3f82f0846909e..e07707b922400 100644
--- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
+++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
@@ -457,21 +457,3 @@ def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
 
     with pytest.raises(ValueError, match=err_msg):
         plot_partial_dependence(clf, iris.data, **params)
-
-
-def test_plot_partial_dependence_fig_deprecated(pyplot):
-    # Make sure fig object is correctly used if not None
-    X, y = make_regression(n_samples=50, random_state=0)
-    clf = LinearRegression()
-    clf.fit(X, y)
-
-    fig = pyplot.figure()
-    grid_resolution = 25
-
-    msg = ("The fig parameter is deprecated in version 0.22 and will be "
-           "removed in version 0.24")
-    with pytest.warns(FutureWarning, match=msg):
-        plot_partial_dependence(
-            clf, X, [0, 1], target=0, grid_resolution=grid_resolution, fig=fig)
-
-    assert pyplot.gcf() is fig
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index c1f7bb86a7fcf..0a10e5f23f562 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -531,11 +531,7 @@ def test_ridge_gcv_sample_weights(
     kfold = RidgeCV(
         alphas=alphas, cv=splits, scoring='neg_mean_squared_error',
         fit_intercept=fit_intercept)
-    # ignore warning from GridSearchCV: FutureWarning: The default
-    # of the `iid` parameter will change from True to False in version 0.22
-    # and will be removed in 0.24
-    with ignore_warnings(category=FutureWarning):
-        kfold.fit(X_tiled, y_tiled)
+    kfold.fit(X_tiled, y_tiled)
 
     ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
     splits = cv.split(X_tiled, y_tiled, groups=indices)
diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py
index 3229522d21c6e..1a9ac3862e26c 100644
--- a/sklearn/manifold/_isomap.py
+++ b/sklearn/manifold/_isomap.py
@@ -6,7 +6,6 @@
 import numpy as np
 from ..base import BaseEstimator, TransformerMixin
 from ..neighbors import NearestNeighbors, kneighbors_graph
-from ..utils.deprecation import deprecated
 from ..utils.validation import check_is_fitted
 from ..utils.validation import _deprecate_positional_args
 from ..utils.graph import graph_shortest_path
@@ -168,16 +167,6 @@ def _fit_transform(self, X):
 
         self.embedding_ = self.kernel_pca_.fit_transform(G)
 
-    # mypy error: Decorated property not supported
-    @deprecated(  # type: ignore
-        "Attribute `training_data_` was deprecated in version 0.22 and"
-        " will be removed in 0.24."
-    )
-    @property
-    def training_data_(self):
-        check_is_fitted(self)
-        return self.nbrs_._fit_X
-
     def reconstruction_error(self):
         """Compute the reconstruction error for the embedding.
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 400e92c158ca8..f116f76edb260 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -21,7 +21,6 @@
 from collections.abc import Iterable
 from functools import partial
 from collections import Counter
-import warnings
 
 import numpy as np
 
@@ -127,9 +126,6 @@ def __init__(self, score_func, sign, kwargs):
         self._kwargs = kwargs
         self._score_func = score_func
         self._sign = sign
-        # XXX After removing the deprecated scorers (v0.24) remove the
-        # XXX deprecation_msg property again and remove __call__'s body again
-        self._deprecation_msg = None
 
     def __repr__(self):
         kwargs_string = "".join([", %s=%s" % (str(k), str(v))
@@ -162,10 +158,6 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         score : float
             Score function applied to prediction of estimator on X.
         """
-        if self._deprecation_msg is not None:
-            warnings.warn(self._deprecation_msg,
-                          category=FutureWarning,
-                          stacklevel=2)
         return self._score(partial(_cached_call, None), estimator, X, y_true,
                            sample_weight=sample_weight)
 
@@ -353,11 +345,7 @@ def get_scorer(scoring):
     """
     if isinstance(scoring, str):
         try:
-            if scoring == 'brier_score_loss':
-                # deprecated
-                scorer = brier_score_loss_scorer
-            else:
-                scorer = SCORERS[scoring]
+            scorer = SCORERS[scoring]
         except KeyError:
             raise ValueError('%r is not a valid scoring value. '
                              'Use sorted(sklearn.metrics.SCORERS.keys()) '
@@ -668,10 +656,6 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,
 brier_score_loss_scorer = make_scorer(brier_score_loss,
                                       greater_is_better=False,
                                       needs_proba=True)
-deprecation_msg = ('Scoring method brier_score_loss was renamed to '
-                   'neg_brier_score in version 0.22 and will '
-                   'be removed in 0.24.')
-brier_score_loss_scorer._deprecation_msg = deprecation_msg
 
 
 # Clustering scores
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 189d36ae88328..f4ef238983c41 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -553,17 +553,6 @@ def test_scoring_is_not_metric():
         check_scoring(KMeans(), scoring=cluster_module.adjusted_rand_score)
 
 
-def test_deprecated_scorer():
-    X, y = make_blobs(random_state=0, centers=2)
-    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-    clf = DecisionTreeClassifier()
-    clf.fit(X_train, y_train)
-
-    deprecated_scorer = get_scorer('brier_score_loss')
-    with pytest.warns(FutureWarning):
-        deprecated_scorer(clf, X_test, y_test)
-
-
 @pytest.mark.parametrize(
     ("scorers,expected_predict_count,"
      "expected_predict_proba_count,expected_decision_func_count"),
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index b706487b65343..c3349cc967abe 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -404,14 +404,13 @@ class BaseSearchCV(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):
     @abstractmethod
     @_deprecate_positional_args
     def __init__(self, estimator, *, scoring=None, n_jobs=None,
-                 iid='deprecated', refit=True, cv=None, verbose=0,
+                 refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', error_score=np.nan,
                  return_train_score=True):
 
         self.scoring = scoring
         self.estimator = estimator
         self.n_jobs = n_jobs
-        self.iid = iid
         self.refit = refit
         self.cv = cv
         self.verbose = verbose
@@ -843,20 +842,11 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
         test_sample_counts = np.array(test_sample_counts[:n_splits],
                                       dtype=np.int)
 
-        if self.iid != 'deprecated':
-            warnings.warn(
-                "The parameter 'iid' is deprecated in 0.22 and will be "
-                "removed in 0.24.", FutureWarning
-            )
-            iid = self.iid
-        else:
-            iid = False
-
         for scorer_name in scorers.keys():
             # Computed the (weighted) mean and std for test scores alone
             _store('test_%s' % scorer_name, test_scores[scorer_name],
                    splits=True, rank=True,
-                   weights=test_sample_counts if iid else None)
+                   weights=None)
             if self.return_train_score:
                 _store('train_%s' % scorer_name, train_scores[scorer_name],
                        splits=True)
@@ -934,15 +924,6 @@ class GridSearchCV(BaseSearchCV):
             - A str, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
 
-    iid : bool, default=False
-        If True, return the average score across folds, weighted by the number
-        of samples in each test set. In this case, the data is assumed to be
-        identically distributed across the folds, and the loss minimized is
-        the total loss per sample, and not the mean loss across the folds.
-
-        .. deprecated:: 0.22
-            Parameter ``iid`` is deprecated in 0.22 and will be removed in 0.24
-
     cv : int, cross-validation generator or an iterable, default=None
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -1172,12 +1153,12 @@ class GridSearchCV(BaseSearchCV):
 
     @_deprecate_positional_args
     def __init__(self, estimator, param_grid, *, scoring=None,
-                 n_jobs=None, iid='deprecated', refit=True, cv=None,
+                 n_jobs=None, refit=True, cv=None,
                  verbose=0, pre_dispatch='2*n_jobs',
                  error_score=np.nan, return_train_score=False):
         super().__init__(
             estimator=estimator, scoring=scoring,
-            n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+            n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
             return_train_score=return_train_score)
         self.param_grid = param_grid
@@ -1275,15 +1256,6 @@ class RandomizedSearchCV(BaseSearchCV):
             - A str, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
 
-    iid : bool, default=False
-        If True, return the average score across folds, weighted by the number
-        of samples in each test set. In this case, the data is assumed to be
-        identically distributed across the folds, and the loss minimized is
-        the total loss per sample, and not the mean loss across the folds.
-
-        .. deprecated:: 0.22
-            Parameter ``iid`` is deprecated in 0.22 and will be removed in 0.24
-
     cv : int, cross-validation generator or an iterable, default=None
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -1511,7 +1483,7 @@ class RandomizedSearchCV(BaseSearchCV):
 
     @_deprecate_positional_args
     def __init__(self, estimator, param_distributions, *, n_iter=10,
-                 scoring=None, n_jobs=None, iid='deprecated', refit=True,
+                 scoring=None, n_jobs=None, refit=True,
                  cv=None, verbose=0, pre_dispatch='2*n_jobs',
                  random_state=None, error_score=np.nan,
                  return_train_score=False):
@@ -1520,7 +1492,7 @@ def __init__(self, estimator, param_distributions, *, n_iter=10,
         self.random_state = random_state
         super().__init__(
             estimator=estimator, scoring=scoring,
-            n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+            n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
             return_train_score=return_train_score)
 
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 6684fad038786..259eef782bb99 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -289,12 +289,10 @@ def __init__(self, n_splits, *, shuffle, random_state):
                             " got {0}".format(shuffle))
 
         if not shuffle and random_state is not None:  # None is the default
-            # TODO 0.24: raise a ValueError instead of a warning
-            warnings.warn(
+            raise ValueError(
                 'Setting a random_state has no effect since shuffle is '
-                'False. This will raise an error in 0.24. You should leave '
+                'False. You should leave '
                 'random_state to its default (None), or set shuffle=True.',
-                FutureWarning
             )
 
         self.n_splits = n_splits
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 1673040f96bc6..24b71143f0e83 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -35,7 +35,6 @@
 from sklearn.datasets import make_multilabel_classification
 
 from sklearn.model_selection import fit_grid_point
-from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import train_test_split
 from sklearn.model_selection import KFold
 from sklearn.model_selection import StratifiedKFold
@@ -845,7 +844,6 @@ def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand):
                for key in param_keys + score_keys)
 
 
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
 def test_grid_search_cv_results():
     X, y = make_classification(n_samples=50, n_features=4,
                                random_state=42)
@@ -867,38 +865,35 @@ def test_grid_search_cv_results():
                   'mean_score_time', 'std_score_time')
     n_candidates = n_grid_points
 
-    for iid in (False, True):
-        search = GridSearchCV(SVC(), cv=n_splits, iid=iid,
-                              param_grid=params, return_train_score=True)
-        search.fit(X, y)
-        assert iid == search.iid
-        cv_results = search.cv_results_
-        # Check if score and timing are reasonable
-        assert all(cv_results['rank_test_score'] >= 1)
-        assert (all(cv_results[k] >= 0) for k in score_keys
-                if k != 'rank_test_score')
-        assert (all(cv_results[k] <= 1) for k in score_keys
-                if 'time' not in k and
-                k != 'rank_test_score')
-        # Check cv_results structure
-        check_cv_results_array_types(search, param_keys, score_keys)
-        check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
-        # Check masking
-        cv_results = search.cv_results_
-        n_candidates = len(search.cv_results_['params'])
-        assert all((cv_results['param_C'].mask[i] and
-                    cv_results['param_gamma'].mask[i] and
-                    not cv_results['param_degree'].mask[i])
-                   for i in range(n_candidates)
-                   if cv_results['param_kernel'][i] == 'linear')
-        assert all((not cv_results['param_C'].mask[i] and
-                    not cv_results['param_gamma'].mask[i] and
-                    cv_results['param_degree'].mask[i])
-                   for i in range(n_candidates)
-                   if cv_results['param_kernel'][i] == 'rbf')
-
-
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
+    search = GridSearchCV(SVC(), cv=n_splits, param_grid=params,
+                          return_train_score=True)
+    search.fit(X, y)
+    cv_results = search.cv_results_
+    # Check if score and timing are reasonable
+    assert all(cv_results['rank_test_score'] >= 1)
+    assert (all(cv_results[k] >= 0) for k in score_keys
+            if k != 'rank_test_score')
+    assert (all(cv_results[k] <= 1) for k in score_keys
+            if 'time' not in k and
+            k != 'rank_test_score')
+    # Check cv_results structure
+    check_cv_results_array_types(search, param_keys, score_keys)
+    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
+    # Check masking
+    cv_results = search.cv_results_
+    n_candidates = len(search.cv_results_['params'])
+    assert all((cv_results['param_C'].mask[i] and
+                cv_results['param_gamma'].mask[i] and
+                not cv_results['param_degree'].mask[i])
+               for i in range(n_candidates)
+               if cv_results['param_kernel'][i] == 'linear')
+    assert all((not cv_results['param_C'].mask[i] and
+                not cv_results['param_gamma'].mask[i] and
+                cv_results['param_degree'].mask[i])
+               for i in range(n_candidates)
+               if cv_results['param_kernel'][i] == 'rbf')
+
+
 def test_random_search_cv_results():
     X, y = make_classification(n_samples=50, n_features=4, random_state=42)
 
@@ -920,28 +915,26 @@ def test_random_search_cv_results():
                   'mean_score_time', 'std_score_time')
     n_cand = n_search_iter
 
-    for iid in (False, True):
-        search = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
-                                    cv=n_splits, iid=iid,
-                                    param_distributions=params,
-                                    return_train_score=True)
-        search.fit(X, y)
-        assert iid == search.iid
-        cv_results = search.cv_results_
-        # Check results structure
-        check_cv_results_array_types(search, param_keys, score_keys)
-        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
-        n_candidates = len(search.cv_results_['params'])
-        assert all((cv_results['param_C'].mask[i] and
-                    cv_results['param_gamma'].mask[i] and
-                    not cv_results['param_degree'].mask[i])
-                   for i in range(n_candidates)
-                   if cv_results['param_kernel'][i] == 'linear')
-        assert all((not cv_results['param_C'].mask[i] and
-                    not cv_results['param_gamma'].mask[i] and
-                    cv_results['param_degree'].mask[i])
-                   for i in range(n_candidates)
-                   if cv_results['param_kernel'][i] == 'rbf')
+    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
+                                cv=n_splits,
+                                param_distributions=params,
+                                return_train_score=True)
+    search.fit(X, y)
+    cv_results = search.cv_results_
+    # Check results structure
+    check_cv_results_array_types(search, param_keys, score_keys)
+    check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
+    n_candidates = len(search.cv_results_['params'])
+    assert all((cv_results['param_C'].mask[i] and
+                cv_results['param_gamma'].mask[i] and
+                not cv_results['param_degree'].mask[i])
+               for i in range(n_candidates)
+               if cv_results['param_kernel'][i] == 'linear')
+    assert all((not cv_results['param_C'].mask[i] and
+                not cv_results['param_gamma'].mask[i] and
+                cv_results['param_degree'].mask[i])
+               for i in range(n_candidates)
+               if cv_results['param_kernel'][i] == 'rbf')
 
 
 @pytest.mark.parametrize(
@@ -951,7 +944,7 @@ def test_random_search_cv_results():
       {'param_distributions': {'C': [1, 10]}, 'n_iter': 2})]
 )
 def test_search_default_iid(SearchCV, specialized_params):
-    # Test the IID parameter
+    # TODO: 'iid' was removed; rename the test and describe what it checks
     # noise-free simple 2d-data
     X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
                       cluster_std=0.1, shuffle=False, n_samples=80)
@@ -998,105 +991,6 @@ def test_search_default_iid(SearchCV, specialized_params):
     assert train_std == pytest.approx(0)
 
 
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
-def test_search_iid_param():
-    # Test the IID parameter
-    # noise-free simple 2d-data
-    X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0,
-                      cluster_std=0.1, shuffle=False, n_samples=80)
-    # split dataset into two folds that are not iid
-    # first one contains data of all 4 blobs, second only from two.
-    mask = np.ones(X.shape[0], dtype=np.bool)
-    mask[np.where(y == 1)[0][::2]] = 0
-    mask[np.where(y == 2)[0][::2]] = 0
-    # this leads to perfect classification on one fold and a score of 1/3 on
-    # the other
-    # create "cv" for splits
-    cv = [[mask, ~mask], [~mask, mask]]
-    # once with iid=True (default)
-    grid_search = GridSearchCV(SVC(gamma='auto'), param_grid={'C': [1, 10]},
-                               cv=cv, return_train_score=True, iid=True)
-    random_search = RandomizedSearchCV(SVC(gamma='auto'), n_iter=2,
-                                       param_distributions={'C': [1, 10]},
-                                       cv=cv, iid=True,
-                                       return_train_score=True)
-    for search in (grid_search, random_search):
-        search.fit(X, y)
-        assert search.iid or search.iid is None
-
-        test_cv_scores = np.array(list(search.cv_results_['split%d_test_score'
-                                                          % s_i][0]
-                                       for s_i in range(search.n_splits_)))
-        test_mean = search.cv_results_['mean_test_score'][0]
-        test_std = search.cv_results_['std_test_score'][0]
-
-        train_cv_scores = np.array(list(search.cv_results_['split%d_train_'
-                                                           'score' % s_i][0]
-                                        for s_i in range(search.n_splits_)))
-        train_mean = search.cv_results_['mean_train_score'][0]
-        train_std = search.cv_results_['std_train_score'][0]
-
-        # Test the first candidate
-        assert search.cv_results_['param_C'][0] == 1
-        assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
-        assert_array_almost_equal(train_cv_scores, [1, 1])
-
-        # for first split, 1/4 of dataset is in test, for second 3/4.
-        # take weighted average and weighted std
-        expected_test_mean = 1 * 1. / 4. + 1. / 3. * 3. / 4.
-        expected_test_std = np.sqrt(1. / 4 * (expected_test_mean - 1) ** 2 +
-                                    3. / 4 * (expected_test_mean - 1. / 3.) **
-                                    2)
-        assert_almost_equal(test_mean, expected_test_mean)
-        assert_almost_equal(test_std, expected_test_std)
-        assert_array_almost_equal(test_cv_scores,
-                                  cross_val_score(SVC(C=1, gamma='auto'), X,
-                                                  y, cv=cv))
-
-        # For the train scores, we do not take a weighted mean irrespective of
-        # i.i.d. or not
-        assert_almost_equal(train_mean, 1)
-        assert_almost_equal(train_std, 0)
-
-    # once with iid=False
-    grid_search = GridSearchCV(SVC(gamma='auto'),
-                               param_grid={'C': [1, 10]},
-                               cv=cv, iid=False, return_train_score=True)
-    random_search = RandomizedSearchCV(SVC(gamma='auto'), n_iter=2,
-                                       param_distributions={'C': [1, 10]},
-                                       cv=cv, iid=False,
-                                       return_train_score=True)
-
-    for search in (grid_search, random_search):
-        search.fit(X, y)
-        assert not search.iid
-
-        test_cv_scores = np.array(list(search.cv_results_['split%d_test_score'
-                                                          % s][0]
-                                       for s in range(search.n_splits_)))
-        test_mean = search.cv_results_['mean_test_score'][0]
-        test_std = search.cv_results_['std_test_score'][0]
-
-        train_cv_scores = np.array(list(search.cv_results_['split%d_train_'
-                                                           'score' % s][0]
-                                        for s in range(search.n_splits_)))
-        train_mean = search.cv_results_['mean_train_score'][0]
-        train_std = search.cv_results_['std_train_score'][0]
-
-        assert search.cv_results_['param_C'][0] == 1
-        # scores are the same as above
-        assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
-        # Unweighted mean/std is used
-        assert_almost_equal(test_mean, np.mean(test_cv_scores))
-        assert_almost_equal(test_std, np.std(test_cv_scores))
-
-        # For the train scores, we do not take a weighted mean irrespective of
-        # i.i.d. or not
-        assert_almost_equal(train_mean, 1)
-        assert_almost_equal(train_std, 0)
-
-
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
 def test_grid_search_cv_results_multimetric():
     X, y = make_classification(n_samples=50, n_features=4, random_state=42)
 
@@ -1104,22 +998,19 @@ def test_grid_search_cv_results_multimetric():
     params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]),
               dict(kernel=['poly', ], degree=[1, 2])]
 
-    for iid in (False, True):
-        grid_searches = []
-        for scoring in ({'accuracy': make_scorer(accuracy_score),
-                         'recall': make_scorer(recall_score)},
-                        'accuracy', 'recall'):
-            grid_search = GridSearchCV(SVC(), cv=n_splits,
-                                       iid=iid, param_grid=params,
-                                       scoring=scoring, refit=False)
-            grid_search.fit(X, y)
-            assert grid_search.iid == iid
-            grid_searches.append(grid_search)
+    grid_searches = []
+    for scoring in ({'accuracy': make_scorer(accuracy_score),
+                     'recall': make_scorer(recall_score)},
+                    'accuracy', 'recall'):
+        grid_search = GridSearchCV(SVC(), cv=n_splits,
+                                   param_grid=params,
+                                   scoring=scoring, refit=False)
+        grid_search.fit(X, y)
+        grid_searches.append(grid_search)
 
-        compare_cv_results_multimetric_with_single(*grid_searches, iid=iid)
+    compare_cv_results_multimetric_with_single(*grid_searches)
 
 
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
 def test_random_search_cv_results_multimetric():
     X, y = make_classification(n_samples=50, n_features=4, random_state=42)
 
@@ -1129,38 +1020,34 @@ def test_random_search_cv_results_multimetric():
     # Scipy 0.12's stats dists do not accept seed, hence we use param grid
     params = dict(C=np.logspace(-4, 1, 3),
                   gamma=np.logspace(-5, 0, 3, base=0.1))
-    for iid in (True, False):
-        for refit in (True, False):
-            random_searches = []
-            for scoring in (('accuracy', 'recall'), 'accuracy', 'recall'):
-                # If True, for multi-metric pass refit='accuracy'
-                if refit:
-                    probability = True
-                    refit = 'accuracy' if isinstance(scoring, tuple) else refit
-                else:
-                    probability = False
-                clf = SVC(probability=probability, random_state=42)
-                random_search = RandomizedSearchCV(clf, n_iter=n_search_iter,
-                                                   cv=n_splits, iid=iid,
-                                                   param_distributions=params,
-                                                   scoring=scoring,
-                                                   refit=refit, random_state=0)
-                random_search.fit(X, y)
-                random_searches.append(random_search)
-
-            compare_cv_results_multimetric_with_single(*random_searches,
-                                                       iid=iid)
-            compare_refit_methods_when_refit_with_acc(
-                random_searches[0], random_searches[1], refit)
-
-
-@pytest.mark.filterwarnings("ignore:The parameter 'iid' is deprecated")  # 0.24
+    for refit in (True, False):
+        random_searches = []
+        for scoring in (('accuracy', 'recall'), 'accuracy', 'recall'):
+            # If True, for multi-metric pass refit='accuracy'
+            if refit:
+                probability = True
+                refit = 'accuracy' if isinstance(scoring, tuple) else refit
+            else:
+                probability = False
+            clf = SVC(probability=probability, random_state=42)
+            random_search = RandomizedSearchCV(clf, n_iter=n_search_iter,
+                                               cv=n_splits,
+                                               param_distributions=params,
+                                               scoring=scoring,
+                                               refit=refit, random_state=0)
+            random_search.fit(X, y)
+            random_searches.append(random_search)
+
+        compare_cv_results_multimetric_with_single(*random_searches)
+        compare_refit_methods_when_refit_with_acc(
+            random_searches[0], random_searches[1], refit)
+
+
 def compare_cv_results_multimetric_with_single(
-        search_multi, search_acc, search_rec, iid):
+        search_multi, search_acc, search_rec):
     """Compare multi-metric cv_results with the ensemble of multiple
     single metric cv_results from single metric grid/random search"""
 
-    assert search_multi.iid == iid
     assert search_multi.multimetric_
     assert_array_equal(sorted(search_multi.scorer_),
                        ('accuracy', 'recall'))
@@ -1727,16 +1614,12 @@ def _run_search(self, evaluate):
 
     results = mycv.cv_results_
     check_results(results, gscv)
-    # TODO: remove in v0.24, the deprecation goes away then.
-    with pytest.warns(FutureWarning,
-                      match="attribute is to be deprecated from version 0.22"):
-        for attr in dir(gscv):
-            if (attr[0].islower() and attr[-1:] == '_' and
-                    attr not in {'cv_results_', 'best_estimator_',
-                                 'refit_time_',
-                                 }):
-                assert getattr(gscv, attr) == getattr(mycv, attr), \
-                    "Attribute %s not equal" % attr
+    for attr in dir(gscv):
+        if (attr[0].islower() and attr[-1:] == '_' and
+                attr not in {'cv_results_', 'best_estimator_',
+                             'refit_time_', 'classes_'}):
+            assert getattr(gscv, attr) == getattr(mycv, attr), \
+                "Attribute %s not equal" % attr
 
 
 def test__custom_fit_no_run_search():
@@ -1760,18 +1643,6 @@ def __init__(self, estimator, **kwargs):
         BadSearchCV(SVC()).fit(X, y)
 
 
-@pytest.mark.parametrize("iid", [False, True])
-def test_deprecated_grid_search_iid(iid):
-    # FIXME: remove in 0.24
-    depr_msg = "The parameter 'iid' is deprecated in 0.22 and will be removed"
-    X, y = make_blobs(n_samples=54, random_state=0, centers=2)
-    grid = GridSearchCV(
-        SVC(random_state=0), param_grid={'C': [10]}, cv=3, iid=iid
-    )
-    with pytest.warns(FutureWarning, match=depr_msg):
-        grid.fit(X, y)
-
-
 def test_empty_cv_iterator_error():
     # Use global X, y
 
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index b89571ba085dd..0f9238d63ec64 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -1616,7 +1616,6 @@ def test_leave_p_out_empty_trainset():
 @pytest.mark.parametrize('Klass', (KFold, StratifiedKFold))
 def test_random_state_shuffle_false(Klass):
     # passing a non-default random_state when shuffle=False makes no sense
-    # TODO 0.24: raise a ValueError instead of a warning
-    with pytest.warns(FutureWarning,
-                      match='has no effect since shuffle is False'):
+    with pytest.raises(ValueError,
+                       match='has no effect since shuffle is False'):
         Klass(3, shuffle=False, random_state=0)
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index a5ede43f0fe8c..9d70d5f61c20f 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -27,7 +27,6 @@
 from .utils.validation import (check_is_fitted, has_fit_parameter,
                                _check_fit_params, _deprecate_positional_args)
 from .utils.multiclass import check_classification_targets
-from .utils import deprecated
 
 __all__ = ["MultiOutputRegressor", "MultiOutputClassifier",
            "ClassifierChain", "RegressorChain"]
@@ -788,10 +787,3 @@ def fit(self, X, Y, **fit_params):
 
     def _more_tags(self):
         return {'multioutput_only': True}
-
-
-# TODO: remove in 0.24
-@deprecated("MultiOutputEstimator is deprecated in version "
-            "0.22 and will be removed in version 0.24.")
-class MultiOutputEstimator(_MultiOutputEstimator):
-    pass
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index e631bb3dcd599..11246203a8b94 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -27,7 +27,7 @@
 from .preprocessing import binarize
 from .preprocessing import LabelBinarizer
 from .preprocessing import label_binarize
-from .utils import check_X_y, check_array, deprecated
+from .utils import check_X_y, check_array
 from .utils.extmath import safe_sparse_dot
 from .utils.multiclass import _check_partial_fit_first_call
 from .utils.validation import check_is_fitted, check_non_negative, column_or_1d
@@ -52,13 +52,9 @@ def _joint_log_likelihood(self, X):
         predict_proba and predict_log_proba.
         """
 
+    @abstractmethod
     def _check_X(self, X):
         """To be overridden in subclasses with the actual checks."""
-        # Note that this is not marked @abstractmethod as long as the
-        # deprecated public alias sklearn.naive_bayes.BayesNB exists
-        # (until 0.24) to preserve backward compat for 3rd party projects
-        # with existing derived classes.
-        return X
 
     def predict(self, X):
         """
@@ -1225,17 +1221,3 @@ def _joint_log_likelihood(self, X):
             jll += self.feature_log_prob_[i][:, indices].T
         total_ll = jll + self.class_log_prior_
         return total_ll
-
-
-# TODO: remove in 0.24
-@deprecated("BaseNB is deprecated in version "
-            "0.22 and will be removed in version 0.24.")
-class BaseNB(_BaseNB):
-    pass
-
-
-# TODO: remove in 0.24
-@deprecated("BaseDiscreteNB is deprecated in version "
-            "0.22 and will be removed in version 0.24.")
-class BaseDiscreteNB(_BaseDiscreteNB):
-    pass
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 6f02cb565e15c..ca9546696e94d 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -11,7 +11,6 @@
 
 from collections import defaultdict
 from itertools import islice
-import warnings
 
 import numpy as np
 from scipy import sparse
@@ -780,7 +779,8 @@ class FeatureUnion(TransformerMixin, _BaseComposition):
     ----------
     transformer_list : list of (string, transformer) tuples
         List of transformer objects to be applied to the data. The first
-        half of each tuple is the name of the transformer.
+        half of each tuple is the name of the transformer. The transformer can
+        be 'drop' for it to be ignored.
 
         .. versionchanged:: 0.22
            Deprecated `None` as a transformer in favor of 'drop'.
@@ -865,13 +865,6 @@ def _validate_transformers(self):
 
         # validate estimators
         for t in transformers:
-            # TODO: Remove in 0.24 when None is removed
-            if t is None:
-                warnings.warn("Using None as a transformer is deprecated "
-                              "in version 0.22 and will be removed in "
-                              "version 0.24. Please use 'drop' instead.",
-                              FutureWarning)
-                continue
             if t == 'drop':
                 continue
             if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
@@ -888,7 +881,7 @@ def _iter(self):
         get_weight = (self.transformer_weights or {}).get
         return ((name, trans, get_weight(name))
                 for name, trans in self.transformer_list
-                if trans is not None and trans != 'drop')
+                if trans != 'drop')
 
     def get_feature_names(self):
         """Get feature names from all transformers.
@@ -1011,7 +1004,7 @@ def transform(self, X):
 
     def _update_transformer_list(self, transformers):
         transformers = iter(transformers)
-        self.transformer_list[:] = [(name, old if old is None or old == 'drop'
+        self.transformer_list[:] = [(name, old if old == 'drop'
                                      else next(transformers))
                                     for name, old in self.transformer_list]
 
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index e7a502bf1e7b5..8204997f0537b 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -41,7 +41,6 @@
 from .utils.validation import check_array, check_is_fitted
 from .utils.validation import _deprecate_positional_args
 from .exceptions import DataDimensionalityWarning
-from .utils import deprecated
 
 
 __all__ = ["SparseRandomProjection",
@@ -153,13 +152,6 @@ def _check_input_size(n_components, n_features):
                          n_features)
 
 
-# TODO: remove in 0.24
-@deprecated("gaussian_random_matrix is deprecated in "
-            "0.22 and will be removed in version 0.24.")
-def gaussian_random_matrix(n_components, n_features, random_state=None):
-    return _gaussian_random_matrix(n_components, n_features, random_state)
-
-
 def _gaussian_random_matrix(n_components, n_features, random_state=None):
     """Generate a dense Gaussian random matrix.
 
@@ -200,15 +192,6 @@ def _gaussian_random_matrix(n_components, n_features, random_state=None):
     return components
 
 
-# TODO: remove in 0.24
-@deprecated("gaussian_random_matrix is deprecated in "
-            "0.22 and will be removed in version 0.24.")
-def sparse_random_matrix(n_components, n_features, density='auto',
-                         random_state=None):
-    return _sparse_random_matrix(n_components, n_features, density,
-                                 random_state)
-
-
 def _sparse_random_matrix(n_components, n_features, density='auto',
                           random_state=None):
     """Generalized Achlioptas random sparse matrix for random projection
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 38abb0b158fd3..280ade175bc4a 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -756,7 +756,6 @@ def test_dtype_of_classifier_probas(strategy):
     assert probas.dtype == np.float64
 
 
-@pytest.mark.filterwarnings("ignore:The default value of strategy.*")  # 0.24
 @pytest.mark.parametrize('Dummy', (DummyRegressor, DummyClassifier))
 def test_n_features_in_(Dummy):
     X = [[1, 2]]
@@ -765,22 +764,3 @@ def test_n_features_in_(Dummy):
     assert not hasattr(d, 'n_features_in_')
     d.fit(X, y)
     assert d.n_features_in_ is None
-
-
-@pytest.mark.parametrize("Dummy", (DummyRegressor, DummyClassifier))
-def test_outputs_2d_deprecation(Dummy):
-    X = [[1, 2]]
-    y = [0]
-    with pytest.warns(FutureWarning,
-                      match="will be removed in version 0.24"):
-        Dummy().fit(X, y).outputs_2d_
-
-
-# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
-def test_strategy_stratified_deprecated_for_prior():
-    X, y = [[1, 2]], [0]
-
-    msg = ("The default value of strategy will change from "
-           "stratified to prior in 0.24")
-    with pytest.warns(FutureWarning, match=msg):
-        DummyClassifier().fit(X, y)
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index d312231d0430a..ce8e77dc72ba4 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -26,7 +26,6 @@
 from sklearn.multioutput import ClassifierChain, RegressorChain
 from sklearn.multioutput import MultiOutputClassifier
 from sklearn.multioutput import MultiOutputRegressor
-from sklearn.multioutput import MultiOutputEstimator
 from sklearn.svm import LinearSVC
 from sklearn.base import ClassifierMixin
 from sklearn.utils import shuffle
@@ -556,15 +555,6 @@ def test_multi_output_classes_(estimator):
         assert_array_equal(estimator_classes, expected_classes)
 
 
-# TODO: remove in 0.24
-def test_deprecation():
-    class A(MultiOutputEstimator, MultiOutputRegressor):
-        pass
-
-    with pytest.warns(FutureWarning, match="is deprecated in version 0.22"):
-        A(SGDRegressor(random_state=0, max_iter=5))
-
-
 class DummyRegressorWithFitParams(DummyRegressor):
     def fit(self, X, y, sample_weight=None, **fit_params):
         self._fit_params = fit_params
diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py
index 1106684998f75..3f15e548dde62 100644
--- a/sklearn/tests/test_naive_bayes.py
+++ b/sklearn/tests/test_naive_bayes.py
@@ -21,7 +21,6 @@
 from sklearn.naive_bayes import GaussianNB, BernoulliNB
 from sklearn.naive_bayes import MultinomialNB, ComplementNB
 from sklearn.naive_bayes import CategoricalNB
-from sklearn.naive_bayes import BaseNB, BaseDiscreteNB
 
 
 # Data is just 6 separable points in the plane
@@ -826,19 +825,3 @@ def test_check_accuracy_on_digits():
 
     scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
     assert scores.mean() > 0.86
-
-
-# TODO: remove in 0.24
-def test_deprecations():
-
-    class A(BaseNB, GaussianNB):
-        pass
-
-    class B(BaseDiscreteNB, CategoricalNB):
-        pass
-
-    with pytest.warns(FutureWarning, match="is deprecated in version 0.22"):
-        A()
-
-    with pytest.warns(FutureWarning, match="is deprecated in version 0.22"):
-        B()
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index b9c2e26abac61..9f61b12e24ca1 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -900,9 +900,7 @@ def test_set_feature_union_steps():
     assert ['mock__x5'] == ft.get_feature_names()
 
 
-# TODO: Remove parametrization in 0.24 when None is removed for FeatureUnion
-@pytest.mark.parametrize('drop', ['drop', None])
-def test_set_feature_union_step_drop(drop):
+def test_set_feature_union_step_drop():
     mult2 = Mult(2)
     mult2.get_feature_names = lambda: ['x2']
     mult3 = Mult(3)
@@ -915,32 +913,32 @@ def test_set_feature_union_step_drop(drop):
     assert ['m2__x2', 'm3__x3'] == ft.get_feature_names()
 
     with pytest.warns(None) as record:
-        ft.set_params(m2=drop)
+        ft.set_params(m2='drop')
         assert_array_equal([[3]], ft.fit(X).transform(X))
         assert_array_equal([[3]], ft.fit_transform(X))
     assert ['m3__x3'] == ft.get_feature_names()
-    assert record if drop is None else not record
+    assert not record
 
     with pytest.warns(None) as record:
-        ft.set_params(m3=drop)
+        ft.set_params(m3='drop')
         assert_array_equal([[]], ft.fit(X).transform(X))
         assert_array_equal([[]], ft.fit_transform(X))
     assert [] == ft.get_feature_names()
-    assert record if drop is None else not record
+    assert not record
 
     with pytest.warns(None) as record:
         # check we can change back
         ft.set_params(m3=mult3)
         assert_array_equal([[3]], ft.fit(X).transform(X))
-    assert record if drop is None else not record
+    assert not record
 
     with pytest.warns(None) as record:
         # Check 'drop' step at construction time
-        ft = FeatureUnion([('m2', drop), ('m3', mult3)])
+        ft = FeatureUnion([('m2', 'drop'), ('m3', mult3)])
         assert_array_equal([[3]], ft.fit(X).transform(X))
         assert_array_equal([[3]], ft.fit_transform(X))
     assert ['m3__x3'] == ft.get_feature_names()
-    assert record if drop is None else not record
+    assert not record
 
 
 def test_step_name_validation():
@@ -1227,16 +1225,3 @@ def transform(self, X, y=None):
 
     t.fit(X, y, a=0)
     t.fit_transform(X, y, a=0)
-
-
-# TODO: Remove in 0.24 when None is removed
-def test_feature_union_warns_with_none():
-    msg = (r"Using None as a transformer is deprecated in version 0\.22 and "
-           r"will be removed in version 0\.24\. Please use 'drop' instead\.")
-    with pytest.warns(FutureWarning, match=msg):
-        union = FeatureUnion([('multi1', None), ('multi2', Mult())])
-
-    X = [[1, 2, 3], [4, 5, 6]]
-
-    with pytest.warns(FutureWarning, match=msg):
-        union.fit_transform(X)
diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py
index b8d69632105b0..829f7cccc4ba9 100644
--- a/sklearn/tests/test_random_projection.py
+++ b/sklearn/tests/test_random_projection.py
@@ -10,9 +10,7 @@
 
 from sklearn.random_projection import johnson_lindenstrauss_min_dim
 from sklearn.random_projection import _gaussian_random_matrix
-from sklearn.random_projection import gaussian_random_matrix
 from sklearn.random_projection import _sparse_random_matrix
-from sklearn.random_projection import sparse_random_matrix
 from sklearn.random_projection import SparseRandomProjection
 from sklearn.random_projection import GaussianRandomProjection
 
@@ -354,13 +352,3 @@ def test_works_with_sparse_data():
                                      random_state=1).fit(sp.csr_matrix(data))
         assert_array_almost_equal(densify(rp_dense.components_),
                                   densify(rp_sparse.components_))
-
-
-# TODO remove in 0.24
-def test_deprecations():
-
-    with pytest.warns(FutureWarning, match="deprecated in 0.22"):
-        gaussian_random_matrix(10, 100)
-
-    with pytest.warns(FutureWarning, match="deprecated in 0.22"):
-        sparse_random_matrix(10, 100)
diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
index 3994613d92b6b..a20a85826f86d 100644
--- a/sklearn/tree/_classes.py
+++ b/sklearn/tree/_classes.py
@@ -97,7 +97,6 @@ def __init__(self, *,
                  min_impurity_decrease,
                  min_impurity_split,
                  class_weight=None,
-                 presort='deprecated',
                  ccp_alpha=0.0):
         self.criterion = criterion
         self.splitter = splitter
@@ -111,7 +110,6 @@ def __init__(self, *,
         self.min_impurity_decrease = min_impurity_decrease
         self.min_impurity_split = min_impurity_split
         self.class_weight = class_weight
-        self.presort = presort
         self.ccp_alpha = ccp_alpha
 
     def get_depth(self):
@@ -319,13 +317,6 @@ def fit(self, X, y, sample_weight=None, check_input=True,
             raise ValueError("min_impurity_decrease must be greater than "
                              "or equal to 0")
 
-        if self.presort != 'deprecated':
-            warnings.warn("The parameter 'presort' is deprecated and has no "
-                          "effect. It will be removed in v0.24. You can "
-                          "suppress this warning by not passing any value "
-                          "to the 'presort' parameter.",
-                          FutureWarning)
-
         # Build tree
         criterion = self.criterion
         if not isinstance(criterion, Criterion):
@@ -728,11 +719,6 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):
         Note that these weights will be multiplied with sample_weight (passed
         through the fit method) if sample_weight is specified.
 
-    presort : deprecated, default='deprecated'
-        This parameter is deprecated and will be removed in v0.24.
-
-        .. deprecated:: 0.22
-
     ccp_alpha : non-negative float, default=0.0
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
@@ -831,7 +817,6 @@ def __init__(self, *,
                  min_impurity_decrease=0.,
                  min_impurity_split=None,
                  class_weight=None,
-                 presort='deprecated',
                  ccp_alpha=0.0):
         super().__init__(
             criterion=criterion,
@@ -846,7 +831,6 @@ def __init__(self, *,
             random_state=random_state,
             min_impurity_decrease=min_impurity_decrease,
             min_impurity_split=min_impurity_split,
-            presort=presort,
             ccp_alpha=ccp_alpha)
 
     def fit(self, X, y, sample_weight=None, check_input=True,
@@ -1092,11 +1076,6 @@ class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):
            ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it
            will be removed in 0.25. Use ``min_impurity_decrease`` instead.
 
-    presort : deprecated, default='deprecated'
-        This parameter is deprecated and will be removed in v0.24.
-
-        .. deprecated:: 0.22
-
     ccp_alpha : non-negative float, default=0.0
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
@@ -1185,7 +1164,6 @@ def __init__(self, *,
                  max_leaf_nodes=None,
                  min_impurity_decrease=0.,
                  min_impurity_split=None,
-                 presort='deprecated',
                  ccp_alpha=0.0):
         super().__init__(
             criterion=criterion,
@@ -1199,7 +1177,6 @@ def __init__(self, *,
             random_state=random_state,
             min_impurity_decrease=min_impurity_decrease,
             min_impurity_split=min_impurity_split,
-            presort=presort,
             ccp_alpha=ccp_alpha)
 
     def fit(self, X, y, sample_weight=None, check_input=True,
@@ -1246,22 +1223,6 @@ def fit(self, X, y, sample_weight=None, check_input=True,
             X_idx_sorted=X_idx_sorted)
         return self
 
-    @property
-    def classes_(self):
-        # TODO: Remove method in 0.24
-        msg = ("the classes_ attribute is to be deprecated from version "
-               "0.22 and will be removed in 0.24.")
-        warnings.warn(msg, FutureWarning)
-        return np.array([None] * self.n_outputs_)
-
-    @property
-    def n_classes_(self):
-        # TODO: Remove method in 0.24
-        msg = ("the n_classes_ attribute is to be deprecated from version "
-               "0.22 and will be removed in 0.24.")
-        warnings.warn(msg, FutureWarning)
-        return np.array([1] * self.n_outputs_, dtype=np.intp)
-
     def _compute_partial_dependence_recursion(self, grid, target_features):
         """Fast partial dependence computation.
 
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index 071e7efd49177..bfdf7024ad78c 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -1607,19 +1607,6 @@ def test_public_apply_sparse_trees(name):
     check_public_apply_sparse(name)
 
 
-@pytest.mark.parametrize('Cls',
-                         (DecisionTreeRegressor, DecisionTreeClassifier))
-@pytest.mark.parametrize('presort', ['auto', True, False])
-def test_presort_deprecated(Cls, presort):
-    # TODO: remove in v0.24
-    X = np.zeros((10, 10))
-    y = np.r_[[0] * 5, [1] * 5]
-    tree = Cls(presort=presort)
-    with pytest.warns(FutureWarning,
-                      match="The parameter 'presort' is deprecated "):
-        tree.fit(X, y)
-
-
 def test_decision_path_hardcoded():
     X = iris.data
     y = iris.target
@@ -1931,23 +1918,6 @@ def test_prune_tree_raises_negative_ccp_alpha():
         clf._prune_tree()
 
 
-def test_classes_deprecated():
-    X = [[0, 0], [2, 2], [4, 6], [10, 11]]
-    y = [0.5, 2.5, 3.5, 5.5]
-    clf = DecisionTreeRegressor()
-    clf = clf.fit(X, y)
-
-    match = ("attribute is to be deprecated from version "
-             "0.22 and will be removed in 0.24.")
-
-    with pytest.warns(FutureWarning, match=match):
-        n = len(clf.classes_)
-        assert n == clf.n_outputs_
-
-    with pytest.warns(FutureWarning, match=match):
-        assert len(clf.n_classes_) == clf.n_outputs_
-
-
 def check_apply_path_readonly(name):
     X_readonly = create_memmap_backed_data(X_small.astype(tree._tree.DTYPE,
                                                           copy=False))
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index e6834a33cf178..bbde6264a1c77 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -573,6 +573,11 @@ def _set_checking_parameters(estimator):
         # datasets (only very shallow trees are built) that the checks use.
         estimator.set_params(min_samples_leaf=5)
 
+    if name == 'DummyClassifier':
+        # the default strategy 'prior' would output constant predictions and
+        # fail check_classifiers_predictions
+        estimator.set_params(strategy='stratified')
+
     # Speed-up by reducing the number of CV or splits for CV estimators
     loo_cv = ['RidgeCV']
     if name not in loo_cv and hasattr(estimator, 'cv'):