From 92f11547b4638858688c9acd793e0257b0bdaa6e Mon Sep 17 00:00:00 2001 From: Nabil Fayak Date: Fri, 18 Aug 2023 12:14:17 -0400 Subject: [PATCH 1/3] added in checkmates features needed to remove evalml dependencies in tempo --- checkmates/exceptions/__init__.py | 2 + checkmates/exceptions/exceptions.py | 9 + checkmates/objectives/__init__.py | 11 +- checkmates/objectives/utils.py | 36 + checkmates/pipelines/__init__.py | 13 + checkmates/pipelines/component_base.py | 283 +++++ checkmates/pipelines/component_base_meta.py | 44 + checkmates/pipelines/components.py | 1089 +++++++++++++++++ .../pipelines/training_validation_split.py | 102 ++ checkmates/pipelines/transformers.py | 238 ++++ checkmates/pipelines/utils.py | 172 +++ checkmates/utils/__init__.py | 12 +- checkmates/utils/base_meta.py | 46 + checkmates/utils/gen_utils.py | 18 + checkmates/utils/logger.py | 78 ++ checkmates/utils/nullable_type_utils.py | 177 +++ 16 files changed, 2325 insertions(+), 5 deletions(-) create mode 100644 checkmates/pipelines/__init__.py create mode 100644 checkmates/pipelines/component_base.py create mode 100644 checkmates/pipelines/component_base_meta.py create mode 100644 checkmates/pipelines/components.py create mode 100644 checkmates/pipelines/training_validation_split.py create mode 100644 checkmates/pipelines/transformers.py create mode 100644 checkmates/pipelines/utils.py create mode 100644 checkmates/utils/base_meta.py create mode 100644 checkmates/utils/logger.py create mode 100644 checkmates/utils/nullable_type_utils.py diff --git a/checkmates/exceptions/__init__.py b/checkmates/exceptions/__init__.py index cafcc6c..e2fabe5 100644 --- a/checkmates/exceptions/__init__.py +++ b/checkmates/exceptions/__init__.py @@ -5,4 +5,6 @@ ValidationErrorCode, ObjectiveCreationError, ObjectiveNotFoundError, + MethodPropertyNotFoundError, + ComponentNotYetFittedError, ) diff --git a/checkmates/exceptions/exceptions.py b/checkmates/exceptions/exceptions.py index 1fa2540..f7438af 100644 --- a/checkmates/exceptions/exceptions.py +++ b/checkmates/exceptions/exceptions.py @@ -13,6 +13,15 @@ class ObjectiveNotFoundError(Exception): pass +class MethodPropertyNotFoundError(Exception): + """Exception to raise when a class does not have an expected method or property.""" + + pass + +class ComponentNotYetFittedError(Exception): + """An exception to be raised when predict/predict_proba/transform is called on a component without fitting first.""" + + pass class ObjectiveCreationError(Exception): """Exception when get_objective tries to instantiate an objective and required args are not provided.""" diff --git a/checkmates/objectives/__init__.py b/checkmates/objectives/__init__.py index ba6a55b..0bbf6ec 100644 --- a/checkmates/objectives/__init__.py +++ b/checkmates/objectives/__init__.py @@ -3,10 +3,13 @@ from checkmates.objectives.objective_base import ObjectiveBase from checkmates.objectives.regression_objective import RegressionObjective -from checkmates.objectives.utils import get_objective -from checkmates.objectives.utils import get_default_primary_search_objective -from checkmates.objectives.utils import get_non_core_objectives -from checkmates.objectives.utils import get_core_objectives +from checkmates.objectives.utils import ( + get_objective, + get_default_primary_search_objective, + get_non_core_objectives, + get_core_objectives, + get_problem_type, +) from checkmates.objectives.standard_metrics import RootMeanSquaredLogError diff --git a/checkmates/objectives/utils.py b/checkmates/objectives/utils.py index 
1ba882b..1d717ee 100644 --- a/checkmates/objectives/utils.py +++ b/checkmates/objectives/utils.py @@ -1,9 +1,17 @@ """Utility methods for CheckMates objectives.""" +import pandas as pd +from typing import Optional + from checkmates import objectives from checkmates.exceptions import ObjectiveCreationError, ObjectiveNotFoundError from checkmates.objectives.objective_base import ObjectiveBase from checkmates.problem_types import handle_problem_types from checkmates.utils.gen_utils import _get_subclasses +from checkmates.problem_types import ProblemTypes + +from checkmates.utils.logger import get_logger + +logger = get_logger(__file__) def get_non_core_objectives(): @@ -89,6 +97,34 @@ def get_objective(objective, return_instance=False, **kwargs): return objective_class +def get_problem_type( + input_problem_type: Optional[str], + target_data: pd.Series, +) -> ProblemTypes: + """helper function to determine if classification problem is binary or multiclass dependent on target variable values.""" + if not input_problem_type: + raise ValueError("problem type is required") + if input_problem_type.lower() == "classification": + values: pd.Series = target_data.value_counts() + if values.size == 2: + return ProblemTypes.BINARY + elif values.size > 2: + return ProblemTypes.MULTICLASS + else: + message: str = "The target field contains less than two unique values. It cannot be used for modeling." + logger.error(message, exc_info=True) + raise ValueError(message) + + if input_problem_type.lower() == "regression": + return ProblemTypes.REGRESSION + + if input_problem_type.lower() == "time series regression": + return ProblemTypes.TIME_SERIES_REGRESSION + + message = f"Unexpected problem type provided in configuration: {input_problem_type}" + logger.error(message, exc_info=True) + raise ValueError(message) + def get_default_primary_search_objective(problem_type): """Get the default primary search objective for a problem type. diff --git a/checkmates/pipelines/__init__.py b/checkmates/pipelines/__init__.py new file mode 100644 index 0000000..c7d5701 --- /dev/null +++ b/checkmates/pipelines/__init__.py @@ -0,0 +1,13 @@ +from checkmates.pipelines.component_base_meta import ComponentBaseMeta +from checkmates.pipelines.component_base import ComponentBase +from checkmates.pipelines.transformers import Transformer +from checkmates.pipelines.components import ( # noqa: F401 + DropColumns, + DropRowsTransformer, + PerColumnImputer, + TargetImputer, + TimeSeriesImputer, + TimeSeriesRegularizer, +) +from checkmates.pipelines.utils import _make_component_list_from_actions, split_data, drop_infinity +from checkmates.pipelines.training_validation_split import TrainingValidationSplit \ No newline at end of file diff --git a/checkmates/pipelines/component_base.py b/checkmates/pipelines/component_base.py new file mode 100644 index 0000000..0360dd8 --- /dev/null +++ b/checkmates/pipelines/component_base.py @@ -0,0 +1,283 @@ +"""Base class for all components.""" +import copy +from abc import ABC, abstractmethod + +import cloudpickle + +from checkmates.exceptions import MethodPropertyNotFoundError +from checkmates.pipelines.component_base_meta import ComponentBaseMeta +from checkmates.utils import ( + _downcast_nullable_X, + _downcast_nullable_y, + classproperty, + infer_feature_types, + log_subtitle, + safe_repr, +) +from checkmates.utils.logger import get_logger + + +class ComponentBase(ABC, metaclass=ComponentBaseMeta): + """Base class for all components. 
+ + Args: + parameters (dict): Dictionary of parameters for the component. Defaults to None. + component_obj (obj): Third-party objects useful in component implementation. Defaults to None. + random_seed (int): Seed for the random number generator. Defaults to 0. + """ + + _default_parameters = None + _can_be_used_for_fast_partial_dependence = True + # Referring to the pandas nullable dtypes; not just woodwork logical types + _integer_nullable_incompatibilities = [] + _boolean_nullable_incompatibilities = [] + is_multiseries = False + + def __init__(self, parameters=None, component_obj=None, random_seed=0, **kwargs): + """Base class for all components. + + Args: + parameters (dict): Dictionary of parameters for the component. Defaults to None. + component_obj (obj): Third-party objects useful in component implementation. Defaults to None. + random_seed (int): Seed for the random number generator. Defaults to 0. + kwargs (Any): Any keyword arguments to pass into the component. + """ + self.random_seed = random_seed + self._component_obj = component_obj + self._parameters = parameters or {} + self._is_fitted = False + + @property + @classmethod + @abstractmethod + def name(cls): + """Returns string name of this component.""" + + @property + @classmethod + @abstractmethod + def modifies_features(cls): + """Returns whether this component modifies (subsets or transforms) the features variable during transform. + + For Estimator objects, this attribute determines if the return + value from `predict` or `predict_proba` should be used as + features or targets. + """ + + @property + @classmethod + @abstractmethod + def modifies_target(cls): + """Returns whether this component modifies (subsets or transforms) the target variable during transform. + + For Estimator objects, this attribute determines if the return + value from `predict` or `predict_proba` should be used as + features or targets. + """ + + @property + @classmethod + @abstractmethod + def training_only(cls): + """Returns whether or not this component should be evaluated during training-time only, or during both training and prediction time.""" + + @classproperty + def needs_fitting(self): + """Returns boolean determining if component needs fitting before calling predict, predict_proba, transform, or feature_importances. + + This can be overridden to False for components that do not need to be fit or whose fit methods do nothing. + + Returns: + True. + """ + return True + + @property + def parameters(self): + """Returns the parameters which were used to initialize the component.""" + return copy.copy(self._parameters) + + @classproperty + def default_parameters(cls): + """Returns the default parameters for this component. + + Our convention is that Component.default_parameters == Component().parameters. + + Returns: + dict: Default parameters for this component. + """ + if cls._default_parameters is None: + cls._default_parameters = cls().parameters + + return cls._default_parameters + + @classproperty + def _supported_by_list_API(cls): + return not cls.modifies_target + + def _handle_partial_dependence_fast_mode( + self, + pipeline_parameters, + X=None, + target=None, + ): + """Determines whether or not a component can be used with partial dependence's fast mode. + + Args: + pipeline_parameters (dict): Pipeline parameters that will be used to create the pipelines + used in partial dependence fast mode. + X (pd.DataFrame, optional): Holdout data being used for partial dependence calculations. 
+ target (str, optional): The target whose values we are trying to predict. + """ + if self._can_be_used_for_fast_partial_dependence: + return pipeline_parameters + + raise TypeError( + f"Component {self.name} cannot run partial dependence fast mode.", + ) + + def clone(self): + """Constructs a new component with the same parameters and random state. + + Returns: + A new instance of this component with identical parameters and random state. + """ + return self.__class__(**self.parameters, random_seed=self.random_seed) + + def fit(self, X, y=None): + """Fits component to data. + + Args: + X (pd.DataFrame): The input training data of shape [n_samples, n_features] + y (pd.Series, optional): The target training data of length [n_samples] + + Returns: + self + + Raises: + MethodPropertyNotFoundError: If component does not have a fit method or a component_obj that implements fit. + """ + X = infer_feature_types(X) + if y is not None: + y = infer_feature_types(y) + try: + self._component_obj.fit(X, y) + return self + except AttributeError: + raise MethodPropertyNotFoundError( + "Component requires a fit method or a component_obj that implements fit", + ) + + def describe(self, print_name=False, return_dict=False): + """Describe a component and its parameters. + + Args: + print_name(bool, optional): whether to print name of component + return_dict(bool, optional): whether to return description as dictionary in the format {"name": name, "parameters": parameters} + + Returns: + None or dict: Returns dictionary if return_dict is True, else None. + """ + logger = get_logger(f"{__name__}.describe") + if print_name: + title = self.name + log_subtitle(logger, title) + for parameter in self.parameters: + parameter_str = ("\t * {} : {}").format( + parameter, + self.parameters[parameter], + ) + logger.info(parameter_str) + if return_dict: + component_dict = {"name": self.name} + component_dict.update({"parameters": self.parameters}) + return component_dict + + def save(self, file_path, pickle_protocol=cloudpickle.DEFAULT_PROTOCOL): + """Saves component at file path. + + Args: + file_path (str): Location to save file. + pickle_protocol (int): The pickle data stream format. + """ + with open(file_path, "wb") as f: + cloudpickle.dump(self, f, protocol=pickle_protocol) + + @staticmethod + def load(file_path): + """Loads component at file path. + + Args: + file_path (str): Location to load file. + + Returns: + ComponentBase object + """ + with open(file_path, "rb") as f: + return cloudpickle.load(f) + + def __eq__(self, other): + """Check for equality.""" + if not isinstance(other, self.__class__): + return False + random_seed_eq = self.random_seed == other.random_seed + if not random_seed_eq: + return False + attributes_to_check = ["_parameters", "_is_fitted"] + for attribute in attributes_to_check: + if getattr(self, attribute) != getattr(other, attribute): + return False + return True + + def __str__(self): + """String representation of a component.""" + return self.name + + def __repr__(self): + """String representation of a component.""" + parameters_repr = ", ".join( + [f"{key}={safe_repr(value)}" for key, value in self.parameters.items()], + ) + return f"{(type(self).__name__)}({parameters_repr})" + + def update_parameters(self, update_dict, reset_fit=True): + """Updates the parameter dictionary of the component. + + Args: + update_dict (dict): A dict of parameters to update. + reset_fit (bool, optional): If True, will set `_is_fitted` to False. 
+ """ + self._parameters.update(update_dict) + if reset_fit: + self._is_fitted = False + + def _handle_nullable_types(self, X=None, y=None): + """Transforms X and y to remove any incompatible nullable types according to a component's needs. + + Args: + X (pd.DataFrame, optional): Input data to a component of shape [n_samples, n_features]. + May contain nullable types. + y (pd.Series, optional): The target of length [n_samples]. May contain nullable types. + + Returns: + X, y with any incompatible nullable types downcasted to compatible equivalents. + """ + X_bool_incompatible = "X" in self._boolean_nullable_incompatibilities + X_int_incompatible = "X" in self._integer_nullable_incompatibilities + if X is not None and (X_bool_incompatible or X_int_incompatible): + X = _downcast_nullable_X( + X, + handle_boolean_nullable=X_bool_incompatible, + handle_integer_nullable=X_int_incompatible, + ) + + y_bool_incompatible = "y" in self._boolean_nullable_incompatibilities + y_int_incompatible = "y" in self._integer_nullable_incompatibilities + if y is not None and (y_bool_incompatible or y_int_incompatible): + y = _downcast_nullable_y( + y, + handle_boolean_nullable=y_bool_incompatible, + handle_integer_nullable=y_int_incompatible, + ) + + return X, y \ No newline at end of file diff --git a/checkmates/pipelines/component_base_meta.py b/checkmates/pipelines/component_base_meta.py new file mode 100644 index 0000000..a922ad4 --- /dev/null +++ b/checkmates/pipelines/component_base_meta.py @@ -0,0 +1,44 @@ +"""Metaclass that overrides creating a new component by wrapping methods with validators and setters.""" +from functools import wraps + +from checkmates.exceptions import ComponentNotYetFittedError +from checkmates.utils.base_meta import BaseMeta + + +class ComponentBaseMeta(BaseMeta): + """Metaclass that overrides creating a new component by wrapping methods with validators and setters.""" + + @classmethod + def check_for_fit(cls, method): + """`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`. + + It raises an exception if `False` and calls and returns the wrapped method if `True`. + + Args: + method (callable): Method to wrap. + + Returns: + The wrapped method. + + Raises: + ComponentNotYetFittedError: If component is not yet fitted. + """ + + @wraps(method) + def _check_for_fit(self, X=None, y=None): + klass = type(self).__name__ + if not self._is_fitted and self.needs_fitting: + raise ComponentNotYetFittedError( + f"This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.", + ) + elif method.__name__ == "inverse_transform": + # Since inverse transform only takes one argument, the y is actually "called" X in this piece of code. 
+ return method(self, X) + elif X is None and y is None: + return method(self) + elif y is None: + return method(self, X) + else: + return method(self, X, y) + + return _check_for_fit \ No newline at end of file diff --git a/checkmates/pipelines/components.py b/checkmates/pipelines/components.py new file mode 100644 index 0000000..a83c61a --- /dev/null +++ b/checkmates/pipelines/components.py @@ -0,0 +1,1089 @@ +"""Initializes a transformer that selects specified columns in input data.""" +from abc import abstractmethod +from functools import wraps +import pandas as pd +import woodwork as ww +import warnings +from sklearn.impute import SimpleImputer as SkImputer + +from woodwork.logical_types import Datetime +from woodwork.statistics_utils import infer_frequency + +from checkmates.pipelines.transformers import Transformer +from checkmates.pipelines.transformers import SimpleImputer +from checkmates.exceptions import ComponentNotYetFittedError +from checkmates.pipelines import ComponentBaseMeta +from checkmates.utils import infer_feature_types +from checkmates.utils.nullable_type_utils import ( + _get_new_logical_types_for_imputed_data, + _determine_fractional_type, + _determine_non_nullable_equivalent, +) + + + +class ColumnSelector(Transformer): + """Initializes a transformer that selects specified columns in input data. + + Args: + columns (list(string)): List of column names, used to determine which columns to select. + random_seed (int): Seed for the random number generator. Defaults to 0. + """ + + def __init__(self, columns=None, random_seed=0, **kwargs): + if columns and not isinstance(columns, list): + raise ValueError( + f"Parameter columns must be a list. Received {type(columns)}.", + ) + + parameters = {"columns": columns} + parameters.update(kwargs) + super().__init__( + parameters=parameters, + component_obj=None, + random_seed=random_seed, + ) + + def _check_input_for_columns(self, X): + cols = self.parameters.get("columns") or [] + column_names = X.columns + + missing_cols = set(cols) - set(column_names) + if missing_cols: + raise ValueError(f"Columns of type {missing_cols} not found in input data.") + + @abstractmethod + def _modify_columns(self, cols, X, y=None): + """How the transformer modifies the columns of the input data.""" + + def fit(self, X, y=None): + """Fits the transformer by checking if column names are present in the dataset. + + Args: + X (pd.DataFrame): Data to check. + y (pd.Series, ignored): Targets. + + Returns: + self + """ + X = infer_feature_types(X) + self._check_input_for_columns(X) + return self + + def transform(self, X, y=None): + """Transform data using fitted column selector component. + + Args: + X (pd.DataFrame): The input training data of shape [n_samples, n_features]. + y (pd.Series, optional): The target training data of length [n_samples]. + + Returns: + pd.DataFrame: Transformed data. + """ + X = infer_feature_types(X) + self._check_input_for_columns(X) + cols = self.parameters.get("columns") or [] + modified_cols = self._modify_columns(cols, X, y) + return infer_feature_types(modified_cols) + + +class DropColumns(ColumnSelector): + """Drops specified columns in input data. + + Args: + columns (list(string)): List of column names, used to determine which columns to drop. + random_seed (int): Seed for the random number generator. Defaults to 0. 
+ """ + + name = "Drop Columns Transformer" + hyperparameter_ranges = {} + """{}""" + needs_fitting = False + + def _check_input_for_columns(self, X): + pass + + def _modify_columns(self, cols, X, y=None): + column_intersection = list(set(cols).intersection(X.columns)) + return X.ww.drop(column_intersection) + + def transform(self, X, y=None): + """Transforms data X by dropping columns. + + Args: + X (pd.DataFrame): Data to transform. + y (pd.Series, optional): Targets. + + Returns: + pd.DataFrame: Transformed X. + """ + return super().transform(X, y) + + +class SelectColumns(ColumnSelector): + """Selects specified columns in input data. + + Args: + columns (list(string)): List of column names, used to determine which columns to select. If columns are not present, they will not be selected. + random_seed (int): Seed for the random number generator. Defaults to 0. + """ + + name = "Select Columns Transformer" + hyperparameter_ranges = {} + """{}""" + needs_fitting = False + + def _check_input_for_columns(self, X): + pass + + def fit(self, X, y=None): + """Fits the transformer by checking if column names are present in the dataset. + + Args: + X (pd.DataFrame): Data to check. + y (pd.Series, optional): Targets. + + Returns: + self + """ + return self + + def _modify_columns(self, cols, X, y=None): + column_intersection = list( + sorted(set(cols).intersection(X.columns), key=cols.index), + ) + return X.ww[column_intersection] + + +class SelectByType(Transformer): + """Selects columns by specified Woodwork logical type or semantic tag in input data. + + Args: + column_types (string, ww.LogicalType, list(string), list(ww.LogicalType)): List of Woodwork types or tags, used to determine which columns to select or exclude. + exclude (bool): If true, exclude the column_types instead of including them. Defaults to False. + random_seed (int): Seed for the random number generator. Defaults to 0. + """ + + name = "Select Columns By Type Transformer" + hyperparameter_ranges = {} + """{}""" + needs_fitting = False + + def __init__(self, column_types=None, exclude=False, random_seed=0, **kwargs): + parameters = {"column_types": column_types, "exclude": exclude} + parameters.update(kwargs) + super().__init__( + parameters=parameters, + component_obj=None, + random_seed=random_seed, + ) + + def _modify_columns(self, cols, X, y=None): + if self.parameters.get("exclude"): + return X.ww.select(exclude=cols) + return X.ww.select(include=cols) + + def fit(self, X, y=None): + """Fits the transformer by checking if column names are present in the dataset. + + Args: + X (pd.DataFrame): Data to check. + y (pd.Series, ignored): Targets. + + Returns: + self + """ + X = infer_feature_types(X) + return self + + def transform(self, X, y=None): + """Transforms data X by selecting columns. + + Args: + X (pd.DataFrame): Data to transform. + y (pd.Series, optional): Targets. + + Returns: + pd.DataFrame: Transformed X. + """ + X = infer_feature_types(X) + cols = self.parameters.get("column_types") or [] + modified_cols = self._modify_columns(cols, X, y) + return infer_feature_types(modified_cols) + +"""Transformer to drop rows specified by row indices.""" + +class DropRowsTransformer(Transformer): + """Transformer to drop rows specified by row indices. + + Args: + indices_to_drop (list): List of indices to drop in the input data. Defaults to None. + random_seed (int): Seed for the random number generator. Is not used by this component. Defaults to 0. 
+ """ + + name = "Drop Rows Transformer" + modifies_target = True + training_only = True + hyperparameter_ranges = {} + """{}""" + + def __init__(self, indices_to_drop=None, random_seed=0): + if indices_to_drop is not None and len(set(indices_to_drop)) != len( + indices_to_drop, + ): + raise ValueError("All input indices must be unique.") + self.indices_to_drop = indices_to_drop + parameters = {"indices_to_drop": self.indices_to_drop} + super().__init__( + parameters=parameters, + component_obj=None, + random_seed=random_seed, + ) + + def fit(self, X, y=None): + """Fits component to data. + + Args: + X (pd.DataFrame): The input training data of shape [n_samples, n_features]. + y (pd.Series, optional): The target training data of length [n_samples]. + + Returns: + self + + Raises: + ValueError: If indices to drop do not exist in input features or target. + """ + X_t = infer_feature_types(X) + y_t = infer_feature_types(y) if y is not None else None + if self.indices_to_drop is not None: + indices_to_drop_set = set(self.indices_to_drop) + missing_X_indices = indices_to_drop_set.difference(set(X_t.index)) + missing_y_indices = ( + indices_to_drop_set.difference(set(y_t.index)) + if y_t is not None + else None + ) + if len(missing_X_indices): + raise ValueError( + "Indices [{}] do not exist in input features".format( + list(missing_X_indices), + ), + ) + elif y_t is not None and len(missing_y_indices): + raise ValueError( + "Indices [{}] do not exist in input target".format( + list(missing_y_indices), + ), + ) + return self + + def transform(self, X, y=None): + """Transforms data using fitted component. + + Args: + X (pd.DataFrame): Features. + y (pd.Series, optional): Target data. + + Returns: + (pd.DataFrame, pd.Series): Data with row indices dropped. + """ + X_t = infer_feature_types(X) + y_t = infer_feature_types(y) if y is not None else None + if self.indices_to_drop is None or len(self.indices_to_drop) == 0: + return X_t, y_t + schema = X_t.ww.schema + + X_t = X_t.drop(self.indices_to_drop, axis=0) + X_t.ww.init(schema=schema) + + if y_t is not None: + y_t = y_t.ww.drop(self.indices_to_drop) + return X_t, y_t + +"""Component that imputes missing data according to a specified imputation strategy per column.""" + +class PerColumnImputer(Transformer): + """Imputes missing data according to a specified imputation strategy per column. + + Args: + impute_strategies (dict): Column and {"impute_strategy": strategy, "fill_value":value} pairings. + Valid values for impute strategy include "mean", "median", "most_frequent", "constant" for numerical data, + and "most_frequent", "constant" for object data types. Defaults to None, which uses "most_frequent" for all columns. + When impute_strategy == "constant", fill_value is used to replace missing data. + When None, uses 0 when imputing numerical data and "missing_value" for strings or object data types. + random_seed (int): Seed for the random number generator. Defaults to 0. + """ + + name = "Per Column Imputer" + hyperparameter_ranges = {} + """{}""" + + def __init__( + self, + impute_strategies=None, + random_seed=0, + **kwargs, + ): + parameters = { + "impute_strategies": impute_strategies, + } + self.imputers = None + self.impute_strategies = impute_strategies or dict() + if not isinstance(self.impute_strategies, dict): + raise ValueError( + "`impute_strategies` is not a dictionary. Please provide in Column and {`impute_strategy`: strategy, `fill_value`:value} pairs. 
", + ) + super().__init__( + parameters=parameters, + component_obj=None, + random_seed=random_seed, + ) + + def fit(self, X, y=None): + """Fits imputers on input data. + + Args: + X (pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features] to fit. + y (pd.Series, optional): The target training data of length [n_samples]. Ignored. + + Returns: + self + """ + X = infer_feature_types(X) + self.imputers = dict() + + columns_to_impute = self.impute_strategies.keys() + if len(columns_to_impute) == 0: + warnings.warn( + "No columns to impute. Please check `impute_strategies` parameter.", + ) + + for column in columns_to_impute: + strategy_dict = self.impute_strategies.get(column, dict()) + strategy = strategy_dict["impute_strategy"] + fill_value = strategy_dict.get("fill_value", None) + self.imputers[column] = SimpleImputer( + impute_strategy=strategy, + fill_value=fill_value, + ) + + for column, imputer in self.imputers.items(): + imputer.fit(X.ww[[column]]) + + return self + + def transform(self, X, y=None): + """Transforms input data by imputing missing values. + + Args: + X (pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features] to transform. + y (pd.Series, optional): The target training data of length [n_samples]. Ignored. + + Returns: + pd.DataFrame: Transformed X + """ + X_ww = infer_feature_types(X) + original_schema = X_ww.ww.schema + + cols_to_drop = [] + for column, imputer in self.imputers.items(): + transformed = imputer.transform(X_ww.ww[[column]]) + if transformed.empty: + cols_to_drop.append(column) + else: + X_ww.ww[column] = transformed[column] + X_t = X_ww.ww.drop(cols_to_drop) + X_t.ww.init(schema=original_schema.get_subset_schema(X_t.columns)) + return X_t + +"""Component that imputes missing target data according to a specified imputation strategy.""" + +class TargetImputerMeta(ComponentBaseMeta): + """A version of the ComponentBaseMeta class which handles when input features is None.""" + + @classmethod + def check_for_fit(cls, method): + """`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`. + + Args: + method (callable): Method to wrap. + + Raises: + ComponentNotYetFittedError: If component is not fitted. + + Returns: + The wrapped input method. + """ + + @wraps(method) + def _check_for_fit(self, X=None, y=None): + klass = type(self).__name__ + if not self._is_fitted and self.needs_fitting: + raise ComponentNotYetFittedError( + f"This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.", + ) + else: + return method(self, X, y) + + return _check_for_fit + + +class TargetImputer(Transformer, metaclass=TargetImputerMeta): + """Imputes missing target data according to a specified imputation strategy. + + Args: + impute_strategy (string): Impute strategy to use. Valid values include "mean", "median", "most_frequent", "constant" for + numerical data, and "most_frequent", "constant" for object data types. Defaults to "most_frequent". + fill_value (string): When impute_strategy == "constant", fill_value is used to replace missing data. + Defaults to None which uses 0 when imputing numerical data and "missing_value" for strings or object data types. + random_seed (int): Seed for the random number generator. Defaults to 0. 
+ """ + + name = "Target Imputer" + hyperparameter_ranges = {"impute_strategy": ["mean", "median", "most_frequent"]} + """{ + "impute_strategy": ["mean", "median", "most_frequent"] + }""" + modifies_features = False + modifies_target = True + + def __init__( + self, impute_strategy="most_frequent", fill_value=None, random_seed=0, **kwargs + ): + parameters = {"impute_strategy": impute_strategy, "fill_value": fill_value} + parameters.update(kwargs) + imputer = SkImputer(strategy=impute_strategy, fill_value=fill_value, **kwargs) + super().__init__( + parameters=parameters, + component_obj=imputer, + random_seed=random_seed, + ) + + def fit(self, X, y): + """Fits imputer to target data. 'None' values are converted to np.nan before imputation and are treated as the same. + + Args: + X (pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]. Ignored. + y (pd.Series, optional): The target training data of length [n_samples]. + + Returns: + self + + Raises: + TypeError: If target is filled with all null values. + """ + if y is None: + return self + y = infer_feature_types(y) + if all(y.isnull()): + raise TypeError("Provided target full of nulls.") + y = y.to_frame() + + # Return early if all the columns are bool dtype, which will never have null values + if (y.dtypes == bool).all(): + return y + + self._component_obj.fit(y) + return self + + def transform(self, X, y): + """Transforms input target data by imputing missing values. 'None' and np.nan values are treated as the same. + + Args: + X (pd.DataFrame): Features. Ignored. + y (pd.Series): Target data to impute. + + Returns: + (pd.DataFrame, pd.Series): The original X, transformed y + """ + if X is not None: + X = infer_feature_types(X) + if y is None: + return X, None + y_ww = infer_feature_types(y) + y_df = y_ww.ww.to_frame() + + # Return early if all the columns are bool dtype, which will never have null values + if (y_df.dtypes == bool).all(): + return X, y_ww + + transformed = self._component_obj.transform(y_df) + y_t = pd.Series(transformed[:, 0], index=y_ww.index) + + # Determine logical type to use - should match input data where possible + new_logical_type_dict = _get_new_logical_types_for_imputed_data( + self.parameters["impute_strategy"], + y_df.ww.schema, + ) + new_logical_type = list(new_logical_type_dict.values())[0] + + return X, ww.init_series(y_t, logical_type=new_logical_type) + + def fit_transform(self, X, y): + """Fits on and transforms the input target data. + + Args: + X (pd.DataFrame): Features. Ignored. + y (pd.Series): Target data to impute. + + Returns: + (pd.DataFrame, pd.Series): The original X, transformed y + """ + return self.fit(X, y).transform(X, y) + +"""Component that imputes missing data according to a specified timeseries-specific imputation strategy.""" +import pandas as pd +import woodwork as ww +from woodwork.logical_types import ( + BooleanNullable, + Double, +) + +class TimeSeriesImputer(Transformer): + """Imputes missing data according to a specified timeseries-specific imputation strategy. + + This Transformer should be used after the `TimeSeriesRegularizer` in order to impute the missing values that were + added to X and y (if passed). + + Args: + categorical_impute_strategy (string): Impute strategy to use for string, object, boolean, categorical dtypes. + Valid values include "backwards_fill" and "forwards_fill". Defaults to "forwards_fill". + numeric_impute_strategy (string): Impute strategy to use for numeric columns. 
Valid values include + "backwards_fill", "forwards_fill", and "interpolate". Defaults to "interpolate". + target_impute_strategy (string): Impute strategy to use for the target column. Valid values include + "backwards_fill", "forwards_fill", and "interpolate". Defaults to "forwards_fill". + random_seed (int): Seed for the random number generator. Defaults to 0. + + Raises: + ValueError: If categorical_impute_strategy, numeric_impute_strategy, or target_impute_strategy is not one of the valid values. + """ + + modifies_features = True + modifies_target = True + training_only = True + + name = "Time Series Imputer" + hyperparameter_ranges = { + "categorical_impute_strategy": ["backwards_fill", "forwards_fill"], + "numeric_impute_strategy": ["backwards_fill", "forwards_fill", "interpolate"], + "target_impute_strategy": ["backwards_fill", "forwards_fill", "interpolate"], + } + """{ + "categorical_impute_strategy": ["backwards_fill", "forwards_fill"], + "numeric_impute_strategy": ["backwards_fill", "forwards_fill", "interpolate"], + "target_impute_strategy": ["backwards_fill", "forwards_fill", "interpolate"], + }""" + _valid_categorical_impute_strategies = set(["backwards_fill", "forwards_fill"]) + _valid_numeric_impute_strategies = set( + ["backwards_fill", "forwards_fill", "interpolate"], + ) + _valid_target_impute_strategies = set( + ["backwards_fill", "forwards_fill", "interpolate"], + ) + + # Incompatibility: https://github.com/alteryx/evalml/issues/4001 + # TODO: Remove when support is added https://github.com/alteryx/evalml/issues/4014 + _integer_nullable_incompatibilities = ["X", "y"] + _boolean_nullable_incompatibilities = ["y"] + + def __init__( + self, + categorical_impute_strategy="forwards_fill", + numeric_impute_strategy="interpolate", + target_impute_strategy="forwards_fill", + random_seed=0, + **kwargs, + ): + if categorical_impute_strategy not in self._valid_categorical_impute_strategies: + raise ValueError( + f"{categorical_impute_strategy} is an invalid parameter. Valid categorical impute strategies are {', '.join(self._valid_numeric_impute_strategies)}", + ) + elif numeric_impute_strategy not in self._valid_numeric_impute_strategies: + raise ValueError( + f"{numeric_impute_strategy} is an invalid parameter. Valid numeric impute strategies are {', '.join(self._valid_numeric_impute_strategies)}", + ) + elif target_impute_strategy not in self._valid_target_impute_strategies: + raise ValueError( + f"{target_impute_strategy} is an invalid parameter. Valid target column impute strategies are {', '.join(self._valid_target_impute_strategies)}", + ) + + parameters = { + "categorical_impute_strategy": categorical_impute_strategy, + "numeric_impute_strategy": numeric_impute_strategy, + "target_impute_strategy": target_impute_strategy, + } + parameters.update(kwargs) + self._all_null_cols = None + self._forwards_cols = None + self._backwards_cols = None + self._interpolate_cols = None + self._impute_target = None + super().__init__( + parameters=parameters, + component_obj=None, + random_seed=random_seed, + ) + + def fit(self, X, y=None): + """Fits imputer to data. + + 'None' values are converted to np.nan before imputation and are treated as the same. + If a value is missing at the beginning or end of a column, that value will be imputed using + backwards fill or forwards fill as necessary, respectively. 
+ + Args: + X (pd.DataFrame, np.ndarray): The input training data of shape [n_samples, n_features] + y (pd.Series, optional): The target training data of length [n_samples] + + Returns: + self + """ + X = infer_feature_types(X) + + nan_ratio = X.isna().sum() / X.shape[0] + self._all_null_cols = nan_ratio[nan_ratio == 1].index.tolist() + + def _filter_cols(impute_strat, X): + """Function to return which columns of the dataset to impute given the impute strategy.""" + cols = [] + if self.parameters["categorical_impute_strategy"] == impute_strat: + if self.parameters["numeric_impute_strategy"] == impute_strat: + cols = list(X.columns) + else: + cols = list(X.ww.select(exclude=["numeric"]).columns) + elif self.parameters["numeric_impute_strategy"] == impute_strat: + cols = list(X.ww.select(include=["numeric"]).columns) + + X_cols = [col for col in cols if col not in self._all_null_cols] + if len(X_cols) > 0: + return X_cols + + self._forwards_cols = _filter_cols("forwards_fill", X) + self._backwards_cols = _filter_cols("backwards_fill", X) + self._interpolate_cols = _filter_cols("interpolate", X) + + if y is not None: + y = infer_feature_types(y) + if y.isnull().any(): + self._impute_target = self.parameters["target_impute_strategy"] + + return self + + def transform(self, X, y=None): + """Transforms data X by imputing missing values using specified timeseries-specific strategies. 'None' values are converted to np.nan before imputation and are treated as the same. + + Args: + X (pd.DataFrame): Data to transform. + y (pd.Series, optional): Optionally, target data to transform. + + Returns: + pd.DataFrame: Transformed X and y + """ + if len(self._all_null_cols) == X.shape[1]: + df = pd.DataFrame(index=X.index) + df.ww.init() + return df, y + X = infer_feature_types(X) + if y is not None: + y = infer_feature_types(y) + + # This will change the logical type of BooleanNullable/IntegerNullable/AgeNullable columns with nans + # so we save the original schema to recreate it where possible after imputation + original_schema = X.ww.schema + X, y = self._handle_nullable_types(X, y) + + X_not_all_null = X.ww.drop(self._all_null_cols) + + # Because the TimeSeriesImputer is always used with the TimeSeriesRegularizer, + # many of the columns containing nans may have originally been non nullable logical types. 
+ # We will use the non nullable equivalents where possible + original_schema = original_schema.get_subset_schema( + list(X_not_all_null.columns), + ) + new_ltypes = { + col: _determine_non_nullable_equivalent(ltype) + for col, ltype in original_schema.logical_types.items() + } + + if self._forwards_cols is not None: + X_forward = X[self._forwards_cols] + imputed = X_forward.pad() + imputed.bfill(inplace=True) # Fill in the first value, if missing + X_not_all_null[X_forward.columns] = imputed + + if self._backwards_cols is not None: + X_backward = X[self._backwards_cols] + imputed = X_backward.bfill() + imputed.pad(inplace=True) # Fill in the last value, if missing + X_not_all_null[X_backward.columns] = imputed + + if self._interpolate_cols is not None: + X_interpolate = X_not_all_null[self._interpolate_cols] + imputed = X_interpolate.interpolate() + imputed.bfill(inplace=True) # Fill in the first value, if missing + X_not_all_null[X_interpolate.columns] = imputed + + # Interpolate may add floating point values to integer data, so we + # have to update those logical types from the ones passed in to a fractional type + # Note we ignore all other types of columns to maintain the types specified above + int_cols_to_update = original_schema._filter_cols( + include=["IntegerNullable", "AgeNullable"], + ) + new_int_ltypes = { + col: _determine_fractional_type(ltype) + for col, ltype in original_schema.logical_types.items() + if col in int_cols_to_update + } + new_ltypes.update(new_int_ltypes) + X_not_all_null.ww.init(schema=original_schema, logical_types=new_ltypes) + + y_imputed = pd.Series(y) + if y is not None and len(y) > 0: + if self._impute_target == "forwards_fill": + y_imputed = y.pad() + y_imputed.bfill(inplace=True) + elif self._impute_target == "backwards_fill": + y_imputed = y.bfill() + y_imputed.pad(inplace=True) + elif self._impute_target == "interpolate": + y_imputed = y.interpolate() + y_imputed.bfill(inplace=True) + # Re-initialize woodwork with the downcast logical type + y_imputed = ww.init_series(y_imputed, logical_type=y.ww.logical_type) + + return X_not_all_null, y_imputed + + def _handle_nullable_types(self, X=None, y=None): + """Transforms X and y to remove any incompatible nullable types for the time series imputer when the interpolate method is used. + + Args: + X (pd.DataFrame, optional): Input data to a component of shape [n_samples, n_features]. + May contain nullable types. + y (pd.Series, optional): The target of length [n_samples]. May contain nullable types. + + Returns: + X, y with any incompatible nullable types downcasted to compatible equivalents when interpolate is used. Is NoOp otherwise. + """ + if self._impute_target == "interpolate": + # For BooleanNullable, we have to avoid Categorical columns + # since the category dtype also has incompatibilities with linear interpolate, which is expected + if isinstance(y.ww.logical_type, BooleanNullable): + y = ww.init_series(y, Double) + else: + _, y = super()._handle_nullable_types(None, y) + if self._interpolate_cols is not None: + X, _ = super()._handle_nullable_types(X, None) + + return X, y + +"""Transformer that regularizes a dataset with an uninferrable offset frequency for time series problems.""" + +class TimeSeriesRegularizer(Transformer): + """Transformer that regularizes an inconsistently spaced datetime column. + + If X is passed in to fit/transform, the column `time_index` will be checked for an inferrable offset frequency. 
If + the `time_index` column is perfectly inferrable then this Transformer will do nothing and return the original X and y. + + If X does not have a perfectly inferrable frequency but one can be estimated, then X and y will be reformatted based + on the estimated frequency for `time_index`. In the original X and y passed: + - Missing datetime values will be added and will have their corresponding columns in X and y set to None. + - Duplicate datetime values will be dropped. + - Extra datetime values will be dropped. + - If it can be determined that a duplicate or extra value is misaligned, then it will be repositioned to take the + place of a missing value. + + This Transformer should be used before the `TimeSeriesImputer` in order to impute the missing values that were + added to X and y (if passed). + + Args: + time_index (string): Name of the column containing the datetime information used to order the data, required. Defaults to None. + frequency_payload (tuple): Payload returned from Woodwork's infer_frequency function where debug is True. Defaults to None. + window_length (int): The size of the rolling window over which inference is conducted to determine the prevalence of uninferrable frequencies. + Lower values make this component more sensitive to recognizing numerous faulty datetime values. Defaults to 5. + threshold (float): The minimum percentage of windows that need to have been able to infer a frequency. Lower values make this component more + sensitive to recognizing numerous faulty datetime values. Defaults to 0.8. + random_seed (int): Seed for the random number generator. This transformer performs the same regardless of the random seed provided. + Defaults to 0. + + Raises: + ValueError: if the frequency_payload parameter has not been passed a tuple + """ + + name = "Time Series Regularizer" + hyperparameter_ranges = {} + """{}""" + + modifies_target = True + training_only = True + + def __init__( + self, + time_index=None, + frequency_payload=None, + window_length=4, + threshold=0.4, + random_seed=0, + **kwargs, + ): + self.time_index = time_index + self.frequency_payload = frequency_payload + self.window_length = window_length + self.threshold = threshold + self.error_dict = {} + self.inferred_freq = None + self.debug_payload = None + + if self.frequency_payload and not isinstance(self.frequency_payload, tuple): + raise ValueError( + "The frequency_payload parameter must be a tuple returned from Woodwork's infer_frequency function where debug is True.", + ) + + parameters = { + "time_index": time_index, + "window_length": window_length, + "threshold": threshold, + } + parameters.update(kwargs) + + super().__init__(parameters=parameters, random_seed=random_seed) + + def fit(self, X, y=None): + """Fits the TimeSeriesRegularizer. + + Args: + X (pd.DataFrame): The input training data of shape [n_samples, n_features]. + y (pd.Series, optional): The target training data of length [n_samples]. 
+ + Returns: + self + + Raises: + ValueError: if self.time_index is None, if X and y have different lengths, if `time_index` in X does not + have an offset frequency that can be estimated + TypeError: if the `time_index` column is not of type Datetime + KeyError: if the `time_index` column doesn't exist + """ + if self.time_index is None: + raise ValueError("The argument time_index cannot be None!") + elif self.time_index not in X.columns: + raise KeyError( + f"The time_index column `{self.time_index}` does not exist in X!", + ) + + X_ww = infer_feature_types(X) + + if not isinstance(X_ww.ww.logical_types[self.time_index], Datetime): + raise TypeError( + f"The time_index column `{self.time_index}` must be of type Datetime.", + ) + + if y is not None: + y = infer_feature_types(y) + if len(X_ww) != len(y): + raise ValueError( + "If y has been passed, then it must be the same length as X.", + ) + + if self.frequency_payload: + ww_payload = self.frequency_payload + else: + ww_payload = infer_frequency( + X_ww[self.time_index], + debug=True, + window_length=self.window_length, + threshold=self.threshold, + ) + self.inferred_freq = ww_payload[0] + self.debug_payload = ww_payload[1] + + if self.inferred_freq is not None: + return self + + if ( + self.debug_payload["estimated_freq"] is None + ): # If even WW can't infer the frequency + raise ValueError( + f"The column {self.time_index} does not have a frequency that can be inferred.", + ) + + estimated_freq = self.debug_payload["estimated_freq"] + duplicates = self.debug_payload["duplicate_values"] + missing = self.debug_payload["missing_values"] + extra = self.debug_payload["extra_values"] + nan = self.debug_payload["nan_values"] + + self.error_dict = self._identify_indices( + self.time_index, + X_ww, + estimated_freq, + duplicates, + missing, + extra, + nan, + ) + + return self + + @staticmethod + def _identify_indices( + time_index, + X, + estimated_freq, + duplicates, + missing, + extra, + nan, + ): + """Identifies which of the problematic indices is actually misaligned. + + Args: + time_index (str): The column name of the datetime values to consider. + X (pd.DataFrame): The input training data of shape [n_samples, n_features]. + estimated_freq (str): The estimated frequency of the `time_index` column. + duplicates (list): Payload information regarding the duplicate values. + missing (list): Payload information regarding the missing values. + extra (list): Payload information regarding the extra values. + nan (list): Payload information regarding the nan values. + + Returns: + (dict): A dictionary of the duplicate, missing, extra, and misaligned indices and their datetime values. 
+ """ + error_dict = { + "duplicate": {}, + "missing": {}, + "extra": {}, + "nan": {}, + "misaligned": {}, + } + + # Adds the indices for the consecutive range of missing, duplicate, and extra values + for each_missing in missing: + # Needed to recreate what the missing datetime values would have been + temp_dates = pd.date_range( + pd.to_datetime(each_missing["dt"]), + freq=estimated_freq, + periods=each_missing["range"], + ) + for each_range in range(each_missing["range"]): + error_dict["missing"][each_missing["idx"] + each_range] = temp_dates[ + each_range + ] + + for each_duplicate in duplicates: + for each_range in range(each_duplicate["range"]): + error_dict["duplicate"][ + each_duplicate["idx"] + each_range + ] = pd.to_datetime(each_duplicate["dt"]) + + for each_extra in extra: + for each_range in range(each_extra["range"]): + error_dict["extra"][each_extra["idx"] + each_range] = X.iloc[ + each_extra["idx"] + each_range + ][time_index] + + for each_nan in nan: + for each_range in range(each_nan["range"]): + error_dict["nan"][each_nan["idx"] + each_range] = "No Value" + + # Identify which of the duplicate/extra values in conjunction with the missing values are actually misaligned + for ind_missing, missing_value in error_dict["missing"].items(): + temp_range = pd.date_range(missing_value, freq=estimated_freq, periods=3) + window_range = temp_range[1] - temp_range[0] + missing_range = [missing_value - window_range, missing_value + window_range] + for ind_duplicate, duplicate_value in error_dict["duplicate"].items(): + if ( + duplicate_value is not None + and missing_range[0] <= duplicate_value <= missing_range[1] + ): + error_dict["misaligned"][ind_duplicate] = { + "incorrect": duplicate_value, + "correct": missing_value, + } + error_dict["duplicate"][ind_duplicate] = None + error_dict["missing"][ind_missing] = None + break + for ind_extra, extra_value in error_dict["extra"].items(): + if ( + extra_value is not None + and missing_range[0] <= extra_value <= missing_range[1] + ): + error_dict["misaligned"][ind_extra] = { + "incorrect": extra_value, + "correct": missing_value, + } + error_dict["extra"][ind_extra] = None + error_dict["missing"][ind_missing] = None + break + + final_error_dict = { + "duplicate": {}, + "missing": {}, + "extra": {}, + "nan": {}, + "misaligned": {}, + } + # Remove duplicate/extra/missing values that were identified as misaligned + for type_, type_inds in error_dict.items(): + new_type_inds = { + ind_: date_ for ind_, date_ in type_inds.items() if date_ is not None + } + final_error_dict[type_] = new_type_inds + + return final_error_dict + + def transform(self, X, y=None): + """Regularizes a dataframe and target data to an inferrable offset frequency. + + A 'clean' X and y (if y was passed in) are created based on an inferrable offset frequency and matching datetime values + with the original X and y are imputed into the clean X and y. Datetime values identified as misaligned are + shifted into their appropriate position. + + Args: + X (pd.DataFrame): The input training data of shape [n_samples, n_features]. + y (pd.Series, optional): The target training data of length [n_samples]. + + Returns: + (pd.DataFrame, pd.Series): Data with an inferrable `time_index` offset frequency. + """ + if self.inferred_freq is not None: + return X, y + + # The cleaned df will begin at the range determined by estimated_range_start, which will result + # in dropping of the first consecutive faulty values in the dataset. 
cleaned_df = pd.DataFrame( + { + self.time_index: pd.date_range( + self.debug_payload["estimated_range_start"], + self.debug_payload["estimated_range_end"], + freq=self.debug_payload["estimated_freq"], + ), + }, + ) + + cleaned_x = cleaned_df.merge(X, on=[self.time_index], how="left") + cleaned_x = cleaned_x.groupby(self.time_index).first().reset_index() + + cleaned_y = None + if y is not None: + y_dates = pd.DataFrame({self.time_index: X[self.time_index], "target": y}) + cleaned_y = cleaned_df.merge(y_dates, on=[self.time_index], how="left") + cleaned_y = cleaned_y.groupby(self.time_index).first().reset_index() + + for index, values in self.error_dict["misaligned"].items(): + to_replace = X.iloc[index] + to_replace[self.time_index] = values["correct"] + cleaned_x.loc[ + cleaned_x[self.time_index] == values["correct"] + ] = to_replace.values + if y is not None: + cleaned_y.loc[cleaned_y[self.time_index] == values["correct"]] = y.iloc[ + index + ] + + if cleaned_y is not None: + cleaned_y = cleaned_y["target"] + cleaned_y = ww.init_series(cleaned_y) + + cleaned_x.ww.init() + + return cleaned_x, cleaned_y \ No newline at end of file diff --git a/checkmates/pipelines/training_validation_split.py b/checkmates/pipelines/training_validation_split.py new file mode 100644 index 0000000..3d22f8f --- /dev/null +++ b/checkmates/pipelines/training_validation_split.py @@ -0,0 +1,102 @@ +"""Training Validation Split class.""" +import numpy as np +from sklearn.model_selection import train_test_split +from sklearn.model_selection._split import BaseCrossValidator + + +class TrainingValidationSplit(BaseCrossValidator): + """Split the training data into training and validation sets. + + Args: + test_size (float): What percentage of data points should be included in the validation + set. Defaults to the complement of `train_size` if `train_size` is set, and 0.25 otherwise. + train_size (float): What percentage of data points should be included in the training set. + Defaults to the complement of `test_size`. + shuffle (boolean): Whether to shuffle the data before splitting. Defaults to False. + stratify (list): Splits the data in a stratified fashion, using this argument as class labels. + Defaults to None. + random_seed (int): The seed to use for random sampling. Defaults to 0. + + Examples: + >>> import numpy as np + >>> import pandas as pd + ... + >>> X = pd.DataFrame([i for i in range(10)], columns=["First"]) + >>> y = pd.Series([i for i in range(10)]) + ... + >>> tv_split = TrainingValidationSplit() + >>> split_ = next(tv_split.split(X, y)) + >>> assert (split_[0] == np.array([0, 1, 2, 3, 4, 5, 6])).all() + >>> assert (split_[1] == np.array([7, 8, 9])).all() + ... + ... + >>> tv_split = TrainingValidationSplit(test_size=0.5) + >>> split_ = next(tv_split.split(X, y)) + >>> assert (split_[0] == np.array([0, 1, 2, 3, 4])).all() + >>> assert (split_[1] == np.array([5, 6, 7, 8, 9])).all() + ... + ... + >>> tv_split = TrainingValidationSplit(shuffle=True) + >>> split_ = next(tv_split.split(X, y)) + >>> assert (split_[0] == np.array([9, 1, 6, 7, 3, 0, 5])).all() + >>> assert (split_[1] == np.array([2, 8, 4])).all() + ... + ... 
+ >>> y = pd.Series([i % 3 for i in range(10)]) + >>> tv_split = TrainingValidationSplit(shuffle=True, stratify=y) + >>> split_ = next(tv_split.split(X, y)) + >>> assert (split_[0] == np.array([1, 9, 3, 2, 8, 6, 7])).all() + >>> assert (split_[1] == np.array([0, 4, 5])).all() + """ + + def __init__( + self, + test_size=None, + train_size=None, + shuffle=False, + stratify=None, + random_seed=0, + ): + self.test_size = test_size + self.train_size = train_size + self.shuffle = shuffle + self.stratify = stratify + self.random_seed = random_seed + + @staticmethod + def get_n_splits(): + """Return the number of splits of this object. + + Returns: + int: Always returns 1. + """ + return 1 + + @property + def is_cv(self): + """Returns whether or not the data splitter is a cross-validation data splitter. + + Returns: + bool: If the splitter is a cross-validation data splitter + """ + return False + + def split(self, X, y=None): + """Divide the data into training and testing sets. + + Args: + X (pd.DataFrame): Dataframe of points to split + y (pd.Series): Series of points to split + + Returns: + list: Indices to split data into training and test set + """ + train, test = train_test_split( + np.arange(X.shape[0]), + test_size=self.test_size, + train_size=self.train_size, + shuffle=self.shuffle, + stratify=self.stratify, + random_state=self.random_seed, + ) + return iter([(train, test)]) \ No newline at end of file diff --git a/checkmates/pipelines/transformers.py b/checkmates/pipelines/transformers.py new file mode 100644 index 0000000..ef70fb4 --- /dev/null +++ b/checkmates/pipelines/transformers.py @@ -0,0 +1,238 @@ +"""A component that may or may not need fitting that transforms data. These components are used before an estimator.""" +from abc import abstractmethod + +from checkmates.exceptions import MethodPropertyNotFoundError +from checkmates.pipelines import ComponentBase +from checkmates.utils import infer_feature_types +import pandas as pd +import woodwork +from sklearn.impute import SimpleImputer as SkImputer + +from checkmates.pipelines.transformers import Transformer +from checkmates.utils import infer_feature_types +from checkmates.utils.nullable_type_utils import _get_new_logical_types_for_imputed_data + +class Transformer(ComponentBase): + """A component that may or may not need fitting that transforms data. These components are used before an estimator. + + To implement a new Transformer, define your own class which is a subclass of Transformer, including + a name and a list of acceptable ranges for any parameters to be tuned during the automl search (hyperparameters). + Define an `__init__` method which sets up any necessary state and objects. Make sure your `__init__` only + uses standard keyword arguments and calls `super().__init__()` with a parameters dict. You may also override the + `fit`, `transform`, `fit_transform` and other methods in this class if appropriate. + + To see some examples, check out the definitions of any Transformer component. + + Args: + parameters (dict): Dictionary of parameters for the component. Defaults to None. + component_obj (obj): Third-party objects useful in component implementation. Defaults to None. + random_seed (int): Seed for the random number generator. Defaults to 0. 
+ """ + + modifies_features = True + modifies_target = False + training_only = False + + def __init__(self, parameters=None, component_obj=None, random_seed=0, **kwargs): + super().__init__( + parameters=parameters, + component_obj=component_obj, + random_seed=random_seed, + **kwargs, + ) + + @abstractmethod + def transform(self, X, y=None): + """Transforms data X. + + Args: + X (pd.DataFrame): Data to transform. + y (pd.Series, optional): Target data. + + Returns: + pd.DataFrame: Transformed X + + Raises: + MethodPropertyNotFoundError: If transformer does not have a transform method or a component_obj that implements transform. + """ + + def fit_transform(self, X, y=None): + """Fits on X and transforms X. + + Args: + X (pd.DataFrame): Data to fit and transform. + y (pd.Series): Target data. + + Returns: + pd.DataFrame: Transformed X. + + Raises: + MethodPropertyNotFoundError: If transformer does not have a transform method or a component_obj that implements transform. + """ + X_ww = infer_feature_types(X) + if y is not None: + y_ww = infer_feature_types(y) + else: + y_ww = y + + try: + return self.fit(X_ww, y_ww).transform(X_ww, y_ww) + except MethodPropertyNotFoundError as e: + raise e + + def _get_feature_provenance(self): + return {} + +"""Component that imputes missing data according to a specified imputation strategy.""" + +class SimpleImputer(Transformer): + """Imputes missing data according to a specified imputation strategy. Natural language columns are ignored. + + Args: + impute_strategy (string): Impute strategy to use. Valid values include "mean", "median", "most_frequent", "constant" for + numerical data, and "most_frequent", "constant" for object data types. + fill_value (string): When impute_strategy == "constant", fill_value is used to replace missing data. + Defaults to 0 when imputing numerical data and "missing_value" for strings or object data types. + random_seed (int): Seed for the random number generator. Defaults to 0. + + """ + + name = "Simple Imputer" + hyperparameter_ranges = {"impute_strategy": ["mean", "median", "most_frequent"]} + """{ + "impute_strategy": ["mean", "median", "most_frequent"] + }""" + + def __init__( + self, impute_strategy="most_frequent", fill_value=None, random_seed=0, **kwargs + ): + parameters = {"impute_strategy": impute_strategy, "fill_value": fill_value} + parameters.update(kwargs) + self.impute_strategy = impute_strategy + imputer = SkImputer( + strategy=impute_strategy, + fill_value=fill_value, + missing_values=pd.NA, + **kwargs, + ) + self._all_null_cols = None + super().__init__( + parameters=parameters, + component_obj=imputer, + random_seed=random_seed, + ) + + def fit(self, X, y=None): + """Fits imputer to data. 'None' values are converted to np.nan before imputation and are treated as the same. + + Args: + X (pd.DataFrame or np.ndarray): the input training data of shape [n_samples, n_features] + y (pd.Series, optional): the target training data of length [n_samples] + + Returns: + self + + Raises: + ValueError: if the SimpleImputer receives a dataframe with both Boolean and Categorical data. + + """ + X = infer_feature_types(X) + + if set([lt.type_string for lt in X.ww.logical_types.values()]) == { + "boolean", + "categorical", + }: + raise ValueError( + "SimpleImputer cannot handle dataframes with both boolean and categorical features. 
Use Imputer instead.", + ) + + nan_ratio = X.isna().sum() / X.shape[0] + + # Keep track of the different types of data in X + self._all_null_cols = nan_ratio[nan_ratio == 1].index.tolist() + self._natural_language_cols = list( + X.ww.select( + "NaturalLanguage", + return_schema=True, + ).columns.keys(), + ) + + # Only impute data that is not natural language columns or fully null + self._cols_to_impute = [ + col + for col in X.columns + if col not in self._natural_language_cols and col not in self._all_null_cols + ] + + # If there are no columns to impute, return early + if not self._cols_to_impute: + return self + + X = X[self._cols_to_impute] + if (X.dtypes == bool).all(): + # Ensure that _component_obj still gets fit so that if any of the dtypes are different + # at transform, we've fit the component. This is needed because sklearn doesn't allow + # data with only bool dtype to be passed in. + X = X.astype("boolean") + + self._component_obj.fit(X, y) + return self + + def transform(self, X, y=None): + """Transforms input by imputing missing values. 'None' and np.nan values are treated as the same. + + Args: + X (pd.DataFrame): Data to transform. + y (pd.Series, optional): Ignored. + + Returns: + pd.DataFrame: Transformed X + """ + # Record original data + X = infer_feature_types(X) + original_schema = X.ww.schema + original_index = X.index + + # separate out just the columns we are imputing + X_t = X[self._cols_to_impute] + if not self._cols_to_impute or (X_t.dtypes == bool).all(): + # If there are no columns to impute or all columns to impute are bool dtype, + # which will never have null values, return the original data without any fully null columns + not_all_null_cols = [ + col for col in X.columns if col not in self._all_null_cols + ] + return X.ww[not_all_null_cols] + + # Transform the data + X_t = self._component_obj.transform(X_t) + X_t = pd.DataFrame(X_t, columns=self._cols_to_impute) + + # Reinit woodwork, maintaining original types where possible + imputed_schema = original_schema.get_subset_schema(self._cols_to_impute) + new_logical_types = _get_new_logical_types_for_imputed_data( + impute_strategy=self.impute_strategy, + original_schema=imputed_schema, + ) + X_t.ww.init(schema=imputed_schema, logical_types=new_logical_types) + + # Add back in the unchanged original natural language columns that we want to keep + if len(self._natural_language_cols) > 0: + X_t = woodwork.concat_columns([X_t, X.ww[self._natural_language_cols]]) + # reorder columns to match original + X_t = X_t.ww[[col for col in original_schema.columns if col in X_t.columns]] + + if self._cols_to_impute: + X_t.index = original_index + return X_t + + def fit_transform(self, X, y=None): + """Fits on X and transforms X. + + Args: + X (pd.DataFrame): Data to fit and transform + y (pd.Series, optional): Target data. 
+ + Returns: + pd.DataFrame: Transformed X + """ + return self.fit(X, y).transform(X, y) \ No newline at end of file diff --git a/checkmates/pipelines/utils.py b/checkmates/pipelines/utils.py new file mode 100644 index 0000000..c63f1b1 --- /dev/null +++ b/checkmates/pipelines/utils.py @@ -0,0 +1,172 @@ +"""Utility methods for EvalML pipelines.""" +from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit +from typing import Union +import numpy as np +import pandas as pd + + +from checkmates.data_checks import DataCheckActionCode +from checkmates.pipelines.components import ( # noqa: F401 + DropColumns, + DropRowsTransformer, + PerColumnImputer, + TargetImputer, + TimeSeriesImputer, + TimeSeriesRegularizer, +) +from checkmates.utils import infer_feature_types +from checkmates.pipelines.training_validation_split import TrainingValidationSplit +from checkmates.problem_types import is_classification, is_regression, is_time_series + + + + +def _make_component_list_from_actions(actions): + """Creates a list of components from the input DataCheckAction list. + + Args: + actions (list(DataCheckAction)): List of DataCheckAction objects used to create list of components + + Returns: + list(ComponentBase): List of components used to address the input actions + """ + components = [] + cols_to_drop = [] + indices_to_drop = [] + + for action in actions: + if action.action_code == DataCheckActionCode.REGULARIZE_AND_IMPUTE_DATASET: + metadata = action.metadata + parameters = metadata.get("parameters", {}) + components.extend( + [ + TimeSeriesRegularizer( + time_index=parameters.get("time_index", None), + frequency_payload=parameters["frequency_payload"], + ), + TimeSeriesImputer(), + ], + ) + elif action.action_code == DataCheckActionCode.DROP_COL: + cols_to_drop.extend(action.metadata["columns"]) + elif action.action_code == DataCheckActionCode.IMPUTE_COL: + metadata = action.metadata + parameters = metadata.get("parameters", {}) + if metadata["is_target"]: + components.append( + TargetImputer(impute_strategy=parameters["impute_strategy"]), + ) + else: + impute_strategies = parameters["impute_strategies"] + components.append(PerColumnImputer(impute_strategies=impute_strategies)) + elif action.action_code == DataCheckActionCode.DROP_ROWS: + indices_to_drop.extend(action.metadata["rows"]) + if cols_to_drop: + cols_to_drop = sorted(set(cols_to_drop)) + components.append(DropColumns(columns=cols_to_drop)) + if indices_to_drop: + indices_to_drop = sorted(set(indices_to_drop)) + components.append(DropRowsTransformer(indices_to_drop=indices_to_drop)) + + return components + +def split_data( + X, + y, + problem_type, + problem_configuration=None, + test_size=None, + random_seed=0, +): + """Split data into train and test sets. + + Args: + X (pd.DataFrame or np.ndarray): data of shape [n_samples, n_features] + y (pd.Series, or np.ndarray): target data of length [n_samples] + problem_type (str or ProblemTypes): type of supervised learning problem. see evalml.problem_types.problemtype.all_problem_types for a full list. + problem_configuration (dict): Additional parameters needed to configure the search. For example, + in time series problems, values should be passed in for the time_index, gap, and max_delay variables. + test_size (float): What percentage of data points should be included in the test set. Defaults to 0.2 (20%) for non-timeseries problems and 0.1 + (10%) for timeseries problems. + random_seed (int): Seed for the random number generator. Defaults to 0. 
+ + Returns: + pd.DataFrame, pd.DataFrame, pd.Series, pd.Series: Feature and target data each split into train and test sets. + + Examples: + >>> X = pd.DataFrame([1, 2, 3, 4, 5, 6], columns=["First"]) + >>> y = pd.Series([8, 9, 10, 11, 12, 13]) + ... + >>> X_train, X_validation, y_train, y_validation = split_data(X, y, "regression", random_seed=42) + >>> X_train + First + 5 6 + 2 3 + 4 5 + 3 4 + >>> X_validation + First + 0 1 + 1 2 + >>> y_train + 5 13 + 2 10 + 4 12 + 3 11 + dtype: int64 + >>> y_validation + 0 8 + 1 9 + dtype: int64 + """ + X = infer_feature_types(X) + y = infer_feature_types(y) + + data_splitter = None + if is_time_series(problem_type): + if test_size is None: + test_size = 0.1 + if ( + problem_configuration is not None + and "forecast_horizon" in problem_configuration + ): + fh_pct = problem_configuration["forecast_horizon"] / len(X) + test_size = max(test_size, fh_pct) + data_splitter = TrainingValidationSplit( + test_size=test_size, + shuffle=False, + stratify=None, + random_seed=random_seed, + ) + else: + if test_size is None: + test_size = 0.2 + if is_regression(problem_type): + data_splitter = ShuffleSplit( + n_splits=1, + test_size=test_size, + random_state=random_seed, + ) + elif is_classification(problem_type): + data_splitter = StratifiedShuffleSplit( + n_splits=1, + test_size=test_size, + random_state=random_seed, + ) + + train, test = next(data_splitter.split(X, y)) + + X_train = X.ww.iloc[train] + X_test = X.ww.iloc[test] + y_train = y.ww.iloc[train] + y_test = y.ww.iloc[test] + + return X_train, X_test, y_train, y_test + +def drop_infinity( + data: Union[pd.DataFrame, pd.Series], +) -> Union[pd.DataFrame, pd.Series]: + """Removes infinity values""" + ww = data.ww._schema is not None + replace = data.ww.replace if ww else data.replace + return replace([np.inf, -np.inf], np.nan) diff --git a/checkmates/utils/__init__.py b/checkmates/utils/__init__.py index 263a3ff..36b70ff 100644 --- a/checkmates/utils/__init__.py +++ b/checkmates/utils/__init__.py @@ -1,3 +1,13 @@ """Utility methods.""" -from checkmates.utils.gen_utils import classproperty +from checkmates.utils.gen_utils import classproperty, safe_repr from checkmates.utils.woodwork_utils import infer_feature_types +from checkmates.utils.base_meta import BaseMeta +from checkmates.utils.nullable_type_utils import ( + _downcast_nullable_X, + _downcast_nullable_y, + _determine_downcast_type, + _determine_fractional_type, + _determine_non_nullable_equivalent, + _get_new_logical_types_for_imputed_data, +) +from checkmates.utils.logger import get_logger, log_subtitle, log_title diff --git a/checkmates/utils/base_meta.py b/checkmates/utils/base_meta.py new file mode 100644 index 0000000..615d333 --- /dev/null +++ b/checkmates/utils/base_meta.py @@ -0,0 +1,46 @@ +"""Metaclass that overrides creating a new component or pipeline by wrapping methods with validators and setters.""" +from abc import ABCMeta +from functools import wraps + + +class BaseMeta(ABCMeta): + """Metaclass that overrides creating a new component or pipeline by wrapping methods with validators and setters.""" + + FIT_METHODS = ["fit", "fit_transform"] + METHODS_TO_CHECK = [ + "predict", + "predict_proba", + "transform", + "inverse_transform", + "get_trend_dataframe", + ] + PROPERTIES_TO_CHECK = ["feature_importance"] + + @classmethod + def set_fit(cls, method): + """Wrapper for the fit method.""" + + @wraps(method) + def _set_fit(self, X, y=None): + return_value = method(self, X, y) + self._is_fitted = True + return return_value + + return 
_set_fit + + def __new__(cls, name, bases, dct): + """Create a new instance.""" + for attribute in dct: + if attribute in cls.FIT_METHODS: + dct[attribute] = cls.set_fit(dct[attribute]) + if attribute in cls.METHODS_TO_CHECK: + dct[attribute] = cls.check_for_fit(dct[attribute]) + if attribute in cls.PROPERTIES_TO_CHECK: + property_orig = dct[attribute] + dct[attribute] = property( + cls.check_for_fit(property_orig.__get__), + property_orig.__set__, + property_orig.__delattr__, + property_orig.__doc__, + ) + return super().__new__(cls, name, bases, dct) \ No newline at end of file diff --git a/checkmates/utils/gen_utils.py b/checkmates/utils/gen_utils.py index 7a18387..49bba20 100644 --- a/checkmates/utils/gen_utils.py +++ b/checkmates/utils/gen_utils.py @@ -1,6 +1,8 @@ """General utility methods.""" import logging from collections import namedtuple +import pandas as pd +import numpy as np logger = logging.getLogger(__name__) @@ -147,3 +149,19 @@ def are_ts_parameters_valid_for_split( "Please use a smaller number of splits, reduce one or more these parameters, or collect more data." ) return _validation_result(not msg, msg, train_size, window_size, n_obs, n_splits) + +def safe_repr(value): + """Convert the given value into a string that can safely be used for repr. + + Args: + value: The item to convert + + Returns: + String representation of the value + """ + if isinstance(value, float): + if pd.isna(value): + return "np.nan" + if np.isinf(value): + return f"float('{repr(value)}')" + return repr(value) \ No newline at end of file diff --git a/checkmates/utils/logger.py b/checkmates/utils/logger.py new file mode 100644 index 0000000..6a94918 --- /dev/null +++ b/checkmates/utils/logger.py @@ -0,0 +1,78 @@ +"""Logging functions.""" +import logging +import sys +import time + + +def get_logger(name): + """Get the logger with the associated name. + + Args: + name (str): Name of the logger to get. + + Returns: + The logger object with the associated name. + """ + logger = logging.getLogger(name) + if not len(logger.handlers): + logger.setLevel(logging.DEBUG) + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setLevel(logging.INFO) + stdout_handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(stdout_handler) + return logger + + +def log_title(logger, title): + """Log with a title.""" + logger.info("\n" + "*" * (len(title) + 4)) + logger.info("* %s *" % title) + logger.info("*" * (len(title) + 4)) + logger.info("") + + +def log_subtitle(logger, title, underline="="): + """Log with a subtitle.""" + logger.info("") + logger.info("%s" % title) + logger.info(underline * len(title)) + + +def time_elapsed(start_time): + """How much time has elapsed since the search started. + + Args: + start_time (int): Time when search started. + + Returns: + str: elapsed time formatted as a string [H:]MM:SS + """ + time_diff = time.time() - start_time + # Source: tqdm.std.tqdm.format_interval + mins, s = divmod(int(time_diff), 60) + h, m = divmod(mins, 60) + if h: + return "{0:d}:{1:02d}:{2:02d}".format(h, m, s) + else: + return "{0:02d}:{1:02d}".format(m, s) + + +def log_batch_times(logger, batch_times): + """Used to print out the batch times. + + Args: + logger: the logger. + batch_times: dict with (batch number, {pipeline name, pipeline time}). 
+ """ + log_title(logger, "Batch Time Stats") + for batch_number in batch_times: + subtitle = "Batch " + str(batch_number) + " time stats:" + log_subtitle(logger, subtitle) + for pipeline_name in batch_times[batch_number]: + logger.info( + "\n" + + pipeline_name + + ": " + + f"{batch_times[batch_number][pipeline_name]:.2f} seconds", + ) + logger.info("") \ No newline at end of file diff --git a/checkmates/utils/nullable_type_utils.py b/checkmates/utils/nullable_type_utils.py new file mode 100644 index 0000000..01bb6a6 --- /dev/null +++ b/checkmates/utils/nullable_type_utils.py @@ -0,0 +1,177 @@ +import woodwork as ww +from woodwork.logical_types import AgeNullable, BooleanNullable, IntegerNullable + +DOWNCAST_TYPE_DICT = { + "BooleanNullable": ("Boolean", "Categorical"), + "IntegerNullable": ("Integer", "Double"), + "AgeNullable": ("Age", "AgeFractional"), +} + + +def _downcast_nullable_X(X, handle_boolean_nullable=True, handle_integer_nullable=True): + """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming + to other dtypes via Woodwork logical type transformations. + + Args: + X (pd.DataFrame): Input data of shape [n_samples, n_features] whose nullable types will be changed. + handle_boolean_nullable (bool, optional): Whether or not to downcast data with BooleanNullable logical types. + handle_integer_nullable (bool, optional): Whether or not to downcast data with IntegerNullable or AgeNullable logical types. + + + Returns: + X with any incompatible nullable types downcasted to compatible equivalents. + """ + if X.ww.schema is None: + X.ww.init() + + incompatible_logical_types = _get_incompatible_nullable_types( + handle_boolean_nullable, + handle_integer_nullable, + ) + + data_to_downcast = X.ww.select(incompatible_logical_types) + # If no incompatible types are present, no downcasting is needed + if not len(data_to_downcast.columns): + return X + + new_ltypes = { + col: _determine_downcast_type(data_to_downcast.ww[col]) + for col in data_to_downcast.columns + } + + X.ww.set_types(logical_types=new_ltypes) + return X + + +def _downcast_nullable_y(y, handle_boolean_nullable=True, handle_integer_nullable=True): + """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming + to other dtypes via Woodwork logical type transformations. + + Args: + y (pd.Series): Target data of shape [n_samples] whose nullable types will be changed. + handle_boolean_nullable (bool, optional): Whether or not to downcast data with BooleanNullable logical types. + handle_integer_nullable (bool, optional): Whether or not to downcast data with IntegerNullable or AgeNullable logical types. + + + Returns: + y with any incompatible nullable types downcasted to compatible equivalents. + """ + if y.ww.schema is None: + y = ww.init_series(y) + + incompatible_logical_types = _get_incompatible_nullable_types( + handle_boolean_nullable, + handle_integer_nullable, + ) + + if isinstance(y.ww.logical_type, tuple(incompatible_logical_types)): + new_ltype = _determine_downcast_type(y) + return y.ww.set_logical_type(new_ltype) + + return y + + +def _get_incompatible_nullable_types(handle_boolean_nullable, handle_integer_nullable): + """Determines which Woodwork logical types are incompatible. + + Args: + handle_boolean_nullable (bool): Whether boolean nullable logical types are incompatible. + handle_integer_nullable (bool): Whether integer nullable logical types are incompatible. + + Returns: + list[ww.LogicalType] of logical types that are incompatible. 
+ """ + nullable_types_to_handle = [] + if handle_boolean_nullable: + nullable_types_to_handle.append(BooleanNullable) + if handle_integer_nullable: + nullable_types_to_handle.append(IntegerNullable) + nullable_types_to_handle.append(AgeNullable) + + return nullable_types_to_handle + + +def _determine_downcast_type(col): + """Determines what logical type to downcast to based on whether nans were present or not. + - BooleanNullable becomes Boolean if nans are not present and Categorical if they are + - IntegerNullable becomes Integer if nans are not present and Double if they are. + - AgeNullable becomes Age if nans are not present and AgeFractional if they are. + + Args: + col (Woodwork Series): The data whose downcast logical type we are determining by inspecting + its current logical type and whether nans are present. + + Returns: + LogicalType string to be used to downcast incompatible nullable logical types. + """ + no_nans_ltype, has_nans_ltype = DOWNCAST_TYPE_DICT[str(col.ww.logical_type)] + if col.isnull().any(): + return has_nans_ltype + + return no_nans_ltype + + +def _determine_fractional_type(logical_type): + """Determines what logical type to use for integer data that has fractional values imputed. + - IntegerNullable becomes Double. + - AgeNullable becomes AgeFractional. + - All other logical types are returned unchanged. + + Args: + logical_type (ww.LogicalType): The logical type whose fractional equivalent we are determining. + Should be either IntegerNullable or AgeNullable. + + Returns: + LogicalType to be used after fractional values have been added to a previously integer column. + """ + fractional_ltype = None + if isinstance(logical_type, (IntegerNullable, AgeNullable)): + _, fractional_ltype = DOWNCAST_TYPE_DICT[str(logical_type)] + + return fractional_ltype or logical_type + + +def _determine_non_nullable_equivalent(logical_type): + """Determines the non nullable equivalent logical type to use for nullable types. These types cannot support null values. + - IntegerNullable becomes Integer. + - AgeNullable becomes Age. + - BooleanNullable becomes Boolean. + - All other logical types are returned unchanged. + + Args: + logical_type (ww.LogicalType): The logical type whose non nullable equivalent we are determining. + + Returns: + LogicalType to be used instead of nullable type when nans aren't present. + """ + non_nullable_ltype, _ = DOWNCAST_TYPE_DICT.get(str(logical_type), (None, None)) + + return non_nullable_ltype or logical_type + + +def _get_new_logical_types_for_imputed_data( + impute_strategy, + original_schema, +): + """Determines what the logical types should be after imputing data. New logical types are only needed for integer data that may have had fractional values imputed. + + Args: + impute_strategy (str): The strategy used to impute data. May be one of + "most_frequent", "forwards_fill", "backwards_fill", "mean", "median", "constant", "interpolate, or "knn". + Integer types will be converted to their corresponding fractional types if any but + "most_frequent", "forwards_fill" or "backwards_fill" are used. + original_schema (ww.TableSchema): The Woodwork table schema of the original data that was passed to the imputer. + + Returns: + dict[str, ww.LogicalType]: Updated logical types to use for imputed data. 
+ """ + # Some impute strategies will always maintain integer values, so we can use the original logical types + if impute_strategy in {"most_frequent", "forwards_fill", "backwards_fill"}: + return original_schema.logical_types + + return { + col: _determine_fractional_type(ltype) + if isinstance(ltype, (AgeNullable, IntegerNullable)) + else ltype + for col, ltype in original_schema.logical_types.items() + } \ No newline at end of file From 4526e01c8e6e6a608a0b29ce22925ff85cb7c33f Mon Sep 17 00:00:00 2001 From: Nabil Fayak Date: Fri, 18 Aug 2023 12:22:52 -0400 Subject: [PATCH 2/3] lint fix --- checkmates/exceptions/exceptions.py | 3 ++ checkmates/objectives/utils.py | 10 +++--- checkmates/pipelines/__init__.py | 10 ++++-- checkmates/pipelines/component_base.py | 2 +- checkmates/pipelines/component_base_meta.py | 2 +- checkmates/pipelines/components.py | 34 +++++++++++-------- .../pipelines/training_validation_split.py | 2 +- checkmates/pipelines/transformers.py | 11 +++--- checkmates/pipelines/utils.py | 12 +++---- checkmates/utils/base_meta.py | 2 +- checkmates/utils/gen_utils.py | 6 ++-- checkmates/utils/logger.py | 2 +- checkmates/utils/nullable_type_utils.py | 13 ++++--- 13 files changed, 65 insertions(+), 44 deletions(-) diff --git a/checkmates/exceptions/exceptions.py b/checkmates/exceptions/exceptions.py index f7438af..d1479a2 100644 --- a/checkmates/exceptions/exceptions.py +++ b/checkmates/exceptions/exceptions.py @@ -13,16 +13,19 @@ class ObjectiveNotFoundError(Exception): pass + class MethodPropertyNotFoundError(Exception): """Exception to raise when a class is does not have an expected method or property.""" pass + class ComponentNotYetFittedError(Exception): """An exception to be raised when predict/predict_proba/transform is called on a component without fitting first.""" pass + class ObjectiveCreationError(Exception): """Exception when get_objective tries to instantiate an objective and required args are not provided.""" diff --git a/checkmates/objectives/utils.py b/checkmates/objectives/utils.py index 1d717ee..78725b3 100644 --- a/checkmates/objectives/utils.py +++ b/checkmates/objectives/utils.py @@ -1,14 +1,13 @@ """Utility methods for CheckMates objectives.""" -import pandas as pd from typing import Optional +import pandas as pd + from checkmates import objectives from checkmates.exceptions import ObjectiveCreationError, ObjectiveNotFoundError from checkmates.objectives.objective_base import ObjectiveBase -from checkmates.problem_types import handle_problem_types +from checkmates.problem_types import ProblemTypes, handle_problem_types from checkmates.utils.gen_utils import _get_subclasses -from checkmates.problem_types import ProblemTypes - from checkmates.utils.logger import get_logger logger = get_logger(__file__) @@ -97,11 +96,12 @@ def get_objective(objective, return_instance=False, **kwargs): return objective_class + def get_problem_type( input_problem_type: Optional[str], target_data: pd.Series, ) -> ProblemTypes: - """helper function to determine if classification problem is binary or multiclass dependent on target variable values.""" + """Helper function to determine if classification problem is binary or multiclass dependent on target variable values.""" if not input_problem_type: raise ValueError("problem type is required") if input_problem_type.lower() == "classification": diff --git a/checkmates/pipelines/__init__.py b/checkmates/pipelines/__init__.py index c7d5701..8a04168 100644 --- a/checkmates/pipelines/__init__.py +++ b/checkmates/pipelines/__init__.py 
@@ -1,3 +1,5 @@ +"""General CheckMates pipelines.""" + from checkmates.pipelines.component_base_meta import ComponentBaseMeta from checkmates.pipelines.component_base import ComponentBase from checkmates.pipelines.transformers import Transformer @@ -9,5 +11,9 @@ TimeSeriesImputer, TimeSeriesRegularizer, ) -from checkmates.pipelines.utils import _make_component_list_from_actions, split_data, drop_infinity -from checkmates.pipelines.training_validation_split import TrainingValidationSplit \ No newline at end of file +from checkmates.pipelines.utils import ( + _make_component_list_from_actions, + split_data, + drop_infinity, +) +from checkmates.pipelines.training_validation_split import TrainingValidationSplit diff --git a/checkmates/pipelines/component_base.py b/checkmates/pipelines/component_base.py index 0360dd8..083c2c7 100644 --- a/checkmates/pipelines/component_base.py +++ b/checkmates/pipelines/component_base.py @@ -280,4 +280,4 @@ def _handle_nullable_types(self, X=None, y=None): handle_integer_nullable=y_int_incompatible, ) - return X, y \ No newline at end of file + return X, y diff --git a/checkmates/pipelines/component_base_meta.py b/checkmates/pipelines/component_base_meta.py index a922ad4..9a7a427 100644 --- a/checkmates/pipelines/component_base_meta.py +++ b/checkmates/pipelines/component_base_meta.py @@ -41,4 +41,4 @@ def _check_for_fit(self, X=None, y=None): else: return method(self, X, y) - return _check_for_fit \ No newline at end of file + return _check_for_fit diff --git a/checkmates/pipelines/components.py b/checkmates/pipelines/components.py index a83c61a..121e2bf 100644 --- a/checkmates/pipelines/components.py +++ b/checkmates/pipelines/components.py @@ -1,27 +1,29 @@ """Initalizes an transformer that selects specified columns in input data.""" +import warnings from abc import abstractmethod from functools import wraps + import pandas as pd import woodwork as ww -import warnings from sklearn.impute import SimpleImputer as SkImputer - -from woodwork.logical_types import Datetime +from woodwork.logical_types import ( + BooleanNullable, + Datetime, + Double, +) from woodwork.statistics_utils import infer_frequency -from checkmates.pipelines.transformers import Transformer -from checkmates.pipelines.transformers import SimpleImputer from checkmates.exceptions import ComponentNotYetFittedError from checkmates.pipelines import ComponentBaseMeta +from checkmates.pipelines.transformers import SimpleImputer, Transformer from checkmates.utils import infer_feature_types from checkmates.utils.nullable_type_utils import ( - _get_new_logical_types_for_imputed_data, _determine_fractional_type, _determine_non_nullable_equivalent, + _get_new_logical_types_for_imputed_data, ) - class ColumnSelector(Transformer): """Initalizes an transformer that selects specified columns in input data. @@ -211,8 +213,10 @@ def transform(self, X, y=None): modified_cols = self._modify_columns(cols, X, y) return infer_feature_types(modified_cols) + """Transformer to drop rows specified by row indices.""" + class DropRowsTransformer(Transformer): """Transformer to drop rows specified by row indices. @@ -300,8 +304,10 @@ def transform(self, X, y=None): y_t = y_t.ww.drop(self.indices_to_drop) return X_t, y_t + """Component that imputes missing data according to a specified imputation strategy per column.""" + class PerColumnImputer(Transformer): """Imputes missing data according to a specified imputation strategy per column. 
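# --- Illustrative usage sketch (not part of the diff above) -------------------
# A rough example of the column/row-dropping components migrated in PATCH 1/3,
# assuming the fit/transform flow shown in the surrounding hunks. The toy frame,
# column names, and row indices below are hypothetical.
import pandas as pd

from checkmates.pipelines.components import DropColumns, DropRowsTransformer

X = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]})
y = pd.Series([0, 1, 0, 1])

# DropColumns removes the named columns and returns the reduced frame.
X_no_b = DropColumns(columns=["b"]).fit_transform(X, y)

# DropRowsTransformer drops the given indices from X and y; its transform
# returns the pair (X_t, y_t), mirroring the hunk shown earlier.
dropper = DropRowsTransformer(indices_to_drop=[0, 2])
dropper.fit(X, y)
X_t, y_t = dropper.transform(X, y)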
@@ -396,8 +402,10 @@ def transform(self, X, y=None): X_t.ww.init(schema=original_schema.get_subset_schema(X_t.columns)) return X_t + """Component that imputes missing target data according to a specified imputation strategy.""" + class TargetImputerMeta(ComponentBaseMeta): """A version of the ComponentBaseMeta class which handles when input features is None.""" @@ -531,13 +539,9 @@ def fit_transform(self, X, y): """ return self.fit(X, y).transform(X, y) + """Component that imputes missing data according to a specified timeseries-specific imputation strategy.""" -import pandas as pd -import woodwork as ww -from woodwork.logical_types import ( - BooleanNullable, - Double, -) + class TimeSeriesImputer(Transformer): """Imputes missing data according to a specified timeseries-specific imputation strategy. @@ -776,8 +780,10 @@ def _handle_nullable_types(self, X=None, y=None): return X, y + """Transformer that regularizes a dataset with an uninferrable offset frequency for time series problems.""" + class TimeSeriesRegularizer(Transformer): """Transformer that regularizes an inconsistently spaced datetime column. @@ -1086,4 +1092,4 @@ def transform(self, X, y=None): cleaned_x.ww.init() - return cleaned_x, cleaned_y \ No newline at end of file + return cleaned_x, cleaned_y diff --git a/checkmates/pipelines/training_validation_split.py b/checkmates/pipelines/training_validation_split.py index 3d22f8f..9c32c58 100644 --- a/checkmates/pipelines/training_validation_split.py +++ b/checkmates/pipelines/training_validation_split.py @@ -99,4 +99,4 @@ def split(self, X, y=None): stratify=self.stratify, random_state=self.random_seed, ) - return iter([(train, test)]) \ No newline at end of file + return iter([(train, test)]) diff --git a/checkmates/pipelines/transformers.py b/checkmates/pipelines/transformers.py index ef70fb4..af4b4c4 100644 --- a/checkmates/pipelines/transformers.py +++ b/checkmates/pipelines/transformers.py @@ -1,17 +1,16 @@ """A component that may or may not need fitting that transforms data. These components are used before an estimator.""" from abc import abstractmethod -from checkmates.exceptions import MethodPropertyNotFoundError -from checkmates.pipelines import ComponentBase -from checkmates.utils import infer_feature_types import pandas as pd import woodwork from sklearn.impute import SimpleImputer as SkImputer -from checkmates.pipelines.transformers import Transformer +from checkmates.exceptions import MethodPropertyNotFoundError +from checkmates.pipelines import ComponentBase from checkmates.utils import infer_feature_types from checkmates.utils.nullable_type_utils import _get_new_logical_types_for_imputed_data + class Transformer(ComponentBase): """A component that may or may not need fitting that transforms data. These components are used before an estimator. @@ -83,8 +82,10 @@ def fit_transform(self, X, y=None): def _get_feature_provenance(self): return {} + """Component that imputes missing data according to a specified imputation strategy.""" + class SimpleImputer(Transformer): """Imputes missing data according to a specified imputation strategy. Natural language columns are ignored. 
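# --- Illustrative usage sketch (not part of the diff above) -------------------
# A minimal, hedged sketch of the SimpleImputer defined in PATCH 1/3, assuming
# the fit/transform flow shown above. The single nullable-integer column below
# is hypothetical and chosen to match the component's missing_values=pd.NA.
import pandas as pd

from checkmates.pipelines.transformers import SimpleImputer

X = pd.DataFrame({"num": pd.array([1, None, 3, 3], dtype="Int64")})

imputer = SimpleImputer(impute_strategy="most_frequent")
X_imputed = imputer.fit_transform(X)
# The missing entry should be replaced by the column's most frequent value (3),
# and Woodwork typing is re-initialized on the imputed frame.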
@@ -235,4 +236,4 @@ def fit_transform(self, X, y=None): Returns: pd.DataFrame: Transformed X """ - return self.fit(X, y).transform(X, y) \ No newline at end of file + return self.fit(X, y).transform(X, y) diff --git a/checkmates/pipelines/utils.py b/checkmates/pipelines/utils.py index c63f1b1..5f4e555 100644 --- a/checkmates/pipelines/utils.py +++ b/checkmates/pipelines/utils.py @@ -1,9 +1,9 @@ """Utility methods for EvalML pipelines.""" -from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit from typing import Union + import numpy as np import pandas as pd - +from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit from checkmates.data_checks import DataCheckActionCode from checkmates.pipelines.components import ( # noqa: F401 @@ -14,11 +14,9 @@ TimeSeriesImputer, TimeSeriesRegularizer, ) -from checkmates.utils import infer_feature_types from checkmates.pipelines.training_validation_split import TrainingValidationSplit from checkmates.problem_types import is_classification, is_regression, is_time_series - - +from checkmates.utils import infer_feature_types def _make_component_list_from_actions(actions): @@ -70,6 +68,7 @@ def _make_component_list_from_actions(actions): return components + def split_data( X, y, @@ -163,10 +162,11 @@ def split_data( return X_train, X_test, y_train, y_test + def drop_infinity( data: Union[pd.DataFrame, pd.Series], ) -> Union[pd.DataFrame, pd.Series]: - """Removes infinity values""" + """Removes infinity values.""" ww = data.ww._schema is not None replace = data.ww.replace if ww else data.replace return replace([np.inf, -np.inf], np.nan) diff --git a/checkmates/utils/base_meta.py b/checkmates/utils/base_meta.py index 615d333..0b780e2 100644 --- a/checkmates/utils/base_meta.py +++ b/checkmates/utils/base_meta.py @@ -43,4 +43,4 @@ def __new__(cls, name, bases, dct): property_orig.__delattr__, property_orig.__doc__, ) - return super().__new__(cls, name, bases, dct) \ No newline at end of file + return super().__new__(cls, name, bases, dct) diff --git a/checkmates/utils/gen_utils.py b/checkmates/utils/gen_utils.py index 49bba20..d61bc42 100644 --- a/checkmates/utils/gen_utils.py +++ b/checkmates/utils/gen_utils.py @@ -1,8 +1,9 @@ """General utility methods.""" import logging from collections import namedtuple -import pandas as pd + import numpy as np +import pandas as pd logger = logging.getLogger(__name__) @@ -150,6 +151,7 @@ def are_ts_parameters_valid_for_split( ) return _validation_result(not msg, msg, train_size, window_size, n_obs, n_splits) + def safe_repr(value): """Convert the given value into a string that can safely be used for repr. 
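# --- Illustrative usage sketch (not part of the diff above) -------------------
# Tiny examples of the safe_repr helper added to gen_utils.py; the expected
# strings follow directly from its float/NaN/inf branches.
from checkmates.utils import safe_repr

safe_repr(2.5)           # "2.5"
safe_repr(float("nan"))  # "np.nan"
safe_repr(float("inf"))  # "float('inf')"
safe_repr("abc")         # "'abc'"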
@@ -164,4 +166,4 @@ def safe_repr(value): return "np.nan" if np.isinf(value): return f"float('{repr(value)}')" - return repr(value) \ No newline at end of file + return repr(value) diff --git a/checkmates/utils/logger.py b/checkmates/utils/logger.py index 6a94918..a0b03cd 100644 --- a/checkmates/utils/logger.py +++ b/checkmates/utils/logger.py @@ -75,4 +75,4 @@ def log_batch_times(logger, batch_times): + ": " + f"{batch_times[batch_number][pipeline_name]:.2f} seconds", ) - logger.info("") \ No newline at end of file + logger.info("") diff --git a/checkmates/utils/nullable_type_utils.py b/checkmates/utils/nullable_type_utils.py index 01bb6a6..5e86330 100644 --- a/checkmates/utils/nullable_type_utils.py +++ b/checkmates/utils/nullable_type_utils.py @@ -1,3 +1,4 @@ +"""Nullable Type Utils for CheckMates.""" import woodwork as ww from woodwork.logical_types import AgeNullable, BooleanNullable, IntegerNullable @@ -9,8 +10,7 @@ def _downcast_nullable_X(X, handle_boolean_nullable=True, handle_integer_nullable=True): - """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming - to other dtypes via Woodwork logical type transformations. + """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming to other dtypes via Woodwork logical type transformations. Args: X (pd.DataFrame): Input data of shape [n_samples, n_features] whose nullable types will be changed. @@ -44,8 +44,7 @@ def _downcast_nullable_X(X, handle_boolean_nullable=True, handle_integer_nullabl def _downcast_nullable_y(y, handle_boolean_nullable=True, handle_integer_nullable=True): - """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming - to other dtypes via Woodwork logical type transformations. + """Removes Pandas nullable integer and nullable boolean dtypes from data by transforming to other dtypes via Woodwork logical type transformations. Args: y (pd.Series): Target data of shape [n_samples] whose nullable types will be changed. @@ -93,6 +92,7 @@ def _get_incompatible_nullable_types(handle_boolean_nullable, handle_integer_nul def _determine_downcast_type(col): """Determines what logical type to downcast to based on whether nans were present or not. + - BooleanNullable becomes Boolean if nans are not present and Categorical if they are - IntegerNullable becomes Integer if nans are not present and Double if they are. - AgeNullable becomes Age if nans are not present and AgeFractional if they are. @@ -113,6 +113,7 @@ def _determine_downcast_type(col): def _determine_fractional_type(logical_type): """Determines what logical type to use for integer data that has fractional values imputed. + - IntegerNullable becomes Double. - AgeNullable becomes AgeFractional. - All other logical types are returned unchanged. @@ -133,6 +134,7 @@ def _determine_fractional_type(logical_type): def _determine_non_nullable_equivalent(logical_type): """Determines the non nullable equivalent logical type to use for nullable types. These types cannot support null values. + - IntegerNullable becomes Integer. - AgeNullable becomes Age. - BooleanNullable becomes Boolean. @@ -166,6 +168,7 @@ def _get_new_logical_types_for_imputed_data( dict[str, ww.LogicalType]: Updated logical types to use for imputed data. 
""" # Some impute strategies will always maintain integer values, so we can use the original logical types + if impute_strategy in {"most_frequent", "forwards_fill", "backwards_fill"}: return original_schema.logical_types @@ -174,4 +177,4 @@ def _get_new_logical_types_for_imputed_data( if isinstance(ltype, (AgeNullable, IntegerNullable)) else ltype for col, ltype in original_schema.logical_types.items() - } \ No newline at end of file + } From 3711bbd817a16c750da2afc12790b5b99525ef7a Mon Sep 17 00:00:00 2001 From: Nabil Fayak Date: Fri, 18 Aug 2023 12:25:04 -0400 Subject: [PATCH 3/3] release notes updated --- docs/source/release_notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index eaf81f1..0223344 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -11,6 +11,7 @@ Release Notes * Enhancements * Added all datachecks except `invalid_target_data_check` along with tests and utils, migrated over from `EvalML` :pr:`15` * Added ``invalid_target_data_check`` along with all tests, utils, and objectives, migrated from ``EvalML`` :pr:`17` + * Added modules necessary to remove ``EvalML`` dependencies within testing environment :pr:`19` * Documentation Changes * Updated readme.md, contrubuting.md, and releases.md to reflect CheckMates package installation, quickstart, and useful links :pr:`13`