alteryx · bchen1116 · Mar 9, 2023 · Mar 9, 2023 · Mar 10, 2023 · Mar 10, 2023
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -9,6 +9,7 @@ Release Notes
         * Add ``get_evalml_requirements_file`` :pr:`4034`
         * Pipelines with DFS Transformers will run fast permutation importance if DFS features pre-exist :pr:`4037`
     * Fixes
+        * Updated ``TargetLeakageDataCheck`` to handle boolean targets properly :pr:`4061`
         * Remove nullable types handling for ``OverSampler`` :pr:`4064`
     * Changes
         * Uncapped ``pmdarima`` and updated minimum version :pr:`4027`

diff --git a/evalml/data_checks/target_leakage_data_check.py b/evalml/data_checks/target_leakage_data_check.py
@@ -48,11 +48,16 @@ def _calculate_dependence(self, X, y):
         while target_str in list(X2.columns):
             target_str += "_y"
         X2.ww[target_str] = y
+        # NOTE(review): the Boolean -> IntegerNullable cast below is still disabled;
+        # either enable it or delete it before merge -- TODO confirm which is intended.
+        # bool_columns = X2.ww.select(["Boolean", "BooleanNullable"]).columns.values
+        # bool_to_int = {col: "IntegerNullable" for col in bool_columns}
+        # X2.ww.set_types(bool_to_int)
         try:
             dep_corr = X2.ww.dependence_dict(
                 measures=self.method,
                 target_col=target_str,
             )
         except KeyError:
             # keyError raised when the target does not appear due to incompatibility with the metric, return []
             return []

diff --git a/evalml/tests/data_checks_tests/test_target_leakage_data_check.py b/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
@@ -193,15 +193,15 @@ def test_target_leakage_types():
 
     expected = [
         DataCheckWarning(
-            message="Columns 'a', 'b' are 80.0% or more correlated with the target",
+            message="Columns 'a', 'b', 'c' are 80.0% or more correlated with the target",
             data_check_name=target_leakage_data_check_name,
             message_code=DataCheckMessageCode.TARGET_LEAKAGE,
-            details={"columns": ["a", "b"]},
+            details={"columns": ["a", "b", "c"]},
             action_options=[
                 DataCheckActionOption(
                     DataCheckActionCode.DROP_COL,
                     data_check_name=target_leakage_data_check_name,
-                    metadata={"columns": ["a", "b"]},
+                    metadata={"columns": ["a", "b", "c"]},
                 ),
             ],
         ).to_dict(),
@@ -356,8 +356,21 @@ def test_target_leakage_data_check_warnings_pearson():
     y = y.astype(bool)
 
     leakage_check = TargetLeakageDataCheck(pct_corr_threshold=0.5, method="pearson")
-    # pearsons does not support boolean columns
-    assert leakage_check.validate(X, y) == []
+    assert leakage_check.validate(X, y) == [
+        DataCheckWarning(
+            message="Columns 'a', 'b', 'c', 'd' are 50.0% or more correlated with the target",
+            data_check_name=target_leakage_data_check_name,
+            message_code=DataCheckMessageCode.TARGET_LEAKAGE,
+            details={"columns": ["a", "b", "c", "d"]},
+            action_options=[
+                DataCheckActionOption(
+                    DataCheckActionCode.DROP_COL,
+                    data_check_name=target_leakage_data_check_name,
+                    metadata={"columns": ["a", "b", "c", "d"]},
+                ),
+            ],
+        ).to_dict(),
+    ]
 
     y = y.astype(int)
     assert leakage_check.validate(X, y) == [
@@ -447,9 +460,6 @@ def test_target_leakage_none_measures(measures):
     X["b"] = y
     y = y.astype(bool)
 
-    if measures in ["pearson", "spearman"]:
-        assert leakage_check.validate(X, y) == []
-        return
     assert len(leakage_check.validate(X, y))