From 4e76e5de7bfe5ed45860531dfe66cdffa1582e6e Mon Sep 17 00:00:00 2001
From: bchen1116 <bchen1116@gmail.com>
Date: Thu, 9 Mar 2023 14:15:39 +0800
Subject: [PATCH 1/3] Update TargetLeakageDataCheck and its tests for boolean targets

---
 docs/source/release_notes.rst                 |  1 +
 .../data_checks/target_leakage_data_check.py  |  5 +++++
 .../test_target_leakage_data_check.py         | 22 ++++++++++++++-----
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 8623081feb..51a914ed3a 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -9,6 +9,7 @@ Release Notes
         * Add ``get_evalml_requirements_file`` :pr:`4034`
         * Pipelines with DFS Transformers will run fast permutation importance if DFS features pre-exist :pr:`4037`
     * Fixes
+        * Updated ``TargetLeakageDataCheck`` to handle boolean targets properly :pr:``
     * Changes
         * Uncapped ``pmdarima`` and updated minimum version :pr:`4027`
         * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`
diff --git a/evalml/data_checks/target_leakage_data_check.py b/evalml/data_checks/target_leakage_data_check.py
index 4594cc23e2..620e896868 100644
--- a/evalml/data_checks/target_leakage_data_check.py
+++ b/evalml/data_checks/target_leakage_data_check.py
@@ -48,11 +48,16 @@ def _calculate_dependence(self, X, y):
         while target_str in list(X2.columns):
             target_str += "_y"
         X2.ww[target_str] = y
+        # bool_columns = X2.ww.select(["Boolean", "BooleanNullable"]).columns.values
+        # bool_to_int = {col: "IntegerNullable" for col in bool_columns}
+        # X2.ww.set_types(bool_to_int)
+        # print(X2.ww.types)
         try:
             dep_corr = X2.ww.dependence_dict(
                 measures=self.method,
                 target_col=target_str,
             )
+            print(dep_corr)
         except KeyError:
             # keyError raised when the target does not appear due to incompatibility with the metric, return []
             return []
diff --git a/evalml/tests/data_checks_tests/test_target_leakage_data_check.py b/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
index b443fb669f..6b43aebb94 100644
--- a/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
+++ b/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
@@ -188,7 +188,7 @@ def test_target_leakage_types():
     ] * 6 + [datetime.strptime("2015", "%Y")]
     X["d"] = ~y
     X["e"] = np.zeros(len(y))
-    y = y.astype(bool)
+    # y = y.astype(bool)
     X.ww.init(logical_types={"a": "categorical", "d": "Boolean", "b": "Boolean"})
 
     expected = [
@@ -356,8 +356,21 @@ def test_target_leakage_data_check_warnings_pearson():
     y = y.astype(bool)
 
     leakage_check = TargetLeakageDataCheck(pct_corr_threshold=0.5, method="pearson")
-    # pearsons does not support boolean columns
-    assert leakage_check.validate(X, y) == []
+    assert leakage_check.validate(X, y) == [
+        DataCheckWarning(
+            message="Columns 'a', 'b', 'c', 'd' are 50.0% or more correlated with the target",
+            data_check_name=target_leakage_data_check_name,
+            message_code=DataCheckMessageCode.TARGET_LEAKAGE,
+            details={"columns": ["a", "b", "c", "d"]},
+            action_options=[
+                DataCheckActionOption(
+                    DataCheckActionCode.DROP_COL,
+                    data_check_name=target_leakage_data_check_name,
+                    metadata={"columns": ["a", "b", "c", "d"]},
+                ),
+            ],
+        ).to_dict(),
+    ]
 
     y = y.astype(int)
     assert leakage_check.validate(X, y) == [
@@ -447,9 +460,6 @@ def test_target_leakage_none_measures(measures):
     X["b"] = y
     y = y.astype(bool)
 
-    if measures in ["pearson", "spearman"]:
-        assert leakage_check.validate(X, y) == []
-        return
     assert len(leakage_check.validate(X, y))
 
 

From 4ed1c643a98690b86d2835b0408f5ed1ada5acc3 Mon Sep 17 00:00:00 2001
From: bchen1116 <bchen1116@gmail.com>
Date: Thu, 9 Mar 2023 14:17:41 +0800
Subject: [PATCH 2/3] update release notes

---
 docs/source/release_notes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 51a914ed3a..dffe486bb0 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -9,7 +9,7 @@ Release Notes
         * Add ``get_evalml_requirements_file`` :pr:`4034`
         * Pipelines with DFS Transformers will run fast permutation importance if DFS features pre-exist :pr:`4037`
     * Fixes
-        * Updated ``TargetLeakageDataCheck`` to handle boolean targets properly :pr:``
+        * Updated ``TargetLeakageDataCheck`` to handle boolean targets properly :pr:`4061`
     * Changes
         * Uncapped ``pmdarima`` and updated minimum version :pr:`4027`
         * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`

From 0141795106dd3fdbd53f4c409ad4667474f91ae7 Mon Sep 17 00:00:00 2001
From: bchen1116 <bchen1116@gmail.com>
Date: Fri, 10 Mar 2023 12:03:58 +0800
Subject: [PATCH 3/3] Restore boolean target in test_target_leakage_types and expect column 'c'

---
 .../data_checks_tests/test_target_leakage_data_check.py   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evalml/tests/data_checks_tests/test_target_leakage_data_check.py b/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
index 6b43aebb94..11d47d7945 100644
--- a/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
+++ b/evalml/tests/data_checks_tests/test_target_leakage_data_check.py
@@ -188,20 +188,20 @@ def test_target_leakage_types():
     ] * 6 + [datetime.strptime("2015", "%Y")]
     X["d"] = ~y
     X["e"] = np.zeros(len(y))
-    # y = y.astype(bool)
+    y = y.astype(bool)
     X.ww.init(logical_types={"a": "categorical", "d": "Boolean", "b": "Boolean"})
 
     expected = [
         DataCheckWarning(
-            message="Columns 'a', 'b' are 80.0% or more correlated with the target",
+            message="Columns 'a', 'b', 'c' are 80.0% or more correlated with the target",
             data_check_name=target_leakage_data_check_name,
             message_code=DataCheckMessageCode.TARGET_LEAKAGE,
-            details={"columns": ["a", "b"]},
+            details={"columns": ["a", "b", "c"]},
             action_options=[
                 DataCheckActionOption(
                     DataCheckActionCode.DROP_COL,
                     data_check_name=target_leakage_data_check_name,
-                    metadata={"columns": ["a", "b"]},
+                    metadata={"columns": ["a", "b", "c"]},
                 ),
             ],
         ).to_dict(),