From 0142f075c5ce64cea47e2c414b345f7cfe4e9aca Mon Sep 17 00:00:00 2001 From: sbadithe Date: Tue, 22 Nov 2022 15:12:02 -0800 Subject: [PATCH 01/18] Update importlib API --- docs/source/release_notes.rst | 1 + woodwork/type_sys/inference_functions.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index de411e8ce..2abacd8a6 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -15,6 +15,7 @@ Future Release * Update demo dataset links to point to new endpoint (:pr:`1570`) * Fix DivisionByZero error in ``type_system.py`` (:pr:`1571`) * Fixed issue where forcing a ``Boolean`` logical type on a column of 0.0s and 1.0s caused incorrect transformation (:pr:`1576`) + * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:``) * Changes * Unpin dask dependency (:pr:`1561`) * Changed the sampling strategy for type inference from ``head`` to random (:pr:`1566`) diff --git a/woodwork/type_sys/inference_functions.py b/woodwork/type_sys/inference_functions.py index f6d1699d5..6b00c1f94 100644 --- a/woodwork/type_sys/inference_functions.py +++ b/woodwork/type_sys/inference_functions.py @@ -1,5 +1,5 @@ import sys -from importlib import resources as pkg_resources +from importlib.resources import files from typing import Any, Callable, Iterable, Union import numpy as np @@ -7,7 +7,6 @@ from pandas.api import types as pdtypes import woodwork as ww -from woodwork import data from woodwork.config import config from woodwork.type_sys.utils import _is_categorical_series, col_is_datetime @@ -16,7 +15,7 @@ Tokens = Iterable[str] COMMON_WORDS_SET = set( - line.strip().lower() for line in pkg_resources.open_text(data, "1-1000.txt") + line.strip().lower() for line in files("woodwork.data").joinpath("1-1000.txt").read_text() ) NL_delimiters = r"[- \[\].,!\?;\n]" From 0a7e9472f248e8339366b9e38a91d67c10356c5f Mon Sep 17 00:00:00 2001 From: sbadithe Date: Tue, 22 Nov 2022 15:26:34 -0800 Subject: [PATCH 02/18] try changing jsonschema --- docs/source/release_notes.rst | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 2abacd8a6..53688d696 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -15,7 +15,7 @@ Future Release * Update demo dataset links to point to new endpoint (:pr:`1570`) * Fix DivisionByZero error in ``type_system.py`` (:pr:`1571`) * Fixed issue where forcing a ``Boolean`` logical type on a column of 0.0s and 1.0s caused incorrect transformation (:pr:`1576`) - * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:``) + * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:`1584`) * Changes * Unpin dask dependency (:pr:`1561`) * Changed the sampling strategy for type inference from ``head`` to random (:pr:`1566`) diff --git a/pyproject.toml b/pyproject.toml index a59aad3dc..85a689818 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", + "jsonschema == 3.0.2" ] [project.urls] From a986faff1e3fc8daeabf2c938a11546e5b0d5210 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 15:53:09 -0800 Subject: [PATCH 03/18] fix files API call. --- woodwork/type_sys/inference_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/woodwork/type_sys/inference_functions.py b/woodwork/type_sys/inference_functions.py index dcf5e031b..a0339c107 100644 --- a/woodwork/type_sys/inference_functions.py +++ b/woodwork/type_sys/inference_functions.py @@ -15,7 +15,8 @@ Tokens = Iterable[str] COMMON_WORDS_SET = set( - line.strip().lower() for line in files("woodwork.data").joinpath("1-1000.txt").read_text() + word.strip().lower() + for word in files("woodwork.data").joinpath("1-1000.txt").read_text().split("\n") ) NL_delimiters = r"[- \[\].,!\?;\n]" From bef1cee12efc0f80c1b151d4de7edc7aba1a91eb Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 15:54:09 -0800 Subject: [PATCH 04/18] refactor test to account for new py3.11 error msg --- woodwork/tests/accessor/test_table_accessor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/woodwork/tests/accessor/test_table_accessor.py b/woodwork/tests/accessor/test_table_accessor.py index bbba750fe..163e84ea0 100644 --- a/woodwork/tests/accessor/test_table_accessor.py +++ b/woodwork/tests/accessor/test_table_accessor.py @@ -1,10 +1,13 @@ import re from inspect import isclass +from platform import python_version +from string import punctuation from unittest.mock import patch import numpy as np import pandas as pd import pytest +from packaging.version import Version import woodwork as ww from woodwork.accessor_utils import ( @@ -2456,7 +2459,11 @@ def test_accessor_schema_properties(sample_df): assert prop_from_accessor == prop_from_schema # Assumes we don't have setters for any of these attributes - error = "can't set attribute" + if Version(python_version().strip(punctuation)) < Version("3.11.0"): + error = "can't set attribute" + else: + # Error messages were changed as part of Python 3.11 + error = "has no setter" with pytest.raises(AttributeError, match=error): setattr(sample_df.ww, schema_property, "new_value") From 2300069454ac99c846e73d3a7ac2f72269f1979b Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 16:18:33 -0800 Subject: [PATCH 05/18] try requiring only latest importlib --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 85a689818..8cb34972e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "jsonschema == 3.0.2" + "importlib-resources >= 5.10.0" ] [project.urls] From 307e86bc5b5d9bdcc2ce146d47dab84c98e22597 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 16:49:13 -0800 Subject: [PATCH 06/18] try requiring importlib-metadata and importlib-resources --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8cb34972e..a762fd6fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,8 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "importlib-resources >= 5.10.0" + "importlib-metadata == 5.1.0", + "importlib-resources == 5.10.0" ] [project.urls] From d6765fab6b97b745f3e0a2b6a2e8839caf1ca91b Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 16:55:51 -0800 Subject: [PATCH 07/18] try not requiring any version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a762fd6fa..dd725552c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,8 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "importlib-metadata == 5.1.0", - "importlib-resources == 5.10.0" + "importlib-metadata", + "importlib-resources" ] [project.urls] From 5bbf03ed21f60278429c5ce2c4ba9cced5d0dd6f Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 17:10:19 -0800 Subject: [PATCH 08/18] try changing import name --- woodwork/type_sys/inference_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/woodwork/type_sys/inference_functions.py b/woodwork/type_sys/inference_functions.py index a0339c107..63043bf0f 100644 --- a/woodwork/type_sys/inference_functions.py +++ b/woodwork/type_sys/inference_functions.py @@ -1,9 +1,9 @@ import sys -from importlib.resources import files from typing import Any, Callable, Iterable, Union import numpy as np import pandas as pd +from importlib_resources import files from pandas.api import types as pdtypes import woodwork as ww @@ -17,6 +17,7 @@ COMMON_WORDS_SET = set( word.strip().lower() for word in files("woodwork.data").joinpath("1-1000.txt").read_text().split("\n") + if len(word) > 0 ) NL_delimiters = r"[- \[\].,!\?;\n]" From 843575c6f2e188c601ea329decc60c248f13e094 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 17:28:19 -0800 Subject: [PATCH 09/18] try making 5.10.0 minimal req --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dd725552c..8cb34972e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "importlib-metadata", - "importlib-resources" + "importlib-resources >= 5.10.0" ] [project.urls] From b9f684787475efde74c131109691d7f346bdde1d Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 17:32:19 -0800 Subject: [PATCH 10/18] lower minimal req for importlib-resources --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8cb34972e..f7cc9bd5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "importlib-resources >= 5.10.0" + "importlib-resources >= 1.3" ] [project.urls] From 7c23fdb78958acff83ad43130f3e297e93924f49 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Fri, 2 Dec 2022 17:43:13 -0800 Subject: [PATCH 11/18] update min req files --- woodwork/tests/requirement_files/minimum_core_requirements.txt | 1 + woodwork/tests/requirement_files/minimum_dask_requirements.txt | 1 + woodwork/tests/requirement_files/minimum_spark_requirements.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/woodwork/tests/requirement_files/minimum_core_requirements.txt b/woodwork/tests/requirement_files/minimum_core_requirements.txt index 4841761b6..543446918 100644 --- a/woodwork/tests/requirement_files/minimum_core_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_core_requirements.txt @@ -2,3 +2,4 @@ pandas==1.4.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file diff --git a/woodwork/tests/requirement_files/minimum_dask_requirements.txt b/woodwork/tests/requirement_files/minimum_dask_requirements.txt index 9c2c0caa1..fdf787dbb 100644 --- a/woodwork/tests/requirement_files/minimum_dask_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_dask_requirements.txt @@ -3,3 +3,4 @@ pandas==1.4.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file diff --git a/woodwork/tests/requirement_files/minimum_spark_requirements.txt b/woodwork/tests/requirement_files/minimum_spark_requirements.txt index 5e14f7059..18826c9d6 100644 --- a/woodwork/tests/requirement_files/minimum_spark_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_spark_requirements.txt @@ -3,3 +3,4 @@ pyspark==3.2.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file From 6f8195955547cdd49d764f1acedae6f63ad9c73b Mon Sep 17 00:00:00 2001 From: sbadithe Date: Mon, 5 Dec 2022 10:16:27 -0800 Subject: [PATCH 12/18] resolve some more of the warnings when running tests --- pyproject.toml | 2 +- woodwork/tests/accessor/test_statistics.py | 11 ++++++++--- woodwork/tests/fixtures/datetime_freq.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7cc9bd5f..8cb34972e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", - "importlib-resources >= 1.3" + "importlib-resources >= 5.10.0" ] [project.urls] diff --git a/woodwork/tests/accessor/test_statistics.py b/woodwork/tests/accessor/test_statistics.py index 9856df75e..e0c13bbe8 100644 --- a/woodwork/tests/accessor/test_statistics.py +++ b/woodwork/tests/accessor/test_statistics.py @@ -58,6 +58,7 @@ from woodwork.tests.testing_utils import ( _check_close, check_empty_box_plot_dict, + concat_dataframe_or_series, dep_between_cols, to_pandas, ) @@ -2087,7 +2088,10 @@ def test_get_low_high_bound_warnings(): def test_get_medcouple(outliers_df_pandas, skewed_outliers_df_pandas): has_outliers_series = outliers_df_pandas["has_outliers"] - has_outliers_series = has_outliers_series.append(pd.Series([39]), ignore_index=True) + has_outliers_series = pd.concat( + [has_outliers_series, pd.Series([39], dtype="int64")], + ignore_index=True, + ) has_outliers_series.ww.init() mc = _get_medcouple_statistic(has_outliers_series) assert mc == 0.122 @@ -2122,7 +2126,8 @@ def test_determine_best_outlier_method_sampling_outcome(skewed_outliers_df_panda def test_determine_best_outlier_method_equivalent_outcome( - outliers_df_pandas, skewed_outliers_df_pandas + outliers_df_pandas, + skewed_outliers_df_pandas, ): contains_nans_series_skewed = skewed_outliers_df_pandas["right_skewed_outliers"] contains_nans_series_skewed.ww.init() @@ -2313,7 +2318,7 @@ def test_spearman_ordinal(df_mi, use_ordinal): else: df_mi.ww.init() sp = df_mi.ww.dependence(measures=["spearman"]) - valid_sp_columns = (sp.column_1.append(sp.column_2)).unique() + valid_sp_columns = concat_dataframe_or_series(sp.column_1, sp.column_2).unique() assert "strs" not in valid_sp_columns if use_ordinal: assert "strs2" in valid_sp_columns diff --git a/woodwork/tests/fixtures/datetime_freq.py b/woodwork/tests/fixtures/datetime_freq.py index 432cdebe4..4a0b264dd 100644 --- a/woodwork/tests/fixtures/datetime_freq.py +++ b/woodwork/tests/fixtures/datetime_freq.py @@ -50,8 +50,8 @@ def pad_datetime_series(dates, freq, pad_start=0, pad_end=100): dates = [pd.Timestamp(d) for d in dates] - head = pd.Series([]) - tail = pd.Series([]) + head = pd.Series([], dtype="float64") + tail = pd.Series([], dtype="float64") if pad_start > 0: head = ( From 9af6d8c3499f925ae59761b659f8f15b2033bcc2 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Mon, 5 Dec 2022 10:20:23 -0800 Subject: [PATCH 13/18] update release notes --- docs/source/release_notes.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 98731537a..1ec419455 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -3,15 +3,18 @@ Release Notes ------------- -.. Future Release - ============== +Future Release +============== * Enhancements * Fixes + * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:`1584`) * Changes * Documentation Changes * Testing Changes + * Add handling of Python 3.11 error messages in ``test_table_accessor.py``(:pr:`1584`) -.. Thanks to the following people for contributing to this release: + Thanks to the following people for contributing to this release: + :user:`sbadithe` v0.21.0 December 1, 2022 ======================== @@ -26,7 +29,6 @@ v0.21.0 December 1, 2022 * Fix DivisionByZero error in ``type_system.py`` (:pr:`1571`) * Fix Categorical dtype inference for ``PostalCode`` logical type (:pr:`1574`) * Fixed issue where forcing a ``Boolean`` logical type on a column of 0.0s and 1.0s caused incorrect transformation (:pr:`1576`) - * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:`1584`) * Changes * Unpin dask dependency (:pr:`1561`) * Changed the sampling strategy for type inference from ``head`` to random (:pr:`1566`) From 6002f70eabd302561434ec0f89417b35d9e14c7d Mon Sep 17 00:00:00 2001 From: sbadithe Date: Mon, 5 Dec 2022 10:31:41 -0800 Subject: [PATCH 14/18] Fix typo release notes --- docs/source/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 1ec419455..388aeddbc 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -11,7 +11,7 @@ Future Release * Changes * Documentation Changes * Testing Changes - * Add handling of Python 3.11 error messages in ``test_table_accessor.py``(:pr:`1584`) + * Add handling of Python 3.11 error messages in ``test_table_accessor.py`` (:pr:`1584`) Thanks to the following people for contributing to this release: :user:`sbadithe` From 4a2c1521e12249b1edbd4d78194a06c19b2fc39d Mon Sep 17 00:00:00 2001 From: sbadithe Date: Tue, 6 Dec 2022 11:21:31 -0800 Subject: [PATCH 15/18] PR comments --- woodwork/tests/accessor/test_table_accessor.py | 12 ++---------- woodwork/tests/fixtures/datetime_freq.py | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/woodwork/tests/accessor/test_table_accessor.py b/woodwork/tests/accessor/test_table_accessor.py index 163e84ea0..6437805bb 100644 --- a/woodwork/tests/accessor/test_table_accessor.py +++ b/woodwork/tests/accessor/test_table_accessor.py @@ -1,13 +1,11 @@ import re from inspect import isclass -from platform import python_version from string import punctuation from unittest.mock import patch import numpy as np import pandas as pd import pytest -from packaging.version import Version import woodwork as ww from woodwork.accessor_utils import ( @@ -2458,14 +2456,8 @@ def test_accessor_schema_properties(sample_df): assert prop_from_accessor == prop_from_schema - # Assumes we don't have setters for any of these attributes - if Version(python_version().strip(punctuation)) < Version("3.11.0"): - error = "can't set attribute" - else: - # Error messages were changed as part of Python 3.11 - error = "has no setter" - with pytest.raises(AttributeError, match=error): - setattr(sample_df.ww, schema_property, "new_value") + with pytest.raises(AttributeError): + setattr(sample_df.ww, schema_property) def test_sets_spark_option_on_init(sample_df_spark): diff --git a/woodwork/tests/fixtures/datetime_freq.py b/woodwork/tests/fixtures/datetime_freq.py index 4a0b264dd..49cce8dd6 100644 --- a/woodwork/tests/fixtures/datetime_freq.py +++ b/woodwork/tests/fixtures/datetime_freq.py @@ -50,8 +50,8 @@ def pad_datetime_series(dates, freq, pad_start=0, pad_end=100): dates = [pd.Timestamp(d) for d in dates] - head = pd.Series([], dtype="float64") - tail = pd.Series([], dtype="float64") + head = pd.Series([], dtype="datetime64[ns]") + tail = pd.Series([], dtype="datetime64[ns]") if pad_start > 0: head = ( From e4804b986291e449fa18f281dc0976a2b5d6647b Mon Sep 17 00:00:00 2001 From: sbadithe Date: Tue, 6 Dec 2022 11:26:33 -0800 Subject: [PATCH 16/18] lint and typo --- woodwork/tests/accessor/test_table_accessor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/woodwork/tests/accessor/test_table_accessor.py b/woodwork/tests/accessor/test_table_accessor.py index 6437805bb..fccc0c4d5 100644 --- a/woodwork/tests/accessor/test_table_accessor.py +++ b/woodwork/tests/accessor/test_table_accessor.py @@ -1,6 +1,5 @@ import re from inspect import isclass -from string import punctuation from unittest.mock import patch import numpy as np @@ -2457,7 +2456,7 @@ def test_accessor_schema_properties(sample_df): assert prop_from_accessor == prop_from_schema with pytest.raises(AttributeError): - setattr(sample_df.ww, schema_property) + setattr(sample_df.ww, schema_property, "new_value") def test_sets_spark_option_on_init(sample_df_spark): From 0590ef5d048d30a18f54ab25edd058b078f5c421 Mon Sep 17 00:00:00 2001 From: sbadithe Date: Tue, 6 Dec 2022 11:41:23 -0800 Subject: [PATCH 17/18] leave original comment --- woodwork/tests/accessor/test_table_accessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/woodwork/tests/accessor/test_table_accessor.py b/woodwork/tests/accessor/test_table_accessor.py index fccc0c4d5..dfde8a407 100644 --- a/woodwork/tests/accessor/test_table_accessor.py +++ b/woodwork/tests/accessor/test_table_accessor.py @@ -2455,6 +2455,7 @@ def test_accessor_schema_properties(sample_df): assert prop_from_accessor == prop_from_schema + # Assumes we don't have setters for any of these attributes with pytest.raises(AttributeError): setattr(sample_df.ww, schema_property, "new_value") From 15d5084093e5c976591b82cc6ddfe2d191e142fc Mon Sep 17 00:00:00 2001 From: Shripad Badithe <60528327+sbadithe@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:39:02 -0800 Subject: [PATCH 18/18] Update release_notes.rst --- docs/source/release_notes.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 388aeddbc..78cc22f5b 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -11,7 +11,6 @@ Future Release * Changes * Documentation Changes * Testing Changes - * Add handling of Python 3.11 error messages in ``test_table_accessor.py`` (:pr:`1584`) Thanks to the following people for contributing to this release: :user:`sbadithe`