From 70652b9bda5a1053fe178a94ba42e5edc3db6e64 Mon Sep 17 00:00:00 2001 From: Shripad Badithe <60528327+sbadithe@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:56:41 -0800 Subject: [PATCH] Fix importlib DeprecationWarning (#1584) * Update importlib API * resolve broken test with Python3.11 error message * fix some FutureWarnings --- docs/source/release_notes.rst | 8 +++++--- pyproject.toml | 1 + woodwork/tests/accessor/test_statistics.py | 11 ++++++++--- woodwork/tests/accessor/test_table_accessor.py | 3 +-- woodwork/tests/fixtures/datetime_freq.py | 4 ++-- .../requirement_files/minimum_core_requirements.txt | 1 + .../requirement_files/minimum_dask_requirements.txt | 1 + .../requirement_files/minimum_spark_requirements.txt | 1 + woodwork/type_sys/inference_functions.py | 7 ++++--- 9 files changed, 24 insertions(+), 13 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index effa4d024..78cc22f5b 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -3,15 +3,17 @@ Release Notes ------------- -.. Future Release - ============== +Future Release +============== * Enhancements * Fixes + * Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:`1584`) * Changes * Documentation Changes * Testing Changes -.. Thanks to the following people for contributing to this release: + Thanks to the following people for contributing to this release: + :user:`sbadithe` v0.21.0 December 1, 2022 ======================== diff --git a/pyproject.toml b/pyproject.toml index a59aad3dc..8cb34972e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "scikit-learn >= 0.22", "python-dateutil >= 2.8.1", "scipy >= 1.4.0", + "importlib-resources >= 5.10.0" ] [project.urls] diff --git a/woodwork/tests/accessor/test_statistics.py b/woodwork/tests/accessor/test_statistics.py index 9856df75e..e0c13bbe8 100644 --- a/woodwork/tests/accessor/test_statistics.py +++ b/woodwork/tests/accessor/test_statistics.py @@ -58,6 +58,7 @@ from woodwork.tests.testing_utils import ( _check_close, check_empty_box_plot_dict, + concat_dataframe_or_series, dep_between_cols, to_pandas, ) @@ -2087,7 +2088,10 @@ def test_get_low_high_bound_warnings(): def test_get_medcouple(outliers_df_pandas, skewed_outliers_df_pandas): has_outliers_series = outliers_df_pandas["has_outliers"] - has_outliers_series = has_outliers_series.append(pd.Series([39]), ignore_index=True) + has_outliers_series = pd.concat( + [has_outliers_series, pd.Series([39], dtype="int64")], + ignore_index=True, + ) has_outliers_series.ww.init() mc = _get_medcouple_statistic(has_outliers_series) assert mc == 0.122 @@ -2122,7 +2126,8 @@ def test_determine_best_outlier_method_sampling_outcome(skewed_outliers_df_panda def test_determine_best_outlier_method_equivalent_outcome( - outliers_df_pandas, skewed_outliers_df_pandas + outliers_df_pandas, + skewed_outliers_df_pandas, ): contains_nans_series_skewed = skewed_outliers_df_pandas["right_skewed_outliers"] contains_nans_series_skewed.ww.init() @@ -2313,7 +2318,7 @@ def test_spearman_ordinal(df_mi, use_ordinal): else: df_mi.ww.init() sp = df_mi.ww.dependence(measures=["spearman"]) - valid_sp_columns = (sp.column_1.append(sp.column_2)).unique() + valid_sp_columns = concat_dataframe_or_series(sp.column_1, sp.column_2).unique() assert "strs" not in valid_sp_columns if use_ordinal: assert "strs2" in valid_sp_columns diff --git a/woodwork/tests/accessor/test_table_accessor.py b/woodwork/tests/accessor/test_table_accessor.py index bbba750fe..dfde8a407 100644 --- a/woodwork/tests/accessor/test_table_accessor.py +++ b/woodwork/tests/accessor/test_table_accessor.py @@ -2456,8 +2456,7 @@ def test_accessor_schema_properties(sample_df): assert prop_from_accessor == prop_from_schema # Assumes we don't have setters for any of these attributes - error = "can't set attribute" - with pytest.raises(AttributeError, match=error): + with pytest.raises(AttributeError): setattr(sample_df.ww, schema_property, "new_value") diff --git a/woodwork/tests/fixtures/datetime_freq.py b/woodwork/tests/fixtures/datetime_freq.py index 432cdebe4..49cce8dd6 100644 --- a/woodwork/tests/fixtures/datetime_freq.py +++ b/woodwork/tests/fixtures/datetime_freq.py @@ -50,8 +50,8 @@ def pad_datetime_series(dates, freq, pad_start=0, pad_end=100): dates = [pd.Timestamp(d) for d in dates] - head = pd.Series([]) - tail = pd.Series([]) + head = pd.Series([], dtype="datetime64[ns]") + tail = pd.Series([], dtype="datetime64[ns]") if pad_start > 0: head = ( diff --git a/woodwork/tests/requirement_files/minimum_core_requirements.txt b/woodwork/tests/requirement_files/minimum_core_requirements.txt index 4841761b6..543446918 100644 --- a/woodwork/tests/requirement_files/minimum_core_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_core_requirements.txt @@ -2,3 +2,4 @@ pandas==1.4.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file diff --git a/woodwork/tests/requirement_files/minimum_dask_requirements.txt b/woodwork/tests/requirement_files/minimum_dask_requirements.txt index 9c2c0caa1..fdf787dbb 100644 --- a/woodwork/tests/requirement_files/minimum_dask_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_dask_requirements.txt @@ -3,3 +3,4 @@ pandas==1.4.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file diff --git a/woodwork/tests/requirement_files/minimum_spark_requirements.txt b/woodwork/tests/requirement_files/minimum_spark_requirements.txt index 5e14f7059..18826c9d6 100644 --- a/woodwork/tests/requirement_files/minimum_spark_requirements.txt +++ b/woodwork/tests/requirement_files/minimum_spark_requirements.txt @@ -3,3 +3,4 @@ pyspark==3.2.0 python-dateutil==2.8.1 scikit-learn==0.22 scipy==1.4.0 +importlib-resources==5.10.0 \ No newline at end of file diff --git a/woodwork/type_sys/inference_functions.py b/woodwork/type_sys/inference_functions.py index af543be63..63043bf0f 100644 --- a/woodwork/type_sys/inference_functions.py +++ b/woodwork/type_sys/inference_functions.py @@ -1,13 +1,12 @@ import sys -from importlib import resources as pkg_resources from typing import Any, Callable, Iterable, Union import numpy as np import pandas as pd +from importlib_resources import files from pandas.api import types as pdtypes import woodwork as ww -from woodwork import data from woodwork.config import config from woodwork.type_sys.utils import _is_categorical_series, col_is_datetime @@ -16,7 +15,9 @@ Tokens = Iterable[str] COMMON_WORDS_SET = set( - line.strip().lower() for line in pkg_resources.open_text(data, "1-1000.txt") + word.strip().lower() + for word in files("woodwork.data").joinpath("1-1000.txt").read_text().split("\n") + if len(word) > 0 ) NL_delimiters = r"[- \[\].,!\?;\n]"