Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix importlib DeprecationWarning; remove expected test message to account for changes in Python 3.11 #1584

Merged
merged 19 commits into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
Release Notes
-------------

.. Future Release
==============
Future Release
==============
* Enhancements
* Fixes
* Fix importlib DeprecationWarning in ``inference_functions.py`` (:pr:`1584`)
* Changes
* Documentation Changes
* Testing Changes

.. Thanks to the following people for contributing to this release:
Thanks to the following people for contributing to this release:
:user:`sbadithe`

v0.21.0 December 1, 2022
========================
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
"scikit-learn >= 0.22",
"python-dateutil >= 2.8.1",
"scipy >= 1.4.0",
"importlib-resources >= 5.10.0"
sbadithe marked this conversation as resolved.
Show resolved Hide resolved
]

[project.urls]
Expand Down
11 changes: 8 additions & 3 deletions woodwork/tests/accessor/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from woodwork.tests.testing_utils import (
_check_close,
check_empty_box_plot_dict,
concat_dataframe_or_series,
dep_between_cols,
to_pandas,
)
Expand Down Expand Up @@ -2087,7 +2088,10 @@ def test_get_low_high_bound_warnings():

def test_get_medcouple(outliers_df_pandas, skewed_outliers_df_pandas):
has_outliers_series = outliers_df_pandas["has_outliers"]
has_outliers_series = has_outliers_series.append(pd.Series([39]), ignore_index=True)
has_outliers_series = pd.concat(
sbadithe marked this conversation as resolved.
Show resolved Hide resolved
[has_outliers_series, pd.Series([39], dtype="int64")],
ignore_index=True,
)
has_outliers_series.ww.init()
mc = _get_medcouple_statistic(has_outliers_series)
assert mc == 0.122
Expand Down Expand Up @@ -2122,7 +2126,8 @@ def test_determine_best_outlier_method_sampling_outcome(skewed_outliers_df_panda


def test_determine_best_outlier_method_equivalent_outcome(
outliers_df_pandas, skewed_outliers_df_pandas
outliers_df_pandas,
skewed_outliers_df_pandas,
):
contains_nans_series_skewed = skewed_outliers_df_pandas["right_skewed_outliers"]
contains_nans_series_skewed.ww.init()
Expand Down Expand Up @@ -2313,7 +2318,7 @@ def test_spearman_ordinal(df_mi, use_ordinal):
else:
df_mi.ww.init()
sp = df_mi.ww.dependence(measures=["spearman"])
valid_sp_columns = (sp.column_1.append(sp.column_2)).unique()
valid_sp_columns = concat_dataframe_or_series(sp.column_1, sp.column_2).unique()
sbadithe marked this conversation as resolved.
Show resolved Hide resolved
assert "strs" not in valid_sp_columns
if use_ordinal:
assert "strs2" in valid_sp_columns
Expand Down
3 changes: 1 addition & 2 deletions woodwork/tests/accessor/test_table_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2456,8 +2456,7 @@ def test_accessor_schema_properties(sample_df):
assert prop_from_accessor == prop_from_schema

# Assumes we don't have setters for any of these attributes
error = "can't set attribute"
with pytest.raises(AttributeError, match=error):
with pytest.raises(AttributeError):
setattr(sample_df.ww, schema_property, "new_value")


Expand Down
4 changes: 2 additions & 2 deletions woodwork/tests/fixtures/datetime_freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@
def pad_datetime_series(dates, freq, pad_start=0, pad_end=100):
dates = [pd.Timestamp(d) for d in dates]

head = pd.Series([])
tail = pd.Series([])
head = pd.Series([], dtype="datetime64[ns]")
tail = pd.Series([], dtype="datetime64[ns]")

if pad_start > 0:
head = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ pandas==1.4.0
python-dateutil==2.8.1
scikit-learn==0.22
scipy==1.4.0
importlib-resources==5.10.0
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pandas==1.4.0
python-dateutil==2.8.1
scikit-learn==0.22
scipy==1.4.0
importlib-resources==5.10.0
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pyspark==3.2.0
python-dateutil==2.8.1
scikit-learn==0.22
scipy==1.4.0
importlib-resources==5.10.0
7 changes: 4 additions & 3 deletions woodwork/type_sys/inference_functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import sys
from importlib import resources as pkg_resources
from typing import Any, Callable, Iterable, Union

import numpy as np
import pandas as pd
from importlib_resources import files
from pandas.api import types as pdtypes

import woodwork as ww
from woodwork import data
from woodwork.config import config
from woodwork.type_sys.utils import _is_categorical_series, col_is_datetime

Expand All @@ -16,7 +15,9 @@
Tokens = Iterable[str]

COMMON_WORDS_SET = set(
line.strip().lower() for line in pkg_resources.open_text(data, "1-1000.txt")
word.strip().lower()
for word in files("woodwork.data").joinpath("1-1000.txt").read_text().split("\n")
if len(word) > 0
)

NL_delimiters = r"[- \[\].,!\?;\n]"
Expand Down