From 3e28e27630a45f9dfc19759d4e65710defe10ba9 Mon Sep 17 00:00:00 2001 From: christopherbunn Date: Mon, 21 Aug 2023 14:40:27 -0400 Subject: [PATCH] Final nits --- evalml/pipelines/utils.py | 4 ++-- evalml/tests/pipeline_tests/test_pipeline_utils.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py index 17bb3f4adf..dbc51abee8 100644 --- a/evalml/pipelines/utils.py +++ b/evalml/pipelines/utils.py @@ -1489,7 +1489,7 @@ def stack_X(X, series_id_name, time_index, starting_index=None, series_id_values time_index (str): The name of the time index column. starting_index (int): The starting index to use for the stacked DataFrame. If None, the starting index will match that of the input data. Defaults to None. - series_id_values (set): The unique values of a series ID, used to generate the index. If None, values will + series_id_values (set, list): The unique values of a series ID, used to generate the index. If None, values will be generated from X column values. Required if X only has time index values and no exogenous values. Defaults to None. @@ -1508,7 +1508,7 @@ def stack_X(X, series_id_name, time_index, starting_index=None, series_id_values if len(series_ids) == 0: raise ValueError( - "Series ID values needs to be passed in X column values or as a set with the `series_id_values` parameter.", + "Series ID values need to be passed in X column values or as a set with the `series_id_values` parameter.", ) time_index_col = X[time_index].repeat(len(series_ids)).reset_index(drop=True) diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py index 151f1d01e7..92eb95cc0e 100644 --- a/evalml/tests/pipeline_tests/test_pipeline_utils.py +++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py @@ -1472,11 +1472,13 @@ def test_stack_data_noop(): pd.testing.assert_series_equal(stack_data(series_y), series_y) +@pytest.mark.parametrize("series_id_values_type", [set, list]) @pytest.mark.parametrize("no_features", [True, False]) @pytest.mark.parametrize("starting_index", [None, 1, 132]) def test_stack_X( starting_index, no_features, + series_id_values_type, multiseries_ts_data_stacked, multiseries_ts_data_unstacked, ): @@ -1487,13 +1489,13 @@ def test_stack_X( X_expected.index = X_expected.index + starting_index if no_features: - series_id_values = set(str(i) for i in range(0, 5)) + series_id_values = series_id_values_type(str(i) for i in range(0, 5)) X = pd.DataFrame(X["date"]) X_expected = X_expected[["date", "series_id"]] with pytest.raises( ValueError, - match="Series ID values needs to be passed in X column values or as a set with the `series_id_values` parameter.", + match="Series ID values need to be passed in X column values or as a set with the `series_id_values` parameter.", ): stack_X(X, "series_id", "date", starting_index=starting_index)