diff --git a/pyproject.toml b/pyproject.toml index 586921a8..697ccbec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ packages = ["tsfm_public", "tsfm_public.toolkit", "tsfm_public.models", "tsfm_pu notebooks = ["jupyter", "matplotlib", "datasets", "ipywidgets", "plotly", "kaleido", "tensorboard"] testing = ["pytest", "tsfm_public[notebooks]", "parameterized"] -dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.1.5"] +dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.4.4"] evaluation = [ "tsevaluate @ git+ssh://git@github.ibm.com/srom/tsevaluate.git", ] @@ -54,15 +54,16 @@ version_file = "tsfm_public/_version.py" [tool.ruff] # Never enforce `E501` (line length violations). -ignore = ["C901", "E501", "E741", "F402", "F823" ] -select = ["C", "E", "F", "I", "W"] +lint.ignore = ["C901", "E501", "E741", "F402", "F823" ] +lint.select = ["C", "E", "F", "I", "W"] line-length = 119 +extend-exclude = ["tsfm_public/_version.py"] # Ignore import violations in all `__init__.py` files. -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401", "F403", "F811"] -[tool.ruff.isort] +[tool.ruff.lint.isort] lines-after-imports = 2 known-first-party = ["tsfm_public"] diff --git a/tests/models/tinytimemixer/test_modeling_tinytimemixer.py b/tests/models/tinytimemixer/test_modeling_tinytimemixer.py index 9897f4d1..3ce871c4 100644 --- a/tests/models/tinytimemixer/test_modeling_tinytimemixer.py +++ b/tests/models/tinytimemixer/test_modeling_tinytimemixer.py @@ -2,7 +2,7 @@ # # This code is based on the test code for PatchTSMixer in the HuggingFace Transformers Library: # https://github.com/huggingface/transformers/blob/main/tests/models/patchtsmixer/test_modeling_patchtsmixer.py -""" Testing suite for the PyTorch TinyTimeMixer model. 
""" +"""Testing suite for the PyTorch TinyTimeMixer model.""" # Standard import itertools diff --git a/tests/toolkit/test_dataset.py b/tests/toolkit/test_dataset.py index 18f27b68..6e23adad 100644 --- a/tests/toolkit/test_dataset.py +++ b/tests/toolkit/test_dataset.py @@ -3,7 +3,6 @@ """Tests basic dataset functions""" - from datetime import datetime, timedelta import numpy as np diff --git a/tests/toolkit/test_time_series_forecasting_pipeline.py b/tests/toolkit/test_time_series_forecasting_pipeline.py index defdd123..503e5c46 100644 --- a/tests/toolkit/test_time_series_forecasting_pipeline.py +++ b/tests/toolkit/test_time_series_forecasting_pipeline.py @@ -2,6 +2,7 @@ # """Tests the time series preprocessor and functions""" + import pandas as pd from transformers import PatchTSTForPrediction diff --git a/tests/toolkit/test_time_series_preprocessor.py b/tests/toolkit/test_time_series_preprocessor.py index bdb9c93e..8a52c7c9 100644 --- a/tests/toolkit/test_time_series_preprocessor.py +++ b/tests/toolkit/test_time_series_preprocessor.py @@ -429,6 +429,14 @@ def test_get_datasets_with_frequency_token(ts_data): assert train[0]["freq_token"] == DEFAULT_FREQUENCY_MAPPING["d"] +def test_get_frequency_token(): + tsp = TimeSeriesPreprocessor(timestamp_column="date") + + assert tsp.get_frequency_token("1h") == DEFAULT_FREQUENCY_MAPPING["h"] + assert tsp.get_frequency_token("h") == DEFAULT_FREQUENCY_MAPPING["h"] + assert tsp.get_frequency_token("0 days 01:00:00") == DEFAULT_FREQUENCY_MAPPING["h"] + + def test_id_columns_and_scaling_id_columns(ts_data_runs): df = ts_data_runs diff --git a/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py b/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py index b4aafa7f..0e7df2d0 100644 --- a/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py +++ b/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py @@ -1,6 +1,6 @@ # Copyright contributors to the TSFM project # -""" TinyTimeMixer model 
configuration""" +"""TinyTimeMixer model configuration""" from typing import Optional, Union diff --git a/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py b/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py index 504dfdd8..eb64518d 100644 --- a/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py +++ b/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py @@ -2,7 +2,7 @@ # # This code is based on layers and components from the PatchTSMixer model in the HuggingFace Transformers # Library: https://github.com/huggingface/transformers/blob/main/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py -""" PyTorch TinyTimeMixer model.""" +"""PyTorch TinyTimeMixer model.""" # Standard import copy diff --git a/tsfm_public/models/tinytimemixer/utils/ttm_utils.py b/tsfm_public/models/tinytimemixer/utils/ttm_utils.py index 03bd457e..dbbb5d68 100644 --- a/tsfm_public/models/tinytimemixer/utils/ttm_utils.py +++ b/tsfm_public/models/tinytimemixer/utils/ttm_utils.py @@ -1,4 +1,5 @@ """Utilities for TTM notebooks""" + # Standard import argparse import os diff --git a/tsfm_public/toolkit/time_series_preprocessor.py b/tsfm_public/toolkit/time_series_preprocessor.py index d55a08af..f32258cc 100644 --- a/tsfm_public/toolkit/time_series_preprocessor.py +++ b/tsfm_public/toolkit/time_series_preprocessor.py @@ -14,6 +14,7 @@ import pandas as pd from datasets import Dataset from deprecated import deprecated +from pandas.tseries.frequencies import to_offset from sklearn.preprocessing import MinMaxScaler as MinMaxScaler_ from sklearn.preprocessing import OrdinalEncoder as OrdinalEncoder_ from sklearn.preprocessing import StandardScaler as StandardScaler_ @@ -317,44 +318,6 @@ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "PreTrai return super().from_dict(feature_extractor_dict, **kwargs) - # def _prepare_single_time_series(self, name, d): - # """ - # Segment and prepare the time series based on the configuration arguments. 
- - # name: name for the time series, for example as a result of a grouping operation - # d: the data for a single time series - # """ - # for s_begin in range(d.shape[0] - self.context_length + 1): - # s_end = s_begin + self.context_length - # seq_x = d[self.input_columns].iloc[s_begin:s_end].values - - # if self.time_series_task == TimeSeriesTask.FORECASTING: - # seq_y = ( - # d[self.output_columns] - # .iloc[s_end : s_end + self.prediction_length] - # .values - # ) - # else: - # seq_y = None - # # to do: add handling of other types - - # if self.timestamp_column: - # ts = d[self.timestamp_column].iloc[s_end - 1] - # else: - # ts = None - - # if self.id_columns: - # ids = d[self.id_columns].iloc[s_end - 1].values - # else: - # ids = None - - # yield { - # "timestamp_column": ts, - # "id_columns": ids, - # "past_values": seq_x, - # "future_values": seq_y, - # } - @classmethod def _get_scaler_class(cls, scaler_type): if scaler_type == ScalerType.MINMAX.value: @@ -483,10 +446,25 @@ def _train_categorical_encoder(self, df: pd.DataFrame): def get_frequency_token(self, token_name: str): token = self.frequency_mapping.get(token_name, None) + if token is not None: + return token - if token is None: - warn(f"Frequency token {token_name} was not found in the frequncy token mapping.") - token = self.frequency_mapping["oov"] + # try to map as a frequency string + try: + token_name_offs = to_offset(token_name).freqstr + token = self.frequency_mapping.get(token_name_offs, None) + if token is not None: + return token + except ValueError: + # lastly try to map the timedelta to a frequency string + token_name_td = pd._libs.tslibs.timedeltas.Timedelta(token_name) + token_name_offs = to_offset(token_name_td).freqstr + token = self.frequency_mapping.get(token_name_offs, None) + if token is not None: + return token + + warn(f"Frequency token {token_name} was not found in the frequency token mapping.") + token = self.frequency_mapping["oov"] return token