Skip to content

Commit

Permalink
Merge pull request #84 from ibm-granite/freq_inf
Browse files Browse the repository at this point in the history
Improve frequency handling
  • Loading branch information
wgifford authored Jul 11, 2024
2 parents f4c811e + ea4dd6e commit c6f0fc6
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 50 deletions.
11 changes: 6 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ packages = ["tsfm_public", "tsfm_public.toolkit", "tsfm_public.models", "tsfm_pu

notebooks = ["jupyter", "matplotlib", "datasets", "ipywidgets", "plotly", "kaleido", "tensorboard"]
testing = ["pytest", "tsfm_public[notebooks]", "parameterized"]
dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.1.5"]
dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.4.4"]
evaluation = [
"tsevaluate @ git+ssh://[email protected]/srom/tsevaluate.git",
]
Expand All @@ -54,15 +54,16 @@ version_file = "tsfm_public/_version.py"

[tool.ruff]
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741", "F402", "F823" ]
select = ["C", "E", "F", "I", "W"]
lint.ignore = ["C901", "E501", "E741", "F402", "F823" ]
lint.select = ["C", "E", "F", "I", "W"]
line-length = 119
extend-exclude = ["tsfm_public/_version.py"]

# Ignore import violations in all `__init__.py` files.
[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["tsfm_public"]

Expand Down
2 changes: 1 addition & 1 deletion tests/models/tinytimemixer/test_modeling_tinytimemixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# This code is based on the test code for PatchTSMixer in the HuggingFace Transformers Library:
# https://github.com/huggingface/transformers/blob/main/tests/models/patchtsmixer/test_modeling_patchtsmixer.py
""" Testing suite for the PyTorch TinyTimeMixer model. """
"""Testing suite for the PyTorch TinyTimeMixer model."""

# Standard
import itertools
Expand Down
1 change: 0 additions & 1 deletion tests/toolkit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

"""Tests basic dataset functions"""


from datetime import datetime, timedelta

import numpy as np
Expand Down
1 change: 1 addition & 0 deletions tests/toolkit/test_time_series_forecasting_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#

"""Tests the time series preprocessor and functions"""

import pandas as pd
from transformers import PatchTSTForPrediction

Expand Down
8 changes: 8 additions & 0 deletions tests/toolkit/test_time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,14 @@ def test_get_datasets_with_frequency_token(ts_data):
assert train[0]["freq_token"] == DEFAULT_FREQUENCY_MAPPING["d"]


def test_get_frequency_token():
    """Check that equivalent spellings of an hourly frequency map to the same token.

    The bare alias ("h"), a multiplied alias ("1h") and a timedelta-style
    string ("0 days 01:00:00") must all resolve to the token registered
    under "h" in the default frequency mapping.
    """
    preprocessor = TimeSeriesPreprocessor(timestamp_column="date")

    for hourly_spec in ("1h", "h", "0 days 01:00:00"):
        assert preprocessor.get_frequency_token(hourly_spec) == DEFAULT_FREQUENCY_MAPPING["h"]


def test_id_columns_and_scaling_id_columns(ts_data_runs):
df = ts_data_runs

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright contributors to the TSFM project
#
""" TinyTimeMixer model configuration"""
"""TinyTimeMixer model configuration"""

from typing import Optional, Union

Expand Down
2 changes: 1 addition & 1 deletion tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# This code is based on layers and components from the PatchTSMixer model in the HuggingFace Transformers
# Library: https://github.com/huggingface/transformers/blob/main/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py
""" PyTorch TinyTimeMixer model."""
"""PyTorch TinyTimeMixer model."""

# Standard
import copy
Expand Down
1 change: 1 addition & 0 deletions tsfm_public/models/tinytimemixer/utils/ttm_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Utilities for TTM notebooks"""

# Standard
import argparse
import os
Expand Down
60 changes: 19 additions & 41 deletions tsfm_public/toolkit/time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pandas as pd
from datasets import Dataset
from deprecated import deprecated
from pandas.tseries.frequencies import to_offset
from sklearn.preprocessing import MinMaxScaler as MinMaxScaler_
from sklearn.preprocessing import OrdinalEncoder as OrdinalEncoder_
from sklearn.preprocessing import StandardScaler as StandardScaler_
Expand Down Expand Up @@ -317,44 +318,6 @@ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "PreTrai

return super().from_dict(feature_extractor_dict, **kwargs)

# def _prepare_single_time_series(self, name, d):
# """
# Segment and prepare the time series based on the configuration arguments.

# name: name for the time series, for example as a result of a grouping operation
# d: the data for a single time series
# """
# for s_begin in range(d.shape[0] - self.context_length + 1):
# s_end = s_begin + self.context_length
# seq_x = d[self.input_columns].iloc[s_begin:s_end].values

# if self.time_series_task == TimeSeriesTask.FORECASTING:
# seq_y = (
# d[self.output_columns]
# .iloc[s_end : s_end + self.prediction_length]
# .values
# )
# else:
# seq_y = None
# # to do: add handling of other types

# if self.timestamp_column:
# ts = d[self.timestamp_column].iloc[s_end - 1]
# else:
# ts = None

# if self.id_columns:
# ids = d[self.id_columns].iloc[s_end - 1].values
# else:
# ids = None

# yield {
# "timestamp_column": ts,
# "id_columns": ids,
# "past_values": seq_x,
# "future_values": seq_y,
# }

@classmethod
def _get_scaler_class(cls, scaler_type):
if scaler_type == ScalerType.MINMAX.value:
Expand Down Expand Up @@ -483,10 +446,25 @@ def _train_categorical_encoder(self, df: pd.DataFrame):

def get_frequency_token(self, token_name: str):
    """Return the frequency token associated with a frequency description.

    The lookup proceeds in three steps and stops at the first hit:

    1. ``token_name`` is used verbatim as a key of ``self.frequency_mapping``.
    2. ``token_name`` is parsed as a pandas frequency string (e.g. ``"1h"``)
       and its normalized ``freqstr`` is used as the key.
    3. If it is not a valid frequency string, it is parsed as a timedelta
       (e.g. ``"0 days 01:00:00"``), converted to an offset, and that
       offset's ``freqstr`` is used as the key.

    If no step yields a key present in the mapping, a warning is issued and
    the token stored under ``"oov"`` is returned.

    Args:
        token_name: Frequency name, frequency string or timedelta string.

    Returns:
        The value stored in ``self.frequency_mapping`` for the resolved key,
        or the ``"oov"`` entry when the name cannot be resolved.
    """
    token = self.frequency_mapping.get(token_name, None)
    if token is not None:
        return token

    # try to map as a frequency string
    try:
        token_name_offs = to_offset(token_name).freqstr
    except ValueError:
        # lastly try to map the timedelta to a frequency string
        try:
            token_name_offs = to_offset(pd.Timedelta(token_name)).freqstr
        except ValueError:
            # Neither a frequency string nor a timedelta: do not let the parse
            # error escape, fall through to the out-of-vocabulary token below.
            token_name_offs = None

    if token_name_offs is not None:
        token = self.frequency_mapping.get(token_name_offs, None)
        if token is not None:
            return token

    warn(f"Frequency token {token_name} was not found in the frequncy token mapping.")
    return self.frequency_mapping["oov"]

Expand Down

0 comments on commit c6f0fc6

Please sign in to comment.