From d3084668326a1fb6b4585f7a13274d3ce00e3f8a Mon Sep 17 00:00:00 2001
From: "Wesley M. Gifford"
Date: Mon, 29 Jul 2024 16:54:04 -0400
Subject: [PATCH] add tests for frequency token

---
 .../test_time_series_forecasting_pipeline.py | 129 ++++++++++++++++--
 1 file changed, 121 insertions(+), 8 deletions(-)

diff --git a/tests/toolkit/test_time_series_forecasting_pipeline.py b/tests/toolkit/test_time_series_forecasting_pipeline.py
index 503e5c46..a00078b1 100644
--- a/tests/toolkit/test_time_series_forecasting_pipeline.py
+++ b/tests/toolkit/test_time_series_forecasting_pipeline.py
@@ -2,25 +2,89 @@
 #
 """Tests the time series preprocessor and functions"""
 
-
 import pandas as pd
+import pytest
 from transformers import PatchTSTForPrediction
 
+from tsfm_public import TinyTimeMixerForPrediction
 from tsfm_public.toolkit.time_series_forecasting_pipeline import (
     TimeSeriesForecastingPipeline,
 )
-from tsfm_public.toolkit.time_series_preprocessor import TimeSeriesPreprocessor
+from tsfm_public.toolkit.time_series_preprocessor import DEFAULT_FREQUENCY_MAPPING, TimeSeriesPreprocessor
 from tsfm_public.toolkit.util import select_by_index
 
 
-def test_forecasting_pipeline_forecasts():
+@pytest.fixture(scope="module")
+def patchtst_model():
+    model_path = "ibm-granite/granite-timeseries-patchtst"
+    model = PatchTSTForPrediction.from_pretrained(model_path)
+
+    return model
+
+
+@pytest.fixture(scope="module")
+def ttm_model():
+    model_path = "ibm-granite/granite-timeseries-ttm-v1"
+    model = TinyTimeMixerForPrediction.from_pretrained(model_path)
+
+    return model
+
+
+@pytest.fixture(scope="module")
+def etth_data():
     timestamp_column = "date"
     id_columns = []
     target_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
     prediction_length = 96
 
-    model_path = "ibm/patchtst-etth1-forecasting"
-    model = PatchTSTForPrediction.from_pretrained(model_path)
+    dataset_path = "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
+    data = pd.read_csv(
+        dataset_path,
+        parse_dates=[timestamp_column],
+    )
+    train_end_index = 12 * 30 * 24
+
+    context_length = 512  # model.config.context_length
+
+    test_end_index = 12 * 30 * 24 + 8 * 30 * 24
+    test_start_index = test_end_index - context_length - 4
+
+    data = pd.read_csv(
+        dataset_path,
+        parse_dates=[timestamp_column],
+    )
+
+    train_data = select_by_index(
+        data,
+        id_columns=id_columns,
+        start_index=0,
+        end_index=train_end_index,
+    )
+    test_data = select_by_index(
+        data,
+        id_columns=id_columns,
+        start_index=test_start_index,
+        end_index=test_end_index,
+    )
+
+    params = {
+        "timestamp_column": timestamp_column,
+        "id_columns": id_columns,
+        "target_columns": target_columns,
+        "prediction_length": prediction_length,
+        "context_length": context_length,
+    }
+
+    return train_data, test_data, params
+
+
+def test_forecasting_pipeline_forecasts(patchtst_model):
+    timestamp_column = "date"
+    id_columns = []
+    target_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
+    prediction_length = 96
+
+    model = patchtst_model
     context_length = model.config.context_length
 
     forecast_pipeline = TimeSeriesForecastingPipeline(
@@ -111,14 +175,13 @@ def test_forecasting_pipeline_forecasts():
     assert forecasts.shape == (10, 2 * len(target_columns) + 1)
 
 
-def test_forecasting_pipeline_forecasts_with_preprocessor():
+def test_forecasting_pipeline_forecasts_with_preprocessor(patchtst_model):
     timestamp_column = "date"
     id_columns = []
     target_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
     prediction_length = 96
 
-    model_path = "ibm/patchtst-etth1-forecasting"
-    model = PatchTSTForPrediction.from_pretrained(model_path)
+    model = patchtst_model
     context_length = model.config.context_length
 
     dataset_path = "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv"
@@ -181,3 +244,53 @@ def test_forecasting_pipeline_forecasts_with_preprocessor():
 
     # if we have inverse scaled mean should be larger
     assert forecasts["HUFL_prediction"].mean().mean() > 10
+
+
+def test_frequency_token(ttm_model, etth_data):
+    model = ttm_model
+    train_data, test_data, params = etth_data
+
+    timestamp_column = params["timestamp_column"]
+    id_columns = params["id_columns"]
+    target_columns = params["target_columns"]
+    prediction_length = params["prediction_length"]
+    context_length = params["context_length"]
+
+    tsp = TimeSeriesPreprocessor(
+        timestamp_column=timestamp_column,
+        id_columns=id_columns,
+        target_columns=target_columns,
+        context_length=context_length,
+        prediction_length=prediction_length,
+        freq="1h",
+        scaling=True,
+    )
+
+    tsp.train(train_data)
+
+    assert model.config.resolution_prefix_tuning is False
+
+    forecast_pipeline = TimeSeriesForecastingPipeline(
+        model=model,
+        timestamp_column=timestamp_column,
+        id_columns=id_columns,
+        target_columns=target_columns,
+        freq="1h",
+        feature_extractor=tsp,
+        explode_forecasts=False,
+        inverse_scale_outputs=True,
+    )
+    assert forecast_pipeline._preprocess_params["frequency_token"] is None
+
+    model.config.resolution_prefix_tuning = True
+    forecast_pipeline = TimeSeriesForecastingPipeline(
+        model=model,
+        timestamp_column=timestamp_column,
+        id_columns=id_columns,
+        target_columns=target_columns,
+        freq="1h",
+        feature_extractor=tsp,
+        explode_forecasts=False,
+        inverse_scale_outputs=True,
+    )
+    assert forecast_pipeline._preprocess_params["frequency_token"] == DEFAULT_FREQUENCY_MAPPING["h"]