Add tests, update get_dataset for target columns
Signed-off-by: Wesley M. Gifford <[email protected]>
wgifford committed Mar 29, 2024
1 parent 11509f0 commit 5d6178f
Showing 2 changed files with 30 additions and 14 deletions.
13 changes: 13 additions & 0 deletions tests/toolkit/test_time_series_preprocessor.py
@@ -284,3 +284,16 @@ def test_train_without_targets(ts_data):
    tsp.train(ts_data)

    assert tsp.target_columns == ["value2"]


def test_get_datasets_without_targets(ts_data):
    ts_data = ts_data.drop(columns=["id", "id2"])
    tsp = TimeSeriesPreprocessor(
        timestamp_column="timestamp",
        prediction_length=2,
        context_length=5,
    )

    train, _, _ = tsp.get_datasets(ts_data, split_config={"train": 0.7, "test": 0.2})

    assert train.datasets[0].target_columns == ["value1", "value2"]
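For reference, the new test leans on the shared ts_data fixture defined earlier in this test module. A minimal stand-in consistent with the columns the test expects might look like the following; the column names match the test, while the lengths, id values, and frequency are assumptions for illustration only:

import pandas as pd
import pytest

@pytest.fixture
def ts_data():
    # Stand-in fixture: two id columns, a timestamp column, and two value
    # columns that become the inferred targets once the ids are dropped.
    timestamps = pd.date_range("2021-01-01", periods=50, freq="h")
    return pd.DataFrame(
        {
            "id": ["A"] * 50,
            "id2": ["B"] * 50,
            "timestamp": timestamps,
            "value1": list(range(50)),
            "value2": [float(v) for v in range(50)],
        }
    )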
31 changes: 17 additions & 14 deletions tsfm_public/toolkit/time_series_preprocessor.py
@@ -510,6 +510,7 @@ def train(
        self._check_dataset(dataset)
        df = self._standardize_dataframe(dataset)
        self._set_targets(df)
        self._validate_columns()

        if self.freq is None:
            self._estimate_frequency(df)
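train now calls self._validate_columns() right after the targets are resolved. The helper's body is not part of this diff; as a rough sketch of what such a check typically does (assumed behavior, using only column attributes that appear elsewhere in the diff):

def _validate_columns(self):
    # Hypothetical sketch -- the real implementation is not shown in this commit.
    # Ensure no column is assigned to more than one role.
    seen = {}
    for column in (
        self.target_columns
        + self.observable_columns
        + self.control_columns
        + self.conditional_columns
    ):
        seen[column] = seen.get(column, 0) + 1
    duplicates = [c for c, n in seen.items() if n > 1]
    if duplicates:
        raise ValueError(f"Columns are assigned to multiple roles: {duplicates}")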
@@ -563,7 +564,6 @@ def preprocess(
        # 2) incremental / batch based processing of datasets to minimize memory impact

        self._check_dataset(dataset)

        df = self._standardize_dataframe(dataset)

        if self.scaling:
@@ -647,22 +647,14 @@ def get_datasets(

        data = self._standardize_dataframe(dataset)

        # get split_params
        # split_params = get_split_params(config, self.context_length, len(data))
        if not self.context_length:
            raise ValueError("TimeSeriesPreprocessor must be instantiated with non-null context_length")
        if not self.prediction_length:
            raise ValueError("TimeSeriesPreprocessor must be instantiated with non-null prediction_length")

        # get split_params
        split_params, split_function = get_split_params(split_config, context_length=self.context_length)

        # specify columns
        column_specifiers = {
            "id_columns": self.id_columns,
            "timestamp_column": self.timestamp_column,
            "target_columns": self.target_columns,
            "observable_columns": self.observable_columns,
            "control_columns": self.control_columns,
            "conditional_columns": self.conditional_columns,
            "static_categorical_columns": self.static_categorical_columns,
        }

        # split data
        if isinstance(split_function, dict):
            train_data = split_function["train"](data, id_columns=self.id_columns, **split_params["train"])
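For orientation, get_split_params converts the fractional split_config (e.g. {"train": 0.7, "test": 0.2} from the new test) into per-split parameters and split functions, which are then applied to the data with id_columns passed through, as shown above. A purely illustrative fraction-based splitter, not the toolkit's implementation:

import pandas as pd

def split_by_fraction(df: pd.DataFrame, train: float = 0.7, test: float = 0.2):
    # Illustrative only: slice a single series into train/valid/test by row fraction.
    n = len(df)
    train_end = int(n * train)
    valid_end = int(n * (1.0 - test))
    return df.iloc[:train_end], df.iloc[train_end:valid_end], df.iloc[valid_end:]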
@@ -674,6 +666,17 @@
        # data preprocessing
        self.train(train_data)

        # specify columns
        column_specifiers = {
            "id_columns": self.id_columns,
            "timestamp_column": self.timestamp_column,
            "target_columns": self.target_columns,
            "observable_columns": self.observable_columns,
            "control_columns": self.control_columns,
            "conditional_columns": self.conditional_columns,
            "static_categorical_columns": self.static_categorical_columns,
        }

        # handle fewshot operation
        if fewshot_fraction is not None:
            if not ((fewshot_fraction <= 1.0) and (fewshot_fraction > 0.0)):
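The substantive change in get_datasets is the ordering: column_specifiers is now built after self.train(train_data), so target columns inferred during training, rather than an empty list from the constructor, are what reach dataset construction. End-to-end, the behavior exercised by the new test looks roughly like this (DataFrame contents and frequency are illustrative; the split_config, column names, and constructor arguments come from the test above):

import pandas as pd
from tsfm_public.toolkit.time_series_preprocessor import TimeSeriesPreprocessor

df = pd.DataFrame(
    {
        "timestamp": pd.date_range("2021-01-01", periods=50, freq="h"),
        "value1": list(range(50)),
        "value2": list(range(50)),
    }
)

tsp = TimeSeriesPreprocessor(timestamp_column="timestamp", context_length=5, prediction_length=2)

# No target_columns given: train() infers them, and get_datasets now builds
# its column specifiers only after that inference has happened.
train, valid, test = tsp.get_datasets(df, split_config={"train": 0.7, "test": 0.2})
assert train.datasets[0].target_columns == ["value1", "value2"]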
