Skip to content

Commit

Permalink
clean up output
Browse files Browse the repository at this point in the history
Signed-off-by: Wesley M. Gifford <[email protected]>
  • Loading branch information
wgifford committed Apr 23, 2024
1 parent 493d42c commit fc09e1a
Showing 1 changed file with 25 additions and 3 deletions.
28 changes: 25 additions & 3 deletions tsfm_public/toolkit/time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,11 +344,29 @@ def _standardize_dataframe(

return df

def _clean_up_dataframe(self, df: pd.DataFrame) -> None:
    """Remove columns added during internal processing of the provided dataframe.

    Currently, the following clean-up steps are performed:
      - Drop ``INTERNAL_ID_COLUMN`` if it is present and no user-supplied
        ``id_columns`` exist (i.e., the column was synthesized internally).

    Note: the dataframe is modified in place (``inplace=True``); nothing is
    returned. The previous docstring incorrectly claimed a ``pd.DataFrame``
    return value.

    Args:
        df (pd.DataFrame): Input pandas dataframe; mutated in place.

    Returns:
        None
    """
    # Only drop the synthetic ID column when the user did not provide their
    # own id_columns — otherwise INTERNAL_ID_COLUMN was never added by us.
    if not self.id_columns and INTERNAL_ID_COLUMN in df.columns:
        df.drop(columns=INTERNAL_ID_COLUMN, inplace=True)

def _get_groups(
self,
dataset: pd.DataFrame,
) -> Generator[Tuple[Any, pd.DataFrame], None, None]:
"""Get groups of the time series dataset (multi-time series) based on the ID columns.
"""Get groups of the time series dataset (multi-time series) based on the ID columns for scaling.
Note that this is used for scaling purposes only.
Args:
dataset (pd.DataFrame): Input dataset
Expand Down Expand Up @@ -472,7 +490,7 @@ def _check_dataset(self, dataset: Union[Dataset, pd.DataFrame]):

def _set_targets(self, dataset: pd.DataFrame) -> None:
if self.target_columns == []:
skip_columns = copy.copy(self.id_columns)
skip_columns = copy.copy(self.id_columns) + [INTERNAL_ID_COLUMN]
if self.timestamp_column:
skip_columns.append(self.timestamp_column)

Expand Down Expand Up @@ -531,6 +549,7 @@ def train(
if self.encode_categorical:
self._train_categorical_encoder(df)

self._clean_up_dataframe(df)
return self

def inverse_scale_targets(
Expand Down Expand Up @@ -581,10 +600,12 @@ def inverse_scale_func(grp, id_columns):
else:
id_columns = INTERNAL_ID_COLUMN

return df.groupby(id_columns, group_keys=False).apply(
df_inv = df.groupby(id_columns, group_keys=False).apply(
inverse_scale_func,
id_columns=id_columns,
)
self._clean_up_dataframe(df_inv)
return df_inv

def preprocess(
self,
Expand Down Expand Up @@ -640,6 +661,7 @@ def scale_func(grp, id_columns):
raise RuntimeError("Attempt to encode categorical columns, but the encoder has not been trained yet.")
df[cols_to_encode] = self.categorical_encoder.transform(df[cols_to_encode])

self._clean_up_dataframe(df)
return df

def get_datasets(
Expand Down

0 comments on commit fc09e1a

Please sign in to comment.