Skip to content

Commit

Permalink
clean up output
Browse files Browse the repository at this point in the history
Signed-off-by: Wesley M. Gifford <[email protected]>
  • Loading branch information
wgifford committed Apr 23, 2024
1 parent 493d42c commit fc09e1a
Showing 1 changed file with 25 additions and 3 deletions.
28 changes: 25 additions & 3 deletions tsfm_public/toolkit/time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,11 +344,29 @@ def _standardize_dataframe(

return df

def _clean_up_dataframe(self, df: pd.DataFrame) -> None:
    """Remove columns added during internal processing of the provided dataframe.

    Currently, the following clean-up steps are performed:
      - Drop ``INTERNAL_ID_COLUMN`` if it is present and no user-supplied
        ``id_columns`` exist (i.e., the column was synthesized internally).

    Note: the dataframe is modified in place (``inplace=True``); nothing is
    returned. The previous docstring incorrectly claimed a ``pd.DataFrame``
    return value.

    Args:
        df (pd.DataFrame): Input pandas dataframe; mutated in place.

    Returns:
        None
    """
    # Only drop the synthetic ID column when the user did not provide their
    # own id_columns — otherwise INTERNAL_ID_COLUMN was never added by us.
    if not self.id_columns and INTERNAL_ID_COLUMN in df.columns:
        df.drop(columns=INTERNAL_ID_COLUMN, inplace=True)

def _get_groups(
self,
dataset: pd.DataFrame,
) -> Generator[Tuple[Any, pd.DataFrame], None, None]:
"""Get groups of the time series dataset (multi-time series) based on the ID columns.
"""Get groups of the time series dataset (multi-time series) based on the ID columns for scaling.
Note that this is used for scaling purposes only.
Args:
dataset (pd.DataFrame): Input dataset
Expand Down Expand Up @@ -472,7 +490,7 @@ def _check_dataset(self, dataset: Union[Dataset, pd.DataFrame]):

def _set_targets(self, dataset: pd.DataFrame) -> None:
if self.target_columns == []:
skip_columns = copy.copy(self.id_columns)
skip_columns = copy.copy(self.id_columns) + [INTERNAL_ID_COLUMN]
if self.timestamp_column:
skip_columns.append(self.timestamp_column)

Expand Down Expand Up @@ -531,6 +549,7 @@ def train(
if self.encode_categorical:
self._train_categorical_encoder(df)

self._clean_up_dataframe(df)
return self

def inverse_scale_targets(
Expand Down Expand Up @@ -581,10 +600,12 @@ def inverse_scale_func(grp, id_columns):
else:
id_columns = INTERNAL_ID_COLUMN

return df.groupby(id_columns, group_keys=False).apply(
df_inv = df.groupby(id_columns, group_keys=False).apply(
inverse_scale_func,
id_columns=id_columns,
)
self._clean_up_dataframe(df_inv)
return df_inv

def preprocess(
self,
Expand Down Expand Up @@ -640,6 +661,7 @@ def scale_func(grp, id_columns):
raise RuntimeError("Attempt to encode categorical columns, but the encoder has not been trained yet.")
df[cols_to_encode] = self.categorical_encoder.transform(df[cols_to_encode])

self._clean_up_dataframe(df)
return df

def get_datasets(
Expand Down

0 comments on commit fc09e1a

Please sign in to comment.