update docstrings

Signed-off-by: Wesley M. Gifford <[email protected]>
ibm-granite · Mar 29, 2024 · b1b981d · b1b981d
1 parent 67b1f48
commit b1b981d
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 8 deletions.
diff --git a/tsfm_public/toolkit/time_series_preprocessor.py b/tsfm_public/toolkit/time_series_preprocessor.py
@@ -595,7 +595,7 @@ def scale_func(grp, id_columns):
     def get_datasets(
         self,
         dataset: Union[Dataset, pd.DataFrame],
-        split_config: Dict[str, Any],
+        split_config: Dict[str, Union[List[Union[int, float]], float]],
         fewshot_fraction: Optional[float] = None,
         fewshot_location: str = FractionLocation.LAST.value,
     ) -> Tuple[Any]:
@@ -604,17 +604,24 @@ def get_datasets(
 
         Args:
             dataset (Union[Dataset, pd.DataFrame]): Loaded pandas dataframe
-                split_config (Dict[str, Any]): Dictionary of dictionaries containing
-                split parameters. For example:
+                split_config (Dict[str, Union[List[Union[int, float]], float]]): Dictionary containing
+                split parameters. Two configurations are possible:
+                1. Specify train/valid/test indices or relative fractions
                     {
                         train: [0, 50],
                         valid: [50, 70],
                         test:  [70, 100]
                     }
                 end value is not inclusive
+                2. Specify train/test fractions:
+                    {
+                        train: 0.7,
+                        test: 0.2
+                    }
+                    A valid split should not be specified directly; the above implies valid = 0.1
+
             fewshot_fraction (float, optional): When non-null, return this percent of the original training
-                dataset. This is done to support fewshot fine-tuning. The fraction of data chosen is at the
-                end of the training dataset.
+                dataset. This is done to support fewshot fine-tuning.
             fewshot_location (str): Determines where the fewshot data is chosen. Valid options are "first" and "last"
                 as described in the enum FractionLocation. Default is to choose the fewshot data at the end
                 of the training dataset (i.e., "last").

diff --git a/tsfm_public/toolkit/util.py b/tsfm_public/toolkit/util.py
@@ -477,15 +477,19 @@ def convert_tsf_to_dataframe(
 
 
 def get_split_params(
-    split_config: Dict[str, List[Union[int, float]]],
+    split_config: Dict[str, Union[float, List[Union[int, float]]]],
     context_length: Optional[int] = None,
 ) -> Tuple[Dict[str, Dict[str, Union[int, float]]], Dict[str, Callable]]:
     """Get split parameters
 
     Args:
-        split_config (Dict[str, List[int, float]]): Dictionary containing keys for
-            train, valid, test. Each value consists of a list of length two, indicating
+        split_config (Dict[str, Union[float, List[Union[int, float]]]]): Dictionary containing keys which
+            define the splits. Two options are possible:
+            1. Specify train, valid, test. Each value consists of a list of length two, indicating
             the boundaries of a split.
+            2. Specify train, test. Each value consists of a single floating point number specifying the
+            fraction of data to use. Valid is populated using the remaining data.
+
         context_length (int, optional): Context length, used only when offsetting
             the split so predictions can be made for all elements of split. Defaults to None.