Skip to content

Commit

Permalink
update docstrings
Browse files Browse the repository at this point in the history
Signed-off-by: Wesley M. Gifford <[email protected]>
  • Loading branch information
wgifford committed Mar 29, 2024
1 parent 67b1f48 commit b1b981d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
17 changes: 12 additions & 5 deletions tsfm_public/toolkit/time_series_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ def scale_func(grp, id_columns):
def get_datasets(
self,
dataset: Union[Dataset, pd.DataFrame],
split_config: Dict[str, Any],
split_config: Dict[str, Union[List[Union[int, float]], float]],
fewshot_fraction: Optional[float] = None,
fewshot_location: str = FractionLocation.LAST.value,
) -> Tuple[Any]:
Expand All @@ -604,17 +604,24 @@ def get_datasets(
Args:
dataset (Union[Dataset, pd.DataFrame]): Loaded pandas dataframe
split_config (Dict[str, Any]): Dictionary of dictionaries containing
split parameters. For example:
split_config (Dict[str, Union[List[Union[int, float]], float]]): Dictionary containing
split parameters. Two configurations are possible:
1. Specify train/valid/test indices or relative fractions
{
train: [0, 50],
valid: [50, 70],
test: [70, 100]
}
end value is not inclusive
2. Specify train/test fractions:
{
train: 0.7,
test: 0.2
}
A valid split should not be specified directly; the above implies valid = 0.1
fewshot_fraction (float, optional): When non-null, return this percent of the original training
dataset. This is done to support fewshot fine-tuning. The fraction of data chosen is at the
end of the training dataset.
dataset. This is done to support fewshot fine-tuning.
fewshot_location (str): Determines where the fewshot data is chosen. Valid options are "first" and "last"
as described in the enum FractionLocation. Default is to choose the fewshot data at the end
of the training dataset (i.e., "last").
Expand Down
10 changes: 7 additions & 3 deletions tsfm_public/toolkit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,15 +477,19 @@ def convert_tsf_to_dataframe(


def get_split_params(
split_config: Dict[str, List[Union[int, float]]],
split_config: Dict[str, Union[float, List[Union[int, float]]]],
context_length: Optional[int] = None,
) -> Tuple[Dict[str, Dict[str, Union[int, float]]], Dict[str, Callable]]:
"""Get split parameters
Args:
split_config (Dict[str, List[int, float]]): Dictionary containing keys for
train, valid, test. Each value consists of a list of length two, indicating
split_config (Dict[str, Union[float, List[Union[int, float]]]]): Dictionary containing keys which
define the splits. Two options are possible:
1. Specify train, valid, test. Each value consists of a list of length two, indicating
the boundaries of a split.
2. Specify train, test. Each value consists of a single floating point number specifying the
fraction of data to use. Valid is populated using the remaining data.
context_length (int, optional): Context length, used only when offsetting
the split so predictions can be made for all elements of split. Defaults to None.
Expand Down

0 comments on commit b1b981d

Please sign in to comment.