Skip to content

Commit

Permalink
fix typo
Browse files: browse the repository at this point in the history
  • Loading branch information
dakinggg committed Sep 20, 2023
1 parent 9e8a0d1 commit 1e01e11
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion scripts/data_prep/convert_dataset_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def main(args: Namespace) -> None:

if args.concat_tokens is not None:
mode = ConcatMode.CONCAT_TOKENS
- tokenizer = build_tokenizer(args.tokenizer, **args.tokenizer_kwargs)
+ tokenizer = build_tokenizer(args.tokenizer, args.tokenizer_kwargs)
# we will enforce length, so suppress warnings about sequences too long for the model
tokenizer.model_max_length = int(1e30)
columns = {'tokens': 'bytes'}
Expand Down

0 comments on commit 1e01e11

Please sign in to comment.