Skip to content

Commit

Permalink
decode w/ timestamps
Browse files (browse the repository at this point in the history)
  • Loading branch information
sanchit-gandhi committed Sep 28, 2023
1 parent 0baebe7 commit be54cab
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions tests/models/whisper/test_tokenization_whisper.py
Original file line number | Diff line number | Diff line change
@@ -281,18 +281,18 @@ def test_timestamp_encoding(self):

 # fmt: off
 EXPECTED_TOKENS = [
-NOTIMESTAMPS + 1, 2471, 271, 610, 393, 360, 220, 31208, 377, 23150, 30, 50494,
+NOTIMESTAMPS + 1, 41132, 610, 393, 360, 220, 31208, 377, 23150, 30, 50494,
 ]
 # fmt: on

-encoding = tokenizer(input_text, split_special_tokens=False).input_ids
-decoding = tokenizer.decode(encoding)
+encoding = tokenizer(input_text, add_special_tokens=False).input_ids
+decoding = tokenizer.decode(encoding, decode_with_timestamps=True)

 self.assertEqual(EXPECTED_TOKENS, encoding)
 self.assertEqual(input_text, decoding)

-encoding = rust_tokenizer(input_text, split_special_tokens=False)
-decoding = rust_tokenizer.decode(encoding)
+encoding = rust_tokenizer(input_text, add_special_tokens=False)
+decoding = rust_tokenizer.decode(encoding, decode_with_timestamps=True)

 self.assertEqual(EXPECTED_TOKENS, encoding)
 self.assertEqual(input_text, decoding)
Expand Down

0 comments on commit be54cab

Please sign in to comment.