From be54cab74a4f697830360fd9123fd793e05f86cd Mon Sep 17 00:00:00 2001 From: sanchit-gandhi Date: Thu, 28 Sep 2023 09:56:15 +0100 Subject: [PATCH] decode w/ timestamps --- tests/models/whisper/test_tokenization_whisper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/models/whisper/test_tokenization_whisper.py b/tests/models/whisper/test_tokenization_whisper.py index 7be9e983f9fee6..88d146e59424ef 100644 --- a/tests/models/whisper/test_tokenization_whisper.py +++ b/tests/models/whisper/test_tokenization_whisper.py @@ -281,18 +281,18 @@ def test_timestamp_encoding(self): # fmt: off EXPECTED_TOKENS = [ - NOTIMESTAMPS + 1, 2471, 271, 610, 393, 360, 220, 31208, 377, 23150, 30, 50494, + NOTIMESTAMPS + 1, 41132, 610, 393, 360, 220, 31208, 377, 23150, 30, 50494, ] # fmt: on - encoding = tokenizer(input_text, split_special_tokens=False).input_ids - decoding = tokenizer.decode(encoding) + encoding = tokenizer(input_text, add_special_tokens=False).input_ids + decoding = tokenizer.decode(encoding, decode_with_timestamps=True) self.assertEqual(EXPECTED_TOKENS, encoding) self.assertEqual(input_text, decoding) - encoding = rust_tokenizer(input_text, split_special_tokens=False) - decoding = rust_tokenizer.decode(encoding) + encoding = rust_tokenizer(input_text, add_special_tokens=False) + decoding = rust_tokenizer.decode(encoding, decode_with_timestamps=True) self.assertEqual(EXPECTED_TOKENS, encoding) self.assertEqual(input_text, decoding)