Commit
Fixed a few minor issues, as (make fix-copies) broke a few test cases while stripping the text
nileshkokane01 committed Dec 3, 2023
1 parent 6368358 commit ceab41d
Showing 1 changed file with 4 additions and 1 deletion.
tests/models/rembert/test_tokenization_rembert.py (5 changes: 4 additions & 1 deletion)
@@ -179,10 +179,13 @@ def _test_added_vocab_and_eos(expected, tokenizer_class, expected_eos, temp_dir)
             return tokenizer
 
         new_eos = AddedToken("[NEW_EOS]", rstrip=False, lstrip=True, normalized=False, special=True)
+        new_masked_token = AddedToken("[MASK]", lstrip=True, rstrip=False, normalized=False)
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                 # Load a slow tokenizer from the hub, init with the new token for fast to also include it
-                tokenizer = self.tokenizer_class.from_pretrained(pretrained_name, eos_token=new_eos)
+                tokenizer = self.tokenizer_class.from_pretrained(
+                    pretrained_name, eos_token=new_eos, mask_token=new_masked_token
+                )
                 EXPECTED_ADDED_TOKENS_DECODER = tokenizer.added_tokens_decoder
                 with self.subTest("Hub -> Slow: Test loading a slow tokenizer from the hub)"):
                     self.assertEqual(tokenizer._eos_token, new_eos)
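
For readers who want to see what the mask_token override does outside the test harness, here is a minimal sketch. It assumes the public google/rembert checkpoint, AutoTokenizer, and a recent transformers release that exposes added_tokens_decoder; none of these appear in the diff itself, so treat the snippet as illustrative rather than as the repository's own code.

from transformers import AddedToken, AutoTokenizer

# Same override as in the test: keep lstrip=True / rstrip=False for "[MASK]"
# so the token's stripping behaviour is pinned explicitly.
new_masked_token = AddedToken("[MASK]", lstrip=True, rstrip=False, normalized=False)
tokenizer = AutoTokenizer.from_pretrained("google/rembert", mask_token=new_masked_token)

# lstrip=True lets the added token absorb whitespace to its left when the
# input is tokenized.
print(tokenizer.tokenize("Paris is the [MASK] of France."))

# The override is recorded in added_tokens_decoder, the mapping the test
# snapshots as EXPECTED_ADDED_TOKENS_DECODER before saving and reloading.
for token_id, token in tokenizer.added_tokens_decoder.items():
    print(token_id, token.content, "lstrip:", token.lstrip, "rstrip:", token.rstrip)

Passing an AddedToken rather than a plain string is what carries the lstrip/rstrip flags through serialization, which is presumably why the test pins them explicitly after make fix-copies changed how the surrounding text is stripped.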

0 comments on commit ceab41d
