From e1034794dc3f190598d550d7d6965b9ad59aadcf Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 20 Sep 2023 10:25:30 +0200 Subject: [PATCH 01/27] fix wav2vec2 --- src/transformers/models/wav2vec2/tokenization_wav2vec2.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index dc8b9bde7e6214..22c5be3eac158a 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -232,9 +232,8 @@ def set_target_lang(self, target_lang: str): # make sure that tokens made of several # characters are not split at tokenization - for token in self.encoder.keys(): - if len(token) > 1: - self.unique_no_split_tokens.append(token) + self.add_tokens([ tok for token in self.encoder.keys() if len(token)>1]) + @property def word_delimiter_token(self) -> str: From e67ef1fb73f49ae1eca13e82ddb50f670578f45b Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 20 Sep 2023 10:25:47 +0200 Subject: [PATCH 02/27] nit --- src/transformers/models/wav2vec2/tokenization_wav2vec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 22c5be3eac158a..0a5ca12158f448 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -232,7 +232,7 @@ def set_target_lang(self, target_lang: str): # make sure that tokens made of several # characters are not split at tokenization - self.add_tokens([ tok for token in self.encoder.keys() if len(token)>1]) + self.add_tokens([ token for token in self.encoder.keys() if len(token)>1]) @property From c30986c98f162388b8ab0c6183f81ab1d1906990 Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 20 Sep 2023 16:51:40 +0200 Subject: [PATCH 03/27] stash --- src/transformers/tokenization_utils_base.py | 2 +- tests/models/fnet/test_modeling_fnet.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b936adc36bb6da..ca05e261ea62b2 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2206,7 +2206,7 @@ def _from_pretrained( " it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again." " You will see the new `added_tokens_decoder` attribute that will store the relevant information." ) - + # ISSUEEE FSNETTTTTTTT # begin legacy: read the added_tokens_file and update kwargs with special_tokens_map if modified if special_tokens_map_file is not None: with open(special_tokens_map_file, encoding="utf-8") as special_tokens_map_handle: diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 01e9942de25222..ae151d68bda059 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -529,12 +529,12 @@ def test_inference_for_masked_lm(self): self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4)) - @slow + # @slow @require_tokenizers def test_inference_long_sentence(self): + tokenizer = FNetTokenizerFast.from_pretrained("google/fnet-base") model = FNetForMaskedLM.from_pretrained("google/fnet-base") model.to(torch_device) - tokenizer = FNetTokenizerFast.from_pretrained("google/fnet-base") inputs = tokenizer( "the man worked as a [MASK].", From d63347ce900c99947c43837cbc41b37f7b30b8a3 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 21 Sep 2023 14:19:53 +0200 Subject: [PATCH 04/27] one more file to update --- tests/models/idefics/test_modeling_idefics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index c6df84b11fc8e6..664601c7801d0b 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -508,6 +508,7 @@ def default_processor(self): @require_bitsandbytes @slow def test_inference_natural_language_visual_reasoning(self): + # TODO tokenizer updates seem to have broken this cat_image_path = self.tests_dir / "fixtures/tests_samples/COCO/000000039769.png" cats_image_obj = Image.open(cat_image_path) # 2 cats dogs_image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures_nlvr2/raw/main/image1.jpeg" From c3a9da6f07251cdfd1df0f5172094f9c91a80b4e Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 09:02:54 +0200 Subject: [PATCH 05/27] fix byt5 --- src/transformers/models/byt5/tokenization_byt5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/byt5/tokenization_byt5.py b/src/transformers/models/byt5/tokenization_byt5.py index 1d310fe3045fb0..68b7d2b846e481 100644 --- a/src/transformers/models/byt5/tokenization_byt5.py +++ b/src/transformers/models/byt5/tokenization_byt5.py @@ -101,7 +101,7 @@ def __init__( @property def vocab_size(self): - return self._utf_vocab_size + return self._utf_vocab_size + self.offset def get_vocab(self): vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} From e1f426e85dfeb3b27ae9dea7b20d3bbb684796c3 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 09:04:48 +0200 Subject: [PATCH 06/27] vocab size is 256, don't change that! --- src/transformers/models/byt5/tokenization_byt5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/byt5/tokenization_byt5.py b/src/transformers/models/byt5/tokenization_byt5.py index 68b7d2b846e481..c0c3aa56e99266 100644 --- a/src/transformers/models/byt5/tokenization_byt5.py +++ b/src/transformers/models/byt5/tokenization_byt5.py @@ -101,10 +101,10 @@ def __init__( @property def vocab_size(self): - return self._utf_vocab_size + self.offset + return self._utf_vocab_size def get_vocab(self): - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} + vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size + self.offset)} vocab.update(self.added_tokens_encoder) return vocab From 5faf3041cbcb6c4a08879d1e26974861bf6bcf48 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 09:13:00 +0200 Subject: [PATCH 07/27] use other revision --- tests/models/idefics/test_modeling_idefics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index 664601c7801d0b..aeaea799c2dd50 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -503,12 +503,11 @@ def test_retain_grad_hidden_states_attentions(self): class IdeficsModelIntegrationTest(TestCasePlus): @cached_property def default_processor(self): - return IdeficsProcessor.from_pretrained("HuggingFaceM4/idefics-9b") if is_vision_available() else None + return IdeficsProcessor.from_pretrained("HuggingFaceM4/idefics-9b", revision = "refs/pr/11") if is_vision_available() else None @require_bitsandbytes @slow def test_inference_natural_language_visual_reasoning(self): - # TODO tokenizer updates seem to have broken this cat_image_path = self.tests_dir / "fixtures/tests_samples/COCO/000000039769.png" cats_image_obj = Image.open(cat_image_path) # 2 cats dogs_image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures_nlvr2/raw/main/image1.jpeg" From f1cd94d6909332ea37c8f62a2b35113193875731 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 09:22:06 +0200 Subject: [PATCH 08/27] test persimon in smaller size --- tests/models/persimmon/test_modeling_persimmon.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 7eb66ecfb3fe69..6a197e723f2fb8 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -385,11 +385,11 @@ class PersimmonIntegrationTest(unittest.TestCase): @slow def test_model_8b_chat_logits(self): input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338] - model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", device_map="auto") + model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", device_map="auto", torch_dtype = torch.float16) out = model(torch.tensor([input_ids])).logits EXPECTED_MEAN = torch.tensor( - [[-11.2879, -11.2628, -11.2498, -11.2534, -11.2676, -11.2638, -11.2501, -11.2431]], dtype=torch.float32 + [[-11.2879, -11.2628, -11.2498, -11.2534, -11.2676, -11.2638, -11.2501, -11.2431]], dtype=torch.float16 ) torch.testing.assert_close(out.cpu().mean(-1), EXPECTED_MEAN, atol=1e-4, rtol=1e-4) # fmt: off @@ -404,7 +404,7 @@ def test_model_8b_chat_greedy_generation(self): prompt = "human: Simply put, the theory of relativity states that?\n\nadept:" tokenizer = AutoTokenizer.from_pretrained("ArthurZ/persimmon-8b-chat", use_fast=False) input_ids = tokenizer.encode(prompt, return_tensors="pt").to(torch_device) - model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat").to(torch_device) + model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", torch_dtype = torch.float16).to(torch_device) # greedy generation outputs generated_ids = model.generate(input_ids, max_new_tokens=64) From 231c13fe5a41a9cd4e627a4bb4aa44befb30a182 Mon Sep 17 00:00:00 2001 From: ArthurZucker Date: Thu, 28 Sep 2023 03:56:16 -0400 Subject: [PATCH 09/27] style --- src/transformers/models/wav2vec2/tokenization_wav2vec2.py | 3 +-- tests/models/idefics/test_modeling_idefics.py | 6 +++++- tests/models/persimmon/test_modeling_persimmon.py | 8 ++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 0a5ca12158f448..63e078740e353f 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -232,8 +232,7 @@ def set_target_lang(self, target_lang: str): # make sure that tokens made of several # characters are not split at tokenization - self.add_tokens([ token for token in self.encoder.keys() if len(token)>1]) - + self.add_tokens([token for token in self.encoder.keys() if len(token) > 1]) @property def word_delimiter_token(self) -> str: diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index aeaea799c2dd50..83fa42016d5338 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -503,7 +503,11 @@ def test_retain_grad_hidden_states_attentions(self): class IdeficsModelIntegrationTest(TestCasePlus): @cached_property def default_processor(self): - return IdeficsProcessor.from_pretrained("HuggingFaceM4/idefics-9b", revision = "refs/pr/11") if is_vision_available() else None + return ( + IdeficsProcessor.from_pretrained("HuggingFaceM4/idefics-9b", revision="refs/pr/11") + if is_vision_available() + else None + ) @require_bitsandbytes @slow diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index 6a197e723f2fb8..50b2a30fc8b724 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -385,7 +385,9 @@ class PersimmonIntegrationTest(unittest.TestCase): @slow def test_model_8b_chat_logits(self): input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338] - model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", device_map="auto", torch_dtype = torch.float16) + model = PersimmonForCausalLM.from_pretrained( + "ArthurZ/persimmon-8b-chat", device_map="auto", torch_dtype=torch.float16 + ) out = model(torch.tensor([input_ids])).logits EXPECTED_MEAN = torch.tensor( @@ -404,7 +406,9 @@ def test_model_8b_chat_greedy_generation(self): prompt = "human: Simply put, the theory of relativity states that?\n\nadept:" tokenizer = AutoTokenizer.from_pretrained("ArthurZ/persimmon-8b-chat", use_fast=False) input_ids = tokenizer.encode(prompt, return_tensors="pt").to(torch_device) - model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", torch_dtype = torch.float16).to(torch_device) + model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", torch_dtype=torch.float16).to( + torch_device + ) # greedy generation outputs generated_ids = model.generate(input_ids, max_new_tokens=64) From 9671eb16f88426b250691fb4610c8943ecd01191 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 09:56:56 +0200 Subject: [PATCH 10/27] tests --- tests/models/instructblip/test_modeling_instructblip.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index 3cdf41cb5416d1..362e4431c241d6 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -517,11 +517,11 @@ def prepare_img(): return image -@require_vision -@require_torch -@slow +# @require_vision +# @require_torch +# @slow class InstructBlipModelIntegrationTest(unittest.TestCase): - @require_bitsandbytes + # @require_bitsandbytes def test_inference_vicuna_7b(self): processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b") model = InstructBlipForConditionalGeneration.from_pretrained( From 45bf24c079b34b0f392be2a6d0ffb37c5a5d3337 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 10:33:17 +0200 Subject: [PATCH 11/27] nits --- .../instructblip/test_modeling_instructblip.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index 362e4431c241d6..1c8af011180ba4 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -29,7 +29,14 @@ InstructBlipQFormerConfig, InstructBlipVisionConfig, ) -from transformers.testing_utils import require_bitsandbytes, require_torch, require_vision, slow, torch_device +from transformers.testing_utils import ( + require_accelerate, + require_bitsandbytes, + require_torch, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -517,11 +524,12 @@ def prepare_img(): return image -# @require_vision -# @require_torch -# @slow +@require_vision +@require_torch +@slow class InstructBlipModelIntegrationTest(unittest.TestCase): - # @require_bitsandbytes + @require_bitsandbytes + @require_accelerate def test_inference_vicuna_7b(self): processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b") model = InstructBlipForConditionalGeneration.from_pretrained( From 93cba7fde83aedb94b5378967da5a0d4a0975525 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 12:10:38 +0200 Subject: [PATCH 12/27] update add tokens from pretrained --- src/transformers/tokenization_utils_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index f622c25e47b0fc..b44239d7ea6de9 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2233,8 +2233,9 @@ def _from_pretrained( with open(added_tokens_file, encoding="utf-8") as added_tokens_handle: added_tok_encoder = json.load(added_tokens_handle) # legacy: we have to init with (rstrip=True, lstrip=True) + strip = True if not "Fast" in cls.__name__ else False added_tokens_decoder = { - index: AddedToken(token, rstrip=True, lstrip=True) for token, index in added_tok_encoder.items() + index: AddedToken(token, rstrip=strip, lstrip=strip) for token, index in added_tok_encoder.items() } # end legacy From 5b34f336f4ba58fe842d5567aa9669b6285c1044 Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 16:57:43 +0200 Subject: [PATCH 13/27] test tokenization --- tests/models/fnet/test_modeling_fnet.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index ae151d68bda059..758020e9310c08 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -533,8 +533,7 @@ def test_inference_for_masked_lm(self): @require_tokenizers def test_inference_long_sentence(self): tokenizer = FNetTokenizerFast.from_pretrained("google/fnet-base") - model = FNetForMaskedLM.from_pretrained("google/fnet-base") - model.to(torch_device) + inputs = tokenizer( "the man worked as a [MASK].", @@ -543,8 +542,15 @@ def test_inference_long_sentence(self): padding="max_length", max_length=512, ) + # fmt: off + self.assertEqual(input, torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5,168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + # fmt: on + inputs = {k: v.to(torch_device) for k, v in inputs.items()} + + model = FNetForMaskedLM.from_pretrained("google/fnet-base") + model.to(torch_device) logits = model(**inputs).logits predictions_mask_1 = tokenizer.decode(logits[0, 6].topk(5).indices) predictions_mask_2 = tokenizer.decode(logits[0, 12].topk(5).indices) From c25d1b56ad31cce06d0fc6c7b5a190310659609c Mon Sep 17 00:00:00 2001 From: Arthur Date: Thu, 28 Sep 2023 16:57:57 +0200 Subject: [PATCH 14/27] nits --- tests/models/fnet/test_modeling_fnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 758020e9310c08..90898ded598358 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): max_length=512, ) # fmt: off - self.assertEqual(input, torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5,168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + self.assertEqual(input["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5,168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From 77bf490a8b2479c945160b54ee14ac29a306fa9c Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 09:44:00 +0200 Subject: [PATCH 15/27] potential fnet fix? --- src/transformers/models/fnet/tokenization_fnet_fast.py | 2 +- src/transformers/tokenization_utils_base.py | 1 - tests/models/fnet/test_modeling_fnet.py | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index 2179751e558e60..48422cb6fc76a6 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -108,7 +108,7 @@ def __init__( ): # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) # if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b44239d7ea6de9..28f25a68f193a2 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2209,7 +2209,6 @@ def _from_pretrained( " it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again." " You will see the new `added_tokens_decoder` attribute that will store the relevant information." ) - # ISSUEEE FSNETTTTTTTT # begin legacy: read the added_tokens_file and update kwargs with special_tokens_map if modified if special_tokens_map_file is not None: with open(special_tokens_map_file, encoding="utf-8") as special_tokens_map_handle: diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 90898ded598358..b30662d8863db1 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -529,12 +529,11 @@ def test_inference_for_masked_lm(self): self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4)) - # @slow + @slow @require_tokenizers def test_inference_long_sentence(self): tokenizer = FNetTokenizerFast.from_pretrained("google/fnet-base") - inputs = tokenizer( "the man worked as a [MASK].", "this is his [MASK].", @@ -542,8 +541,9 @@ def test_inference_long_sentence(self): padding="max_length", max_length=512, ) + # fmt: off - self.assertEqual(input["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5,168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + self.assertEqual(input["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From e074df169d35fc621d8bcfa741c8f3de2542c969 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 09:49:53 +0200 Subject: [PATCH 16/27] more nits --- tests/models/fnet/test_modeling_fnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index b30662d8863db1..f845644c641090 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): ) # fmt: off - self.assertEqual(input["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + self.assertEqual(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From 5ce7d1d348e72d07681486ad2d0e6e27810ff514 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:08:35 +0200 Subject: [PATCH 17/27] nits --- src/transformers/models/fnet/tokenization_fnet.py | 2 +- src/transformers/models/fnet/tokenization_fnet_fast.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/fnet/tokenization_fnet.py b/src/transformers/models/fnet/tokenization_fnet.py index cfa54fcecfb517..8bcc580cb621fc 100644 --- a/src/transformers/models/fnet/tokenization_fnet.py +++ b/src/transformers/models/fnet/tokenization_fnet.py @@ -116,7 +116,7 @@ def __init__( ) -> None: # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(mask_token, lstrip=False, rstrip=False) if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index 48422cb6fc76a6..9247b1a33594d2 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -108,7 +108,7 @@ def __init__( ): # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) # if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(str(mask_token), lstrip=False, rstrip=True)# if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token From 8fa6ed3f8532079007ee454290101bf4223f03cd Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:13:15 +0200 Subject: [PATCH 18/27] correct test --- tests/models/fnet/test_modeling_fnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index f845644c641090..77133afceaf9c2 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): ) # fmt: off - self.assertEqual(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + self.assertEqual(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5, 168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From 62ccd668ea4240ce85a7ae47013d40c0ddfc9585 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:15:02 +0200 Subject: [PATCH 19/27] assert close --- tests/models/fnet/test_modeling_fnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 77133afceaf9c2..24d2d3564d7a63 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): ) # fmt: off - self.assertEqual(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5, 168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + torch.testing.assert_allclose(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5, 168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From 1d4656a7e8521d940fb9c73b62cabdc333ecc9df Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:28:51 +0200 Subject: [PATCH 20/27] udpate --- src/transformers/models/fnet/tokenization_fnet.py | 2 +- src/transformers/models/fnet/tokenization_fnet_fast.py | 2 +- tests/models/fnet/test_modeling_fnet.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/fnet/tokenization_fnet.py b/src/transformers/models/fnet/tokenization_fnet.py index 8bcc580cb621fc..cfa54fcecfb517 100644 --- a/src/transformers/models/fnet/tokenization_fnet.py +++ b/src/transformers/models/fnet/tokenization_fnet.py @@ -116,7 +116,7 @@ def __init__( ) -> None: # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(mask_token, lstrip=False, rstrip=False) if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index 9247b1a33594d2..30e890561de663 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -108,7 +108,7 @@ def __init__( ): # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(str(mask_token), lstrip=False, rstrip=True)# if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 24d2d3564d7a63..bcd398c3a6cdc9 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): ) # fmt: off - torch.testing.assert_allclose(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 16657, 6, 845, 5, 168, 65, 367, 16657, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + torch.testing.assert_allclose(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} @@ -552,6 +552,10 @@ def test_inference_long_sentence(self): model = FNetForMaskedLM.from_pretrained("google/fnet-base") model.to(torch_device) logits = model(**inputs).logits + + torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 283, 821, 4638, 3806, 2063])) + torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 394, 4059, 1499, 1587, 1316])) + predictions_mask_1 = tokenizer.decode(logits[0, 6].topk(5).indices) predictions_mask_2 = tokenizer.decode(logits[0, 12].topk(5).indices) From 511b4ecd42d2c5f243c5c3f649b2495fc2db8f17 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:30:08 +0200 Subject: [PATCH 21/27] ouch --- tests/models/fnet/test_modeling_fnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index bcd398c3a6cdc9..6494683a1a4209 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -543,7 +543,7 @@ def test_inference_long_sentence(self): ) # fmt: off - torch.testing.assert_allclose(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) + torch.testing.assert_allclose(inputs["input_ids"], torch.tensor([[4, 13, 283, 2479, 106, 8, 6, 845, 5, 168, 65, 367, 6, 845, 5, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3, 3, 3, 3, 3, 3, 3, 3, 3, 3,3]])) # fmt: on inputs = {k: v.to(torch_device) for k, v in inputs.items()} From 8f027981d1587404407e8313a939134611987afa Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:32:08 +0200 Subject: [PATCH 22/27] fix it --- src/transformers/models/fnet/tokenization_fnet_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index 30e890561de663..674862728dada0 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -108,7 +108,7 @@ def __init__( ): # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token + mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token From 016b35f907bdab4fcf9076976ed02a0242340197 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:55:41 +0200 Subject: [PATCH 23/27] some more nits --- tests/models/fnet/test_modeling_fnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 6494683a1a4209..53e5afe2c2d995 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -553,8 +553,8 @@ def test_inference_long_sentence(self): model.to(torch_device) logits = model(**inputs).logits - torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 283, 821, 4638, 3806, 2063])) - torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 394, 4059, 1499, 1587, 1316])) + # torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 283, 821, 4638, 3806, 2063])) + # torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 394, 4059, 1499, 1587, 1316])) predictions_mask_1 = tokenizer.decode(logits[0, 6].topk(5).indices) predictions_mask_2 = tokenizer.decode(logits[0, 12].topk(5).indices) From 32a54fb4438317d738b48da9d590f9bcbef56afb Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 10:58:35 +0200 Subject: [PATCH 24/27] FINALLU --- tests/models/fnet/test_modeling_fnet.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 53e5afe2c2d995..c53c30c3be9083 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -552,10 +552,6 @@ def test_inference_long_sentence(self): model = FNetForMaskedLM.from_pretrained("google/fnet-base") model.to(torch_device) logits = model(**inputs).logits - - # torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 283, 821, 4638, 3806, 2063])) - # torch.testing.assert_allclose(logits[0, 6].topk(5).indices.cpu().data, torch.tensor([ 394, 4059, 1499, 1587, 1316])) - predictions_mask_1 = tokenizer.decode(logits[0, 6].topk(5).indices) predictions_mask_2 = tokenizer.decode(logits[0, 12].topk(5).indices) From 0c999b1e83d944c66eead56ebbfc9870fd3510dd Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 11:05:39 +0200 Subject: [PATCH 25/27] use `adept` checkpoints --- tests/models/persimmon/test_modeling_persimmon.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index b44a2f3dce22bf..3b67128c3b7372 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -387,7 +387,7 @@ class PersimmonIntegrationTest(unittest.TestCase): def test_model_8b_chat_logits(self): input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338] model = PersimmonForCausalLM.from_pretrained( - "ArthurZ/persimmon-8b-chat", device_map="auto", torch_dtype=torch.float16 + "adept/persimmon-8b-chat", device_map="auto", torch_dtype=torch.float16 ) out = model(torch.tensor([input_ids])).logits @@ -405,9 +405,9 @@ def test_model_8b_chat_logits(self): def test_model_8b_chat_greedy_generation(self): EXPECTED_TEXT_COMPLETION = """human: Simply put, the theory of relativity states that?\n\nadept: The theory of relativity states that the laws of physics are the same for all observers, regardless of their relative motion.""" prompt = "human: Simply put, the theory of relativity states that?\n\nadept:" - tokenizer = AutoTokenizer.from_pretrained("ArthurZ/persimmon-8b-chat", use_fast=False) + tokenizer = AutoTokenizer.from_pretrained("adept/persimmon-8b-chat", use_fast=False) input_ids = tokenizer.encode(prompt, return_tensors="pt").to(torch_device) - model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", torch_dtype=torch.float16).to( + model = PersimmonForCausalLM.from_pretrained("adept/persimmon-8b-chat", torch_dtype=torch.float16).to( torch_device ) From fd105ca9c26a5f391e8d28af98951665c32f2bc3 Mon Sep 17 00:00:00 2001 From: Arthur Date: Fri, 29 Sep 2023 11:06:14 +0200 Subject: [PATCH 26/27] more adept checkpoints --- src/transformers/models/persimmon/configuration_persimmon.py | 4 ++-- src/transformers/models/persimmon/modeling_persimmon.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/persimmon/configuration_persimmon.py b/src/transformers/models/persimmon/configuration_persimmon.py index 8fa8f0a7ce82d6..8606e4febffe80 100644 --- a/src/transformers/models/persimmon/configuration_persimmon.py +++ b/src/transformers/models/persimmon/configuration_persimmon.py @@ -21,7 +21,7 @@ logger = logging.get_logger(__name__) PERSIMMON_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "ArthurZ/persimmon-8b-base": "https://huggingface.co/ArthurZ/persimmon-8b-base/resolve/main/config.json", + "adept/persimmon-8b-base": "https://huggingface.co/adept/persimmon-8b-base/resolve/main/config.json", } @@ -30,7 +30,7 @@ class PersimmonConfig(PretrainedConfig): This is the configuration class to store the configuration of a [`PersimmonModel`]. It is used to instantiate an Persimmon model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the - [ArthurZ/persimmon-8b-base](https://huggingface.co/ArthurZ/persimmon-8b-base). + [adept/persimmon-8b-base](https://huggingface.co/adept/persimmon-8b-base). Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index a6c633dbc2d1fa..bd55df28cd3455 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -789,8 +789,8 @@ def forward( ```python >>> from transformers import AutoTokenizer, PersimmonForCausalLM - >>> model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-base") - >>> tokenizer = AutoTokenizer.from_pretrained("ArthurZ/persimmon-8b-base") + >>> model = PersimmonForCausalLM.from_pretrained("adept/persimmon-8b-base") + >>> tokenizer = AutoTokenizer.from_pretrained("adept/persimmon-8b-base") >>> prompt = "human: Hey, what should I eat for dinner?" >>> inputs = tokenizer(prompt, return_tensors="pt") From 03808d4ef1933447938efcd0640147b2c9260b57 Mon Sep 17 00:00:00 2001 From: ArthurZucker Date: Fri, 29 Sep 2023 07:58:14 -0400 Subject: [PATCH 27/27] that was invlved! --- src/transformers/models/fnet/tokenization_fnet_fast.py | 2 +- src/transformers/tokenization_utils_base.py | 3 ++- tests/models/fnet/test_modeling_fnet.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/fnet/tokenization_fnet_fast.py b/src/transformers/models/fnet/tokenization_fnet_fast.py index 674862728dada0..2179751e558e60 100644 --- a/src/transformers/models/fnet/tokenization_fnet_fast.py +++ b/src/transformers/models/fnet/tokenization_fnet_fast.py @@ -108,7 +108,7 @@ def __init__( ): # Mask token behave like a normal word, i.e. include the space before it and # is included in the raw text, there should be a match in a non-normalized sentence. - mask_token = AddedToken(str(mask_token), lstrip=True, rstrip=False) + mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token cls_token = AddedToken(cls_token, lstrip=False, rstrip=False) if isinstance(cls_token, str) else cls_token sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 28f25a68f193a2..72d50c3fece484 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2220,6 +2220,7 @@ def _from_pretrained( continue if isinstance(value, dict): value = AddedToken(**value) + init_kwargs[key] = value elif key == "additional_special_tokens" and isinstance(value, list): for token in value: token = AddedToken(**token) if isinstance(token, dict) else token @@ -2232,7 +2233,7 @@ def _from_pretrained( with open(added_tokens_file, encoding="utf-8") as added_tokens_handle: added_tok_encoder = json.load(added_tokens_handle) # legacy: we have to init with (rstrip=True, lstrip=True) - strip = True if not "Fast" in cls.__name__ else False + strip = True if "Fast" not in cls.__name__ else False added_tokens_decoder = { index: AddedToken(token, rstrip=strip, lstrip=strip) for token, index in added_tok_encoder.items() } diff --git a/tests/models/fnet/test_modeling_fnet.py b/tests/models/fnet/test_modeling_fnet.py index 53e5afe2c2d995..6ed372f5eba79b 100644 --- a/tests/models/fnet/test_modeling_fnet.py +++ b/tests/models/fnet/test_modeling_fnet.py @@ -548,7 +548,6 @@ def test_inference_long_sentence(self): inputs = {k: v.to(torch_device) for k, v in inputs.items()} - model = FNetForMaskedLM.from_pretrained("google/fnet-base") model.to(torch_device) logits = model(**inputs).logits