From 5241e4736f40b01a9c9e1abb712c9e00217de74a Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 29 Apr 2024 17:55:04 +0100 Subject: [PATCH 1/4] Temporarily silence warnings in apply_chat_template until we can properly deprecate default chat templates --- .../models/idefics2/processing_idefics2.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 7b98519928f55e..7cb3218240b960 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -17,6 +17,7 @@ """ from typing import TYPE_CHECKING, Dict, List, Optional, Union +import warnings from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, is_valid_image, load_image @@ -285,10 +286,14 @@ def apply_chat_template( chat_template = self.chat_template else: chat_template = self.default_chat_template - - return self.tokenizer.apply_chat_template( - conversation, chat_template=chat_template, tokenize=tokenize, **kwargs - ) + with warnings.catch_warnings(): + # TODO Matt: This is a workaround to avoid annoying warnings until we properly remove default + # chat templates, which are already deprecated. Once they are removed in v4.43, we can remove + # this and clean up Tokenizer.apply_chat_template as well. + warnings.simplefilter("ignore") + return self.tokenizer.apply_chat_template( + conversation, chat_template=chat_template, tokenize=tokenize, **kwargs + ) @property def default_chat_template(self): From 709349b0514e6fc0b1d039aaaf1e40525f23f5b1 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 29 Apr 2024 18:06:00 +0100 Subject: [PATCH 2/4] make fixup --- src/transformers/models/idefics2/processing_idefics2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 7cb3218240b960..0d556dbb6aeccc 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -16,8 +16,8 @@ Processor class for IDEFICS2. """ -from typing import TYPE_CHECKING, Dict, List, Optional, Union import warnings +from typing import TYPE_CHECKING, Dict, List, Optional, Union from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, is_valid_image, load_image From 9522442e4866c854b738bba2842c44cf1efd543c Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 29 Apr 2024 19:40:19 +0100 Subject: [PATCH 3/4] Move the default chat template warning into apply_chat_template itself --- .../blenderbot/tokenization_blenderbot.py | 7 ----- .../tokenization_blenderbot_fast.py | 7 ----- .../tokenization_blenderbot_small.py | 7 ----- .../tokenization_blenderbot_small_fast.py | 7 ----- .../models/bloom/tokenization_bloom_fast.py | 7 ----- .../code_llama/tokenization_code_llama.py | 7 ----- .../tokenization_code_llama_fast.py | 7 ----- .../models/cohere/tokenization_cohere_fast.py | 7 ----- .../models/gpt2/tokenization_gpt2.py | 7 ----- .../models/gpt2/tokenization_gpt2_fast.py | 8 +---- .../gpt_neox/tokenization_gpt_neox_fast.py | 7 ----- .../tokenization_gpt_neox_japanese.py | 7 ----- .../models/gpt_sw3/tokenization_gpt_sw3.py | 7 ----- .../tokenization_gptsan_japanese.py | 7 ----- .../models/idefics2/processing_idefics2.py | 18 ++++++----- .../models/llama/tokenization_llama.py | 7 ----- .../models/llama/tokenization_llama_fast.py | 7 ----- .../models/whisper/tokenization_whisper.py | 7 ----- .../whisper/tokenization_whisper_fast.py | 7 ----- src/transformers/tokenization_utils_base.py | 31 ++++++++++++++----- 20 files changed, 34 insertions(+), 142 deletions(-) diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot.py b/src/transformers/models/blenderbot/tokenization_blenderbot.py index 6ce85fa644a47a..67724538233430 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot.py @@ -411,13 +411,6 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py index 0735b4666b537e..3a5206cdf4dfd3 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py @@ -293,13 +293,6 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py index 2d8b5f97deca34..832b5315edfd7c 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py @@ -224,13 +224,6 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py index 1c8a2656e68003..1f647d2430acd8 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py @@ -104,13 +104,6 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/bloom/tokenization_bloom_fast.py b/src/transformers/models/bloom/tokenization_bloom_fast.py index 95afa8c45a3794..afd3bfaaeef042 100644 --- a/src/transformers/models/bloom/tokenization_bloom_fast.py +++ b/src/transformers/models/bloom/tokenization_bloom_fast.py @@ -155,11 +155,4 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/code_llama/tokenization_code_llama.py b/src/transformers/models/code_llama/tokenization_code_llama.py index ed12b737b28e76..5a114542df2b03 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama.py +++ b/src/transformers/models/code_llama/tokenization_code_llama.py @@ -456,13 +456,6 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py index 845ce94ad90c8e..d1cde882ff6bf7 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py +++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py @@ -369,13 +369,6 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/cohere/tokenization_cohere_fast.py b/src/transformers/models/cohere/tokenization_cohere_fast.py index 1fd38e555f3eaf..96db4d4d11ed0f 100644 --- a/src/transformers/models/cohere/tokenization_cohere_fast.py +++ b/src/transformers/models/cohere/tokenization_cohere_fast.py @@ -247,13 +247,6 @@ def default_chat_template(self): '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) default_template = ( "{{ bos_token }}" "{% if messages[0]['role'] == 'system' %}" diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index 3d5281008a6120..9e81b4473e3244 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -336,11 +336,4 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt2/tokenization_gpt2_fast.py b/src/transformers/models/gpt2/tokenization_gpt2_fast.py index 498ca69832fb96..39e59298c860ab 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_fast.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_fast.py @@ -147,11 +147,5 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) + return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py index 2ee18c05ab25a4..ba54a1dc7d56b2 100644 --- a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py +++ b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py @@ -234,11 +234,4 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py index 83ae7779851d8c..b761e539a5386d 100644 --- a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py @@ -165,13 +165,6 @@ def default_chat_template(self): """ A simple chat template that just adds BOS/EOS tokens around messages while discarding role information. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{{ bos_token + eos_token + message.content + eos_token }}" diff --git a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py index 83fbd4bd0b21be..1000bfd1b6c8b1 100644 --- a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py +++ b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py @@ -301,13 +301,6 @@ def default_chat_template(self): This chat template formats messages like an instant messenger chat log, with "User:" and "Bot:" strings preceding messages. BOS tokens are added between all messages. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{{ eos_token }}{{ bos_token }}" "{% for message in messages %}" diff --git a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py index 7cb28acaeba4d3..627f7a9b2856f2 100644 --- a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py @@ -246,13 +246,6 @@ def default_chat_template(self): A simple chat template that adds standard BOS, SEP and EOS tokens between messages while discarding role information. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{% if not loop.first %}{{ bos_token}}{% endif %}" diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 0d556dbb6aeccc..4e984170c559a5 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -285,15 +285,17 @@ def apply_chat_template( if self.chat_template is not None: chat_template = self.chat_template else: + logger.warning_once( + "No chat template is set for this processor, falling back to a default class-level template. This is " + "very error-prone, because models are often trained with templates different from the class default! " + "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " + "point any code depending on them will stop working. We recommend setting a valid chat template before " + "then to ensure that this model continues working without issues." + ) chat_template = self.default_chat_template - with warnings.catch_warnings(): - # TODO Matt: This is a workaround to avoid annoying warnings until we properly remove default - # chat templates, which are already deprecated. Once they are removed in v4.43, we can remove - # this and clean up Tokenizer.apply_chat_template as well. - warnings.simplefilter("ignore") - return self.tokenizer.apply_chat_template( - conversation, chat_template=chat_template, tokenize=tokenize, **kwargs - ) + return self.tokenizer.apply_chat_template( + conversation, chat_template=chat_template, tokenize=tokenize, **kwargs + ) @property def default_chat_template(self): diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py index def5e8ecbaacf1..2b4920c6688236 100644 --- a/src/transformers/models/llama/tokenization_llama.py +++ b/src/transformers/models/llama/tokenization_llama.py @@ -429,13 +429,6 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/llama/tokenization_llama_fast.py b/src/transformers/models/llama/tokenization_llama_fast.py index ccc01cd61914e9..4a7ef126d41afe 100644 --- a/src/transformers/models/llama/tokenization_llama_fast.py +++ b/src/transformers/models/llama/tokenization_llama_fast.py @@ -261,13 +261,6 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/whisper/tokenization_whisper.py b/src/transformers/models/whisper/tokenization_whisper.py index 9eabef7e2db5cb..0a6ad5be6e978b 100644 --- a/src/transformers/models/whisper/tokenization_whisper.py +++ b/src/transformers/models/whisper/tokenization_whisper.py @@ -815,13 +815,6 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True): diff --git a/src/transformers/models/whisper/tokenization_whisper_fast.py b/src/transformers/models/whisper/tokenization_whisper_fast.py index ee54fca283fddd..54aa60839cd29e 100644 --- a/src/transformers/models/whisper/tokenization_whisper_fast.py +++ b/src/transformers/models/whisper/tokenization_whisper_fast.py @@ -544,13 +544,6 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a default class-level template. " - "This is very error-prone, because models are often trained with templates different from the class " - "default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" # Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer.get_decoder_prompt_ids diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index a30daf5f7fbe69..fa0eaa14205b96 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1746,16 +1746,27 @@ def apply_chat_template( if tokenizer_kwargs is None: tokenizer_kwargs = {} + using_default_template = False + # First, handle the cases when the model has a dict of multiple templates if isinstance(self.chat_template, dict) or ( self.chat_template is None and isinstance(self.default_chat_template, dict) ): - template_dict = self.chat_template or self.default_chat_template + if self.chat_template is not None: + template_dict = self.chat_template + using_default_dict = False + else: + template_dict = self.default_chat_template + using_default_dict = True if chat_template is not None and chat_template in template_dict: # The user can pass the name of a template to the chat template argument instead of an entire template chat_template = template_dict[chat_template] + if using_default_dict: + using_default_template = True elif chat_template is None and "default" in template_dict: chat_template = template_dict["default"] + if using_default_dict: + using_default_template = True elif chat_template is None: raise ValueError( "This model has multiple chat templates with no default specified! Please either pass a chat " @@ -1769,6 +1780,17 @@ def apply_chat_template( chat_template = self.chat_template else: chat_template = self.default_chat_template + using_default_template = True + + if using_default_template: + logger.warning_once( + "No chat template is set for this tokenizer, falling back to a default class-level template. This is " + "very error-prone, because models are often trained with templates different from the class default! " + "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " + "point any code depending on them will stop working. We recommend setting a valid chat template before " + "then to ensure that this model continues working without issues." + ) + # Compilation function uses a cache to avoid recompiling the same template compiled_template = self._compile_jinja_template(chat_template) @@ -1840,13 +1862,6 @@ def default_chat_template(self): This template formats inputs in the standard ChatML format. See https://github.com/openai/openai-python/blob/main/chatml.md """ - logger.warning_once( - "No chat template is set for this tokenizer, falling back to a ChatML template. " - "This is very error-prone, because most models are not trained with a ChatML template!" - "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which " - "point any code depending on them will stop working. We recommend setting a valid chat template before " - "then to ensure that this model continues working without issues." - ) return ( "{% for message in messages %}" "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}" From d2b4f2c55a36d7504c406804057f668fbfa8febc Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 29 Apr 2024 19:40:37 +0100 Subject: [PATCH 4/4] make fixup --- src/transformers/models/idefics2/processing_idefics2.py | 1 - src/transformers/tokenization_utils_base.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 4e984170c559a5..b20f69bd07ad82 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -16,7 +16,6 @@ Processor class for IDEFICS2. """ -import warnings from typing import TYPE_CHECKING, Dict, List, Optional, Union from ...feature_extraction_utils import BatchFeature diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index fa0eaa14205b96..0a57345e395453 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1791,7 +1791,6 @@ def apply_chat_template( "then to ensure that this model continues working without issues." ) - # Compilation function uses a cache to avoid recompiling the same template compiled_template = self._compile_jinja_template(chat_template)