diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot.py b/src/transformers/models/blenderbot/tokenization_blenderbot.py index 9a81e73b8da37a..7c1ef43bccb2d5 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot.py @@ -423,6 +423,12 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py index fdd490b12adcf9..1c0d8f3fab75e3 100644 --- a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py +++ b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py @@ -305,6 +305,12 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py index fb8086e981a9d3..240495d73894ef 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py @@ -242,6 +242,12 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py index 8daac3e04fc236..4bf0017b5f2a29 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py @@ -124,6 +124,12 @@ def default_chat_template(self): """ A very simple chat template that just adds whitespace between messages. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}" diff --git a/src/transformers/models/bloom/tokenization_bloom_fast.py b/src/transformers/models/bloom/tokenization_bloom_fast.py index 47b78ac723f757..c0189e08b3d149 100644 --- a/src/transformers/models/bloom/tokenization_bloom_fast.py +++ b/src/transformers/models/bloom/tokenization_bloom_fast.py @@ -168,4 +168,10 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/code_llama/tokenization_code_llama.py b/src/transformers/models/code_llama/tokenization_code_llama.py index ea769b356aa629..165aa3634a4c08 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama.py +++ b/src/transformers/models/code_llama/tokenization_code_llama.py @@ -469,7 +469,12 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py index 2385eb5c545bcb..ae954afa5f6ea8 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py +++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py @@ -367,7 +367,12 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index e757fd980177b7..a7b576e92defb4 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -363,4 +363,10 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt2/tokenization_gpt2_fast.py b/src/transformers/models/gpt2/tokenization_gpt2_fast.py index 0f7a31c9f8b541..a5dcade90a0198 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_fast.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_fast.py @@ -181,4 +181,10 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py index f666b97efd2bd0..31f8a7708adf0b 100644 --- a/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py +++ b/src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py @@ -135,4 +135,10 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" diff --git a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py index c0350879489f79..fae50aa8ffdbb0 100644 --- a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py @@ -180,6 +180,12 @@ def default_chat_template(self): """ A simple chat template that just adds BOS/EOS tokens around messages while discarding role information. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{{ bos_token + eos_token + message.content + eos_token }}" diff --git a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py index 857656fa07ce36..b069ba69bba5c1 100644 --- a/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py +++ b/src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py @@ -321,6 +321,12 @@ def default_chat_template(self): This chat template formats messages like an instant messenger chat log, with "User:" and "Bot:" strings preceding messages. BOS tokens are added between all messages. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{{ eos_token }}{{ bos_token }}" "{% for message in messages %}" diff --git a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py index e805acf3c74bca..3c3132edb444c2 100644 --- a/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/gptsan_japanese/tokenization_gptsan_japanese.py @@ -261,6 +261,12 @@ def default_chat_template(self): A simple chat template that adds standard BOS, SEP and EOS tokens between messages while discarding role information. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{% if not loop.first %}{{ bos_token}}{% endif %}" diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py index be67f0005b701f..b5d58d82d36fd2 100644 --- a/src/transformers/models/llama/tokenization_llama.py +++ b/src/transformers/models/llama/tokenization_llama.py @@ -430,7 +430,12 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/llama/tokenization_llama_fast.py b/src/transformers/models/llama/tokenization_llama_fast.py index 13d7798525f6ed..1fc2b6749b5543 100644 --- a/src/transformers/models/llama/tokenization_llama_fast.py +++ b/src/transformers/models/llama/tokenization_llama_fast.py @@ -224,7 +224,12 @@ def default_chat_template(self): snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362) in the original repository. """ - + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) template = ( "{% if messages[0]['role'] == 'system' %}" "{% set loop_messages = messages[1:] %}" # Extract system message if it's present diff --git a/src/transformers/models/whisper/tokenization_whisper.py b/src/transformers/models/whisper/tokenization_whisper.py index d2f6ea382fbceb..3fa1fe2755c218 100644 --- a/src/transformers/models/whisper/tokenization_whisper.py +++ b/src/transformers/models/whisper/tokenization_whisper.py @@ -795,6 +795,12 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True): diff --git a/src/transformers/models/whisper/tokenization_whisper_fast.py b/src/transformers/models/whisper/tokenization_whisper_fast.py index 43d8a46d7ce06a..ee44bb5918d2b0 100644 --- a/src/transformers/models/whisper/tokenization_whisper_fast.py +++ b/src/transformers/models/whisper/tokenization_whisper_fast.py @@ -563,6 +563,12 @@ def default_chat_template(self): """ A simple chat template that ignores role information and just concatenates messages with EOS tokens. """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using the default template " + f"for the {self.__class__.__name__} class. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}" # Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer.get_decoder_prompt_ids diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b3cfcee19a4b58..3d4bae49499097 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1780,6 +1780,12 @@ def default_chat_template(self): This template formats inputs in the standard ChatML format. See https://github.com/openai/openai-python/blob/main/chatml.md """ + logger.warning_once( + "\nNo chat template is defined for this tokenizer - using a default chat template " + "that implements the ChatML format. If the default is not appropriate for " + "your model, please set `tokenizer.chat_template` to an appropriate template. " + "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n" + ) return ( "{% for message in messages %}" "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"