Add default template warning (huggingface#26637)
* Add default template warnings

* make fixup

* Move warnings to FutureWarning

* Move warnings to FutureWarning

* fix make fixup

* Remove futurewarning
Rocketknight1 authored and EduardoPach committed Nov 19, 2023
1 parent 12e040c commit e3046ee
Showing 18 changed files with 108 additions and 4 deletions.
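
In practical terms (a hedged usage sketch, not part of the diff): after this change, calling `apply_chat_template` on a tokenizer whose `chat_template` attribute is unset falls back to the class's `default_chat_template` and logs the new warning once; setting `tokenizer.chat_template` explicitly avoids the fallback and the warning. The checkpoint name and replacement template below are only examples.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example checkpoint only
messages = [{"role": "user", "content": "Hello!"}]

# No chat_template set: falls back to default_chat_template and logs the warning once.
text = tokenizer.apply_chat_template(messages, tokenize=False)

# Pinning a template explicitly silences the warning.
tokenizer.chat_template = (
    "{% for message in messages %}{{ message['content'] }}{{ eos_token }}{% endfor %}"
)
text = tokenizer.apply_chat_template(messages, tokenize=False)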
6 changes: 6 additions & 0 deletions src/transformers/models/blenderbot/tokenization_blenderbot.py
@@ -423,6 +423,12 @@ def default_chat_template(self):
        """
        A very simple chat template that just adds whitespace between messages.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
Original file line number Diff line number Diff line change
@@ -305,6 +305,12 @@ def default_chat_template(self):
        """
        A very simple chat template that just adds whitespace between messages.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
Original file line number Diff line number Diff line change
@@ -242,6 +242,12 @@ def default_chat_template(self):
        """
        A very simple chat template that just adds whitespace between messages.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
Original file line number Diff line number Diff line change
@@ -124,6 +124,12 @@ def default_chat_template(self):
        """
        A very simple chat template that just adds whitespace between messages.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}"
6 changes: 6 additions & 0 deletions src/transformers/models/bloom/tokenization_bloom_fast.py
Original file line number Diff line number Diff line change
@@ -168,4 +168,10 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
Original file line number Diff line number Diff line change
@@ -469,7 +469,12 @@ def default_chat_template(self):
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
Original file line number Diff line number Diff line change
@@ -367,7 +367,12 @@ def default_chat_template(self):
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
6 changes: 6 additions & 0 deletions src/transformers/models/gpt2/tokenization_gpt2.py
Original file line number Diff line number Diff line change
@@ -363,4 +363,10 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
6 changes: 6 additions & 0 deletions src/transformers/models/gpt2/tokenization_gpt2_fast.py
Original file line number Diff line number Diff line change
@@ -181,4 +181,10 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
Original file line number Diff line number Diff line change
@@ -135,4 +135,10 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
Original file line number Diff line number Diff line change
@@ -180,6 +180,12 @@ def default_chat_template(self):
        """
        A simple chat template that just adds BOS/EOS tokens around messages while discarding role information.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{{ bos_token + eos_token + message.content + eos_token }}"
6 changes: 6 additions & 0 deletions src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
Original file line number Diff line number Diff line change
@@ -321,6 +321,12 @@ def default_chat_template(self):
        This chat template formats messages like an instant messenger chat log, with "User:" and "Bot:" strings
        preceding messages. BOS tokens are added between all messages.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{{ eos_token }}{{ bos_token }}"
            "{% for message in messages %}"
Original file line number Diff line number Diff line change
@@ -261,6 +261,12 @@ def default_chat_template(self):
        A simple chat template that adds standard BOS, SEP and EOS tokens between messages while discarding role
        information.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{% if not loop.first %}{{ bos_token}}{% endif %}"
7 changes: 6 additions & 1 deletion src/transformers/models/llama/tokenization_llama.py
Original file line number Diff line number Diff line change
@@ -430,7 +430,12 @@ def default_chat_template(self):
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
7 changes: 6 additions & 1 deletion src/transformers/models/llama/tokenization_llama_fast.py
Original file line number Diff line number Diff line change
@@ -224,7 +224,12 @@ def default_chat_template(self):
        snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
        in the original repository.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        template = (
            "{% if messages[0]['role'] == 'system' %}"
            "{% set loop_messages = messages[1:] %}"  # Extract system message if it's present
6 changes: 6 additions & 0 deletions src/transformers/models/whisper/tokenization_whisper.py
Original file line number Diff line number Diff line change
@@ -795,6 +795,12 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"

    def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
6 changes: 6 additions & 0 deletions src/transformers/models/whisper/tokenization_whisper_fast.py
Original file line number Diff line number Diff line change
@@ -563,6 +563,12 @@ def default_chat_template(self):
        """
        A simple chat template that ignores role information and just concatenates messages with EOS tokens.
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using the default template "
            f"for the {self.__class__.__name__} class. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"

    # Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer.get_decoder_prompt_ids
6 changes: 6 additions & 0 deletions src/transformers/tokenization_utils_base.py
Original file line number Diff line number Diff line change
@@ -1780,6 +1780,12 @@ def default_chat_template(self):
        This template formats inputs in the standard ChatML format. See
        https://github.com/openai/openai-python/blob/main/chatml.md
        """
        logger.warning_once(
            "\nNo chat template is defined for this tokenizer - using a default chat template "
            "that implements the ChatML format. If the default is not appropriate for "
            "your model, please set `tokenizer.chat_template` to an appropriate template. "
            "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
        )
        return (
            "{% for message in messages %}"
            "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
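
For reference (a hedged sketch using plain jinja2, not code from this commit), the loop portion of the base-class ChatML default shown above renders a conversation like this; the full default continues beyond this excerpt.

from jinja2 import Template

# Only the loop shown in the hunk above; example messages are hypothetical.
chatml = Template(
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi!"},
]
print(chatml.render(messages=messages))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hi!<|im_end|>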
