From b61a0490572b622cd9fc3deb70495ae5221fdee0 Mon Sep 17 00:00:00 2001 From: "jun.4" Date: Tue, 28 May 2024 17:59:27 +0900 Subject: [PATCH] Set ensure_ascii=False in JSON dump within apply_chat_template - Set ensure_ascii to False in the Jinja environment's json.dumps_kwargs policy, so that JSON dumped while rendering a chat template keeps non-ASCII characters as-is instead of escaping them as \uXXXX sequences. --- src/transformers/tokenization_utils_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index a8d35003287e39..3510a2a50ac10e 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1854,6 +1854,7 @@ def raise_exception(message): jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True) jinja_env.globals["raise_exception"] = raise_exception + jinja_env.policies['json.dumps_kwargs']['ensure_ascii'] = False return jinja_env.from_string(chat_template) @property