From 3dd5decfaffaff4980a84f4ef37db7e38d988a97 Mon Sep 17 00:00:00 2001
From: Matt
Date: Mon, 26 Feb 2024 14:12:29 +0000
Subject: [PATCH] Manual patching for BERT-like tokenizers

---
 src/transformers/generation/stopping_criteria.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/transformers/generation/stopping_criteria.py b/src/transformers/generation/stopping_criteria.py
index 7b32c018041443..7b06b64a94c297 100644
--- a/src/transformers/generation/stopping_criteria.py
+++ b/src/transformers/generation/stopping_criteria.py
@@ -254,6 +254,8 @@ def _stop_string_get_matching_positions(
     def _cleanup_token(token: str) -> str:
         if token[0] in ["▁", "Ġ"]:
             token = " " + token[1:]
+        elif token[:2] == "##":
+            token = token[2:]
         return token
 
     reversed_filtered_tok_list = [_cleanup_token(token)[::-1] for token in tok_list]
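
For reference, a minimal standalone sketch (not part of the patch) of how the patched _cleanup_token helper behaves on typical tokens; the example tokens are illustrative rather than drawn from a real tokenizer vocabulary:

    # Standalone copy of the helper as patched above, for illustration only.
    def _cleanup_token(token: str) -> str:
        if token[0] in ["▁", "Ġ"]:
            token = " " + token[1:]  # SentencePiece/BPE word-start markers become a leading space
        elif token[:2] == "##":
            token = token[2:]  # strip the WordPiece continuation prefix used by BERT-like tokenizers
        return token

    print(_cleanup_token("▁hello"))  # " hello"
    print(_cleanup_token("Ġhello"))  # " hello"
    print(_cleanup_token("##ing"))   # "ing"
    print(_cleanup_token("token"))   # "token" (unchanged)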