From 230ef0696bc3115eee94cba1a6ff846eee525cb2 Mon Sep 17 00:00:00 2001 From: Madison May Date: Mon, 18 Nov 2024 12:47:11 -0500 Subject: [PATCH] Update input_encoder.py --- finetune/encoding/input_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finetune/encoding/input_encoder.py b/finetune/encoding/input_encoder.py index eddb6266..86c901fa 100644 --- a/finetune/encoding/input_encoder.py +++ b/finetune/encoding/input_encoder.py @@ -320,7 +320,7 @@ def tokenize_context(context, encoded_output, config): ) if ( context_by_char_loc[current_char_loc][2] - and token.strip().strip("Â") not in context_by_char_loc[current_char_loc][2] + and token.strip().strip("Âĉ") not in context_by_char_loc[current_char_loc][2] ): warnings.warn( "subtoken: {} has matched up with the context for: {}".format(