From 76b89eef5fa44717a251b4a1d4a34890fed2ddfe Mon Sep 17 00:00:00 2001
From: Ita Zaporozhets
Date: Wed, 22 May 2024 16:00:36 +0200
Subject: [PATCH] more general approach

---
 src/transformers/convert_slow_tokenizer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py
index 83015d34c53754..705d050bb4c18f 100644
--- a/src/transformers/convert_slow_tokenizer.py
+++ b/src/transformers/convert_slow_tokenizer.py
@@ -1391,10 +1391,6 @@ def tokenizer(self, proto):
                     AddedToken(self.original_tokenizer.convert_ids_to_tokens(2), normalized=False, special=True),
                 ]
             )
-            user_defined_symbols = [
-                AddedToken(token, normalized=True, special=False) for token in proto.trainer_spec.user_defined_symbols
-            ]
-            tokenizer.add_tokens(user_defined_symbols)
         else:
             raise Exception(
                 "You're trying to run a `Unigram` model but you're file was trained with a different algorithm"
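For reference, the removed hunk performed a self-contained step: it read `user_defined_symbols` from the SentencePiece trainer spec and registered them as regular (non-special) added tokens on the converted fast tokenizer. The sketch below is illustrative only; the helper name `add_user_defined_symbols` and the idea of loading the proto from a standalone `.model` file are assumptions for the example, not part of the patch.

```python
# Minimal sketch (assumed setup): replicate outside the converter what the
# removed hunk did inside LlamaConverter.tokenizer().
from sentencepiece import sentencepiece_model_pb2 as model_pb2
from tokenizers import AddedToken, Tokenizer


def add_user_defined_symbols(tokenizer: Tokenizer, spm_model_path: str) -> None:
    # Parse the SentencePiece model proto to reach trainer_spec.user_defined_symbols.
    proto = model_pb2.ModelProto()
    with open(spm_model_path, "rb") as f:
        proto.ParseFromString(f.read())

    # Register each user-defined symbol as a normalized, non-special added token,
    # mirroring the AddedToken arguments used by the removed lines.
    user_defined_symbols = [
        AddedToken(token, normalized=True, special=False)
        for token in proto.trainer_spec.user_defined_symbols
    ]
    tokenizer.add_tokens(user_defined_symbols)
```

The removal only drops this BPE-branch-specific handling from `convert_slow_tokenizer.py`; the commit subject ("more general approach") indicates the author intends to cover these symbols elsewhere, though the patch itself does not show where.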