Skip to content

Commit

Permalink
final fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sanchit-gandhi committed Sep 29, 2023
1 parent 4c23123 commit dbbdf3f
Showing 1 changed file with 2 additions and 6 deletions.
8 changes: 2 additions & 6 deletions src/transformers/models/wav2vec2/tokenization_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,7 @@ def __init__(

# make sure that tokens made of several
# characters are not split at tokenization
- for token in self.encoder.keys():
-     if len(token) > 1:
-         self.add_tokens(AddedToken(token, rstrip=True, lstrip=True, normalized=False))
+ self.add_tokens([token for token in self.encoder.keys() if len(token) > 1])

def set_target_lang(self, target_lang: str):
"""
Expand All @@ -227,9 +225,7 @@ def set_target_lang(self, target_lang: str):

# make sure that tokens made of several
# characters are not split at tokenization
- for token in self.encoder.keys():
-     if len(token) > 1:
-         self.add_tokens(token)
+ self.add_tokens([token for token in self.encoder.keys() if len(token) > 1])

@property
def word_delimiter_token(self) -> str:
Expand Down

0 comments on commit dbbdf3f

Please sign in to comment.