Skip to content

Commit

Permalink
Update tokenizer.py
Browse files (browse the repository at this point in the history)
Fix docstring typos. Add unk, cls, sep, and mask token id properties for use with standard BERT-based config definitions.
  • Loading branch information
neeravkaushal authored Nov 21, 2024
1 parent 752b101 commit eb5c138
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions safe/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,33 @@ def bos_token_id(self):

@property
def pad_token_id(self):
    """Get the pad token id.

    Returns:
        Whatever ``self.tokenizer.token_to_id`` yields for the wrapped
        tokenizer's ``pad_token``.
    """
    # NOTE(review): token_to_id presumably returns None when the token is
    # missing from the vocabulary -- confirm against the tokenizer backend.
    return self.tokenizer.token_to_id(self.tokenizer.pad_token)

@property
def eos_token_id(self):
    """Get the eos token id.

    Returns:
        Whatever ``self.tokenizer.token_to_id`` yields for the wrapped
        tokenizer's ``eos_token``.
    """
    # NOTE(review): token_to_id presumably returns None when the token is
    # missing from the vocabulary -- confirm against the tokenizer backend.
    return self.tokenizer.token_to_id(self.tokenizer.eos_token)

@property
def unk_token_id(self):
    """Look up the id of the wrapped tokenizer's ``unk_token``."""
    backend = self.tokenizer
    return backend.token_to_id(backend.unk_token)

@property
def mask_token_id(self):
    """Look up the id of the wrapped tokenizer's ``mask_token``."""
    backend = self.tokenizer
    return backend.token_to_id(backend.mask_token)

@property
def cls_token_id(self):
    """Look up the id of the wrapped tokenizer's ``cls_token``."""
    backend = self.tokenizer
    return backend.token_to_id(backend.cls_token)

@property
def sep_token_id(self):
    """Look up the id of the wrapped tokenizer's ``sep_token``."""
    backend = self.tokenizer
    return backend.token_to_id(backend.sep_token)

@classmethod
def set_special_tokens(
Expand Down

0 comments on commit eb5c138

Please sign in to comment.