Commit

Merge branch 'encode-special-tokens' of github.com:huggingface/tokenizers into encode-special-tokens
ArthurZucker committed Jan 18, 2024
2 parents 33415e0 + 11e4ffc commit 842eced
Showing 2 changed files with 18 additions and 4 deletions.
16 changes: 12 additions & 4 deletions bindings/python/py_src/tokenizers/__init__.pyi
@@ -678,10 +678,6 @@ class Tokenizer:
             :obj:`int`: The number of tokens that were created in the vocabulary
         """
         pass
-
-    def set_encode_special_tokens(value:bool):
-        pass
-
     def decode(self, ids, skip_special_tokens=True):
         """
         Decode the given list of ids back to a string
@@ -1038,6 +1034,18 @@ class Tokenizer:
                 Whether the JSON file should be pretty formatted.
         """
         pass
+
+    def set_encode_special_tokens(self, value):
+        """
+        Modifies the tokenizer to use, or not use, the special tokens
+        during encoding.
+
+        Args:
+            value (:obj:`bool`):
+                Whether or not to use the special tokens during encoding
+        """
+        pass
+
     def to_str(self, pretty=False):
         """
         Gets a serialized string representing this :class:`~tokenizers.Tokenizer`.
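For context, a minimal usage sketch of the new method from Python. The `bert-base-uncased` checkpoint and the sample text are assumptions for illustration; they are not part of this commit:

from tokenizers import Tokenizer

# Any tokenizer with registered special tokens works; bert-base-uncased is
# just an assumed, convenient checkpoint pulled from the Hugging Face Hub.
tok = Tokenizer.from_pretrained("bert-base-uncased")

text = "hello [SEP] world"

# Default behavior: "[SEP]" appearing in the raw input is matched against
# the tokenizer's registered special tokens.
print(tok.encode(text).tokens)

# Toggle the new flag and encode again to compare how special tokens in the
# raw input are handled under the other setting.
tok.set_encode_special_tokens(True)
print(tok.encode(text).tokens)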
6 changes: 6 additions & 0 deletions bindings/python/src/tokenizer.rs
@@ -1110,6 +1110,12 @@ impl PyTokenizer {
         self.tokenizer.id_to_token(id)
     }
 
+    /// Modifies the tokenizer to use, or not use, the special tokens
+    /// during encoding.
+    ///
+    /// Args:
+    ///     value (:obj:`bool`):
+    ///         Whether or not to use the special tokens during encoding
     #[pyo3(text_signature = "(self, value)")]
     fn set_encode_special_tokens(&mut self, value: bool) {
         self.tokenizer.set_encode_special_tokens(value);
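Since the wrapped Rust function returns `()`, the Python method evaluates to :obj:`None`. A short sketch, again under the assumed `bert-base-uncased` checkpoint, also contrasting the new setter with the existing `add_special_tokens` argument of `encode`, which is a separate, post-processing knob:

from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint

# The binding wraps a Rust fn returning (), so the call evaluates to None.
result = tok.set_encode_special_tokens(False)
assert result is None

# encode()'s add_special_tokens flag controls whether the post-processor
# inserts tokens such as [CLS]/[SEP] around the output, not how special
# tokens already present in the raw input are matched.
print(tok.encode("hello world", add_special_tokens=True).tokens)
print(tok.encode("hello world", add_special_tokens=False).tokens)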
