diff --git a/bindings/python/py_src/tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/__init__.pyi index 95c17a7dc..55c02b5e8 100644 --- a/bindings/python/py_src/tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/__init__.pyi @@ -678,10 +678,6 @@ class Tokenizer: :obj:`int`: The number of tokens that were created in the vocabulary """ pass - - def set_encode_special_tokens(value:bool): - pass - def decode(self, ids, skip_special_tokens=True): """ Decode the given list of ids back to a string @@ -1038,6 +1034,19 @@ class Tokenizer: Whether the JSON file should be pretty formatted. """ pass + def set_encode_special_tokens(self, value): + """ + Modifies the tokenizer in order to use or not the special tokens + during encoding. + + Args: + value (:obj:`bool`): + Whether to use the special tokens or not + + Returns: + :obj:`None`: The tokenizer is modified in place; nothing is returned + """ + pass def to_str(self, pretty=False): """ Gets a serialized string representing this :class:`~tokenizers.Tokenizer`. diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs index a28539540..e852c1ead 100644 --- a/bindings/python/src/tokenizer.rs +++ b/bindings/python/src/tokenizer.rs @@ -1110,6 +1110,15 @@ impl PyTokenizer { self.tokenizer.id_to_token(id) } + /// Modifies the tokenizer in order to use or not the special tokens + /// during encoding. + /// + /// Args: + /// value (:obj:`bool`): + /// Whether to use the special tokens or not + /// + /// Returns: + /// :obj:`None`: The tokenizer is modified in place; nothing is returned #[pyo3(text_signature = "(self, value)")] fn set_encode_special_tokens(&mut self, value: bool) { self.tokenizer.set_encode_special_tokens(value);