diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi index ea1b4954e..a583945fc 100644 --- a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi @@ -421,7 +421,7 @@ class Split(PreTokenizer): Args: pattern (:obj:`str` or :class:`~tokenizers.Regex`): - A pattern used to split the string. Usually a string or a a regex built with `tokenizers.Regex`. + A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, otherwise we consider is as a string pattern. For example `pattern="|"` means you want to split on `|` (imagine a csv file for example), while diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs index 92a40fc7c..2453d9ac7 100644 --- a/bindings/python/src/pre_tokenizers.rs +++ b/bindings/python/src/pre_tokenizers.rs @@ -334,7 +334,7 @@ impl PyWhitespaceSplit { /// /// Args: /// pattern (:obj:`str` or :class:`~tokenizers.Regex`): -/// A pattern used to split the string. Usually a string or a a regex built with `tokenizers.Regex`. +/// A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. /// If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, /// otherwise we consider is as a string pattern. For example `pattern="|"` /// means you want to split on `|` (imagine a csv file for example), while