Fixing the documentation for ByteLevel in Python (#982)

* Fixing the documentation for `ByteLevel` in Python
* Python stub.py (after rebuilding, of course)
@@ -100,6 +100,9 @@ class ByteLevel(PreTokenizer):
         add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether to add a space to the first word if there isn't already one. This
             lets us treat `hello` exactly like `say hello`.
+        use_regex (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Set this to :obj:`False` to prevent this `pre_tokenizer` from using
+            the GPT2 specific regexp for splitting on whitespace.
     """

     def __init__(self, add_prefix_space=True, use_regex=True):
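For context, the `add_prefix_space` option documented above can be exercised directly from Python. The following is a minimal usage sketch against the `tokenizers` Python API; the outputs in the comments are illustrative, not quoted from the library.

from tokenizers.pre_tokenizers import ByteLevel

# With add_prefix_space=True (the default), a missing leading space is added to
# the first word, so a bare "hello" is byte-level mapped the same way as the
# "hello" inside "say hello" (ByteLevel renders spaces as the "Ġ" byte).
pre_tok = ByteLevel(add_prefix_space=True, use_regex=True)

print(pre_tok.pre_tokenize_str("hello"))      # e.g. [('Ġhello', (0, 5))]
print(pre_tok.pre_tokenize_str("say hello"))  # e.g. [('Ġsay', (0, 3)), ('Ġhello', (3, 9))]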
@@ -228,6 +228,9 @@ macro_rules! setter {
 /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`):
 ///     Whether to add a space to the first word if there isn't already one. This
 ///     lets us treat `hello` exactly like `say hello`.
+/// use_regex (:obj:`bool`, `optional`, defaults to :obj:`True`):
+///     Set this to :obj:`False` to prevent this `pre_tokenizer` from using
+///     the GPT2 specific regexp for splitting on whitespace.
 #[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=ByteLevel)]
 #[text_signature = "(self, add_prefix_space=True, use_regex=True)"]
 pub struct PyByteLevel {}
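The Rust-side doc comment above describes the same `use_regex` option exposed to Python. Below is a short sketch of the difference it controls, again assuming the `tokenizers` Python API; the exact pieces shown in the comments are illustrative.

from tokenizers.pre_tokenizers import ByteLevel

text = "Hello, world!"

# use_regex=True (the default) first splits the text with the GPT-2 regular
# expression (roughly: words, punctuation, whitespace runs), then maps each
# piece to its byte-level representation.
with_regex = ByteLevel(use_regex=True)
print(with_regex.pre_tokenize_str(text))     # several pieces, e.g. 'Hello', ',', 'Ġworld', '!'

# use_regex=False skips that regex split, so the whole input is expected to
# remain a single byte-level-mapped piece.
without_regex = ByteLevel(use_regex=False)
print(without_regex.pre_tokenize_str(text))  # one piece covering the full string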