update Split pretokenizer docstrings (#1701)

This commit is contained in:
Dylan-Harden3
2025-01-08 05:35:52 -06:00
committed by GitHub
parent 166edd87c8
commit 6945933829
2 changed files with 4 additions and 4 deletions


@@ -422,10 +422,10 @@ class Split(PreTokenizer):
     Args:
         pattern (:obj:`str` or :class:`~tokenizers.Regex`):
             A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`.
-            If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`,
+            If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`,
             otherwise we consider is as a string pattern. For example `pattern="|"`
             means you want to split on `|` (imagine a csv file for example), while
-            `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'.
+            `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'.
         behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
             The behavior to use when splitting.
             Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",


@@ -359,10 +359,10 @@ impl PyWhitespaceSplit {
 /// Args:
 ///     pattern (:obj:`str` or :class:`~tokenizers.Regex`):
 ///         A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`.
-///         If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`,
+///         If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`,
 ///         otherwise we consider is as a string pattern. For example `pattern="|"`
 ///         means you want to split on `|` (imagine a csv file for example), while
-///         `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'.
+///         `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'.
 ///     behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
 ///         The behavior to use when splitting.
 ///         Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",
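The `behavior` argument the docstring lists can be sketched as follows — a hypothetical comparison of the four named `SplitDelimiterBehavior` choices on the same input, assuming the `tokenizers` package is installed:

```python
# Compare the listed SplitDelimiterBehavior choices on "a-b-c"
# (assumes the `tokenizers` package is installed).
from tokenizers.pre_tokenizers import Split

results = {}
for behavior in ("removed", "isolated", "merged_with_previous", "merged_with_next"):
    splitter = Split(pattern="-", behavior=behavior)
    # pre_tokenize_str returns (piece, (start, end)) pairs; keep the pieces.
    results[behavior] = [piece for piece, span in splitter.pre_tokenize_str("a-b-c")]
    print(behavior, results[behavior])
```

"removed" drops the delimiter, "isolated" keeps it as its own piece, and the two "merged" variants attach it to the neighboring piece.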