From 6945933829145c0cbaa297daa76594800f2067ce Mon Sep 17 00:00:00 2001 From: Dylan-Harden3 <89853753+Dylan-Harden3@users.noreply.github.com> Date: Wed, 8 Jan 2025 05:35:52 -0600 Subject: [PATCH] update Split pretokenizer docstrings (#1701) --- bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi | 4 ++-- bindings/python/src/pre_tokenizers.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi index a583945f..8049daec 100644 --- a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi @@ -422,10 +422,10 @@ class Split(PreTokenizer): Args: pattern (:obj:`str` or :class:`~tokenizers.Regex`): A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. - If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, + If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`, otherwise we consider is as a string pattern. For example `pattern="|"` means you want to split on `|` (imagine a csv file for example), while - `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'. + `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'. behavior (:class:`~tokenizers.SplitDelimiterBehavior`): The behavior to use when splitting. Choices: "removed", "isolated", "merged_with_previous", "merged_with_next", diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs index fdc86230..02034560 100644 --- a/bindings/python/src/pre_tokenizers.rs +++ b/bindings/python/src/pre_tokenizers.rs @@ -359,10 +359,10 @@ impl PyWhitespaceSplit { /// Args: /// pattern (:obj:`str` or :class:`~tokenizers.Regex`): /// A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`. -/// If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`, +/// If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`, /// otherwise we consider is as a string pattern. For example `pattern="|"` /// means you want to split on `|` (imagine a csv file for example), while -/// `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'. +/// `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'. /// behavior (:class:`~tokenizers.SplitDelimiterBehavior`): /// The behavior to use when splitting. /// Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",