Mirror of https://github.com/mii443/tokenizers.git (synced 2025-08-22 16:25:30 +00:00)
update Split pretokenizer docstrings (#1701)
@@ -422,10 +422,10 @@ class Split(PreTokenizer):
     Args:
         pattern (:obj:`str` or :class:`~tokenizers.Regex`):
             A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`.
-            If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`,
+            If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`,
             otherwise we consider is as a string pattern. For example `pattern="|"`
             means you want to split on `|` (imagine a csv file for example), while
-            `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'.
+            `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'.
         behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
             The behavior to use when splitting.
             Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",
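The docstring above describes the two ways to pass `pattern` to the Split pre-tokenizer: a plain string, or a regex wrapped in `tokenizers.Regex`. A minimal sketch of how this looks through the Python bindings (not part of this commit, and assuming a recent tokenizers release):

# Sketch only: illustrates the corrected docstring, not code from this commit.
from tokenizers import Regex
from tokenizers.pre_tokenizers import Split

# Plain string pattern: splits on the literal "|" (think of a csv file).
csv_split = Split(pattern="|", behavior="removed")
print(csv_split.pre_tokenize_str("a|b|c"))
# [('a', (0, 1)), ('b', (2, 3)), ('c', (4, 5))]

# Regex pattern: must be wrapped in tokenizers.Regex, otherwise it is
# treated as a literal string. Here we split on either '1' or '2'.
digit_split = Split(pattern=Regex("1|2"), behavior="isolated")
print(digit_split.pre_tokenize_str("a1b2c"))
# [('a', (0, 1)), ('1', (1, 2)), ('b', (2, 3)), ('2', (3, 4)), ('c', (4, 5))]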
@@ -359,10 +359,10 @@ impl PyWhitespaceSplit {
 /// Args:
 ///     pattern (:obj:`str` or :class:`~tokenizers.Regex`):
 ///         A pattern used to split the string. Usually a string or a regex built with `tokenizers.Regex`.
-///         If you want to use a regex pattern, it has to be wrapped around a `tokenizer.Regex`,
+///         If you want to use a regex pattern, it has to be wrapped around a `tokenizers.Regex`,
 ///         otherwise we consider is as a string pattern. For example `pattern="|"`
 ///         means you want to split on `|` (imagine a csv file for example), while
-///         `patter=tokenizer.Regex("1|2")` means you split on either '1' or '2'.
+///         `pattern=tokenizers.Regex("1|2")` means you split on either '1' or '2'.
 ///     behavior (:class:`~tokenizers.SplitDelimiterBehavior`):
 ///         The behavior to use when splitting.
 ///         Choices: "removed", "isolated", "merged_with_previous", "merged_with_next",
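Both hunks also list the `behavior` choices, which control what happens to the matched delimiter. A short sketch of the merge-style behaviors (again assuming the tokenizers Python bindings, not code from this commit):

# Sketch only: compares two of the documented SplitDelimiterBehavior choices.
from tokenizers.pre_tokenizers import Split

text = "a|b|c"
for behavior in ("merged_with_previous", "merged_with_next"):
    splitter = Split(pattern="|", behavior=behavior)
    print(behavior, splitter.pre_tokenize_str(text))
# merged_with_previous [('a|', (0, 2)), ('b|', (2, 4)), ('c', (4, 5))]
# merged_with_next [('a', (0, 1)), ('|b', (1, 3)), ('|c', (3, 5))]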