mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 05:38:23 +00:00
Python - Update CHANGELOG and stub files
This commit is contained in:
@@ -417,12 +417,37 @@ class Split(PreTokenizer):
|
||||
pass
|
||||
def pre_tokenize(self, pretok):
|
||||
"""
|
||||
Pre tokenize the given PreTokenizedString in-place
|
||||
Pre-tokenize a :class:`~tokenizers.PreTokenizedString` in-place
|
||||
|
||||
This method allows you to modify a :class:`~tokenizers.PreTokenizedString` to
|
||||
keep track of the pre-tokenization, and leverage the capabilities of the
|
||||
:class:`~tokenizers.PreTokenizedString`. If you just want to see the result of
|
||||
the pre-tokenization of a raw string, you can use
|
||||
:meth:`~tokenizers.pre_tokenizers.PreTokenizer.pre_tokenize_str`
|
||||
|
||||
Args:
|
||||
pretok (:class:`~tokenizers.PreTokenizedString`):
|
||||
The pre-tokenized string on which to apply this
|
||||
:class:`~tokenizers.pre_tokenizers.PreTokenizer`
|
||||
"""
|
||||
pass
|
||||
def pre_tokenize_str(self, sequence):
|
||||
"""
|
||||
Pre tokenize the given sequence
|
||||
Pre-tokenize the given string
|
||||
|
||||
This method provides a way to visualize the effect of a
|
||||
:class:`~tokenizers.pre_tokenizers.PreTokenizer` but it does not keep track of the
|
||||
alignment, nor does it provide all the capabilities of the
|
||||
:class:`~tokenizers.PreTokenizedString`. If you need some of these, you can use
|
||||
:meth:`~tokenizers.pre_tokenizers.PreTokenizer.pre_tokenize`
|
||||
|
||||
Args:
|
||||
sequence (:obj:`str`):
|
||||
A string to pre-tokenize
|
||||
|
||||
Returns:
|
||||
:obj:`List[Tuple[str, Offsets]]`:
|
||||
A list of tuples with the pre-tokenized parts and their offsets
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user