mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 05:38:23 +00:00
Python - Update __init__.pyi
This commit is contained in:
@@ -197,7 +197,7 @@ class Encoding:
|
||||
"""
|
||||
pass
|
||||
@property
|
||||
def sequences(self):
|
||||
def sequence_ids(self):
|
||||
"""
|
||||
The generated sequence indices.
|
||||
|
||||
@@ -313,6 +313,23 @@ class Encoding:
|
||||
:obj:`List[int]`: The list of type ids
|
||||
"""
|
||||
pass
|
||||
@property
|
||||
def word_ids(self):
|
||||
"""
|
||||
The generated word indices.
|
||||
|
||||
They represent the index of the word associated with each token.
|
||||
When the input is pre-tokenized, they correspond to the ID of the given input label,
|
||||
otherwise they correspond to the word indices as defined by the
|
||||
:class:`~tokenizers.pre_tokenizers.PreTokenizer` that was used.
|
||||
|
||||
For special tokens and such (any token that was generated from something that was
|
||||
not part of the input), the output is :obj:`None`
|
||||
|
||||
Returns:
|
||||
A :obj:`List` of :obj:`Optional[int]`: A list of optional word indices.
|
||||
"""
|
||||
pass
|
||||
def word_to_chars(self, word_index, sequence_index=0):
|
||||
"""
|
||||
Get the offsets of the word at the given index in one of the input sequences.
|
||||
@@ -347,6 +364,10 @@ class Encoding:
|
||||
"""
|
||||
The generated word indices.
|
||||
|
||||
.. warning::
|
||||
This is deprecated and will be removed in a future version.
|
||||
Please use :obj:`~tokenizers.Encoding.word_ids` instead.
|
||||
|
||||
They represent the index of the word associated with each token.
|
||||
When the input is pre-tokenized, they correspond to the ID of the given input label,
|
||||
otherwise they correspond to the word indices as defined by the
|
||||
|
||||
Reference in New Issue
Block a user