Mirror of https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Python - add doctype to length in implementations spm unigram (#943)
This commit is contained in:
@@ -101,6 +101,9 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
           A list of special tokens the model should know of.
       unk_token (:obj:`str`, `optional`):
           The unknown token to be used by the model.
+      length (:obj:`int`, `optional`):
+          The total number of sequences in the iterator. This is used to
+          provide meaningful progress tracking
       """
 ...
       trainer = trainers.UnigramTrainer(
Reference in New Issue
Block a user