Python - add docstring for `length` in the SentencePiece Unigram implementation (#943)

This commit is contained in:
dctelus
2022-03-08 05:59:07 -05:00
committed by GitHub
parent 4a8f5db067
commit 98249dfb0f

View File

@ -101,6 +101,9 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
A list of special tokens the model should know of. A list of special tokens the model should know of.
unk_token (:obj:`str`, `optional`): unk_token (:obj:`str`, `optional`):
The unknown token to be used by the model. The unknown token to be used by the model.
length (:obj:`int`, `optional`):
The total number of sequences in the iterator. This is used to
provide meaningful progress tracking.
""" """
trainer = trainers.UnigramTrainer( trainer = trainers.UnigramTrainer(