Python - Make the trainer optional on Tokenizer.train

This commit is contained in:
Anthony MOI
2020-10-07 21:25:32 -04:00
committed by Anthony MOI
parent c230183cf6
commit 224862fe0c
7 changed files with 15 additions and 12 deletions

View File

@@ -115,4 +115,4 @@ class BertWordPieceTokenizer(BaseTokenizer):
)
if isinstance(files, str):
files = [files]
-self._tokenizer.train(trainer, files)
+self._tokenizer.train(files, trainer=trainer)

View File

@@ -101,4 +101,4 @@ class ByteLevelBPETokenizer(BaseTokenizer):
)
if isinstance(files, str):
files = [files]
-self._tokenizer.train(trainer, files)
+self._tokenizer.train(files, trainer=trainer)

View File

@@ -123,4 +123,4 @@ class CharBPETokenizer(BaseTokenizer):
)
if isinstance(files, str):
files = [files]
-self._tokenizer.train(trainer, files)
+self._tokenizer.train(files, trainer=trainer)

View File

@@ -74,4 +74,4 @@ class SentencePieceBPETokenizer(BaseTokenizer):
)
if isinstance(files, str):
files = [files]
-self._tokenizer.train(trainer, files)
+self._tokenizer.train(files, trainer=trainer)

View File

@@ -75,7 +75,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
if isinstance(files, str):
files = [files]
-self._tokenizer.train(trainer, files)
+self._tokenizer.train(files, trainer=trainer)
@staticmethod
def from_spm(filename: str):