mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 05:38:23 +00:00
Python - Make the trainer optional on Tokenizer.train
This commit is contained in:
@@ -115,4 +115,4 @@ class BertWordPieceTokenizer(BaseTokenizer):
|
||||
)
|
||||
if isinstance(files, str):
|
||||
files = [files]
|
||||
- self._tokenizer.train(trainer, files)
+ self._tokenizer.train(files, trainer=trainer)
|
||||
|
||||
@@ -101,4 +101,4 @@ class ByteLevelBPETokenizer(BaseTokenizer):
|
||||
)
|
||||
if isinstance(files, str):
|
||||
files = [files]
|
||||
- self._tokenizer.train(trainer, files)
+ self._tokenizer.train(files, trainer=trainer)
|
||||
|
||||
@@ -123,4 +123,4 @@ class CharBPETokenizer(BaseTokenizer):
|
||||
)
|
||||
if isinstance(files, str):
|
||||
files = [files]
|
||||
- self._tokenizer.train(trainer, files)
+ self._tokenizer.train(files, trainer=trainer)
|
||||
|
||||
@@ -74,4 +74,4 @@ class SentencePieceBPETokenizer(BaseTokenizer):
|
||||
)
|
||||
if isinstance(files, str):
|
||||
files = [files]
|
||||
- self._tokenizer.train(trainer, files)
+ self._tokenizer.train(files, trainer=trainer)
|
||||
|
||||
@@ -75,7 +75,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
|
||||
|
||||
if isinstance(files, str):
|
||||
files = [files]
|
||||
- self._tokenizer.train(trainer, files)
+ self._tokenizer.train(files, trainer=trainer)
|
||||
|
||||
@staticmethod
|
||||
def from_spm(filename: str):
|
||||
|
||||
Reference in New Issue
Block a user