Python - Add bert wordpiece training example

This commit is contained in:
Anthony MOI
2020-01-03 16:51:39 -05:00
parent 6e3efe8954
commit fab4e96b51
3 changed files with 65 additions and 1 deletions

View File

@@ -40,7 +40,7 @@ trainer = trainers.BpeTrainer.new(
vocab_size=50000,
min_frequency=2,
show_progress=True,
-special_tokens=[ "<s>", "<pad>", "</s" ],
+special_tokens=[ "<s>", "<pad>", "</s>" ],
initial_alphabet=pre_tokenizers.ByteLevel.alphabet()
)
tokenizer.train(trainer, files)