mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Python - Add bert wordpiece training example
This commit is contained in:
@@ -40,7 +40,7 @@ trainer = trainers.BpeTrainer.new(
     vocab_size=50000,
     min_frequency=2,
     show_progress=True,
-    special_tokens=[ "<s>", "<pad>", "</s" ],
+    special_tokens=[ "<s>", "<pad>", "</s>" ],
     initial_alphabet=pre_tokenizers.ByteLevel.alphabet()
 )
 tokenizer.train(trainer, files)
Reference in New Issue
Block a user