Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-09 22:28:29 +00:00)
@@ -42,7 +42,7 @@ trainer = tokenizer.train(
     vocab_size=10000,
     min_frequency=2,
     show_progress=True,
-    special_tokens=["[SEP]", '[UNK]', '[CLS]', "<s>", "<pad>", "</s>"],
+    special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
     limit_alphabet=1000,
     wordpieces_prefix="##"
 )
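This change replaces the mixed list of special tokens (which combined BERT-style markers such as [SEP] and [CLS] with RoBERTa-style markers such as <s>, <pad>, and </s>) with the standard BERT set: [PAD], [UNK], [CLS], [SEP], [MASK]. The sketch below shows how the updated call might look in context; it is a minimal example, assuming the BertWordPieceTokenizer implementation from the HuggingFace tokenizers package, a hypothetical training corpus at data/corpus.txt, and a hypothetical output directory output_dir, none of which appear in the diff itself.

# Minimal sketch, assuming BertWordPieceTokenizer from the `tokenizers` package.
from tokenizers import BertWordPieceTokenizer

tokenizer = BertWordPieceTokenizer()

# Train a WordPiece vocabulary with the BERT-style special tokens
# introduced by this change. The corpus path is a placeholder.
tokenizer.train(
    files=["data/corpus.txt"],   # hypothetical corpus file
    vocab_size=10000,
    min_frequency=2,
    show_progress=True,
    special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
    limit_alphabet=1000,
    wordpieces_prefix="##",
)

# Write the resulting vocab.txt to a directory (hypothetical path).
tokenizer.save_model("output_dir")

Using the canonical BERT token set keeps the trained vocabulary compatible with BERT-style models, which expect [PAD], [UNK], [CLS], [SEP], and [MASK] rather than the <s>/<pad>/</s> markers used by RoBERTa-style tokenizers.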