Python - Update Bert default special tokens

Closes #106
Anthony MOI
2020-02-05 12:55:01 -05:00
parent a1284f6220
commit 42c4691e4d
2 changed files with 2 additions and 2 deletions

@@ -42,7 +42,7 @@ trainer = tokenizer.train(
     vocab_size=10000,
     min_frequency=2,
     show_progress=True,
-    special_tokens=["[SEP]", '[UNK]', '[CLS]', "<s>", "<pad>", "</s>"],
+    special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
     limit_alphabet=1000,
     wordpieces_prefix="##"
 )
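
For reference, below is a minimal sketch of the full training call with the updated defaults, assuming the tokenizers Python bindings of this era; the BertWordPieceTokenizer class and the corpus path are illustrative assumptions, not part of this commit.

from tokenizers import BertWordPieceTokenizer

# A minimal sketch, assuming the tokenizers Python bindings of this era.
tokenizer = BertWordPieceTokenizer()

# The special_tokens list now matches BERT's own conventions
# ([PAD], [UNK], [CLS], [SEP], [MASK]) rather than mixing in
# RoBERTa-style <s>, <pad>, </s> markers.
tokenizer.train(
    files=["corpus.txt"],  # hypothetical training corpus
    vocab_size=10000,
    min_frequency=2,
    show_progress=True,
    special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"],
    limit_alphabet=1000,
    wordpieces_prefix="##",
)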