Doc - Improve snippets testing

This commit is contained in:
Anthony MOI
2020-10-02 15:52:33 -04:00
committed by Anthony MOI
parent f4e7754112
commit 000c19a7a5
12 changed files with 84 additions and 68 deletions

View File

@@ -0,0 +1,19 @@
from tokenizers import Tokenizer
def test_load_tokenizer():
    """Check that the tokenizer loaded from ``data/roberta.json`` round-trips.

    A fixed sentence is encoded and the resulting ids and tokens are compared
    with hard-coded expectations; the expected ids are then decoded and the
    result is compared with the original sentence.
    """
    sentence = "This is an example"
    expected_ids = [713, 16, 41, 1246]
    expected_tokens = ["This", "Ġis", "Ġan", "Ġexample"]

    # NOTE(review): the START/END markers below presumably delimit a snippet
    # that is pulled into the documentation -- keep the statement between
    # them unchanged; confirm against the docs build.
    # START load_tokenizer
    tokenizer = Tokenizer.from_file("data/roberta.json")
    # END load_tokenizer

    # Encoding must yield exactly the expected ids and token strings.
    encoded = tokenizer.encode(sentence)
    assert encoded.ids == expected_ids
    assert encoded.tokens == expected_tokens

    # Decoding the expected ids must give back the original sentence.
    assert tokenizer.decode(expected_ids) == sentence