Files
tokenizers/bindings/python/tests/documentation/test_load.py
2020-11-02 17:07:27 -05:00

20 lines
469 B
Python

from tokenizers import Tokenizer
def test_load_tokenizer():
    """Load a tokenizer from ``data/roberta.json`` and check a round trip.

    The sample sentence is encoded and the resulting ids and tokens are
    compared against the expected values; the expected ids are then decoded
    and must give back the original sentence.
    """
    # NOTE(review): the START/END markers below look like snippet delimiters
    # consumed by the documentation build (this file lives under
    # tests/documentation) -- keep them and the line between them unchanged;
    # confirm against the docs sources.
    # START load_tokenizer
    tokenizer = Tokenizer.from_file("data/roberta.json")
    # END load_tokenizer

    # Sample input together with the ids/tokens the loaded tokenizer must emit.
    example = "This is an example"
    ids = [713, 16, 41, 1246]
    tokens = ["This", "Ġis", "Ġan", "Ġexample"]

    # Encoding must reproduce both the expected ids and the expected tokens.
    encodings = tokenizer.encode(example)
    assert encodings.ids == ids
    assert encodings.tokens == tokens

    # Decoding the expected ids must give back the original sentence.
    decoded = tokenizer.decode(ids)
    assert decoded == example