Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-24 00:59:19 +00:00.
Commit: "Doc - Improve snippets testing"
This commit is contained in:
19
bindings/python/tests/documentation/test_load.py
Normal file
19
bindings/python/tests/documentation/test_load.py
Normal file
@ -0,0 +1,19 @@
|
||||
from tokenizers import Tokenizer
|
||||
|
||||
|
||||
def test_load_tokenizer():
    """Round-trip check for the doc snippet: load a tokenizer from a JSON
    file, encode a sentence, and decode the ids back to the original text.

    The START/END markers delimit the fragment extracted into the docs.
    """
    # START load_tokenizer
    tokenizer = Tokenizer.from_file("data/roberta.json")
    # END load_tokenizer

    sentence = "This is an example"
    expected_ids = [713, 16, 41, 1246]
    # "Ġ" is the byte-level BPE marker for a leading space.
    expected_tokens = ["This", "Ġis", "Ġan", "Ġexample"]

    # Encoding must match the pinned ids/tokens for the RoBERTa vocab.
    encoding = tokenizer.encode(sentence)
    assert encoding.ids == expected_ids
    assert encoding.tokens == expected_tokens

    # Decoding the ids must reproduce the original sentence exactly.
    assert tokenizer.decode(expected_ids) == sentence
|
Reference in New Issue
Block a user