mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-07 21:28:19 +00:00
Attempt to get some documentation going.
This commit is contained in:
committed by
Anthony MOI
parent
4929809af0
commit
655809c718
@@ -9,6 +9,8 @@ class TestByteLevelBPE:
|
||||
tokenizer = ByteLevelBPETokenizer.from_file(roberta_files["vocab"], roberta_files["merges"])
|
||||
output = tokenizer.encode("The quick brown fox jumps over the lazy dog")
|
||||
|
||||
tokenizer.save("roberta.json")
|
||||
|
||||
assert output.ids == [133, 2119, 6219, 23602, 13855, 81, 5, 22414, 2335]
|
||||
assert output.tokens == [
|
||||
"The",
|
||||
|
||||
Reference in New Issue
Block a user