Attempt to get some documentation going.

This commit is contained in:
Nicolas Patry
2020-09-25 21:43:02 +02:00
committed by Anthony MOI
parent 4929809af0
commit 655809c718
15 changed files with 270 additions and 16 deletions

View File

@@ -9,6 +9,8 @@ class TestByteLevelBPE:
tokenizer = ByteLevelBPETokenizer.from_file(roberta_files["vocab"], roberta_files["merges"])
output = tokenizer.encode("The quick brown fox jumps over the lazy dog")
tokenizer.save("roberta.json")
assert output.ids == [133, 2119, 6219, 23602, 13855, 81, 5, 22414, 2335]
assert output.tokens == [
"The",