Addressing first pass of comments.

This commit is contained in:
Nicolas Patry
2020-09-23 11:29:17 +02:00
parent 1cd4824273
commit 8f8156fd2c
10 changed files with 196 additions and 125 deletions

View File

@@ -67,7 +67,10 @@ class TestByteLevelBPE:
def test_lowerspace(self, roberta_files):
tokenizer = ByteLevelBPETokenizer.from_files(
roberta_files["vocab"], roberta_files["merges"], add_prefix_space=True, lowercase=True,
roberta_files["vocab"],
roberta_files["merges"],
add_prefix_space=True,
lowercase=True,
)
output = tokenizer.encode("The Quick Brown Fox Jumps Over The Lazy Dog")