mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
style
This commit is contained in:
@ -376,7 +376,13 @@ class TestTokenizer:
|
||||
|
||||
# Can retrieve added token decoder
|
||||
vocab = tokenizer.get_added_tokens_decoder()
|
||||
assert vocab == {0: AddedToken("my", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),1: AddedToken("name", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),2: AddedToken("is", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),3: AddedToken("john", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),4: AddedToken("pair", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False)}
|
||||
assert vocab == {
|
||||
0: AddedToken("my", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
|
||||
1: AddedToken("name", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
|
||||
2: AddedToken("is", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
|
||||
3: AddedToken("john", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
|
||||
4: AddedToken("pair", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
|
||||
}
|
||||
|
||||
def test_get_vocab_size(self):
|
||||
tokenizer = Tokenizer(BPE())
|
||||
|
Reference in New Issue
Block a user