From 26fdfc2bc36571b9508438f17a01cf763cbf0be2 Mon Sep 17 00:00:00 2001
From: Arthur Zucker
Date: Tue, 5 Sep 2023 16:42:45 +0000
Subject: [PATCH] style

---
 bindings/python/tests/bindings/test_tokenizer.py | 10 ++++++++--
 bindings/python/tests/bindings/test_trainers.py  | 16 ++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/bindings/python/tests/bindings/test_tokenizer.py b/bindings/python/tests/bindings/test_tokenizer.py
index 5c002046..a1e41c28 100644
--- a/bindings/python/tests/bindings/test_tokenizer.py
+++ b/bindings/python/tests/bindings/test_tokenizer.py
@@ -373,10 +373,16 @@ class TestTokenizer:
         # Can retrieve vocab without added tokens
         vocab = tokenizer.get_vocab(with_added_tokens=False)
         assert vocab == {}
-
+
         # Can retrieve added token decoder
         vocab = tokenizer.get_added_tokens_decoder()
-        assert vocab == {0: AddedToken("my", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),1: AddedToken("name", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),2: AddedToken("is", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),3: AddedToken("john", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),4: AddedToken("pair", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False)}
+        assert vocab == {
+            0: AddedToken("my", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
+            1: AddedToken("name", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
+            2: AddedToken("is", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
+            3: AddedToken("john", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
+            4: AddedToken("pair", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
+        }
 
     def test_get_vocab_size(self):
         tokenizer = Tokenizer(BPE())
diff --git a/bindings/python/tests/bindings/test_trainers.py b/bindings/python/tests/bindings/test_trainers.py
index 48b69c26..87021533 100644
--- a/bindings/python/tests/bindings/test_trainers.py
+++ b/bindings/python/tests/bindings/test_trainers.py
@@ -34,8 +34,8 @@ class TestBpeTrainer:
         assert trainer.min_frequency == 12
         assert trainer.show_progress == False
         assert trainer.special_tokens == [
-            AddedToken("1", special = True),
-            AddedToken("2", special = True),
+            AddedToken("1", special=True),
+            AddedToken("2", special=True),
         ]
         assert trainer.limit_alphabet == 13
         assert sorted(trainer.initial_alphabet) == ["a", "b", "c"]
@@ -91,8 +91,8 @@ class TestWordPieceTrainer:
         assert trainer.min_frequency == 12
         assert trainer.show_progress == False
         assert trainer.special_tokens == [
-            AddedToken("1", special = True),
-            AddedToken("2", special = True),
+            AddedToken("1", special=True),
+            AddedToken("2", special=True),
         ]
         assert trainer.limit_alphabet == 13
         assert sorted(trainer.initial_alphabet) == ["a", "b", "c"]
@@ -131,8 +131,8 @@ class TestWordLevelTrainer:
         assert trainer.min_frequency == 12
         assert trainer.show_progress == False
         assert trainer.special_tokens == [
-            AddedToken("1", special = True),
-            AddedToken("2", special = True),
+            AddedToken("1", special=True),
+            AddedToken("2", special=True),
         ]
 
         # Modify these
@@ -272,8 +272,8 @@ class TestUnigram:
         assert trainer.vocab_size == 12345
         assert trainer.show_progress == False
         assert trainer.special_tokens == [
-            AddedToken("1", normalized=False, special = True),
-            AddedToken("2", lstrip=True, normalized=False, special = True),
+            AddedToken("1", normalized=False, special=True),
+            AddedToken("2", lstrip=True, normalized=False, special=True),
         ]
         assert sorted(trainer.initial_alphabet) == ["a", "b", "c"]