diff --git a/bindings/python/py_src/tokenizers/__init__.py b/bindings/python/py_src/tokenizers/__init__.py
index c3a37475..d43e3a35 100644
--- a/bindings/python/py_src/tokenizers/__init__.py
+++ b/bindings/python/py_src/tokenizers/__init__.py
@@ -9,7 +9,8 @@ TextInputSequence = str
PreTokenizedInputSequence = Union[List[str], Tuple[str]]
TextEncodeInput = Union[TextInputSequence, Tuple[TextInputSequence, TextInputSequence]]
PreTokenizedEncodeInput = Union[
- PreTokenizedInputSequence, Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence],
+ PreTokenizedInputSequence,
+ Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence],
]
InputSequence = Union[TextInputSequence, PreTokenizedInputSequence]
diff --git a/bindings/python/py_src/tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/__init__.pyi
index 67173b5e..7a086bda 100644
--- a/bindings/python/py_src/tokenizers/__init__.pyi
+++ b/bindings/python/py_src/tokenizers/__init__.pyi
@@ -21,7 +21,8 @@ TextInputSequence = str
PreTokenizedInputSequence = Union[List[str], Tuple[str]]
TextEncodeInput = Union[TextInputSequence, Tuple[TextInputSequence, TextInputSequence]]
PreTokenizedEncodeInput = Union[
- PreTokenizedInputSequence, Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence],
+ PreTokenizedInputSequence,
+ Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence],
]
InputSequence = Union[TextInputSequence, PreTokenizedInputSequence]
@@ -827,7 +828,10 @@ class Tokenizer:
"""
pass
def post_process(
- self, encoding: Encoding, pair: Optional[Encoding] = None, add_special_tokens: bool = True,
+ self,
+ encoding: Encoding,
+ pair: Optional[Encoding] = None,
+ add_special_tokens: bool = True,
) -> Encoding:
"""Apply all the post-processing steps to the given encodings.
diff --git a/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py b/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py
index 1cbc9c3a..81f35e22 100644
--- a/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py
+++ b/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py
@@ -21,7 +21,10 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
"""
def __init__(
- self, vocab: Optional[str] = None, replacement: str = "▁", add_prefix_space: bool = True,
+ self,
+ vocab: Optional[str] = None,
+ replacement: str = "▁",
+ add_prefix_space: bool = True,
):
if vocab is not None:
# Let Unigram(..) fail if only one of them is None
@@ -29,7 +32,12 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
else:
tokenizer = Tokenizer(Unigram())
- tokenizer.normalizer = normalizers.Sequence([normalizers.Nmt(), normalizers.NFKC(),])
+ tokenizer.normalizer = normalizers.Sequence(
+ [
+ normalizers.Nmt(),
+ normalizers.NFKC(),
+ ]
+ )
tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
[
pre_tokenizers.WhitespaceSplit(),
@@ -60,7 +68,9 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
""" Train the model using the given files """
trainer = trainers.UnigramTrainer(
- vocab_size=vocab_size, special_tokens=special_tokens, show_progress=show_progress,
+ vocab_size=vocab_size,
+ special_tokens=special_tokens,
+ show_progress=show_progress,
)
if isinstance(files, str):
diff --git a/bindings/python/tests/bindings/test_models.py b/bindings/python/tests/bindings/test_models.py
index 7dfef0b6..45f68bcd 100644
--- a/bindings/python/tests/bindings/test_models.py
+++ b/bindings/python/tests/bindings/test_models.py
@@ -18,7 +18,10 @@ class TestBPE:
BPE(vocab=vocab)
BPE(merges=merges)
- assert isinstance(pickle.loads(pickle.dumps(BPE(vocab, merges))), BPE,)
+ assert isinstance(
+ pickle.loads(pickle.dumps(BPE(vocab, merges))),
+ BPE,
+ )
# Deprecated calls in 0.9
with pytest.deprecated_call():
diff --git a/bindings/python/tests/bindings/test_processors.py b/bindings/python/tests/bindings/test_processors.py
index 30c9ec5b..12abc156 100644
--- a/bindings/python/tests/bindings/test_processors.py
+++ b/bindings/python/tests/bindings/test_processors.py
@@ -22,7 +22,8 @@ class TestBertProcessing:
assert isinstance(processor, PostProcessor)
assert isinstance(processor, BertProcessing)
assert isinstance(
- pickle.loads(pickle.dumps(BertProcessing(("[SEP]", 0), ("[CLS]", 1)))), BertProcessing,
+ pickle.loads(pickle.dumps(BertProcessing(("[SEP]", 0), ("[CLS]", 1)))),
+ BertProcessing,
)
def test_processing(self):
@@ -94,7 +95,9 @@ class TestTemplateProcessing:
def get_roberta(self):
return TemplateProcessing(
- seq_a=" $0 ", seq_b=" $0 ", special_tokens=[("", 0), ("", 1)],
+ seq_a=" $0 ",
+ seq_b=" $0 ",
+ special_tokens=[("", 0), ("", 1)],
)
def get_t5_squad(self):