Python - Tests for parallelism with multiprocessing

Co-authored-by: Evan Pete Walsh <epwalsh10@gmail.com>
2025-12-07 21:28:19 +00:00 · 2020-06-23 11:25:39 -04:00
parent 5f760df231
commit aa3b39f692
5 changed files with 55 additions and 4 deletions
--- a/bindings/python/tests/implementations/test_bert_wordpiece.py
+++ b/bindings/python/tests/implementations/test_bert_wordpiece.py
@@ -1,4 +1,4 @@
-from ..utils import data_dir, bert_files
+from ..utils import data_dir, bert_files, multiprocessing_with_parallelism
 from tokenizers import BertWordPieceTokenizer


@@ -19,3 +19,8 @@ class TestBertWordPieceBPE:
        assert output.tokens == ["my", "name", "is", "john", "pair"]
        assert output.offsets == [(0, 2), (3, 7), (8, 10), (11, 15), (0, 4)]
        assert output.type_ids == [0, 0, 0, 0, 1]
+
+    def test_multiprocessing_with_parallelism(self, bert_files):
+        wordpiece = BertWordPieceTokenizer(bert_files["vocab"])  # one instance shared by both runs
+        for flag in (False, True):  # False, then True (presumably the parallelism switch; confirm in ..utils)
+            multiprocessing_with_parallelism(wordpiece, flag)
--- a/bindings/python/tests/implementations/test_byte_level_bpe.py
+++ b/bindings/python/tests/implementations/test_byte_level_bpe.py
@@ -1,4 +1,4 @@
-from ..utils import data_dir, roberta_files
+from ..utils import data_dir, roberta_files, multiprocessing_with_parallelism
 from tokenizers import ByteLevelBPETokenizer


@@ -79,3 +79,8 @@ class TestByteLevelBPE:
            "Ġlazy",
            "Ġdog",
        ]
+
+    def test_multiprocessing_with_parallelism(self, roberta_files):
+        byte_bpe = ByteLevelBPETokenizer(roberta_files["vocab"], roberta_files["merges"])  # shared by both runs
+        for flag in (False, True):  # False, then True (presumably the parallelism switch; confirm in ..utils)
+            multiprocessing_with_parallelism(byte_bpe, flag)
--- a/bindings/python/tests/implementations/test_char_bpe.py
+++ b/bindings/python/tests/implementations/test_char_bpe.py
@@ -1,4 +1,4 @@
-from ..utils import data_dir, openai_files
+from ..utils import data_dir, openai_files, multiprocessing_with_parallelism
 from tokenizers import CharBPETokenizer


@@ -42,3 +42,8 @@ class TestBertWordPieceBPE:
        tokenizer = CharBPETokenizer(openai_files["vocab"], openai_files["merges"], lowercase=True)
        decoded = tokenizer.decode(tokenizer.encode("my name is john").ids)
        assert decoded == "my name is john"
+
+    def test_multiprocessing_with_parallelism(self, openai_files):
+        char_bpe = CharBPETokenizer(openai_files["vocab"], openai_files["merges"])  # shared by both runs
+        for flag in (False, True):  # False, then True (presumably the parallelism switch; confirm in ..utils)
+            multiprocessing_with_parallelism(char_bpe, flag)