Fixing the benchmark. (#1583)

2025-08-23 00:35:35 +00:00 · 2024-08-01 10:36:53 +02:00
parent 35f338a7b8
commit c6f2c0b057
1 changed files with 29 additions and 21 deletions
--- a/bindings/python/benches/test_tiktoken.py
+++ b/bindings/python/benches/test_tiktoken.py
@ -7,6 +7,7 @@ import tiktoken
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from typing import Tuple, List
 from multiprocessing import Process
 MODEL_ID = "meta-llama/Meta-Llama-3.1-8B"
 DATASET = "facebook/xnli"
@ -24,8 +25,8 @@ def format_byte_size(num_bytes: int) -> Tuple[str, str]:
    return f"{num_bytes_f:.2f} PB", "PB"
-def benchmark_batch(model: str, documents: list[str]) -> None:
+def benchmark_batch(model: str, documents: list[str], num_threads: int) -> None:
-    num_threads = int(os.environ["RAYON_NUM_THREADS"])
+    os.environ["RAYON_NUM_THREADS"] = str(num_threads)
    num_bytes = sum(map(len, map(str.encode, documents)))
    readable_size, unit = format_byte_size(num_bytes)
    print(f"==============")
@ -84,18 +85,25 @@ def test(model: str, dataset: str, dataset_config: str, threads: List[int]):
    input_lengths = [(10, False), (10_000, False), (10_000, True)]  # Example input lengths
    for num_threads in threads:
        os.environ["RAYON_NUM_THREADS"] = str(num_threads)
        os.environ["TOKENIZER_PARALLELISM"] = str(num_threads)
        os.environ["RAYON_RS_NUM_THREADS"] = str(num_threads)
        for length, fuse in input_lengths:
            documents = []
            for i, item in enumerate(dataset_xnli["train"]):
                documents.append("".join(item["premise"].values()))
                if i >= length:
                    break
                documents.append("".join(item["premise"].values()))
            if fuse:
                documents=["".join(documents)]
-            benchmark_batch(model, documents)
+
            # The Rayon thread pool is global to a process, so we need to launch a
            # separate process for each run in order to actually use the requested
            # number of threads. Otherwise, every run would reuse the thread count
            # of whichever test came first, because tokenizers does NOT provide a
            # way to change the number of threads at runtime.
            p = Process(target=benchmark_batch, args=(model, documents, num_threads))
            p.start()
            p.join()
            # benchmark_batch(model, documents, num_threads)
 def main():