mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Doc - Quicktour uses python tested code
This commit is contained in:
@ -6,8 +6,8 @@ import pytest
|
||||
DATA_PATH = os.path.join("tests", "data")
|
||||
|
||||
|
||||
def download(url):
|
||||
filename = url.rsplit("/")[-1]
|
||||
def download(url, with_filename=None):
|
||||
filename = with_filename if with_filename is not None else url.rsplit("/")[-1]
|
||||
filepath = os.path.join(DATA_PATH, filename)
|
||||
if not os.path.exists(filepath):
|
||||
with open(filepath, "wb") as f:
|
||||
@ -82,6 +82,14 @@ def albert_base(data_dir):
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def doc_wiki_tokenizer(data_dir):
    """Session-scoped fixture for the quicktour documentation tokenizer.

    Calls ``download`` with the remote ``tokenizer.json`` URL and an explicit
    filename, so the file is stored as ``tokenizer-wiki.json`` instead of
    under the URL's own basename. The fixture returns whatever ``download``
    returns (presumably the local file path -- confirm against ``download``).

    ``data_dir`` is requested as a fixture dependency only; it is not used
    in the body.
    """
    remote_url = "https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-quicktour/tokenizer.json"
    local_name = "tokenizer-wiki.json"
    return download(remote_url, local_name)
|
||||
|
||||
|
||||
def multiprocessing_with_parallelism(tokenizer, enabled: bool):
|
||||
"""
|
||||
This helper can be used to test that disabling parallelism avoids dead locks when the
|
||||
|
Reference in New Issue
Block a user