mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Python - Add some tests utils
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,6 +8,7 @@ Cargo.lock
|
|||||||
|
|
||||||
/data
|
/data
|
||||||
tokenizers/data
|
tokenizers/data
|
||||||
|
bindings/python/tests/data
|
||||||
/docs
|
/docs
|
||||||
|
|
||||||
__pycache__
|
__pycache__
|
||||||
|
0
bindings/python/tests/__init__.py
Normal file
0
bindings/python/tests/__init__.py
Normal file
0
bindings/python/tests/bindings/__init__.py
Normal file
0
bindings/python/tests/bindings/__init__.py
Normal file
37
bindings/python/tests/utils.py
Normal file
37
bindings/python/tests/utils.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Directory that holds downloaded test files. The path is relative, so it is
# resolved against the current working directory at the time it is used.
DATA_PATH = os.path.join("tests", "data")
|
||||||
|
|
||||||
|
|
||||||
|
def download(url):
    """Download ``url`` into ``DATA_PATH`` unless a local copy already exists.

    The local file name is the last ``/``-separated component of ``url``.

    Args:
        url: Address of the file to fetch.

    Returns:
        Path of the local copy (``DATA_PATH`` joined with the file name).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    filename = url.rsplit("/", 1)[-1]
    filepath = os.path.join(DATA_PATH, filename)
    if os.path.exists(filepath):
        return filepath

    # Stream into a temporary sibling and rename it only once the whole body
    # has been written. Opening the final path up front would leave an empty
    # or truncated file behind on failure, and the existence check above would
    # then treat that broken file as a valid cached copy on every later run.
    partial = filepath + ".part"
    try:
        # The context manager releases the connection even if streaming fails.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
        os.replace(partial, filepath)
    finally:
        # After a successful rename the temporary file is gone; after a
        # failure this removes the incomplete download.
        if os.path.exists(partial):
            os.remove(partial)
    return filepath
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def data_dir():
    """Session-scoped fixture that makes sure the ``DATA_PATH`` directory exists.

    ``DATA_PATH`` is a relative path, so the fixture first asserts that the
    current working directory ends with ``python``.
    """
    assert os.getcwd().endswith("python")
    # exist_ok=True replaces the separate exists/isdir check followed by
    # mkdir: it is a no-op when the directory is already there and cannot
    # fail if another process creates the directory between check and create.
    # It still raises FileExistsError when the path exists but is not a
    # directory.
    os.makedirs(DATA_PATH, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def roberta_files(data_dir):
    """Session-scoped fixture giving local paths of the RoBERTa base files.

    Requests the ``data_dir`` fixture so that it runs before anything is
    downloaded.

    Returns:
        A dict with the keys ``"vocab"`` and ``"merges"``, each mapped to the
        path returned by ``download`` for the corresponding URL.
    """
    vocab_path = download(
        "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json"
    )
    merges_path = download(
        "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt"
    )
    return {"vocab": vocab_path, "merges": merges_path}
|
Reference in New Issue
Block a user