Python - Test CharBPETokenizer
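This commit extends the shared pytest fixtures with a session-scoped `openai_files` fixture that downloads the OpenAI GPT vocab and merges files, alongside the existing `bert_files` fixture, for use by the new CharBPETokenizer tests: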
@@ -44,3 +44,15 @@ def bert_files(data_dir):
             "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt"
         ),
     }
+
+
+@pytest.fixture(scope="session")
+def openai_files(data_dir):
+    return {
+        "vocab": download(
+            "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json"
+        ),
+        "merges": download(
+            "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt"
+        ),
+    }
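The fixture depends on a `download` helper (and a `data_dir` fixture) defined elsewhere in the test suite; neither appears in this hunk. A minimal sketch of what such a cached download helper might look like — the `DATA_PATH` location, the use of `requests`, and the filename-keyed caching scheme are all assumptions, not the repository's actual implementation:

```python
import os

import requests

DATA_PATH = os.path.join("tests", "data")  # hypothetical cache directory


def download(url, data_dir=DATA_PATH):
    # Cache the remote file under data_dir, keyed by its filename,
    # so repeated test sessions do not re-fetch it.
    filename = url.rsplit("/", 1)[-1]
    path = os.path.join(data_dir, filename)
    if not os.path.exists(path):
        os.makedirs(data_dir, exist_ok=True)
        with open(path, "wb") as f:
            f.write(requests.get(url).content)
    return path
```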
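For context, a test consuming this fixture might look like the following. This is a hypothetical sketch: `CharBPETokenizer` comes from the commit title and the `tokenizers` package, but the positional `vocab`/`merges` constructor arguments, the sample sentence, and the assertion are illustrative assumptions rather than the tests added by this commit:

```python
from tokenizers import CharBPETokenizer


def test_char_bpe_encode(openai_files):
    # Build the tokenizer from the downloaded OpenAI GPT vocab/merges files.
    tokenizer = CharBPETokenizer(openai_files["vocab"], openai_files["merges"])
    output = tokenizer.encode("My name is John")
    # The returned Encoding exposes parallel token and id sequences.
    assert len(output.tokens) == len(output.ids) > 0
```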