Mirror of https://github.com/mii443/tokenizers.git
Removing --release compat test.

- Leaving the one that checks that sampling follows the expected distribution.
- Marking the python Unigram.train(..) test as slow.
- The python Unigram.train(..) test now uses the `big.txt` file.
Committed by: Anthony MOI
Parent: d0366529b7
Commit: 816632c9fa
@@ -62,12 +62,7 @@ def openai_files(data_dir):
 @pytest.fixture(scope="session")
 def train_files(data_dir):
     return {
-        "wagahaiwa": download(
-            "https://storage.googleapis.com/tokenizers/unigram_wagahaiwa_nekodearu.txt"
-        ),
-        "simple": download(
-            "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt"
-        ),
+        "big": download("https://norvig.com/big.txt"),
     }
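For context, here is a minimal sketch of what the slow-marked Unigram.train(..) test described in the commit message might look like when it consumes the train_files fixture shown above. The test name, the pytest.mark.slow marker (which would need to be registered in the project's pytest configuration), and the exact SentencePieceUnigramTokenizer.train arguments are assumptions for illustration, not taken from this diff.

import pytest
from tokenizers import SentencePieceUnigramTokenizer

@pytest.mark.slow  # assumed custom marker, registered in the project's pytest config
def test_train_unigram_on_big_txt(train_files):  # hypothetical test name
    # Train a Unigram model on the big.txt corpus downloaded by the fixture.
    tokenizer = SentencePieceUnigramTokenizer()
    tokenizer.train([train_files["big"]], vocab_size=8000, show_progress=False)

    # Basic sanity check: the trained tokenizer should produce tokens.
    output = tokenizer.encode("This is a test")
    assert len(output.tokens) > 0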