mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Use a smaller train file.
This commit is contained in:
committed by
Anthony MOI
parent
7acbb1122e
commit
d0366529b7
@@ -65,6 +65,9 @@ def train_files(data_dir):
|
||||
"wagahaiwa": download(
|
||||
"https://storage.googleapis.com/tokenizers/unigram_wagahaiwa_nekodearu.txt"
|
||||
),
|
||||
"simple": download(
|
||||
"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user