mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-27 10:39:47 +00:00
Python - Test Models
This commit is contained in:
@ -288,4 +288,11 @@ impl WordLevel {
|
|||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[staticmethod]
|
||||||
|
fn empty() -> Model {
|
||||||
|
Model {
|
||||||
|
model: Container::Owned(Box::new(tk::models::wordlevel::WordLevel::default())),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
23
bindings/python/tests/bindings/test_models.py
Normal file
23
bindings/python/tests/bindings/test_models.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from ..utils import data_dir, roberta_files, bert_files
|
||||||
|
|
||||||
|
from tokenizers.models import Model, BPE, WordPiece, WordLevel
|
||||||
|
|
||||||
|
|
||||||
|
class TestBPE:
    """Instantiation checks for the ``BPE`` model binding."""

    def test_instantiate(self, roberta_files):
        """``BPE.empty()`` and ``BPE.from_files(vocab, merges)`` both yield a ``Model``."""
        empty_model = BPE.empty()
        assert isinstance(empty_model, Model)

        vocab_path = roberta_files["vocab"]
        merges_path = roberta_files["merges"]
        loaded_model = BPE.from_files(vocab_path, merges_path)
        assert isinstance(loaded_model, Model)
|
||||||
|
|
||||||
|
|
||||||
|
class TestWordPiece:
    """Instantiation checks for the ``WordPiece`` model binding."""

    def test_instantiate(self, bert_files):
        """``WordPiece.empty()`` and ``WordPiece.from_files(vocab)`` both yield a ``Model``."""
        empty_model = WordPiece.empty()
        assert isinstance(empty_model, Model)

        vocab_path = bert_files["vocab"]
        loaded_model = WordPiece.from_files(vocab_path)
        assert isinstance(loaded_model, Model)
|
||||||
|
|
||||||
|
|
||||||
|
class TestWordLevel:
    """Instantiation checks for the ``WordLevel`` model binding."""

    def test_instantiate(self, roberta_files):
        """``WordLevel.empty()`` and ``WordLevel.from_files(vocab)`` both yield a ``Model``."""
        empty_model = WordLevel.empty()
        assert isinstance(empty_model, Model)

        # The WordLevel model expects a vocab.json using the same format as roberta
        # so we can just try to load with this file
        vocab_path = roberta_files["vocab"]
        loaded_model = WordLevel.from_files(vocab_path)
        assert isinstance(loaded_model, Model)
|
@ -35,3 +35,12 @@ def roberta_files(data_dir):
|
|||||||
"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt"
|
"https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt"
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def bert_files(data_dir):
    """Session-scoped fixture exposing the BERT base uncased vocab under ``"vocab"``.

    The value is whatever ``download`` returns for the vocab URL
    (presumably a local file path -- confirm against the ``download`` helper).
    ``data_dir`` is requested as a fixture dependency but not used directly here.
    """
    vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt"
    return {"vocab": download(vocab_url)}
|
||||||
|
@ -158,3 +158,7 @@ class WordLevel(Model):
|
|||||||
The unknown token to be used by the model.
|
The unknown token to be used by the model.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
@staticmethod
def empty() -> Model:
    """Instantiate an empty WordLevel Model.

    Takes no arguments; this is a stub declaration whose body is a placeholder.
    """
    pass
|
||||||
|
Reference in New Issue
Block a user