Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-05 04:08:22 +00:00)
Removing forgotten places.
Committed by: Anthony MOI
Parent: 857948e5b8
Commit: 76b86f6901
@@ -3,7 +3,6 @@ from .. import pre_tokenizers
 PreTokenizer = pre_tokenizers.PreTokenizer
 ByteLevel = pre_tokenizers.ByteLevel
 Whitespace = pre_tokenizers.Whitespace
-Deduplication = pre_tokenizers.Deduplication
 Punctuation = pre_tokenizers.Punctuation
 Sequence = pre_tokenizers.Sequence
 WhitespaceSplit = pre_tokenizers.WhitespaceSplit
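After this hunk, the Python wrapper only re-exports pre-tokenizers that still have a Rust binding behind them. A minimal sanity check (hypothetical, not part of the diff, and assuming the standard tokenizers package layout):

# Hypothetical check that the Deduplication alias is gone after this commit.
from tokenizers import pre_tokenizers

assert hasattr(pre_tokenizers, "Whitespace")
assert hasattr(pre_tokenizers, "Punctuation")
assert not hasattr(pre_tokenizers, "Deduplication")  # removed by this commit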
@@ -64,7 +64,6 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<pre_tokenizers::PyBertPreTokenizer>()?;
     m.add_class::<pre_tokenizers::PyMetaspace>()?;
     m.add_class::<pre_tokenizers::PyCharDelimiterSplit>()?;
-    m.add_class::<pre_tokenizers::PyDeduplication>()?;
     m.add_class::<pre_tokenizers::PyPunctuation>()?;
     m.add_class::<pre_tokenizers::PySequence>()?;
     Ok(())
@@ -9,7 +9,6 @@ from tokenizers.pre_tokenizers import (
     BertPreTokenizer,
     Metaspace,
     CharDelimiterSplit,
-    Deduplication,
     Punctuation,
     Sequence,
 )
@@ -75,14 +74,6 @@ class TestCharDelimiterSplit:
         assert isinstance(pickle.loads(pickle.dumps(CharDelimiterSplit("-"))), CharDelimiterSplit)


-class TestDeduplication:
-    def test_instantiate(self):
-        assert Deduplication() is not None
-        assert isinstance(Deduplication(), PreTokenizer)
-        assert isinstance(Deduplication(), Deduplication)
-        assert isinstance(pickle.loads(pickle.dumps(Deduplication())), Deduplication)
-
-
 class TestPunctuation:
     def test_instantiate(self):
         assert Punctuation() is not None
@@ -100,7 +91,7 @@ class TestSequence:
         assert isinstance(pickle.loads(dumped), Sequence)

     def test_bert_like(self):
-        pre_tokenizer = Sequence([Deduplication(), Punctuation()])
+        pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
         assert isinstance(Sequence([]), PreTokenizer)
         assert isinstance(Sequence([]), Sequence)
         assert isinstance(pickle.loads(pickle.dumps(pre_tokenizer)), Sequence)
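The updated test composes two of the remaining pre-tokenizers in place of the removed Deduplication. As a rough illustration of what that composition does (not part of the diff; assumes the pre_tokenize_str helper exposed by the tokenizers Python bindings):

from tokenizers.pre_tokenizers import Punctuation, Sequence, WhitespaceSplit

# Split on whitespace first, then isolate punctuation in each fragment.
pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
print(pre_tokenizer.pre_tokenize_str("Hello, world!"))
# Expected output along the lines of:
# [('Hello', (0, 5)), (',', (5, 6)), ('world', (7, 12)), ('!', (12, 13))]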