Removing forgotten places.

This commit is contained in:
Nicolas Patry
2020-08-22 10:14:53 +02:00
committed by Anthony MOI
parent 857948e5b8
commit 76b86f6901
3 changed files with 1 addition and 12 deletions

View File

@@ -3,7 +3,6 @@ from .. import pre_tokenizers
 PreTokenizer = pre_tokenizers.PreTokenizer
 ByteLevel = pre_tokenizers.ByteLevel
 Whitespace = pre_tokenizers.Whitespace
-Deduplication = pre_tokenizers.Deduplication
 Punctuation = pre_tokenizers.Punctuation
 Sequence = pre_tokenizers.Sequence
 WhitespaceSplit = pre_tokenizers.WhitespaceSplit

View File

@@ -64,7 +64,6 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<pre_tokenizers::PyBertPreTokenizer>()?;
     m.add_class::<pre_tokenizers::PyMetaspace>()?;
     m.add_class::<pre_tokenizers::PyCharDelimiterSplit>()?;
-    m.add_class::<pre_tokenizers::PyDeduplication>()?;
     m.add_class::<pre_tokenizers::PyPunctuation>()?;
     m.add_class::<pre_tokenizers::PySequence>()?;
     Ok(())

View File

@@ -9,7 +9,6 @@ from tokenizers.pre_tokenizers import (
     BertPreTokenizer,
     Metaspace,
     CharDelimiterSplit,
-    Deduplication,
     Punctuation,
     Sequence,
 )
@@ -75,14 +74,6 @@ class TestCharDelimiterSplit:
         assert isinstance(pickle.loads(pickle.dumps(CharDelimiterSplit("-"))), CharDelimiterSplit)
 
-
-class TestDeduplication:
-    def test_instantiate(self):
-        assert Deduplication() is not None
-        assert isinstance(Deduplication(), PreTokenizer)
-        assert isinstance(Deduplication(), Deduplication)
-        assert isinstance(pickle.loads(pickle.dumps(Deduplication())), Deduplication)
-
 
 class TestPunctuation:
     def test_instantiate(self):
         assert Punctuation() is not None
@@ -100,7 +91,7 @@ class TestSequence:
         assert isinstance(pickle.loads(dumped), Sequence)
 
     def test_bert_like(self):
-        pre_tokenizer = Sequence([Deduplication(), Punctuation()])
+        pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
         assert isinstance(Sequence([]), PreTokenizer)
         assert isinstance(Sequence([]), Sequence)
         assert isinstance(pickle.loads(pickle.dumps(pre_tokenizer)), Sequence)