diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.py b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.py
index a5112416..7b4cfcdb 100644
--- a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.py
+++ b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.py
@@ -3,7 +3,6 @@ from .. import pre_tokenizers
 PreTokenizer = pre_tokenizers.PreTokenizer
 ByteLevel = pre_tokenizers.ByteLevel
 Whitespace = pre_tokenizers.Whitespace
-Deduplication = pre_tokenizers.Deduplication
 Punctuation = pre_tokenizers.Punctuation
 Sequence = pre_tokenizers.Sequence
 WhitespaceSplit = pre_tokenizers.WhitespaceSplit
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index bdb35970..9acc0115 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -64,7 +64,6 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<pre_tokenizers::PreTokenizer>()?;
     m.add_class::<pre_tokenizers::ByteLevel>()?;
     m.add_class::<pre_tokenizers::Whitespace>()?;
-    m.add_class::<pre_tokenizers::Deduplication>()?;
     m.add_class::<pre_tokenizers::Punctuation>()?;
     m.add_class::<pre_tokenizers::Sequence>()?;
     Ok(())
diff --git a/bindings/python/tests/bindings/test_pre_tokenizers.py b/bindings/python/tests/bindings/test_pre_tokenizers.py
index 57de5e4b..543ef68f 100644
--- a/bindings/python/tests/bindings/test_pre_tokenizers.py
+++ b/bindings/python/tests/bindings/test_pre_tokenizers.py
@@ -9,7 +9,6 @@ from tokenizers.pre_tokenizers import (
     BertPreTokenizer,
     Metaspace,
     CharDelimiterSplit,
-    Deduplication,
     Punctuation,
     Sequence,
 )
@@ -75,14 +74,6 @@ class TestCharDelimiterSplit:
         assert isinstance(pickle.loads(pickle.dumps(CharDelimiterSplit("-"))), CharDelimiterSplit)
 
 
-class TestDeduplication:
-    def test_instantiate(self):
-        assert Deduplication() is not None
-        assert isinstance(Deduplication(), PreTokenizer)
-        assert isinstance(Deduplication(), Deduplication)
-        assert isinstance(pickle.loads(pickle.dumps(Deduplication())), Deduplication)
-
-
 class TestPunctuation:
     def test_instantiate(self):
         assert Punctuation() is not None
@@ -100,7 +91,7 @@ class TestSequence:
         assert isinstance(pickle.loads(dumped), Sequence)
 
     def test_bert_like(self):
-        pre_tokenizer = Sequence([Deduplication(), Punctuation()])
+        pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
         assert isinstance(Sequence([]), PreTokenizer)
         assert isinstance(Sequence([]), Sequence)
         assert isinstance(pickle.loads(pickle.dumps(pre_tokenizer)), Sequence)