mirror of https://github.com/mii443/tokenizers.git
synced 2025-12-05 04:08:22 +00:00
Removing forgotten places.
committed by Anthony MOI
parent 857948e5b8
commit 76b86f6901
@@ -3,7 +3,6 @@ from .. import pre_tokenizers
 PreTokenizer = pre_tokenizers.PreTokenizer
 ByteLevel = pre_tokenizers.ByteLevel
 Whitespace = pre_tokenizers.Whitespace
-Deduplication = pre_tokenizers.Deduplication
 Punctuation = pre_tokenizers.Punctuation
 Sequence = pre_tokenizers.Sequence
 WhitespaceSplit = pre_tokenizers.WhitespaceSplit
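For context, the aliases in this hunk just re-export the Rust-backed classes from the compiled extension. A minimal sketch of the import surface after the commit (an illustrative example, not part of the diff; it assumes the bindings are built from this tree):

    from tokenizers import pre_tokenizers

    # The surviving aliases keep working exactly as before.
    ws = pre_tokenizers.Whitespace()
    print(ws.pre_tokenize_str("Hello, world!"))
    # e.g. [('Hello', (0, 5)), (',', (5, 6)), ('world', (7, 12)), ('!', (12, 13))]

    # Deduplication is gone from the public surface once this lands.
    assert not hasattr(pre_tokenizers, "Deduplication")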
@@ -64,7 +64,6 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<pre_tokenizers::PyBertPreTokenizer>()?;
     m.add_class::<pre_tokenizers::PyMetaspace>()?;
     m.add_class::<pre_tokenizers::PyCharDelimiterSplit>()?;
-    m.add_class::<pre_tokenizers::PyDeduplication>()?;
     m.add_class::<pre_tokenizers::PyPunctuation>()?;
     m.add_class::<pre_tokenizers::PySequence>()?;
     Ok(())
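The Rust hunk drops the matching PyO3 registration, so the compiled module stops exposing the binding at the source. A quick sanity check, sketched under the same assumption that the extension is rebuilt:

    from tokenizers import pre_tokenizers

    # Public names the pre_tokenizers module exposes; 'Deduplication'
    # should be absent after this change.
    exposed = [n for n in dir(pre_tokenizers) if not n.startswith("_")]
    print(exposed)
    assert "Deduplication" not in exposed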
@@ -9,7 +9,6 @@ from tokenizers.pre_tokenizers import (
     BertPreTokenizer,
     Metaspace,
     CharDelimiterSplit,
-    Deduplication,
     Punctuation,
     Sequence,
 )
@@ -75,14 +74,6 @@ class TestCharDelimiterSplit:
         assert isinstance(pickle.loads(pickle.dumps(CharDelimiterSplit("-"))), CharDelimiterSplit)


-class TestDeduplication:
-    def test_instantiate(self):
-        assert Deduplication() is not None
-        assert isinstance(Deduplication(), PreTokenizer)
-        assert isinstance(Deduplication(), Deduplication)
-        assert isinstance(pickle.loads(pickle.dumps(Deduplication())), Deduplication)
-
-
 class TestPunctuation:
     def test_instantiate(self):
         assert Punctuation() is not None
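The removed TestDeduplication followed the same pattern as its siblings: instantiate, check the class hierarchy, and round-trip through pickle. For reference, that pattern on a surviving pre-tokenizer (a sketch mirroring the visible test code, not a line from the diff):

    import pickle

    from tokenizers.pre_tokenizers import Punctuation

    restored = pickle.loads(pickle.dumps(Punctuation()))
    assert isinstance(restored, Punctuation)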
@@ -100,7 +91,7 @@ class TestSequence:
         assert isinstance(pickle.loads(dumped), Sequence)

     def test_bert_like(self):
-        pre_tokenizer = Sequence([Deduplication(), Punctuation()])
+        pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
         assert isinstance(Sequence([]), PreTokenizer)
         assert isinstance(Sequence([]), Sequence)
         assert isinstance(pickle.loads(pickle.dumps(pre_tokenizer)), Sequence)
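As a usage note on the updated test_bert_like: composing WhitespaceSplit with Punctuation first splits on whitespace, then isolates punctuation marks, which approximates BERT-style pre-tokenization. A minimal sketch with the standard tokenizers API (printed offsets are indicative, not taken from the source):

    from tokenizers.pre_tokenizers import Punctuation, Sequence, WhitespaceSplit

    pre_tokenizer = Sequence([WhitespaceSplit(), Punctuation()])
    print(pre_tokenizer.pre_tokenize_str("Hello, world."))
    # e.g. [('Hello', (0, 5)), (',', (5, 6)), ('world', (7, 12)), ('.', (12, 13))]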