mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-18 06:19:14 +00:00
Creating normalizers.Prepend (To be used instead of Metaspace). (#1194)
* Creating `normalizers.Prepend` (To be used instead of `Metaspace`). * Linting + stub. * Fixing pickling/unpickling by setting a default. * Black.
This commit is contained in:
@@ -4,7 +4,7 @@ import pytest
|
||||
|
||||
from tokenizers import NormalizedString, Tokenizer
|
||||
from tokenizers.models import BPE
|
||||
from tokenizers.normalizers import BertNormalizer, Lowercase, Normalizer, Sequence, Strip
|
||||
from tokenizers.normalizers import BertNormalizer, Lowercase, Normalizer, Sequence, Strip, Prepend
|
||||
|
||||
|
||||
class TestBertNormalizer:
|
||||
@@ -119,6 +119,28 @@ class TestStrip:
|
||||
assert normalizer.right == False
|
||||
|
||||
|
||||
class TestPrepend:
    """Checks for the ``Prepend`` normalizer: construction, output, and attribute updates."""

    def test_instantiate(self):
        """A ``Prepend`` is a ``Normalizer`` and keeps its type across a pickle round-trip."""
        instance = Prepend("▁")
        assert isinstance(instance, Normalizer)
        assert isinstance(instance, Prepend)

        # Serialize and restore to make sure the type survives pickling.
        restored = pickle.loads(pickle.dumps(Prepend("▁")))
        assert isinstance(restored, Prepend)

    def test_prepend(self):
        """Normalizing a string puts the configured prefix in front of it."""
        prefixer = Prepend(prepend="▁")

        assert prefixer.normalize_str("hello") == "▁hello"

    def test_can_modify(self):
        """The ``prepend`` attribute can be read and reassigned after construction."""
        prefixer = Prepend("▁")
        assert prefixer.prepend == "▁"

        # Reassign the attribute and confirm the new value is reported back.
        prefixer.prepend = "-"
        assert prefixer.prepend == "-"
|
||||
|
||||
|
||||
class TestCustomNormalizer:
|
||||
class BadCustomNormalizer:
|
||||
def normalize(self, normalized, wrong):
|
||||
|
||||
Reference in New Issue
Block a user