Creating normalizers.Prepend (To be used instead of Metaspace). (#1194)

* Creating `normalizers.Prepend` (To be used instead of `Metaspace`).

* Linting + stub.

* Fixing pickling/unpickling by setting a default.

* Black.
This commit is contained in:
Nicolas Patry
2023-03-24 00:33:31 +01:00
committed by GitHub
parent 250d46c676
commit d2c8190a0f
10 changed files with 191 additions and 5 deletions

View File

@@ -4,7 +4,7 @@ import pytest
from tokenizers import NormalizedString, Tokenizer
from tokenizers.models import BPE
from tokenizers.normalizers import BertNormalizer, Lowercase, Normalizer, Sequence, Strip
from tokenizers.normalizers import BertNormalizer, Lowercase, Normalizer, Sequence, Strip, Prepend
class TestBertNormalizer:
@@ -119,6 +119,28 @@ class TestStrip:
assert normalizer.right == False
class TestPrepend:
    """Tests for the `Prepend` normalizer: construction, pickling, normalization, mutation."""

    def test_instantiate(self):
        """A `Prepend` is a `Normalizer` and is still a `Prepend` after a pickle round-trip."""
        assert isinstance(Prepend("▁"), Normalizer)
        assert isinstance(Prepend("▁"), Prepend)
        assert isinstance(pickle.loads(pickle.dumps(Prepend("▁"))), Prepend)

    def test_prepend(self):
        """Normalizing a string puts the configured prefix in front of it."""
        # The prefix has to be "▁" for the expected output below to hold:
        # an empty prefix could never yield "▁hello".
        normalizer = Prepend(prepend="▁")

        output = normalizer.normalize_str("hello")
        assert output == "▁hello"

    def test_can_modify(self):
        """The `prepend` attribute is readable and can be reassigned after construction."""
        normalizer = Prepend("▁")

        assert normalizer.prepend == "▁"

        # Reassign the prefix and check that the new value is reported back.
        normalizer.prepend = "-"
        assert normalizer.prepend == "-"
class TestCustomNormalizer:
class BadCustomNormalizer:
def normalize(self, normalized, wrong):