Adding Replace to decoder (to undo the Replace Normalizer for Metaspace split). (#1195)
This commit is contained in:
Nicolas Patry
2023-03-23 23:43:47 +01:00
committed by GitHub
parent 178e294a6a
commit 250d46c676
10 changed files with 135 additions and 1 deletions

View File

@ -3,7 +3,17 @@ import pickle
import pytest
from tokenizers.decoders import CTC, BPEDecoder, ByteLevel, Decoder, Metaspace, Sequence, WordPiece, ByteFallback
from tokenizers.decoders import (
CTC,
BPEDecoder,
ByteLevel,
Decoder,
Metaspace,
Sequence,
WordPiece,
ByteFallback,
Replace,
)
class TestByteLevel:
@ -24,6 +34,18 @@ class TestByteLevel:
assert isinstance(reloaded, ByteLevel)
class TestReplace:
    """Tests for the ``Replace`` decoder."""

    def test_instantiate(self):
        """``Replace("_", " ")`` constructs and is both a ``Decoder`` and a ``Replace``."""
        assert Replace("_", " ") is not None
        # A fresh instance is built for each type check, as in the original assertions.
        for expected_type in (Decoder, Replace):
            assert isinstance(Replace("_", " "), expected_type)
        # NOTE(review): the pickle round-trip check below is disabled; the reason is
        # not visible here -- confirm whether Replace supports pickling before enabling.
        # assert isinstance(pickle.loads(pickle.dumps(Replace("_", " "))), Replace)

    def test_decoding(self):
        """Decoding swaps each "_" for a space and joins the tokens into one string."""
        tokens = ["My", "_name", "_is", "_John"]
        replacer = Replace("_", " ")
        assert replacer.decode(tokens) == "My name is John"
class TestWordPiece:
def test_instantiate(self):
assert WordPiece() is not None