Adding a new "Replace" normalizer that takes a string and replaces it

with another String (for now).
This commit is contained in:
Nicolas Patry
2020-09-16 11:13:37 +02:00
parent 1a4a4649c3
commit 792d618006
6 changed files with 75 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ Strip = normalizers.Strip
# Expose the normalizers under short top-level names by aliasing the
# attributes of `normalizers` (bound outside this excerpt).
StripAccents = normalizers.StripAccents
Nmt = normalizers.Nmt
Precompiled = normalizers.Precompiled
Replace = normalizers.Replace
# Maps a lowercase Unicode normalization form name ("nfc", "nfd", "nfkc",
# "nfkd") to the matching NFC / NFD / NFKC / NFKD object; those names are
# defined outside this excerpt.
NORMALIZERS = {"nfc": NFC, "nfd": NFD, "nfkc": NFKC, "nfkd": NFKD}

View File

@@ -112,11 +112,17 @@ class Nmt(Normalizer):
pass
class Precompiled(Normalizer):
    """ Precompiled normalizer

    Normalizer constructed from a precompiled character map supplied as bytes.
    """

    # NOTE(review): `__init__` is annotated as returning `Normalizer`, while
    # type checkers expect `-> None` for `__init__`. The annotation is kept
    # unchanged here so the declared signature stays exactly as it was.
    def __init__(self, precompiled_charsmap: bytes) -> Normalizer:
        """ Instantiate a new Precompiled normalizer

        Args:
            precompiled_charsmap: bytes:
                The precompiled character map, passed as raw bytes.
                NOTE(review): the expected binary layout is not visible
                here - confirm against the implementation.
        """
        pass
class Replace(Normalizer):
    """ Replace normalizer

    Takes a string pattern and replaces it with another string.
    """

    def __init__(self, pattern: str, content: str) -> Normalizer:
        """ Instantiate a new Replace normalizer

        Args:
            pattern: str:
                The string to be replaced.

            content: str:
                The string that takes the place of `pattern`.
        """
        pass
def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
"""
Instanciate unicode normalizer from the normalizer name