Adding a new normalizer that strips accents by removing combining characters (#416)

* Adding a new normalizer that strips accents by removing combining

characters in Unicode strings.

* Adding Node bindings

+ better normalizer impl.

* Doc comment -> Regular comment.
This commit is contained in:
Nicolas Patry
2020-09-17 09:49:41 +02:00
committed by GitHub
parent 330876ae02
commit 75464734df
10 changed files with 130 additions and 4 deletions

View File

@@ -9,6 +9,7 @@ NFKC = normalizers.NFKC
Sequence = normalizers.Sequence
Lowercase = normalizers.Lowercase
Strip = normalizers.Strip
StripAccents = normalizers.StripAccents  # strips accents by removing combining characters
Nmt = normalizers.Nmt
Precompiled = normalizers.Precompiled

View File

@@ -99,6 +99,12 @@ class Strip(Normalizer):
def __init__(self, left: bool = True, right: bool = True) -> Normalizer:
    """Stub initializer for the Strip normalizer; both flags default to True.

    NOTE(review): `left` / `right` presumably select which side of the
    input gets stripped -- confirm against the actual implementation.
    """
class StripAccents(Normalizer):
    """StripAccents normalizer.

    Strips accents by removing combining characters from Unicode strings.
    """

    def __init__(self) -> Normalizer:
        """Stub initializer for the StripAccents normalizer; takes no options."""
class Nmt(Normalizer):
""" Nmt normalizer """