mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-05 20:28:22 +00:00
Python - Add bindings for NormalizedString
This commit is contained in:
@@ -14,7 +14,7 @@ PreTokenizedEncodeInput = Union[
|
||||
InputSequence = Union[TextInputSequence, PreTokenizedInputSequence]
|
||||
EncodeInput = Union[TextEncodeInput, PreTokenizedEncodeInput]
|
||||
|
||||
from .tokenizers import Tokenizer, Encoding, AddedToken
|
||||
from .tokenizers import Tokenizer, Encoding, AddedToken, Regex, NormalizedString
|
||||
from .tokenizers import decoders
|
||||
from .tokenizers import models
|
||||
from .tokenizers import normalizers
|
||||
|
||||
@@ -26,6 +26,13 @@ PreTokenizedEncodeInput = Union[
|
||||
InputSequence = Union[TextInputSequence, PreTokenizedInputSequence]
|
||||
EncodeInput = Union[TextEncodeInput, PreTokenizedEncodeInput]
|
||||
|
||||
class Regex:
    """A Regex.

    NOTE(review): this reads as a signature-only declaration (the body is
    ``pass``); the real implementation is presumably supplied by the
    compiled ``.tokenizers`` module this name is imported from -- confirm.
    """

    def __new__(cls, pattern: str) -> "Regex":
        """Instantiate a new Regex with the given pattern.

        Args:
            pattern: The pattern string the Regex is built from.

        Returns:
            The new ``Regex`` instance (per the declared return type; the
            body here is only a placeholder).
        """
        # ``cls`` must be the first parameter: Python always passes the class
        # as the first positional argument to ``__new__``, so without it
        # ``pattern`` would be bound to the class and ``Regex("...")`` would
        # fail with a TypeError.
        # The return annotation is quoted because ``Regex`` is not yet bound
        # while its own class body is being evaluated.
        pass
|
||||
|
||||
class Encoding:
|
||||
""" An Encoding as returned by the Tokenizer """
|
||||
|
||||
|
||||
Reference in New Issue
Block a user