diff --git a/bindings/python/tokenizers/decoders/__init__.pyi b/bindings/python/tokenizers/decoders/__init__.pyi
new file mode 100644
index 00000000..7cd64d83
--- /dev/null
+++ b/bindings/python/tokenizers/decoders/__init__.pyi
@@ -0,0 +1,29 @@
+from typing import List
+from .. import decoders
+
+class Decoder:
+    """Decoder
+    """
+
+    @staticmethod
+    def custom():
+        pass
+
+    def decode(self, tokens: List[str]) -> str:
+        pass
+
+class ByteLevel:
+    """ByteLevel
+    """
+
+    @staticmethod
+    def new() -> Decoder:
+        pass
+
+class WordPiece:
+    """WordPiece
+    """
+
+    @staticmethod
+    def new() -> Decoder:
+        pass
diff --git a/bindings/python/tokenizers/models/__init__.pyi b/bindings/python/tokenizers/models/__init__.pyi
new file mode 100644
index 00000000..208415b2
--- /dev/null
+++ b/bindings/python/tokenizers/models/__init__.pyi
@@ -0,0 +1,38 @@
+from typing import List
+from .. import models
+
+class Model:
+    """Model
+    """
+
+    def save(self, folder: str, name: str) -> List[str]:
+        """ save
+        Save the current Model in the given folder, using the given name for the various
+        files that will get created.
+        Any file with the same name that already exists in this folder will be overwritten.
+        """
+        pass
+
+class BPE:
+    """BPE
+    """
+
+    @staticmethod
+    def from_files(vocab: str, merges: str) -> Model:
+        pass
+
+    @staticmethod
+    def empty() -> Model:
+        pass
+
+class WordPiece:
+    """WordPiece
+    """
+
+    @staticmethod
+    def from_files(vocab: str) -> Model:
+        pass
+
+    @staticmethod
+    def empty() -> Model:
+        pass
diff --git a/bindings/python/tokenizers/normalizers/__init__.pyi b/bindings/python/tokenizers/normalizers/__init__.pyi
new file mode 100644
index 00000000..8667d222
--- /dev/null
+++ b/bindings/python/tokenizers/normalizers/__init__.pyi
@@ -0,0 +1,13 @@
+from .. import normalizers
+
+class Normalizer:
+    """Normalizer
+    """
+
+class BertNormalizer:
+    """BertNormalizer
+    """
+
+    @staticmethod
+    def new() -> Normalizer:
+        pass
diff --git a/bindings/python/tokenizers/pre_tokenizers/__init__.pyi b/bindings/python/tokenizers/pre_tokenizers/__init__.pyi
new file mode 100644
index 00000000..8b747a0e
--- /dev/null
+++ b/bindings/python/tokenizers/pre_tokenizers/__init__.pyi
@@ -0,0 +1,39 @@
+from typing import List, Tuple
+from .. import pre_tokenizers
+
+Offsets = Tuple[int, int]
+
+class PreTokenizer:
+    """PreTokenizer
+    """
+
+    def pre_tokenize(self, sequence: str) -> List[Tuple[str, Offsets]]:
+        pass
+
+class ByteLevel:
+    """ByteLevel
+    """
+
+    @staticmethod
+    def new() -> PreTokenizer:
+        pass
+
+    @staticmethod
+    def alphabet() -> List[str]:
+        pass
+
+class Whitespace:
+    """Whitespace
+    """
+
+    @staticmethod
+    def new() -> PreTokenizer:
+        pass
+
+class BertPreTokenizer:
+    """BertPreTokenizer
+    """
+
+    @staticmethod
+    def new() -> PreTokenizer:
+        pass
diff --git a/bindings/python/tokenizers/processors/__init__.pyi b/bindings/python/tokenizers/processors/__init__.pyi
new file mode 100644
index 00000000..7350f2f5
--- /dev/null
+++ b/bindings/python/tokenizers/processors/__init__.pyi
@@ -0,0 +1,14 @@
+from typing import Tuple
+from .. import processors
+
+class PostProcessor:
+    """PostProcessor
+    """
+
+class BertProcessing:
+    """BertProcessing
+    """
+
+    @staticmethod
+    def new(sep: Tuple[str, int], cls: Tuple[str, int]) -> PostProcessor:
+        pass
diff --git a/bindings/python/tokenizers/trainers/__init__.pyi b/bindings/python/tokenizers/trainers/__init__.pyi
new file mode 100644
index 00000000..ffce346c
--- /dev/null
+++ b/bindings/python/tokenizers/trainers/__init__.pyi
@@ -0,0 +1,21 @@
+from .. import trainers
+
+class Trainer:
+    """Trainer
+    """
+
+class BpeTrainer:
+    """BpeTrainer
+    """
+
+    @staticmethod
+    def new() -> Trainer:
+        pass
+
+class WordPieceTrainer:
+    """WordPieceTrainer
+    """
+
+    @staticmethod
+    def new() -> Trainer:
+        pass
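
For orientation, a minimal usage sketch of the interfaces these stubs describe follows. It only calls names declared in the stubs above; the vocab/merges paths and the special-token ids are placeholder assumptions, not part of this diff.

    from tokenizers import models, pre_tokenizers, decoders, processors, trainers

    # Build a byte-level BPE pipeline from existing vocab/merges files
    # (placeholder paths, not shipped with this diff).
    bpe = models.BPE.from_files("vocab.json", "merges.txt")
    pre_tok = pre_tokenizers.ByteLevel.new()
    dec = decoders.ByteLevel.new()

    # pre_tokenize splits raw text into (token, (start, end)) pairs,
    # matching the List[Tuple[str, Offsets]] return type in the stub.
    pieces = pre_tok.pre_tokenize("Hello world")

    # BertProcessing takes ([SEP], id) and ([CLS], id) pairs for
    # post-processing; 102 and 101 are placeholder ids.
    post = processors.BertProcessing.new(("[SEP]", 102), ("[CLS]", 101))

    # Trainers are constructed the same way.
    trainer = trainers.BpeTrainer.new()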