diff --git a/bindings/python/py_src/tokenizers/models/__init__.pyi b/bindings/python/py_src/tokenizers/models/__init__.pyi
index 0e0a18d2..9ed63760 100644
--- a/bindings/python/py_src/tokenizers/models/__init__.pyi
+++ b/bindings/python/py_src/tokenizers/models/__init__.pyi
@@ -1,4 +1,4 @@
-from .. import Encoding, Offsets
+from .. import Encoding, Offsets, Token
 from typing import List, Optional, Union, Tuple
 
 class Model:
@@ -8,6 +8,15 @@ class Model:
     a Model will return a instance of this class when instantiated.
     """
 
+    def tokenize(self, sequence: str) -> List[Token]:
+        """ Tokenize the given sequence """
+        pass
+    def token_to_id(self, token: str) -> Optional[int]:
+        """ Returns the id associated with the given token """
+        pass
+    def id_to_token(self, id: int) -> Optional[str]:
+        """ Returns the token associated with the given id """
+        pass
     def save(self, folder: str, name: Optional[str] = None) -> List[str]:
         """ Save the current model
 
diff --git a/bindings/python/py_src/tokenizers/processors/__init__.pyi b/bindings/python/py_src/tokenizers/processors/__init__.pyi
index 7e713a89..abad3544 100644
--- a/bindings/python/py_src/tokenizers/processors/__init__.pyi
+++ b/bindings/python/py_src/tokenizers/processors/__init__.pyi
@@ -1,3 +1,4 @@
-from typing import Tuple, Union, List
+from .. import Encoding
+from typing import Tuple, Union, List, Optional
 
 class PostProcessor:
@@ -14,6 +15,11 @@ class PostProcessor:
         :return:
         """
         pass
+    def process(
+        self, encoding: Encoding, pair: Optional[Encoding] = None, add_special_tokens: bool = True
+    ) -> Encoding:
+        """ Post-process the given encodings, generating the final one """
+        pass
 
 class BertProcessing(PostProcessor):
     """ BertProcessing