diff --git a/bindings/python/tokenizers/models/__init__.pyi b/bindings/python/tokenizers/models/__init__.pyi
index 7cd4340d..f6a86e81 100644
--- a/bindings/python/tokenizers/models/__init__.pyi
+++ b/bindings/python/tokenizers/models/__init__.pyi
@@ -72,61 +72,54 @@ class Model:
     pass
 
 class BPE(Model):
-    """ BytePairEncoding model class """
+    """BytePairEncoding model class
+
+    Instantiate a BPE Model from the given vocab and merges files.
+
+    Args:
+        vocab: (`optional`) string:
+            Path to a vocabulary JSON file.
+
+        merges: (`optional`) string:
+            Path to a merge file.
+
+        cache_capacity: (`optional`) int:
+            The number of words that the BPE cache can contain. The cache allows
+            to speed-up the process by keeping the result of the merge operations
+            for a number of words.
+
+        dropout: (`optional`) Optional[float] [0, 1]:
+            The BPE dropout to use. Must be a float between 0 and 1
+
+        unk_token: (`optional`) str:
+            The unknown token to be used by the model.
+
+        continuing_subword_prefix: (`optional`) str:
+            The prefix to attach to subword units that don't represent a beginning of word.
+
+        end_of_word_suffix: (`optional`) str:
+            The suffix to attach to subword units that represent an end of word.
+    """
 
-    @staticmethod
-    def from_files(
-        vocab: str,
-        merges: str,
+    def __init__(
+        self,
+        vocab: Optional[str],
+        merges: Optional[str],
         cache_capacity: Optional[int],
         dropout: Optional[float],
         unk_token: Optional[str],
         continuing_subword_prefix: Optional[str],
         end_of_word_suffix: Optional[str],
-    ) -> Model:
-        """ Instantiate a BPE Model from the given vocab and merges files.
-
-        Args:
-            vocab: string:
-                Path to a vocabulary JSON file.
-
-            merges: string:
-                Path to a merge file.
-
-            cache_capacity: (`optional`) int:
-                The number of words that the BPE cache can contain. The cache allows
-                to speed-up the process by keeping the result of the merge operations
-                for a number of words.
-
-            dropout: (`optional`) Optional[float] [0, 1]:
-                The BPE dropout to use. Must be an float between 0 and 1
-
-            unk_token: (`optional`) str:
-                The unknown token to be used by the model.
-
-            continuing_subword_prefix: (`optional`) str:
-                The prefix to attach to subword units that don't represent a beginning of word.
-
-            end_of_word_suffix: (`optional`) str:
-                The suffix to attach to subword units that represent an end of word.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty BPE Model. """
+    ):
         pass
 
 class WordPiece(Model):
-    """ WordPiece model class """
+    """ WordPiece model class
 
-    @staticmethod
-    def from_files(
-        vocab: str, unk_token: Optional[str], max_input_chars_per_word: Optional[int]
-    ) -> Model:
-        """ Instantiate a WordPiece Model from the given vocab file.
+    Instantiate a WordPiece Model from the given vocab file.
 
         Args:
-            vocab: string:
+            vocab: (`optional`) string:
                 Path to a vocabulary file.
 
             unk_token: (`optional`) str:
@@ -134,31 +128,29 @@ class WordPiece(Model):
 
             max_input_chars_per_word: (`optional`) int:
                 The maximum number of characters to authorize in a single word.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordPiece Model. """
+    """
+
+    def __init__(
+        self,
+        vocab: Optional[str],
+        unk_token: Optional[str],
+        max_input_chars_per_word: Optional[int],
+    ):
         pass
 
 class WordLevel(Model):
     """
     Most simple tokenizer model based on mapping token from a vocab file to their corresponding id.
-    """
 
-    @staticmethod
-    def from_files(vocab: str, unk_token: str) -> Model:
-        """ Instantiate a WordLevel Model from the given vocab file.
+    Instantiate a WordLevel Model from the given vocab file.
 
         Args:
-            vocab: string:
+            vocab: (`optional`) string:
                 Path to a vocabulary file.
 
             unk_token: str:
                 The unknown token to be used by the model.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordLevel Model. """
+    """
+
+    def __init__(self, vocab: Optional[str], unk_token: Optional[str]):
         pass