Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-17 01:28:46 +00:00).
Commit: "Python - Improved stub file for models".
This commit is contained in:
@@ -1,33 +1,85 @@
|
|||||||
from .. import models
|
from typing import List, Optional
|
||||||
|
|
||||||
class Model:
    """Base class for all models.

    This class is not supposed to be instantiated directly. Instead, any implementation
    of a Model will return an instance of this class when instantiated.
    """

    def save(self, folder: str, name: str) -> List[str]:
        """Save the current model.

        Save the current model in the given folder, using the given name for the
        various files that will get created.
        Any file with the same name that already exists in this folder will be
        overwritten.

        Args:
            folder: string:
                Path to the folder where the model files will be written.

            name: string:
                Base name used for the created files.

        Returns:
            The list of paths to the files that were created.
        """
        # Stub only: the concrete implementation lives in the native extension.
        pass
class BPE:
    """BytePairEncoding model class."""

    # Alternate constructors are invoked on the class itself, so they must be
    # static methods: without the decorator, `instance.from_files(...)` would
    # mis-bind the instance to `vocab`.
    @staticmethod
    def from_files(vocab: str,
                   merges: str,
                   cache_capacity: Optional[int],
                   dropout: Optional[float],
                   unk_token: Optional[int],
                   continuing_subword_prefix: Optional[str],
                   end_of_word_suffix: Optional[str]) -> Model:
        """Instantiate a BPE Model from the given vocab and merges files.

        Args:
            vocab: string:
                Path to a vocabulary JSON file.

            merges: string:
                Path to a merge file.

            cache_capacity: (`optional`) int:
                The number of words that the BPE cache can contain. The cache allows
                to speed-up the process by keeping the result of the merge operations
                for a number of words.

            dropout: (`optional`) float [0, 1]:
                The BPE dropout to use. Must be a float between 0 and 1.

            unk_token: (`optional`) int:
                The unknown token id to be used by the model.

            continuing_subword_prefix: (`optional`) str:
                The prefix to attach to subword units that don't represent a beginning of word.

            end_of_word_suffix: (`optional`) str:
                The suffix to attach to subword units that represent an end of word.
        """
        # Stub only: the concrete implementation lives in the native extension.
        pass

    @staticmethod
    def empty() -> Model:
        """Instantiate an empty BPE Model."""
        # Stub only: the concrete implementation lives in the native extension.
        pass
class WordPiece:
    """WordPiece model class."""

    # Alternate constructors are invoked on the class itself, so they must be
    # static methods: without the decorator, `instance.from_files(...)` would
    # mis-bind the instance to `vocab`.
    @staticmethod
    def from_files(vocab: str,
                   unk_token: Optional[str],
                   max_input_chars_per_word: Optional[int]) -> Model:
        """Instantiate a WordPiece Model from the given vocab file.

        Args:
            vocab: string:
                Path to a vocabulary file.

            unk_token: (`optional`) str:
                The unknown token to be used by the model.

            max_input_chars_per_word: (`optional`) int:
                The maximum number of characters to authorize in a single word.
        """
        # Stub only: the concrete implementation lives in the native extension.
        pass

    @staticmethod
    def empty() -> Model:
        """Instantiate an empty WordPiece Model."""
        # Stub only: the concrete implementation lives in the native extension.
        pass
Reference in New Issue
Block a user