Mirror of https://github.com/mii443/tokenizers.git
Python - Update types with new models API
This commit updates the Python type stubs so that BPE, WordPiece, and WordLevel are constructed through __init__ (with every argument optional) instead of the static from_files and empty factories.
@@ -72,61 +72,55 @@ class Model:
         pass
 
 class BPE(Model):
-    """ BytePairEncoding model class """
+    """BytePairEncoding model class
+
+    Instantiate a BPE Model from the given vocab and merges files.
+
+    Args:
+        vocab: (`optional`) string:
+            Path to a vocabulary JSON file.
+
+        merges: (`optional`) string:
+            Path to a merge file.
+
+        cache_capacity: (`optional`) int:
+            The number of words that the BPE cache can contain. The cache allows
+            speeding up the process by keeping the result of the merge operations
+            for a number of words.
+
+        dropout: (`optional`) Optional[float] [0, 1]:
+            The BPE dropout to use. Must be a float between 0 and 1.
+
+        unk_token: (`optional`) str:
+            The unknown token to be used by the model.
+
+        continuing_subword_prefix: (`optional`) str:
+            The prefix to attach to subword units that are not at the beginning of a word.
+
+        end_of_word_suffix: (`optional`) str:
+            The suffix to attach to subword units that are at the end of a word.
+    """
+
     @staticmethod
-    def from_files(
-        vocab: str,
-        merges: str,
+    def __init__(
+        self,
+        vocab: Optional[str],
+        merges: Optional[str],
         cache_capacity: Optional[int],
         dropout: Optional[float],
         unk_token: Optional[str],
         continuing_subword_prefix: Optional[str],
         end_of_word_suffix: Optional[str],
-    ) -> Model:
-        """ Instantiate a BPE Model from the given vocab and merges files.
-
-        Args:
-            vocab: string:
-                Path to a vocabulary JSON file.
-
-            merges: string:
-                Path to a merge file.
-
-            cache_capacity: (`optional`) int:
-                The number of words that the BPE cache can contain. The cache allows
-                speeding up the process by keeping the result of the merge operations
-                for a number of words.
-
-            dropout: (`optional`) Optional[float] [0, 1]:
-                The BPE dropout to use. Must be a float between 0 and 1.
-
-            unk_token: (`optional`) str:
-                The unknown token to be used by the model.
-
-            continuing_subword_prefix: (`optional`) str:
-                The prefix to attach to subword units that are not at the beginning of a word.
-
-            end_of_word_suffix: (`optional`) str:
-                The suffix to attach to subword units that are at the end of a word.
-        """
-        pass
-
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty BPE Model. """
+    ):
         pass
 
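For context, the new BPE construction path looks like this. A minimal usage sketch, assuming the Optional[...] arguments default to None (the file names and the unk token are placeholders):

    from tokenizers import Tokenizer
    from tokenizers.models import BPE

    # Old API, removed by this commit:
    #   bpe = BPE.from_files("vocab.json", "merges.txt")
    #   empty_bpe = BPE.empty()

    # New API: the constructor takes the files directly, and a no-argument
    # call replaces the separate empty() constructor.
    bpe = BPE("vocab.json", "merges.txt", unk_token="<unk>")
    empty_bpe = BPE()

    tokenizer = Tokenizer(bpe)

Folding from_files and empty into __init__ leaves a single construction path; the Optional annotations in the stub describe which arguments may be omitted.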
 class WordPiece(Model):
-    """ WordPiece model class """
+    """WordPiece model class
 
-    @staticmethod
-    def from_files(
-        vocab: str, unk_token: Optional[str], max_input_chars_per_word: Optional[int]
-    ) -> Model:
-        """ Instantiate a WordPiece Model from the given vocab file.
+    Instantiate a WordPiece Model from the given vocab file.
 
         Args:
-            vocab: string:
+            vocab: (`optional`) string:
                 Path to a vocabulary file.
 
             unk_token: (`optional`) str:
@@ -134,31 +128,29 @@ class WordPiece(Model):
 
             max_input_chars_per_word: (`optional`) int:
                 The maximum number of characters to allow in a single word.
-        """
-        pass
-
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordPiece Model. """
+    """
+
+    def __init__(
+        self,
+        vocab: Optional[str],
+        unk_token: Optional[str],
+        max_input_chars_per_word: Optional[int],
+    ):
         pass
 
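The same pattern applies to WordPiece. A sketch under the same assumptions (vocab.txt, "[UNK]", and the limit of 100 are placeholders):

    from tokenizers.models import WordPiece

    # Old API: WordPiece.from_files("vocab.txt", "[UNK]", 100) / WordPiece.empty()
    # New API: one constructor, all arguments optional.
    model = WordPiece("vocab.txt", unk_token="[UNK]", max_input_chars_per_word=100)
    empty_model = WordPiece()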
 class WordLevel(Model):
     """
     The simplest tokenizer model: maps tokens from a vocab file to their corresponding ids.
-    """
 
-    @staticmethod
-    def from_files(vocab: str, unk_token: str) -> Model:
-        """ Instantiate a WordLevel Model from the given vocab file.
+    Instantiate a WordLevel Model from the given vocab file.
 
         Args:
-            vocab: string:
+            vocab: (`optional`) string:
                 Path to a vocabulary file.
 
             unk_token: str:
                 The unknown token to be used by the model.
-        """
-        pass
-
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordLevel Model. """
+    """
+
+    def __init__(self, vocab: Optional[str], unk_token: Optional[str]):
         pass
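And WordLevel follows suit; a sketch with a placeholder vocab path, again assuming the optional arguments default to None:

    from tokenizers.models import WordLevel

    # Old API: WordLevel.from_files("vocab.json", "[UNK]") / WordLevel.empty()
    # New API:
    model = WordLevel("vocab.json", unk_token="[UNK]")
    empty_model = WordLevel()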