Mirror of https://github.com/mii443/tokenizers.git, synced 2025-12-06 12:48:18 +00:00
Python - Update types with new models API
@@ -72,61 +72,55 @@ class Model:
         pass
 
 class BPE(Model):
-    """ BytePairEncoding model class """
+    """BytePairEncoding model class
+
+    Instantiate a BPE Model from the given vocab and merges files.
+
+    Args:
+        vocab: ('`optional`) string:
+            Path to a vocabulary JSON file.
+
+        merges: (`optional`) string:
+            Path to a merge file.
+
+        cache_capacity: (`optional`) int:
+            The number of words that the BPE cache can contain. The cache allows
+            to speed-up the process by keeping the result of the merge operations
+            for a number of words.
+
+        dropout: (`optional`) Optional[float] [0, 1]:
+            The BPE dropout to use. Must be an float between 0 and 1
+
+        unk_token: (`optional`) str:
+            The unknown token to be used by the model.
+
+        continuing_subword_prefix: (`optional`) str:
+            The prefix to attach to subword units that don't represent a beginning of word.
+
+        end_of_word_suffix: (`optional`) str:
+            The suffix to attach to subword units that represent an end of word.
+    """
 
-    @staticmethod
-    def from_files(
-        vocab: str,
-        merges: str,
+    def __init__(
+        self,
+        vocab: Optional[str],
+        merges: Optional[str],
         cache_capacity: Optional[int],
         dropout: Optional[float],
         unk_token: Optional[str],
         continuing_subword_prefix: Optional[str],
         end_of_word_suffix: Optional[str],
-    ) -> Model:
-        """ Instantiate a BPE Model from the given vocab and merges files.
-
-        Args:
-            vocab: string:
-                Path to a vocabulary JSON file.
-
-            merges: string:
-                Path to a merge file.
-
-            cache_capacity: (`optional`) int:
-                The number of words that the BPE cache can contain. The cache allows
-                to speed-up the process by keeping the result of the merge operations
-                for a number of words.
-
-            dropout: (`optional`) Optional[float] [0, 1]:
-                The BPE dropout to use. Must be an float between 0 and 1
-
-            unk_token: (`optional`) str:
-                The unknown token to be used by the model.
-
-            continuing_subword_prefix: (`optional`) str:
-                The prefix to attach to subword units that don't represent a beginning of word.
-
-            end_of_word_suffix: (`optional`) str:
-                The suffix to attach to subword units that represent an end of word.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty BPE Model. """
+    ):
         pass
 
 class WordPiece(Model):
-    """ WordPiece model class """
+    """ WordPiece model class
 
-    @staticmethod
-    def from_files(
-        vocab: str, unk_token: Optional[str], max_input_chars_per_word: Optional[int]
-    ) -> Model:
-        """ Instantiate a WordPiece Model from the given vocab file.
+    Instantiate a WordPiece Model from the given vocab file.
 
     Args:
-        vocab: string:
+        vocab: (`optional`) string:
             Path to a vocabulary file.
 
         unk_token: (`optional`) str:
@@ -134,31 +128,29 @@ class WordPiece(Model):
 
         max_input_chars_per_word: (`optional`) int:
            The maximum number of characters to authorize in a single word.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordPiece Model. """
+    """
+
+    def __init__(
+        self,
+        vocab: Optional[str],
+        unk_token: Optional[str],
+        max_input_chars_per_word: Optional[int],
+    ):
         pass
 
 class WordLevel(Model):
     """
     Most simple tokenizer model based on mapping token from a vocab file to their corresponding id.
-    """
 
-    @staticmethod
-    def from_files(vocab: str, unk_token: str) -> Model:
-        """ Instantiate a WordLevel Model from the given vocab file.
+    Instantiate a WordLevel Model from the given vocab file.
 
     Args:
-        vocab: string:
+        vocab: (`optional`) string:
            Path to a vocabulary file.
 
        unk_token: str:
            The unknown token to be used by the model.
-        """
-        pass
-    @staticmethod
-    def empty() -> Model:
-        """ Instantiate an empty WordLevel Model. """
+    """
+
+    def __init__(self, vocab: Optional[str], unk_token: Optional[str]):
         pass
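
A minimal usage sketch of the constructor-based API typed above, which takes over from the removed from_files/empty static methods. The tokenizers.models import path, the placeholder vocabulary and merges file names, and the keyword arguments with None defaults for the Optional[...] parameters are assumptions beyond what this diff shows (the stubs above declare no default values):

# Sketch only: exercises the new __init__-based constructors typed above.
# File names are placeholders; omitted and keyword arguments assume the
# runtime fills in None defaults, which the stubs do not declare.
from tokenizers import Tokenizer
from tokenizers.models import BPE, WordLevel, WordPiece

# Replaces BPE.from_files(vocab, merges, ...) and BPE.empty().
bpe = BPE("vocab.json", "merges.txt", unk_token="<unk>", dropout=0.1)
empty_bpe = BPE()  # an "empty" model is now a bare constructor call

# Replaces WordPiece.from_files(...) and WordPiece.empty().
wordpiece = WordPiece("vocab.txt", unk_token="[UNK]", max_input_chars_per_word=100)

# Replaces WordLevel.from_files(...) and WordLevel.empty().
word_level = WordLevel("vocab.json", unk_token="<unk>")

# A model is typically wrapped in a Tokenizer before encoding text.
tokenizer = Tokenizer(bpe)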