Python - Update types with new models API

This commit is contained in:
Bjarte Johansen
2020-04-06 21:17:04 +02:00
parent 2dc48e56ac
commit 69ed81e618

View File

@@ -72,61 +72,55 @@ class Model:
pass
class BPE(Model):
""" BytePairEncoding model class """
"""BytePairEncoding model class
Instantiate a BPE Model from the given vocab and merges files.
Args:
vocab: (`optional`) string:
Path to a vocabulary JSON file.
merges: (`optional`) string:
Path to a merge file.
cache_capacity: (`optional`) int:
The number of words that the BPE cache can contain. The cache speeds up
the process by keeping the results of the merge operations
for a number of words.
dropout: (`optional`) Optional[float] [0, 1]:
The BPE dropout to use. Must be a float between 0 and 1.
unk_token: (`optional`) str:
The unknown token to be used by the model.
continuing_subword_prefix: (`optional`) str:
The prefix to attach to subword units that don't represent a beginning of word.
end_of_word_suffix: (`optional`) str:
The suffix to attach to subword units that represent an end of word.
"""
@staticmethod
def from_files(
vocab: str,
merges: str,
def __init__(
self,
vocab: Optional[str],
merges: Optional[str],
cache_capacity: Optional[int],
dropout: Optional[float],
unk_token: Optional[str],
continuing_subword_prefix: Optional[str],
end_of_word_suffix: Optional[str],
) -> Model:
""" Instantiate a BPE Model from the given vocab and merges files.
Args:
vocab: string:
Path to a vocabulary JSON file.
merges: string:
Path to a merge file.
cache_capacity: (`optional`) int:
The number of words that the BPE cache can contain. The cache speeds up
the process by keeping the results of the merge operations
for a number of words.
dropout: (`optional`) Optional[float] [0, 1]:
The BPE dropout to use. Must be a float between 0 and 1.
unk_token: (`optional`) str:
The unknown token to be used by the model.
continuing_subword_prefix: (`optional`) str:
The prefix to attach to subword units that don't represent a beginning of word.
end_of_word_suffix: (`optional`) str:
The suffix to attach to subword units that represent an end of word.
"""
pass
@staticmethod
def empty() -> Model:
""" Instantiate an empty BPE Model. """
):
pass
class WordPiece(Model):
""" WordPiece model class """
""" WordPiece model class
@staticmethod
def from_files(
vocab: str, unk_token: Optional[str], max_input_chars_per_word: Optional[int]
) -> Model:
""" Instantiate a WordPiece Model from the given vocab file.
Instantiate a WordPiece Model from the given vocab file.
Args:
vocab: string:
vocab: (`optional`) string:
Path to a vocabulary file.
unk_token: (`optional`) str:
@@ -134,31 +128,29 @@ class WordPiece(Model):
max_input_chars_per_word: (`optional`) int:
The maximum number of characters to allow in a single word.
"""
pass
@staticmethod
def empty() -> Model:
""" Instantiate an empty WordPiece Model. """
"""
def __init__(
self,
vocab: Optional[str],
unk_token: Optional[str],
max_input_chars_per_word: Optional[int],
):
pass
class WordLevel(Model):
"""
The simplest tokenizer model, based on mapping tokens from a vocab file to their corresponding ids.
"""
@staticmethod
def from_files(vocab: str, unk_token: str) -> Model:
""" Instantiate a WordLevel Model from the given vocab file.
Instantiate a WordLevel Model from the given vocab file.
Args:
vocab: string:
vocab: (`optional`) string:
Path to a vocabulary file.
unk_token: str:
The unknown token to be used by the model.
"""
pass
@staticmethod
def empty() -> Model:
""" Instantiate an empty WordLevel Model. """
"""
def __init__(self, vocab: Optional[str], unk_token: Optional[str]):
pass