mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 12:48:18 +00:00
Python - Update types with new models API
This commit is contained in:
@@ -72,25 +72,15 @@ class Model:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
class BPE(Model):
|
class BPE(Model):
|
||||||
""" BytePairEncoding model class """
|
"""BytePairEncoding model class
|
||||||
|
|
||||||
@staticmethod
|
Instantiate a BPE Model from the given vocab and merges files.
|
||||||
def from_files(
|
|
||||||
vocab: str,
|
|
||||||
merges: str,
|
|
||||||
cache_capacity: Optional[int],
|
|
||||||
dropout: Optional[float],
|
|
||||||
unk_token: Optional[str],
|
|
||||||
continuing_subword_prefix: Optional[str],
|
|
||||||
end_of_word_suffix: Optional[str],
|
|
||||||
) -> Model:
|
|
||||||
""" Instantiate a BPE Model from the given vocab and merges files.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab: string:
|
vocab: (`optional`) string:
|
||||||
Path to a vocabulary JSON file.
|
Path to a vocabulary JSON file.
|
||||||
|
|
||||||
merges: string:
|
merges: (`optional`) string:
|
||||||
Path to a merge file.
|
Path to a merge file.
|
||||||
|
|
||||||
cache_capacity: (`optional`) int:
|
cache_capacity: (`optional`) int:
|
||||||
@@ -110,23 +100,27 @@ class BPE(Model):
|
|||||||
end_of_word_suffix: (`optional`) str:
|
end_of_word_suffix: (`optional`) str:
|
||||||
The suffix to attach to subword units that represent an end of word.
|
The suffix to attach to subword units that represent an end of word.
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def empty() -> Model:
|
def __init__(
|
||||||
""" Instantiate an empty BPE Model. """
|
self,
|
||||||
|
vocab: Optional[str],
|
||||||
|
merges: Optional[str],
|
||||||
|
cache_capacity: Optional[int],
|
||||||
|
dropout: Optional[float],
|
||||||
|
unk_token: Optional[str],
|
||||||
|
continuing_subword_prefix: Optional[str],
|
||||||
|
end_of_word_suffix: Optional[str],
|
||||||
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class WordPiece(Model):
|
class WordPiece(Model):
|
||||||
""" WordPiece model class """
|
""" WordPiece model class
|
||||||
|
|
||||||
@staticmethod
|
Instantiate a WordPiece Model from the given vocab file.
|
||||||
def from_files(
|
|
||||||
vocab: str, unk_token: Optional[str], max_input_chars_per_word: Optional[int]
|
|
||||||
) -> Model:
|
|
||||||
""" Instantiate a WordPiece Model from the given vocab file.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab: string:
|
vocab: (`optional`) string:
|
||||||
Path to a vocabulary file.
|
Path to a vocabulary file.
|
||||||
|
|
||||||
unk_token: (`optional`) str:
|
unk_token: (`optional`) str:
|
||||||
@@ -135,30 +129,28 @@ class WordPiece(Model):
|
|||||||
max_input_chars_per_word: (`optional`) int:
|
max_input_chars_per_word: (`optional`) int:
|
||||||
The maximum number of characters to authorize in a single word.
|
The maximum number of characters to authorize in a single word.
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
@staticmethod
|
def __init__(
|
||||||
def empty() -> Model:
|
self,
|
||||||
""" Instantiate an empty WordPiece Model. """
|
vocab: Optional[str],
|
||||||
|
unk_token: Optional[str],
|
||||||
|
max_input_chars_per_word: Optional[int],
|
||||||
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class WordLevel(Model):
|
class WordLevel(Model):
|
||||||
"""
|
"""
|
||||||
Most simple tokenizer model based on mapping token from a vocab file to their corresponding id.
|
Most simple tokenizer model based on mapping token from a vocab file to their corresponding id.
|
||||||
"""
|
|
||||||
|
|
||||||
@staticmethod
|
Instantiate a WordLevel Model from the given vocab file.
|
||||||
def from_files(vocab: str, unk_token: str) -> Model:
|
|
||||||
""" Instantiate a WordLevel Model from the given vocab file.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab: string:
|
vocab: (`optional`) string:
|
||||||
Path to a vocabulary file.
|
Path to a vocabulary file.
|
||||||
|
|
||||||
unk_token: str:
|
unk_token: (`optional`) str:
|
||||||
The unknown token to be used by the model.
|
The unknown token to be used by the model.
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
@staticmethod
|
def __init__(self, vocab: Optional[str], unk_token: Optional[str]):
|
||||||
def empty() -> Model:
|
|
||||||
""" Instantiate an empty WordLevel Model. """
|
|
||||||
pass
|
pass
|
||||||
|
|||||||
Reference in New Issue
Block a user