mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 13:48:19 +00:00
Python - Improve typings
This commit is contained in:
@@ -21,6 +21,14 @@ class IndexableString:
|
|||||||
Works almost like a `str`, but allows indexing on offsets
|
Works almost like a `str`, but allows indexing on offsets
|
||||||
provided on an `Encoding`
|
provided on an `Encoding`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def offsets(self, offsets: Tuple[int, int]) -> Optional[Tuple[int, int]]:
|
||||||
|
""" Convert the Encoding's offsets to the current string.
|
||||||
|
|
||||||
|
`Encoding` provides a list of offsets that are actually offsets to the Normalized
|
||||||
|
version of text. Calling this method with the offsets provided by `Encoding` will make
|
||||||
|
sure that said offsets can be used to index the `str` directly.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Encoding:
|
class Encoding:
|
||||||
@@ -53,7 +61,11 @@ class Encoding:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def offsets(self) -> List[Offsets]:
|
def offsets(self) -> List[Offsets]:
|
||||||
""" The offsets """
|
""" The offsets.
|
||||||
|
These offsets can be used to index any `IndexableString` directly. If you want to
|
||||||
|
index the original `str`, make sure to retrieve the converted offsets using the `.offsets`
|
||||||
|
method on the `original_str`.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -11,14 +11,14 @@ class Decoder:
|
|||||||
""" Decode the given list of strings to a final string """
|
""" Decode the given list of strings to a final string """
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class ByteLevel:
|
class ByteLevel(Decoder):
|
||||||
""" ByteLevel Decoder """
|
""" ByteLevel Decoder """
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
""" Instantiate a new ByteLevel Decoder """
|
""" Instantiate a new ByteLevel Decoder """
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class WordPiece:
|
class WordPiece(Decoder):
|
||||||
""" WordPiece Decoder """
|
""" WordPiece Decoder """
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -31,7 +31,7 @@ class WordPiece:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Metaspace:
|
class Metaspace(Decoder):
|
||||||
""" Metaspace decoder """
|
""" Metaspace decoder """
|
||||||
|
|
||||||
def __init__(self, replacement: str = "▁", add_prefix_space: bool = True) -> None:
|
def __init__(self, replacement: str = "▁", add_prefix_space: bool = True) -> None:
|
||||||
@@ -48,7 +48,7 @@ class Metaspace:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class BPEDecoder:
|
class BPEDecoder(Decoder):
|
||||||
""" BPEDecoder """
|
""" BPEDecoder """
|
||||||
|
|
||||||
def __init__(self, suffix: str = "</w>") -> None:
|
def __init__(self, suffix: str = "</w>") -> None:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ class PostProcessor:
|
|||||||
a PostProcessor will return an instance of this class when instantiated.
|
a PostProcessor will return an instance of this class when instantiated.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class BertProcessing:
|
class BertProcessing(PostProcessor):
|
||||||
""" BertProcessing
|
""" BertProcessing
|
||||||
|
|
||||||
This post-processor takes care of adding the special tokens needed by
|
This post-processor takes care of adding the special tokens needed by
|
||||||
@@ -31,7 +31,7 @@ class BertProcessing:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class RobertaProcessing:
|
class RobertaProcessing(PostProcessor):
|
||||||
""" RobertaProcessing
|
""" RobertaProcessing
|
||||||
|
|
||||||
This post-processor takes care of adding the special tokens needed by
|
This post-processor takes care of adding the special tokens needed by
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ class Trainer:
|
|||||||
Trainer will return an instance of this class when instantiated.
|
Trainer will return an instance of this class when instantiated.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class BpeTrainer:
|
class BpeTrainer(Trainer):
|
||||||
""" BpeTrainer
|
""" BpeTrainer
|
||||||
|
|
||||||
Capable of training a BPE model
|
Capable of training a BPE model
|
||||||
@@ -59,7 +59,7 @@ class BpeTrainer:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class WordPieceTrainer:
|
class WordPieceTrainer(Trainer):
|
||||||
""" WordPieceTrainer
|
""" WordPieceTrainer
|
||||||
|
|
||||||
Capable of training a WordPiece model
|
Capable of training a WordPiece model
|
||||||
|
|||||||
Reference in New Issue
Block a user