mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-07 13:18:31 +00:00
Python - Improve typings
This commit is contained in:
@@ -21,6 +21,14 @@ class IndexableString:
|
||||
Works almost like a `str`, but allows indexing on offsets
|
||||
provided on an `Encoding`
|
||||
"""
|
||||
|
||||
def offsets(self, offsets: Tuple[int, int]) -> Optional[Tuple[int, int]]:
|
||||
""" Convert the Encoding's offsets to the current string.
|
||||
|
||||
`Encoding` provides a list of offsets that are actually offsets to the Normalized
|
||||
version of the text. Calling this method with the offsets provided by `Encoding` will make
|
||||
sure that said offsets can be used to index the `str` directly.
|
||||
"""
|
||||
pass
|
||||
|
||||
class Encoding:
|
||||
@@ -53,7 +61,11 @@ class Encoding:
|
||||
|
||||
@property
|
||||
def offsets(self) -> List[Offsets]:
|
||||
""" The offsets """
|
||||
""" The offsets.
|
||||
These offsets can be used to index any `IndexableString` directly. If you want to
|
||||
index the original `str`, make sure to retrieve the converted offsets using the `.offsets`
|
||||
method on the `original_str`.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
|
||||
@@ -11,14 +11,14 @@ class Decoder:
|
||||
""" Decode the given list of strings to a final string """
|
||||
pass
|
||||
|
||||
class ByteLevel:
|
||||
class ByteLevel(Decoder):
|
||||
""" ByteLevel Decoder """
|
||||
|
||||
def __init__(self) -> None:
|
||||
""" Instantiate a new ByteLevel Decoder """
|
||||
pass
|
||||
|
||||
class WordPiece:
|
||||
class WordPiece(Decoder):
|
||||
""" WordPiece Decoder """
|
||||
|
||||
@staticmethod
|
||||
@@ -31,7 +31,7 @@ class WordPiece:
|
||||
"""
|
||||
pass
|
||||
|
||||
class Metaspace:
|
||||
class Metaspace(Decoder):
|
||||
""" Metaspace decoder """
|
||||
|
||||
def __init__(self, replacement: str = "▁", add_prefix_space: bool = True) -> None:
|
||||
@@ -48,7 +48,7 @@ class Metaspace:
|
||||
"""
|
||||
pass
|
||||
|
||||
class BPEDecoder:
|
||||
class BPEDecoder(Decoder):
|
||||
""" BPEDecoder """
|
||||
|
||||
def __init__(self, suffix: str = "</w>") -> None:
|
||||
|
||||
@@ -7,7 +7,7 @@ class PostProcessor:
|
||||
a PostProcessor will return an instance of this class when instantiated.
|
||||
"""
|
||||
|
||||
class BertProcessing:
|
||||
class BertProcessing(PostProcessor):
|
||||
""" BertProcessing
|
||||
|
||||
This post-processor takes care of adding the special tokens needed by
|
||||
@@ -31,7 +31,7 @@ class BertProcessing:
|
||||
"""
|
||||
pass
|
||||
|
||||
class RobertaProcessing:
|
||||
class RobertaProcessing(PostProcessor):
|
||||
""" RobertaProcessing
|
||||
|
||||
This post-processor takes care of adding the special tokens needed by
|
||||
|
||||
@@ -7,7 +7,7 @@ class Trainer:
|
||||
Trainer will return an instance of this class when instantiated.
|
||||
"""
|
||||
|
||||
class BpeTrainer:
|
||||
class BpeTrainer(Trainer):
|
||||
""" BpeTrainer
|
||||
|
||||
Capable of training a BPE model
|
||||
@@ -59,7 +59,7 @@ class BpeTrainer:
|
||||
"""
|
||||
pass
|
||||
|
||||
class WordPieceTrainer:
|
||||
class WordPieceTrainer(Trainer):
|
||||
""" WordPieceTrainer
|
||||
|
||||
Capable of training a WordPiece model
|
||||
|
||||
Reference in New Issue
Block a user