From cc01186fd739bf1ed3657c1d38891aef61aca5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kok=C5=AB?= <86721361+Coqueue@users.noreply.github.com> Date: Tue, 27 May 2025 15:23:58 +0900 Subject: [PATCH] Fix type notation of merges in BPE Python binding (#1766) --- .../python/py_src/tokenizers/implementations/byte_level_bpe.py | 2 +- .../python/py_src/tokenizers/implementations/char_level_bpe.py | 2 +- .../py_src/tokenizers/implementations/sentencepiece_bpe.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bindings/python/py_src/tokenizers/implementations/byte_level_bpe.py b/bindings/python/py_src/tokenizers/implementations/byte_level_bpe.py index c7e3dbc4..f65f05e1 100644 --- a/bindings/python/py_src/tokenizers/implementations/byte_level_bpe.py +++ b/bindings/python/py_src/tokenizers/implementations/byte_level_bpe.py @@ -16,7 +16,7 @@ class ByteLevelBPETokenizer(BaseTokenizer): def __init__( self, vocab: Optional[Union[str, Dict[str, int]]] = None, - merges: Optional[Union[str, Dict[Tuple[int, int], Tuple[int, int]]]] = None, + merges: Optional[Union[str, List[Tuple[str, str]]]] = None, add_prefix_space: bool = False, lowercase: bool = False, dropout: Optional[float] = None, diff --git a/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py b/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py index 29ca5977..62b5bcdf 100644 --- a/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py +++ b/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py @@ -25,7 +25,7 @@ class CharBPETokenizer(BaseTokenizer): def __init__( self, vocab: Optional[Union[str, Dict[str, int]]] = None, - merges: Optional[Union[str, Dict[Tuple[int, int], Tuple[int, int]]]] = None, + merges: Optional[Union[str, List[Tuple[str, str]]]] = None, unk_token: Union[str, AddedToken] = "", suffix: str = "", dropout: Optional[float] = None, diff --git a/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py b/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py index cd550b41..26200489 100644 --- a/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py +++ b/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py @@ -16,7 +16,7 @@ class SentencePieceBPETokenizer(BaseTokenizer): def __init__( self, vocab: Optional[Union[str, Dict[str, int]]] = None, - merges: Optional[Union[str, Dict[Tuple[int, int], Tuple[int, int]]]] = None, + merges: Optional[Union[str, List[Tuple[str, str]]]] = None, unk_token: Union[str, AddedToken] = "", replacement: str = "▁", add_prefix_space: bool = True,