Python - Typings update

This commit is contained in:
Anthony MOI
2020-01-10 10:06:24 -05:00
parent b357a3ed5a
commit b27737d97c
4 changed files with 8 additions and 8 deletions

View File

@@ -5,7 +5,7 @@ from tokenizers.pre_tokenizers import BertPreTokenizer
from tokenizers.processors import BertProcessing
from .base_tokenizer import BaseTokenizer
-from typing import Optional, List
+from typing import Optional, List, Union
class BertWordPieceTokenizer(BaseTokenizer):
""" Bert WordPiece Tokenizer """
@@ -62,7 +62,7 @@ class BertWordPieceTokenizer(BaseTokenizer):
super().__init__(tokenizer, parameters)
-def train(self, files: List[str],
+def train(self, files: Union[str, List[str]],
vocab_size: int=30000,
min_frequency: int=2,
limit_alphabet: int=1000,

View File

@@ -3,7 +3,7 @@ from tokenizers.models import BPE
from tokenizers.normalizers import NFKC, Sequence, Lowercase
from .base_tokenizer import BaseTokenizer
-from typing import Optional, List
+from typing import Optional, List, Union
class BPETokenizer(BaseTokenizer):
""" Original BPE Tokenizer
@@ -42,7 +42,7 @@ class BPETokenizer(BaseTokenizer):
super().__init__(tokenizer, parameters)
-def train(self, files: List[str],
+def train(self, files: Union[str, List[str]],
vocab_size: int=30000,
min_frequency: int=2,
special_tokens: List[str]=["<unk>"],

View File

@@ -3,7 +3,7 @@ from tokenizers.models import BPE
from tokenizers.normalizers import NFKC
from .base_tokenizer import BaseTokenizer
-from typing import Optional, List
+from typing import Optional, List, Union
class ByteLevelBPETokenizer(BaseTokenizer):
""" ByteLevelBPETokenizer
@@ -31,7 +31,7 @@ class ByteLevelBPETokenizer(BaseTokenizer):
super().__init__(tokenizer, parameters)
-def train(self, files: List[str],
+def train(self, files: Union[str, List[str]],
vocab_size: int=30000,
min_frequency: int=2,
show_progress: bool=True,

View File

@@ -3,7 +3,7 @@ from tokenizers.models import BPE
from tokenizers.normalizers import NFKC
from .base_tokenizer import BaseTokenizer
-from typing import Optional, List
+from typing import Optional, List, Union
class SentencePieceBPETokenizer(BaseTokenizer):
""" SentencePiece BPE Tokenizer
@@ -42,7 +42,7 @@ class SentencePieceBPETokenizer(BaseTokenizer):
super().__init__(tokenizer, parameters)
-def train(self, files: List[str],
+def train(self, files: Union[str, List[str]],
vocab_size: int=30000,
min_frequency: int=2,
special_tokens: List[str]=["<unk>"],