mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-10 14:48:22 +00:00
Merge pull request #149 from colinclement/master
Allow dropout option in ByteLevelBPETokenizer
This commit is contained in:
@@ -15,16 +15,18 @@ class ByteLevelBPETokenizer(BaseTokenizer):
|
|||||||
vocab_file: Optional[str]=None,
|
vocab_file: Optional[str]=None,
|
||||||
merges_file: Optional[str]=None,
|
merges_file: Optional[str]=None,
|
||||||
add_prefix_space: bool=False,
|
add_prefix_space: bool=False,
|
||||||
do_lowercase: bool = False,
|
do_lowercase: bool=False,
|
||||||
unicode_normalizer: Optional[str] = None,
|
dropout: Optional[float]=None,
|
||||||
continuing_subword_prefix: Optional[str] = None,
|
unicode_normalizer: Optional[str]=None,
|
||||||
end_of_word_suffix: Optional[str] = None
|
continuing_subword_prefix: Optional[str]=None,
|
||||||
|
end_of_word_suffix: Optional[str]=None
|
||||||
):
|
):
|
||||||
if vocab_file is not None and merges_file is not None:
|
if vocab_file is not None and merges_file is not None:
|
||||||
tokenizer = Tokenizer(BPE.from_files(
|
tokenizer = Tokenizer(BPE.from_files(
|
||||||
vocab_file, merges_file,
|
vocab_file, merges_file,
|
||||||
|
dropout=dropout,
|
||||||
continuing_subword_prefix=continuing_subword_prefix or "",
|
continuing_subword_prefix=continuing_subword_prefix or "",
|
||||||
end_of_word_suffix=end_of_word_suffix or ""
|
end_of_word_suffix=end_of_word_suffix or "",
|
||||||
))
|
))
|
||||||
else:
|
else:
|
||||||
tokenizer = Tokenizer(BPE.empty())
|
tokenizer = Tokenizer(BPE.empty())
|
||||||
|
|||||||
Reference in New Issue
Block a user