Merge pull request #149 from colinclement/master

Allow dropout option in ByteLevelBPETokenizer
This commit is contained in:
MOI Anthony
2020-02-18 09:59:40 -05:00
committed by GitHub

View File

@@ -15,16 +15,18 @@ class ByteLevelBPETokenizer(BaseTokenizer):
 vocab_file: Optional[str]=None,
 merges_file: Optional[str]=None,
 add_prefix_space: bool=False,
-do_lowercase: bool = False,
-unicode_normalizer: Optional[str] = None,
-continuing_subword_prefix: Optional[str] = None,
-end_of_word_suffix: Optional[str] = None
+do_lowercase: bool=False,
+dropout: Optional[float]=None,
+unicode_normalizer: Optional[str]=None,
+continuing_subword_prefix: Optional[str]=None,
+end_of_word_suffix: Optional[str]=None
 ):
 if vocab_file is not None and merges_file is not None:
 tokenizer = Tokenizer(BPE.from_files(
 vocab_file, merges_file,
+dropout=dropout,
 continuing_subword_prefix=continuing_subword_prefix or "",
-end_of_word_suffix=end_of_word_suffix or ""
+end_of_word_suffix=end_of_word_suffix or "",
 ))
 else:
 tokenizer = Tokenizer(BPE.empty())