mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-08 05:38:23 +00:00
Merge pull request #149 from colinclement/master
Allow dropout option in ByteLevelBPETokenizer
This commit is contained in:
@@ -15,16 +15,18 @@ class ByteLevelBPETokenizer(BaseTokenizer):
         vocab_file: Optional[str]=None,
         merges_file: Optional[str]=None,
         add_prefix_space: bool=False,
-        do_lowercase: bool = False,
-        unicode_normalizer: Optional[str] = None,
-        continuing_subword_prefix: Optional[str] = None,
-        end_of_word_suffix: Optional[str] = None
+        do_lowercase: bool=False,
+        dropout: Optional[float]=None,
+        unicode_normalizer: Optional[str]=None,
+        continuing_subword_prefix: Optional[str]=None,
+        end_of_word_suffix: Optional[str]=None
     ):
         if vocab_file is not None and merges_file is not None:
             tokenizer = Tokenizer(BPE.from_files(
                 vocab_file, merges_file,
+                dropout=dropout,
                 continuing_subword_prefix=continuing_subword_prefix or "",
-                end_of_word_suffix=end_of_word_suffix or ""
+                end_of_word_suffix=end_of_word_suffix or "",
             ))
         else:
             tokenizer = Tokenizer(BPE.empty())
Reference in New Issue
Block a user