mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-10 14:48:22 +00:00
Merge pull request #149 from colinclement/master
Allow dropout option in ByteLevelBPETokenizer
This commit is contained in:
@@ -16,6 +16,7 @@ class ByteLevelBPETokenizer(BaseTokenizer):
|
||||
merges_file: Optional[str]=None,
|
||||
add_prefix_space: bool=False,
|
||||
do_lowercase: bool=False,
|
||||
+dropout: Optional[float]=None,
|
||||
unicode_normalizer: Optional[str]=None,
|
||||
continuing_subword_prefix: Optional[str]=None,
|
||||
end_of_word_suffix: Optional[str]=None
|
||||
@@ -23,8 +24,9 @@ class ByteLevelBPETokenizer(BaseTokenizer):
|
||||
if vocab_file is not None and merges_file is not None:
|
||||
tokenizer = Tokenizer(BPE.from_files(
|
||||
vocab_file, merges_file,
|
||||
+dropout=dropout,
|
||||
continuing_subword_prefix=continuing_subword_prefix or "",
|
||||
-end_of_word_suffix=end_of_word_suffix or ""
|
||||
+end_of_word_suffix=end_of_word_suffix or "",
|
||||
))
|
||||
else:
|
||||
tokenizer = Tokenizer(BPE.empty())
|
||||
|
||||
Reference in New Issue
Block a user