Going back to fuse_unk being off by default for BPE, but adding a flag to enable it.
committed by Anthony MOI
parent 940f8bd8fa
commit 9d3a93db5b
@@ -45,6 +45,9 @@ class BPE(Model):
         end_of_word_suffix: (`optional`) str:
             The suffix to attach to subword units that represent an end of word.
 
+        fuse_unk: (`optional`) bool:
+            Multiple unk tokens get fused into only 1
+
     """
 
     @staticmethod
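For intuition, here is a plain-Python sketch (not the library's implementation, which lives in the Rust BPE model) of what "multiple unk tokens get fused into only 1" means: consecutive unk ids in a token sequence collapse into a single one. The unk id value 0 and the helper name are assumptions made for illustration only.

# Illustrative sketch only: collapse runs of the unk id in a list of token ids.
# The unk id (0) and this helper are hypothetical; the real fusing happens
# inside the BPE model when fuse_unk is enabled.
def fuse_unk_ids(ids, unk_id=0):
    fused = []
    for token_id in ids:
        # Drop an unk id that immediately follows another unk id.
        if token_id == unk_id and fused and fused[-1] == unk_id:
            continue
        fused.append(token_id)
    return fused

# Three consecutive unk ids become one; isolated unk ids are kept.
assert fuse_unk_ids([5, 0, 0, 0, 7, 0, 2]) == [5, 0, 7, 0, 2]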
@@ -57,6 +60,7 @@ class BPE(Model):
         unk_token: Optional[str],
         continuing_subword_prefix: Optional[str],
         end_of_word_suffix: Optional[str],
+        fuse_unk: Optional[bool],
     ):
         pass
 
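A minimal usage sketch, assuming the current tokenizers Python bindings where BPE accepts unk_token and fuse_unk as keyword arguments (the staticmethod stub above may differ from this constructor form): fuse_unk stays off by default, matching the commit message, and must be enabled explicitly.

# Usage sketch under the assumption stated above; not the exact signature
# from the stub. fuse_unk defaults to False and is opted into here.
from tokenizers import Tokenizer
from tokenizers.models import BPE

tokenizer = Tokenizer(BPE(unk_token="[UNK]", fuse_unk=True))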