Mirror of https://github.com/mii443/tokenizers.git (synced 2025-12-07 13:18:31 +00:00)
Use WhitespaceSplit for BPETokenizer
@@ -30,7 +30,7 @@ class BPETokenizer(BaseTokenizer):
             NFKC.new(),
             Lowercase.new()
         ])
-        tokenizer.pre_tokenizer = pre_tokenizers.Whitespace.new()
+        tokenizer.pre_tokenizer = pre_tokenizers.WhitespaceSplit.new()
         tokenizer.decoder = decoders.BPEDecoder.new(suffix=suffix)

         parameters = {
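For context, the two pre-tokenizers behave differently: Whitespace splits on the pattern \w+|[^\w\s]+, so punctuation is separated from words, while WhitespaceSplit splits only on whitespace and leaves punctuation attached. A minimal sketch of the difference, using the Python tokenizers package (an assumption for illustration; the diff above uses a binding with .new() constructors, so the exact API differs):

    from tokenizers import pre_tokenizers

    text = "Hello, world!"

    # Whitespace splits on \w+|[^\w\s]+, so punctuation becomes its own piece.
    print(pre_tokenizers.Whitespace().pre_tokenize_str(text))
    # expected: [('Hello', (0, 5)), (',', (5, 6)), ('world', (7, 12)), ('!', (12, 13))]

    # WhitespaceSplit splits only on whitespace, keeping punctuation attached.
    print(pre_tokenizers.WhitespaceSplit().pre_tokenize_str(text))
    # expected: [('Hello,', (0, 6)), ('world!', (7, 13))]

With WhitespaceSplit, punctuation stays attached to the surrounding word and is left for the BPE model itself to handle.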