Use WhitespaceSplit for BPETokenizer

This commit is contained in:
Anthony MOI
2020-01-17 18:33:29 -05:00
parent fc601289eb
commit 395f605fd2
2 changed files with 3 additions and 3 deletions

View File

@@ -30,7 +30,7 @@ class BPETokenizer(BaseTokenizer):
NFKC.new(),
Lowercase.new()
])
-tokenizer.pre_tokenizer = pre_tokenizers.Whitespace.new()
+tokenizer.pre_tokenizer = pre_tokenizers.WhitespaceSplit.new()
tokenizer.decoder = decoders.BPEDecoder.new(suffix=suffix)
parameters = {