diff --git a/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py b/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py
index 864892de..29ca5977 100644
--- a/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py
+++ b/bindings/python/py_src/tokenizers/implementations/char_level_bpe.py
@@ -45,7 +45,7 @@ class CharBPETokenizer(BaseTokenizer):
                 )
             )
         else:
-            tokenizer = Tokenizer(BPE())
+            tokenizer = Tokenizer(BPE(unk_token=str(unk_token), dropout=dropout, end_of_word_suffix=suffix))
 
         if tokenizer.token_to_id(str(unk_token)) is not None:
             tokenizer.add_special_tokens([str(unk_token)])