From e8925a33dac4c5d27969e5a264e5d3f12d219300 Mon Sep 17 00:00:00 2001
From: Anthony MOI
Date: Thu, 26 Mar 2020 14:19:37 -0400
Subject: [PATCH] Python - remove add_special_tokens from BertWordPieceTokenizer init

---
 bindings/python/tokenizers/implementations/bert_wordpiece.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bindings/python/tokenizers/implementations/bert_wordpiece.py b/bindings/python/tokenizers/implementations/bert_wordpiece.py
index 07dde0e1..e3910c84 100644
--- a/bindings/python/tokenizers/implementations/bert_wordpiece.py
+++ b/bindings/python/tokenizers/implementations/bert_wordpiece.py
@@ -14,7 +14,6 @@ class BertWordPieceTokenizer(BaseTokenizer):
     def __init__(
         self,
         vocab_file: Optional[str] = None,
-        add_special_tokens: bool = True,
         unk_token: Union[str, AddedToken] = "[UNK]",
         sep_token: Union[str, AddedToken] = "[SEP]",
         cls_token: Union[str, AddedToken] = "[CLS]",
@@ -52,7 +51,7 @@ class BertWordPieceTokenizer(BaseTokenizer):
         )
         tokenizer.pre_tokenizer = BertPreTokenizer()
 
-        if add_special_tokens and vocab_file is not None:
+        if vocab_file is not None:
             sep_token_id = tokenizer.token_to_id(str(sep_token))
             if sep_token_id is None:
                 raise TypeError("sep_token not found in the vocabulary")
@@ -67,7 +66,6 @@ class BertWordPieceTokenizer(BaseTokenizer):
 
         parameters = {
             "model": "BertWordPiece",
-            "add_special_tokens": add_special_tokens,
             "unk_token": unk_token,
             "sep_token": sep_token,
             "cls_token": cls_token,
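
A minimal usage sketch of the constructor after this change: add_special_tokens
is gone from __init__, and the [CLS]/[SEP] post-processor is attached whenever
a vocab_file is provided. The vocab file path below is a placeholder, not a
file shipped with the library.

    from tokenizers import BertWordPieceTokenizer

    # Hypothetical local vocab file; any BERT WordPiece vocab works here.
    tokenizer = BertWordPieceTokenizer("bert-base-uncased-vocab.txt")

    # With a vocabulary given, special tokens are now always configured.
    output = tokenizer.encode("Hello, world!")
    print(output.tokens)  # e.g. ['[CLS]', 'hello', ',', 'world', '!', '[SEP]']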