mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-31 12:39:21 +00:00
We use 19.10b0 not 20 here...
This commit is contained in:
@ -32,10 +32,7 @@ if not files:
|
||||
|
||||
# Initialize an empty tokenizer
|
||||
tokenizer = BertWordPieceTokenizer(
|
||||
clean_text=True,
|
||||
handle_chinese_chars=True,
|
||||
strip_accents=True,
|
||||
lowercase=True,
|
||||
clean_text=True, handle_chinese_chars=True, strip_accents=True, lowercase=True,
|
||||
)
|
||||
|
||||
# And then train
|
||||
|
Reference in New Issue
Block a user