mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Addressing first pass of comments.
This commit is contained in:
@ -32,7 +32,10 @@ if not files:
|
||||
|
||||
# Initialize an empty tokenizer
|
||||
tokenizer = BertWordPieceTokenizer(
|
||||
clean_text=True, handle_chinese_chars=True, strip_accents=True, lowercase=True,
|
||||
clean_text=True,
|
||||
handle_chinese_chars=True,
|
||||
strip_accents=True,
|
||||
lowercase=True,
|
||||
)
|
||||
|
||||
# And then train
|
||||
|
Reference in New Issue
Block a user