mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Python - Update examples with getter/setter
This commit is contained in:
@ -32,14 +32,14 @@ if not files:
|
||||
tokenizer = Tokenizer(models.WordPiece.empty())
|
||||
|
||||
# Customize all the steps
|
||||
tokenizer.with_normalizer(normalizers.BertNormalizer.new(
|
||||
tokenizer.normalizer = normalizers.BertNormalizer.new(
|
||||
clean_text=True,
|
||||
handle_chinese_chars=True,
|
||||
strip_accents=True,
|
||||
lowercase=True,
|
||||
))
|
||||
tokenizer.with_pre_tokenizer(pre_tokenizers.BertPreTokenizer.new())
|
||||
tokenizer.with_decoder(decoders.WordPiece.new())
|
||||
)
|
||||
tokenizer.pre_tokenizer = pre_tokenizers.BertPreTokenizer.new()
|
||||
tokenizer.decoder = decoders.WordPiece.new()
|
||||
|
||||
# And then train
|
||||
trainer = trainers.WordPieceTrainer.new(
|
||||
|
Reference in New Issue
Block a user