Python - Update doc and readme for add_prefix_space

This commit is contained in:
Anthony MOI
2019-12-26 10:34:53 -05:00
parent 1879cb0bcb
commit a7734ffc9f
2 changed files with 3 additions and 3 deletions

View File

@@ -61,7 +61,7 @@ bpe = models.BPE.from_files(vocab, merges)
tokenizer = Tokenizer(bpe)
# Customize pre-tokenization and decoding
-tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True))
+tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(add_prefix_space=True))
tokenizer.with_decoder(decoders.ByteLevel.new())
# And then encode:
@@ -85,7 +85,7 @@ from tokenizers import Tokenizer, models, pre_tokenizers, decoders, trainers
tokenizer = Tokenizer(models.BPE.empty())
# Customize pre-tokenization and decoding
-tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True))
+tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(add_prefix_space=True))
tokenizer.with_decoder(decoders.ByteLevel.new())
# And then train