Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-22 16:25:30 +00:00
Bump version and update Readme
This commit is contained in:
@@ -61,7 +61,7 @@ bpe = models.BPE.from_files(vocab, merges)
 tokenizer = Tokenizer(bpe)

 # Customize pre-tokenization and decoding
-tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new())
+tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True))
 tokenizer.with_decoder(decoders.ByteLevel.new())

 # And then encode:
@@ -85,7 +85,7 @@ from tokenizers import Tokenizer, models, pre_tokenizers, decoders, trainers
 tokenizer = Tokenizer(models.BPE.empty())

 # Customize pre-tokenization and decoding
-tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new())
+tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True))
 tokenizer.with_decoder(decoders.ByteLevel.new())

 # And then train
|
Reference in New Issue
Block a user