Python - Update README with new API
@@ -126,7 +126,7 @@ from tokenizers import Tokenizer, models, pre_tokenizers, decoders, processors
 # Load a BPE Model
 vocab = "./path/to/vocab.json"
 merges = "./path/to/merges.txt"
-bpe = models.BPE.from_files(vocab, merges)
+bpe = models.BPE(vocab, merges)
 
 # Initialize a tokenizer
 tokenizer = Tokenizer(bpe)
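
For context, a minimal sketch of the README snippet after this change, assuming a tokenizers release in which models.BPE takes the vocab and merges file paths directly in its constructor (later releases moved file loading to models.BPE.from_file); the paths and sample sentence below are placeholders:

from tokenizers import Tokenizer, models

# Placeholder paths, as in the README
vocab = "./path/to/vocab.json"
merges = "./path/to/merges.txt"

# New API: the constructor replaces the old models.BPE.from_files classmethod
bpe = models.BPE(vocab, merges)
tokenizer = Tokenizer(bpe)

# Tokenizer.encode returns an Encoding exposing .tokens and .ids
encoded = tokenizer.encode("Hello, world!")
print(encoded.tokens)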
@@ -155,7 +155,7 @@ print(encoded)
 from tokenizers import Tokenizer, models, pre_tokenizers, decoders, trainers, processors
 
 # Initialize a tokenizer
-tokenizer = Tokenizer(models.BPE.empty())
+tokenizer = Tokenizer(models.BPE())
 
 # Customize pre-tokenization and decoding
 tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)
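
Similarly, a sketch of training a tokenizer from scratch with the new no-argument constructor; the BpeTrainer options and corpus path are illustrative, and the train() call shown follows newer releases (its argument order has varied across versions):

from tokenizers import Tokenizer, models, pre_tokenizers, trainers

# New API: models.BPE() replaces the old models.BPE.empty() classmethod
tokenizer = Tokenizer(models.BPE())
tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)

# Illustrative trainer settings; the corpus path is a placeholder
trainer = trainers.BpeTrainer(vocab_size=30000, min_frequency=2)
tokenizer.train(["./path/to/corpus.txt"], trainer=trainer)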