mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Fix Python README example
This commit is contained in:
@ -145,11 +145,15 @@ tokenizer.decoder = decoders.ByteLevel()
|
||||
tokenizer.post_processor = processors.ByteLevel(trim_offsets=True)
|
||||
|
||||
# And then train
|
||||
trainer = trainers.BpeTrainer(vocab_size=20000, min_frequency=2)
|
||||
trainer = trainers.BpeTrainer(
|
||||
vocab_size=20000,
|
||||
min_frequency=2,
|
||||
initial_alphabet=pre_tokenizers.ByteLevel.alphabet()
|
||||
)
|
||||
tokenizer.train([
    "./path/to/dataset/1.txt",
    "./path/to/dataset/2.txt",
    "./path/to/dataset/3.txt"
], trainer=trainer)
|
||||
|
||||
# And Save it
|
||||
|
Reference in New Issue
Block a user