Improve docs and fix tests around training

This commit is contained in:
Anthony MOI
2020-11-27 16:44:17 -05:00
committed by Anthony MOI
parent 06f6ba3fce
commit 3a8627ce4d
9 changed files with 101 additions and 24 deletions

View File

@ -15,5 +15,6 @@ def batch_iterator():
for i in range(0, len(dataset["train"]), batch_length):
yield dataset["train"][i : i + batch_length]["text"]
# And finally train
bpe_tokenizer.train_from_iterator(batch_iterator(), length=len(dataset["train"]))