add example / doc test for BPE trainer

2025-08-23 16:49:27 +00:00 · 2019-12-19 15:28:58 -08:00
parent 69212e17e9
commit 6d51e7a393
1 changed files with 16 additions and 5 deletions
--- a/tokenizers/src/models/bpe/trainer.rs
+++ b/tokenizers/src/models/bpe/trainer.rs
@ -1,8 +1,3 @@
-//!
-//! # Trainer
-//!
-//! In charge of training a BPE model
-//!
 #![allow(clippy::map_entry)]

 use super::{Pair, Word, BPE};
@ -40,6 +35,22 @@ impl Default for BpeTrainerConfig {
    }
 }

+/// Trains a BPE model from a mapping of words to their occurrence counts.
+///
+/// # Examples
+///
+/// ```
+/// use std::collections::HashMap;
+/// use tokenizers::tokenizer::Trainer;
+/// use tokenizers::models::bpe::BpeTrainer;
+///
+/// let word_counts: HashMap<String, u32> = [
+///     (String::from("Hello"), 1),
+///     (String::from("World"), 1),
+/// ].iter().cloned().collect();
+/// let trainer = BpeTrainer::default();
+/// let model = trainer.train(word_counts);
+/// ```
 #[derive(Default)]
 pub struct BpeTrainer {
    // Training parameters