mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
add example / doc test for BPE trainer
This commit is contained in:
@ -1,8 +1,3 @@
|
|||||||
//!
|
|
||||||
//! # Trainer
|
|
||||||
//!
|
|
||||||
//! In charge of training a BPE model
|
|
||||||
//!
|
|
||||||
#![allow(clippy::map_entry)]
|
#![allow(clippy::map_entry)]
|
||||||
|
|
||||||
use super::{Pair, Word, BPE};
|
use super::{Pair, Word, BPE};
|
||||||
@ -40,6 +35,22 @@ impl Default for BpeTrainerConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// In charge of training a BPE model from a mapping of words to word counts.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::collections::HashMap;
|
||||||
|
/// use tokenizers::tokenizer::Trainer;
|
||||||
|
/// use tokenizers::models::bpe::BpeTrainer;
|
||||||
|
///
|
||||||
|
/// let word_counts: HashMap<String, u32> = [
|
||||||
|
/// (String::from("Hello"), 1),
|
||||||
|
/// (String::from("World"), 1),
|
||||||
|
/// ].iter().cloned().collect();
|
||||||
|
/// let trainer = BpeTrainer::default();
|
||||||
|
/// let model = trainer.train(word_counts);
|
||||||
|
/// ```
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct BpeTrainer {
|
pub struct BpeTrainer {
|
||||||
// Training parameters
|
// Training parameters
|
||||||
|
Reference in New Issue
Block a user