mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-16 17:18:43 +00:00
Rust - Fix WordLevelTrainer default values
This commit is contained in:
@@ -7,13 +7,13 @@ use std::collections::HashMap;
|
|||||||
#[derive(Debug, Clone, Builder)]
|
#[derive(Debug, Clone, Builder)]
|
||||||
pub struct WordLevelTrainer {
|
pub struct WordLevelTrainer {
|
||||||
/// The minimum frequency a word must have to be part of the vocabulary
|
/// The minimum frequency a word must have to be part of the vocabulary
|
||||||
#[builder(default)]
|
#[builder(default = "0")]
|
||||||
pub min_frequency: u32,
|
pub min_frequency: u32,
|
||||||
/// The target vocabulary size
|
/// The target vocabulary size
|
||||||
#[builder(default)]
|
#[builder(default = "30_000")]
|
||||||
pub vocab_size: usize,
|
pub vocab_size: usize,
|
||||||
/// Whether to show progress while training
|
/// Whether to show progress while training
|
||||||
#[builder(default)]
|
#[builder(default = "true")]
|
||||||
pub show_progress: bool,
|
pub show_progress: bool,
|
||||||
/// A list of special tokens that the model should know of
|
/// A list of special tokens that the model should know of
|
||||||
#[builder(default)]
|
#[builder(default)]
|
||||||
@@ -25,13 +25,7 @@ pub struct WordLevelTrainer {
|
|||||||
|
|
||||||
impl Default for WordLevelTrainer {
|
impl Default for WordLevelTrainer {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self::builder().build().unwrap()
|
||||||
min_frequency: 0,
|
|
||||||
vocab_size: 30_000,
|
|
||||||
show_progress: true,
|
|
||||||
special_tokens: vec![],
|
|
||||||
words: HashMap::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user