mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Python - Expose BpeTrainer options
This commit is contained in:
@@ -25,13 +25,19 @@ impl BpeTrainer {
 for (key, val) in kwargs {
     let key: &str = key.extract()?;
     match key {
-        "vocab_size" => {
-            let size: usize = val.extract()?;
-            trainer.vocab_size = size;
-        }
-        "min_frequency" => {
-            let freq: u32 = val.extract()?;
-            trainer.min_frequency = freq;
+        "vocab_size" => trainer.vocab_size = val.extract()?,
+        "min_frequency" => trainer.min_frequency = val.extract()?,
+        "show_progress" => trainer.show_progress = val.extract()?,
+        "special_tokens" => trainer.special_tokens = val.extract()?,
+        "limit_alphabet" => trainer.limit_alphabet = val.extract()?,
+        "initial_alphabet" => {
+            let alphabet: Vec<String> = val.extract()?;
+            trainer.initial_alphabet = alphabet
+                .into_iter()
+                .map(|s| s.chars().nth(0))
+                .filter(|c| c.is_some())
+                .map(|c| c.unwrap())
+                .collect();
         }
         _ => println!("Ignored unknown kwargs option {}", key),
     };
Reference in New Issue
Block a user