Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-23 16:49:27 +00:00
Add in-place train.
Committed by: Anthony MOI
Parent: ac8af63f70
Commit: 10a39ba6b4
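Summary of the change, as read from the diff below: the Python binding's `train` previously had to clone the wrapped tokenizer, run the consuming `train` (which takes `self` and returns a new tokenizer), and assign the result back. This commit adds `train_and_replace`, which takes `&mut self`, builds word counts from the given files, trains a model, and installs it along with the trainer's special tokens, so the binding can train in place without the clone. A standalone sketch of the pattern follows the last hunk.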
@@ -667,10 +667,8 @@ impl PyTokenizer {
     }

     fn train(&mut self, trainer: &PyTrainer, files: Vec<String>) -> PyResult<()> {
-        self.tokenizer = self
-            .tokenizer
-            .clone()
-            .train(trainer, files)
+        self.tokenizer
+            .train_and_replace(trainer, files)
             .map_err(|e| exceptions::Exception::py_err(format!("{}", e)))?;
         Ok(())
     }

@@ -1004,7 +1004,7 @@ where
         Ok(words)
     }

-    /// Train a model and replace our current Model, using the given Trainer
+    /// Train a model and return a new Tokenizer, using the given Trainer
     pub fn train<T, TM>(
         self,
         trainer: &T,

@@ -1032,6 +1032,20 @@ where

         Ok(new_tok)
     }
+
+    /// Train a model and replace our current Model, using the given Trainer
+    pub fn train_and_replace<T>(&mut self, trainer: &T, files: Vec<String>) -> Result<()>
+    where
+        T: Trainer<Model = M> + Sync,
+    {
+        let words = self.word_count(trainer, files)?;
+
+        let (model, special_tokens) = trainer.train(words)?;
+        self.model = model;
+        self.add_special_tokens(&special_tokens);
+
+        Ok(())
+    }
 }

 impl<M, N, PT, PP, D> std::str::FromStr for TokenizerImpl<M, N, PT, PP, D>
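For illustration, here is a small, self-contained sketch of the pattern with toy stand-in types (not the crate's real Model/Trainer/Tokenizer, and not part of this commit): a consuming `train` that returns a new value next to an in-place `train_and_replace` that mutates through `&mut self`, which is what lets the Python binding above drop its `clone()`.

// Toy stand-ins for illustration only; not the tokenizers crate's API.
#[derive(Clone, Debug, Default)]
struct Model {
    vocab: Vec<String>,
}

#[derive(Clone, Debug, Default)]
struct Tokenizer {
    model: Model,
}

struct Trainer;

impl Trainer {
    // Builds a new model from some word material (stands in for trainer.train(words)).
    fn train(&self, words: Vec<String>) -> Model {
        Model { vocab: words }
    }
}

impl Tokenizer {
    // Consuming form, analogous to `train`: takes `self`, returns a trained tokenizer.
    fn train(mut self, trainer: &Trainer, words: Vec<String>) -> Tokenizer {
        self.model = trainer.train(words);
        self
    }

    // In-place form, analogous to `train_and_replace`: borrows `&mut self`
    // and swaps the freshly trained model into the existing instance.
    fn train_and_replace(&mut self, trainer: &Trainer, words: Vec<String>) {
        self.model = trainer.train(words);
    }
}

fn main() {
    let trainer = Trainer;
    let words = vec!["hello".to_string(), "world".to_string()];

    // Old binding pattern: clone, train the clone, assign the result back.
    let mut tokenizer = Tokenizer::default();
    tokenizer = tokenizer.clone().train(&trainer, words.clone());

    // New pattern: train in place, no clone needed.
    tokenizer.train_and_replace(&trainer, words);
    println!("trained vocab: {:?}", tokenizer.model.vocab);
}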