mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Train Model in place
This lets us keep everything that was set on the model, except for the vocabulary, when it is trained. For example, this lets us keep the configured `unk_token` of BPE when it's trained.
This commit is contained in:
@ -84,11 +84,13 @@ impl Trainer for PyTrainer {
|
||||
self.trainer.should_show_progress()
|
||||
}
|
||||
|
||||
fn train(&self, words: HashMap<String, u32>) -> tk::Result<(PyModel, Vec<tk::AddedToken>)> {
|
||||
self.trainer.train(words).map(|(m, t)| {
|
||||
let m = PyModel { model: Arc::new(m) };
|
||||
(m, t)
|
||||
})
|
||||
fn train(
|
||||
&self,
|
||||
words: HashMap<String, u32>,
|
||||
model: &mut PyModel,
|
||||
) -> tk::Result<Vec<tk::AddedToken>> {
|
||||
todo!("FIX THIS");
|
||||
self.trainer.train(words, &mut model.model)
|
||||
}
|
||||
|
||||
fn process_tokens(&self, words: &mut HashMap<String, u32>, tokens: Vec<String>) {
|
||||
|
Reference in New Issue
Block a user