mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Train Model in place
This lets us keep everything that was set on the model, except for the vocabulary, when it is trained. For example, this lets us keep the configured `unk_token` of BPE when it is trained.
This commit is contained in:
@ -1044,7 +1044,7 @@ impl PyTokenizer {
|
||||
let trainer =
|
||||
trainer.map_or_else(|| self.tokenizer.get_model().get_trainer(), |t| t.clone());
|
||||
Python::with_gil(|py| {
|
||||
py.allow_threads(|| ToPyResult(self.tokenizer.train_and_replace(&trainer, files)).into())
|
||||
py.allow_threads(|| ToPyResult(self.tokenizer.train(&trainer, files)).into())
|
||||
})
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user