Python - Can add tokens

Anthony MOI
2019-12-16 18:45:26 -05:00
parent f92e73b8f3
commit 1a90cc96e5

@@ -122,6 +122,31 @@ impl Tokenizer {
        self.tokenizer.id_to_token(id)
    }

    fn add_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
        let tokens = tokens
            .into_iter()
            .map(|token| {
                if let Ok(content) = token.extract::<String>() {
                    Ok(tk::tokenizer::AddedToken {
                        content,
                        ..Default::default()
                    })
                } else if let Ok((content, single_word)) = token.extract::<(String, bool)>() {
                    Ok(tk::tokenizer::AddedToken {
                        content,
                        single_word,
                    })
                } else {
                    Err(exceptions::Exception::py_err(
                        "Input must be a list[str] or list[(str, bool)]",
                    ))
                }
            })
            .collect::<PyResult<Vec<_>>>()?;
        Ok(self.tokenizer.add_tokens(&tokens))
    }

    fn train(&mut self, trainer: &Trainer, files: Vec<String>) -> PyResult<()> {
        trainer.trainer.execute(|trainer| {
            if let Err(e) = self.tokenizer.train(trainer, files) {
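
The binding above exposes the Rust-side add_tokens to Python, accepting either plain strings or (content, single_word) pairs and returning a count (presumably the number of tokens actually added). A minimal sketch of how this could be called from Python, assuming the package exposes a Tokenizer class and a way to construct one (those names are assumptions here, not shown in this diff):

# Hypothetical usage of the add_tokens binding added in this commit;
# everything except the add_tokens argument shapes is an assumption.
from tokenizers import Tokenizer, models

tokenizer = Tokenizer(models.BPE())  # assumed way to build an empty tokenizer

# list[str]: each entry becomes an AddedToken with default options
count = tokenizer.add_tokens(["<special>", "<other>"])

# list[(str, bool)]: the bool maps to AddedToken.single_word
count += tokenizer.add_tokens([("new_york", True)])

print(count, "tokens added")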