Give error when initializing tokenizer with too high stride (#1306)

* Split `get_n_added_tokens` into separate method

* Modify `TokenizerImpl.with_truncation()` to raise an error if given bad parameters

* Return Python error if `tokenizer.with_truncation()` fails

* Add dummy variable assignment for `no_truncation()` case

* Unrelated fmt fix.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Connor Boyle
2023-07-28 00:16:44 -07:00
committed by GitHub
parent bb38f390a6
commit c2664ae13f
3 changed files with 38 additions and 17 deletions

View File

@@ -712,15 +712,16 @@ impl PyTokenizer {
}
}
-self.tokenizer.with_truncation(Some(params));
+if let Err(error_message) = self.tokenizer.with_truncation(Some(params)) {
+return Err(PyError(error_message.to_string()).into_pyerr::<exceptions::PyValueError>());
+}
Ok(())
}
/// Disable truncation
#[pyo3(text_signature = "(self)")]
fn no_truncation(&mut self) {
-self.tokenizer.with_truncation(None);
+let _ = self.tokenizer.with_truncation(None);
}
/// Get the currently set truncation parameters