Python - Rewrite PyDecoder and PyPreTokenizer

Author: Anthony MOI
Date:   2019-12-13 12:20:25 -05:00
Parent: 2a0ad97809
Commit: 7cf4b3a6cd

2 changed files with 31 additions and 68 deletions


@@ -55,46 +55,27 @@ struct PyDecoder {
 impl PyDecoder {
     pub fn new(class: PyObject) -> PyResult<Self> {
-        let decoder = PyDecoder { class };
-        // Quickly test the PyDecoder
-        decoder._decode(vec![
-            "This".into(),
-            "is".into(),
-            "a".into(),
-            "sentence".into(),
-        ])?;
-        Ok(decoder)
-    }
-    fn _decode(&self, tokens: Vec<String>) -> PyResult<String> {
-        let gil = Python::acquire_gil();
-        let py = gil.python();
-        let args = PyTuple::new(py, &[tokens]);
-        let res = self.class.call_method(py, "decode", args, None)?;
-        let decoded = res
-            .cast_as::<PyString>(py)
-            .map_err(|_| exceptions::TypeError::py_err("`decode` is expected to return a str"))?;
-        Ok(decoded.to_string()?.into_owned())
+        Ok(PyDecoder { class })
     }
 }
 impl tk::tokenizer::Decoder for PyDecoder {
-    fn decode(&self, tokens: Vec<String>) -> String {
-        match self._decode(tokens) {
-            Ok(res) => res,
-            Err(e) => {
+    fn decode(&self, tokens: Vec<String>) -> Result<String> {
         let gil = Python::acquire_gil();
         let py = gil.python();
-                e.print(py);
-                // Return an empty string as fallback
-                String::from("")
-            }
+        let args = PyTuple::new(py, &[tokens]);
+        match self.class.call_method(py, "decode", args, None) {
+            Ok(res) => Ok(res
+                .cast_as::<PyString>(py)
+                .map_err(|_| PyError::from("`decode` is expected to return a str"))?
+                .to_string()
+                .map_err(|_| PyError::from("`decode` is expected to return a str"))?
+                .into_owned()),
+            Err(e) => Err(Box::new(PyError(format!(
+                "Error while calling `decode`: {:?}",
+                e
+            )))),
         }
     }
 }
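With this change, the trait implementation calls decode on the wrapped Python object directly and returns a Result<String>: a non-str return value or a Python exception becomes a PyError instead of being printed and replaced by an empty string. As a rough sketch of what the Python side of that contract looks like (the class name and joining logic below are illustrative assumptions, not part of this commit):

    from typing import List

    class SpaceJoinDecoder:
        """Hypothetical custom decoder: the binding only requires a
        decode(tokens) method that returns a str."""

        def decode(self, tokens: List[str]) -> str:
            # Join sub-word tokens back into a single string.
            return " ".join(tokens)

    # Anything other than a str coming back from decode() now surfaces as
    # the "`decode` is expected to return a str" error instead of being
    # silently replaced by an empty string.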


@@ -66,44 +66,26 @@ struct PyPreTokenizer {
 impl PyPreTokenizer {
     pub fn new(class: PyObject) -> PyResult<Self> {
-        let pretok = PyPreTokenizer { class };
-        // Quickly test the PyPreTokenizer
-        pretok._pre_tokenize("This is a test sentence")?;
-        Ok(pretok)
-    }
-    fn _pre_tokenize(&self, sentence: &str) -> PyResult<Vec<String>> {
-        let gil = Python::acquire_gil();
-        let py = gil.python();
-        let args = PyTuple::new(py, &[sentence]);
-        let res = self.class.call_method(py, "pre_tokenize", args, None)?;
-        let tokens = res.cast_as::<PyList>(py).map_err(|_| {
-            exceptions::TypeError::py_err("`pre_tokenize` is expected to return a List[str]")
-        })?;
-        let tokens: Vec<String> = tokens.extract().map_err(|_| {
-            exceptions::TypeError::py_err("`pre_tokenize` is expected to return a List[str]")
-        })?;
-        Ok(tokens)
+        Ok(PyPreTokenizer { class })
     }
 }
 impl tk::tokenizer::PreTokenizer for PyPreTokenizer {
-    fn pre_tokenize(&self, sentence: &str) -> Vec<String> {
-        match self._pre_tokenize(sentence) {
-            Ok(res) => res,
-            Err(e) => {
+    fn pre_tokenize(&self, sentence: &str) -> Result<Vec<String>> {
         let gil = Python::acquire_gil();
         let py = gil.python();
-                e.print(py);
-                // Return an empty Vec as fallback
-                vec![]
-            }
+        let args = PyTuple::new(py, &[sentence]);
+        match self.class.call_method(py, "pre_tokenize", args, None) {
+            Ok(res) => Ok(res
+                .cast_as::<PyList>(py)
+                .map_err(|_| PyError::from("`pre_tokenize` is expected to return a List[str]"))?
+                .extract::<Vec<String>>()
+                .map_err(|_| PyError::from("`pre_tokenize` is expected to return a List[str]"))?),
+            Err(e) => Err(Box::new(PyError(format!(
+                "Error while calling `pre_tokenize`: {:?}",
+                e
+            )))),
         }
     }
 }
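
The same pattern applies here: pre_tokenize now returns Result<Vec<String>> and propagates Python errors as PyErrors. A minimal sketch of a Python object satisfying the expected interface (class name and splitting rule are illustrative assumptions):

    from typing import List

    class WhitespacePreTokenizer:
        """Hypothetical custom pre-tokenizer: the binding only requires a
        pre_tokenize(sentence) method that returns a List[str]."""

        def pre_tokenize(self, sentence: str) -> List[str]:
            # Split on whitespace; a real pre-tokenizer could be smarter.
            return sentence.split()

    # A Python exception raised here, or a non-list return value, now comes
    # back to the Rust caller as an error rather than an empty fallback list.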