Python - Custom PreTokenizer backbone

This commit is contained in:
Anthony MOI
2019-11-23 23:59:33 -05:00
parent 891fc12de2
commit bd1aa80d8a

View File

@@ -7,6 +7,16 @@ use pyo3::prelude::*;
pub struct PreTokenizer { pub struct PreTokenizer {
pub pretok: Container<dyn tk::tokenizer::PreTokenizer + Sync>, pub pretok: Container<dyn tk::tokenizer::PreTokenizer + Sync>,
} }
#[pymethods]
impl PreTokenizer {
    /// Builds a `PreTokenizer` backed by an object supplied from Python.
    ///
    /// The object is wrapped in a `PyPreTokenizer`, boxed, and stored as an
    /// owned container. Any error returned by `PyPreTokenizer::new` is
    /// propagated to the caller unchanged.
    #[staticmethod]
    fn from_python(pretok: PyObject) -> PyResult<Self> {
        PyPreTokenizer::new(pretok).map(|wrapped| PreTokenizer {
            pretok: Container::Owned(Box::new(wrapped)),
        })
    }
}
#[pyclass] #[pyclass]
pub struct ByteLevel {} pub struct ByteLevel {}
@@ -19,3 +29,21 @@ impl ByteLevel {
}) })
} }
} }
/// Adapter that lets Python code supply its own pre-tokenizer.
///
/// It only stores the Python object it was given; see the
/// `tk::tokenizer::PreTokenizer` impl for how that object is (to be) used.
struct PyPreTokenizer {
    /// The Python object provided by the caller.
    class: PyObject,
}

impl PyPreTokenizer {
    /// Wraps `class` in a new `PyPreTokenizer`.
    ///
    /// Returns a `PyResult` so that validation failures can be reported
    /// later on; for now this always succeeds.
    pub fn new(class: PyObject) -> PyResult<Self> {
        // TODO: test the given PyObject. No validation is performed yet,
        // so any object is accepted as-is.
        Ok(Self { class })
    }
}
impl tk::tokenizer::PreTokenizer for PyPreTokenizer {
    /// Placeholder for delegating pre-tokenization to the wrapped Python object.
    ///
    /// # Panics
    ///
    /// Always panics: the body is still `unimplemented!()`, so the input
    /// sentence is ignored.
    fn pre_tokenize(&self, _sentence: &str) -> Vec<String> {
        // The argument is underscore-prefixed because the stub does not read it yet.
        unimplemented!()
    }
}