From f6a9b57b5b6167c481e3eb115bac70b51c743371 Mon Sep 17 00:00:00 2001
From: Anthony MOI
Date: Fri, 22 Nov 2019 20:56:50 -0500
Subject: [PATCH] Python - Add pre_tokenizers module

---
 bindings/python/src/lib.rs            |  9 +++++++++
 bindings/python/src/pre_tokenizers.rs | 21 +++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 bindings/python/src/pre_tokenizers.rs

diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index b1642229..e558c9b0 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -1,4 +1,5 @@
 mod models;
+mod pre_tokenizers;
 mod token;
 mod tokenizer;
 mod utils;
@@ -14,10 +15,18 @@ pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
     Ok(())
 }
 
+/// PreTokenizers Module
+#[pymodule]
+pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
+    m.add_class::()?;
+    Ok(())
+}
+
 /// Tokenizers Module
 #[pymodule]
 fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::()?;
     m.add_wrapped(wrap_pymodule!(models))?;
+    m.add_wrapped(wrap_pymodule!(pre_tokenizers))?;
     Ok(())
 }
diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs
new file mode 100644
index 00000000..26eadc68
--- /dev/null
+++ b/bindings/python/src/pre_tokenizers.rs
@@ -0,0 +1,21 @@
+extern crate tokenizers as tk;
+
+use super::utils::Container;
+use pyo3::prelude::*;
+
+#[pyclass]
+pub struct PreTokenizer {
+    pub pretok: Container,
+}
+
+#[pyclass]
+pub struct ByteLevel {}
+#[pymethods]
+impl ByteLevel {
+    #[staticmethod]
+    fn new() -> PyResult<PreTokenizer> {
+        Ok(PreTokenizer {
+            pretok: Container::Owned(Box::new(tk::pre_tokenizers::byte_level::ByteLevel)),
+        })
+    }
+}