Python - Add pre_tokenizers module

This commit is contained in:
Anthony MOI
2019-11-22 20:56:50 -05:00
parent 39a6d04c53
commit f6a9b57b5b
2 changed files with 30 additions and 0 deletions

View File

@@ -1,4 +1,5 @@
mod models;
mod pre_tokenizers;
mod token;
mod tokenizer;
mod utils;
@@ -14,10 +15,18 @@ pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
Ok(())
}
/// PreTokenizers Module
#[pymodule]
pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
    // Only `ByteLevel` is registered on this submodule. The registration
    // result is already a `PyResult<()>`, so it is returned as-is.
    m.add_class::<pre_tokenizers::ByteLevel>()
}
/// Tokenizers Module
#[pymodule]
fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
    // Top-level class first.
    m.add_class::<tokenizer::Tokenizer>()?;

    // Then the nested modules, in the same order as before: `models`,
    // followed by `pre_tokenizers`. The last registration's result doubles
    // as this function's return value.
    m.add_wrapped(wrap_pymodule!(models))?;
    m.add_wrapped(wrap_pymodule!(pre_tokenizers))
}

View File

@@ -0,0 +1,21 @@
extern crate tokenizers as tk;
use super::utils::Container;
use pyo3::prelude::*;
// Class exposed to Python via `#[pyclass]`; wraps a pre-tokenizer from the
// `tokenizers` crate (imported in this file as `tk`).
#[pyclass]
pub struct PreTokenizer {
// The wrapped pre-tokenizer, stored as a `Sync` trait object so that any
// implementation of `tk::tokenizer::PreTokenizer` fits in this one field.
// NOTE(review): `Container` comes from `super::utils`; its ownership
// semantics are not visible from this file — confirm there.
pub pretok: Container<dyn tk::tokenizer::PreTokenizer + Sync>,
}
// Python-visible class with no fields of its own; it only hosts the static
// constructor below.
#[pyclass]
pub struct ByteLevel {}

#[pymethods]
impl ByteLevel {
    // Static method that builds a byte-level pre-tokenizer. It hands back the
    // generic `PreTokenizer` wrapper, not a `ByteLevel` instance.
    #[staticmethod]
    fn new() -> PyResult<PreTokenizer> {
        // The explicit trait-object type keeps the unsized coercion in place
        // now that the box is no longer built inline in the field initializer.
        let boxed: Box<dyn tk::tokenizer::PreTokenizer + Sync> =
            Box::new(tk::pre_tokenizers::byte_level::ByteLevel);
        let pretok = Container::Owned(boxed);
        Ok(PreTokenizer { pretok })
    }
}