mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Python - Add pre_tokenizers module
This commit is contained in:
@ -1,4 +1,5 @@
|
|||||||
mod models;
|
mod models;
|
||||||
|
mod pre_tokenizers;
|
||||||
mod token;
|
mod token;
|
||||||
mod tokenizer;
|
mod tokenizer;
|
||||||
mod utils;
|
mod utils;
|
||||||
@ -14,10 +15,18 @@ pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// PreTokenizers Module
///
/// Registers the `pre_tokenizers::ByteLevel` class on the module `m`
/// and returns `Ok(())`; the `?` propagates any error from `add_class`.
|
||||||
|
#[pymodule]
|
||||||
|
pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
|
m.add_class::<pre_tokenizers::ByteLevel>()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Tokenizers Module
|
/// Tokenizers Module
///
/// Adds the `tokenizer::Tokenizer` class to `m`, then attaches the
/// `models` and `pre_tokenizers` modules via `wrap_pymodule!`.
|
||||||
#[pymodule]
|
#[pymodule]
|
||||||
fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
|
fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
m.add_class::<tokenizer::Tokenizer>()?;
|
m.add_class::<tokenizer::Tokenizer>()?;
|
||||||
m.add_wrapped(wrap_pymodule!(models))?;
|
m.add_wrapped(wrap_pymodule!(models))?;
|
||||||
|
// Line added by this commit: attaches the `pre_tokenizers` module to `m`.
m.add_wrapped(wrap_pymodule!(pre_tokenizers))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
21
bindings/python/src/pre_tokenizers.rs
Normal file
21
bindings/python/src/pre_tokenizers.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
extern crate tokenizers as tk;
|
||||||
|
|
||||||
|
use super::utils::Container;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
|
||||||
|
/// `#[pyclass]` wrapper around a pre-tokenizer trait object.
///
/// The wrapped value lives in a `Container` (imported from `super::utils`).
#[pyclass]
|
||||||
|
pub struct PreTokenizer {
|
||||||
|
/// The wrapped `dyn tk::tokenizer::PreTokenizer + Sync` trait object.
pub pretok: Container<dyn tk::tokenizer::PreTokenizer + Sync>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Field-less `#[pyclass]` for the byte-level pre-tokenizer.
///
/// It carries no state of its own; its `new` static method returns a
/// `PreTokenizer` rather than a `ByteLevel` instance.
#[pyclass]
|
||||||
|
pub struct ByteLevel {}
|
||||||
|
#[pymethods]
|
||||||
|
impl ByteLevel {
|
||||||
|
/// Builds a `PreTokenizer` whose `pretok` is a `Container::Owned`
/// holding a boxed `tk::pre_tokenizers::byte_level::ByteLevel`.
///
/// Takes no arguments and always returns `Ok`.
#[staticmethod]
|
||||||
|
fn new() -> PyResult<PreTokenizer> {
|
||||||
|
Ok(PreTokenizer {
|
||||||
|
pretok: Container::Owned(Box::new(tk::pre_tokenizers::byte_level::ByteLevel)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user