mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 20:58:22 +00:00
Python - Expose Whitespace PreTokenizer
This commit is contained in:
@@ -35,6 +35,7 @@ fn models(_py: Python, m: &PyModule) -> PyResult<()> {
|
|||||||
fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
|
fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
m.add_class::<pre_tokenizers::PreTokenizer>()?;
|
m.add_class::<pre_tokenizers::PreTokenizer>()?;
|
||||||
m.add_class::<pre_tokenizers::ByteLevel>()?;
|
m.add_class::<pre_tokenizers::ByteLevel>()?;
|
||||||
|
m.add_class::<pre_tokenizers::Whitespace>()?;
|
||||||
m.add_class::<pre_tokenizers::BertPreTokenizer>()?;
|
m.add_class::<pre_tokenizers::BertPreTokenizer>()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,6 +52,18 @@ impl ByteLevel {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[pyclass]
|
||||||
|
pub struct Whitespace {}
|
||||||
|
#[pymethods]
|
||||||
|
impl Whitespace {
|
||||||
|
#[staticmethod]
|
||||||
|
fn new() -> PyResult<PreTokenizer> {
|
||||||
|
Ok(PreTokenizer {
|
||||||
|
pretok: Container::Owned(Box::new(tk::pre_tokenizers::whitespace::Whitespace)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[pyclass]
|
#[pyclass]
|
||||||
pub struct BertPreTokenizer {}
|
pub struct BertPreTokenizer {}
|
||||||
#[pymethods]
|
#[pymethods]
|
||||||
|
|||||||
Reference in New Issue
Block a user