mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Add Sequence Normalizer
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
extern crate tokenizers as tk;
|
||||
|
||||
use super::utils::Container;
|
||||
use pyo3::exceptions;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::*;
|
||||
|
||||
@ -92,3 +93,31 @@ impl NFKC {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub struct Sequence {}
|
||||
#[pymethods]
|
||||
impl Sequence {
|
||||
#[staticmethod]
|
||||
fn new(normalizers: &PyList) -> PyResult<Normalizer> {
|
||||
let normalizers = normalizers
|
||||
.iter()
|
||||
.map(|n| {
|
||||
let normalizer: &mut Normalizer = n.extract()?;
|
||||
if let Some(normalizer) = normalizer.normalizer.to_pointer() {
|
||||
Ok(normalizer)
|
||||
} else {
|
||||
Err(exceptions::Exception::py_err(
|
||||
"At least one normalizer is already being used in another Tokenizer",
|
||||
))
|
||||
}
|
||||
})
|
||||
.collect::<PyResult<_>>()?;
|
||||
|
||||
Ok(Normalizer {
|
||||
normalizer: Container::Owned(Box::new(tk::normalizers::utils::Sequence::new(
|
||||
normalizers,
|
||||
))),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user