Add Sequence Normalizer

This commit is contained in:
Anthony MOI
2020-01-06 21:03:05 -05:00
parent 5c02bbbc4c
commit 185b6f0b8b
6 changed files with 70 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
extern crate tokenizers as tk;
use super::utils::Container;
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::*;
@@ -92,3 +93,31 @@ impl NFKC {
})
}
}
/// Python-exposed handle for building a sequence of normalizers.
///
/// The struct itself holds no state: its only method, the static `new`,
/// returns a `Normalizer` wrapping a `tk::normalizers::utils::Sequence`
/// rather than an instance of this type.
#[pyclass]
pub struct Sequence {}
#[pymethods]
impl Sequence {
    /// Builds a `Normalizer` backed by a `tk::normalizers::utils::Sequence`
    /// assembled from every element of `normalizers`.
    ///
    /// Each list element is extracted as a `Normalizer` and its inner
    /// container is asked for its pointer via `to_pointer()`.
    ///
    /// # Errors
    ///
    /// * Propagates the extraction error if an element cannot be extracted
    ///   as a `Normalizer`.
    /// * Raises a Python `Exception` if `to_pointer()` yields `None` for any
    ///   element (per the message: the normalizer is already in use by
    ///   another Tokenizer).
    #[staticmethod]
    fn new(normalizers: &PyList) -> PyResult<Normalizer> {
        // Stops at the first failing element thanks to collecting into `PyResult`.
        let owned = normalizers
            .iter()
            .map(|item| {
                let wrapper: &mut Normalizer = item.extract()?;
                wrapper.normalizer.to_pointer().ok_or_else(|| {
                    exceptions::Exception::py_err(
                        "At least one normalizer is already being used in another Tokenizer",
                    )
                })
            })
            .collect::<PyResult<_>>()?;

        let sequence = tk::normalizers::utils::Sequence::new(owned);
        Ok(Normalizer {
            normalizer: Container::Owned(Box::new(sequence)),
        })
    }
}