diff --git a/bindings/python/src/encoding.rs b/bindings/python/src/encoding.rs
index 17954814..f5d5e9d0 100644
--- a/bindings/python/src/encoding.rs
+++ b/bindings/python/src/encoding.rs
@@ -1,6 +1,7 @@
 extern crate tokenizers as tk;
 
 use pyo3::prelude::*;
+use pyo3::types::*;
 
 #[pyclass(dict)]
 #[repr(transparent)]
@@ -26,6 +27,41 @@ impl Encoding {
         self.encoding.get_normalized().get().to_owned()
     }
 
+    /// Return the text covered by `range` (`range.0..range.1`) from this
+    /// encoding's normalized string, or `None` when the lookup yields nothing.
+    ///
+    /// The only keyword argument read is `original` (default `false`): when
+    /// `true` the lookup goes through `get_range_original` instead of
+    /// `get_range`. Any other keyword argument is silently ignored, and a
+    /// value that cannot be extracted as `bool` raises the extraction error.
+    #[args(kwargs = "**")]
+    fn get_range(
+        &self,
+        range: (usize, usize),
+        kwargs: Option<&PyDict>,
+    ) -> PyResult<Option<String>> {
+        let mut original = false;
+        if let Some(kwargs) = kwargs {
+            if let Some(koriginal) = kwargs.get_item("original") {
+                original = koriginal.extract()?;
+            }
+        }
+
+        if original {
+            Ok(self
+                .encoding
+                .get_normalized()
+                .get_range_original(range.0..range.1)
+                .map(|s| s.to_owned()))
+        } else {
+            Ok(self
+                .encoding
+                .get_normalized()
+                .get_range(range.0..range.1)
+                .map(|s| s.to_owned()))
+        }
+    }
+
     #[getter]
     fn get_ids(&self) -> Vec<u32> {
         self.encoding.get_ids().to_vec()