mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Python - Add ability to retrieve a range of string
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
extern crate tokenizers as tk;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::*;
|
||||
|
||||
#[pyclass(dict)]
|
||||
#[repr(transparent)]
|
||||
@ -26,6 +27,34 @@ impl Encoding {
|
||||
self.encoding.get_normalized().get().to_owned()
|
||||
}
|
||||
|
||||
#[args(kwargs = "**")]
|
||||
fn get_range(
|
||||
&self,
|
||||
range: (usize, usize),
|
||||
kwargs: Option<&PyDict>,
|
||||
) -> PyResult<Option<String>> {
|
||||
let mut original = false;
|
||||
if let Some(kwargs) = kwargs {
|
||||
if let Some(koriginal) = kwargs.get_item("original") {
|
||||
original = koriginal.extract()?;
|
||||
}
|
||||
}
|
||||
|
||||
if original {
|
||||
Ok(self
|
||||
.encoding
|
||||
.get_normalized()
|
||||
.get_range_original(range.0..range.1)
|
||||
.map(|s| s.to_owned()))
|
||||
} else {
|
||||
Ok(self
|
||||
.encoding
|
||||
.get_normalized()
|
||||
.get_range(range.0..range.1)
|
||||
.map(|s| s.to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_ids(&self) -> Vec<u32> {
|
||||
self.encoding.get_ids().to_vec()
|
||||
|
Reference in New Issue
Block a user