Python - Provide mapping to original offsets

As requested on #81
This commit is contained in:
Anthony MOI
2020-02-05 13:33:19 -05:00
parent 42c4691e4d
commit 8decd020cb
2 changed files with 74 additions and 46 deletions

View File

@ -7,6 +7,43 @@ use pyo3::types::*;
use pyo3::{PyMappingProtocol, PyObjectProtocol};
use tk::tokenizer::PaddingDirection;
/// Converts a Python indexing argument into a half-open `start..stop` range.
///
/// `item` may be one of:
/// - an integer `i` with `-max_len <= i < max_len`, selecting a single position;
/// - a Python `slice`, used as given (its step is ignored, only the start and
///   stop resolved by `indices` are kept);
/// - a tuple of exactly two integers `(start, end)`, treated like
///   `slice(start, end, 1)`.
///
/// `max_len` is the length the slice is resolved against.
///
/// The returned range is never inverted and never built from a negative
/// index: negative resolved bounds are clamped to `0` and a stop smaller than
/// the start yields the empty range `start..start`.
///
/// # Errors
///
/// - `IndexError` when an integer index is outside `-max_len..max_len`.
/// - `TypeError` when `item` is a tuple whose length is not 2, or is neither
///   an integer, a slice nor a tuple.
/// - Any error raised while extracting the tuple items as integers or while
///   resolving the slice indices is propagated unchanged.
fn get_range(item: PyObject, max_len: usize) -> PyResult<std::ops::Range<usize>> {
    let gil = Python::acquire_gil();
    let py = gil.python();

    // Normalize every accepted input shape to a Python slice object.
    let slice = if let Ok(index) = item.extract::<isize>(py) {
        if index >= max_len as isize || index < -(max_len as isize) {
            Err(exceptions::IndexError::py_err("Index out of bounds"))
        } else {
            Ok(if index == -1 {
                // `slice(-1, 0)` would be empty, so the last position is
                // expressed as `slice(-1, max_len)` instead.
                PySlice::new(py, index, max_len as isize, 1)
            } else {
                PySlice::new(py, index, index + 1, 1)
            })
        }
    } else if let Ok(slice) = item.cast_as::<PySlice>(py) {
        Ok(slice)
    } else if let Ok(offset) = item.cast_as::<PyTuple>(py) {
        if offset.len() == 2 {
            let start = offset.get_item(0).extract::<isize>()?;
            let end = offset.get_item(1).extract::<isize>()?;
            Ok(PySlice::new(py, start, end, 1))
        } else {
            Err(exceptions::TypeError::py_err("Expected Tuple[int, int]"))
        }
    } else {
        Err(exceptions::TypeError::py_err(
            "Expected number or slice or Tuple[int, int]",
        ))
    }?;

    // Saturate instead of wrapping: going through `i32` turned lengths above
    // `i32::MAX` into negative values before they reached `indices`.
    let max_c_long = std::os::raw::c_long::max_value() as usize;
    let len = std::cmp::min(max_len, max_c_long) as std::os::raw::c_long;
    let PySliceIndices { start, stop, .. } = slice.indices(len)?;

    // `indices` hands back signed values; a negative one (possible with a
    // negative step, see Python's `slice.indices`) must not be cast blindly
    // to `usize`, and `start > stop` must not produce an inverted range.
    let start = std::cmp::max(start, 0) as usize;
    let stop = std::cmp::max(stop, 0) as usize;
    Ok(start..std::cmp::max(start, stop))
}
enum IndexableStringType {
Original,
Normalized,
@ -18,7 +55,19 @@ pub struct IndexableString {
t: IndexableStringType,
}
#[pymethods]
impl IndexableString {}
impl IndexableString {
    /// Resolves `item` (anything `get_range` accepts) against this string's
    /// length and returns the matching `(start, end)` offsets.
    ///
    /// For the `Original` variant the resolved range is passed to
    /// `get_original_offsets`, whose optional result is returned as a tuple.
    /// For the `Normalized` variant the resolved range itself is returned.
    /// Errors raised by `get_range` are propagated.
    fn offsets(&self, item: PyObject) -> PyResult<Option<(usize, usize)>> {
        let requested = get_range(item, self.s.len())?;

        let resolved = match self.t {
            IndexableStringType::Original => self
                .s
                .get_original_offsets(requested)
                .map(|span| (span.start, span.end)),
            IndexableStringType::Normalized => Some((requested.start, requested.end)),
        };

        Ok(resolved)
    }
}
#[pyproto]
impl PyObjectProtocol for IndexableString {
@ -40,40 +89,8 @@ impl PyObjectProtocol for IndexableString {
#[pyproto]
impl PyMappingProtocol for IndexableString {
fn __getitem__(&self, item: PyObject) -> PyResult<String> {
let gil = Python::acquire_gil();
let py = gil.python();
// Make a slice from a number or get a slice directly
let slice = if let Ok(index) = item.extract::<isize>(py) {
if index >= self.s.len() as isize || index < -(self.s.len() as isize) {
Err(exceptions::IndexError::py_err("Index out of bounds"))
} else {
Ok(if index == -1 {
PySlice::new(py, index, self.s.len() as isize, 1)
} else {
PySlice::new(py, index, index + 1, 1)
})
}
} else if let Ok(slice) = item.cast_as::<PySlice>(py) {
Ok(slice)
} else if let Ok(offset) = item.cast_as::<PyTuple>(py) {
if offset.len() == 2 {
let start = offset.get_item(0).extract::<isize>()?;
let end = offset.get_item(1).extract::<isize>()?;
Ok(PySlice::new(py, start, end, 1))
} else {
Err(exceptions::TypeError::py_err("Expected Tuple[int, int]"))
}
} else {
Err(exceptions::TypeError::py_err(
"Expected number or slice or Tuple[int, int]",
))
}?;
// Find out range from the slice
let len: std::os::raw::c_long = (self.s.len() as i32) as _;
let PySliceIndices { start, stop, .. } = slice.indices(len)?;
let range = start as usize..stop as usize;
// Find out the range
let range = get_range(item, self.s.len())?;
// Get the range from the relevant string
let s = match self.t {