Support None to reset pre_tokenizers and normalizers, and index sequences (#1590)

* initial commit

* support None

* fix clippy

* cleanup

* clean?

* propagate to pre_tokenizer

* fix test

* fix rust tests

* fix node

* propagate to decoder and post processor

* fix calls

* lint

* fmt

* node be happy I am fixing you

* add a small test

* styling

* style merge

* fix merge test

* fmt

* nits

* update test
Author: Arthur
Date: 2024-08-07 12:52:35 +02:00 (committed via GitHub)
Parent: eea8e1ae6f
Commit: bded212356
13 changed files with 134 additions and 67 deletions
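
In Python terms, the two behaviours named in the title look roughly like the sketch below. This is a hedged illustration, not code from the diff: the BPE model and the particular normalizers are arbitrary choices, but setting a component to None and indexing a Sequence are exactly what this commit adds.

from tokenizers import Tokenizer, normalizers
from tokenizers.models import BPE

tokenizer = Tokenizer(BPE())
tokenizer.normalizer = normalizers.Sequence(
    [normalizers.NFC(), normalizers.Lowercase()]
)

# Sequence normalizers are now indexable, each element coming back
# as its concrete subtype.
first = tokenizer.normalizer[0]   # the NFC normalizer
second = tokenizer.normalizer[1]  # the Lowercase normalizer

# Assigning None resets the component instead of raising; per the
# commit list above this was also propagated to pre_tokenizer,
# decoder, and post_processor.
tokenizer.normalizer = None
tokenizer.pre_tokenizer = None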

@@ -1,8 +1,6 @@
-use std::sync::{Arc, RwLock};
-
-use pyo3::exceptions;
-use pyo3::prelude::*;
-use pyo3::types::*;
+use pyo3::{exceptions, prelude::*};
+use std::sync::{Arc, RwLock};
 
 use crate::error::ToPyResult;
 use crate::utils::{PyNormalizedString, PyNormalizedStringRefMut, PyPattern};
@@ -354,6 +352,7 @@ impl PyNFKC
 /// A list of Normalizer to be run as a sequence
 #[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Sequence")]
 pub struct PySequence {}
+
 #[pymethods]
 impl PySequence {
     #[new]
@@ -380,6 +379,22 @@ impl PySequence
     fn __len__(&self) -> usize {
         0
     }
+
+    fn __getitem__(self_: PyRef<'_, Self>, py: Python<'_>, index: usize) -> PyResult<Py<PyAny>> {
+        match &self_.as_ref().normalizer {
+            PyNormalizerTypeWrapper::Sequence(inner) => match inner.get(index) {
+                Some(item) => PyNormalizer::new(PyNormalizerTypeWrapper::Single(Arc::clone(item)))
+                    .get_as_subtype(py),
+                _ => Err(PyErr::new::<pyo3::exceptions::PyIndexError, _>(
+                    "Index not found",
+                )),
+            },
+            PyNormalizerTypeWrapper::Single(inner) => {
+                PyNormalizer::new(PyNormalizerTypeWrapper::Single(Arc::clone(inner)))
+                    .get_as_subtype(py)
+            }
+        }
+    }
 }
 
 /// Lowercase Normalizer
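
The __getitem__ above hands the selected element back wrapped as its concrete Python subtype via get_as_subtype; an out-of-range index on a Sequence raises IndexError ("Index not found"), and a normalizer wrapped as Single returns itself for any index. A short sketch of the resulting Python behaviour (the concrete normalizer classes are illustrative, not taken from the diff):

from tokenizers import normalizers

seq = normalizers.Sequence([normalizers.NFD(), normalizers.StripAccents()])
assert isinstance(seq[0], normalizers.NFD)
assert isinstance(seq[1], normalizers.StripAccents)

try:
    seq[5]            # out of range for a two-element Sequence
except IndexError:
    pass              # "Index not found", raised from the Rust match above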