PyO3 0.21. (#1494)

* PyO3 0.21.

* Upgraded everything.

* Rustfmt.
Author:    Nicolas Patry
Date:      2024-04-16 13:49:52 +02:00
Committed: GitHub
Parent:    914576f7ed
Commit:    d5a8cc7a49

16 changed files with 180 additions and 197 deletions
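Most of the changes below are one mechanical migration: PyO3 0.21 deprecates the "GIL Ref" API (&PyAny, &PyDict, &PyList, ...) in favor of the Bound<'py, T> smart pointer, with `_bound` constructor variants on the concrete types. A minimal sketch of the recurring pattern (illustrative names, not code from this diff):

use pyo3::prelude::*;
use pyo3::types::PyDict;

// PyO3 0.20 shape:  fn configure(kwargs: Option<&PyDict>) -> PyResult<usize>
// PyO3 0.21 shape, as applied throughout the bindings below:
#[pyfunction]
#[pyo3(signature = (**kwargs))]
fn configure(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<usize> {
    let mut seen = 0;
    if let Some(kwargs) = kwargs {
        // Iterating a Bound dict yields (Bound<PyAny>, Bound<PyAny>) pairs.
        for (key, _value) in kwargs {
            let _name: String = key.extract()?;
            seen += 1;
        }
    }
    Ok(seen)
}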


@@ -9,24 +9,24 @@ name = "tokenizers"
 crate-type = ["cdylib"]

 [dependencies]
-rayon = "1.8"
+rayon = "1.10"
 serde = { version = "1.0", features = [ "rc", "derive" ]}
 serde_json = "1.0"
 libc = "0.2"
-env_logger = "0.10.0"
-pyo3 = { version = "0.20" }
-numpy = "0.20.0"
+env_logger = "0.11"
+pyo3 = { version = "0.21" }
+numpy = "0.21"
 ndarray = "0.15"
 onig = { version = "6.4", default-features = false }
-itertools = "0.11"
+itertools = "0.12"

 [dependencies.tokenizers]
 version = "0.16.0-dev.0"
 path = "../../tokenizers"

 [dev-dependencies]
-tempfile = "3.8"
-pyo3 = { version = "0.20", features = ["auto-initialize"] }
+tempfile = "3.10"
+pyo3 = { version = "0.21", features = ["auto-initialize"] }

 [features]
 defaut = ["pyo3/extension-module"]


@@ -1,7 +1,6 @@
 use std::sync::{Arc, RwLock};

 use crate::pre_tokenizers::from_string;
-use crate::utils::PyChar;
 use crate::utils::PyPattern;
 use pyo3::exceptions;
 use pyo3::prelude::*;
@@ -85,7 +84,7 @@ impl PyDecoder {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -161,7 +160,7 @@ pub struct PyByteLevelDec {}
 impl PyByteLevelDec {
     #[new]
     #[pyo3(signature = (**_kwargs), text_signature = "(self)")]
-    fn new(_kwargs: Option<&PyDict>) -> (Self, PyDecoder) {
+    fn new(_kwargs: Option<&Bound<'_, PyDict>>) -> (Self, PyDecoder) {
         (PyByteLevelDec {}, ByteLevel::default().into())
     }
 }
@@ -318,8 +317,8 @@ impl PyMetaspaceDec {
     }

     #[setter]
-    fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
-        setter!(self_, Metaspace, @set_replacement, replacement.0);
+    fn set_replacement(self_: PyRef<Self>, replacement: char) {
+        setter!(self_, Metaspace, @set_replacement, replacement);
     }

     #[getter]
@@ -352,16 +351,12 @@ impl PyMetaspaceDec {
     }

     #[new]
-    #[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
-    fn new(
-        replacement: PyChar,
-        prepend_scheme: String,
-        split: bool,
-    ) -> PyResult<(Self, PyDecoder)> {
+    #[pyo3(signature = (replacement = '▁', prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
+    fn new(replacement: char, prepend_scheme: String, split: bool) -> PyResult<(Self, PyDecoder)> {
         let prepend_scheme = from_string(prepend_scheme)?;
         Ok((
             PyMetaspaceDec {},
-            Metaspace::new(replacement.0, prepend_scheme, split).into(),
+            Metaspace::new(replacement, prepend_scheme, split).into(),
         ))
     }
 }
@@ -463,7 +458,7 @@ pub struct PySequenceDecoder {}
 impl PySequenceDecoder {
     #[new]
     #[pyo3(signature = (decoders_py), text_signature = "(self, decoders)")]
-    fn new(decoders_py: &PyList) -> PyResult<(Self, PyDecoder)> {
+    fn new(decoders_py: &Bound<'_, PyList>) -> PyResult<(Self, PyDecoder)> {
         let mut decoders: Vec<DecoderWrapper> = Vec::with_capacity(decoders_py.len());
         for decoder_py in decoders_py.iter() {
             let decoder: PyRef<PyDecoder> = decoder_py.extract()?;
@@ -476,8 +471,8 @@ impl PySequenceDecoder {
         Ok((PySequenceDecoder {}, Sequence::new(decoders).into()))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }
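The `__getnewargs__` rewrites above show the constructor side of the migration: `PyTuple::new`/`PyList::empty` returned GIL refs borrowed from the pool, while the 0.21 `_bound` variants return owned `Bound` handles tied to the `'py` lifetime. A standalone sketch:

use pyo3::prelude::*;
use pyo3::types::{PyList, PyTuple};

// Builds the ([],) tuple that pickling support hands back to __new__.
fn default_newargs(py: Python<'_>) -> Bound<'_, PyTuple> {
    PyTuple::new_bound(py, [PyList::empty_bound(py)])
}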
@@ -497,7 +492,7 @@ impl Decoder for CustomDecoder {
         Python::with_gil(|py| {
             let decoded = self
                 .inner
-                .call_method(py, "decode", (tokens,), None)?
+                .call_method_bound(py, "decode", (tokens,), None)?
                 .extract(py)?;
             Ok(decoded)
         })
@@ -507,7 +502,7 @@ impl Decoder for CustomDecoder {
         Python::with_gil(|py| {
             let decoded = self
                 .inner
-                .call_method(py, "decode_chain", (tokens,), None)?
+                .call_method_bound(py, "decode_chain", (tokens,), None)?
                 .extract(py)?;
             Ok(decoded)
         })
@@ -572,7 +567,7 @@ impl Decoder for PyDecoderWrapper {

 /// Decoders Module
 #[pymodule]
-pub fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn decoders(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyDecoder>()?;
     m.add_class::<PyByteLevelDec>()?;
     m.add_class::<PyReplaceDec>()?;
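Every `#[pymodule]` function in the bindings drops its unused `Python` argument and takes `&Bound<'_, PyModule>`, a form PyO3 0.21 accepts directly. Hedged sketch of the new shape (illustrative module, not from this diff):

use pyo3::prelude::*;

// The real tokenizers modules only register classes; this just shows the signature.
#[pymodule]
fn example(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add("answer", 42)?;
    Ok(())
}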
@@ -602,7 +597,7 @@ mod test {
         Python::with_gil(|py| {
             let py_dec = PyDecoder::new(Metaspace::default().into());
             let py_meta = py_dec.get_as_subtype(py).unwrap();
-            assert_eq!("Metaspace", py_meta.as_ref(py).get_type().name().unwrap());
+            assert_eq!("Metaspace", py_meta.bind(py).get_type().qualname().unwrap());
         })
     }
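The test updates repeat another 0.21 idiom: a stored `Py<T>` is re-attached to the GIL with `bind(py)` instead of `as_ref(py)`, and `name()` gives way to `qualname()`, which returns an owned String. A hedged standalone equivalent:

use pyo3::prelude::*;

// 0.20: obj.as_ref(py).get_type().name()
// 0.21: bind(py) yields &Bound<'_, PyAny>; get_type() a Bound<PyType>.
fn type_qualname(obj: &Py<PyAny>) -> PyResult<String> {
    Python::with_gil(|py| obj.bind(py).get_type().qualname())
}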


@@ -37,7 +37,7 @@ impl PyEncoding {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -391,10 +391,10 @@ impl PyEncoding {
     #[pyo3(
         text_signature = "(self, length, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]')"
     )]
-    fn pad(&mut self, length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn pad(&mut self, length: usize, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
         let mut pad_id = 0;
         let mut pad_type_id = 0;
-        let mut pad_token = "[PAD]";
+        let mut pad_token = "[PAD]".to_string();
         let mut direction = PaddingDirection::Right;

         if let Some(kwargs) = kwargs {
@@ -422,7 +422,7 @@ impl PyEncoding {
             }
         }
         self.encoding
-            .pad(length, pad_id, pad_type_id, pad_token, direction);
+            .pad(length, pad_id, pad_type_id, &pad_token, direction);
         Ok(())
     }


@@ -35,7 +35,7 @@ impl<T> ToPyResult<T> {
 }

 pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
-    let deprecation_warning = py.import("builtins")?.getattr("DeprecationWarning")?;
+    let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
     let full_message = format!("Deprecated in {}: {}", version, message);
-    pyo3::PyErr::warn(py, deprecation_warning, &full_message, 0)
+    pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
 }


@@ -47,7 +47,7 @@ extern "C" fn child_after_fork() {
 /// Tokenizers Module
 #[pymodule]
-pub fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     let _ = env_logger::try_init_from_env("TOKENIZERS_LOG");

     // Register the fork callback


@@ -105,7 +105,7 @@ impl PyModel {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -260,7 +260,10 @@ impl PyModel {
 pub struct PyBPE {}

 impl PyBPE {
-    fn with_builder(mut builder: BpeBuilder, kwargs: Option<&PyDict>) -> PyResult<(Self, PyModel)> {
+    fn with_builder(
+        mut builder: BpeBuilder,
+        kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> PyResult<(Self, PyModel)> {
         if let Some(kwargs) = kwargs {
             for (key, value) in kwargs {
                 let key: &str = key.extract()?;
@@ -321,14 +324,14 @@ macro_rules! setter {
 }

 #[derive(FromPyObject)]
-enum PyVocab<'a> {
+enum PyVocab {
     Vocab(Vocab),
-    Filename(&'a str),
+    Filename(String),
 }

 #[derive(FromPyObject)]
-enum PyMerges<'a> {
+enum PyMerges {
     Merges(Merges),
-    Filename(&'a str),
+    Filename(String),
 }

 #[pymethods]
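Dropping the <'a> lifetimes from PyVocab/PyMerges is the same story as pad_token: a derived FromPyObject that borrowed &'a str from the input no longer fits the Bound extraction model, so the variants own their data. Sketch of the owned form (hypothetical stand-in types):

use pyo3::prelude::*;
use std::collections::HashMap;

// Accepts either an in-memory vocab or a filename, like PyVocab above;
// the derive tries each variant in order.
#[derive(FromPyObject)]
enum VocabArg {
    Vocab(HashMap<String, u32>),
    Filename(String),
}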
@@ -417,7 +420,7 @@ impl PyBPE {
         py: Python<'_>,
         vocab: Option<PyVocab>,
         merges: Option<PyMerges>,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         if (vocab.is_some() && merges.is_none()) || (vocab.is_none() && merges.is_some()) {
             return Err(exceptions::PyValueError::new_err(
@@ -502,11 +505,11 @@ impl PyBPE {
     #[pyo3(signature = (vocab, merges, **kwargs))]
     #[pyo3(text_signature = "(cls, vocab, merge, **kwargs)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
         merges: &str,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<Py<Self>> {
         let (vocab, merges) = BPE::read_file(vocab, merges).map_err(|e| {
             exceptions::PyException::new_err(format!("Error while reading BPE files: {}", e))
@@ -540,7 +543,7 @@ pub struct PyWordPiece {}
 impl PyWordPiece {
     fn with_builder(
         mut builder: WordPieceBuilder,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -612,7 +615,7 @@ impl PyWordPiece {
     fn new(
         py: Python<'_>,
         vocab: Option<PyVocab>,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         let mut builder = WordPiece::builder();
@@ -677,10 +680,10 @@ impl PyWordPiece {
     #[pyo3(signature = (vocab, **kwargs))]
     #[pyo3(text_signature = "(vocab, **kwargs)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<Py<Self>> {
         let vocab = WordPiece::read_file(vocab).map_err(|e| {
             exceptions::PyException::new_err(format!("Error while reading WordPiece file: {}", e))
@@ -796,7 +799,7 @@ impl PyWordLevel {
     #[pyo3(signature = (vocab, unk_token = None))]
     #[pyo3(text_signature = "(vocab, unk_token)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
         unk_token: Option<String>,
@@ -849,7 +852,7 @@ impl PyUnigram {

 /// Models Module
 #[pymodule]
-pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn models(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyModel>()?;
     m.add_class::<PyBPE>()?;
     m.add_class::<PyWordPiece>()?;
@@ -870,7 +873,7 @@ mod test {
         Python::with_gil(|py| {
             let py_model = PyModel::from(BPE::default());
             let py_bpe = py_model.get_as_subtype(py).unwrap();
-            assert_eq!("BPE", py_bpe.as_ref(py).get_type().name().unwrap());
+            assert_eq!("BPE", py_bpe.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -113,7 +113,7 @@ impl PyNormalizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -345,7 +345,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(text_signature = None)]
-    fn new(normalizers: &PyList) -> PyResult<(Self, PyNormalizer)> {
+    fn new(normalizers: &Bound<'_, PyList>) -> PyResult<(Self, PyNormalizer)> {
         let mut sequence = Vec::with_capacity(normalizers.len());
         for n in normalizers.iter() {
             let normalizer: PyRef<PyNormalizer> = n.extract()?;
@@ -360,8 +360,8 @@ impl PySequence {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }

     fn __len__(&self) -> usize {
@@ -467,11 +467,11 @@ pub struct PyPrecompiled {}
 impl PyPrecompiled {
     #[new]
     #[pyo3(text_signature = "(self, precompiled_charsmap)")]
-    fn new(py_precompiled_charsmap: &PyBytes) -> PyResult<(Self, PyNormalizer)> {
-        let precompiled_charsmap: &[u8] = FromPyObject::extract(py_precompiled_charsmap)?;
+    fn new(precompiled_charsmap: Vec<u8>) -> PyResult<(Self, PyNormalizer)> {
+        // let precompiled_charsmap: Vec<u8> = FromPyObject::extract(py_precompiled_charsmap)?;
         Ok((
             PyPrecompiled {},
-            Precompiled::from(precompiled_charsmap)
+            Precompiled::from(&precompiled_charsmap)
                 .map_err(|e| {
                     exceptions::PyException::new_err(format!(
                         "Error while attempting to build Precompiled normalizer: {}",
@@ -512,7 +512,7 @@ impl tk::tokenizer::Normalizer for CustomNormalizer {
     fn normalize(&self, normalized: &mut NormalizedString) -> tk::Result<()> {
         Python::with_gil(|py| {
             let normalized = PyNormalizedStringRefMut::new(normalized);
-            let py_normalized = self.inner.as_ref(py);
+            let py_normalized = self.inner.bind(py);
             py_normalized.call_method("normalize", (normalized.get(),), None)?;
             Ok(())
         })
@@ -635,7 +635,7 @@ impl Normalizer for PyNormalizerWrapper {

 /// Normalizers Module
 #[pymodule]
-pub fn normalizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn normalizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyNormalizer>()?;
     m.add_class::<PyBertNormalizer>()?;
     m.add_class::<PyNFD>()?;
@@ -667,7 +667,7 @@ mod test {
         Python::with_gil(|py| {
             let py_norm = PyNormalizer::new(NFC.into());
             let py_nfc = py_norm.get_as_subtype(py).unwrap();
-            assert_eq!("NFC", py_nfc.as_ref(py).get_type().name().unwrap());
+            assert_eq!("NFC", py_nfc.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -118,7 +118,7 @@ impl PyPreTokenizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -263,7 +263,7 @@ impl PyByteLevel {
     fn new(
         add_prefix_space: bool,
         use_regex: bool,
-        _kwargs: Option<&PyDict>,
+        _kwargs: Option<&Bound<'_, PyDict>>,
     ) -> (Self, PyPreTokenizer) {
         (
             PyByteLevel {},
@@ -352,8 +352,8 @@ impl PySplit {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [" ", "removed"])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [" ", "removed"])
     }
 }
@@ -372,21 +372,21 @@ impl PyCharDelimiterSplit {
     }

     #[setter]
-    fn set_delimiter(self_: PyRef<Self>, delimiter: PyChar) {
-        setter!(self_, Delimiter, delimiter, delimiter.0);
+    fn set_delimiter(self_: PyRef<Self>, delimiter: char) {
+        setter!(self_, Delimiter, delimiter, delimiter);
     }

     #[new]
     #[pyo3(text_signature = None)]
-    pub fn new(delimiter: PyChar) -> PyResult<(Self, PyPreTokenizer)> {
+    pub fn new(delimiter: char) -> PyResult<(Self, PyPreTokenizer)> {
         Ok((
             PyCharDelimiterSplit {},
-            CharDelimiterSplit::new(delimiter.0).into(),
+            CharDelimiterSplit::new(delimiter).into(),
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [" "])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [" "])
     }
 }
@@ -430,7 +430,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(text_signature = "(self, pretokenizers)")]
-    fn new(pre_tokenizers: &PyList) -> PyResult<(Self, PyPreTokenizer)> {
+    fn new(pre_tokenizers: &Bound<'_, PyList>) -> PyResult<(Self, PyPreTokenizer)> {
         let mut sequence = Vec::with_capacity(pre_tokenizers.len());
         for n in pre_tokenizers.iter() {
             let pretokenizer: PyRef<PyPreTokenizer> = n.extract()?;
@@ -447,8 +447,8 @@ impl PySequence {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }
@@ -490,8 +490,8 @@ impl PyMetaspace {
     }

     #[setter]
-    fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
-        setter!(self_, Metaspace, @set_replacement, replacement.0);
+    fn set_replacement(self_: PyRef<Self>, replacement: char) {
+        setter!(self_, Metaspace, @set_replacement, replacement);
     }

     #[getter]
@@ -524,15 +524,15 @@ impl PyMetaspace {
     }

     #[new]
-    #[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
+    #[pyo3(signature = (replacement = '▁', prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
     fn new(
-        replacement: PyChar,
+        replacement: char,
         prepend_scheme: String,
         split: bool,
     ) -> PyResult<(Self, PyPreTokenizer)> {
         // Create a new Metaspace instance
         let prepend_scheme = from_string(prepend_scheme)?;
-        let new_instance: Metaspace = Metaspace::new(replacement.0, prepend_scheme, split);
+        let new_instance: Metaspace = Metaspace::new(replacement, prepend_scheme, split);
         Ok((PyMetaspace {}, new_instance.into()))
     }
 }
@@ -599,7 +599,7 @@ impl tk::tokenizer::PreTokenizer for CustomPreTokenizer {
     fn pre_tokenize(&self, sentence: &mut PreTokenizedString) -> tk::Result<()> {
         Python::with_gil(|py| {
             let pretok = PyPreTokenizedStringRefMut::new(sentence);
-            let py_pretok = self.inner.as_ref(py);
+            let py_pretok = self.inner.bind(py);
             py_pretok.call_method("pre_tokenize", (pretok.get(),), None)?;
             Ok(())
         })
@@ -722,7 +722,7 @@ impl PreTokenizer for PyPreTokenizerWrapper {

 /// PreTokenizers Module
 #[pymodule]
-pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn pre_tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyPreTokenizer>()?;
     m.add_class::<PyByteLevel>()?;
     m.add_class::<PyWhitespace>()?;
@@ -754,7 +754,7 @@ mod test {
         Python::with_gil(|py| {
             let py_norm = PyPreTokenizer::new(Whitespace {}.into());
             let py_wsp = py_norm.get_as_subtype(py).unwrap();
-            assert_eq!("Whitespace", py_wsp.as_ref(py).get_type().name().unwrap());
+            assert_eq!("Whitespace", py_wsp.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -78,7 +78,7 @@ impl PyPostProcessor {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -166,8 +166,8 @@ impl PyBertProcessing {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [("", 0), ("", 0)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [("", 0), ("", 0)])
     }
 }
@@ -216,8 +216,8 @@ impl PyRobertaProcessing {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [("", 0), ("", 0)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [("", 0), ("", 0)])
     }
 }
@@ -235,7 +235,10 @@ pub struct PyByteLevel {}
 impl PyByteLevel {
     #[new]
     #[pyo3(signature = (trim_offsets = None, **_kwargs), text_signature = "(self, trim_offsets=True)")]
-    fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
+    fn new(
+        trim_offsets: Option<bool>,
+        _kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> (Self, PyPostProcessor) {
         let mut byte_level = ByteLevel::default();

         if let Some(to) = trim_offsets {
@@ -304,7 +307,7 @@ impl FromPyObject<'_> for PyTemplate {
             Ok(Self(
                 s.try_into().map_err(exceptions::PyValueError::new_err)?,
             ))
-        } else if let Ok(s) = ob.extract::<Vec<&str>>() {
+        } else if let Ok(s) = ob.extract::<Vec<String>>() {
             Ok(Self(
                 s.try_into().map_err(exceptions::PyValueError::new_err)?,
             ))
@@ -424,7 +427,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(signature = (processors_py), text_signature = "(self, processors)")]
-    fn new(processors_py: &PyList) -> (Self, PyPostProcessor) {
+    fn new(processors_py: &Bound<'_, PyList>) -> (Self, PyPostProcessor) {
         let mut processors: Vec<PostProcessorWrapper> = Vec::with_capacity(processors_py.len());
         for n in processors_py.iter() {
             let processor: PyRef<PyPostProcessor> = n.extract().unwrap();
@@ -438,14 +441,14 @@ impl PySequence {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }

 /// Processors Module
 #[pymodule]
-pub fn processors(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn processors(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyPostProcessor>()?;
     m.add_class::<PyBertProcessing>()?;
     m.add_class::<PyRobertaProcessing>()?;
@@ -474,7 +477,7 @@ mod test {
         let py_bert = py_proc.get_as_subtype(py).unwrap();
         assert_eq!(
             "BertProcessing",
-            py_bert.as_ref(py).get_type().name().unwrap()
+            py_bert.bind(py).get_type().qualname().unwrap()
         );
     })
 }


@@ -98,8 +98,8 @@ impl PyAddedToken {
         token
     }

-    pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
-        let dict = PyDict::new(py);
+    pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
+        let dict = PyDict::new_bound(py);
         let token = self.get_token();

         dict.set_item("content", token.content)?;
@@ -130,7 +130,7 @@ impl From<tk::AddedToken> for PyAddedToken {
 impl PyAddedToken {
     #[new]
     #[pyo3(signature = (content=None, **kwargs), text_signature = "(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True, special=False)")]
-    fn __new__(content: Option<&str>, kwargs: Option<&PyDict>) -> PyResult<Self> {
+    fn __new__(content: Option<&str>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<Self> {
         let mut token = PyAddedToken::from(content.unwrap_or(""), None);

         if let Some(kwargs) = kwargs {
@@ -150,7 +150,7 @@ impl PyAddedToken {
         Ok(token)
     }

-    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
+    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
         self.as_pydict(py)
     }
@@ -329,7 +329,7 @@ impl FromPyObject<'_> for PyArrayUnicode {
                 );
                 let py = ob.py();
                 let obj = PyObject::from_owned_ptr(py, unicode);
-                let s = obj.downcast::<PyString>(py)?;
+                let s = obj.downcast_bound::<PyString>(py)?;
                 Ok(s.to_string_lossy().trim_matches(char::from(0)).to_owned())
             })
             .collect::<PyResult<Vec<_>>>()?;
@@ -353,7 +353,7 @@ impl FromPyObject<'_> for PyArrayStr {
             .as_array()
             .iter()
             .map(|obj| {
-                let s = obj.downcast::<PyString>(ob.py())?;
+                let s = obj.downcast_bound::<PyString>(ob.py())?;
                 Ok(s.to_string_lossy().into_owned())
             })
             .collect::<PyResult<Vec<_>>>()?;
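Both numpy-array paths above swap `downcast::<PyString>(py)` for `downcast_bound`, the 0.21 entry point for type-checking a stored PyObject. Standalone sketch (illustrative helper):

use pyo3::prelude::*;
use pyo3::types::PyString;

// downcast_bound hands back a &Bound<'py, PyString>; the error value
// converts into a Python TypeError via `?`.
fn lossy_str(py: Python<'_>, obj: &PyObject) -> PyResult<String> {
    let s = obj.downcast_bound::<PyString>(py)?;
    Ok(s.to_string_lossy().into_owned())
}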
@@ -377,12 +377,12 @@ impl<'s> FromPyObject<'s> for PreTokenizedInputSequence<'s> {
             return Ok(Self(seq.into()));
         }
         if let Ok(s) = ob.downcast::<PyList>() {
-            if let Ok(seq) = s.extract::<Vec<&str>>() {
+            if let Ok(seq) = s.extract::<Vec<String>>() {
                 return Ok(Self(seq.into()));
             }
         }
         if let Ok(s) = ob.downcast::<PyTuple>() {
-            if let Ok(seq) = s.extract::<Vec<&str>>() {
+            if let Ok(seq) = s.extract::<Vec<String>>() {
                 return Ok(Self(seq.into()));
             }
         }
@@ -492,7 +492,7 @@ impl PyTokenizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -510,9 +510,9 @@ impl PyTokenizer {
         }
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
         let model = PyModel::from(BPE::default()).into_py(py);
-        PyTuple::new(py, vec![model])
+        PyTuple::new_bound(py, vec![model])
     }

     /// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@@ -557,7 +557,7 @@ impl PyTokenizer {
     /// :class:`~tokenizers.Tokenizer`: The new tokenizer
     #[staticmethod]
     #[pyo3(text_signature = "(buffer)")]
-    fn from_buffer(buffer: &PyBytes) -> PyResult<Self> {
+    fn from_buffer(buffer: &Bound<'_, PyBytes>) -> PyResult<Self> {
         let tokenizer = serde_json::from_slice(buffer.as_bytes()).map_err(|e| {
             exceptions::PyValueError::new_err(format!(
                 "Cannot instantiate Tokenizer from buffer: {}",
@@ -591,18 +591,18 @@ impl PyTokenizer {
         auth_token: Option<String>,
     ) -> PyResult<Self> {
         let path = Python::with_gil(|py| -> PyResult<String> {
-            let huggingface_hub = PyModule::import(py, intern!(py, "huggingface_hub"))?;
+            let huggingface_hub = PyModule::import_bound(py, intern!(py, "huggingface_hub"))?;
             let hf_hub_download = huggingface_hub.getattr(intern!(py, "hf_hub_download"))?;
             let kwargs = [
                 (intern!(py, "repo_id"), identifier),
                 (intern!(py, "filename"), "tokenizer.json"),
                 (intern!(py, "revision"), &revision),
             ]
-            .into_py_dict(py);
+            .into_py_dict_bound(py);
             if let Some(auth_token) = auth_token {
                 kwargs.set_item(intern!(py, "token"), auth_token)?;
             }
-            let path: String = hf_hub_download.call((), Some(kwargs))?.extract()?;
+            let path: String = hf_hub_download.call((), Some(&kwargs))?.extract()?;
             Ok(path)
         })?;
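The from_pretrained hunk shows the 0.21 keyword-call convention: `into_py_dict_bound` builds a `Bound<PyDict>`, and `call` takes it by reference as `Option<&Bound<PyDict>>`. A self-contained sketch against the standard library (json.dumps stands in for hf_hub_download; only the calling convention is the point):

use pyo3::prelude::*;
use pyo3::types::IntoPyDict;

fn dumps_indented(py: Python<'_>) -> PyResult<String> {
    let json = py.import_bound("json")?;
    // Key/value pairs become a Bound<PyDict> of keyword arguments.
    let kwargs = [("indent", 2)].into_py_dict_bound(py);
    json.getattr("dumps")?
        .call((vec![1, 2, 3],), Some(&kwargs))?
        .extract()
}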
@@ -712,7 +712,11 @@ impl PyTokenizer {
     #[pyo3(
         text_signature = "(self, max_length, stride=0, strategy='longest_first', direction='right')"
     )]
-    fn enable_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn enable_truncation(
+        &mut self,
+        max_length: usize,
+        kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> PyResult<()> {
         let mut params = TruncationParams {
             max_length,
             ..Default::default()
@@ -777,9 +781,9 @@ impl PyTokenizer {
     ///     (:obj:`dict`, `optional`):
     ///         A dict with the current truncation parameters if truncation is enabled
     #[getter]
-    fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
+    fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
         self.tokenizer.get_truncation().map_or(Ok(None), |params| {
-            let dict = PyDict::new(py);
+            let dict = PyDict::new_bound(py);
             dict.set_item("max_length", params.max_length)?;
             dict.set_item("stride", params.stride)?;
@@ -817,7 +821,7 @@ impl PyTokenizer {
     #[pyo3(
         text_signature = "(self, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]', length=None, pad_to_multiple_of=None)"
     )]
-    fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn enable_padding(&mut self, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
         let mut params = PaddingParams::default();

         if let Some(kwargs) = kwargs {
@@ -887,9 +891,9 @@ impl PyTokenizer {
     ///     (:obj:`dict`, `optional`):
     ///         A dict with the current padding parameters if padding is enabled
     #[getter]
-    fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
+    fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
         self.tokenizer.get_padding().map_or(Ok(None), |params| {
-            let dict = PyDict::new(py);
+            let dict = PyDict::new_bound(py);
             dict.set_item(
                 "length",
@@ -948,8 +952,8 @@ impl PyTokenizer {
     )]
     fn encode(
         &self,
-        sequence: &PyAny,
-        pair: Option<&PyAny>,
+        sequence: &Bound<'_, PyAny>,
+        pair: Option<&Bound<'_, PyAny>>,
         is_pretokenized: bool,
         add_special_tokens: bool,
     ) -> PyResult<PyEncoding> {
@@ -1141,7 +1145,7 @@ impl PyTokenizer {
     /// Returns:
     ///     :obj:`int`: The number of tokens that were created in the vocabulary
     #[pyo3(text_signature = "(self, tokens)")]
-    fn add_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
+    fn add_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
         let tokens = tokens
             .into_iter()
             .map(|token| {
@@ -1178,7 +1182,7 @@ impl PyTokenizer {
     /// Returns:
     ///     :obj:`int`: The number of tokens that were created in the vocabulary
     #[pyo3(text_signature = "(self, tokens)")]
-    fn add_special_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
+    fn add_special_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
         let tokens = tokens
             .into_iter()
             .map(|token| {
@@ -1251,7 +1255,7 @@ impl PyTokenizer {
     fn train_from_iterator(
         &mut self,
         py: Python,
-        iterator: &PyAny,
+        iterator: &Bound<'_, PyAny>,
         trainer: Option<&mut PyTrainer>,
         length: Option<usize>,
     ) -> PyResult<()> {


@@ -2,7 +2,6 @@ use std::sync::{Arc, RwLock};

 use crate::models::PyModel;
 use crate::tokenizer::PyAddedToken;
-use crate::utils::PyChar;
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::*;
@@ -52,7 +51,7 @@ impl PyTrainer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -215,7 +214,7 @@ impl PyBpeTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             BpeTrainer,
@@ -269,12 +268,12 @@ impl PyBpeTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             BpeTrainer,
             initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -300,7 +299,7 @@ impl PyBpeTrainer {
     #[new]
     #[pyo3(signature = (**kwargs), text_signature = None)]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::bpe::BpeTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -429,7 +428,7 @@ impl PyWordPieceTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             WordPieceTrainer,
@@ -473,12 +472,12 @@ impl PyWordPieceTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             WordPieceTrainer,
             @set_initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -507,7 +506,7 @@ impl PyWordPieceTrainer {
         signature = (** kwargs),
         text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
     )]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordpiece::WordPieceTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -621,7 +620,7 @@ impl PyWordLevelTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             WordLevelTrainer,
@@ -647,7 +646,7 @@ impl PyWordLevelTrainer {
     #[new]
     #[pyo3(signature = (**kwargs), text_signature = None)]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordlevel::WordLevelTrainer::builder();

         if let Some(kwargs) = kwargs {
@@ -767,7 +766,7 @@ impl PyUnigramTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             UnigramTrainer,
@@ -801,12 +800,12 @@ impl PyUnigramTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             UnigramTrainer,
             initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -815,7 +814,7 @@ impl PyUnigramTrainer {
         signature = (**kwargs),
         text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
     )]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::unigram::UnigramTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -874,7 +873,7 @@ impl PyUnigramTrainer {

 /// Trainers Module
 #[pymodule]
-pub fn trainers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn trainers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyTrainer>()?;
     m.add_class::<PyBpeTrainer>()?;
     m.add_class::<PyWordPieceTrainer>()?;
@@ -893,7 +892,7 @@ mod tests {
         Python::with_gil(|py| {
             let py_trainer = PyTrainer::new(Arc::new(RwLock::new(BpeTrainer::default().into())));
             let py_bpe = py_trainer.get_as_subtype(py).unwrap();
-            assert_eq!("BpeTrainer", py_bpe.as_ref(py).get_type().name().unwrap());
+            assert_eq!("BpeTrainer", py_bpe.bind(py).get_type().qualname().unwrap());
         })
     }
 }


@@ -50,7 +50,7 @@ pub struct PyBufferedIterator<T, F> {
 impl<T, F, I> PyBufferedIterator<T, F>
 where
-    F: Fn(&PyAny) -> I,
+    F: Fn(Bound<'_, PyAny>) -> I,
     I: IntoIterator<Item = PyResult<T>>,
 {
     /// Create a new PyBufferedIterator using the provided Python object.
@@ -62,10 +62,10 @@ where
     ///
     /// The `buffer_size` represents the number of items that we buffer before we
     /// need to acquire the GIL again.
-    pub fn new(iter: &PyAny, converter: F, buffer_size: usize) -> PyResult<Self> {
+    pub fn new(iter: &Bound<'_, PyAny>, converter: F, buffer_size: usize) -> PyResult<Self> {
         let py = iter.py();
         let iter: Py<PyAny> = unsafe {
-            py.from_borrowed_ptr_or_err::<PyAny>(pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
+            Bound::from_borrowed_ptr_or_err(py, pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
                 .to_object(py)
         };
@@ -89,9 +89,10 @@ where
         }

         match unsafe {
-            py.from_owned_ptr_or_opt::<PyAny>(pyo3::ffi::PyIter_Next(
-                self.iter.as_ref().unwrap().as_ref(py).as_ptr(),
-            ))
+            Bound::from_owned_ptr_or_opt(
+                py,
+                pyo3::ffi::PyIter_Next(self.iter.as_ref().unwrap().bind(py).as_ptr()),
+            )
         } {
             Some(obj) => self.buffer.extend((self.converter)(obj)),
             None => {
@@ -112,7 +113,7 @@ where
 impl<T, F, I> Iterator for PyBufferedIterator<T, F>
 where
-    F: Fn(&PyAny) -> I,
+    F: Fn(Bound<'_, PyAny>) -> I,
     I: IntoIterator<Item = PyResult<T>>,
 {
     type Item = PyResult<T>;
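The unsafe pointer conversions above move from methods on `Python` (`from_owned_ptr_or_opt`, `from_borrowed_ptr_or_err`) to associated functions on `Bound` that take the GIL token explicitly. A hedged sketch of the owned-pointer case (illustrative helper, not this file's code):

use pyo3::prelude::*;

// Advances a Python iterator once via the C API. PyIter_Next returns an
// owned pointer, NULL on exhaustion, or NULL with an exception set.
fn iter_next(py: Python<'_>, iter: &Py<PyAny>) -> PyResult<Option<Py<PyAny>>> {
    let next = unsafe {
        Bound::from_owned_ptr_or_opt(py, pyo3::ffi::PyIter_Next(iter.bind(py).as_ptr()))
    };
    match next {
        Some(obj) => Ok(Some(obj.unbind())),
        // Surface a pending Python exception; a plain NULL means exhausted.
        None => PyErr::take(py).map_or(Ok(None), Err),
    }
}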


@@ -1,6 +1,3 @@
-use pyo3::exceptions;
-use pyo3::prelude::*;
-use pyo3::types::*;
 use std::marker::PhantomData;
 use std::sync::{Arc, Mutex};
@@ -14,25 +11,6 @@
 pub use normalization::*;
 pub use pretokenization::*;
 pub use regex::*;

-// PyChar
-// This type is a temporary hack to accept `char` as argument
-// To be removed once https://github.com/PyO3/pyo3/pull/1282 has been released
-pub struct PyChar(pub char);
-
-impl FromPyObject<'_> for PyChar {
-    fn extract(obj: &PyAny) -> PyResult<Self> {
-        let s = <PyString as PyTryFrom<'_>>::try_from(obj)?.to_str()?;
-        let mut iter = s.chars();
-        if let (Some(ch), None) = (iter.next(), iter.next()) {
-            Ok(Self(ch))
-        } else {
-            Err(exceptions::PyValueError::new_err(
-                "expected a string of length 1",
-            ))
-        }
-    }
-}
-
 // RefMut utils

 pub trait DestroyPtr {
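The deleted PyChar shim predates PyO3's own `char` conversions (the PR it was waiting on, PyO3/pyo3#1282, shipped long ago), so the bindings now take `char` directly: a one-character Python str extracts natively, and longer strings fail extraction much as the shim's error did. A quick demonstration (assumes pyo3's auto-initialize feature):

use pyo3::prelude::*;

fn main() -> PyResult<()> {
    Python::with_gil(|py| {
        // A length-1 Python str converts straight into a Rust char.
        let c: char = py.eval_bound("'▁'", None, None)?.extract()?;
        assert_eq!(c, '▁');
        Ok(())
    })
}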


@@ -9,15 +9,15 @@ use tk::pattern::Pattern;

 /// Represents a Pattern as used by `NormalizedString`
 #[derive(Clone, FromPyObject)]
-pub enum PyPattern<'p> {
+pub enum PyPattern {
     #[pyo3(annotation = "str")]
-    Str(&'p str),
+    Str(String),
     #[pyo3(annotation = "tokenizers.Regex")]
     Regex(Py<PyRegex>),
     // TODO: Add the compatibility for Fn(char) -> bool
 }

-impl Pattern for PyPattern<'_> {
+impl Pattern for PyPattern {
     fn find_matches(&self, inside: &str) -> tk::Result<Vec<(tk::Offsets, bool)>> {
         match self {
             PyPattern::Str(s) => {
@@ -35,8 +35,8 @@ impl Pattern for PyPattern<'_> {
     }
 }

-impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
-    fn from(pattern: PyPattern<'_>) -> Self {
+impl From<PyPattern> for tk::normalizers::replace::ReplacePattern {
+    fn from(pattern: PyPattern) -> Self {
         match pattern {
             PyPattern::Str(s) => Self::String(s.to_owned()),
             PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@@ -44,8 +44,8 @@ impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
     }
 }

-impl From<PyPattern<'_>> for tk::pre_tokenizers::split::SplitPattern {
-    fn from(pattern: PyPattern<'_>) -> Self {
+impl From<PyPattern> for tk::pre_tokenizers::split::SplitPattern {
+    fn from(pattern: PyPattern) -> Self {
         match pattern {
             PyPattern::Str(s) => Self::String(s.to_owned()),
             PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@@ -117,7 +117,7 @@ impl From<PySplitDelimiterBehavior> for SplitDelimiterBehavior {
     }
 }

-fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
+fn filter(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`filter` expect a callable with the signature: `fn(char) -> bool`";

     if !func.is_callable() {
@@ -134,7 +134,7 @@ fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
+fn for_each(normalized: &NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`for_each` expect a callable with the signature: `fn(char)`";

     if !func.is_callable() {
@@ -148,14 +148,14 @@ fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn map(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
+fn map(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`map` expect a callable with the signature: `fn(char) -> char`";

     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(err))
     } else {
         normalized.map(|c| {
-            let c: &str = func
+            let c: String = func
                 .call1((c.to_string(),))
                 .expect(err)
                 .extract()
@@ -296,13 +296,13 @@ impl PyNormalizedString {
     /// Filter each character of the string using the given func
     #[pyo3(text_signature = "(self, func)")]
-    fn filter(&mut self, func: &PyAny) -> PyResult<()> {
+    fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         filter(&mut self.normalized, func)
     }

     /// Calls the given function for each character of the string
     #[pyo3(text_signature = "(self, func)")]
-    fn for_each(&self, func: &PyAny) -> PyResult<()> {
+    fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         for_each(&self.normalized, func)
     }
@@ -311,7 +311,7 @@ impl PyNormalizedString {
     /// Replaces each character of the string using the returned value. Each
     /// returned value **must** be a str of length 1 (ie a character).
     #[pyo3(text_signature = "(self, func)")]
-    fn map(&mut self, func: &PyAny) -> PyResult<()> {
+    fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         map(&mut self.normalized, func)
     }
@@ -551,21 +551,21 @@ impl PyNormalizedStringRefMut {
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)?
     }

-    fn filter(&mut self, func: &PyAny) -> PyResult<()> {
+    fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|n| filter(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
         Ok(())
     }

-    fn for_each(&self, func: &PyAny) -> PyResult<()> {
+    fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map(|n| for_each(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
         Ok(())
     }

-    fn map(&mut self, func: &PyAny) -> PyResult<()> {
+    fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|n| map(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
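All the callable-taking helpers now receive &Bound<'_, PyAny>; invoking one still goes through call1, with owned extraction of the result. A sketch of the map case (hypothetical helper mirroring the deleted PyChar length check):

use pyo3::prelude::*;

// Applies a Python fn(char) -> char and enforces the length-1 contract.
fn apply_char_fn(func: &Bound<'_, PyAny>, c: char) -> PyResult<char> {
    let out: String = func.call1((c.to_string(),))?.extract()?;
    let mut chars = out.chars();
    match (chars.next(), chars.next()) {
        (Some(ch), None) => Ok(ch),
        _ => Err(pyo3::exceptions::PyValueError::new_err(
            "expected a str of length 1",
        )),
    }
}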


@@ -12,7 +12,7 @@ use crate::error::ToPyResult;
 use crate::token::PyToken;
 use tk::{OffsetReferential, OffsetType, Offsets, PreTokenizedString, Token};

-fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn split(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`split` expect a callable with the signature: \
@@ -30,7 +30,7 @@ fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn normalize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`normalize` expect a callable with the signature: \
@@ -46,7 +46,7 @@ fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn tokenize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn tokenize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`tokenize` expect a callable with the signature: \
@@ -183,7 +183,7 @@ impl PyPreTokenizedString {
     /// In order for the offsets to be tracked accurately, any returned `NormalizedString`
     /// should come from calling either `.split` or `.slice` on the received one.
     #[pyo3(text_signature = "(self, func)")]
-    fn split(&mut self, func: &PyAny) -> PyResult<()> {
+    fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         split(&mut self.pretok, func)
     }
@@ -195,7 +195,7 @@ impl PyPreTokenizedString {
     /// does not need to return anything, just calling the methods on the provided
     /// NormalizedString allow its modification.
     #[pyo3(text_signature = "(self, func)")]
-    fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         normalize(&mut self.pretok, func)
     }
@@ -206,7 +206,7 @@ impl PyPreTokenizedString {
     /// The function used to tokenize each underlying split. This function must return
     /// a list of Token generated from the input str.
     #[pyo3(text_signature = "(self, func)")]
-    fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         tokenize(&mut self.pretok, func)
     }
@@ -289,19 +289,19 @@ impl PyPreTokenizedStringRefMut {
 #[pymethods]
 impl PyPreTokenizedStringRefMut {
-    fn split(&mut self, func: &PyAny) -> PyResult<()> {
+    fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| split(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
     }

-    fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| normalize(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
     }

-    fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| tokenize(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?


@@ -40,19 +40,19 @@ harness = false
 lazy_static = "1.4"
 rand = "0.8"
 onig = { version = "6.4", default-features = false, optional = true }
-regex = "1.9"
+regex = "1.10"
 regex-syntax = "0.8"
-rayon = "1.8"
+rayon = "1.10"
 rayon-cond = "0.3"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
-unicode-segmentation = "1.10"
+unicode-segmentation = "1.11"
 indicatif = {version = "0.17", optional = true}
 itertools = "0.12"
 log = "0.4"
-derive_builder = "0.13"
+derive_builder = "0.20"
 spm_precompiled = "0.1"
 hf-hub = { version = "0.3.2", optional = true }
 aho-corasick = "1.1"
@@ -62,7 +62,7 @@ thiserror = "1.0.49"
 fancy-regex = { version = "0.13", optional = true}
 getrandom = { version = "0.2.10" }
 esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
-monostate = "0.1.9"
+monostate = "0.1.12"

 [features]
 default = ["progressbar", "onig", "esaxx_fast"]
@@ -73,7 +73,7 @@ unstable_wasm = ["fancy-regex", "getrandom/js"]

 [dev-dependencies]
 criterion = "0.5"
-tempfile = "3.8"
+tempfile = "3.10"
 assert_approx_eq = "1.1"

 [profile.release]