PyO3 0.21. (#1494)

* PyO3 0.21.

* Upgraded everything.

* Rustfmt.
Nicolas Patry, 2024-04-16 13:49:52 +02:00, committed by GitHub
parent 914576f7ed
commit d5a8cc7a49
16 changed files with 180 additions and 197 deletions

View File

@ -9,24 +9,24 @@ name = "tokenizers"
crate-type = ["cdylib"]
[dependencies]
rayon = "1.8"
rayon = "1.10"
serde = { version = "1.0", features = [ "rc", "derive" ]}
serde_json = "1.0"
libc = "0.2"
env_logger = "0.10.0"
pyo3 = { version = "0.20" }
numpy = "0.20.0"
env_logger = "0.11"
pyo3 = { version = "0.21" }
numpy = "0.21"
ndarray = "0.15"
onig = { version = "6.4", default-features = false }
itertools = "0.11"
itertools = "0.12"
[dependencies.tokenizers]
version = "0.16.0-dev.0"
path = "../../tokenizers"
[dev-dependencies]
tempfile = "3.8"
pyo3 = { version = "0.20", features = ["auto-initialize"] }
tempfile = "3.10"
pyo3 = { version = "0.21", features = ["auto-initialize"] }
[features]
default = ["pyo3/extension-module"]
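
The dependency bumps above move the bindings from the GIL-Ref API of pyo3/numpy 0.20 to the Bound API introduced in PyO3 0.21; pyo3 and numpy have to move together because numpy's Rust bindings must match the pyo3 version they link against. As a rough orientation for the file diffs below, here is a minimal, hedged sketch (hypothetical `smoke` module and `Ping` class, not part of this crate) of what a module compiled against this dependency set looks like:

```rust
use pyo3::prelude::*;

/// Hypothetical placeholder class, only here so the module has something to register.
#[pyclass]
struct Ping;

#[pymethods]
impl Ping {
    #[new]
    fn new() -> Self {
        Ping
    }

    fn pong(&self) -> &'static str {
        "pong"
    }
}

/// Module entry points now take `&Bound<'_, PyModule>` instead of
/// `(Python, &PyModule)`, matching the `#[pymodule]` changes in the diffs below.
#[pymodule]
fn smoke(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<Ping>()?;
    Ok(())
}
```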

View File

@ -1,7 +1,6 @@
use std::sync::{Arc, RwLock};
use crate::pre_tokenizers::from_string;
use crate::utils::PyChar;
use crate::utils::PyPattern;
use pyo3::exceptions;
use pyo3::prelude::*;
@ -85,7 +84,7 @@ impl PyDecoder {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -161,7 +160,7 @@ pub struct PyByteLevelDec {}
impl PyByteLevelDec {
#[new]
#[pyo3(signature = (**_kwargs), text_signature = "(self)")]
fn new(_kwargs: Option<&PyDict>) -> (Self, PyDecoder) {
fn new(_kwargs: Option<&Bound<'_, PyDict>>) -> (Self, PyDecoder) {
(PyByteLevelDec {}, ByteLevel::default().into())
}
}
@ -318,8 +317,8 @@ impl PyMetaspaceDec {
}
#[setter]
fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
setter!(self_, Metaspace, @set_replacement, replacement.0);
fn set_replacement(self_: PyRef<Self>, replacement: char) {
setter!(self_, Metaspace, @set_replacement, replacement);
}
#[getter]
@ -352,16 +351,12 @@ impl PyMetaspaceDec {
}
#[new]
#[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
fn new(
replacement: PyChar,
prepend_scheme: String,
split: bool,
) -> PyResult<(Self, PyDecoder)> {
#[pyo3(signature = (replacement = '▁', prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
fn new(replacement: char, prepend_scheme: String, split: bool) -> PyResult<(Self, PyDecoder)> {
let prepend_scheme = from_string(prepend_scheme)?;
Ok((
PyMetaspaceDec {},
Metaspace::new(replacement.0, prepend_scheme, split).into(),
Metaspace::new(replacement, prepend_scheme, split).into(),
))
}
}
@ -463,7 +458,7 @@ pub struct PySequenceDecoder {}
impl PySequenceDecoder {
#[new]
#[pyo3(signature = (decoders_py), text_signature = "(self, decoders)")]
fn new(decoders_py: &PyList) -> PyResult<(Self, PyDecoder)> {
fn new(decoders_py: &Bound<'_, PyList>) -> PyResult<(Self, PyDecoder)> {
let mut decoders: Vec<DecoderWrapper> = Vec::with_capacity(decoders_py.len());
for decoder_py in decoders_py.iter() {
let decoder: PyRef<PyDecoder> = decoder_py.extract()?;
@ -476,8 +471,8 @@ impl PySequenceDecoder {
Ok((PySequenceDecoder {}, Sequence::new(decoders).into()))
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [PyList::empty(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
}
}
@ -497,7 +492,7 @@ impl Decoder for CustomDecoder {
Python::with_gil(|py| {
let decoded = self
.inner
.call_method(py, "decode", (tokens,), None)?
.call_method_bound(py, "decode", (tokens,), None)?
.extract(py)?;
Ok(decoded)
})
@ -507,7 +502,7 @@ impl Decoder for CustomDecoder {
Python::with_gil(|py| {
let decoded = self
.inner
.call_method(py, "decode_chain", (tokens,), None)?
.call_method_bound(py, "decode_chain", (tokens,), None)?
.extract(py)?;
Ok(decoded)
})
@ -572,7 +567,7 @@ impl Decoder for PyDecoderWrapper {
/// Decoders Module
#[pymodule]
pub fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn decoders(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyDecoder>()?;
m.add_class::<PyByteLevelDec>()?;
m.add_class::<PyReplaceDec>()?;
@ -602,7 +597,7 @@ mod test {
Python::with_gil(|py| {
let py_dec = PyDecoder::new(Metaspace::default().into());
let py_meta = py_dec.get_as_subtype(py).unwrap();
assert_eq!("Metaspace", py_meta.as_ref(py).get_type().name().unwrap());
assert_eq!("Metaspace", py_meta.bind(py).get_type().qualname().unwrap());
})
}
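
The recurring changes in this file are the ones repeated throughout the commit: GIL-Ref constructors such as `PyBytes::new` and `PyTuple::new` become `*_bound` constructors returning `Bound<'_, T>`, and kwargs/list arguments become `Option<&Bound<'_, PyDict>>` or `&Bound<'_, PyList>`. A hedged, self-contained sketch of the `__getstate__`/`__getnewargs__` shape used above, with a hypothetical `PyThing` class standing in for the crate's types:

```rust
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyTuple};

#[pyclass]
struct PyThing {
    payload: String,
}

#[pymethods]
impl PyThing {
    #[new]
    fn new(payload: String) -> Self {
        PyThing { payload }
    }

    /// `PyBytes::new_bound` replaces `PyBytes::new`; it returns a
    /// `Bound<'_, PyBytes>`, and `to_object` still turns it into a `PyObject`.
    fn __getstate__(&self, py: Python) -> PyObject {
        PyBytes::new_bound(py, self.payload.as_bytes()).to_object(py)
    }

    /// The return type moves from `&'p PyTuple` to `Bound<'p, PyTuple>`,
    /// built with the `_bound` constructor.
    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
        PyTuple::new_bound(py, [self.payload.clone()])
    }
}
```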

View File

@ -37,7 +37,7 @@ impl PyEncoding {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -391,10 +391,10 @@ impl PyEncoding {
#[pyo3(
text_signature = "(self, length, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]')"
)]
fn pad(&mut self, length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
fn pad(&mut self, length: usize, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
let mut pad_id = 0;
let mut pad_type_id = 0;
let mut pad_token = "[PAD]";
let mut pad_token = "[PAD]".to_string();
let mut direction = PaddingDirection::Right;
if let Some(kwargs) = kwargs {
@ -422,7 +422,7 @@ impl PyEncoding {
}
}
self.encoding
.pad(length, pad_id, pad_type_id, pad_token, direction);
.pad(length, pad_id, pad_type_id, &pad_token, direction);
Ok(())
}
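
`pad_token` turning into an owned `String` follows from the Bound API: values pulled out of a `Bound<'_, PyDict>` during iteration are temporaries, so extracting `&str` would borrow from them. A hedged sketch of the kwargs-parsing shape, with hypothetical parameter names:

```rust
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::PyDict;

/// Parse optional padding-style kwargs out of a `Bound<'_, PyDict>`.
/// Extracting owned `String` values avoids borrowing from the temporary
/// key/value objects produced while iterating the dict.
fn parse_pad_kwargs(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(u32, String)> {
    let mut pad_id = 0u32;
    let mut pad_token = String::from("[PAD]");

    if let Some(kwargs) = kwargs {
        for (key, value) in kwargs {
            let key: String = key.extract()?;
            match key.as_str() {
                "pad_id" => pad_id = value.extract()?,
                "pad_token" => pad_token = value.extract()?,
                other => {
                    return Err(exceptions::PyValueError::new_err(format!(
                        "unexpected keyword argument: {other}"
                    )))
                }
            }
        }
    }
    Ok((pad_id, pad_token))
}
```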

View File

@ -35,7 +35,7 @@ impl<T> ToPyResult<T> {
}
pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
let deprecation_warning = py.import("builtins")?.getattr("DeprecationWarning")?;
let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
let full_message = format!("Deprecated in {}: {}", version, message);
pyo3::PyErr::warn(py, deprecation_warning, &full_message, 0)
pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
}
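
Only the changed lines are shown above; for context, the whole helper after the migration reads roughly as follows (a reconstruction from the lines in this hunk, with the file's `use pyo3::prelude::*;` assumed):

```rust
use pyo3::prelude::*;

pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
    // `import_bound` returns a Bound<'_, PyModule>; `getattr` on it yields a
    // Bound<'_, PyAny>, which is the category type `warn_bound` expects.
    let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
    let full_message = format!("Deprecated in {}: {}", version, message);
    pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
}
```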

View File

@ -47,7 +47,7 @@ extern "C" fn child_after_fork() {
/// Tokenizers Module
#[pymodule]
pub fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
let _ = env_logger::try_init_from_env("TOKENIZERS_LOG");
// Register the fork callback

View File

@ -105,7 +105,7 @@ impl PyModel {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -260,7 +260,10 @@ impl PyModel {
pub struct PyBPE {}
impl PyBPE {
fn with_builder(mut builder: BpeBuilder, kwargs: Option<&PyDict>) -> PyResult<(Self, PyModel)> {
fn with_builder(
mut builder: BpeBuilder,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<(Self, PyModel)> {
if let Some(kwargs) = kwargs {
for (key, value) in kwargs {
let key: &str = key.extract()?;
@ -321,14 +324,14 @@ macro_rules! setter {
}
#[derive(FromPyObject)]
enum PyVocab<'a> {
enum PyVocab {
Vocab(Vocab),
Filename(&'a str),
Filename(String),
}
#[derive(FromPyObject)]
enum PyMerges<'a> {
enum PyMerges {
Merges(Merges),
Filename(&'a str),
Filename(String),
}
#[pymethods]
@ -417,7 +420,7 @@ impl PyBPE {
py: Python<'_>,
vocab: Option<PyVocab>,
merges: Option<PyMerges>,
kwargs: Option<&PyDict>,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<(Self, PyModel)> {
if (vocab.is_some() && merges.is_none()) || (vocab.is_none() && merges.is_some()) {
return Err(exceptions::PyValueError::new_err(
@ -502,11 +505,11 @@ impl PyBPE {
#[pyo3(signature = (vocab, merges, **kwargs))]
#[pyo3(text_signature = "(cls, vocab, merges, **kwargs)")]
fn from_file(
_cls: &PyType,
_cls: &Bound<'_, PyType>,
py: Python,
vocab: &str,
merges: &str,
kwargs: Option<&PyDict>,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<Py<Self>> {
let (vocab, merges) = BPE::read_file(vocab, merges).map_err(|e| {
exceptions::PyException::new_err(format!("Error while reading BPE files: {}", e))
@ -540,7 +543,7 @@ pub struct PyWordPiece {}
impl PyWordPiece {
fn with_builder(
mut builder: WordPieceBuilder,
kwargs: Option<&PyDict>,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<(Self, PyModel)> {
if let Some(kwargs) = kwargs {
for (key, val) in kwargs {
@ -612,7 +615,7 @@ impl PyWordPiece {
fn new(
py: Python<'_>,
vocab: Option<PyVocab>,
kwargs: Option<&PyDict>,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<(Self, PyModel)> {
let mut builder = WordPiece::builder();
@ -677,10 +680,10 @@ impl PyWordPiece {
#[pyo3(signature = (vocab, **kwargs))]
#[pyo3(text_signature = "(vocab, **kwargs)")]
fn from_file(
_cls: &PyType,
_cls: &Bound<'_, PyType>,
py: Python,
vocab: &str,
kwargs: Option<&PyDict>,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<Py<Self>> {
let vocab = WordPiece::read_file(vocab).map_err(|e| {
exceptions::PyException::new_err(format!("Error while reading WordPiece file: {}", e))
@ -796,7 +799,7 @@ impl PyWordLevel {
#[pyo3(signature = (vocab, unk_token = None))]
#[pyo3(text_signature = "(vocab, unk_token)")]
fn from_file(
_cls: &PyType,
_cls: &Bound<'_, PyType>,
py: Python,
vocab: &str,
unk_token: Option<String>,
@ -849,7 +852,7 @@ impl PyUnigram {
/// Models Module
#[pymodule]
pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn models(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyModel>()?;
m.add_class::<PyBPE>()?;
m.add_class::<PyWordPiece>()?;
@ -870,7 +873,7 @@ mod test {
Python::with_gil(|py| {
let py_model = PyModel::from(BPE::default());
let py_bpe = py_model.get_as_subtype(py).unwrap();
assert_eq!("BPE", py_bpe.as_ref(py).get_type().name().unwrap());
assert_eq!("BPE", py_bpe.bind(py).get_type().qualname().unwrap());
})
}
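
Dropping the `'a` lifetime from `PyVocab`/`PyMerges` and switching `&'a str` to `String` is the general rule for `#[derive(FromPyObject)]` types under the Bound API: owned data instead of borrows tied to GIL-Ref lifetimes. A hedged, self-contained sketch with a hypothetical `VocabArg` enum (not the crate's type):

```rust
use pyo3::prelude::*;
use std::collections::HashMap;

/// Accept either an in-memory vocab mapping or a filename, without borrowing
/// from the Python objects, so no lifetime parameter is needed.
#[derive(FromPyObject)]
enum VocabArg {
    Vocab(HashMap<String, u32>),
    Filename(String),
}

#[pyfunction]
fn vocab_size(vocab: VocabArg) -> usize {
    match vocab {
        VocabArg::Vocab(map) => map.len(),
        // For the sketch, a path just reports 0 instead of being read from disk.
        VocabArg::Filename(_path) => 0,
    }
}
```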

View File

@ -113,7 +113,7 @@ impl PyNormalizer {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -345,7 +345,7 @@ pub struct PySequence {}
impl PySequence {
#[new]
#[pyo3(text_signature = None)]
fn new(normalizers: &PyList) -> PyResult<(Self, PyNormalizer)> {
fn new(normalizers: &Bound<'_, PyList>) -> PyResult<(Self, PyNormalizer)> {
let mut sequence = Vec::with_capacity(normalizers.len());
for n in normalizers.iter() {
let normalizer: PyRef<PyNormalizer> = n.extract()?;
@ -360,8 +360,8 @@ impl PySequence {
))
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [PyList::empty(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
}
fn __len__(&self) -> usize {
@ -467,11 +467,11 @@ pub struct PyPrecompiled {}
impl PyPrecompiled {
#[new]
#[pyo3(text_signature = "(self, precompiled_charsmap)")]
fn new(py_precompiled_charsmap: &PyBytes) -> PyResult<(Self, PyNormalizer)> {
let precompiled_charsmap: &[u8] = FromPyObject::extract(py_precompiled_charsmap)?;
fn new(precompiled_charsmap: Vec<u8>) -> PyResult<(Self, PyNormalizer)> {
// let precompiled_charsmap: Vec<u8> = FromPyObject::extract(py_precompiled_charsmap)?;
Ok((
PyPrecompiled {},
Precompiled::from(precompiled_charsmap)
Precompiled::from(&precompiled_charsmap)
.map_err(|e| {
exceptions::PyException::new_err(format!(
"Error while attempting to build Precompiled normalizer: {}",
@ -512,7 +512,7 @@ impl tk::tokenizer::Normalizer for CustomNormalizer {
fn normalize(&self, normalized: &mut NormalizedString) -> tk::Result<()> {
Python::with_gil(|py| {
let normalized = PyNormalizedStringRefMut::new(normalized);
let py_normalized = self.inner.as_ref(py);
let py_normalized = self.inner.bind(py);
py_normalized.call_method("normalize", (normalized.get(),), None)?;
Ok(())
})
@ -635,7 +635,7 @@ impl Normalizer for PyNormalizerWrapper {
/// Normalizers Module
#[pymodule]
pub fn normalizers(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn normalizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyNormalizer>()?;
m.add_class::<PyBertNormalizer>()?;
m.add_class::<PyNFD>()?;
@ -667,7 +667,7 @@ mod test {
Python::with_gil(|py| {
let py_norm = PyNormalizer::new(NFC.into());
let py_nfc = py_norm.get_as_subtype(py).unwrap();
assert_eq!("NFC", py_nfc.as_ref(py).get_type().name().unwrap());
assert_eq!("NFC", py_nfc.bind(py).get_type().qualname().unwrap());
})
}
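
The `self.inner.as_ref(py)` to `self.inner.bind(py)` change is how a stored `Py<PyAny>` becomes a `Bound<'_, PyAny>` when calling back into Python. A hedged sketch of that shape, using a hypothetical `Callback` wrapper rather than the crate's `CustomNormalizer`:

```rust
use pyo3::prelude::*;

/// Holds a Python object and invokes one of its methods from Rust.
struct Callback {
    inner: Py<PyAny>,
}

impl Callback {
    fn run(&self, text: &str) -> PyResult<String> {
        Python::with_gil(|py| {
            // `bind` replaces `as_ref(py)`: it borrows the stored object as a
            // `&Bound<'_, PyAny>`, on which the usual call methods are available.
            let obj = self.inner.bind(py);
            obj.call_method1("normalize", (text,))?.extract()
        })
    }
}
```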

View File

@ -118,7 +118,7 @@ impl PyPreTokenizer {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -263,7 +263,7 @@ impl PyByteLevel {
fn new(
add_prefix_space: bool,
use_regex: bool,
_kwargs: Option<&PyDict>,
_kwargs: Option<&Bound<'_, PyDict>>,
) -> (Self, PyPreTokenizer) {
(
PyByteLevel {},
@ -352,8 +352,8 @@ impl PySplit {
))
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [" ", "removed"])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [" ", "removed"])
}
}
@ -372,21 +372,21 @@ impl PyCharDelimiterSplit {
}
#[setter]
fn set_delimiter(self_: PyRef<Self>, delimiter: PyChar) {
setter!(self_, Delimiter, delimiter, delimiter.0);
fn set_delimiter(self_: PyRef<Self>, delimiter: char) {
setter!(self_, Delimiter, delimiter, delimiter);
}
#[new]
#[pyo3(text_signature = None)]
pub fn new(delimiter: PyChar) -> PyResult<(Self, PyPreTokenizer)> {
pub fn new(delimiter: char) -> PyResult<(Self, PyPreTokenizer)> {
Ok((
PyCharDelimiterSplit {},
CharDelimiterSplit::new(delimiter.0).into(),
CharDelimiterSplit::new(delimiter).into(),
))
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [" "])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [" "])
}
}
@ -430,7 +430,7 @@ pub struct PySequence {}
impl PySequence {
#[new]
#[pyo3(text_signature = "(self, pretokenizers)")]
fn new(pre_tokenizers: &PyList) -> PyResult<(Self, PyPreTokenizer)> {
fn new(pre_tokenizers: &Bound<'_, PyList>) -> PyResult<(Self, PyPreTokenizer)> {
let mut sequence = Vec::with_capacity(pre_tokenizers.len());
for n in pre_tokenizers.iter() {
let pretokenizer: PyRef<PyPreTokenizer> = n.extract()?;
@ -447,8 +447,8 @@ impl PySequence {
))
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [PyList::empty(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
}
}
@ -490,8 +490,8 @@ impl PyMetaspace {
}
#[setter]
fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
setter!(self_, Metaspace, @set_replacement, replacement.0);
fn set_replacement(self_: PyRef<Self>, replacement: char) {
setter!(self_, Metaspace, @set_replacement, replacement);
}
#[getter]
@ -524,15 +524,15 @@ impl PyMetaspace {
}
#[new]
#[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
#[pyo3(signature = (replacement = '▁', prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
fn new(
replacement: PyChar,
replacement: char,
prepend_scheme: String,
split: bool,
) -> PyResult<(Self, PyPreTokenizer)> {
// Create a new Metaspace instance
let prepend_scheme = from_string(prepend_scheme)?;
let new_instance: Metaspace = Metaspace::new(replacement.0, prepend_scheme, split);
let new_instance: Metaspace = Metaspace::new(replacement, prepend_scheme, split);
Ok((PyMetaspace {}, new_instance.into()))
}
}
@ -599,7 +599,7 @@ impl tk::tokenizer::PreTokenizer for CustomPreTokenizer {
fn pre_tokenize(&self, sentence: &mut PreTokenizedString) -> tk::Result<()> {
Python::with_gil(|py| {
let pretok = PyPreTokenizedStringRefMut::new(sentence);
let py_pretok = self.inner.as_ref(py);
let py_pretok = self.inner.bind(py);
py_pretok.call_method("pre_tokenize", (pretok.get(),), None)?;
Ok(())
})
@ -722,7 +722,7 @@ impl PreTokenizer for PyPreTokenizerWrapper {
/// PreTokenizers Module
#[pymodule]
pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn pre_tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyPreTokenizer>()?;
m.add_class::<PyByteLevel>()?;
m.add_class::<PyWhitespace>()?;
@ -754,7 +754,7 @@ mod test {
Python::with_gil(|py| {
let py_norm = PyPreTokenizer::new(Whitespace {}.into());
let py_wsp = py_norm.get_as_subtype(py).unwrap();
assert_eq!("Whitespace", py_wsp.as_ref(py).get_type().name().unwrap());
assert_eq!("Whitespace", py_wsp.bind(py).get_type().qualname().unwrap());
})
}
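
PyO3 0.21 extracts `char` from one-character Python strings natively, which is what allows the local `PyChar` wrapper to be deleted (see the `utils/mod.rs` hunk further down) and lets signature defaults be written as plain `char` literals. A hedged sketch with a hypothetical function name:

```rust
use pyo3::prelude::*;

/// `char` now works directly as an argument type and as a signature default;
/// passing a string that is not exactly one character long fails extraction
/// with an error.
#[pyfunction]
#[pyo3(signature = (replacement = '▁'))]
fn show_replacement(replacement: char) -> String {
    format!("replacement char: {replacement}")
}
```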

View File

@ -78,7 +78,7 @@ impl PyPostProcessor {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -166,8 +166,8 @@ impl PyBertProcessing {
)
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [("", 0), ("", 0)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [("", 0), ("", 0)])
}
}
@ -216,8 +216,8 @@ impl PyRobertaProcessing {
)
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [("", 0), ("", 0)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [("", 0), ("", 0)])
}
}
@ -235,7 +235,10 @@ pub struct PyByteLevel {}
impl PyByteLevel {
#[new]
#[pyo3(signature = (trim_offsets = None, **_kwargs), text_signature = "(self, trim_offsets=True)")]
fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
fn new(
trim_offsets: Option<bool>,
_kwargs: Option<&Bound<'_, PyDict>>,
) -> (Self, PyPostProcessor) {
let mut byte_level = ByteLevel::default();
if let Some(to) = trim_offsets {
@ -304,7 +307,7 @@ impl FromPyObject<'_> for PyTemplate {
Ok(Self(
s.try_into().map_err(exceptions::PyValueError::new_err)?,
))
} else if let Ok(s) = ob.extract::<Vec<&str>>() {
} else if let Ok(s) = ob.extract::<Vec<String>>() {
Ok(Self(
s.try_into().map_err(exceptions::PyValueError::new_err)?,
))
@ -424,7 +427,7 @@ pub struct PySequence {}
impl PySequence {
#[new]
#[pyo3(signature = (processors_py), text_signature = "(self, processors)")]
fn new(processors_py: &PyList) -> (Self, PyPostProcessor) {
fn new(processors_py: &Bound<'_, PyList>) -> (Self, PyPostProcessor) {
let mut processors: Vec<PostProcessorWrapper> = Vec::with_capacity(processors_py.len());
for n in processors_py.iter() {
let processor: PyRef<PyPostProcessor> = n.extract().unwrap();
@ -438,14 +441,14 @@ impl PySequence {
)
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
PyTuple::new(py, [PyList::empty(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
}
}
/// Processors Module
#[pymodule]
pub fn processors(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn processors(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyPostProcessor>()?;
m.add_class::<PyBertProcessing>()?;
m.add_class::<PyRobertaProcessing>()?;
@ -474,7 +477,7 @@ mod test {
let py_bert = py_proc.get_as_subtype(py).unwrap();
assert_eq!(
"BertProcessing",
py_bert.as_ref(py).get_type().name().unwrap()
py_bert.bind(py).get_type().qualname().unwrap()
);
})
}
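
The `Vec<&str>` to `Vec<String>` change in `FromPyObject for PyTemplate` is the same owned-data rule again, this time for manual extraction from a `&Bound<'_, PyAny>`. A hedged sketch with a hypothetical helper function:

```rust
use pyo3::exceptions;
use pyo3::prelude::*;

/// Accept `str` or `list[str]` and always return owned Rust strings.
fn extract_pieces(ob: &Bound<'_, PyAny>) -> PyResult<Vec<String>> {
    if let Ok(s) = ob.extract::<String>() {
        Ok(vec![s])
    } else if let Ok(seq) = ob.extract::<Vec<String>>() {
        Ok(seq)
    } else {
        Err(exceptions::PyTypeError::new_err(
            "expected a str or a list of str",
        ))
    }
}
```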

View File

@ -98,8 +98,8 @@ impl PyAddedToken {
token
}
pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
let dict = PyDict::new(py);
pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
let dict = PyDict::new_bound(py);
let token = self.get_token();
dict.set_item("content", token.content)?;
@ -130,7 +130,7 @@ impl From<tk::AddedToken> for PyAddedToken {
impl PyAddedToken {
#[new]
#[pyo3(signature = (content=None, **kwargs), text_signature = "(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True, special=False)")]
fn __new__(content: Option<&str>, kwargs: Option<&PyDict>) -> PyResult<Self> {
fn __new__(content: Option<&str>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<Self> {
let mut token = PyAddedToken::from(content.unwrap_or(""), None);
if let Some(kwargs) = kwargs {
@ -150,7 +150,7 @@ impl PyAddedToken {
Ok(token)
}
fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
self.as_pydict(py)
}
@ -329,7 +329,7 @@ impl FromPyObject<'_> for PyArrayUnicode {
);
let py = ob.py();
let obj = PyObject::from_owned_ptr(py, unicode);
let s = obj.downcast::<PyString>(py)?;
let s = obj.downcast_bound::<PyString>(py)?;
Ok(s.to_string_lossy().trim_matches(char::from(0)).to_owned())
})
.collect::<PyResult<Vec<_>>>()?;
@ -353,7 +353,7 @@ impl FromPyObject<'_> for PyArrayStr {
.as_array()
.iter()
.map(|obj| {
let s = obj.downcast::<PyString>(ob.py())?;
let s = obj.downcast_bound::<PyString>(ob.py())?;
Ok(s.to_string_lossy().into_owned())
})
.collect::<PyResult<Vec<_>>>()?;
@ -377,12 +377,12 @@ impl<'s> FromPyObject<'s> for PreTokenizedInputSequence<'s> {
return Ok(Self(seq.into()));
}
if let Ok(s) = ob.downcast::<PyList>() {
if let Ok(seq) = s.extract::<Vec<&str>>() {
if let Ok(seq) = s.extract::<Vec<String>>() {
return Ok(Self(seq.into()));
}
}
if let Ok(s) = ob.downcast::<PyTuple>() {
if let Ok(seq) = s.extract::<Vec<&str>>() {
if let Ok(seq) = s.extract::<Vec<String>>() {
return Ok(Self(seq.into()));
}
}
@ -492,7 +492,7 @@ impl PyTokenizer {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -510,9 +510,9 @@ impl PyTokenizer {
}
}
fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
let model = PyModel::from(BPE::default()).into_py(py);
PyTuple::new(py, vec![model])
PyTuple::new_bound(py, vec![model])
}
/// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@ -557,7 +557,7 @@ impl PyTokenizer {
/// :class:`~tokenizers.Tokenizer`: The new tokenizer
#[staticmethod]
#[pyo3(text_signature = "(buffer)")]
fn from_buffer(buffer: &PyBytes) -> PyResult<Self> {
fn from_buffer(buffer: &Bound<'_, PyBytes>) -> PyResult<Self> {
let tokenizer = serde_json::from_slice(buffer.as_bytes()).map_err(|e| {
exceptions::PyValueError::new_err(format!(
"Cannot instantiate Tokenizer from buffer: {}",
@ -591,18 +591,18 @@ impl PyTokenizer {
auth_token: Option<String>,
) -> PyResult<Self> {
let path = Python::with_gil(|py| -> PyResult<String> {
let huggingface_hub = PyModule::import(py, intern!(py, "huggingface_hub"))?;
let huggingface_hub = PyModule::import_bound(py, intern!(py, "huggingface_hub"))?;
let hf_hub_download = huggingface_hub.getattr(intern!(py, "hf_hub_download"))?;
let kwargs = [
(intern!(py, "repo_id"), identifier),
(intern!(py, "filename"), "tokenizer.json"),
(intern!(py, "revision"), &revision),
]
.into_py_dict(py);
.into_py_dict_bound(py);
if let Some(auth_token) = auth_token {
kwargs.set_item(intern!(py, "token"), auth_token)?;
}
let path: String = hf_hub_download.call((), Some(kwargs))?.extract()?;
let path: String = hf_hub_download.call((), Some(&kwargs))?.extract()?;
Ok(path)
})?;
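
The `from_pretrained` hunk above shows the 0.21 shape for calling a Python function with keyword arguments: build the dict with `into_py_dict_bound` and pass `Some(&kwargs)`. A hedged, self-contained sketch that calls a standard-library builtin the same way (a hypothetical example, not the crate's hub download):

```rust
use pyo3::prelude::*;
use pyo3::types::IntoPyDict;

/// Call `sorted(data, reverse=True)` from Rust, using the same
/// call-with-kwargs shape as the `hf_hub_download` call above.
fn sorted_desc(py: Python<'_>, data: Vec<i64>) -> PyResult<Vec<i64>> {
    let builtins = PyModule::import_bound(py, "builtins")?;
    let sorted_fn = builtins.getattr("sorted")?;
    // `into_py_dict_bound` builds a Bound<'_, PyDict> from (key, value) pairs.
    let kwargs = [("reverse", true)].into_py_dict_bound(py);
    sorted_fn.call((data,), Some(&kwargs))?.extract()
}
```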
@ -712,7 +712,11 @@ impl PyTokenizer {
#[pyo3(
text_signature = "(self, max_length, stride=0, strategy='longest_first', direction='right')"
)]
fn enable_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
fn enable_truncation(
&mut self,
max_length: usize,
kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<()> {
let mut params = TruncationParams {
max_length,
..Default::default()
@ -777,9 +781,9 @@ impl PyTokenizer {
/// (:obj:`dict`, `optional`):
/// A dict with the current truncation parameters if truncation is enabled
#[getter]
fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
self.tokenizer.get_truncation().map_or(Ok(None), |params| {
let dict = PyDict::new(py);
let dict = PyDict::new_bound(py);
dict.set_item("max_length", params.max_length)?;
dict.set_item("stride", params.stride)?;
@ -817,7 +821,7 @@ impl PyTokenizer {
#[pyo3(
text_signature = "(self, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]', length=None, pad_to_multiple_of=None)"
)]
fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
fn enable_padding(&mut self, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
let mut params = PaddingParams::default();
if let Some(kwargs) = kwargs {
@ -887,9 +891,9 @@ impl PyTokenizer {
/// (:obj:`dict`, `optional`):
/// A dict with the current padding parameters if padding is enabled
#[getter]
fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
self.tokenizer.get_padding().map_or(Ok(None), |params| {
let dict = PyDict::new(py);
let dict = PyDict::new_bound(py);
dict.set_item(
"length",
@ -948,8 +952,8 @@ impl PyTokenizer {
)]
fn encode(
&self,
sequence: &PyAny,
pair: Option<&PyAny>,
sequence: &Bound<'_, PyAny>,
pair: Option<&Bound<'_, PyAny>>,
is_pretokenized: bool,
add_special_tokens: bool,
) -> PyResult<PyEncoding> {
@ -1141,7 +1145,7 @@ impl PyTokenizer {
/// Returns:
/// :obj:`int`: The number of tokens that were created in the vocabulary
#[pyo3(text_signature = "(self, tokens)")]
fn add_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
fn add_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
let tokens = tokens
.into_iter()
.map(|token| {
@ -1178,7 +1182,7 @@ impl PyTokenizer {
/// Returns:
/// :obj:`int`: The number of tokens that were created in the vocabulary
#[pyo3(text_signature = "(self, tokens)")]
fn add_special_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
fn add_special_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
let tokens = tokens
.into_iter()
.map(|token| {
@ -1251,7 +1255,7 @@ impl PyTokenizer {
fn train_from_iterator(
&mut self,
py: Python,
iterator: &PyAny,
iterator: &Bound<'_, PyAny>,
trainer: Option<&mut PyTrainer>,
length: Option<usize>,
) -> PyResult<()> {

View File

@ -2,7 +2,6 @@ use std::sync::{Arc, RwLock};
use crate::models::PyModel;
use crate::tokenizer::PyAddedToken;
use crate::utils::PyChar;
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::*;
@ -52,7 +51,7 @@ impl PyTrainer {
e
))
})?;
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
}
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@ -215,7 +214,7 @@ impl PyBpeTrainer {
}
#[setter]
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
setter!(
self_,
BpeTrainer,
@ -269,12 +268,12 @@ impl PyBpeTrainer {
}
#[setter]
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
setter!(
self_,
BpeTrainer,
initial_alphabet,
alphabet.into_iter().map(|c| c.0).collect()
alphabet.into_iter().collect()
);
}
@ -300,7 +299,7 @@ impl PyBpeTrainer {
#[new]
#[pyo3(signature = (**kwargs), text_signature = None)]
pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
let mut builder = tk::models::bpe::BpeTrainer::builder();
if let Some(kwargs) = kwargs {
for (key, val) in kwargs {
@ -429,7 +428,7 @@ impl PyWordPieceTrainer {
}
#[setter]
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
setter!(
self_,
WordPieceTrainer,
@ -473,12 +472,12 @@ impl PyWordPieceTrainer {
}
#[setter]
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
setter!(
self_,
WordPieceTrainer,
@set_initial_alphabet,
alphabet.into_iter().map(|c| c.0).collect()
alphabet.into_iter().collect()
);
}
@ -507,7 +506,7 @@ impl PyWordPieceTrainer {
signature = (** kwargs),
text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet=[], continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
)]
pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
let mut builder = tk::models::wordpiece::WordPieceTrainer::builder();
if let Some(kwargs) = kwargs {
for (key, val) in kwargs {
@ -621,7 +620,7 @@ impl PyWordLevelTrainer {
}
#[setter]
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
setter!(
self_,
WordLevelTrainer,
@ -647,7 +646,7 @@ impl PyWordLevelTrainer {
#[new]
#[pyo3(signature = (**kwargs), text_signature = None)]
pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
let mut builder = tk::models::wordlevel::WordLevelTrainer::builder();
if let Some(kwargs) = kwargs {
@ -767,7 +766,7 @@ impl PyUnigramTrainer {
}
#[setter]
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
setter!(
self_,
UnigramTrainer,
@ -801,12 +800,12 @@ impl PyUnigramTrainer {
}
#[setter]
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
setter!(
self_,
UnigramTrainer,
initial_alphabet,
alphabet.into_iter().map(|c| c.0).collect()
alphabet.into_iter().collect()
);
}
@ -815,7 +814,7 @@ impl PyUnigramTrainer {
signature = (**kwargs),
text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
)]
pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
pub fn new(kwargs: Option<Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
let mut builder = tk::models::unigram::UnigramTrainer::builder();
if let Some(kwargs) = kwargs {
for (key, val) in kwargs {
@ -874,7 +873,7 @@ impl PyUnigramTrainer {
/// Trainers Module
#[pymodule]
pub fn trainers(_py: Python, m: &PyModule) -> PyResult<()> {
pub fn trainers(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyTrainer>()?;
m.add_class::<PyBpeTrainer>()?;
m.add_class::<PyWordPieceTrainer>()?;
@ -893,7 +892,7 @@ mod tests {
Python::with_gil(|py| {
let py_trainer = PyTrainer::new(Arc::new(RwLock::new(BpeTrainer::default().into())));
let py_bpe = py_trainer.get_as_subtype(py).unwrap();
assert_eq!("BpeTrainer", py_bpe.as_ref(py).get_type().name().unwrap());
assert_eq!("BpeTrainer", py_bpe.bind(py).get_type().qualname().unwrap());
})
}
}
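
The trainer setters switch from the `PyChar` wrapper to plain `Vec<char>` and from `&PyList` to `&Bound<'_, PyList>`. A hedged sketch of both setter shapes on a hypothetical `PyToyTrainer` class:

```rust
use pyo3::prelude::*;
use pyo3::types::PyList;

#[pyclass]
#[derive(Default)]
struct PyToyTrainer {
    initial_alphabet: Vec<char>,
    special_tokens: Vec<String>,
}

#[pymethods]
impl PyToyTrainer {
    #[new]
    fn new() -> Self {
        Self::default()
    }

    /// A Python list of one-character strings extracts directly as Vec<char>.
    #[setter]
    fn set_initial_alphabet(&mut self, alphabet: Vec<char>) {
        self.initial_alphabet = alphabet;
    }

    /// Lists that need per-item handling are taken as &Bound<'_, PyList>.
    #[setter]
    fn set_special_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<()> {
        self.special_tokens = tokens
            .iter()
            .map(|t| t.extract::<String>())
            .collect::<PyResult<Vec<_>>>()?;
        Ok(())
    }
}
```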

View File

@ -50,7 +50,7 @@ pub struct PyBufferedIterator<T, F> {
impl<T, F, I> PyBufferedIterator<T, F>
where
F: Fn(&PyAny) -> I,
F: Fn(Bound<'_, PyAny>) -> I,
I: IntoIterator<Item = PyResult<T>>,
{
/// Create a new PyBufferedIterator using the provided Python object.
@ -62,10 +62,10 @@ where
///
/// The `buffer_size` represents the number of items that we buffer before we
/// need to acquire the GIL again.
pub fn new(iter: &PyAny, converter: F, buffer_size: usize) -> PyResult<Self> {
pub fn new(iter: &Bound<'_, PyAny>, converter: F, buffer_size: usize) -> PyResult<Self> {
let py = iter.py();
let iter: Py<PyAny> = unsafe {
py.from_borrowed_ptr_or_err::<PyAny>(pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
Bound::from_borrowed_ptr_or_err(py, pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
.to_object(py)
};
@ -89,9 +89,10 @@ where
}
match unsafe {
py.from_owned_ptr_or_opt::<PyAny>(pyo3::ffi::PyIter_Next(
self.iter.as_ref().unwrap().as_ref(py).as_ptr(),
))
Bound::from_owned_ptr_or_opt(
py,
pyo3::ffi::PyIter_Next(self.iter.as_ref().unwrap().bind(py).as_ptr()),
)
} {
Some(obj) => self.buffer.extend((self.converter)(obj)),
None => {
@ -112,7 +113,7 @@ where
impl<T, F, I> Iterator for PyBufferedIterator<T, F>
where
F: Fn(&PyAny) -> I,
F: Fn(Bound<'_, PyAny>) -> I,
I: IntoIterator<Item = PyResult<T>>,
{
type Item = PyResult<T>;
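
The buffered iterator keeps using raw `PyObject_GetIter`/`PyIter_Next` through the new `Bound::from_*_ptr_*` constructors because it buffers items across GIL releases. When that buffering is not needed, 0.21 also offers a safe route; a hedged sketch of that alternative (a hypothetical helper, not the crate's `PyBufferedIterator`, assuming `Bound<PyIterator>` is iterated directly):

```rust
use pyo3::prelude::*;

/// Drain any Python iterable into a Vec<String> using the safe Bound API:
/// `iter()` wraps `PyObject_GetIter`, and the resulting iterator yields
/// `PyResult<Bound<PyAny>>` items, each wrapping a `PyIter_Next` call.
fn collect_strings(iterable: &Bound<'_, PyAny>) -> PyResult<Vec<String>> {
    let mut out = Vec::new();
    for item in iterable.iter()? {
        out.push(item?.extract::<String>()?);
    }
    Ok(out)
}
```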

View File

@ -1,6 +1,3 @@
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::*;
use std::marker::PhantomData;
use std::sync::{Arc, Mutex};
@ -14,25 +11,6 @@ pub use normalization::*;
pub use pretokenization::*;
pub use regex::*;
// PyChar
// This type is a temporary hack to accept `char` as argument
// To be removed once https://github.com/PyO3/pyo3/pull/1282 has been released
pub struct PyChar(pub char);
impl FromPyObject<'_> for PyChar {
fn extract(obj: &PyAny) -> PyResult<Self> {
let s = <PyString as PyTryFrom<'_>>::try_from(obj)?.to_str()?;
let mut iter = s.chars();
if let (Some(ch), None) = (iter.next(), iter.next()) {
Ok(Self(ch))
} else {
Err(exceptions::PyValueError::new_err(
"expected a string of length 1",
))
}
}
}
// RefMut utils
pub trait DestroyPtr {

View File

@ -9,15 +9,15 @@ use tk::pattern::Pattern;
/// Represents a Pattern as used by `NormalizedString`
#[derive(Clone, FromPyObject)]
pub enum PyPattern<'p> {
pub enum PyPattern {
#[pyo3(annotation = "str")]
Str(&'p str),
Str(String),
#[pyo3(annotation = "tokenizers.Regex")]
Regex(Py<PyRegex>),
// TODO: Add the compatibility for Fn(char) -> bool
}
impl Pattern for PyPattern<'_> {
impl Pattern for PyPattern {
fn find_matches(&self, inside: &str) -> tk::Result<Vec<(tk::Offsets, bool)>> {
match self {
PyPattern::Str(s) => {
@ -35,8 +35,8 @@ impl Pattern for PyPattern<'_> {
}
}
impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
fn from(pattern: PyPattern<'_>) -> Self {
impl From<PyPattern> for tk::normalizers::replace::ReplacePattern {
fn from(pattern: PyPattern) -> Self {
match pattern {
PyPattern::Str(s) => Self::String(s.to_owned()),
PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@ -44,8 +44,8 @@ impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
}
}
impl From<PyPattern<'_>> for tk::pre_tokenizers::split::SplitPattern {
fn from(pattern: PyPattern<'_>) -> Self {
impl From<PyPattern> for tk::pre_tokenizers::split::SplitPattern {
fn from(pattern: PyPattern) -> Self {
match pattern {
PyPattern::Str(s) => Self::String(s.to_owned()),
PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@ -117,7 +117,7 @@ impl From<PySplitDelimiterBehavior> for SplitDelimiterBehavior {
}
}
fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
fn filter(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
let err = "`filter` expect a callable with the signature: `fn(char) -> bool`";
if !func.is_callable() {
@ -134,7 +134,7 @@ fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
}
}
fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
fn for_each(normalized: &NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
let err = "`for_each` expect a callable with the signature: `fn(char)`";
if !func.is_callable() {
@ -148,14 +148,14 @@ fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
}
}
fn map(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
fn map(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
let err = "`map` expect a callable with the signature: `fn(char) -> char`";
if !func.is_callable() {
Err(exceptions::PyTypeError::new_err(err))
} else {
normalized.map(|c| {
let c: &str = func
let c: String = func
.call1((c.to_string(),))
.expect(err)
.extract()
@ -296,13 +296,13 @@ impl PyNormalizedString {
/// Filter each character of the string using the given func
#[pyo3(text_signature = "(self, func)")]
fn filter(&mut self, func: &PyAny) -> PyResult<()> {
fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
filter(&mut self.normalized, func)
}
/// Calls the given function for each character of the string
#[pyo3(text_signature = "(self, func)")]
fn for_each(&self, func: &PyAny) -> PyResult<()> {
fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
for_each(&self.normalized, func)
}
@ -311,7 +311,7 @@ impl PyNormalizedString {
/// Replaces each character of the string using the returned value. Each
/// returned value **must** be a str of length 1 (ie a character).
#[pyo3(text_signature = "(self, func)")]
fn map(&mut self, func: &PyAny) -> PyResult<()> {
fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
map(&mut self.normalized, func)
}
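
The `map` helper now receives the callable as `&Bound<'_, PyAny>` and extracts an owned `String` from each call result, since the result object is a temporary. A hedged sketch of the same call-and-extract shape, with a hypothetical helper name:

```rust
use pyo3::exceptions;
use pyo3::prelude::*;

/// Apply a Python `fn(str) -> str` callable to each input and collect the
/// results; extracting `String` avoids borrowing from the temporary return
/// value of `call1`.
fn map_with_callable(func: &Bound<'_, PyAny>, inputs: &[&str]) -> PyResult<Vec<String>> {
    if !func.is_callable() {
        return Err(exceptions::PyTypeError::new_err(
            "expected a callable with the signature `fn(str) -> str`",
        ));
    }
    inputs
        .iter()
        .map(|s| func.call1((*s,))?.extract::<String>())
        .collect()
}
```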
@ -551,21 +551,21 @@ impl PyNormalizedStringRefMut {
.ok_or_else(PyNormalizedStringRefMut::destroyed_error)?
}
fn filter(&mut self, func: &PyAny) -> PyResult<()> {
fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map_mut(|n| filter(n, func))
.ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
Ok(())
}
fn for_each(&self, func: &PyAny) -> PyResult<()> {
fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map(|n| for_each(n, func))
.ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
Ok(())
}
fn map(&mut self, func: &PyAny) -> PyResult<()> {
fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map_mut(|n| map(n, func))
.ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;

View File

@ -12,7 +12,7 @@ use crate::error::ToPyResult;
use crate::token::PyToken;
use tk::{OffsetReferential, OffsetType, Offsets, PreTokenizedString, Token};
fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
fn split(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
if !func.is_callable() {
Err(exceptions::PyTypeError::new_err(
"`split` expect a callable with the signature: \
@ -30,7 +30,7 @@ fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
}
}
fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
fn normalize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
if !func.is_callable() {
Err(exceptions::PyTypeError::new_err(
"`normalize` expect a callable with the signature: \
@ -46,7 +46,7 @@ fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
}
}
fn tokenize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
fn tokenize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
if !func.is_callable() {
Err(exceptions::PyTypeError::new_err(
"`tokenize` expect a callable with the signature: \
@ -183,7 +183,7 @@ impl PyPreTokenizedString {
/// In order for the offsets to be tracked accurately, any returned `NormalizedString`
/// should come from calling either `.split` or `.slice` on the received one.
#[pyo3(text_signature = "(self, func)")]
fn split(&mut self, func: &PyAny) -> PyResult<()> {
fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
split(&mut self.pretok, func)
}
@ -195,7 +195,7 @@ impl PyPreTokenizedString {
/// does not need to return anything, just calling the methods on the provided
/// NormalizedString allow its modification.
#[pyo3(text_signature = "(self, func)")]
fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
normalize(&mut self.pretok, func)
}
@ -206,7 +206,7 @@ impl PyPreTokenizedString {
/// The function used to tokenize each underlying split. This function must return
/// a list of Token generated from the input str.
#[pyo3(text_signature = "(self, func)")]
fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
tokenize(&mut self.pretok, func)
}
@ -289,19 +289,19 @@ impl PyPreTokenizedStringRefMut {
#[pymethods]
impl PyPreTokenizedStringRefMut {
fn split(&mut self, func: &PyAny) -> PyResult<()> {
fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map_mut(|pretok| split(pretok, func))
.ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
}
fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map_mut(|pretok| normalize(pretok, func))
.ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
}
fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
self.inner
.map_mut(|pretok| tokenize(pretok, func))
.ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?