PyO3 0.21. (#1494)

* PyO3 0.21.

* Upgraded everything.

* Rustfmt.
Author:    Nicolas Patry
Date:      2024-04-16 13:49:52 +02:00
Committed: GitHub
Parent:    914576f7ed
Commit:    d5a8cc7a49

16 changed files with 180 additions and 197 deletions
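Most of the changes below are one mechanical migration: PyO3 0.21 deprecates the "GIL Ref" API (&PyAny, &PyDict, &PyList, ...) in favor of the Bound<'py, T> smart pointer, with `_bound` constructor variants on the concrete types. A minimal sketch of the recurring pattern (illustrative names, not code from this diff):

use pyo3::prelude::*;
use pyo3::types::PyDict;

// PyO3 0.20 shape:  fn configure(kwargs: Option<&PyDict>) -> PyResult<usize>
// PyO3 0.21 shape, as applied throughout the bindings below:
#[pyfunction]
#[pyo3(signature = (**kwargs))]
fn configure(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<usize> {
    let mut seen = 0;
    if let Some(kwargs) = kwargs {
        // Iterating a Bound dict yields (Bound<PyAny>, Bound<PyAny>) pairs.
        for (key, _value) in kwargs {
            let _name: String = key.extract()?;
            seen += 1;
        }
    }
    Ok(seen)
}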


@@ -9,24 +9,24 @@ name = "tokenizers"
 crate-type = ["cdylib"]

 [dependencies]
-rayon = "1.8"
+rayon = "1.10"
 serde = { version = "1.0", features = [ "rc", "derive" ]}
 serde_json = "1.0"
 libc = "0.2"
-env_logger = "0.10.0"
-pyo3 = { version = "0.20" }
-numpy = "0.20.0"
+env_logger = "0.11"
+pyo3 = { version = "0.21" }
+numpy = "0.21"
 ndarray = "0.15"
 onig = { version = "6.4", default-features = false }
-itertools = "0.11"
+itertools = "0.12"

 [dependencies.tokenizers]
 version = "0.16.0-dev.0"
 path = "../../tokenizers"

 [dev-dependencies]
-tempfile = "3.8"
-pyo3 = { version = "0.20", features = ["auto-initialize"] }
+tempfile = "3.10"
+pyo3 = { version = "0.21", features = ["auto-initialize"] }

 [features]
 defaut = ["pyo3/extension-module"]


@@ -1,7 +1,6 @@
 use std::sync::{Arc, RwLock};

 use crate::pre_tokenizers::from_string;
-use crate::utils::PyChar;
 use crate::utils::PyPattern;
 use pyo3::exceptions;
 use pyo3::prelude::*;
@@ -85,7 +84,7 @@ impl PyDecoder {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -161,7 +160,7 @@ pub struct PyByteLevelDec {}
 impl PyByteLevelDec {
     #[new]
     #[pyo3(signature = (**_kwargs), text_signature = "(self)")]
-    fn new(_kwargs: Option<&PyDict>) -> (Self, PyDecoder) {
+    fn new(_kwargs: Option<&Bound<'_, PyDict>>) -> (Self, PyDecoder) {
         (PyByteLevelDec {}, ByteLevel::default().into())
     }
 }
@@ -318,8 +317,8 @@ impl PyMetaspaceDec {
     }

     #[setter]
-    fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
-        setter!(self_, Metaspace, @set_replacement, replacement.0);
+    fn set_replacement(self_: PyRef<Self>, replacement: char) {
+        setter!(self_, Metaspace, @set_replacement, replacement);
     }

     #[getter]
@@ -352,16 +351,12 @@ impl PyMetaspaceDec {
     }

     #[new]
-    #[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
-    fn new(
-        replacement: PyChar,
-        prepend_scheme: String,
-        split: bool,
-    ) -> PyResult<(Self, PyDecoder)> {
+    #[pyo3(signature = (replacement = '▁', prepend_scheme = String::from("always"), split = true), text_signature = "(self, replacement = \"\", prepend_scheme = \"always\", split = True)")]
+    fn new(replacement: char, prepend_scheme: String, split: bool) -> PyResult<(Self, PyDecoder)> {
         let prepend_scheme = from_string(prepend_scheme)?;
         Ok((
             PyMetaspaceDec {},
-            Metaspace::new(replacement.0, prepend_scheme, split).into(),
+            Metaspace::new(replacement, prepend_scheme, split).into(),
         ))
     }
 }
@@ -463,7 +458,7 @@ pub struct PySequenceDecoder {}
 impl PySequenceDecoder {
     #[new]
     #[pyo3(signature = (decoders_py), text_signature = "(self, decoders)")]
-    fn new(decoders_py: &PyList) -> PyResult<(Self, PyDecoder)> {
+    fn new(decoders_py: &Bound<'_, PyList>) -> PyResult<(Self, PyDecoder)> {
         let mut decoders: Vec<DecoderWrapper> = Vec::with_capacity(decoders_py.len());
         for decoder_py in decoders_py.iter() {
             let decoder: PyRef<PyDecoder> = decoder_py.extract()?;
@@ -476,8 +471,8 @@ impl PySequenceDecoder {
         Ok((PySequenceDecoder {}, Sequence::new(decoders).into()))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }
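The `__getnewargs__` rewrites above show the constructor side of the migration: `PyTuple::new`/`PyList::empty` returned GIL refs borrowed from the pool, while the 0.21 `_bound` variants return owned `Bound` handles tied to the `'py` lifetime. A standalone sketch:

use pyo3::prelude::*;
use pyo3::types::{PyList, PyTuple};

// Builds the ([],) tuple that pickling support hands back to __new__.
fn default_newargs(py: Python<'_>) -> Bound<'_, PyTuple> {
    PyTuple::new_bound(py, [PyList::empty_bound(py)])
}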
@@ -497,7 +492,7 @@ impl Decoder for CustomDecoder {
         Python::with_gil(|py| {
             let decoded = self
                 .inner
-                .call_method(py, "decode", (tokens,), None)?
+                .call_method_bound(py, "decode", (tokens,), None)?
                 .extract(py)?;
             Ok(decoded)
         })
@@ -507,7 +502,7 @@ impl Decoder for CustomDecoder {
         Python::with_gil(|py| {
             let decoded = self
                 .inner
-                .call_method(py, "decode_chain", (tokens,), None)?
+                .call_method_bound(py, "decode_chain", (tokens,), None)?
                 .extract(py)?;
             Ok(decoded)
         })
@@ -572,7 +567,7 @@ impl Decoder for PyDecoderWrapper {

 /// Decoders Module
 #[pymodule]
-pub fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn decoders(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyDecoder>()?;
     m.add_class::<PyByteLevelDec>()?;
     m.add_class::<PyReplaceDec>()?;
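Every `#[pymodule]` function in the bindings drops its unused `Python` argument and takes `&Bound<'_, PyModule>`, a form PyO3 0.21 accepts directly. Hedged sketch of the new shape (illustrative module, not from this diff):

use pyo3::prelude::*;

// The real tokenizers modules only register classes; this just shows the signature.
#[pymodule]
fn example(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add("answer", 42)?;
    Ok(())
}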
@@ -602,7 +597,7 @@ mod test {
         Python::with_gil(|py| {
             let py_dec = PyDecoder::new(Metaspace::default().into());
             let py_meta = py_dec.get_as_subtype(py).unwrap();
-            assert_eq!("Metaspace", py_meta.as_ref(py).get_type().name().unwrap());
+            assert_eq!("Metaspace", py_meta.bind(py).get_type().qualname().unwrap());
         })
     }
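The test updates repeat another 0.21 idiom: a stored `Py<T>` is re-attached to the GIL with `bind(py)` instead of `as_ref(py)`, and `name()` gives way to `qualname()`, which returns an owned String. A hedged standalone equivalent:

use pyo3::prelude::*;

// 0.20: obj.as_ref(py).get_type().name()
// 0.21: bind(py) yields &Bound<'_, PyAny>; get_type() a Bound<PyType>.
fn type_qualname(obj: &Py<PyAny>) -> PyResult<String> {
    Python::with_gil(|py| obj.bind(py).get_type().qualname())
}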


@@ -37,7 +37,7 @@ impl PyEncoding {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -391,10 +391,10 @@ impl PyEncoding {
     #[pyo3(
         text_signature = "(self, length, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]')"
     )]
-    fn pad(&mut self, length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn pad(&mut self, length: usize, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
         let mut pad_id = 0;
         let mut pad_type_id = 0;
-        let mut pad_token = "[PAD]";
+        let mut pad_token = "[PAD]".to_string();
         let mut direction = PaddingDirection::Right;

         if let Some(kwargs) = kwargs {
@@ -422,7 +422,7 @@ impl PyEncoding {
             }
         }
         self.encoding
-            .pad(length, pad_id, pad_type_id, pad_token, direction);
+            .pad(length, pad_id, pad_type_id, &pad_token, direction);
         Ok(())
     }


@@ -35,7 +35,7 @@ impl<T> ToPyResult<T> {
 }

 pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
-    let deprecation_warning = py.import("builtins")?.getattr("DeprecationWarning")?;
+    let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
     let full_message = format!("Deprecated in {}: {}", version, message);
-    pyo3::PyErr::warn(py, deprecation_warning, &full_message, 0)
+    pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
 }


@@ -47,7 +47,7 @@ extern "C" fn child_after_fork() {
 /// Tokenizers Module
 #[pymodule]
-pub fn tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     let _ = env_logger::try_init_from_env("TOKENIZERS_LOG");

     // Register the fork callback


@@ -105,7 +105,7 @@ impl PyModel {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -260,7 +260,10 @@ impl PyModel {
 pub struct PyBPE {}

 impl PyBPE {
-    fn with_builder(mut builder: BpeBuilder, kwargs: Option<&PyDict>) -> PyResult<(Self, PyModel)> {
+    fn with_builder(
+        mut builder: BpeBuilder,
+        kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> PyResult<(Self, PyModel)> {
         if let Some(kwargs) = kwargs {
             for (key, value) in kwargs {
                 let key: &str = key.extract()?;
@@ -321,14 +324,14 @@ macro_rules! setter {
 }

 #[derive(FromPyObject)]
-enum PyVocab<'a> {
+enum PyVocab {
     Vocab(Vocab),
-    Filename(&'a str),
+    Filename(String),
 }

 #[derive(FromPyObject)]
-enum PyMerges<'a> {
+enum PyMerges {
     Merges(Merges),
-    Filename(&'a str),
+    Filename(String),
 }

 #[pymethods]
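Dropping the <'a> lifetimes from PyVocab/PyMerges is the same story as pad_token: a derived FromPyObject that borrowed &'a str from the input no longer fits the Bound extraction model, so the variants own their data. Sketch of the owned form (hypothetical stand-in types):

use pyo3::prelude::*;
use std::collections::HashMap;

// Accepts either an in-memory vocab or a filename, like PyVocab above;
// the derive tries each variant in order.
#[derive(FromPyObject)]
enum VocabArg {
    Vocab(HashMap<String, u32>),
    Filename(String),
}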
@@ -417,7 +420,7 @@ impl PyBPE {
         py: Python<'_>,
         vocab: Option<PyVocab>,
         merges: Option<PyMerges>,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         if (vocab.is_some() && merges.is_none()) || (vocab.is_none() && merges.is_some()) {
             return Err(exceptions::PyValueError::new_err(
@@ -502,11 +505,11 @@ impl PyBPE {
     #[pyo3(signature = (vocab, merges, **kwargs))]
     #[pyo3(text_signature = "(cls, vocab, merge, **kwargs)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
         merges: &str,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<Py<Self>> {
         let (vocab, merges) = BPE::read_file(vocab, merges).map_err(|e| {
             exceptions::PyException::new_err(format!("Error while reading BPE files: {}", e))
@@ -540,7 +543,7 @@ pub struct PyWordPiece {}
 impl PyWordPiece {
     fn with_builder(
         mut builder: WordPieceBuilder,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -612,7 +615,7 @@ impl PyWordPiece {
     fn new(
         py: Python<'_>,
         vocab: Option<PyVocab>,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<(Self, PyModel)> {
         let mut builder = WordPiece::builder();
@@ -677,10 +680,10 @@ impl PyWordPiece {
     #[pyo3(signature = (vocab, **kwargs))]
     #[pyo3(text_signature = "(vocab, **kwargs)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
-        kwargs: Option<&PyDict>,
+        kwargs: Option<&Bound<'_, PyDict>>,
     ) -> PyResult<Py<Self>> {
         let vocab = WordPiece::read_file(vocab).map_err(|e| {
             exceptions::PyException::new_err(format!("Error while reading WordPiece file: {}", e))
@@ -796,7 +799,7 @@ impl PyWordLevel {
     #[pyo3(signature = (vocab, unk_token = None))]
     #[pyo3(text_signature = "(vocab, unk_token)")]
     fn from_file(
-        _cls: &PyType,
+        _cls: &Bound<'_, PyType>,
         py: Python,
         vocab: &str,
         unk_token: Option<String>,
@@ -849,7 +852,7 @@ impl PyUnigram {

 /// Models Module
 #[pymodule]
-pub fn models(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn models(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyModel>()?;
     m.add_class::<PyBPE>()?;
     m.add_class::<PyWordPiece>()?;
@@ -870,7 +873,7 @@ mod test {
         Python::with_gil(|py| {
             let py_model = PyModel::from(BPE::default());
             let py_bpe = py_model.get_as_subtype(py).unwrap();
-            assert_eq!("BPE", py_bpe.as_ref(py).get_type().name().unwrap());
+            assert_eq!("BPE", py_bpe.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -113,7 +113,7 @@ impl PyNormalizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -345,7 +345,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(text_signature = None)]
-    fn new(normalizers: &PyList) -> PyResult<(Self, PyNormalizer)> {
+    fn new(normalizers: &Bound<'_, PyList>) -> PyResult<(Self, PyNormalizer)> {
         let mut sequence = Vec::with_capacity(normalizers.len());
         for n in normalizers.iter() {
             let normalizer: PyRef<PyNormalizer> = n.extract()?;
@@ -360,8 +360,8 @@ impl PySequence {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }

     fn __len__(&self) -> usize {
@@ -467,11 +467,11 @@ pub struct PyPrecompiled {}
 impl PyPrecompiled {
     #[new]
     #[pyo3(text_signature = "(self, precompiled_charsmap)")]
-    fn new(py_precompiled_charsmap: &PyBytes) -> PyResult<(Self, PyNormalizer)> {
-        let precompiled_charsmap: &[u8] = FromPyObject::extract(py_precompiled_charsmap)?;
+    fn new(precompiled_charsmap: Vec<u8>) -> PyResult<(Self, PyNormalizer)> {
+        // let precompiled_charsmap: Vec<u8> = FromPyObject::extract(py_precompiled_charsmap)?;
         Ok((
             PyPrecompiled {},
-            Precompiled::from(precompiled_charsmap)
+            Precompiled::from(&precompiled_charsmap)
                 .map_err(|e| {
                     exceptions::PyException::new_err(format!(
                         "Error while attempting to build Precompiled normalizer: {}",
@@ -512,7 +512,7 @@ impl tk::tokenizer::Normalizer for CustomNormalizer {
     fn normalize(&self, normalized: &mut NormalizedString) -> tk::Result<()> {
         Python::with_gil(|py| {
             let normalized = PyNormalizedStringRefMut::new(normalized);
-            let py_normalized = self.inner.as_ref(py);
+            let py_normalized = self.inner.bind(py);
             py_normalized.call_method("normalize", (normalized.get(),), None)?;
             Ok(())
         })
@@ -635,7 +635,7 @@ impl Normalizer for PyNormalizerWrapper {

 /// Normalizers Module
 #[pymodule]
-pub fn normalizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn normalizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyNormalizer>()?;
     m.add_class::<PyBertNormalizer>()?;
     m.add_class::<PyNFD>()?;
@@ -667,7 +667,7 @@ mod test {
         Python::with_gil(|py| {
             let py_norm = PyNormalizer::new(NFC.into());
             let py_nfc = py_norm.get_as_subtype(py).unwrap();
-            assert_eq!("NFC", py_nfc.as_ref(py).get_type().name().unwrap());
+            assert_eq!("NFC", py_nfc.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -118,7 +118,7 @@ impl PyPreTokenizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -263,7 +263,7 @@ impl PyByteLevel {
     fn new(
         add_prefix_space: bool,
         use_regex: bool,
-        _kwargs: Option<&PyDict>,
+        _kwargs: Option<&Bound<'_, PyDict>>,
     ) -> (Self, PyPreTokenizer) {
         (
             PyByteLevel {},
@@ -352,8 +352,8 @@ impl PySplit {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [" ", "removed"])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [" ", "removed"])
     }
 }
@@ -372,21 +372,21 @@ impl PyCharDelimiterSplit {
     }

     #[setter]
-    fn set_delimiter(self_: PyRef<Self>, delimiter: PyChar) {
-        setter!(self_, Delimiter, delimiter, delimiter.0);
+    fn set_delimiter(self_: PyRef<Self>, delimiter: char) {
+        setter!(self_, Delimiter, delimiter, delimiter);
     }

     #[new]
     #[pyo3(text_signature = None)]
-    pub fn new(delimiter: PyChar) -> PyResult<(Self, PyPreTokenizer)> {
+    pub fn new(delimiter: char) -> PyResult<(Self, PyPreTokenizer)> {
         Ok((
             PyCharDelimiterSplit {},
-            CharDelimiterSplit::new(delimiter.0).into(),
+            CharDelimiterSplit::new(delimiter).into(),
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [" "])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [" "])
     }
 }
@@ -430,7 +430,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(text_signature = "(self, pretokenizers)")]
-    fn new(pre_tokenizers: &PyList) -> PyResult<(Self, PyPreTokenizer)> {
+    fn new(pre_tokenizers: &Bound<'_, PyList>) -> PyResult<(Self, PyPreTokenizer)> {
         let mut sequence = Vec::with_capacity(pre_tokenizers.len());
         for n in pre_tokenizers.iter() {
             let pretokenizer: PyRef<PyPreTokenizer> = n.extract()?;
@@ -447,8 +447,8 @@ impl PySequence {
         ))
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }
@@ -490,8 +490,8 @@ impl PyMetaspace {
     }

     #[setter]
-    fn set_replacement(self_: PyRef<Self>, replacement: PyChar) {
-        setter!(self_, Metaspace, @set_replacement, replacement.0);
+    fn set_replacement(self_: PyRef<Self>, replacement: char) {
+        setter!(self_, Metaspace, @set_replacement, replacement);
     }

     #[getter]
@@ -524,15 +524,15 @@ impl PyMetaspace {
     }

     #[new]
-    #[pyo3(signature = (replacement = PyChar('▁'), prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
+    #[pyo3(signature = (replacement = '▁', prepend_scheme=String::from("always"), split=true), text_signature = "(self, replacement=\"_\", prepend_scheme=\"always\", split=True)")]
     fn new(
-        replacement: PyChar,
+        replacement: char,
         prepend_scheme: String,
         split: bool,
     ) -> PyResult<(Self, PyPreTokenizer)> {
         // Create a new Metaspace instance
         let prepend_scheme = from_string(prepend_scheme)?;
-        let new_instance: Metaspace = Metaspace::new(replacement.0, prepend_scheme, split);
+        let new_instance: Metaspace = Metaspace::new(replacement, prepend_scheme, split);
         Ok((PyMetaspace {}, new_instance.into()))
     }
 }
@@ -599,7 +599,7 @@ impl tk::tokenizer::PreTokenizer for CustomPreTokenizer {
     fn pre_tokenize(&self, sentence: &mut PreTokenizedString) -> tk::Result<()> {
         Python::with_gil(|py| {
             let pretok = PyPreTokenizedStringRefMut::new(sentence);
-            let py_pretok = self.inner.as_ref(py);
+            let py_pretok = self.inner.bind(py);
             py_pretok.call_method("pre_tokenize", (pretok.get(),), None)?;
             Ok(())
         })
@@ -722,7 +722,7 @@ impl PreTokenizer for PyPreTokenizerWrapper {

 /// PreTokenizers Module
 #[pymodule]
-pub fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn pre_tokenizers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyPreTokenizer>()?;
     m.add_class::<PyByteLevel>()?;
     m.add_class::<PyWhitespace>()?;
@@ -754,7 +754,7 @@ mod test {
         Python::with_gil(|py| {
             let py_norm = PyPreTokenizer::new(Whitespace {}.into());
             let py_wsp = py_norm.get_as_subtype(py).unwrap();
-            assert_eq!("Whitespace", py_wsp.as_ref(py).get_type().name().unwrap());
+            assert_eq!("Whitespace", py_wsp.bind(py).get_type().qualname().unwrap());
         })
     }


@@ -78,7 +78,7 @@ impl PyPostProcessor {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -166,8 +166,8 @@ impl PyBertProcessing {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [("", 0), ("", 0)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [("", 0), ("", 0)])
     }
 }
@@ -216,8 +216,8 @@ impl PyRobertaProcessing {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [("", 0), ("", 0)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [("", 0), ("", 0)])
     }
 }
@@ -235,7 +235,10 @@ pub struct PyByteLevel {}
 impl PyByteLevel {
     #[new]
     #[pyo3(signature = (trim_offsets = None, **_kwargs), text_signature = "(self, trim_offsets=True)")]
-    fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
+    fn new(
+        trim_offsets: Option<bool>,
+        _kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> (Self, PyPostProcessor) {
         let mut byte_level = ByteLevel::default();

         if let Some(to) = trim_offsets {
@@ -304,7 +307,7 @@ impl FromPyObject<'_> for PyTemplate {
             Ok(Self(
                 s.try_into().map_err(exceptions::PyValueError::new_err)?,
             ))
-        } else if let Ok(s) = ob.extract::<Vec<&str>>() {
+        } else if let Ok(s) = ob.extract::<Vec<String>>() {
             Ok(Self(
                 s.try_into().map_err(exceptions::PyValueError::new_err)?,
             ))
@@ -424,7 +427,7 @@ pub struct PySequence {}
 impl PySequence {
     #[new]
     #[pyo3(signature = (processors_py), text_signature = "(self, processors)")]
-    fn new(processors_py: &PyList) -> (Self, PyPostProcessor) {
+    fn new(processors_py: &Bound<'_, PyList>) -> (Self, PyPostProcessor) {
         let mut processors: Vec<PostProcessorWrapper> = Vec::with_capacity(processors_py.len());
         for n in processors_py.iter() {
             let processor: PyRef<PyPostProcessor> = n.extract().unwrap();
@@ -438,14 +441,14 @@ impl PySequence {
         )
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
-        PyTuple::new(py, [PyList::empty(py)])
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
+        PyTuple::new_bound(py, [PyList::empty_bound(py)])
     }
 }

 /// Processors Module
 #[pymodule]
-pub fn processors(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn processors(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyPostProcessor>()?;
     m.add_class::<PyBertProcessing>()?;
     m.add_class::<PyRobertaProcessing>()?;
@@ -474,7 +477,7 @@ mod test {
         let py_bert = py_proc.get_as_subtype(py).unwrap();
         assert_eq!(
             "BertProcessing",
-            py_bert.as_ref(py).get_type().name().unwrap()
+            py_bert.bind(py).get_type().qualname().unwrap()
         );
     })
 }


@@ -98,8 +98,8 @@ impl PyAddedToken {
         token
     }

-    pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
-        let dict = PyDict::new(py);
+    pub fn as_pydict<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
+        let dict = PyDict::new_bound(py);
         let token = self.get_token();

         dict.set_item("content", token.content)?;
@@ -130,7 +130,7 @@ impl From<tk::AddedToken> for PyAddedToken {
 impl PyAddedToken {
     #[new]
     #[pyo3(signature = (content=None, **kwargs), text_signature = "(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True, special=False)")]
-    fn __new__(content: Option<&str>, kwargs: Option<&PyDict>) -> PyResult<Self> {
+    fn __new__(content: Option<&str>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<Self> {
         let mut token = PyAddedToken::from(content.unwrap_or(""), None);

         if let Some(kwargs) = kwargs {
@@ -150,7 +150,7 @@ impl PyAddedToken {
         Ok(token)
     }

-    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> {
+    fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
         self.as_pydict(py)
     }
@@ -329,7 +329,7 @@ impl FromPyObject<'_> for PyArrayUnicode {
                 );
                 let py = ob.py();
                 let obj = PyObject::from_owned_ptr(py, unicode);
-                let s = obj.downcast::<PyString>(py)?;
+                let s = obj.downcast_bound::<PyString>(py)?;
                 Ok(s.to_string_lossy().trim_matches(char::from(0)).to_owned())
             })
             .collect::<PyResult<Vec<_>>>()?;
@@ -353,7 +353,7 @@ impl FromPyObject<'_> for PyArrayStr {
             .as_array()
             .iter()
             .map(|obj| {
-                let s = obj.downcast::<PyString>(ob.py())?;
+                let s = obj.downcast_bound::<PyString>(ob.py())?;
                 Ok(s.to_string_lossy().into_owned())
             })
             .collect::<PyResult<Vec<_>>>()?;
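Both numpy-array paths above swap `downcast::<PyString>(py)` for `downcast_bound`, the 0.21 entry point for type-checking a stored PyObject. Standalone sketch (illustrative helper):

use pyo3::prelude::*;
use pyo3::types::PyString;

// downcast_bound hands back a &Bound<'py, PyString>; the error value
// converts into a Python TypeError via `?`.
fn lossy_str(py: Python<'_>, obj: &PyObject) -> PyResult<String> {
    let s = obj.downcast_bound::<PyString>(py)?;
    Ok(s.to_string_lossy().into_owned())
}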
@@ -377,12 +377,12 @@ impl<'s> FromPyObject<'s> for PreTokenizedInputSequence<'s> {
             return Ok(Self(seq.into()));
         }
         if let Ok(s) = ob.downcast::<PyList>() {
-            if let Ok(seq) = s.extract::<Vec<&str>>() {
+            if let Ok(seq) = s.extract::<Vec<String>>() {
                 return Ok(Self(seq.into()));
             }
         }
         if let Ok(s) = ob.downcast::<PyTuple>() {
-            if let Ok(seq) = s.extract::<Vec<&str>>() {
+            if let Ok(seq) = s.extract::<Vec<String>>() {
                 return Ok(Self(seq.into()));
             }
         }
@@ -492,7 +492,7 @@ impl PyTokenizer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -510,9 +510,9 @@ impl PyTokenizer {
         }
     }

-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
         let model = PyModel::from(BPE::default()).into_py(py);
-        PyTuple::new(py, vec![model])
+        PyTuple::new_bound(py, vec![model])
     }

     /// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@@ -557,7 +557,7 @@ impl PyTokenizer {
     /// :class:`~tokenizers.Tokenizer`: The new tokenizer
     #[staticmethod]
     #[pyo3(text_signature = "(buffer)")]
-    fn from_buffer(buffer: &PyBytes) -> PyResult<Self> {
+    fn from_buffer(buffer: &Bound<'_, PyBytes>) -> PyResult<Self> {
         let tokenizer = serde_json::from_slice(buffer.as_bytes()).map_err(|e| {
             exceptions::PyValueError::new_err(format!(
                 "Cannot instantiate Tokenizer from buffer: {}",
@@ -591,18 +591,18 @@ impl PyTokenizer {
         auth_token: Option<String>,
     ) -> PyResult<Self> {
         let path = Python::with_gil(|py| -> PyResult<String> {
-            let huggingface_hub = PyModule::import(py, intern!(py, "huggingface_hub"))?;
+            let huggingface_hub = PyModule::import_bound(py, intern!(py, "huggingface_hub"))?;
             let hf_hub_download = huggingface_hub.getattr(intern!(py, "hf_hub_download"))?;
             let kwargs = [
                 (intern!(py, "repo_id"), identifier),
                 (intern!(py, "filename"), "tokenizer.json"),
                 (intern!(py, "revision"), &revision),
             ]
-            .into_py_dict(py);
+            .into_py_dict_bound(py);
             if let Some(auth_token) = auth_token {
                 kwargs.set_item(intern!(py, "token"), auth_token)?;
             }
-            let path: String = hf_hub_download.call((), Some(kwargs))?.extract()?;
+            let path: String = hf_hub_download.call((), Some(&kwargs))?.extract()?;
             Ok(path)
         })?;
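The from_pretrained hunk shows the 0.21 keyword-call convention: `into_py_dict_bound` builds a `Bound<PyDict>`, and `call` takes it by reference as `Option<&Bound<PyDict>>`. A self-contained sketch against the standard library (json.dumps stands in for hf_hub_download; only the calling convention is the point):

use pyo3::prelude::*;
use pyo3::types::IntoPyDict;

fn dumps_indented(py: Python<'_>) -> PyResult<String> {
    let json = py.import_bound("json")?;
    // Key/value pairs become a Bound<PyDict> of keyword arguments.
    let kwargs = [("indent", 2)].into_py_dict_bound(py);
    json.getattr("dumps")?
        .call((vec![1, 2, 3],), Some(&kwargs))?
        .extract()
}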
@@ -712,7 +712,11 @@ impl PyTokenizer {
     #[pyo3(
         text_signature = "(self, max_length, stride=0, strategy='longest_first', direction='right')"
     )]
-    fn enable_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn enable_truncation(
+        &mut self,
+        max_length: usize,
+        kwargs: Option<&Bound<'_, PyDict>>,
+    ) -> PyResult<()> {
         let mut params = TruncationParams {
             max_length,
             ..Default::default()
@@ -777,9 +781,9 @@ impl PyTokenizer {
     ///     (:obj:`dict`, `optional`):
     ///         A dict with the current truncation parameters if truncation is enabled
     #[getter]
-    fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
+    fn get_truncation<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
         self.tokenizer.get_truncation().map_or(Ok(None), |params| {
-            let dict = PyDict::new(py);
+            let dict = PyDict::new_bound(py);
             dict.set_item("max_length", params.max_length)?;
             dict.set_item("stride", params.stride)?;
@@ -817,7 +821,7 @@ impl PyTokenizer {
     #[pyo3(
         text_signature = "(self, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]', length=None, pad_to_multiple_of=None)"
     )]
-    fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
+    fn enable_padding(&mut self, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<()> {
         let mut params = PaddingParams::default();

         if let Some(kwargs) = kwargs {
@@ -887,9 +891,9 @@ impl PyTokenizer {
     ///     (:obj:`dict`, `optional`):
     ///         A dict with the current padding parameters if padding is enabled
     #[getter]
-    fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<&'py PyDict>> {
+    fn get_padding<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyDict>>> {
         self.tokenizer.get_padding().map_or(Ok(None), |params| {
-            let dict = PyDict::new(py);
+            let dict = PyDict::new_bound(py);
             dict.set_item(
                 "length",
@@ -948,8 +952,8 @@ impl PyTokenizer {
     )]
     fn encode(
         &self,
-        sequence: &PyAny,
-        pair: Option<&PyAny>,
+        sequence: &Bound<'_, PyAny>,
+        pair: Option<&Bound<'_, PyAny>>,
         is_pretokenized: bool,
         add_special_tokens: bool,
     ) -> PyResult<PyEncoding> {
@@ -1141,7 +1145,7 @@ impl PyTokenizer {
     /// Returns:
     ///     :obj:`int`: The number of tokens that were created in the vocabulary
     #[pyo3(text_signature = "(self, tokens)")]
-    fn add_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
+    fn add_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
         let tokens = tokens
             .into_iter()
             .map(|token| {
@@ -1178,7 +1182,7 @@ impl PyTokenizer {
     /// Returns:
     ///     :obj:`int`: The number of tokens that were created in the vocabulary
     #[pyo3(text_signature = "(self, tokens)")]
-    fn add_special_tokens(&mut self, tokens: &PyList) -> PyResult<usize> {
+    fn add_special_tokens(&mut self, tokens: &Bound<'_, PyList>) -> PyResult<usize> {
         let tokens = tokens
             .into_iter()
             .map(|token| {
@@ -1251,7 +1255,7 @@ impl PyTokenizer {
     fn train_from_iterator(
         &mut self,
         py: Python,
-        iterator: &PyAny,
+        iterator: &Bound<'_, PyAny>,
         trainer: Option<&mut PyTrainer>,
         length: Option<usize>,
     ) -> PyResult<()> {


@@ -2,7 +2,6 @@ use std::sync::{Arc, RwLock};

 use crate::models::PyModel;
 use crate::tokenizer::PyAddedToken;
-use crate::utils::PyChar;
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::*;
@@ -52,7 +51,7 @@ impl PyTrainer {
                 e
             ))
         })?;
-        Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
+        Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
     }

     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
@@ -215,7 +214,7 @@ impl PyBpeTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             BpeTrainer,
@@ -269,12 +268,12 @@ impl PyBpeTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             BpeTrainer,
             initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -300,7 +299,7 @@ impl PyBpeTrainer {
     #[new]
     #[pyo3(signature = (**kwargs), text_signature = None)]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::bpe::BpeTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -429,7 +428,7 @@ impl PyWordPieceTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             WordPieceTrainer,
@@ -473,12 +472,12 @@ impl PyWordPieceTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             WordPieceTrainer,
             @set_initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -507,7 +506,7 @@ impl PyWordPieceTrainer {
         signature = (** kwargs),
         text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
     )]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordpiece::WordPieceTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -621,7 +620,7 @@ impl PyWordLevelTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             WordLevelTrainer,
@@ -647,7 +646,7 @@ impl PyWordLevelTrainer {
     #[new]
     #[pyo3(signature = (**kwargs), text_signature = None)]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordlevel::WordLevelTrainer::builder();

         if let Some(kwargs) = kwargs {
@@ -767,7 +766,7 @@ impl PyUnigramTrainer {
     }

     #[setter]
-    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &PyList) -> PyResult<()> {
+    fn set_special_tokens(self_: PyRef<Self>, special_tokens: &Bound<'_, PyList>) -> PyResult<()> {
         setter!(
             self_,
             UnigramTrainer,
@@ -801,12 +800,12 @@ impl PyUnigramTrainer {
     }

     #[setter]
-    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<PyChar>) {
+    fn set_initial_alphabet(self_: PyRef<Self>, alphabet: Vec<char>) {
         setter!(
             self_,
             UnigramTrainer,
             initial_alphabet,
-            alphabet.into_iter().map(|c| c.0).collect()
+            alphabet.into_iter().collect()
         );
     }
@@ -815,7 +814,7 @@ impl PyUnigramTrainer {
         signature = (**kwargs),
         text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
     )]
-    pub fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyTrainer)> {
+    pub fn new(kwargs: Option<Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::unigram::UnigramTrainer::builder();
         if let Some(kwargs) = kwargs {
             for (key, val) in kwargs {
@@ -874,7 +873,7 @@ impl PyUnigramTrainer {

 /// Trainers Module
 #[pymodule]
-pub fn trainers(_py: Python, m: &PyModule) -> PyResult<()> {
+pub fn trainers(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyTrainer>()?;
     m.add_class::<PyBpeTrainer>()?;
     m.add_class::<PyWordPieceTrainer>()?;
@@ -893,7 +892,7 @@ mod tests {
         Python::with_gil(|py| {
             let py_trainer = PyTrainer::new(Arc::new(RwLock::new(BpeTrainer::default().into())));
             let py_bpe = py_trainer.get_as_subtype(py).unwrap();
-            assert_eq!("BpeTrainer", py_bpe.as_ref(py).get_type().name().unwrap());
+            assert_eq!("BpeTrainer", py_bpe.bind(py).get_type().qualname().unwrap());
         })
     }
 }


@@ -50,7 +50,7 @@ pub struct PyBufferedIterator<T, F> {
 impl<T, F, I> PyBufferedIterator<T, F>
 where
-    F: Fn(&PyAny) -> I,
+    F: Fn(Bound<'_, PyAny>) -> I,
     I: IntoIterator<Item = PyResult<T>>,
 {
     /// Create a new PyBufferedIterator using the provided Python object.
@@ -62,10 +62,10 @@ where
     ///
     /// The `buffer_size` represents the number of items that we buffer before we
     /// need to acquire the GIL again.
-    pub fn new(iter: &PyAny, converter: F, buffer_size: usize) -> PyResult<Self> {
+    pub fn new(iter: &Bound<'_, PyAny>, converter: F, buffer_size: usize) -> PyResult<Self> {
         let py = iter.py();
         let iter: Py<PyAny> = unsafe {
-            py.from_borrowed_ptr_or_err::<PyAny>(pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
+            Bound::from_borrowed_ptr_or_err(py, pyo3::ffi::PyObject_GetIter(iter.as_ptr()))?
                 .to_object(py)
         };
@@ -89,9 +89,10 @@ where
         }

         match unsafe {
-            py.from_owned_ptr_or_opt::<PyAny>(pyo3::ffi::PyIter_Next(
-                self.iter.as_ref().unwrap().as_ref(py).as_ptr(),
-            ))
+            Bound::from_owned_ptr_or_opt(
+                py,
+                pyo3::ffi::PyIter_Next(self.iter.as_ref().unwrap().bind(py).as_ptr()),
+            )
         } {
             Some(obj) => self.buffer.extend((self.converter)(obj)),
             None => {
@@ -112,7 +113,7 @@ where
 impl<T, F, I> Iterator for PyBufferedIterator<T, F>
 where
-    F: Fn(&PyAny) -> I,
+    F: Fn(Bound<'_, PyAny>) -> I,
     I: IntoIterator<Item = PyResult<T>>,
 {
     type Item = PyResult<T>;
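The unsafe pointer conversions above move from methods on `Python` (`from_owned_ptr_or_opt`, `from_borrowed_ptr_or_err`) to associated functions on `Bound` that take the GIL token explicitly. A hedged sketch of the owned-pointer case (illustrative helper, not this file's code):

use pyo3::prelude::*;

// Advances a Python iterator once via the C API. PyIter_Next returns an
// owned pointer, NULL on exhaustion, or NULL with an exception set.
fn iter_next(py: Python<'_>, iter: &Py<PyAny>) -> PyResult<Option<Py<PyAny>>> {
    let next = unsafe {
        Bound::from_owned_ptr_or_opt(py, pyo3::ffi::PyIter_Next(iter.bind(py).as_ptr()))
    };
    match next {
        Some(obj) => Ok(Some(obj.unbind())),
        // Surface a pending Python exception; a plain NULL means exhausted.
        None => PyErr::take(py).map_or(Ok(None), Err),
    }
}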


@@ -1,6 +1,3 @@
-use pyo3::exceptions;
-use pyo3::prelude::*;
-use pyo3::types::*;
 use std::marker::PhantomData;
 use std::sync::{Arc, Mutex};
@@ -14,25 +11,6 @@
 pub use normalization::*;
 pub use pretokenization::*;
 pub use regex::*;

-// PyChar
-// This type is a temporary hack to accept `char` as argument
-// To be removed once https://github.com/PyO3/pyo3/pull/1282 has been released
-pub struct PyChar(pub char);
-
-impl FromPyObject<'_> for PyChar {
-    fn extract(obj: &PyAny) -> PyResult<Self> {
-        let s = <PyString as PyTryFrom<'_>>::try_from(obj)?.to_str()?;
-        let mut iter = s.chars();
-        if let (Some(ch), None) = (iter.next(), iter.next()) {
-            Ok(Self(ch))
-        } else {
-            Err(exceptions::PyValueError::new_err(
-                "expected a string of length 1",
-            ))
-        }
-    }
-}
-
 // RefMut utils

 pub trait DestroyPtr {
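The deleted PyChar shim predates PyO3's own `char` conversions (the PR it was waiting on, PyO3/pyo3#1282, shipped long ago), so the bindings now take `char` directly: a one-character Python str extracts natively, and longer strings fail extraction much as the shim's error did. A quick demonstration (assumes pyo3's auto-initialize feature):

use pyo3::prelude::*;

fn main() -> PyResult<()> {
    Python::with_gil(|py| {
        // A length-1 Python str converts straight into a Rust char.
        let c: char = py.eval_bound("'▁'", None, None)?.extract()?;
        assert_eq!(c, '▁');
        Ok(())
    })
}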


@@ -9,15 +9,15 @@ use tk::pattern::Pattern;

 /// Represents a Pattern as used by `NormalizedString`
 #[derive(Clone, FromPyObject)]
-pub enum PyPattern<'p> {
+pub enum PyPattern {
     #[pyo3(annotation = "str")]
-    Str(&'p str),
+    Str(String),
     #[pyo3(annotation = "tokenizers.Regex")]
     Regex(Py<PyRegex>),
     // TODO: Add the compatibility for Fn(char) -> bool
 }

-impl Pattern for PyPattern<'_> {
+impl Pattern for PyPattern {
     fn find_matches(&self, inside: &str) -> tk::Result<Vec<(tk::Offsets, bool)>> {
         match self {
             PyPattern::Str(s) => {
@@ -35,8 +35,8 @@ impl Pattern for PyPattern<'_> {
     }
 }

-impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
-    fn from(pattern: PyPattern<'_>) -> Self {
+impl From<PyPattern> for tk::normalizers::replace::ReplacePattern {
+    fn from(pattern: PyPattern) -> Self {
         match pattern {
             PyPattern::Str(s) => Self::String(s.to_owned()),
             PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@@ -44,8 +44,8 @@ impl From<PyPattern<'_>> for tk::normalizers::replace::ReplacePattern {
     }
 }

-impl From<PyPattern<'_>> for tk::pre_tokenizers::split::SplitPattern {
-    fn from(pattern: PyPattern<'_>) -> Self {
+impl From<PyPattern> for tk::pre_tokenizers::split::SplitPattern {
+    fn from(pattern: PyPattern) -> Self {
         match pattern {
             PyPattern::Str(s) => Self::String(s.to_owned()),
             PyPattern::Regex(r) => Python::with_gil(|py| Self::Regex(r.borrow(py).pattern.clone())),
@@ -117,7 +117,7 @@ impl From<PySplitDelimiterBehavior> for SplitDelimiterBehavior {
     }
 }

-fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
+fn filter(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`filter` expect a callable with the signature: `fn(char) -> bool`";

     if !func.is_callable() {
@@ -134,7 +134,7 @@ fn filter(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
+fn for_each(normalized: &NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`for_each` expect a callable with the signature: `fn(char)`";

     if !func.is_callable() {
@@ -148,14 +148,14 @@ fn for_each(normalized: &NormalizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn map(normalized: &mut NormalizedString, func: &PyAny) -> PyResult<()> {
+fn map(normalized: &mut NormalizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     let err = "`map` expect a callable with the signature: `fn(char) -> char`";

     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(err))
     } else {
         normalized.map(|c| {
-            let c: &str = func
+            let c: String = func
                 .call1((c.to_string(),))
                 .expect(err)
                 .extract()
@@ -296,13 +296,13 @@ impl PyNormalizedString {
     /// Filter each character of the string using the given func
     #[pyo3(text_signature = "(self, func)")]
-    fn filter(&mut self, func: &PyAny) -> PyResult<()> {
+    fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         filter(&mut self.normalized, func)
     }

     /// Calls the given function for each character of the string
     #[pyo3(text_signature = "(self, func)")]
-    fn for_each(&self, func: &PyAny) -> PyResult<()> {
+    fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         for_each(&self.normalized, func)
     }
@@ -311,7 +311,7 @@ impl PyNormalizedString {
     /// Replaces each character of the string using the returned value. Each
     /// returned value **must** be a str of length 1 (ie a character).
     #[pyo3(text_signature = "(self, func)")]
-    fn map(&mut self, func: &PyAny) -> PyResult<()> {
+    fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         map(&mut self.normalized, func)
     }
@@ -551,21 +551,21 @@ impl PyNormalizedStringRefMut {
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)?
     }

-    fn filter(&mut self, func: &PyAny) -> PyResult<()> {
+    fn filter(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|n| filter(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
         Ok(())
     }

-    fn for_each(&self, func: &PyAny) -> PyResult<()> {
+    fn for_each(&self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map(|n| for_each(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
         Ok(())
     }

-    fn map(&mut self, func: &PyAny) -> PyResult<()> {
+    fn map(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|n| map(n, func))
             .ok_or_else(PyNormalizedStringRefMut::destroyed_error)??;
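All the callable-taking helpers now receive &Bound<'_, PyAny>; invoking one still goes through call1, with owned extraction of the result. A sketch of the map case (hypothetical helper mirroring the deleted PyChar length check):

use pyo3::prelude::*;

// Applies a Python fn(char) -> char and enforces the length-1 contract.
fn apply_char_fn(func: &Bound<'_, PyAny>, c: char) -> PyResult<char> {
    let out: String = func.call1((c.to_string(),))?.extract()?;
    let mut chars = out.chars();
    match (chars.next(), chars.next()) {
        (Some(ch), None) => Ok(ch),
        _ => Err(pyo3::exceptions::PyValueError::new_err(
            "expected a str of length 1",
        )),
    }
}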


@@ -12,7 +12,7 @@ use crate::error::ToPyResult;
 use crate::token::PyToken;
 use tk::{OffsetReferential, OffsetType, Offsets, PreTokenizedString, Token};

-fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn split(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`split` expect a callable with the signature: \
@@ -30,7 +30,7 @@ fn split(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn normalize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`normalize` expect a callable with the signature: \
@@ -46,7 +46,7 @@ fn normalize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
     }
 }

-fn tokenize(pretok: &mut PreTokenizedString, func: &PyAny) -> PyResult<()> {
+fn tokenize(pretok: &mut PreTokenizedString, func: &Bound<'_, PyAny>) -> PyResult<()> {
     if !func.is_callable() {
         Err(exceptions::PyTypeError::new_err(
             "`tokenize` expect a callable with the signature: \
@@ -183,7 +183,7 @@ impl PyPreTokenizedString {
     /// In order for the offsets to be tracked accurately, any returned `NormalizedString`
     /// should come from calling either `.split` or `.slice` on the received one.
     #[pyo3(text_signature = "(self, func)")]
-    fn split(&mut self, func: &PyAny) -> PyResult<()> {
+    fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         split(&mut self.pretok, func)
     }
@@ -195,7 +195,7 @@ impl PyPreTokenizedString {
     /// does not need to return anything, just calling the methods on the provided
     /// NormalizedString allow its modification.
     #[pyo3(text_signature = "(self, func)")]
-    fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         normalize(&mut self.pretok, func)
     }
@@ -206,7 +206,7 @@ impl PyPreTokenizedString {
     /// The function used to tokenize each underlying split. This function must return
     /// a list of Token generated from the input str.
     #[pyo3(text_signature = "(self, func)")]
-    fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         tokenize(&mut self.pretok, func)
     }
@@ -289,19 +289,19 @@ impl PyPreTokenizedStringRefMut {
 #[pymethods]
 impl PyPreTokenizedStringRefMut {
-    fn split(&mut self, func: &PyAny) -> PyResult<()> {
+    fn split(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| split(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
     }

-    fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn normalize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| normalize(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?
     }

-    fn tokenize(&mut self, func: &PyAny) -> PyResult<()> {
+    fn tokenize(&mut self, func: &Bound<'_, PyAny>) -> PyResult<()> {
         self.inner
             .map_mut(|pretok| tokenize(pretok, func))
             .ok_or_else(PyPreTokenizedStringRefMut::destroyed_error)?


@@ -40,19 +40,19 @@ harness = false
 lazy_static = "1.4"
 rand = "0.8"
 onig = { version = "6.4", default-features = false, optional = true }
-regex = "1.9"
+regex = "1.10"
 regex-syntax = "0.8"
-rayon = "1.8"
+rayon = "1.10"
 rayon-cond = "0.3"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
-unicode-segmentation = "1.10"
+unicode-segmentation = "1.11"
 indicatif = {version = "0.17", optional = true}
 itertools = "0.12"
 log = "0.4"
-derive_builder = "0.13"
+derive_builder = "0.20"
 spm_precompiled = "0.1"
 hf-hub = { version = "0.3.2", optional = true }
 aho-corasick = "1.1"
@@ -62,7 +62,7 @@ thiserror = "1.0.49"
 fancy-regex = { version = "0.13", optional = true}
 getrandom = { version = "0.2.10" }
 esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
-monostate = "0.1.9"
+monostate = "0.1.12"

 [features]
 default = ["progressbar", "onig", "esaxx_fast"]
@@ -73,7 +73,7 @@ unstable_wasm = ["fancy-regex", "getrandom/js"]

 [dev-dependencies]
 criterion = "0.5"
-tempfile = "3.8"
+tempfile = "3.10"
 assert_approx_eq = "1.1"

 [profile.release]