Upgrade pyo3 to 0.16 (#956)

* Upgrade pyo3 to 0.15

Rebase-conflicts-fixed-by: H. Vetinari <h.vetinari@gmx.com>

* Upgrade pyo3 to 0.16

Rebase-conflicts-fixed-by: H. Vetinari <h.vetinari@gmx.com>

* Install Python before running cargo clippy

* Fix clippy warnings

* Use `PyArray_Check` instead of downcasting to `PyArray1<u8>`
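A minimal sketch of what a dtype-agnostic check can look like (not the commit's exact helper; it assumes rust-numpy's npyffi exposes PY_ARRAY_API.get_type_object, and the name is_numpy_array is ours):

    use numpy::npyffi::{NpyTypes, PY_ARRAY_API};
    use pyo3::{ffi, prelude::*, AsPyPointer};

    // True for *any* numpy ndarray, regardless of dtype or dimensionality,
    // whereas downcasting to the concrete `PyArray1<u8>` type only matches
    // 1-D u8 arrays. This mirrors the C-API `PyArray_Check` macro.
    fn is_numpy_array(ob: &PyAny) -> bool {
        unsafe {
            ffi::PyObject_TypeCheck(
                ob.as_ptr(),
                PY_ARRAY_API.get_type_object(ob.py(), NpyTypes::PyArray_Type),
            ) != 0
        }
    }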

* Enable `auto-initialize` of pyo3 to fix `cargo test
--no-default-features`
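For context: with `auto-initialize`, pyo3 starts the interpreter on the first `Python::with_gil` call, which is what embedded users of the crate (such as `cargo test` built without the extension-module feature) need. A minimal sketch, assuming a Cargo.toml along these lines:

    // Cargo.toml (sketch):
    //     [dependencies]
    //     pyo3 = { version = "0.16", features = ["auto-initialize"] }
    //
    // Without the feature, embedded code must call
    // pyo3::prepare_freethreaded_python() before touching Python.
    use pyo3::prelude::*;

    fn main() -> PyResult<()> {
        Python::with_gil(|py| {
            let version: String = py.import("sys")?.getattr("version")?.extract()?;
            println!("embedded Python {}", version);
            Ok(())
        })
    }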

* Fix some test cases

Why did they change? (See the note after the final test hunk below.)

* Refactor and add SAFETY comments to `PyArrayUnicode`

Replace deprecated `PyUnicode_FromUnicode` with `PyUnicode_FromKindAndData`
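The old call took a `Py_UNICODE*` buffer and was deprecated upstream; the replacement takes an explicit "kind" plus a raw buffer. A minimal sketch of the new pattern, assuming pyo3's ffi exposes both the function and the `PyUnicode_4BYTE_KIND` constant (the helper name is ours):

    use pyo3::prelude::*;

    // Build a Python str from a UTF-32 buffer, the 4-byte kind in CPython's
    // flexible string representation (PEP 393).
    fn str_from_utf32(py: Python, text: &str) -> PyResult<PyObject> {
        let utf32: Vec<u32> = text.chars().map(|c| c as u32).collect();
        unsafe {
            let ptr = pyo3::ffi::PyUnicode_FromKindAndData(
                pyo3::ffi::PyUnicode_4BYTE_KIND as _,
                utf32.as_ptr() as *const _,
                utf32.len() as _,
            );
            // SAFETY: `ptr` is a new reference or null on error;
            // from_owned_ptr_or_err turns null into the pending exception.
            PyObject::from_owned_ptr_or_err(py, ptr)
        }
    }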

Co-authored-by: messense <messense@icloud.com>
Author: h-vetinari
Date: 2022-05-06 00:48:40 +11:00
Committed-by: GitHub
Parent: 6533bf0fad
Commit: 519cc13be0

19 changed files with 620 additions and 620 deletions

bindings/python/src/normalizers.rs

@@ -3,7 +3,6 @@ use std::sync::{Arc, RwLock};
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::*;
-use pyo3::PySequenceProtocol;
 
 use crate::error::ToPyResult;
 use crate::utils::{PyNormalizedString, PyNormalizedStringRefMut, PyPattern};
@@ -43,7 +42,7 @@ impl PyNormalizedStringMut<'_> {
 ///
 /// This class is not supposed to be instantiated directly. Instead, any implementation of a
 /// Normalizer will return an instance of this class when instantiated.
-#[pyclass(dict, module = "tokenizers.normalizers", name=Normalizer)]
+#[pyclass(dict, module = "tokenizers.normalizers", name = "Normalizer", subclass)]
 #[derive(Clone, Serialize, Deserialize)]
 pub struct PyNormalizer {
     #[serde(flatten)]
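The next several hunks are all instances of the same two mechanical changes required by pyo3 0.16: `name` inside `#[pyclass(...)]` must now be a quoted string, and `text_signature` moves from its own attribute into `#[pyo3(...)]`. In miniature (the class here is hypothetical):

    use pyo3::prelude::*;

    // Before: #[pyclass(module = "m", name=Example)]
    //         #[text_signature = "(self)"]
    // After:
    #[pyclass(module = "m", name = "Example", subclass)]
    #[pyo3(text_signature = "(self)")]
    struct PyExample {}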
@@ -144,7 +143,7 @@ impl PyNormalizer {
     ///     normalized (:class:`~tokenizers.NormalizedString`):
     ///         The normalized string on which to apply this
     ///         :class:`~tokenizers.normalizers.Normalizer`
-    #[text_signature = "(self, normalized)"]
+    #[pyo3(text_signature = "(self, normalized)")]
     fn normalize(&self, mut normalized: PyNormalizedStringMut) -> PyResult<()> {
         normalized.normalize_with(&self.normalizer)
     }
@@ -162,7 +161,7 @@ impl PyNormalizer {
     ///
     /// Returns:
     ///     :obj:`str`: A string after normalization
-    #[text_signature = "(self, sequence)"]
+    #[pyo3(text_signature = "(self, sequence)")]
     fn normalize_str(&self, sequence: &str) -> PyResult<String> {
         let mut normalized = NormalizedString::from(sequence);
         ToPyResult(self.normalizer.normalize(&mut normalized)).into_py()?;
@@ -217,8 +216,10 @@ macro_rules! setter {
 ///
 ///     lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`):
 ///         Whether to lowercase.
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=BertNormalizer)]
-#[text_signature = "(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "BertNormalizer")]
+#[pyo3(
+    text_signature = "(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)"
+)]
 pub struct PyBertNormalizer {}
 #[pymethods]
 impl PyBertNormalizer {
@@ -287,8 +288,8 @@ impl PyBertNormalizer {
 }
 
 /// NFD Unicode Normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFD)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFD")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyNFD {}
 #[pymethods]
 impl PyNFD {
@@ -299,8 +300,8 @@ impl PyNFD {
 }
 
 /// NFKD Unicode Normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFKD)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFKD")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyNFKD {}
 #[pymethods]
 impl PyNFKD {
@@ -311,8 +312,8 @@ impl PyNFKD {
 }
 
 /// NFC Unicode Normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFC)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFC")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyNFC {}
 #[pymethods]
 impl PyNFC {
@@ -323,8 +324,8 @@ impl PyNFC {
 }
 
 /// NFKC Unicode Normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFKC)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFKC")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyNFKC {}
 #[pymethods]
 impl PyNFKC {
@@ -340,7 +341,7 @@ impl PyNFKC {
 /// Args:
 ///     normalizers (:obj:`List[Normalizer]`):
 ///         A list of Normalizer to be run as a sequence
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Sequence)]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Sequence")]
 pub struct PySequence {}
 #[pymethods]
 impl PySequence {
@@ -363,18 +364,15 @@ impl PySequence {
     fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
         PyTuple::new(py, &[PyList::empty(py)])
     }
-}
 
-#[pyproto]
-impl PySequenceProtocol for PySequence {
     fn __len__(&self) -> usize {
         0
     }
 }
 
 /// Lowercase Normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Lowercase)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Lowercase")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyLowercase {}
 #[pymethods]
 impl PyLowercase {
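The hunk above is the pyo3 0.16 protocol migration: `#[pyproto]` and the `PySequenceProtocol` trait are deprecated, and dunder methods now live directly in the `#[pymethods]` block. A minimal sketch with a hypothetical class:

    use pyo3::prelude::*;

    #[pyclass]
    struct Pair;

    #[pymethods]
    impl Pair {
        // Formerly: #[pyproto] impl PySequenceProtocol for Pair { fn __len__ ... }
        fn __len__(&self) -> usize {
            2
        }
    }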
@@ -385,8 +383,8 @@ impl PyLowercase {
 }
 
 /// Strip normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Strip)]
-#[text_signature = "(self, left=True, right=True)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Strip")]
+#[pyo3(text_signature = "(self, left=True, right=True)")]
 pub struct PyStrip {}
 #[pymethods]
 impl PyStrip {
@@ -418,8 +416,8 @@ impl PyStrip {
 }
 
 /// StripAccents normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=StripAccents)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "StripAccents")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyStripAccents {}
 #[pymethods]
 impl PyStripAccents {
@@ -430,8 +428,8 @@ impl PyStripAccents {
 }
 
 /// Nmt normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Nmt)]
-#[text_signature = "(self)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Nmt")]
+#[pyo3(text_signature = "(self)")]
 pub struct PyNmt {}
 #[pymethods]
 impl PyNmt {
@@ -443,8 +441,8 @@ impl PyNmt {
 
 /// Precompiled normalizer
 /// Don't use it manually; it is used for compatibility with SentencePiece.
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Precompiled)]
-#[text_signature = "(self, precompiled_charsmap)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Precompiled")]
+#[pyo3(text_signature = "(self, precompiled_charsmap)")]
 pub struct PyPrecompiled {}
 #[pymethods]
 impl PyPrecompiled {
@@ -466,8 +464,8 @@ impl PyPrecompiled {
 }
 
 /// Replace normalizer
-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Replace)]
-#[text_signature = "(self, pattern, content)"]
+#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Replace")]
+#[pyo3(text_signature = "(self, pattern, content)")]
 pub struct PyReplace {}
 #[pymethods]
 impl PyReplace {
@@ -630,8 +628,8 @@ mod test {
         let py_nfc = py_norm.get_as_subtype().unwrap();
         let gil = Python::acquire_gil();
         assert_eq!(
-            "tokenizers.normalizers.NFC",
-            py_nfc.as_ref(gil.python()).get_type().name()
+            "NFC",
+            py_nfc.as_ref(gil.python()).get_type().name().unwrap()
        );
    }
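This last hunk is also the answer to the "Why did they change?" note in the commit message: as the diff shows, `PyType::name()` is now fallible (hence the added `.unwrap()`) and reports the unqualified type name (hence "NFC" rather than "tokenizers.normalizers.NFC"). A sketch of the new call shape in non-test code, where the error would be propagated instead of unwrapped (the helper name is ours):

    use pyo3::prelude::*;

    // name() now returns PyResult<&str> instead of a plain &str.
    fn short_type_name(obj: &PyAny) -> PyResult<String> {
        Ok(obj.get_type().name()?.to_string())
    }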