Upgrade pyo3 to 0.16 (#956)

* Upgrade pyo3 to 0.15 Rebase-conflicts-fixed-by: H. Vetinari <h.vetinari@gmx.com> * Upgrade pyo3 to 0.16 Rebase-conflicts-fixed-by: H. Vetinari <h.vetinari@gmx.com> * Install Python before running cargo clippy * Fix clippy warnings * Use `PyArray_Check` instead of downcasting to `PyArray1<u8>` * Enable `auto-initialize` of pyo3 to fix `cargo test --no-default-features` * Fix some test cases Why do they change? * Refactor and add SAFETY comments to `PyArrayUnicode` Replace deprecated `PyUnicode_FromUnicode` with `PyUnicode_FromKindAndData` Co-authored-by: messense <messense@icloud.com>
2025-08-23 00:35:35 +00:00 · 2022-05-06 00:48:40 +11:00
parent 6533bf0fad
commit 519cc13be0
19 changed files with 620 additions and 620 deletions
--- a/bindings/python/src/trainers.rs
+++ b/bindings/python/src/trainers.rs
@ -15,7 +15,7 @@ use tokenizers as tk;
 ///
 /// This class is not supposed to be instantiated directly. Instead, any implementation of a
 /// Trainer will return an instance of this class when instantiated.
-#[pyclass(name=Trainer, module = "tokenizers.trainers", name=Trainer)]
+#[pyclass(module = "tokenizers.trainers", name = "Trainer", subclass)]
 #[derive(Clone, Deserialize, Serialize)]
 pub struct PyTrainer {
    #[serde(flatten)]
@ -164,7 +164,7 @@ macro_rules! setter {
 ///
 ///     end_of_word_suffix (:obj:`str`, `optional`):
 ///         A suffix to be used for every subword that is a end-of-word.
-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=BpeTrainer)]
+#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "BpeTrainer")]
 pub struct PyBpeTrainer {}
 #[pymethods]
 impl PyBpeTrainer {
@ -367,8 +367,10 @@ impl PyBpeTrainer {
 ///
 ///     end_of_word_suffix (:obj:`str`, `optional`):
 ///         A suffix to be used for every subword that is a end-of-word.
-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=WordPieceTrainer)]
-#[text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"]
+#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "WordPieceTrainer")]
+#[pyo3(
+    text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
+)]
 pub struct PyWordPieceTrainer {}
 #[pymethods]
 impl PyWordPieceTrainer {
@ -557,7 +559,7 @@ impl PyWordPieceTrainer {
 ///
 ///     special_tokens (:obj:`List[Union[str, AddedToken]]`):
 ///         A list of special tokens the model should know of.
-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=WordLevelTrainer)]
+#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "WordLevelTrainer")]
 pub struct PyWordLevelTrainer {}
 #[pymethods]
 impl PyWordLevelTrainer {
@ -713,8 +715,10 @@ impl PyWordLevelTrainer {
 ///     n_sub_iterations (:obj:`int`):
 ///         The number of iterations of the EM algorithm to perform before
 ///         pruning the vocabulary.
-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=UnigramTrainer)]
-#[text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"]
+#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "UnigramTrainer")]
+#[pyo3(
+    text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
+)]
 pub struct PyUnigramTrainer {}
 #[pymethods]
 impl PyUnigramTrainer {
@ -864,8 +868,8 @@ mod tests {
        let py_bpe = py_trainer.get_as_subtype().unwrap();
        let gil = Python::acquire_gil();
        assert_eq!(
-            "tokenizers.trainers.BpeTrainer",
-            py_bpe.as_ref(gil.python()).get_type().name()
+            "BpeTrainer",
+            py_bpe.as_ref(gil.python()).get_type().name().unwrap()
        );
    }
 }