Remove Send + Sync requirements from Model.

2025-12-07 21:28:19 +00:00 · 2020-07-31 17:16:04 +02:00
parent 42b810488f
commit aaf8e932b1
3 changed files with 20 additions and 11 deletions
--- a/bindings/python/src/models.rs
+++ b/bindings/python/src/models.rs
@@ -14,17 +14,18 @@ use tk::{Model, Token};
 use tokenizers as tk;

 use super::error::ToPyResult;
+use tk::models::ModelWrapper;

 /// A Model represents some tokenization algorithm like BPE or Word
 /// This class cannot be constructed directly. Please use one of the concrete models.
 #[pyclass(module = "tokenizers.models", name=Model)]
 #[derive(Clone)]
 pub struct PyModel {
-    pub model: Arc<dyn Model>,
+    pub model: Arc<ModelWrapper>,
 }

 impl PyModel {
-    pub(crate) fn new(model: Arc<dyn Model>) -> Self {
+    pub(crate) fn new(model: Arc<ModelWrapper>) -> Self {
        PyModel { model }
    }
 }
@@ -83,7 +84,7 @@ impl PyModel {
        // Instantiate a default empty model. This doesn't really make sense, but we need
        // to be able to instantiate an empty model for pickle capabilities.
        Ok(PyModel {
-            model: Arc::new(BPE::default()),
+            model: Arc::new(BPE::default().into()),
        })
    }

@@ -175,7 +176,7 @@ impl PyBPE {
                "Error while initializing BPE: {}",
                e
            ))),
-            Ok(bpe) => Ok((PyBPE {}, PyModel::new(Arc::new(bpe)))),
+            Ok(bpe) => Ok((PyBPE {}, PyModel::new(Arc::new(bpe.into())))),
        }
    }
 }
@@ -220,7 +221,7 @@ impl PyWordPiece {
                    "Error while initializing WordPiece",
                ))
            }
-            Ok(wordpiece) => Ok((PyWordPiece {}, PyModel::new(Arc::new(wordpiece)))),
+            Ok(wordpiece) => Ok((PyWordPiece {}, PyModel::new(Arc::new(wordpiece.into())))),
        }
    }
 }
@@ -253,10 +254,10 @@ impl PyWordLevel {
                        "Error while initializing WordLevel",
                    ))
                }
-                Ok(model) => Ok((PyWordLevel {}, PyModel::new(Arc::new(model)))),
+                Ok(model) => Ok((PyWordLevel {}, PyModel::new(Arc::new(model.into())))),
            }
        } else {
-            Ok((PyWordLevel {}, PyModel::new(Arc::new(WordLevel::default()))))
+            Ok((PyWordLevel {}, PyModel::new(Arc::new(WordLevel::default().into()))))
        }
    }
 }
--- a/bindings/python/src/tokenizer.rs
+++ b/bindings/python/src/tokenizer.rs
@@ -318,7 +318,7 @@ impl PyTokenizer {
    }

    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        let model: PyObject = PyModel::new(Arc::new(BPE::default())).into_py(py);
+        let model: PyObject = PyModel::new(Arc::new(BPE::default().into())).into_py(py);
        let args = PyTuple::new(py, vec![model]);
        Ok(args)
    }
--- a/tokenizers/src/tokenizer/mod.rs
+++ b/tokenizers/src/tokenizer/mod.rs
@@ -70,7 +70,7 @@ pub trait PreTokenizer: Send + Sync {

 #[typetag::serde(tag = "type")]
 /// Represents a model used during Tokenization (like BPE or Word or Unigram).
-pub trait Model: Send + Sync {
+pub trait Model {
    /// Tokenize the given sequence into multiple underlying `Token`. The `offsets` on the `Token`
    /// are expected to be relative to the given sequence.
    fn tokenize(&self, sequence: &str) -> Result<Vec<Token>>;
@@ -709,7 +709,10 @@ where
        &self,
        inputs: Vec<E>,
        add_special_tokens: bool,
-    ) -> Result<Vec<Encoding>> {
+    ) -> Result<Vec<Encoding>>
+    where
+        M: Send + Sync,
+    {
        let mut encodings = inputs
            .into_maybe_par_iter()
            .map(|input| self.encode(input, add_special_tokens))
@@ -749,7 +752,10 @@ where
        &self,
        sentences: Vec<Vec<u32>>,
        skip_special_tokens: bool,
-    ) -> Result<Vec<String>> {
+    ) -> Result<Vec<String>>
+    where
+        M: Send + Sync,
+    {
        sentences
            .into_maybe_par_iter()
            .map(|sentence| self.decode(sentence, skip_special_tokens))
@@ -761,6 +767,7 @@ where
    where
        T: Trainer<Model = MN>,
        MN: Model,
+        M: Send + Sync,
    {
        let max_read = 1_000_000;
        let mut len = 0;
@@ -849,6 +856,7 @@ where
    where
        T: Trainer<Model = TM>,
        TM: Model,
+        M: Send + Sync,
    {
        let words = self.word_count(trainer, files)?;