Fix clippy warnings

Author:    Anthony MOI
Date:      2021-03-10 20:26:39 -05:00
Committer: Anthony MOI
Commit:    56a9196030
Parent:    ee95e7f0cd

9 changed files with 106 additions and 117 deletions
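Nearly every hunk below removes the same lint target: a function whose `Result`/`PyResult` return type wraps a value that can never fail. Clippy flags this pattern (most plausibly as `clippy::unnecessary_wraps`; the lint name is an inference, the commit message does not record it), and the fix is mechanical: return the value directly, drop the `Ok(...)` at every return, and drop the `?`/`.unwrap()` at every call site. A minimal sketch of the before/after shape, with made-up names:

// Before: every path returns Ok, so the Result in the signature is noise.
fn vocab_size_before(vocab: &std::collections::HashMap<String, u32>) -> Result<usize, String> {
    Ok(vocab.len())
}

// After: the same function with the spurious fallibility removed; callers
// no longer need `?` or `.unwrap()`.
fn vocab_size_after(vocab: &std::collections::HashMap<String, u32>) -> usize {
    vocab.len()
}

A run of `cargo clippy --all-targets` over the workspace surfaces warnings like these; the exact command used for this commit is not part of the page.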

View File

@@ -57,11 +57,9 @@ impl Decoder for PyDecoder {
 #[pymethods]
 impl PyDecoder {
     #[staticmethod]
-    fn custom(decoder: PyObject) -> PyResult<Self> {
-        let decoder = PyDecoderWrapper::Custom(
-            CustomDecoder::new(decoder).map(|d| Arc::new(RwLock::new(d)))?,
-        );
-        Ok(PyDecoder::new(decoder))
+    fn custom(decoder: PyObject) -> Self {
+        let decoder = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(decoder))));
+        PyDecoder::new(decoder)
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -147,8 +145,8 @@ pub struct PyByteLevelDec {}
 #[pymethods]
 impl PyByteLevelDec {
     #[new]
-    fn new() -> PyResult<(Self, PyDecoder)> {
-        Ok((PyByteLevelDec {}, ByteLevel::default().into()))
+    fn new() -> (Self, PyDecoder) {
+        (PyByteLevelDec {}, ByteLevel::default().into())
     }
 }
@@ -188,8 +186,8 @@ impl PyWordPieceDec {
     #[new]
     #[args(prefix = "String::from(\"##\")", cleanup = "true")]
-    fn new(prefix: String, cleanup: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into()))
+    fn new(prefix: String, cleanup: bool) -> (Self, PyDecoder) {
+        (PyWordPieceDec {}, WordPiece::new(prefix, cleanup).into())
     }
 }
@@ -230,11 +228,11 @@ impl PyMetaspaceDec {
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyDecoder)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyDecoder) {
+        (
             PyMetaspaceDec {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
@@ -261,8 +259,8 @@ impl PyBPEDecoder {
     #[new]
     #[args(suffix = "String::from(\"</w>\")")]
-    fn new(suffix: String) -> PyResult<(Self, PyDecoder)> {
-        Ok((PyBPEDecoder {}, BPEDecoder::new(suffix).into()))
+    fn new(suffix: String) -> (Self, PyDecoder) {
+        (PyBPEDecoder {}, BPEDecoder::new(suffix).into())
     }
 }
@@ -272,8 +270,8 @@ pub(crate) struct CustomDecoder {
 }
 
 impl CustomDecoder {
-    pub(crate) fn new(inner: PyObject) -> PyResult<Self> {
-        Ok(CustomDecoder { inner })
+    pub(crate) fn new(inner: PyObject) -> Self {
+        CustomDecoder { inner }
     }
 }
@@ -387,8 +385,7 @@ mod test {
             let obj: PyObject = Py::new(py, py_msp).unwrap().into_py(py);
             obj
         });
-        let py_seq =
-            PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj).unwrap())));
+        let py_seq = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj))));
         assert!(serde_json::to_string(&py_seq).is_err());
     }
 }

View File

@@ -41,10 +41,10 @@ impl PySequenceProtocol for PyEncoding {
 #[pymethods]
 impl PyEncoding {
     #[new]
-    fn new() -> PyResult<Self> {
-        Ok(Self {
-            encoding: tk::tokenizer::Encoding::default(),
-        })
+    fn new() -> Self {
+        Self {
+            encoding: tk::tokenizer::Encoding::default(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -441,8 +441,7 @@ impl PyEncoding {
     /// The length of previous content to be included in each overflowing piece
     #[args(stride = "0")]
     #[text_signature = "(self, max_length, stride=0)"]
-    fn truncate(&mut self, max_length: usize, stride: usize) -> PyResult<()> {
+    fn truncate(&mut self, max_length: usize, stride: usize) {
         self.encoding.truncate(max_length, stride);
-        Ok(())
     }
 }

View File

@@ -91,12 +91,12 @@ where
 #[pymethods]
 impl PyModel {
     #[new]
-    fn __new__() -> PyResult<Self> {
+    fn __new__() -> Self {
         // Instantiate a default empty model. This doesn't really make sense, but we need
         // to be able to instantiate an empty model for pickle capabilities.
-        Ok(PyModel {
-            model: Arc::new(RwLock::new(BPE::default().into())),
-        })
+        PyModel {
+            model: Arc::new(RwLock::new(BPE::default().into())),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {

View File

@@ -102,10 +102,10 @@ impl Normalizer for PyNormalizer {
 #[pymethods]
 impl PyNormalizer {
     #[staticmethod]
-    fn custom(obj: PyObject) -> PyResult<Self> {
-        Ok(Self {
-            normalizer: PyNormalizerWrapper::Custom(CustomNormalizer::new(obj)).into(),
-        })
+    fn custom(obj: PyObject) -> Self {
+        Self {
+            normalizer: PyNormalizerWrapper::Custom(CustomNormalizer::new(obj)).into(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -279,10 +279,10 @@ impl PyBertNormalizer {
         handle_chinese_chars: bool,
         strip_accents: Option<bool>,
         lowercase: bool,
-    ) -> PyResult<(Self, PyNormalizer)> {
+    ) -> (Self, PyNormalizer) {
         let normalizer =
             BertNormalizer::new(clean_text, handle_chinese_chars, strip_accents, lowercase);
-        Ok((PyBertNormalizer {}, normalizer.into()))
+        (PyBertNormalizer {}, normalizer.into())
     }
 }
@@ -293,8 +293,8 @@ pub struct PyNFD {}
 #[pymethods]
 impl PyNFD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFD {}, PyNormalizer::new(NFD.into())))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFD {}, PyNormalizer::new(NFD.into()))
     }
 }
@@ -305,8 +305,8 @@ pub struct PyNFKD {}
 #[pymethods]
 impl PyNFKD {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKD {}, NFKD.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKD {}, NFKD.into())
     }
 }
@@ -317,8 +317,8 @@ pub struct PyNFC {}
 #[pymethods]
 impl PyNFC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFC {}, NFC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFC {}, NFC.into())
     }
 }
@@ -329,8 +329,8 @@ pub struct PyNFKC {}
 #[pymethods]
 impl PyNFKC {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNFKC {}, NFKC.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNFKC {}, NFKC.into())
     }
 }
@@ -360,8 +360,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
@@ -379,8 +379,8 @@ pub struct PyLowercase {}
 #[pymethods]
 impl PyLowercase {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyLowercase {}, Lowercase.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyLowercase {}, Lowercase.into())
     }
 }
@@ -412,8 +412,8 @@ impl PyStrip {
     #[new]
     #[args(left = "true", right = "true")]
-    fn new(left: bool, right: bool) -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStrip {}, Strip::new(left, right).into()))
+    fn new(left: bool, right: bool) -> (Self, PyNormalizer) {
+        (PyStrip {}, Strip::new(left, right).into())
     }
 }
@@ -424,8 +424,8 @@ pub struct PyStripAccents {}
 #[pymethods]
 impl PyStripAccents {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyStripAccents {}, StripAccents.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyStripAccents {}, StripAccents.into())
     }
 }
@@ -436,8 +436,8 @@ pub struct PyNmt {}
 #[pymethods]
 impl PyNmt {
     #[new]
-    fn new() -> PyResult<(Self, PyNormalizer)> {
-        Ok((PyNmt {}, Nmt.into()))
+    fn new() -> (Self, PyNormalizer) {
+        (PyNmt {}, Nmt.into())
     }
 }

View File

@@ -101,10 +101,10 @@ impl PreTokenizer for PyPreTokenizer {
 #[pymethods]
 impl PyPreTokenizer {
     #[staticmethod]
-    fn custom(pretok: PyObject) -> PyResult<Self> {
-        Ok(PyPreTokenizer {
-            pretok: PyPreTokenizerWrapper::Custom(CustomPreTokenizer::new(pretok)).into(),
-        })
+    fn custom(pretok: PyObject) -> Self {
+        PyPreTokenizer {
+            pretok: PyPreTokenizerWrapper::Custom(CustomPreTokenizer::new(pretok)).into(),
+        }
     }
 
     fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
@@ -244,13 +244,13 @@ impl PyByteLevel {
     #[new]
     #[args(add_prefix_space = "true", _kwargs = "**")]
-    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(add_prefix_space: bool, _kwargs: Option<&PyDict>) -> (Self, PyPreTokenizer) {
+        (
             PyByteLevel {},
             ByteLevel::default()
                 .add_prefix_space(add_prefix_space)
                 .into(),
-        ))
+        )
     }
 
     /// Returns the alphabet used by this PreTokenizer.
@@ -278,8 +278,8 @@ pub struct PyWhitespace {}
 #[pymethods]
 impl PyWhitespace {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespace {}, Whitespace::default().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespace {}, Whitespace::default().into())
     }
 }
@@ -290,8 +290,8 @@ pub struct PyWhitespaceSplit {}
 #[pymethods]
 impl PyWhitespaceSplit {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyWhitespaceSplit {}, WhitespaceSplit.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyWhitespaceSplit {}, WhitespaceSplit.into())
    }
 }
@@ -332,8 +332,8 @@ impl PySplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" ", "removed"]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" ", "removed"])
     }
 }
@@ -364,8 +364,8 @@ impl PyCharDelimiterSplit {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[" "]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[" "])
     }
 }
@@ -379,8 +379,8 @@ pub struct PyBertPreTokenizer {}
 #[pymethods]
 impl PyBertPreTokenizer {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyBertPreTokenizer {}, BertPreTokenizer.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyBertPreTokenizer {}, BertPreTokenizer.into())
     }
 }
@@ -391,8 +391,8 @@ pub struct PyPunctuation {}
 #[pymethods]
 impl PyPunctuation {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyPunctuation {}, Punctuation.into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyPunctuation {}, Punctuation.into())
     }
 }
@@ -420,8 +420,8 @@ impl PySequence {
         ))
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[PyList::empty(py)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[PyList::empty(py)])
     }
 }
@@ -465,11 +465,11 @@ impl PyMetaspace {
     #[new]
     #[args(replacement = "PyChar('▁')", add_prefix_space = "true")]
-    fn new(replacement: PyChar, add_prefix_space: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((
+    fn new(replacement: PyChar, add_prefix_space: bool) -> (Self, PyPreTokenizer) {
+        (
             PyMetaspace {},
             Metaspace::new(replacement.0, add_prefix_space).into(),
-        ))
+        )
     }
 }
@@ -501,8 +501,8 @@ impl PyDigits {
     #[new]
     #[args(individual_digits = false)]
-    fn new(individual_digits: bool) -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyDigits {}, Digits::new(individual_digits).into()))
+    fn new(individual_digits: bool) -> (Self, PyPreTokenizer) {
+        (PyDigits {}, Digits::new(individual_digits).into())
     }
 }
@@ -516,8 +516,8 @@ pub struct PyUnicodeScripts {}
 #[pymethods]
 impl PyUnicodeScripts {
     #[new]
-    fn new() -> PyResult<(Self, PyPreTokenizer)> {
-        Ok((PyUnicodeScripts {}, UnicodeScripts::new().into()))
+    fn new() -> (Self, PyPreTokenizer) {
+        (PyUnicodeScripts {}, UnicodeScripts::new().into())
     }
 }

View File

@@ -155,15 +155,15 @@ pub struct PyBertProcessing {}
 #[pymethods]
 impl PyBertProcessing {
     #[new]
-    fn new(sep: (String, u32), cls: (String, u32)) -> PyResult<(Self, PyPostProcessor)> {
-        Ok((
+    fn new(sep: (String, u32), cls: (String, u32)) -> (Self, PyPostProcessor) {
+        (
             PyBertProcessing {},
             PyPostProcessor::new(Arc::new(BertProcessing::new(sep, cls).into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
@@ -203,18 +203,18 @@ impl PyRobertaProcessing {
         cls: (String, u32),
         trim_offsets: bool,
         add_prefix_space: bool,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    ) -> (Self, PyPostProcessor) {
         let proc = RobertaProcessing::new(sep, cls)
             .trim_offsets(trim_offsets)
             .add_prefix_space(add_prefix_space);
-        Ok((
+        (
             PyRobertaProcessing {},
             PyPostProcessor::new(Arc::new(proc.into())),
-        ))
+        )
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
-        Ok(PyTuple::new(py, &[("", 0), ("", 0)]))
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
+        PyTuple::new(py, &[("", 0), ("", 0)])
     }
 }
@@ -233,20 +233,17 @@ pub struct PyByteLevel {}
 impl PyByteLevel {
     #[new]
     #[args(trim_offsets = "None", _kwargs = "**")]
-    fn new(
-        trim_offsets: Option<bool>,
-        _kwargs: Option<&PyDict>,
-    ) -> PyResult<(Self, PyPostProcessor)> {
+    fn new(trim_offsets: Option<bool>, _kwargs: Option<&PyDict>) -> (Self, PyPostProcessor) {
         let mut byte_level = ByteLevel::default();
 
         if let Some(to) = trim_offsets {
             byte_level = byte_level.trim_offsets(to);
         }
 
-        Ok((
+        (
             PyByteLevel {},
             PyPostProcessor::new(Arc::new(byte_level.into())),
-        ))
+        )
     }
 }

View File

@@ -25,21 +25,21 @@ impl PyToken {
     }
 
     #[getter]
-    fn get_id(&self) -> PyResult<u32> {
-        Ok(self.token.id)
+    fn get_id(&self) -> u32 {
+        self.token.id
     }
 
     #[getter]
-    fn get_value(&self) -> PyResult<&str> {
-        Ok(&self.token.value)
+    fn get_value(&self) -> &str {
+        &self.token.value
     }
 
     #[getter]
-    fn get_offsets(&self) -> PyResult<(usize, usize)> {
-        Ok(self.token.offsets)
+    fn get_offsets(&self) -> (usize, usize) {
+        self.token.offsets
     }
 
-    fn as_tuple(&self) -> PyResult<(u32, &str, (usize, usize))> {
-        Ok((self.token.id, &self.token.value, self.token.offsets))
+    fn as_tuple(&self) -> (u32, &str, (usize, usize)) {
+        (self.token.id, &self.token.value, self.token.offsets)
     }
 }

View File

@@ -487,10 +487,9 @@ impl PyTokenizer {
         }
     }
 
-    fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<&'p PyTuple> {
+    fn __getnewargs__<'p>(&self, py: Python<'p>) -> &'p PyTuple {
         let model = PyModel::from(BPE::default()).into_py(py);
-        let args = PyTuple::new(py, vec![model]);
-        Ok(args)
+        PyTuple::new(py, vec![model])
     }
 
     /// Instantiate a new :class:`~tokenizers.Tokenizer` from the given JSON string.
@@ -577,11 +576,10 @@ impl PyTokenizer {
     /// :param is_pair: Boolean indicating if the input would be a single sentence or a pair
     /// :return:
     #[text_signature = "(self, is_pair)"]
-    fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
-        Ok(self
-            .tokenizer
+    fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
+        self.tokenizer
             .get_post_processor()
-            .map_or(0, |p| p.added_tokens(is_pair)))
+            .map_or(0, |p| p.added_tokens(is_pair))
     }
 
     /// Get the underlying vocabulary
@@ -594,8 +592,8 @@ impl PyTokenizer {
     /// :obj:`Dict[str, int]`: The vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab(&self, with_added_tokens: bool) -> PyResult<HashMap<String, u32>> {
-        Ok(self.tokenizer.get_vocab(with_added_tokens))
+    fn get_vocab(&self, with_added_tokens: bool) -> HashMap<String, u32> {
+        self.tokenizer.get_vocab(with_added_tokens)
     }
 
     /// Get the size of the underlying vocabulary
@@ -608,8 +606,8 @@ impl PyTokenizer {
     /// :obj:`int`: The size of the vocabulary
     #[args(with_added_tokens = true)]
     #[text_signature = "(self, with_added_tokens=True)"]
-    fn get_vocab_size(&self, with_added_tokens: bool) -> PyResult<usize> {
-        Ok(self.tokenizer.get_vocab_size(with_added_tokens))
+    fn get_vocab_size(&self, with_added_tokens: bool) -> usize {
+        self.tokenizer.get_vocab_size(with_added_tokens)
    }
 
     /// Enable truncation

View File

@@ -177,7 +177,7 @@ impl UnigramTrainer {
         &self,
         sentences: &[Sentence],
         _progress: &Option<ProgressBar>,
-    ) -> Result<Vec<SentencePiece>> {
+    ) -> Vec<SentencePiece> {
         // Put all sentences in a string, separated by \0
         let total: usize = sentences
             .iter()
@@ -245,7 +245,7 @@
             }
         }
         to_log_prob(&mut seed_sentencepieces);
-        Ok(seed_sentencepieces)
+        seed_sentencepieces
     }
 
     fn prune_sentence_pieces(
         &self,
@@ -469,7 +469,7 @@
         // We use a UNK token when training, whatever the `self.unk_token`
         pieces.push(("<UNK>".into(), f64::NAN));
-        pieces.extend(self.make_seed_sentence_pieces(&sentences, &progress)?);
+        pieces.extend(self.make_seed_sentence_pieces(&sentences, &progress));
 
         self.finalize_progress(&progress, sentences.len());
 
         // Useful to check compatibility with spm.
@@ -604,9 +604,7 @@ mod tests {
         assert_eq!(required_chars.len(), 13);
 
         let progress = None;
-        let table = trainer
-            .make_seed_sentence_pieces(&sentences, &progress)
-            .unwrap();
+        let table = trainer.make_seed_sentence_pieces(&sentences, &progress);
 
         let target_strings = vec![
             "s", "i", " ", "", "", "", "", "", "", "", "h", "a", "T", "is ", "s ",