Makes decode and decode_batch work on borrowed content. (#1251)

* Makes `decode` and `decode_batch` work on borrowed content. * Make `decode_batch` work with borrowed content. * Fix lint. * Attempt to map it into Node. * Second attempt. * Step by step. * One more step. * Fix lint. * Please ... * Removing collect. * Revert "Removing collect." This reverts commit 2f7ec04dc84df3cc5488625a4fcb492fdc3545e2. --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
2025-08-23 00:35:35 +00:00 · 2023-05-17 11:18:15 +02:00
parent cefc41e8ec
commit b4fcc9ce6e
5 changed files with 17 additions and 13 deletions
--- a/bindings/python/src/tokenizer.rs
+++ b/bindings/python/src/tokenizer.rs
@ -1009,7 +1009,7 @@ impl PyTokenizer {
    #[pyo3(signature = (ids, skip_special_tokens = true))]
    #[pyo3(text_signature = "(self, ids, skip_special_tokens=True)")]
    fn decode(&self, ids: Vec<u32>, skip_special_tokens: bool) -> PyResult<String> {
-        ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
+        ToPyResult(self.tokenizer.decode(&ids, skip_special_tokens)).into()
    }

    /// Decode a batch of ids back to their corresponding string
@ -1032,7 +1032,8 @@ impl PyTokenizer {
        skip_special_tokens: bool,
    ) -> PyResult<Vec<String>> {
        py.allow_threads(|| {
-            ToPyResult(self.tokenizer.decode_batch(sequences, skip_special_tokens)).into()
+            let slices = sequences.iter().map(|v| &v[..]).collect::<Vec<&[u32]>>();
+            ToPyResult(self.tokenizer.decode_batch(&slices, skip_special_tokens)).into()
        })
    }