Makes decode and decode_batch work on borrowed content. (#1251)

* Makes `decode` and `decode_batch` work on borrowed content.

* Make `decode_batch` work with borrowed content.

* Fix lint.

* Attempt to map it into Node.

* Second attempt.

* Step by step.

* One more step.

* Fix lint.

* Please ...

* Removing collect.

* Revert "Removing collect."

This reverts commit 2f7ec04dc84df3cc5488625a4fcb492fdc3545e2.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Funtowicz Morgan
2023-05-17 11:18:15 +02:00
committed by GitHub
parent cefc41e8ec
commit b4fcc9ce6e
5 changed files with 17 additions and 13 deletions

View File

@@ -1009,7 +1009,7 @@ impl PyTokenizer {
#[pyo3(signature = (ids, skip_special_tokens = true))]
#[pyo3(text_signature = "(self, ids, skip_special_tokens=True)")]
fn decode(&self, ids: Vec<u32>, skip_special_tokens: bool) -> PyResult<String> {
-ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
+ToPyResult(self.tokenizer.decode(&ids, skip_special_tokens)).into()
}
/// Decode a batch of ids back to their corresponding string
@@ -1032,7 +1032,8 @@ impl PyTokenizer {
skip_special_tokens: bool,
) -> PyResult<Vec<String>> {
py.allow_threads(|| {
-ToPyResult(self.tokenizer.decode_batch(sequences, skip_special_tokens)).into()
+let slices = sequences.iter().map(|v| &v[..]).collect::<Vec<&[u32]>>();
+ToPyResult(self.tokenizer.decode_batch(&slices, skip_special_tokens)).into()
})
}