mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Makes `decode` and `decode_batch` work on borrowed content. (#1251)

* Makes `decode` and `decode_batch` work on borrowed content.
* Make `decode_batch` work with borrowed content.
* Fix lint.
* Attempt to map it into Node.
* Second attempt.
* Step by step.
* One more step.
* Fix lint.
* Please ...
* Removing collect.
* Revert "Removing collect."

  This reverts commit 2f7ec04dc84df3cc5488625a4fcb492fdc3545e2.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
@@ -1009,7 +1009,7 @@ impl PyTokenizer {
     #[pyo3(signature = (ids, skip_special_tokens = true))]
     #[pyo3(text_signature = "(self, ids, skip_special_tokens=True)")]
     fn decode(&self, ids: Vec<u32>, skip_special_tokens: bool) -> PyResult<String> {
-        ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
+        ToPyResult(self.tokenizer.decode(&ids, skip_special_tokens)).into()
     }

     /// Decode a batch of ids back to their corresponding string
@@ -1032,7 +1032,8 @@ impl PyTokenizer {
         skip_special_tokens: bool,
     ) -> PyResult<Vec<String>> {
         py.allow_threads(|| {
-            ToPyResult(self.tokenizer.decode_batch(sequences, skip_special_tokens)).into()
+            let slices = sequences.iter().map(|v| &v[..]).collect::<Vec<&[u32]>>();
+            ToPyResult(self.tokenizer.decode_batch(&slices, skip_special_tokens)).into()
         })
     }
|
||||
|
Reference in New Issue
Block a user