mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-09 22:28:29 +00:00
Python - Improve decode/decode_batch API
This commit is contained in:
@@ -244,15 +244,33 @@ impl Tokenizer {
|
|||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decode(&self, ids: Vec<u32>, skip_special_tokens: bool) -> PyResult<String> {
|
#[args(kwargs = "**")]
|
||||||
|
fn decode(&self, ids: Vec<u32>, kwargs: Option<&PyDict>) -> PyResult<String> {
|
||||||
|
let mut skip_special_tokens = true;
|
||||||
|
|
||||||
|
if let Some(kwargs) = kwargs {
|
||||||
|
if let Some(skip) = kwargs.get_item("skip_special_tokens") {
|
||||||
|
skip_special_tokens = skip.extract()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
|
ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[args(kwargs = "**")]
|
||||||
fn decode_batch(
|
fn decode_batch(
|
||||||
&self,
|
&self,
|
||||||
sentences: Vec<Vec<u32>>,
|
sentences: Vec<Vec<u32>>,
|
||||||
skip_special_tokens: bool,
|
kwargs: Option<&PyDict>,
|
||||||
) -> PyResult<Vec<String>> {
|
) -> PyResult<Vec<String>> {
|
||||||
|
let mut skip_special_tokens = true;
|
||||||
|
|
||||||
|
if let Some(kwargs) = kwargs {
|
||||||
|
if let Some(skip) = kwargs.get_item("skip_special_tokens") {
|
||||||
|
skip_special_tokens = skip.extract()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ToPyResult(self.tokenizer.decode_batch(sentences, skip_special_tokens)).into()
|
ToPyResult(self.tokenizer.decode_batch(sentences, skip_special_tokens)).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user