mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Remove kwargs mapping on Tokenizer decode/decode_batch as there is only one possible arg.
This is suggested in the issue https://github.com/huggingface/tokenizers/issues/54#issuecomment-574104841. kwargs cannot be passed as positional arguments; they have to be named. Replacing kwargs with the actual skip_special_tokens parameter allows both (named and positional) syntaxes. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
This commit is contained in:
@ -189,34 +189,22 @@ impl Tokenizer {
|
||||
.into()
|
||||
}
|
||||
|
||||
#[args(kwargs = "**")]
|
||||
fn decode(&self, ids: Vec<u32>, kwargs: Option<&PyDict>) -> PyResult<String> {
|
||||
let mut skip_special_tokens = true;
|
||||
|
||||
if let Some(kwargs) = kwargs {
|
||||
if let Some(skip) = kwargs.get_item("skip_special_tokens") {
|
||||
skip_special_tokens = skip.extract()?;
|
||||
}
|
||||
}
|
||||
|
||||
ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
|
||||
fn decode(&self, ids: Vec<u32>, skip_special_tokens: Option<bool>) -> PyResult<String> {
|
||||
ToPyResult(self.tokenizer.decode(
|
||||
ids,
|
||||
skip_special_tokens.unwrap_or(true),
|
||||
)).into()
|
||||
}
|
||||
|
||||
#[args(kwargs = "**")]
|
||||
fn decode_batch(
|
||||
&self,
|
||||
sentences: Vec<Vec<u32>>,
|
||||
kwargs: Option<&PyDict>,
|
||||
skip_special_tokens: Option<bool>,
|
||||
) -> PyResult<Vec<String>> {
|
||||
let mut skip_special_tokens = true;
|
||||
|
||||
if let Some(kwargs) = kwargs {
|
||||
if let Some(skip) = kwargs.get_item("skip_special_tokens") {
|
||||
skip_special_tokens = skip.extract()?;
|
||||
}
|
||||
}
|
||||
|
||||
ToPyResult(self.tokenizer.decode_batch(sentences, skip_special_tokens)).into()
|
||||
ToPyResult(self.tokenizer.decode_batch(
|
||||
sentences,
|
||||
skip_special_tokens.unwrap_or(true),
|
||||
)).into()
|
||||
}
|
||||
|
||||
fn token_to_id(&self, token: &str) -> Option<u32> {
|
||||
|
Reference in New Issue
Block a user