mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Revert "[BREAKING CHANGE] Ignore added_tokens (both special and not) … (#1569)
* Revert "[BREAKING CHANGE] Ignore added_tokens (both special and not) in the decoder (#1513)"
This reverts commit 25aee8b88c.
* don't remove audit
* deprecate id_to_token
* use simple id to token
* don't break id_to_token since we are deprecating anyways?
This commit is contained in:
@ -847,35 +847,23 @@ where
|
||||
|
||||
/// Decode the given ids, back to a String
|
||||
pub fn decode(&self, ids: &[u32], skip_special_tokens: bool) -> Result<String> {
|
||||
let mut result = String::with_capacity(ids.len());
|
||||
let mut chunks = Vec::with_capacity(ids.len());
|
||||
for id in ids {
|
||||
if let Some(added_token) = self.added_vocabulary.simple_id_to_token(*id) {
|
||||
if skip_special_tokens && self.added_vocabulary.is_special_token(&added_token) {
|
||||
continue;
|
||||
}
|
||||
let text_chunk = if let Some(decoder) = &self.decoder {
|
||||
decoder.decode(chunks.clone())?
|
||||
} else {
|
||||
chunks.join(" ")
|
||||
};
|
||||
result.push_str(&text_chunk);
|
||||
if !result.is_empty() && self.decoder.is_none() {
|
||||
result.push(' ');
|
||||
}
|
||||
result.push_str(&added_token);
|
||||
chunks.clear();
|
||||
} else if let Some(token) = self.model.id_to_token(*id) {
|
||||
chunks.push(token);
|
||||
}
|
||||
}
|
||||
let text_chunk = if let Some(decoder) = &self.decoder {
|
||||
decoder.decode(chunks.clone())?
|
||||
let tokens = ids
|
||||
.iter()
|
||||
.filter_map(|id| {
|
||||
self.added_vocabulary
|
||||
.simple_id_to_token(*id)
|
||||
.or_else(|| self.model.id_to_token(*id))
|
||||
.filter(|token| {
|
||||
!skip_special_tokens || !self.added_vocabulary.is_special_token(token)
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Some(decoder) = &self.decoder {
|
||||
decoder.decode(tokens)
|
||||
} else {
|
||||
chunks.join(" ")
|
||||
};
|
||||
result.push_str(&text_chunk);
|
||||
Ok(result)
|
||||
Ok(tokens.join(" "))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user