Correctly compute the new id: take the max over the added tokens, plus get_vocab_size

This commit is contained in:
Arthur Zucker
2023-09-01 19:03:33 +00:00
parent db319492f7
commit 2b72017e17

View File

@@ -264,7 +264,7 @@ impl AddedVocabulary {
let id = if let Some(id) = self.token_to_id(&token.content, model) { let id = if let Some(id) = self.token_to_id(&token.content, model) {
id id
} else { } else {
let new_id = (model.get_vocab_size() + self.added_tokens_map.len()) as u32; let new_id = (model.get_vocab_size() + cmp::max(self.added_tokens_map_r.keys(),0)) as u32;
new_id new_id
}; };