Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-22 16:25:30 +00:00
correctly compute the new id: we take the max of the AddedToken + get_vocab_size
@@ -264,7 +264,7 @@ impl AddedVocabulary {
         let id = if let Some(id) = self.token_to_id(&token.content, model) {
             id
         } else {
-            let new_id = (model.get_vocab_size() + self.added_tokens_map.len()) as u32;
+            let new_id = (model.get_vocab_size() + cmp::max(self.added_tokens_map_r.keys(),0)) as u32;
             new_id
         };

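The intent described by the commit title is that a freshly allocated id must lie beyond both the model's vocabulary and any id already handed out to an AddedToken. A minimal, self-contained sketch of that allocation rule, assuming the id-to-token map `added_tokens_map_r` seen in the diff; the function name `next_added_token_id` is hypothetical, not the library's API:

use std::cmp;
use std::collections::HashMap;

// Hypothetical sketch of the rule from the commit title: the new id is
// max(model vocab size, highest already-added id + 1), so it can collide
// neither with the model vocabulary nor with an existing AddedToken.
fn next_added_token_id(vocab_size: usize, added_tokens_map_r: &HashMap<u32, String>) -> u32 {
    let next_after_added = added_tokens_map_r
        .keys()
        .max()            // highest id already handed out, if any
        .map(|id| id + 1)
        .unwrap_or(0);
    cmp::max(vocab_size as u32, next_after_added)
}

fn main() {
    let mut added = HashMap::new();
    added.insert(100u32, String::from("<special>"));
    // vocab of 100 ids (0..=99) plus an added token at id 100 -> next is 101
    assert_eq!(next_added_token_id(100, &added), 101);
    // no added tokens yet -> the first free id is simply vocab_size
    assert_eq!(next_added_token_id(100, &HashMap::new()), 100);
}

Presumably this is what the previous computation, `get_vocab_size() + added_tokens_map.len()`, got wrong: counting added tokens can hand out a colliding id once the added ids are not contiguous with the vocabulary.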