This commit is contained in:
Arthur Zucker
2023-09-05 13:34:43 +00:00
parent 93b37f36dc
commit e5fc051ad2

View File

@ -257,11 +257,16 @@ impl AddedVocabulary {
let new_id = if let Some(new_id) = self.token_to_id(&token.content, model) { let new_id = if let Some(new_id) = self.token_to_id(&token.content, model) {
new_id new_id
} else { } else {
self.added_tokens_map self.added_tokens_map.values().cloned().max().map_or(
.values() model.get_vocab_size() as u32,
.cloned() |max| {
.max() if max >= (model.get_vocab_size() as u32) || model.get_vocab_size() == 0 {
.map_or(model.get_vocab_size() as u32, |max| max + 1) max + 1
} else {
model.get_vocab_size() as u32
}
},
)
}; };
// Make sure we modify the previous entry // Make sure we modify the previous entry
self.added_tokens_map self.added_tokens_map
@ -681,7 +686,6 @@ mod tests {
token.special = true; token.special = true;
assert_eq!(token.special, true); // Token was already there assert_eq!(token.special, true); // Token was already there
} }
#[test] #[test]