mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
update
This commit is contained in:
@ -257,11 +257,16 @@ impl AddedVocabulary {
|
|||||||
let new_id = if let Some(new_id) = self.token_to_id(&token.content, model) {
|
let new_id = if let Some(new_id) = self.token_to_id(&token.content, model) {
|
||||||
new_id
|
new_id
|
||||||
} else {
|
} else {
|
||||||
self.added_tokens_map
|
self.added_tokens_map.values().cloned().max().map_or(
|
||||||
.values()
|
model.get_vocab_size() as u32,
|
||||||
.cloned()
|
|max| {
|
||||||
.max()
|
if max >= (model.get_vocab_size() as u32) || model.get_vocab_size() == 0 {
|
||||||
.map_or(model.get_vocab_size() as u32, |max| max + 1)
|
max + 1
|
||||||
|
} else {
|
||||||
|
model.get_vocab_size() as u32
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
};
|
};
|
||||||
// Make sure we modify the previous entry
|
// Make sure we modify the previous entry
|
||||||
self.added_tokens_map
|
self.added_tokens_map
|
||||||
@ -681,7 +686,6 @@ mod tests {
|
|||||||
|
|
||||||
token.special = true;
|
token.special = true;
|
||||||
assert_eq!(token.special, true); // Token was already there
|
assert_eq!(token.special, true); // Token was already there
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Reference in New Issue
Block a user