clippy
@@ -271,10 +271,12 @@ impl AddedVocabulary {
             if self.added_tokens_map_r.values().any(|val| val == token) {
                 // We only ignore if the AddedToken is already part of the added_tokens_map_r
                 ignored += 1;
-            }
-            else{
+            } else {
                 // Make sure we modify the previous entry
-                self.added_tokens_map.entry(token.content.clone()).and_modify(|old_id| *old_id = id).or_insert_with(||id);
+                self.added_tokens_map
+                    .entry(token.content.clone())
+                    .and_modify(|old_id| *old_id = id)
+                    .or_insert_with(|| id);
                 if !self.special_tokens_set.contains(&token.content) {
                     self.added_tokens.push(token.clone());
                 }
@@ -284,7 +286,6 @@ impl AddedVocabulary {
                     .and_modify(|t| *t = token.clone())
                     .or_insert_with(|| token.clone());
             }
-
         }

         self.refresh_added_tokens(model, normalizer);
@@ -604,7 +605,10 @@ mod tests {

         // Also adds tokens already covered by the model
         let added_token = AddedToken::from("test", false);
-        assert_eq!(vocab.add_tokens(&[added_token.clone()], &model, normalizer),1);
+        assert_eq!(
+            vocab.add_tokens(&[added_token.clone()], &model, normalizer),
+            1
+        );
         assert_eq!(vocab.len(), 3);

         assert_eq!(vocab.get_vocab_r()[&0], added_token);
@@ -647,11 +651,14 @@ mod tests {
         );
         assert_eq!(vocab.len(), 3); // New token was added
         assert!(vocab.is_special_token("test"));
-        assert_eq!(*vocab.get_vocab_r(), HashMap::from([
+        assert_eq!(
+            *vocab.get_vocab_r(),
+            HashMap::from([
                 (2, AddedToken::from("added_token_1", true)),
                 (3, AddedToken::from("added_token_2", true)),
                 (0, AddedToken::from("test", true)),
-        ]));
+            ])
+        );
         assert!(vocab.added_tokens_map.contains_key("test"));
         assert!(vocab.added_tokens_map_r.contains_key(&0));
     }
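The only non-whitespace change above is in the first hunk, where the chained HashMap entry-API call is split across lines. As a standalone illustration of that update-or-insert pattern (not part of the commit; the map, key, and id below are placeholder names, and the real AddedVocabulary types are simplified to String and u32):

use std::collections::HashMap;

fn main() {
    // Hypothetical stand-in for AddedVocabulary's token-content -> id map.
    let mut added_tokens_map: HashMap<String, u32> = HashMap::new();
    let (content, id) = ("test".to_string(), 0u32);

    // Same entry-API pattern as the reformatted code: overwrite the stored id
    // if the token is already present, otherwise insert it.
    added_tokens_map
        .entry(content.clone())
        .and_modify(|old_id| *old_id = id)
        .or_insert_with(|| id);

    assert_eq!(added_tokens_map[&content], 0);
}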
|