diff --git a/tokenizers/src/models/bpe/trainer.rs b/tokenizers/src/models/bpe/trainer.rs
index 303fdbc8..8178bcf5 100644
--- a/tokenizers/src/models/bpe/trainer.rs
+++ b/tokenizers/src/models/bpe/trainer.rs
@@ -523,7 +523,7 @@ impl BpeTrainer {
                 .get(&new_token)
                 .copied()
                 .unwrap_or(id_to_word.len() as u32);
-            if word_to_id.get(&new_token).is_none() {
+            if !word_to_id.contains_key(&new_token) {
                 id_to_word.push(new_token.clone());
                 word_to_id.insert(new_token.clone(), new_token_id);
             }
diff --git a/tokenizers/src/models/wordpiece/mod.rs b/tokenizers/src/models/wordpiece/mod.rs
index 9baf2458..a75134d2 100644
--- a/tokenizers/src/models/wordpiece/mod.rs
+++ b/tokenizers/src/models/wordpiece/mod.rs
@@ -180,10 +180,10 @@ impl WordPiece {
     pub fn from_bpe(bpe: &BPE) -> Self {
         let mut wp = Self::builder().vocab(bpe.get_vocab()).build().unwrap();
         if let Some(unk) = bpe.get_unk_token() {
-            wp.unk_token = unk.to_owned();
+            unk.clone_into(&mut wp.unk_token);
         }
         if let Some(prefix) = bpe.get_continuing_subword_prefix() {
-            wp.continuing_subword_prefix = prefix.to_owned();
+            prefix.clone_into(&mut wp.continuing_subword_prefix);
         }
         wp
     }