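Small performance fixup: reorder the cache lookup relative to the `ignore_merges` vocab check in `BPE::tokenize_with_cache` (negligible gain, but obviously better).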

This commit is contained in:
Nicolas Patry
2024-08-01 11:34:02 +02:00
parent 1df498a186
commit 9e0c791f2b

View File

@@ -460,14 +460,14 @@ impl BPE {
}
fn tokenize_with_cache(&self, sequence: &str) -> Result<Vec<Token>> {
if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
return Ok(self.word_to_tokens(hit).collect());
}
if self.ignore_merges {
if let Some(id) = self.vocab.get(sequence) {
return Ok(vec![Token::new(*id, sequence.to_string().clone(), (0, 0))]);
}
}
if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
return Ok(self.word_to_tokens(hit).collect());
}
let word = self.merge_word(sequence)?;
let ret = self.word_to_tokens(&word).collect();
if let Some(ref cache) = self.cache {