Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-22 16:25:30 +00:00.
Small performance fixup (negligible but obviously better).
@@ -460,14 +460,14 @@ impl BPE {
     }
 
     fn tokenize_with_cache(&self, sequence: &str) -> Result<Vec<Token>> {
-        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
-            return Ok(self.word_to_tokens(hit).collect());
-        }
         if self.ignore_merges {
             if let Some(id) = self.vocab.get(sequence) {
                 return Ok(vec![Token::new(*id, sequence.to_string().clone(), (0, 0))]);
             }
         }
+        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
+            return Ok(self.word_to_tokens(hit).collect());
+        }
         let word = self.merge_word(sequence)?;
         let ret = self.word_to_tokens(&word).collect();
         if let Some(ref cache) = self.cache {
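For context, here is a minimal, self-contained sketch of the idea behind the reorder, not the real tokenizers internals: `BpeSketch`, its `Mutex<HashMap>` cache, the `u32` token ids, and the placeholder `merge_word` are all simplified stand-ins. The point it illustrates is the one the commit message hints at: when `ignore_merges` is set, a plain vocab `HashMap` probe can answer without ever touching the cache, and sequences answered that way are never cached anyway, so probing the vocab first skips a guaranteed-miss cache access on that path.

// Simplified sketch of the ordering in tokenize_with_cache after this
// commit. Names and types are illustrative, not the upstream API.
use std::collections::HashMap;
use std::sync::Mutex;

struct BpeSketch {
    vocab: HashMap<String, u32>,
    cache: Option<Mutex<HashMap<String, Vec<u32>>>>,
    ignore_merges: bool,
}

impl BpeSketch {
    fn tokenize_with_cache(&self, sequence: &str) -> Vec<u32> {
        // Cheap path first: a plain HashMap probe, no lock taken.
        if self.ignore_merges {
            if let Some(&id) = self.vocab.get(sequence) {
                return vec![id];
            }
        }
        // Only now pay for the cache lookup (lock + probe + clone).
        if let Some(cache) = &self.cache {
            if let Some(hit) = cache.lock().unwrap().get(sequence) {
                return hit.clone();
            }
        }
        // Cache miss: run the merge loop and store the result.
        let word = self.merge_word(sequence);
        if let Some(cache) = &self.cache {
            cache.lock().unwrap().insert(sequence.to_owned(), word.clone());
        }
        word
    }

    // Placeholder for the real BPE merge loop.
    fn merge_word(&self, sequence: &str) -> Vec<u32> {
        sequence.bytes().map(u32::from).collect()
    }
}

fn main() {
    let bpe = BpeSketch {
        vocab: HashMap::from([("hello".to_string(), 42)]),
        cache: Some(Mutex::new(HashMap::new())),
        ignore_merges: true,
    };
    // "hello" hits the vocab fast path and never touches the cache.
    assert_eq!(bpe.tokenize_with_cache("hello"), vec![42]);
    // "world" misses, runs merge_word, and is cached for next time.
    let first = bpe.tokenize_with_cache("world");
    assert_eq!(bpe.tokenize_with_cache("world"), first);
}

The upstream cache is more elaborate than a single `Mutex<HashMap>`, but the ordering argument is the same: short-circuit on the cheapest check first, which is presumably why the commit calls the gain "negligible but obviously better".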