mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-23 00:35:35 +00:00
Small performance fixup (negligible but obviously better).
@@ -460,14 +460,14 @@ impl BPE {
     }
 
     fn tokenize_with_cache(&self, sequence: &str) -> Result<Vec<Token>> {
-        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
-            return Ok(self.word_to_tokens(hit).collect());
-        }
         if self.ignore_merges {
             if let Some(id) = self.vocab.get(sequence) {
                 return Ok(vec![Token::new(*id, sequence.to_string().clone(), (0, 0))]);
             }
         }
+        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
+            return Ok(self.word_to_tokens(hit).collect());
+        }
         let word = self.merge_word(sequence)?;
         let ret = self.word_to_tokens(&word).collect();
         if let Some(ref cache) = self.cache {
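For context, the fixup simply swaps the order of the two early returns in tokenize_with_cache: the ignore_merges vocab lookup now runs before the cache probe, so a sequence that is already a single vocab entry is answered by one plain HashMap lookup instead of first paying for a cache access. Below is a minimal, self-contained Rust sketch of that ordering; Bpe, Token, merge_word, and the RefCell-backed cache are simplified stand-ins for illustration, not the library's actual types (the real cache holds merged Words behind a lock and converts them with word_to_tokens).

use std::cell::RefCell;
use std::collections::HashMap;

// Illustrative stand-ins for the real tokenizers types (assumptions,
// not the library's API).
#[derive(Clone, Debug)]
struct Token {
    id: u32,
    value: String,
    offsets: (usize, usize),
}

struct Bpe {
    vocab: HashMap<String, u32>,
    cache: RefCell<HashMap<String, Vec<Token>>>, // simplified cache
    ignore_merges: bool,
}

impl Bpe {
    fn tokenize_with_cache(&self, sequence: &str) -> Vec<Token> {
        // Fast path first (the point of the commit): when `ignore_merges`
        // is set and the whole sequence is a single vocab entry, one plain
        // HashMap lookup answers the query, so the cache probe is skipped
        // entirely.
        if self.ignore_merges {
            if let Some(&id) = self.vocab.get(sequence) {
                return vec![Token {
                    id,
                    value: sequence.to_string(),
                    offsets: (0, 0), // mirrors the offsets in the diff above
                }];
            }
        }
        // Only consult the cache once the fast path has failed.
        if let Some(hit) = self.cache.borrow().get(sequence) {
            return hit.clone();
        }
        // Cache miss: run the expensive merge loop, then memoize.
        let tokens = self.merge_word(sequence);
        self.cache
            .borrow_mut()
            .insert(sequence.to_owned(), tokens.clone());
        tokens
    }

    // Placeholder for the real BPE merge loop.
    fn merge_word(&self, sequence: &str) -> Vec<Token> {
        vec![Token {
            id: 0,
            value: sequence.to_string(),
            offsets: (0, sequence.len()),
        }]
    }
}

fn main() {
    let bpe = Bpe {
        vocab: HashMap::from([("hello".to_string(), 42)]),
        cache: RefCell::new(HashMap::new()),
        ignore_merges: true,
    };
    // "hello" is a vocab entry, so it returns through the fast path
    // without ever touching the cache.
    println!("{:?}", bpe.tokenize_with_cache("hello"));
}

The win is small, as the commit message says: the reorder only saves a cache probe on the ignore_merges fast path. But since that path needs nothing beyond the vocab, checking it first is strictly cheaper, and the behavior is otherwise unchanged.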