From 9a10acc981cc3205e108124eb994b92bd4250eb1 Mon Sep 17 00:00:00 2001
From: epwalsh
Date: Wed, 1 Jan 2020 14:06:31 -0500
Subject: [PATCH] don't create unnecessary vectors when accessing cache

---
 tokenizers/src/models/bpe/cache.rs | 19 +++++++++++++------
 tokenizers/src/models/bpe/model.rs | 20 +++++---------------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/tokenizers/src/models/bpe/cache.rs b/tokenizers/src/models/bpe/cache.rs
index 86e60f55..5f657ac3 100644
--- a/tokenizers/src/models/bpe/cache.rs
+++ b/tokenizers/src/models/bpe/cache.rs
@@ -26,20 +26,27 @@ where
         }
     }
 
-    pub fn get_values(&self, keys: &[K]) -> Vec<Option<V>> {
+    pub fn get_values<I>(&self, keys_iter: I) -> Option<Vec<Option<V>>>
+    where
+        I: Iterator<Item = K>,
+    {
         let mut lock = self.map.try_lock();
         if let Ok(ref mut cache) = lock {
-            keys.iter().map(|k| cache.get(k).cloned()).collect()
+            Some(keys_iter.map(|k| cache.get(&k).cloned()).collect())
         } else {
-            keys.iter().map(|_| None).collect()
+            None
         }
     }
 
-    pub fn set_values(&self, keys: Vec<K>, values: Vec<V>) {
+    pub fn set_values<I, J>(&self, keys_iter: I, values_iter: J)
+    where
+        I: Iterator<Item = K>,
+        J: Iterator<Item = Option<V>>,
+    {
         let mut lock = self.map.try_lock();
         if let Ok(ref mut cache) = lock {
-            for (key, value) in keys.into_iter().zip(values) {
-                cache.insert(key, value);
+            for (key, value) in keys_iter.zip(values_iter).filter(|(_, v)| v.is_some()) {
+                cache.insert(key, value.unwrap());
             }
         }
     }
diff --git a/tokenizers/src/models/bpe/model.rs b/tokenizers/src/models/bpe/model.rs
index b484943d..416db87b 100644
--- a/tokenizers/src/models/bpe/model.rs
+++ b/tokenizers/src/models/bpe/model.rs
@@ -212,14 +212,9 @@ impl Model for BPE {
         let mut encoded: Vec<Token> = Vec::with_capacity(sentence.len());
 
         let mut cached_words = match self.dropout {
-            None => Some(
-                self.cache.get_values(
-                    &sentence
-                        .iter()
-                        .map(|(s, _)| s.to_owned())
-                        .collect::<Vec<String>>(),
-                ),
-            ),
+            None => self
+                .cache
+                .get_values(sentence.iter().map(|(s, _)| s.clone())),
             Some(_) => None, // If using dropout we don't want to use a cached.
         };
 
@@ -250,13 +245,8 @@ impl Model for BPE {
 
         // Also update cache
         if let Some(cache) = cached_words {
-            let (keys, values) = sentence
-                .into_iter()
-                .zip(cache)
-                .filter(|(_, v)| v.is_some())
-                .map(|(k, v)| (k.0, v.unwrap()))
-                .unzip::<_, _, Vec<String>, Vec<Word>>();
-            self.cache.set_values(keys, values);
+            let keys_iter = sentence.into_iter().map(|(s, _)| s);
+            self.cache.set_values(keys_iter, cache.into_iter());
         }
 
         Ok(encoded)