More cache options. (#1675)

* More cache options.

* Fixing error messages.
This commit is contained in:
Nicolas Patry
2024-11-06 18:12:09 +08:00
committed by GitHub
parent 1740bff7a6
commit c6b5c3eab7
5 changed files with 89 additions and 0 deletions

View File

@ -534,6 +534,30 @@ impl PyBPE {
)?, )?,
) )
} }
/// Clears the internal cache
///
/// Takes write access to the wrapped model and asks it to drop its cached
/// entries. An exception is raised if write access cannot be obtained.
#[pyo3(signature = ())]
#[pyo3(text_signature = "(self)")]
fn _clear_cache(self_: PyRef<Self>) -> PyResult<()> {
    // The model lives on the base class, hence the `as_ref()` hop.
    let mut guard = self_.as_ref().model.write().map_err(|err| {
        let message = format!("Error while clearing BPE cache: {}", err);
        exceptions::PyException::new_err(message)
    })?;
    guard.clear_cache();
    Ok(())
}
/// Resize the internal cache
///
/// Takes write access to the wrapped model and forwards `capacity` to its
/// `resize_cache` method. An exception is raised if write access cannot be
/// obtained.
#[pyo3(signature = (capacity))]
#[pyo3(text_signature = "(self, capacity)")]
fn _resize_cache(self_: PyRef<Self>, capacity: usize) -> PyResult<()> {
    // The model lives on the base class, hence the `as_ref()` hop.
    let mut guard = self_.as_ref().model.write().map_err(|err| {
        let message = format!("Error while resizing BPE cache: {}", err);
        exceptions::PyException::new_err(message)
    })?;
    guard.resize_cache(capacity);
    Ok(())
}
} }
/// An implementation of the WordPiece algorithm /// An implementation of the WordPiece algorithm
@ -858,6 +882,30 @@ impl PyUnigram {
)), )),
} }
} }
/// Clears the internal cache
///
/// Takes write access to the wrapped model and asks it to drop its cached
/// entries. An exception is raised if write access cannot be obtained.
#[pyo3(signature = ())]
#[pyo3(text_signature = "(self)")]
fn _clear_cache(self_: PyRef<Self>) -> PyResult<()> {
    // The model lives on the base class, hence the `as_ref()` hop.
    let mut guard = self_.as_ref().model.write().map_err(|err| {
        let message = format!("Error while clearing Unigram cache: {}", err);
        exceptions::PyException::new_err(message)
    })?;
    guard.clear_cache();
    Ok(())
}
/// Resize the internal cache
///
/// Takes write access to the wrapped model and forwards `capacity` to its
/// `resize_cache` method. An exception is raised if write access cannot be
/// obtained.
#[pyo3(signature = (capacity))]
#[pyo3(text_signature = "(self, capacity)")]
fn _resize_cache(self_: PyRef<Self>, capacity: usize) -> PyResult<()> {
    // The model lives on the base class, hence the `as_ref()` hop.
    let mut guard = self_.as_ref().model.write().map_err(|err| {
        let message = format!("Error while resizing Unigram cache: {}", err);
        exceptions::PyException::new_err(message)
    })?;
    guard.resize_cache(capacity);
    Ok(())
}
} }
/// Models Module /// Models Module

View File

@ -354,6 +354,13 @@ impl BPE {
} }
} }
/// Resize the cache
pub fn resize_cache(&mut self, capacity: usize) {
if let Some(ref mut cache) = self.cache {
cache.resize(capacity);
}
}
pub fn get_vocab(&self) -> Vocab { pub fn get_vocab(&self) -> Vocab {
self.vocab.clone() self.vocab.clone()
} }

View File

@ -207,6 +207,23 @@ impl Model for ModelWrapper {
} }
} }
impl ModelWrapper {
    /// Clears the cache of the wrapped model.
    ///
    /// Only the `BPE` and `Unigram` variants are forwarded to their own
    /// `clear_cache`; every other variant is left untouched.
    pub fn clear_cache(&mut self) {
        match self {
            Self::BPE(bpe) => bpe.clear_cache(),
            Self::Unigram(unigram) => unigram.clear_cache(),
            // Remaining variants: nothing to clear here.
            _ => {}
        }
    }

    /// Resizes the cache of the wrapped model to `capacity`.
    ///
    /// Only the `BPE` and `Unigram` variants are forwarded to their own
    /// `resize_cache`; every other variant is left untouched.
    pub fn resize_cache(&mut self, capacity: usize) {
        match self {
            Self::BPE(bpe) => bpe.resize_cache(capacity),
            Self::Unigram(unigram) => unigram.resize_cache(capacity),
            // Remaining variants: nothing to resize here.
            _ => {}
        }
    }
}
#[derive(Clone, Serialize, Deserialize)] #[derive(Clone, Serialize, Deserialize)]
pub enum TrainerWrapper { pub enum TrainerWrapper {
BpeTrainer(BpeTrainer), BpeTrainer(BpeTrainer),

View File

@ -377,6 +377,16 @@ impl Unigram {
let string = read_to_string(path)?; let string = read_to_string(path)?;
Ok(serde_json::from_str(&string)?) Ok(serde_json::from_str(&string)?)
} }
/// Empties the model's internal cache by delegating to the cache's own
/// `clear`.
pub fn clear_cache(&mut self) {
    let cache = &mut self.cache;
    cache.clear();
}
/// Resizes the model's internal cache by forwarding `capacity` to the
/// cache's own `resize`.
pub fn resize_cache(&mut self, capacity: usize) {
    let cache = &mut self.cache;
    cache.resize(capacity);
}
} }
/// Iterator to iterate over the vocabulary of the model, and their relative score. /// Iterator to iterate over the vocabulary of the model, and their relative score.

View File

@ -115,4 +115,11 @@ where
pub(crate) fn set(&self, key: K, value: V) { pub(crate) fn set(&self, key: K, value: V) {
self.set_values(std::iter::once((key, value))) self.set_values(std::iter::once((key, value)))
} }
/// Sets the cache capacity to `capacity` and asks the underlying map to
/// shrink its allocation towards that size.
///
/// The new capacity is always recorded. The shrink step is best-effort: it
/// is skipped if the lock guarding the map reports an error (e.g. poisoning).
///
/// NOTE(review): `shrink_to` presumably keeps entries that are already
/// stored, so the map may still hold more than `capacity` items after this
/// call — confirm against the map type.
pub(crate) fn resize(&mut self, capacity: usize) {
    self.capacity = capacity;
    // `&mut self` already guarantees exclusive access, so reach the map
    // directly instead of taking the lock at runtime with `try_write`.
    if let Ok(map) = self.map.get_mut() {
        map.shrink_to(capacity);
    }
}
} }