mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 08:15:49 +00:00
More cache options. (#1675)
* More cache options. * Fixing error messages.
This commit is contained in:
@ -534,6 +534,30 @@ impl PyBPE {
|
||||
)?,
|
||||
)
|
||||
}
|
||||
|
||||
/// Clears the internal cache
|
||||
#[pyo3(signature = ())]
|
||||
#[pyo3(text_signature = "(self)")]
|
||||
fn _clear_cache(self_: PyRef<Self>) -> PyResult<()> {
|
||||
let super_ = self_.as_ref();
|
||||
let mut model = super_.model.write().map_err(|e| {
|
||||
exceptions::PyException::new_err(format!("Error while clearing BPE cache: {}", e))
|
||||
})?;
|
||||
model.clear_cache();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resize the internal cache
|
||||
#[pyo3(signature = (capacity))]
|
||||
#[pyo3(text_signature = "(self, capacity)")]
|
||||
fn _resize_cache(self_: PyRef<Self>, capacity: usize) -> PyResult<()> {
|
||||
let super_ = self_.as_ref();
|
||||
let mut model = super_.model.write().map_err(|e| {
|
||||
exceptions::PyException::new_err(format!("Error while resizing BPE cache: {}", e))
|
||||
})?;
|
||||
model.resize_cache(capacity);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of the WordPiece algorithm
|
||||
@ -858,6 +882,30 @@ impl PyUnigram {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears the internal cache
|
||||
#[pyo3(signature = ())]
|
||||
#[pyo3(text_signature = "(self)")]
|
||||
fn _clear_cache(self_: PyRef<Self>) -> PyResult<()> {
|
||||
let super_ = self_.as_ref();
|
||||
let mut model = super_.model.write().map_err(|e| {
|
||||
exceptions::PyException::new_err(format!("Error while clearing Unigram cache: {}", e))
|
||||
})?;
|
||||
model.clear_cache();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resize the internal cache
|
||||
#[pyo3(signature = (capacity))]
|
||||
#[pyo3(text_signature = "(self, capacity)")]
|
||||
fn _resize_cache(self_: PyRef<Self>, capacity: usize) -> PyResult<()> {
|
||||
let super_ = self_.as_ref();
|
||||
let mut model = super_.model.write().map_err(|e| {
|
||||
exceptions::PyException::new_err(format!("Error while resizing Unigram cache: {}", e))
|
||||
})?;
|
||||
model.resize_cache(capacity);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Models Module
|
||||
|
@ -354,6 +354,13 @@ impl BPE {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resize the cache
|
||||
pub fn resize_cache(&mut self, capacity: usize) {
|
||||
if let Some(ref mut cache) = self.cache {
|
||||
cache.resize(capacity);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an owned copy of the vocabulary held in `self.vocab`.
pub fn get_vocab(&self) -> Vocab {
    let vocab = &self.vocab;
    vocab.clone()
}
|
||||
|
@ -207,6 +207,23 @@ impl Model for ModelWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
impl ModelWrapper {
|
||||
pub fn clear_cache(&mut self) {
|
||||
match self {
|
||||
Self::Unigram(model) => model.clear_cache(),
|
||||
Self::BPE(model) => model.clear_cache(),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
pub fn resize_cache(&mut self, capacity: usize) {
|
||||
match self {
|
||||
Self::Unigram(model) => model.resize_cache(capacity),
|
||||
Self::BPE(model) => model.resize_cache(capacity),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub enum TrainerWrapper {
|
||||
BpeTrainer(BpeTrainer),
|
||||
|
@ -377,6 +377,16 @@ impl Unigram {
|
||||
let string = read_to_string(path)?;
|
||||
Ok(serde_json::from_str(&string)?)
|
||||
}
|
||||
|
||||
/// Clears the internal cache
|
||||
pub fn clear_cache(&mut self) {
|
||||
self.cache.clear();
|
||||
}
|
||||
|
||||
/// Resize the cache
|
||||
pub fn resize_cache(&mut self, capacity: usize) {
|
||||
self.cache.resize(capacity);
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over the vocabulary of the model and the relative score of each entry.
|
||||
|
@ -115,4 +115,11 @@ where
|
||||
/// Stores a single `key`/`value` pair by handing a one-element iterator to
/// `set_values`.
pub(crate) fn set(&self, key: K, value: V) {
    let single_entry = std::iter::once((key, value));
    self.set_values(single_entry)
}
|
||||
|
||||
pub(crate) fn resize(&mut self, capacity: usize) {
|
||||
self.capacity = capacity;
|
||||
if let Ok(mut cache) = self.map.try_write() {
|
||||
cache.shrink_to(capacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user