mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Python - Make all relevant classes picklable
This commit is contained in:
@ -1,16 +1,46 @@
|
||||
extern crate tokenizers as tk;
|
||||
|
||||
use super::utils::Container;
|
||||
use pyo3::exceptions;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::*;
|
||||
|
||||
#[pyclass(dict)]
|
||||
#[pyclass(dict, module = "tokenizers.processors")]
|
||||
pub struct PostProcessor {
|
||||
pub processor: Container<dyn tk::tokenizer::PostProcessor>,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl PostProcessor {
|
||||
fn __getstate__(&self, py: Python) -> PyResult<PyObject> {
|
||||
let data = self
|
||||
.processor
|
||||
.execute(|processor| serde_json::to_string(&processor))
|
||||
.map_err(|e| {
|
||||
exceptions::Exception::py_err(format!(
|
||||
"Error while attempting to pickle PostProcessor: {}",
|
||||
e.to_string()
|
||||
))
|
||||
})?;
|
||||
Ok(PyBytes::new(py, data.as_bytes()).to_object(py))
|
||||
}
|
||||
|
||||
fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
|
||||
match state.extract::<&PyBytes>(py) {
|
||||
Ok(s) => {
|
||||
self.processor =
|
||||
Container::Owned(serde_json::from_slice(s.as_bytes()).map_err(|e| {
|
||||
exceptions::Exception::py_err(format!(
|
||||
"Error while attempting to unpickle PostProcessor: {}",
|
||||
e.to_string()
|
||||
))
|
||||
})?);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
|
||||
self.processor.execute(|p| p.added_tokens(is_pair))
|
||||
}
|
||||
|
Reference in New Issue
Block a user