Ensure serialization works in all expected ways.

commit 16f75d9efc (parent aaf8e932b1)
Author: Sebastian Puetz
Committed by: Anthony MOI
Date: 2020-08-01 13:34:18 +02:00
39 changed files with 1303 additions and 615 deletions


@@ -7,8 +7,8 @@ use pyo3::types::*;
 use pyo3::PyObjectProtocol;
 use tk::models::bpe::BPE;
 use tk::tokenizer::{
-    PaddingDirection, PaddingParams, PaddingStrategy, PostProcessor, TokenizerImpl, TruncationParams,
-    TruncationStrategy,
+    PaddingDirection, PaddingParams, PaddingStrategy, PostProcessor, TokenizerImpl,
+    TruncationParams, TruncationStrategy,
 };
 use tokenizers as tk;
@@ -695,8 +695,8 @@ impl PyTokenizer {
     }

     #[getter]
-    fn get_model(&self) -> PyModel {
-        self.tokenizer.get_model().clone()
+    fn get_model(&self) -> PyResult<PyObject> {
+        self.tokenizer.get_model().get_as_subtype()
     }

     #[setter]
@@ -705,8 +705,12 @@ impl PyTokenizer {
     }

     #[getter]
-    fn get_normalizer(&self) -> Option<PyNormalizer> {
-        self.tokenizer.get_normalizer().cloned()
+    fn get_normalizer(&self) -> PyResult<PyObject> {
+        if let Some(n) = self.tokenizer.get_normalizer() {
+            n.get_as_subtype()
+        } else {
+            Ok(Python::acquire_gil().python().None())
+        }
     }

     #[setter]
@@ -715,8 +719,12 @@ impl PyTokenizer {
     }

     #[getter]
-    fn get_pre_tokenizer(&self) -> Option<PyPreTokenizer> {
-        self.tokenizer.get_pre_tokenizer().cloned()
+    fn get_pre_tokenizer(&self) -> PyResult<PyObject> {
+        if let Some(pt) = self.tokenizer.get_pre_tokenizer() {
+            pt.get_as_subtype()
+        } else {
+            Ok(Python::acquire_gil().python().None())
+        }
     }

     #[setter]
@@ -725,8 +733,12 @@ impl PyTokenizer {
     }

     #[getter]
-    fn get_post_processor(&self) -> Option<PyPostProcessor> {
-        self.tokenizer.get_post_processor().cloned()
+    fn get_post_processor(&self) -> PyResult<PyObject> {
+        if let Some(n) = self.tokenizer.get_post_processor() {
+            n.get_as_subtype()
+        } else {
+            Ok(Python::acquire_gil().python().None())
+        }
     }

     #[setter]
@@ -735,8 +747,12 @@ impl PyTokenizer {
     }

     #[getter]
-    fn get_decoder(&self) -> Option<PyDecoder> {
-        self.tokenizer.get_decoder().cloned()
+    fn get_decoder(&self) -> PyResult<PyObject> {
+        if let Some(dec) = self.tokenizer.get_decoder() {
+            dec.get_as_subtype()
+        } else {
+            Ok(Python::acquire_gil().python().None())
+        }
     }

     #[setter]
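
Each of the getters above now returns PyResult<PyObject> and delegates to a get_as_subtype() helper on the component wrapper (PyModel, PyNormalizer, PyPreTokenizer, PyPostProcessor, PyDecoder), so Python receives the concrete subclass rather than a clone of the opaque base wrapper, while missing optional components map to Python's None. A minimal sketch of what such a helper can look like, assuming the wrapper is a #[pyclass(subclass)] struct; the lowercase field and the PyLowercase subclass are illustrative stand-ins, not code from this commit:

    use pyo3::prelude::*;

    // Illustrative wrapper: the real PyNormalizer wraps the normalizer from
    // the `tk` crate; a plain bool stands in for that here.
    #[pyclass(subclass)]
    #[derive(Clone)]
    struct PyNormalizer {
        lowercase: bool,
    }

    // Hypothetical concrete, Python-visible subclass of the wrapper.
    #[pyclass(extends=PyNormalizer)]
    struct PyLowercase {}

    impl PyNormalizer {
        // Re-wrap `self` as the matching Python subclass so the component
        // retrieved from a tokenizer is reported (and serialized) as e.g.
        // `Lowercase` instead of the opaque base `Normalizer`.
        fn get_as_subtype(&self) -> PyResult<PyObject> {
            let gil = Python::acquire_gil();
            let py = gil.python();
            let base = self.clone();
            if base.lowercase {
                Ok(Py::new(py, (PyLowercase {}, base))?.into_py(py))
            } else {
                Ok(Py::new(py, base)?.into_py(py))
            }
        }
    }

The pay-off is that a component pulled back out of a tokenizer is indistinguishable from one constructed directly in Python, so it can be inspected, re-used, and serialized the same way.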