Python - Use Getter/Setter to get/modify Tokenizer's parts
@@ -55,61 +55,6 @@ impl Tokenizer {
         Ok(self.tokenizer.get_vocab_size(with_added_tokens))
     }
 
-    fn with_model(&mut self, model: &mut Model) -> PyResult<()> {
-        if let Some(model) = model.model.to_pointer() {
-            self.tokenizer.with_model(model);
-            Ok(())
-        } else {
-            Err(exceptions::Exception::py_err(
-                "The Model is already being used in another Tokenizer",
-            ))
-        }
-    }
-
-    fn with_pre_tokenizer(&mut self, pretok: &mut PreTokenizer) -> PyResult<()> {
-        if let Some(pretok) = pretok.pretok.to_pointer() {
-            self.tokenizer.with_pre_tokenizer(pretok);
-            Ok(())
-        } else {
-            Err(exceptions::Exception::py_err(
-                "The PreTokenizer is already being used in another Tokenizer",
-            ))
-        }
-    }
-
-    fn with_decoder(&mut self, decoder: &mut Decoder) -> PyResult<()> {
-        if let Some(decoder) = decoder.decoder.to_pointer() {
-            self.tokenizer.with_decoder(decoder);
-            Ok(())
-        } else {
-            Err(exceptions::Exception::py_err(
-                "The Decoder is already being used in another Tokenizer",
-            ))
-        }
-    }
-
-    fn with_post_processor(&mut self, processor: &mut PostProcessor) -> PyResult<()> {
-        if let Some(processor) = processor.processor.to_pointer() {
-            self.tokenizer.with_post_processor(processor);
-            Ok(())
-        } else {
-            Err(exceptions::Exception::py_err(
-                "The Processor is already being used in another Tokenizer",
-            ))
-        }
-    }
-
-    fn with_normalizer(&mut self, normalizer: &mut Normalizer) -> PyResult<()> {
-        if let Some(normalizer) = normalizer.normalizer.to_pointer() {
-            self.tokenizer.with_normalizer(normalizer);
-            Ok(())
-        } else {
-            Err(exceptions::Exception::py_err(
-                "The Normalizer is already being used in another Tokenizer",
-            ))
-        }
-    }
-
     #[args(kwargs = "**")]
     fn with_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
         let mut stride = 0;
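Note: this hunk only removes the builder-style `with_*` methods; equivalent `#[setter]` bindings are added further down, so Python callers switch from method calls to attribute assignment. A minimal before/after sketch of the call-site change, assuming the `Whitespace` pre-tokenizer and the early `BPE.empty()` constructor (neither appears in this diff):

    from tokenizers import Tokenizer, models, pre_tokenizers

    tokenizer = Tokenizer(models.BPE.empty())  # assumption: empty-model constructor

    # Before this commit: builder-style method call
    # tokenizer.with_pre_tokenizer(pre_tokenizers.Whitespace())

    # After this commit: plain attribute assignment
    tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()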
@@ -328,6 +273,18 @@ impl Tokenizer {
         })
     }
 
+    #[setter]
+    fn set_model(&mut self, model: &mut Model) -> PyResult<()> {
+        if let Some(model) = model.model.to_pointer() {
+            self.tokenizer.with_model(model);
+            Ok(())
+        } else {
+            Err(exceptions::Exception::py_err(
+                "The Model is already being used in another Tokenizer",
+            ))
+        }
+    }
+
     #[getter]
     fn get_normalizer(&self) -> PyResult<Option<Normalizer>> {
         Ok(self
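PyO3 strips the `get_`/`set_` prefix when deriving the Python attribute name, so `set_model` above is exposed as a writable `model` property (the matching `get_model` getter falls outside the hunks shown here but is implied by the Python stub below):

    tokenizer.model = new_model     # dispatches to set_model() on the Rust side
    current_model = tokenizer.model # dispatches to the corresponding getter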
@@ -338,6 +295,18 @@ impl Tokenizer {
         }))
     }
 
+    #[setter]
+    fn set_normalizer(&mut self, normalizer: &mut Normalizer) -> PyResult<()> {
+        if let Some(normalizer) = normalizer.normalizer.to_pointer() {
+            self.tokenizer.with_normalizer(normalizer);
+            Ok(())
+        } else {
+            Err(exceptions::Exception::py_err(
+                "The Normalizer is already being used in another Tokenizer",
+            ))
+        }
+    }
+
     #[getter]
     fn get_pre_tokenizer(&self) -> PyResult<Option<PreTokenizer>> {
         Ok(self
@@ -348,6 +317,18 @@ impl Tokenizer {
         }))
     }
 
+    #[setter]
+    fn set_pre_tokenizer(&mut self, pretok: &mut PreTokenizer) -> PyResult<()> {
+        if let Some(pretok) = pretok.pretok.to_pointer() {
+            self.tokenizer.with_pre_tokenizer(pretok);
+            Ok(())
+        } else {
+            Err(exceptions::Exception::py_err(
+                "The PreTokenizer is already being used in another Tokenizer",
+            ))
+        }
+    }
+
     #[getter]
     fn get_post_processor(&self) -> PyResult<Option<PostProcessor>> {
         Ok(self
@@ -358,10 +339,34 @@ impl Tokenizer {
         }))
     }
 
+    #[setter]
+    fn set_post_processor(&mut self, processor: &mut PostProcessor) -> PyResult<()> {
+        if let Some(processor) = processor.processor.to_pointer() {
+            self.tokenizer.with_post_processor(processor);
+            Ok(())
+        } else {
+            Err(exceptions::Exception::py_err(
+                "The Processor is already being used in another Tokenizer",
+            ))
+        }
+    }
+
     #[getter]
     fn get_decoder(&self) -> PyResult<Option<Decoder>> {
         Ok(self.tokenizer.get_decoder().map(|decoder| Decoder {
             decoder: Container::from_ref(decoder),
         }))
     }
+
+    #[setter]
+    fn set_decoder(&mut self, decoder: &mut Decoder) -> PyResult<()> {
+        if let Some(decoder) = decoder.decoder.to_pointer() {
+            self.tokenizer.with_decoder(decoder);
+            Ok(())
+        } else {
+            Err(exceptions::Exception::py_err(
+                "The Decoder is already being used in another Tokenizer",
+            ))
+        }
+    }
 }
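All five setters share the same `to_pointer()` guard: a Python wrapper owns its inner Rust object exactly once, and assigning it to a Tokenizer moves that ownership out of the wrapper. A hedged sketch of how the error path surfaces in Python, with illustrative component and constructor choices:

    from tokenizers import Tokenizer, models, pre_tokenizers

    shared = pre_tokenizers.Whitespace()

    tok_a = Tokenizer(models.BPE.empty())
    tok_a.pre_tokenizer = shared   # ownership of the Rust object moves into tok_a

    tok_b = Tokenizer(models.BPE.empty())
    tok_b.pre_tokenizer = shared   # raises an exception:
    # "The PreTokenizer is already being used in another Tokenizer"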
@@ -169,6 +169,56 @@ class Tokenizer:
         """
         pass
 
+
+    @property
+    def model(self) -> Model:
+        """ Get the model in use with this Tokenizer """
+        pass
+
+    @model.setter
+    def model(self, model: models.Model):
+        """ Change the model to use with this Tokenizer """
+        pass
+
+    @property
+    def pre_tokenizer(self) -> Optional[PreTokenizer]:
+        """ Get the pre-tokenizer in use with this model """
+        pass
+
+    @pre_tokenizer.setter
+    def pre_tokenizer(self, pre_tokenizer: pre_tokenizers.PreTokenizer):
+        """ Change the pre tokenizer to use with this Tokenizer """
+        pass
+
+    @property
+    def decoder(self) -> Optional[Decoder]:
+        """ Get the decoder in use with this model """
+        pass
+
+    @decoder.setter
+    def decoder(self, decoder: decoders.Decoder):
+        """ Change the decoder to use with this Tokenizer """
+        pass
+
+    @property
+    def post_processor(self) -> Optional[PostProcessor]:
+        """ Get the post-processor in use with this Tokenizer """
+        pass
+
+    @post_processor.setter
+    def post_processor(self, processor: processors.PostProcessor):
+        """ Change the post processor to use with this Tokenizer """
+
+    @property
+    def normalizer(self) -> Optional[Normalizer]:
+        """ Get the normalizer in use with this Tokenizer """
+        pass
+
+    @normalizer.setter
+    def normalizer(self, normalizer: normalizers.Normalizer):
+        """ Change the normalizer to use with this Tokenizer """
+
 
     def get_vocab_size(self, with_added_tokens: Optional[bool]) -> int:
         """ Returns the size of the vocabulary
@@ -178,24 +228,6 @@ class Tokenizer:
         """
         pass
 
-    def with_model(self, model: models.Model):
-        """ Change the model to use with this Tokenizer """
-        pass
-
-    def with_pre_tokenizer(self, pre_tokenizer: pre_tokenizers.PreTokenizer):
-        """ Change the pre tokenizer to use with this Tokenizer """
-        pass
-
-    def with_decoder(self, decoder: decoders.Decoder):
-        """ Change the decoder to use with this Tokenizer """
-        pass
-
-    def with_post_processor(self, processor: processors.PostProcessor):
-        """ Change the post processor to use with this Tokenizer """
-
-    def with_normalizer(self, normalizer: normalizers.Normalizer):
-        """ Change the normalizer to use with this Tokenizer """
-
     def with_truncation(self,
                         max_length: int,
                         stride: Optional[int],
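Per the stub above, every getter except `model` is typed `Optional[...]`, so an unconfigured component reads back as `None`. A short usage sketch, again assuming the early `BPE.empty()` constructor and the `WordPiece` decoder as illustrative choices:

    from tokenizers import Tokenizer, models, decoders

    tokenizer = Tokenizer(models.BPE.empty())

    print(tokenizer.decoder)       # None: no decoder configured yet
    tokenizer.decoder = decoders.WordPiece()
    print(tokenizer.decoder)       # now the WordPiece decoder wrapper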