Expose Tokenizer parts

This commit is contained in:
Anthony MOI
2019-12-31 22:57:47 -05:00
parent 90df088054
commit 90dfdc715d
4 changed files with 81 additions and 1 deletions

View File

@@ -5,7 +5,7 @@ use super::utils::Container;
use pyo3::exceptions; use pyo3::exceptions;
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::*; use pyo3::types::*;
use std::path::{Path, PathBuf}; use std::path::Path;
/// A Model represents some tokenization algorithm like BPE or Word /// A Model represents some tokenization algorithm like BPE or Word
/// This class cannot be constructed directly. Please use one of the concrete models. /// This class cannot be constructed directly. Please use one of the concrete models.

View File

@@ -12,6 +12,7 @@ use super::normalizers::Normalizer;
use super::pre_tokenizers::PreTokenizer; use super::pre_tokenizers::PreTokenizer;
use super::processors::PostProcessor; use super::processors::PostProcessor;
use super::trainers::Trainer; use super::trainers::Trainer;
use super::utils::Container;
use tk::tokenizer::{ use tk::tokenizer::{
PaddingDirection, PaddingParams, PaddingStrategy, TruncationParams, TruncationStrategy, PaddingDirection, PaddingParams, PaddingStrategy, TruncationParams, TruncationStrategy,
@@ -301,4 +302,48 @@ impl Tokenizer {
} }
}) })
} }
/// Python property getter (via PyO3's `#[getter]`) returning the tokenizer's model.
///
/// The returned `Model` is built with `Container::from_ref`, i.e. it refers to the model
/// already held by the inner tokenizer instead of taking a new `Box`.
// NOTE(review): the wrapper carries no borrow of `self`; confirm the Python object
// cannot outlive the tokenizer (or a later `with_model` replacement).
#[getter]
fn get_model(&self) -> PyResult<Model> {
    let model = Container::from_ref(self.tokenizer.get_model());
    Ok(Model { model })
}
/// Python property getter (via PyO3's `#[getter]`) returning the tokenizer's normalizer.
///
/// Yields `None` when the inner tokenizer has no normalizer; otherwise a `Normalizer`
/// built with `Container::from_ref` around the existing instance.
// NOTE(review): the wrapper carries no borrow of `self`; confirm the Python object
// cannot outlive the component it refers to.
#[getter]
fn get_normalizer(&self) -> PyResult<Option<Normalizer>> {
    let current = self.tokenizer.get_normalizer();
    let wrapped = current.map(|inner| {
        let normalizer = Container::from_ref(inner);
        Normalizer { normalizer }
    });
    Ok(wrapped)
}
/// Python property getter (via PyO3's `#[getter]`) returning the tokenizer's pre-tokenizer.
///
/// Yields `None` when the inner tokenizer has no pre-tokenizer; otherwise a `PreTokenizer`
/// built with `Container::from_ref` around the existing instance.
// NOTE(review): the wrapper carries no borrow of `self`; confirm the Python object
// cannot outlive the component it refers to.
#[getter]
fn get_pre_tokenizer(&self) -> PyResult<Option<PreTokenizer>> {
    let current = self.tokenizer.get_pre_tokenizer();
    let wrapped = current.map(|inner| {
        let pretok = Container::from_ref(inner);
        PreTokenizer { pretok }
    });
    Ok(wrapped)
}
/// Python property getter (via PyO3's `#[getter]`) returning the tokenizer's post-processor.
///
/// Yields `None` when the inner tokenizer has no post-processor; otherwise a `PostProcessor`
/// built with `Container::from_ref` around the existing instance.
// NOTE(review): the wrapper carries no borrow of `self`; confirm the Python object
// cannot outlive the component it refers to.
#[getter]
fn get_post_processor(&self) -> PyResult<Option<PostProcessor>> {
    let current = self.tokenizer.get_post_processor();
    let wrapped = current.map(|inner| {
        let processor = Container::from_ref(inner);
        PostProcessor { processor }
    });
    Ok(wrapped)
}
/// Python property getter (via PyO3's `#[getter]`) returning the tokenizer's decoder.
///
/// Yields `None` when the inner tokenizer has no decoder; otherwise a `Decoder`
/// built with `Container::from_ref` around the existing instance.
// NOTE(review): the wrapper carries no borrow of `self`; confirm the Python object
// cannot outlive the component it refers to.
#[getter]
fn get_decoder(&self) -> PyResult<Option<Decoder>> {
    let current = self.tokenizer.get_decoder();
    let wrapped = current.map(|inner| {
        let decoder = Container::from_ref(inner);
        Decoder { decoder }
    });
    Ok(wrapped)
}
} }

View File

@@ -17,6 +17,11 @@ impl<T> Container<T>
where where
T: ?Sized, T: ?Sized,
{ {
/// Build a `Container::Pointer` that refers to the value inside an existing `Box`.
///
/// The box is only borrowed: its address is recorded as a raw pointer and nothing is
/// moved or cloned. The parameter is `&Box<T>` so the result of the tokenizer's
/// `get_*` accessors can be passed straight in.
// NOTE(review): the pointer is obtained from a shared reference and then cast to
// `*mut T`; confirm nothing ever writes through it, and that the pointee outlives
// every `Container` created here.
pub fn from_ref(reference: &Box<T>) -> Self {
    // Deref-coerce `&Box<T>` to `&T` so the pointer targets the boxed value itself.
    let target: &T = reference;
    Container::Pointer(target as *const T as *mut T)
}
/// Consumes ourself and return the Boxed element if we have the ownership, None otherwise. /// Consumes ourself and return the Boxed element if we have the ownership, None otherwise.
pub fn take(self) -> Option<Box<T>> { pub fn take(self) -> Option<Box<T>> {
match self { match self {

View File

@@ -165,30 +165,60 @@ impl Tokenizer {
self self
} }
/// Borrow the normalizer attached to this tokenizer.
///
/// Returns `None` when no normalizer is set.
// `borrowed_box` is silenced on purpose: the stored `Box` is handed out by reference as-is.
#[allow(clippy::borrowed_box)]
pub fn get_normalizer(&self) -> Option<&Box<dyn Normalizer + Sync>> {
    Option::as_ref(&self.normalizer)
}
/// Set the pre tokenizer /// Set the pre tokenizer
pub fn with_pre_tokenizer(&mut self, pre_tokenizer: Box<dyn PreTokenizer + Sync>) -> &Self { pub fn with_pre_tokenizer(&mut self, pre_tokenizer: Box<dyn PreTokenizer + Sync>) -> &Self {
self.pre_tokenizer = Some(pre_tokenizer); self.pre_tokenizer = Some(pre_tokenizer);
self self
} }
/// Borrow the pre-tokenizer attached to this tokenizer.
///
/// Returns `None` when no pre-tokenizer is set.
// `borrowed_box` is silenced on purpose: the stored `Box` is handed out by reference as-is.
#[allow(clippy::borrowed_box)]
pub fn get_pre_tokenizer(&self) -> Option<&Box<dyn PreTokenizer + Sync>> {
    Option::as_ref(&self.pre_tokenizer)
}
/// Set the post processor /// Set the post processor
pub fn with_post_processor(&mut self, post_processor: Box<dyn PostProcessor + Sync>) -> &Self { pub fn with_post_processor(&mut self, post_processor: Box<dyn PostProcessor + Sync>) -> &Self {
self.post_processor = Some(post_processor); self.post_processor = Some(post_processor);
self self
} }
/// Borrow the post-processor attached to this tokenizer.
///
/// Returns `None` when no post-processor is set.
// `borrowed_box` is silenced on purpose: the stored `Box` is handed out by reference as-is.
#[allow(clippy::borrowed_box)]
pub fn get_post_processor(&self) -> Option<&Box<dyn PostProcessor + Sync>> {
    Option::as_ref(&self.post_processor)
}
/// Set the decoder /// Set the decoder
pub fn with_decoder(&mut self, decoder: Box<dyn Decoder + Sync>) -> &Self { pub fn with_decoder(&mut self, decoder: Box<dyn Decoder + Sync>) -> &Self {
self.decoder = Some(decoder); self.decoder = Some(decoder);
self self
} }
/// Borrow the decoder attached to this tokenizer.
///
/// Returns `None` when no decoder is set.
// `borrowed_box` is silenced on purpose: the stored `Box` is handed out by reference as-is.
#[allow(clippy::borrowed_box)]
pub fn get_decoder(&self) -> Option<&Box<dyn Decoder + Sync>> {
    Option::as_ref(&self.decoder)
}
/// Set the model /// Set the model
pub fn with_model(&mut self, model: Box<dyn Model + Sync>) -> &Self { pub fn with_model(&mut self, model: Box<dyn Model + Sync>) -> &Self {
self.model = model; self.model = model;
self self
} }
/// Get the model
///
/// Returns a shared borrow of the boxed model stored in `self.model`. Unlike the
/// normalizer, pre-tokenizer, post-processor and decoder getters, the result is not
/// wrapped in an `Option`.
// `borrowed_box` is silenced on purpose: the stored `Box` is handed out by reference as-is.
#[allow(clippy::borrowed_box)]
pub fn get_model(&self) -> &Box<dyn Model + Sync> {
&self.model
}
/// Set the truncation parameters /// Set the truncation parameters
pub fn with_truncation(&mut self, trunc: Option<TruncationParams>) -> &Self { pub fn with_truncation(&mut self, trunc: Option<TruncationParams>) -> &Self {
self.trunc = trunc; self.trunc = trunc;