Python - RustFmt
This commit runs rustfmt over the Python bindings. Every hunk below is a pure formatting change; no behavior is touched.
```diff
@@ -23,6 +23,7 @@ impl std::error::Error for PyError {}
 pub struct ToPyResult<T>(pub Result<T>);
 impl<T> std::convert::Into<PyResult<T>> for ToPyResult<T> {
     fn into(self) -> PyResult<T> {
-        self.0.map_err(|e| { exceptions::Exception::py_err(format!("{}", e)) })
+        self.0
+            .map_err(|e| exceptions::Exception::py_err(format!("{}", e)))
     }
 }
```
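The hunk above is pure rustfmt churn, but the type it touches is worth a note: `ToPyResult` is a newtype over the library's `Result` so the bindings can turn any tokenizers error into a Python exception with a single `.into()`. A minimal, dependency-free sketch of the same pattern follows; the `Result` and `PyResult` aliases here are stand-ins for the real ones (which come from the tokenizers crate and pyo3), not the actual bindings code:

```rust
use std::error::Error;

// Stand-ins: in the bindings, `Result` is the tokenizers library's
// result type and `PyResult` comes from pyo3.
type Result<T> = std::result::Result<T, Box<dyn Error>>;
type PyResult<T> = std::result::Result<T, String>;

// Newtype over the library Result so a conversion can be attached to it.
pub struct ToPyResult<T>(pub Result<T>);

// `Into` is implemented directly (instead of the usually preferred `From`)
// because the orphan rule rejects `impl From<ToPyResult<T>> for PyResult<T>`:
// there, `Self` would be the foreign `Result` type with an uncovered `T`.
impl<T> std::convert::Into<PyResult<T>> for ToPyResult<T> {
    fn into(self) -> PyResult<T> {
        // Stringify the library error, as py_err(format!("{}", e)) does.
        self.0.map_err(|e| format!("{}", e))
    }
}

fn main() {
    let ok: PyResult<u32> = ToPyResult(Ok(42)).into();
    assert_eq!(ok, Ok(42));
}
```

At call sites this reads as `ToPyResult(self.tokenizer.decode(...)).into()`, which is exactly the shape the `decode` hunks further down rearrange.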
```diff
@@ -174,10 +174,7 @@ impl WordLevel
             }
         }

-        match tk::models::wordlevel::WordLevel::from_files(
-            vocab,
-            unk_token,
-        ) {
+        match tk::models::wordlevel::WordLevel::from_files(vocab, unk_token) {
             Err(e) => {
                 println!("Errors: {:?}", e);
                 Err(exceptions::Exception::py_err(
```
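Beyond the collapsed argument list, this hunk shows the bindings' error-handling shape: match on a fallible constructor and turn `Err` into a Python exception. A rough, self-contained stand-in for `from_files` is sketched below; the real `WordLevel::from_files` parses an actual vocab file, so this toy version (one token per line, line number as id) is purely illustrative:

```rust
use std::collections::HashMap;
use std::fs;

// Toy stand-in for tk::models::wordlevel::WordLevel::from_files:
// one token per line, mapped to its line number as the id.
fn from_files(vocab: &str, unk_token: &str) -> Result<HashMap<String, u32>, std::io::Error> {
    let contents = fs::read_to_string(vocab)?;
    let mut map: HashMap<String, u32> = contents
        .lines()
        .enumerate()
        .map(|(i, tok)| (tok.to_string(), i as u32))
        .collect();
    // Make sure the unknown token always has an id.
    let next_id = map.len() as u32;
    map.entry(unk_token.to_string()).or_insert(next_id);
    Ok(map)
}

fn main() {
    // Same Err-first match shape as the hunk above.
    match from_files("vocab.txt", "[UNK]") {
        Err(e) => println!("Errors: {:?}", e),
        Ok(vocab) => println!("loaded {} tokens", vocab.len()),
    }
}
```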
```diff
@@ -158,4 +158,3 @@ impl Strip {
         }))
     }
 }
-
```
```diff
@@ -91,13 +91,16 @@ pub struct CharDelimiterSplit {}
 impl CharDelimiterSplit {
     #[new]
     pub fn new(obj: &PyRawObject, delimiter: &str) -> PyResult<()> {
-        let chr_delimiter = delimiter.chars().nth(0).ok_or(exceptions::Exception::py_err(
-            "delimiter must be a single character",
-        ))?;
+        let chr_delimiter = delimiter
+            .chars()
+            .nth(0)
+            .ok_or(exceptions::Exception::py_err(
+                "delimiter must be a single character",
+            ))?;
         Ok(obj.init(PreTokenizer {
             pretok: Container::Owned(Box::new(
-                tk::pre_tokenizers::delimiter::CharDelimiterSplit::new(chr_delimiter)
-            ))
+                tk::pre_tokenizers::delimiter::CharDelimiterSplit::new(chr_delimiter),
+            )),
         }))
     }
 }
```
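The validation that rustfmt re-wrapped here is worth pulling out on its own: take the first `char` of the incoming `&str`, or fail. Note that the check only rejects the empty string; a multi-character input like `"ab"` silently keeps `'a'` despite the error message's wording. A standalone sketch, with a plain `String` error in place of the Python exception:

```rust
// Extracted validation pattern: first char or error. `.next()` is the
// usual spelling of the original's `.nth(0)`.
fn single_char(delimiter: &str) -> Result<char, String> {
    delimiter
        .chars()
        .next()
        .ok_or_else(|| "delimiter must be a single character".to_string())
}

fn main() {
    assert_eq!(single_char(" "), Ok(' '));
    assert_eq!(single_char("ab"), Ok('a')); // extra chars are ignored
    assert!(single_char("").is_err());
}
```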
```diff
@@ -29,7 +29,6 @@ impl BertProcessing {
     }
 }

-
 #[pyclass(extends=PostProcessor)]
 pub struct RobertaProcessing {}
 #[pymethods]
```
```diff
@@ -39,10 +39,10 @@ impl Tokenizer {
     }

     fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
-        Ok(self.tokenizer
+        Ok(self
+            .tokenizer
             .get_post_processor()
-            .map_or(0, |p| p.as_ref().added_tokens(is_pair))
-        )
+            .map_or(0, |p| p.as_ref().added_tokens(is_pair)))
     }

     #[args(kwargs = "**")]
```
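Formatting aside, this method is a compact example of `Option::map_or`: if no post-processor is attached, the answer is simply 0. A self-contained sketch follows; the types and numbers are made up (the real tokenizer stores its post-processor behind a container type, not a bare `Option`):

```rust
// Made-up stand-in for a post-processor that prepends/appends markers.
struct PostProcessor {
    per_sequence: usize,
}

impl PostProcessor {
    fn added_tokens(&self, is_pair: bool) -> usize {
        // e.g. BERT adds [CLS]+[SEP] for one sequence, plus one more [SEP]
        // when a second sequence is present.
        if is_pair {
            self.per_sequence + 1
        } else {
            self.per_sequence
        }
    }
}

struct Tokenizer {
    post_processor: Option<PostProcessor>,
}

impl Tokenizer {
    fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
        self.post_processor
            .as_ref()
            .map_or(0, |p| p.added_tokens(is_pair))
    }
}

fn main() {
    let bare = Tokenizer { post_processor: None };
    assert_eq!(bare.num_special_tokens_to_add(true), 0);

    let bert = Tokenizer {
        post_processor: Some(PostProcessor { per_sequence: 2 }),
    };
    assert_eq!(bert.num_special_tokens_to_add(true), 3);
}
```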
```diff
@@ -197,10 +197,11 @@ impl Tokenizer {
     }

     fn decode(&self, ids: Vec<u32>, skip_special_tokens: Option<bool>) -> PyResult<String> {
-        ToPyResult(self.tokenizer.decode(
-            ids,
-            skip_special_tokens.unwrap_or(true),
-        )).into()
+        ToPyResult(
+            self.tokenizer
+                .decode(ids, skip_special_tokens.unwrap_or(true)),
+        )
+        .into()
     }

     fn decode_batch(
```
```diff
@@ -208,10 +209,11 @@ impl Tokenizer {
         sentences: Vec<Vec<u32>>,
         skip_special_tokens: Option<bool>,
     ) -> PyResult<Vec<String>> {
-        ToPyResult(self.tokenizer.decode_batch(
-            sentences,
-            skip_special_tokens.unwrap_or(true),
-        )).into()
+        ToPyResult(
+            self.tokenizer
+                .decode_batch(sentences, skip_special_tokens.unwrap_or(true)),
+        )
+        .into()
     }

     fn token_to_id(&self, token: &str) -> Option<u32> {
```
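Both decode hunks share one more detail worth noting: `skip_special_tokens` arrives as `Option<bool>` (Python's `None` when the keyword is omitted) and is defaulted to `true` with `unwrap_or` before the call crosses into the library; the result then goes through the same `ToPyResult` conversion sketched earlier. A toy version of the defaulting, with an invented rule that id 0 is the special token (both the rule and the body are assumptions, not the real decoder):

```rust
// Toy decode: Option<bool> models an omitted Python keyword argument.
fn decode(ids: &[u32], skip_special_tokens: Option<bool>) -> String {
    let skip = skip_special_tokens.unwrap_or(true); // default when None
    ids.iter()
        .filter(|&&id| !(skip && id == 0)) // pretend id 0 is special
        .map(|id| id.to_string())
        .collect::<Vec<_>>()
        .join(" ")
}

fn main() {
    assert_eq!(decode(&[0, 5, 7], None), "5 7");
    assert_eq!(decode(&[0, 5, 7], Some(false)), "0 5 7");
}
```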