Mirror of https://github.com/mii443/tokenizers.git

Python - RustFmt

@@ -23,6 +23,7 @@ impl std::error::Error for PyError {}
 pub struct ToPyResult<T>(pub Result<T>);
 impl<T> std::convert::Into<PyResult<T>> for ToPyResult<T> {
     fn into(self) -> PyResult<T> {
-        self.0.map_err(|e| { exceptions::Exception::py_err(format!("{}", e)) })
+        self.0
+            .map_err(|e| exceptions::Exception::py_err(format!("{}", e)))
     }
 }

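For context on the hunk above: ToPyResult wraps the bindings' internal Result so it can be converted into a PyResult, turning any error into a Python exception built from its Display output. A minimal, std-only sketch of the same newtype-plus-Into pattern follows; LibError, LibResult, and ToStringResult are illustrative stand-ins (String plays the role of PyErr), not names from the bindings.

use std::fmt;

// Illustrative stand-ins: LibError/LibResult play the role of the library's
// error type and Result alias, and String plays the role of PyErr.
#[derive(Debug)]
struct LibError(String);

impl fmt::Display for LibError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

type LibResult<T> = Result<T, LibError>;

// Newtype wrapper, mirroring ToPyResult<T>(pub Result<T>).
struct ToStringResult<T>(pub LibResult<T>);

// Same shape as the impl in the hunk: convert by mapping the error into a
// formatted message on the target side.
impl<T> std::convert::Into<Result<T, String>> for ToStringResult<T> {
    fn into(self) -> Result<T, String> {
        self.0.map_err(|e| format!("{}", e))
    }
}

fn main() {
    let ok: Result<u32, String> = ToStringResult(Ok(7)).into();
    let err: Result<u32, String> = ToStringResult(Err(LibError("boom".into()))).into();
    println!("{:?} {:?}", ok, err);
}
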
@@ -174,10 +174,7 @@ impl WordLevel
             }
         }

-        match tk::models::wordlevel::WordLevel::from_files(
-            vocab,
-            unk_token,
-        ) {
+        match tk::models::wordlevel::WordLevel::from_files(vocab, unk_token) {
             Err(e) => {
                 println!("Errors: {:?}", e);
                 Err(exceptions::Exception::py_err(

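The change above only reflows the call, but the surrounding pattern is worth a note: a from_files-style constructor returns a Result, and the caller matches on it, printing the underlying error before handing back a simpler, user-facing one. A small std-only sketch under those assumptions; vocab_from_file and load_or_message are hypothetical names, and the final error message is invented since the real string is cut off in this diff.

use std::collections::HashMap;
use std::fs;

// Hypothetical stand-in for a from_files-style constructor: it returns a
// Result so the caller decides how to surface failures.
fn vocab_from_file(path: &str) -> Result<HashMap<String, u32>, std::io::Error> {
    let contents = fs::read_to_string(path)?;
    Ok(contents
        .lines()
        .enumerate()
        .map(|(i, token)| (token.to_string(), i as u32))
        .collect())
}

fn load_or_message(path: &str) -> Result<HashMap<String, u32>, String> {
    match vocab_from_file(path) {
        Err(e) => {
            // Mirrors the hunk: log the underlying error, then return a
            // simpler, user-facing one (message invented for this sketch).
            println!("Errors: {:?}", e);
            Err("Error while initializing the model".to_string())
        }
        Ok(vocab) => Ok(vocab),
    }
}

fn main() {
    match load_or_message("vocab.txt") {
        Ok(v) => println!("loaded {} tokens", v.len()),
        Err(msg) => println!("{}", msg),
    }
}
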
@@ -158,4 +158,3 @@ impl Strip
         }))
     }
 }
-

@@ -91,13 +91,16 @@ pub struct CharDelimiterSplit {}
 impl CharDelimiterSplit {
     #[new]
     pub fn new(obj: &PyRawObject, delimiter: &str) -> PyResult<()> {
-        let chr_delimiter = delimiter.chars().nth(0).ok_or(exceptions::Exception::py_err(
-            "delimiter must be a single character",
-        ))?;
-        Ok(obj.init(PreTokenizer{
-            pretok:Container::Owned(Box::new(
-                tk::pre_tokenizers::delimiter::CharDelimiterSplit::new(chr_delimiter)
-            ))
+        let chr_delimiter = delimiter
+            .chars()
+            .nth(0)
+            .ok_or(exceptions::Exception::py_err(
+                "delimiter must be a single character",
+            ))?;
+        Ok(obj.init(PreTokenizer {
+            pretok: Container::Owned(Box::new(
+                tk::pre_tokenizers::delimiter::CharDelimiterSplit::new(chr_delimiter),
+            )),
         }))
     }
 }

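The reformatted chain makes the validation step easier to follow: take the first character of the delimiter string, or fail with an error when the string is empty. A std-only sketch of that pattern; single_char_delimiter is a hypothetical helper, and, as in the hunk above, only an empty string is rejected (extra characters are silently ignored).

// Illustrative helper mirroring the chained form rustfmt settles on.
fn single_char_delimiter(delimiter: &str) -> Result<char, String> {
    delimiter
        .chars()
        .next() // equivalent to .nth(0) in the hunk
        .ok_or_else(|| "delimiter must be a single character".to_string())
}

fn main() {
    println!("{:?}", single_char_delimiter("-"));  // Ok('-')
    println!("{:?}", single_char_delimiter(""));   // Err(...)
    println!("{:?}", single_char_delimiter("--")); // Ok('-'), extra chars ignored
}
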
@@ -29,7 +29,6 @@ impl BertProcessing
     }
 }

-
 #[pyclass(extends=PostProcessor)]
 pub struct RobertaProcessing {}
 #[pymethods]

@@ -39,10 +39,10 @@ impl Tokenizer
     }

     fn num_special_tokens_to_add(&self, is_pair: bool) -> PyResult<usize> {
-        Ok(self.tokenizer
-            .get_post_processor()
-            .map_or(0, |p| p.as_ref().added_tokens(is_pair))
-        )
+        Ok(self
+            .tokenizer
+            .get_post_processor()
+            .map_or(0, |p| p.as_ref().added_tokens(is_pair)))
     }

     #[args(kwargs = "**")]

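num_special_tokens_to_add relies on Option::map_or: when no post-processor is attached the count falls back to 0, otherwise the post-processor reports how many tokens it adds for a single sequence or a pair. A self-contained sketch of that shape; PostProc and Tok are hypothetical stand-ins for the bindings' wrapper types.

// Hypothetical post-processor that knows how many special tokens it inserts.
struct PostProc {
    added_single: usize,
    added_pair: usize,
}

impl PostProc {
    fn added_tokens(&self, is_pair: bool) -> usize {
        if is_pair { self.added_pair } else { self.added_single }
    }
}

// Hypothetical tokenizer holding an optional post-processor.
struct Tok {
    post_processor: Option<PostProc>,
}

impl Tok {
    fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
        // No post-processor means no special tokens are added.
        self.post_processor
            .as_ref()
            .map_or(0, |p| p.added_tokens(is_pair))
    }
}

fn main() {
    let bare = Tok { post_processor: None };
    let bert_like = Tok {
        post_processor: Some(PostProc { added_single: 2, added_pair: 3 }),
    };
    println!(
        "{} {}",
        bare.num_special_tokens_to_add(true),
        bert_like.num_special_tokens_to_add(true)
    );
}
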
@@ -197,10 +197,11 @@ impl Tokenizer
     }

     fn decode(&self, ids: Vec<u32>, skip_special_tokens: Option<bool>) -> PyResult<String> {
-        ToPyResult(self.tokenizer.decode(
-            ids,
-            skip_special_tokens.unwrap_or(true),
-        )).into()
+        ToPyResult(
+            self.tokenizer
+                .decode(ids, skip_special_tokens.unwrap_or(true)),
+        )
+        .into()
     }

     fn decode_batch(

@@ -208,10 +209,11 @@ impl Tokenizer
         sentences: Vec<Vec<u32>>,
         skip_special_tokens: Option<bool>,
     ) -> PyResult<Vec<String>> {
-        ToPyResult(self.tokenizer.decode_batch(
-            sentences,
-            skip_special_tokens.unwrap_or(true),
-        )).into()
+        ToPyResult(
+            self.tokenizer
+                .decode_batch(sentences, skip_special_tokens.unwrap_or(true)),
+        )
+        .into()
     }

     fn token_to_id(&self, token: &str) -> Option<u32> {

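decode and decode_batch share two conventions visible in these hunks: an Option<bool> flag defaulted to true with unwrap_or, and a library Result flattened into a Python-friendly error, which is what wrapping the call in ToPyResult(...).into() achieves. A std-only sketch of those two steps; all names are hypothetical and the "ids below 3 are special" rule exists only for the example.

// Hypothetical decode-like helper: the optional flag defaults to true via
// unwrap_or, and the inner Result's error is flattened into a message, the
// role ToPyResult(...).into() plays for PyResult in the bindings.
fn decode_ids(ids: &[u32], skip_special_tokens: Option<bool>) -> Result<String, String> {
    inner_decode(ids, skip_special_tokens.unwrap_or(true)).map_err(|e| format!("{}", e))
}

// Stand-in for the underlying decoder; ids below 3 are treated as "special".
fn inner_decode(ids: &[u32], skip_special_tokens: bool) -> Result<String, Box<dyn std::error::Error>> {
    if ids.is_empty() {
        return Err("nothing to decode".into());
    }
    Ok(ids
        .iter()
        .filter(|&&id| !(skip_special_tokens && id < 3))
        .map(|id| format!("tok{}", id))
        .collect::<Vec<_>>()
        .join(" "))
}

fn main() {
    println!("{:?}", decode_ids(&[0, 5, 9], None));        // skips ids < 3 by default
    println!("{:?}", decode_ids(&[0, 5, 9], Some(false))); // keeps everything
    println!("{:?}", decode_ids(&[], None));               // surfaces the error message
}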