mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Python - Simplify truncation interface
This commit is contained in:
@ -97,23 +97,34 @@ impl Tokenizer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn with_truncation(
|
#[args(kwargs = "**")]
|
||||||
&mut self,
|
fn with_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
|
||||||
max_length: usize,
|
let mut stride = 0;
|
||||||
stride: usize,
|
let mut strategy = TruncationStrategy::LongestFirst;
|
||||||
strategy: &str,
|
|
||||||
) -> PyResult<()> {
|
if let Some(kwargs) = kwargs {
|
||||||
let strategy = match strategy {
|
for (key, value) in kwargs {
|
||||||
"longest_first" => Ok(TruncationStrategy::LongestFirst),
|
let key: &str = key.extract()?;
|
||||||
"only_first" => Ok(TruncationStrategy::OnlyFirst),
|
match key {
|
||||||
"only_second" => Ok(TruncationStrategy::OnlySecond),
|
"stride" => stride = value.extract()?,
|
||||||
other => Err(PyError(format!(
|
"strategy" => {
|
||||||
"Unknown `strategy`: `{}`. Use \
|
let value: &str = value.extract()?;
|
||||||
one of `longest_first`, `only_first`, or `only_second`",
|
strategy = match value {
|
||||||
other
|
"longest_first" => Ok(TruncationStrategy::LongestFirst),
|
||||||
))
|
"only_first" => Ok(TruncationStrategy::OnlyFirst),
|
||||||
.into_pyerr()),
|
"only_second" => Ok(TruncationStrategy::OnlySecond),
|
||||||
}?;
|
_ => Err(PyError(format!(
|
||||||
|
"Unknown `strategy`: `{}`. Use \
|
||||||
|
one of `longest_first`, `only_first`, or `only_second`",
|
||||||
|
value
|
||||||
|
))
|
||||||
|
.into_pyerr()),
|
||||||
|
}?
|
||||||
|
}
|
||||||
|
_ => println!("Ignored unknown kwarg option {}", key),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
self.tokenizer.with_truncation(Some(TruncationParams {
|
self.tokenizer.with_truncation(Some(TruncationParams {
|
||||||
max_length,
|
max_length,
|
||||||
|
Reference in New Issue
Block a user