Addressing @n1t0's comments.

This commit is contained in:
Nicolas Patry
2020-09-04 11:57:01 +02:00
parent 7b2caca764
commit efa20202dc
3 changed files with 6 additions and 19 deletions

View File

@@ -288,21 +288,8 @@ pub struct PyDigits {}
#[pymethods] #[pymethods]
impl PyDigits { impl PyDigits {
#[new] #[new]
#[args(kwargs = "**")] #[args(individual_digits = false)]
fn new(kwargs: Option<&PyDict>) -> PyResult<(Self, PyPreTokenizer)> { fn new(individual_digits: bool) -> PyResult<(Self, PyPreTokenizer)> {
let mut individual_digits = false;
if let Some(kwargs) = kwargs {
for (key, value) in kwargs {
let key: &str = key.extract()?;
match key {
"individual_digits" => {
individual_digits = value.extract()?;
}
_ => println!("Ignored unknown kwarg option {}", key),
}
}
}
Ok((PyDigits {}, Digits::new(individual_digits).into())) Ok((PyDigits {}, Digits::new(individual_digits).into()))
} }
} }

View File

@@ -116,6 +116,6 @@ class TestDigits:
assert Digits() is not None assert Digits() is not None
assert isinstance(Digits(), PreTokenizer) assert isinstance(Digits(), PreTokenizer)
assert isinstance(Digits(), Digits) assert isinstance(Digits(), Digits)
assert isinstance(Digits(individual_digits=True), Digits) assert isinstance(Digits(True), Digits)
assert isinstance(Digits(individual_digits=False), Digits) assert isinstance(Digits(False), Digits)
assert isinstance(pickle.loads(pickle.dumps(Digits())), Digits) assert isinstance(pickle.loads(pickle.dumps(Digits())), Digits)

View File

@@ -3,8 +3,8 @@ use serde::{Deserialize, Serialize};
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior}; use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
#[derive(Serialize, Deserialize, Clone, Debug)] #[derive(Serialize, Deserialize, Clone, Debug)]
/// Replaces all the whitespaces by the provided meta character and then /// Pre tokenizes the numbers into single tokens. If individual_digits is set
/// splits on this character /// to true, then all digits are split into individual tokens.
#[serde(tag = "type")] #[serde(tag = "type")]
pub struct Digits { pub struct Digits {
individual_digits: bool, individual_digits: bool,