Fixing clippy warnings on 1.71. (#1296)

* Fixing clippy warnings on 1.71.

* Fix.

* Fmt.

* Python clippy.

* Should really set my env back again.

* Fix.
Nicolas Patry
2023-07-16 15:58:38 +02:00
committed by GitHub
parent 4811f769a1
commit 291b2e23ae
11 changed files with 29 additions and 30 deletions
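
The recurring change in the hunks below replaces `Whitespace::default()` (and `UnicodeScripts::default()`) with a direct struct literal. A minimal sketch of the pattern, using a hypothetical stand-in for the crate's fieldless pre-tokenizer struct and assuming the warning in question is clippy's `default_constructed_unit_structs` lint that landed around Rust 1.71; the remaining hunks drop `mut` from bindings that are never reassigned:

```rust
// Hypothetical stand-in for a fieldless pre-tokenizer struct; not the
// crate's actual definition.
#[derive(Default, Debug, PartialEq)]
struct Whitespace;

fn main() {
    // Before: clippy on 1.71 warns that calling `default()` on a struct
    // with no fields is needless indirection.
    let old = Whitespace::default();

    // After: construct the fieldless struct directly.
    let new = Whitespace {};

    // Both expressions denote the same (only) value of the type.
    assert_eq!(old, new);
}
```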


@ -155,8 +155,7 @@ fn byte_level_alphabet(mut cx: FunctionContext) -> JsResult<JsValue> {
fn whitespace(mut cx: FunctionContext) -> JsResult<JsPreTokenizer> {
let mut pretok = JsPreTokenizer::new::<_, JsPreTokenizer, _>(&mut cx, vec![])?;
let guard = cx.lock();
- pretok.borrow_mut(&guard).pretok =
- Some(tk::pre_tokenizers::whitespace::Whitespace::default().into());
+ pretok.borrow_mut(&guard).pretok = Some(tk::pre_tokenizers::whitespace::Whitespace {}.into());
Ok(pretok)
}
@ -303,10 +302,10 @@ mod test {
#[test]
fn serialize() {
- let js_wrapped: JsPreTokenizerWrapper = Whitespace::default().into();
+ let js_wrapped: JsPreTokenizerWrapper = Whitespace {}.into();
let js_ser = serde_json::to_string(&js_wrapped).unwrap();
- let rs_wrapped = PreTokenizerWrapper::Whitespace(Whitespace::default());
+ let rs_wrapped = PreTokenizerWrapper::Whitespace(Whitespace {});
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_ser, rs_ser);
@ -320,11 +319,11 @@ mod test {
}
let js_seq: JsPreTokenizerWrapper =
- Sequence::new(vec![WhitespaceSplit.into(), Whitespace::default().into()]).into();
+ Sequence::new(vec![WhitespaceSplit.into(), Whitespace {}.into()]).into();
let js_wrapper_ser = serde_json::to_string(&js_seq).unwrap();
let rs_wrapped = PreTokenizerWrapper::Sequence(Sequence::new(vec![
WhitespaceSplit.into(),
- Whitespace::default().into(),
+ Whitespace {}.into(),
]));
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
@ -335,7 +334,7 @@ mod test {
let js_ser = serde_json::to_string(&js_seq).unwrap();
assert_eq!(js_wrapper_ser, js_ser);
- let rs_seq = Sequence::new(vec![WhitespaceSplit.into(), Whitespace::default().into()]);
+ let rs_seq = Sequence::new(vec![WhitespaceSplit.into(), Whitespace {}.into()]);
let rs_ser = serde_json::to_string(&rs_seq).unwrap();
assert_eq!(js_wrapper_ser, rs_ser);
}


@ -301,7 +301,7 @@ pub struct PyWhitespace {}
impl PyWhitespace {
#[new]
fn new() -> (Self, PyPreTokenizer) {
- (PyWhitespace {}, Whitespace::default().into())
+ (PyWhitespace {}, Whitespace {}.into())
}
}
@ -722,7 +722,7 @@ mod test {
#[test]
fn get_subtype() {
Python::with_gil(|py| {
- let py_norm = PyPreTokenizer::new(Whitespace::default().into());
+ let py_norm = PyPreTokenizer::new(Whitespace {}.into());
let py_wsp = py_norm.get_as_subtype(py).unwrap();
assert_eq!("Whitespace", py_wsp.as_ref(py).get_type().name().unwrap());
})
@ -730,9 +730,9 @@ mod test {
#[test]
fn serialize() {
- let py_wrapped: PyPreTokenizerWrapper = Whitespace::default().into();
+ let py_wrapped: PyPreTokenizerWrapper = Whitespace {}.into();
let py_ser = serde_json::to_string(&py_wrapped).unwrap();
- let rs_wrapped = PreTokenizerWrapper::Whitespace(Whitespace::default());
+ let rs_wrapped = PreTokenizerWrapper::Whitespace(Whitespace {});
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
assert_eq!(py_ser, rs_ser);
let py_pretok: PyPreTokenizer = serde_json::from_str(&rs_ser).unwrap();
@ -745,10 +745,10 @@ mod test {
}
let py_seq: PyPreTokenizerWrapper =
- Sequence::new(vec![Whitespace::default().into(), WhitespaceSplit.into()]).into();
+ Sequence::new(vec![Whitespace {}.into(), WhitespaceSplit.into()]).into();
let py_wrapper_ser = serde_json::to_string(&py_seq).unwrap();
let rs_wrapped = PreTokenizerWrapper::Sequence(Sequence::new(vec![
- Whitespace::default().into(),
+ Whitespace {}.into(),
WhitespaceSplit.into(),
]));
let rs_ser = serde_json::to_string(&rs_wrapped).unwrap();
@ -759,7 +759,7 @@ mod test {
assert_eq!(py_wrapper_ser, py_ser);
let obj = Python::with_gil(|py| {
- let py_wsp = PyPreTokenizer::new(Whitespace::default().into());
+ let py_wsp = PyPreTokenizer::new(Whitespace {}.into());
let obj: PyObject = Py::new(py, py_wsp).unwrap().into_py(py);
obj
});


@ -81,7 +81,7 @@ fn bench_train(c: &mut Criterion) {
DecoderWrapper,
>;
let mut tokenizer = Tok::new(WordPiece::default());
- tokenizer.with_pre_tokenizer(Whitespace::default());
+ tokenizer.with_pre_tokenizer(Whitespace {});
c.bench_function("WordPiece Train vocabulary (small)", |b| {
b.iter_custom(|iters| {
iter_bench_train(
@ -94,7 +94,7 @@ fn bench_train(c: &mut Criterion) {
});
let mut tokenizer = Tok::new(WordPiece::default());
- tokenizer.with_pre_tokenizer(Whitespace::default());
+ tokenizer.with_pre_tokenizer(Whitespace {});
c.bench_function("WordPiece Train vocabulary (big)", |b| {
b.iter_custom(|iters| {
iter_bench_train(


@ -74,7 +74,7 @@ fn bench_train(c: &mut Criterion) {
.build()
.into();
let mut tokenizer = Tokenizer::new(BPE::default()).into_inner();
- tokenizer.with_pre_tokenizer(Whitespace::default());
+ tokenizer.with_pre_tokenizer(Whitespace {});
c.bench_function("BPE Train vocabulary (small)", |b| {
b.iter_custom(|iters| {
iter_bench_train(
@ -87,7 +87,7 @@ fn bench_train(c: &mut Criterion) {
});
let mut tokenizer = Tokenizer::new(BPE::default()).into_inner();
- tokenizer.with_pre_tokenizer(Whitespace::default());
+ tokenizer.with_pre_tokenizer(Whitespace {});
c.bench_function("BPE Train vocabulary (big)", |b| {
b.iter_custom(|iters| {
iter_bench_train(


@ -271,7 +271,7 @@ impl Unigram {
{
let key_pos = starts_at + tok_bytes.len();
let token: String = String::from_utf8(tok_bytes).unwrap();
- let mut target_node = &mut best_path_ends_at[key_pos];
+ let target_node = &mut best_path_ends_at[key_pos];
let length = key_pos - starts_at;
let id = self.token_to_ids.get(&token).unwrap();
let score = self.vocab.get(*id as usize).unwrap().1;
@ -288,7 +288,7 @@ impl Unigram {
}
}
if !has_single_node {
- let mut target_node = &mut best_path_ends_at[starts_at + mblen];
+ let target_node = &mut best_path_ends_at[starts_at + mblen];
let candidate_best_path_score = unk_score + best_path_score_till_here;
if target_node.starts_at.is_none()
|| candidate_best_path_score > target_node.best_path_score
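
Both Unigram hunks make the same fix: `target_node` holds a `&mut` reference that is only written through, never reassigned, so the binding itself does not need to be `mut`. A self-contained sketch of that distinction, with a hypothetical `Node` type (not the crate's):

```rust
#[derive(Default, Clone, Debug)]
struct Node {
    best_path_score: f64,
    starts_at: Option<usize>,
}

fn main() {
    let mut best_path_ends_at = vec![Node::default(); 4];

    // The reference is mutable, but the binding is never reassigned,
    // so declaring `target_node` itself as `mut` is flagged as unnecessary.
    let target_node = &mut best_path_ends_at[2];
    target_node.best_path_score = 1.5;
    target_node.starts_at = Some(0);

    println!("{:?}", best_path_ends_at[2]); // Node { best_path_score: 1.5, starts_at: Some(0) }
}
```

The closure change in the ByteLevel hunk below is the same situation: the `offsets` binding in the pattern is a mutable reference already, so the extra `mut` is dropped.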


@ -197,7 +197,7 @@ impl PostProcessor for ByteLevel {
}
pub fn process_offsets(encoding: &mut Encoding, add_prefix_space: bool) {
- encoding.process_tokens_with_offsets_mut(|(i, (token, mut offsets))| {
+ encoding.process_tokens_with_offsets_mut(|(i, (token, offsets))| {
let mut leading_spaces = token
.chars()
.take_while(|c| *c == BYTES_CHAR[&b' '] || c.is_whitespace())


@ -84,7 +84,7 @@ mod tests {
#[test]
fn basic() {
- let pretok = UnicodeScripts::default();
+ let pretok = UnicodeScripts {};
let mut pretokenized = PreTokenizedString::from("どこで生れ。Yes");
pretok.pre_tokenize(&mut pretokenized).unwrap();
assert_eq!(
@ -107,7 +107,7 @@ mod tests {
#[test]
fn spaces_are_included_in_every_script() {
- let pretok = UnicodeScripts::default();
+ let pretok = UnicodeScripts {};
let mut pretokenized = PreTokenizedString::from("Apples are りんご 林檎");
pretok.pre_tokenize(&mut pretokenized).unwrap();
assert_eq!(


@ -64,7 +64,7 @@ mod tests {
),
("\n", vec![]),
];
- let pretok = Whitespace::default();
+ let pretok = Whitespace {};
for (s, res) in tests {
let mut pretokenized = PreTokenizedString::from(s);
pretok.pre_tokenize(&mut pretokenized).unwrap();


@ -93,7 +93,7 @@ fn quicktour_slow_train() -> tokenizers::Result<()> {
// START quicktour_init_pretok
use tokenizers::pre_tokenizers::whitespace::Whitespace;
- tokenizer.with_pre_tokenizer(Whitespace::default());
+ tokenizer.with_pre_tokenizer(Whitespace {});
// END quicktour_init_pretok
// START quicktour_train
@ -267,7 +267,7 @@ fn pipeline() -> tokenizers::Result<()> {
use tokenizers::pre_tokenizers::whitespace::Whitespace;
use tokenizers::{OffsetReferential, OffsetType, PreTokenizedString, PreTokenizer};
- let pre_tokenizer = Whitespace::default();
+ let pre_tokenizer = Whitespace {};
let mut pre_tokenized = PreTokenizedString::from("Hello! How are you? I'm fine, thank you.");
pre_tokenizer.pre_tokenize(&mut pre_tokenized)?;
@ -304,7 +304,7 @@ fn pipeline() -> tokenizers::Result<()> {
// START pipeline_combine_pre_tokenizer
use tokenizers::pre_tokenizers::{digits::Digits, sequence::Sequence};
- let pre_tokenizer = Sequence::new(vec![Whitespace::default().into(), Digits::new(true).into()]);
+ let pre_tokenizer = Sequence::new(vec![Whitespace {}.into(), Digits::new(true).into()]);
let mut pre_tokenized = PreTokenizedString::from("Call 911!");
pre_tokenizer.pre_tokenize(&mut pre_tokenized)?;
@ -384,7 +384,7 @@ fn train_pipeline_bert() -> tokenizers::Result<()> {
// START bert_setup_pre_tokenizer
use tokenizers::pre_tokenizers::whitespace::Whitespace;
- bert_tokenizer.with_pre_tokenizer(Whitespace::default());
+ bert_tokenizer.with_pre_tokenizer(Whitespace {});
// END bert_setup_pre_tokenizer
// START bert_setup_processor
use tokenizers::processors::template::TemplateProcessing;


@ -138,7 +138,7 @@ fn pretoks() {
let ser_wrapped = serde_json::to_string(&ch_wrapped).unwrap();
assert_eq!(ser_wrapped, ch_ser);
- let wsp = Whitespace::default();
+ let wsp = Whitespace {};
let wsp_ser = serde_json::to_string(&wsp).unwrap();
assert_eq!(wsp_ser, r#"{"type":"Whitespace"}"#);
serde_json::from_str::<Whitespace>(&wsp_ser).unwrap();
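
Since a fieldless struct has exactly one value, swapping `Whitespace::default()` for `Whitespace {}` cannot change the serialized form asserted above. A tiny sketch under that assumption, using a plain serde derive on a stand-in unit struct rather than the crate's own serde setup (so the JSON here is just `null`, not `{"type":"Whitespace"}`):

```rust
use serde::Serialize;

// Stand-in unit struct; the real crate attaches a "type" tag to the output.
#[derive(Serialize, Default, Debug, PartialEq)]
struct Whitespace;

fn main() {
    let a = serde_json::to_string(&Whitespace::default()).unwrap();
    let b = serde_json::to_string(&Whitespace {}).unwrap();
    assert_eq!(a, b);                                  // identical output: "null"
    assert_eq!(Whitespace::default(), Whitespace {});  // same value either way
}
```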


@ -45,7 +45,7 @@ fn bpe_continuing_subword_prefix_error() {
.build()
.unwrap(),
)
- .with_pre_tokenizer(Some(PreTokenizerWrapper::Whitespace(Whitespace::default())))
+ .with_pre_tokenizer(Some(PreTokenizerWrapper::Whitespace(Whitespace {})))
.build()
.unwrap();
let mut trainer = tokenizer.get_model().get_trainer();