Mirror of https://github.com/mii443/tokenizers.git
Synced 2025-08-22 16:25:30 +00:00
Add-legacy-tests (#1597)
* add tests
* decoder as well
* check error
* propagate
* lint
* rafiune the test
* lint
* revert decoder changes
* on more?
* fmt
* Update tokenizers/src/pre_tokenizers/mod.rs

  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* fix commit
* simplify err
* fmt

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
@@ -144,4 +144,43 @@ mod tests {
             PreTokenizerWrapper::WhitespaceSplit(WhitespaceSplit {})
         );
     }
+
+    #[test]
+    fn pre_tokenizer_deserialization_no_type() {
+        let json = r#"{"replacement":"▁","add_prefix_space":true, "prepend_scheme":"always"}}"#;
+        let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
+        match reconstructed {
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "data did not match any variant of untagged enum PreTokenizerWrapper"
+            ),
+            _ => panic!("Expected an error here"),
+        }
+
+        let json = r#"{"type":"Metaspace", "replacement":"▁" }"#;
+        let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
+        assert_eq!(
+            reconstructed.unwrap(),
+            PreTokenizerWrapper::Metaspace(Metaspace::default())
+        );
+
+        let json = r#"{"type":"Metaspace", "add_prefix_space":true }"#;
+        let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
+        match reconstructed {
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "data did not match any variant of untagged enum PreTokenizerWrapper"
+            ),
+            _ => panic!("Expected an error here"),
+        }
+        let json = r#"{"behavior":"default_split"}"#;
+        let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
+        match reconstructed {
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "data did not match any variant of untagged enum PreTokenizerWrapper"
+            ),
+            _ => panic!("Expected an error here"),
+        }
+    }
 }
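
The error string asserted in this hunk is serde's generic failure message for untagged enums: when a legacy payload omits the "type" tag, none of the wrapper's variants can be deserialized, and serde only reports that the data matched no variant. Below is a minimal sketch of that behavior using hypothetical Inner/Wrapper types (not the real tokenizers ones), assuming serde with the derive feature plus serde_json:

// Minimal sketch: serde's untagged-enum failure mode when a "type" tag is missing.
// Hypothetical Inner/Wrapper types, not the real tokenizers ones.
// Assumes serde = { version = "1", features = ["derive"] } and serde_json = "1".
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(tag = "type")]
#[allow(dead_code)]
enum Inner {
    // Internally tagged: deserializing this requires a "type":"Metaspace" field.
    Metaspace { replacement: char },
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Wrapper {
    Inner(Inner),
}

fn main() {
    // Legacy-style payload without "type": Inner fails, so no Wrapper variant
    // matches and serde returns its generic untagged-enum error.
    let err = serde_json::from_str::<Wrapper>(r#"{"replacement":"▁"}"#).unwrap_err();
    assert_eq!(
        err.to_string(),
        "data did not match any variant of untagged enum Wrapper"
    );
}

The same message is what the pre-tokenizer tests above pin down for PreTokenizerWrapper, and what the post-processor hunk below checks for PostProcessorWrapper.
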
@@ -87,4 +87,42 @@ mod tests {
             PostProcessorWrapper::Bert(bert)
         );
     }
+
+    #[test]
+    fn post_processor_deserialization_no_type() {
+        let json = r#"{"add_prefix_space": true, "trim_offsets": false, "use_regex": false}"#;
+        let reconstructed = serde_json::from_str::<PostProcessorWrapper>(json);
+        match reconstructed {
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "data did not match any variant of untagged enum PostProcessorWrapper"
+            ),
+            _ => panic!("Expected an error here"),
+        }
+
+        let json = r#"{"sep":["[SEP]",102],"cls":["[CLS]",101]}"#;
+        let reconstructed = serde_json::from_str::<PostProcessorWrapper>(json);
+        assert!(matches!(
+            reconstructed.unwrap(),
+            PostProcessorWrapper::Bert(_)
+        ));
+
+        let json =
+            r#"{"sep":["</s>",2], "cls":["<s>",0], "trim_offsets":true, "add_prefix_space":true}"#;
+        let reconstructed = serde_json::from_str::<PostProcessorWrapper>(json);
+        assert!(matches!(
+            reconstructed.unwrap(),
+            PostProcessorWrapper::Roberta(_)
+        ));
+
+        let json = r#"{"type":"RobertaProcessing", "sep":["</s>",2] }"#;
+        let reconstructed = serde_json::from_str::<PostProcessorWrapper>(json);
+        match reconstructed {
+            Err(err) => assert_eq!(
+                err.to_string(),
+                "data did not match any variant of untagged enum PostProcessorWrapper"
+            ),
+            _ => panic!("Expected an error here"),
+        }
+    }
 }
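
The middle two cases in this hunk succeed without a "type" field because an untagged enum simply tries each variant in order and keeps the first one whose required fields are all present. Below is a rough sketch of how field shape alone can select a Bert-style versus a Roberta-style payload; BertLike, RobertaLike, and Wrapper are hypothetical types, and the real wrappers' variant order and struct attributes may differ:

// Rough sketch of untagged-enum matching by field shape (hypothetical types).
// Assumes serde = { version = "1", features = ["derive"] } and serde_json = "1".
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct BertLike {
    sep: (String, u32),
    cls: (String, u32),
}

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RobertaLike {
    sep: (String, u32),
    cls: (String, u32),
    trim_offsets: bool,
    add_prefix_space: bool,
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Wrapper {
    // Variants are tried in declaration order; the first that deserializes wins.
    Roberta(RobertaLike),
    Bert(BertLike),
}

fn main() {
    // Only sep/cls present: RobertaLike is missing trim_offsets/add_prefix_space,
    // so the BertLike variant matches.
    let w: Wrapper =
        serde_json::from_str(r#"{"sep":["[SEP]",102],"cls":["[CLS]",101]}"#).unwrap();
    assert!(matches!(w, Wrapper::Bert(_)));

    // All four fields present: the RobertaLike variant (tried first) matches.
    let w: Wrapper = serde_json::from_str(
        r#"{"sep":["</s>",2],"cls":["<s>",0],"trim_offsets":true,"add_prefix_space":true}"#,
    )
    .unwrap();
    assert!(matches!(w, Wrapper::Roberta(_)));
}

The last case in the hunk shows the flip side: once a "type" tag is present but the rest of the payload does not line up with any variant, nothing matches and the same generic untagged-enum error comes back, which is exactly what these legacy tests pin down.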