mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Add-legacy-tests (#1597)
* add tests * decoder as well * check error * propagate * lint * refine the test * lint * revert decoder changes * one more? * fmt * Update tokenizers/src/pre_tokenizers/mod.rs Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> * fix commit * simplify err * fmt --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
@ -144,4 +144,43 @@ mod tests {
|
||||
PreTokenizerWrapper::WhitespaceSplit(WhitespaceSplit {})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn pre_tokenizer_deserialization_no_type() {
    // Message expected every time `PreTokenizerWrapper` refuses the input.
    const NO_VARIANT: &str =
        "data did not match any variant of untagged enum PreTokenizerWrapper";

    // Asserts that `json` fails to deserialize with exactly `NO_VARIANT`.
    let assert_rejected = |json: &str| {
        match serde_json::from_str::<PreTokenizerWrapper>(json) {
            Err(err) => assert_eq!(err.to_string(), NO_VARIANT),
            Ok(_) => panic!("Expected an error here"),
        }
    };

    // Metaspace-shaped payload with no `type` field. The fixture is a
    // well-formed JSON object, so the only thing wrong with it is the
    // missing `type`.
    assert_rejected(r#"{"replacement":"▁","add_prefix_space":true, "prepend_scheme":"always"}"#);

    // With an explicit `type` and only `replacement`, the result must equal
    // the default Metaspace.
    let json = r#"{"type":"Metaspace", "replacement":"▁" }"#;
    let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
    assert_eq!(
        reconstructed.unwrap(),
        PreTokenizerWrapper::Metaspace(Metaspace::default())
    );

    // A typed Metaspace that carries only `add_prefix_space` is rejected.
    assert_rejected(r#"{"type":"Metaspace", "add_prefix_space":true }"#);

    // A lone `behavior` key with no `type` is rejected as well.
    assert_rejected(r#"{"behavior":"default_split"}"#);
}
|
||||
}
|
||||
|
@ -87,4 +87,42 @@ mod tests {
|
||||
PostProcessorWrapper::Bert(bert)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn post_processor_deserialization_no_type() {
    // Message expected every time `PostProcessorWrapper` refuses the input.
    const NO_VARIANT: &str =
        "data did not match any variant of untagged enum PostProcessorWrapper";

    // Asserts that `payload` fails to deserialize with exactly `NO_VARIANT`.
    let expect_rejection = |payload: &str| {
        let outcome = serde_json::from_str::<PostProcessorWrapper>(payload);
        match outcome {
            Err(err) => assert_eq!(err.to_string(), NO_VARIANT),
            _ => panic!("Expected an error here"),
        }
    };

    // Untyped payload with only the three boolean options: rejected.
    expect_rejection(r#"{"add_prefix_space": true, "trim_offsets": false, "use_regex": false}"#);

    // Untyped payload holding just `sep` and `cls` is accepted as Bert.
    let bert_json = r#"{"sep":["[SEP]",102],"cls":["[CLS]",101]}"#;
    let bert = serde_json::from_str::<PostProcessorWrapper>(bert_json).unwrap();
    assert!(matches!(bert, PostProcessorWrapper::Bert(_)));

    // Untyped payload that also carries `trim_offsets` and `add_prefix_space`
    // is accepted as Roberta.
    let roberta_json =
        r#"{"sep":["</s>",2], "cls":["<s>",0], "trim_offsets":true, "add_prefix_space":true}"#;
    let roberta = serde_json::from_str::<PostProcessorWrapper>(roberta_json).unwrap();
    assert!(matches!(roberta, PostProcessorWrapper::Roberta(_)));

    // Typed as RobertaProcessing but providing only `sep`: rejected.
    expect_rejection(r#"{"type":"RobertaProcessing", "sep":["</s>",2] }"#);
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user