Add-legacy-tests (#1597)

* add tests

* decoder as well

* check error

* propagate

* lint

* refine the test

* lint

* revert decoder changes

* one more?

* fmt

* Update tokenizers/src/pre_tokenizers/mod.rs

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* fix commit

* simplify err

* fmt

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Arthur
2024-08-06 13:08:12 +02:00
committed by GitHub
parent 99a48dcb46
commit adc82cb49a
2 changed files with 77 additions and 0 deletions

View File

@@ -144,4 +144,43 @@ mod tests {
PreTokenizerWrapper::WhitespaceSplit(WhitespaceSplit {})
);
}
/// Checks how `PreTokenizerWrapper` deserializes payloads whose `type` key is
/// missing, and payloads that carry a `type` but omit other keys.
#[test]
fn pre_tokenizer_deserialization_no_type() {
    // Message produced when no variant of the untagged wrapper enum accepts the input.
    const NO_VARIANT: &str =
        "data did not match any variant of untagged enum PreTokenizerWrapper";

    // Asserts that `json` is rejected with exactly the variant-mismatch error.
    fn assert_rejected(json: &str) {
        match serde_json::from_str::<PreTokenizerWrapper>(json) {
            Err(err) => assert_eq!(err.to_string(), NO_VARIANT),
            Ok(_) => panic!("Expected an error here"),
        }
    }

    // Metaspace-looking payload without a "type" key. The literal is well-formed
    // JSON, so the rejection can only come from variant matching and not from a
    // syntax problem in the input itself.
    assert_rejected(
        r#"{"replacement":"▁","add_prefix_space":true, "prepend_scheme":"always"}"#,
    );

    // With an explicit type and only a replacement, the result equals the default Metaspace.
    let json = r#"{"type":"Metaspace", "replacement":"▁" }"#;
    let reconstructed = serde_json::from_str::<PreTokenizerWrapper>(json);
    assert_eq!(
        reconstructed.unwrap(),
        PreTokenizerWrapper::Metaspace(Metaspace::default())
    );

    // Typed as Metaspace but lacking the "replacement" key present in the
    // accepted payload above: expected to be rejected.
    assert_rejected(r#"{"type":"Metaspace", "add_prefix_space":true }"#);

    // Another payload with no "type" key: expected to be rejected.
    assert_rejected(r#"{"behavior":"default_split"}"#);
}
}

View File

@@ -87,4 +87,42 @@ mod tests {
PostProcessorWrapper::Bert(bert)
);
}
/// Checks how `PostProcessorWrapper` deserializes payloads that have no `type`
/// key, plus one typed payload that is expected to be rejected.
#[test]
fn post_processor_deserialization_no_type() {
    // Message produced when no variant of the untagged wrapper enum accepts the input.
    const NO_VARIANT: &str =
        "data did not match any variant of untagged enum PostProcessorWrapper";

    let parse = |json: &str| serde_json::from_str::<PostProcessorWrapper>(json);
    // Asserts that `json` is rejected with exactly the variant-mismatch error.
    let assert_rejected = |json: &str| match parse(json) {
        Ok(_) => panic!("Expected an error here"),
        Err(err) => assert_eq!(err.to_string(), NO_VARIANT),
    };

    // No "type" key and no sep/cls entries: expected to be rejected.
    assert_rejected(r#"{"add_prefix_space": true, "trim_offsets": false, "use_regex": false}"#);

    // No "type" key, only sep/cls: expected to come back as the Bert variant.
    let bert = parse(r#"{"sep":["[SEP]",102],"cls":["[CLS]",101]}"#);
    assert!(matches!(bert.unwrap(), PostProcessorWrapper::Bert(_)));

    // No "type" key, sep/cls plus trim_offsets and add_prefix_space: expected to
    // come back as the Roberta variant.
    let roberta = parse(
        r#"{"sep":["</s>",2], "cls":["<s>",0], "trim_offsets":true, "add_prefix_space":true}"#,
    );
    assert!(matches!(roberta.unwrap(), PostProcessorWrapper::Roberta(_)));

    // Explicitly typed as RobertaProcessing but carrying only "sep": expected to be rejected.
    assert_rejected(r#"{"type":"RobertaProcessing", "sep":["</s>",2] }"#);
}
}