Add test normalizers (#1600)

* update

* update test they passs

* fmt
This commit is contained in:
Arthur
2024-08-06 16:08:18 +02:00
committed by GitHub
parent fe41687ca8
commit 8f2cc90249
2 changed files with 35 additions and 4 deletions

View File

@ -1,11 +1,11 @@
use crate::processors::byte_level::bytes_char;
use crate::tokenizer::{NormalizedString, Normalizer, Result};
use serde::{Deserialize, Serialize};
use crate::utils::macro_rules_attribute;
use std::collections::{HashMap, HashSet};
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub struct ByteLevel {}
#[derive(Clone, Debug)]
#[macro_rules_attribute(impl_serde_type!)]
pub struct ByteLevel;
lazy_static! {
static ref BYTES_CHAR: HashMap<u8, char> = bytes_char();

View File

@ -73,3 +73,34 @@ impl_enum_from!(Precompiled, NormalizerWrapper, Precompiled);
impl_enum_from!(Replace, NormalizerWrapper, Replace);
impl_enum_from!(Prepend, NormalizerWrapper, Prepend);
impl_enum_from!(ByteLevel, NormalizerWrapper, ByteLevel);
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn post_processor_deserialization_no_type() {
let json = r#"{"strip_left":false, "strip_right":true}"#;
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
assert!(matches!(
reconstructed.unwrap(),
NormalizerWrapper::StripNormalizer(_)
));
let json = r#"{"trim_offsets":true, "add_prefix_space":true}"#;
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
match reconstructed {
Err(err) => assert_eq!(
err.to_string(),
"data did not match any variant of untagged enum NormalizerWrapper"
),
_ => panic!("Expected an error here"),
}
let json = r#"{"prepend":"a"}"#;
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
assert!(matches!(
reconstructed.unwrap(),
NormalizerWrapper::Prepend(_)
));
}
}