mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
@ -1,11 +1,11 @@
|
||||
use crate::processors::byte_level::bytes_char;
|
||||
use crate::tokenizer::{NormalizedString, Normalizer, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::utils::macro_rules_attribute;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub struct ByteLevel {}
|
||||
#[derive(Clone, Debug)]
|
||||
#[macro_rules_attribute(impl_serde_type!)]
|
||||
pub struct ByteLevel;
|
||||
|
||||
lazy_static! {
|
||||
static ref BYTES_CHAR: HashMap<u8, char> = bytes_char();
|
||||
|
@ -73,3 +73,34 @@ impl_enum_from!(Precompiled, NormalizerWrapper, Precompiled);
|
||||
// Each invocation hands `impl_enum_from!` a normalizer type, the
// `NormalizerWrapper` enum, and the name of the variant to associate with it.
// NOTE(review): presumably this expands to a `From<T> for NormalizerWrapper`
// impl per line; the macro definition is not visible here, so confirm.
impl_enum_from!(Replace, NormalizerWrapper, Replace);
|
||||
impl_enum_from!(Prepend, NormalizerWrapper, Prepend);
|
||||
impl_enum_from!(ByteLevel, NormalizerWrapper, ByteLevel);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `NormalizerWrapper` deserialization for JSON objects that do
    /// not carry a `"type"` field.
    #[test]
    fn post_processor_deserialization_no_type() {
        // `strip_left` / `strip_right` alone must resolve to the strip variant.
        let strip_json = r#"{"strip_left":false, "strip_right":true}"#;
        let strip = serde_json::from_str::<NormalizerWrapper>(strip_json).unwrap();
        assert!(matches!(strip, NormalizerWrapper::StripNormalizer(_)));

        // This field combination is expected to match no variant, so
        // deserialization must fail with the untagged-enum error message.
        let unmatched_json = r#"{"trim_offsets":true, "add_prefix_space":true}"#;
        if let Err(err) = serde_json::from_str::<NormalizerWrapper>(unmatched_json) {
            assert_eq!(
                err.to_string(),
                "data did not match any variant of untagged enum NormalizerWrapper"
            );
        } else {
            panic!("Expected an error here");
        }

        // A lone `prepend` field must resolve to the prepend variant.
        let prepend_json = r#"{"prepend":"a"}"#;
        let prepend = serde_json::from_str::<NormalizerWrapper>(prepend_json).unwrap();
        assert!(matches!(prepend, NormalizerWrapper::Prepend(_)));
    }
}
|
||||
|
Reference in New Issue
Block a user