mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
@ -1,11 +1,11 @@
|
|||||||
use crate::processors::byte_level::bytes_char;
|
use crate::processors::byte_level::bytes_char;
|
||||||
use crate::tokenizer::{NormalizedString, Normalizer, Result};
|
use crate::tokenizer::{NormalizedString, Normalizer, Result};
|
||||||
use serde::{Deserialize, Serialize};
|
use crate::utils::macro_rules_attribute;
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
#[derive(Clone, Debug)]
|
||||||
#[serde(tag = "type")]
|
#[macro_rules_attribute(impl_serde_type!)]
|
||||||
pub struct ByteLevel {}
|
pub struct ByteLevel;
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref BYTES_CHAR: HashMap<u8, char> = bytes_char();
|
static ref BYTES_CHAR: HashMap<u8, char> = bytes_char();
|
||||||
|
@ -73,3 +73,34 @@ impl_enum_from!(Precompiled, NormalizerWrapper, Precompiled);
|
|||||||
impl_enum_from!(Replace, NormalizerWrapper, Replace);
|
impl_enum_from!(Replace, NormalizerWrapper, Replace);
|
||||||
impl_enum_from!(Prepend, NormalizerWrapper, Prepend);
|
impl_enum_from!(Prepend, NormalizerWrapper, Prepend);
|
||||||
impl_enum_from!(ByteLevel, NormalizerWrapper, ByteLevel);
|
impl_enum_from!(ByteLevel, NormalizerWrapper, ByteLevel);
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
#[test]
|
||||||
|
fn post_processor_deserialization_no_type() {
|
||||||
|
let json = r#"{"strip_left":false, "strip_right":true}"#;
|
||||||
|
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
|
||||||
|
assert!(matches!(
|
||||||
|
reconstructed.unwrap(),
|
||||||
|
NormalizerWrapper::StripNormalizer(_)
|
||||||
|
));
|
||||||
|
|
||||||
|
let json = r#"{"trim_offsets":true, "add_prefix_space":true}"#;
|
||||||
|
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
|
||||||
|
match reconstructed {
|
||||||
|
Err(err) => assert_eq!(
|
||||||
|
err.to_string(),
|
||||||
|
"data did not match any variant of untagged enum NormalizerWrapper"
|
||||||
|
),
|
||||||
|
_ => panic!("Expected an error here"),
|
||||||
|
}
|
||||||
|
|
||||||
|
let json = r#"{"prepend":"a"}"#;
|
||||||
|
let reconstructed = serde_json::from_str::<NormalizerWrapper>(json);
|
||||||
|
assert!(matches!(
|
||||||
|
reconstructed.unwrap(),
|
||||||
|
NormalizerWrapper::Prepend(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user