Fixing a bug where long tokenizer files would be incorrectly deserialized (#459)

* Fixing a bug where long tokenizer files would be incorrectly deserialized - Add a bunch of tests to check deserialization behaviour - One test also confirms current Single deserialization of Sequence. * Better test locations for Windows + no file dependency in Python binding Rust side. * Addressing @n1t0 comments.
2025-08-23 00:35:35 +00:00 · 2020-10-13 18:44:24 +02:00
parent b3c016cf9c
commit 88556790e7
8 changed files with 72 additions and 11 deletions
--- a/bindings/python/src/normalizers.rs
+++ b/bindings/python/src/normalizers.rs
@ -483,4 +483,35 @@ mod test {
        let rs_ser = serde_json::to_string(&rs_seq).unwrap();
        assert_eq!(py_wrapper_ser, rs_ser);
    }
+
+    /// Checks how `PyNormalizer` deserializes from JSON, both for a lone
+    /// normalizer and for a `Sequence` that contains it.
+    ///
+    /// The second half pins the current behaviour: a JSON `Sequence` comes
+    /// back as `PyNormalizerTypeWrapper::Single` wrapping a
+    /// `NormalizerWrapper::Sequence`, not as any other
+    /// `PyNormalizerTypeWrapper` variant.
+    #[test]
+    fn deserialize_sequence() {
+        // Case 1: a lone NFKC normalizer must come back as `Single(Wrapped(NFKC))`.
+        let string = r#"{"type": "NFKC"}"#;
+        // `string` is already a `&str`, so it is passed without an extra borrow.
+        let normalizer: PyNormalizer = serde_json::from_str(string).unwrap();
+        match normalizer.normalizer {
+            PyNormalizerTypeWrapper::Single(inner) => match inner.as_ref() {
+                PyNormalizerWrapper::Wrapped(NormalizerWrapper::NFKC(_)) => {}
+                _ => panic!("Expected NFKC"),
+            },
+            _ => panic!("Expected wrapped, not sequence."),
+        }
+
+        // Case 2: the same NFKC payload embedded in a one-element JSON `Sequence`.
+        let sequence_string = format!(r#"{{"type": "Sequence", "normalizers": [{}]}}"#, string);
+        let normalizer: PyNormalizer = serde_json::from_str(&sequence_string).unwrap();
+
+        match normalizer.normalizer {
+            PyNormalizerTypeWrapper::Single(inner) => match inner.as_ref() {
+                PyNormalizerWrapper::Wrapped(NormalizerWrapper::Sequence(sequence)) => {
+                    // The wrapped sequence must hold exactly the one NFKC normalizer.
+                    let normalizers = sequence.get_normalizers();
+                    assert_eq!(normalizers.len(), 1);
+                    match normalizers[0] {
+                        NormalizerWrapper::NFKC(_) => {}
+                        _ => panic!("Expected NFKC"),
+                    }
+                }
+                _ => panic!("Expected sequence"),
+            },
+            _ => panic!("Expected single"),
+        }
+    }
 }