mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-03 19:28:20 +00:00
Rust - Remove str_rep from Metaspace serialization
This commit is contained in:
@@ -2,14 +2,29 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::tokenizer::{Decoder, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
|
use crate::tokenizer::{Decoder, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||||
/// Replaces all the whitespaces by the provided meta character and then
|
/// Replaces all the whitespaces by the provided meta character and then
|
||||||
/// splits on this character
|
/// splits on this character
|
||||||
#[serde(tag = "type")]
|
#[serde(tag = "type", from = "MetaspaceDeserializer")]
|
||||||
pub struct Metaspace {
|
pub struct Metaspace {
|
||||||
replacement: char,
|
replacement: char,
|
||||||
str_rep: String,
|
|
||||||
pub add_prefix_space: bool,
|
pub add_prefix_space: bool,
|
||||||
|
#[serde(skip)]
|
||||||
|
str_rep: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(tag = "type")]
|
||||||
|
pub struct MetaspaceDeserializer {
|
||||||
|
replacement: char,
|
||||||
|
add_prefix_space: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<MetaspaceDeserializer> for Metaspace {
|
||||||
|
fn from(v: MetaspaceDeserializer) -> Metaspace {
|
||||||
|
Metaspace::new(v.replacement, v.add_prefix_space)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Metaspace {
|
impl Metaspace {
|
||||||
@@ -76,6 +91,17 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::{OffsetReferential, OffsetType};
|
use crate::{OffsetReferential, OffsetType};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn serialization() {
|
||||||
|
let metaspace = Metaspace::new('_', true);
|
||||||
|
let metaspace_s = r#"{"type":"Metaspace","replacement":"_","add_prefix_space":true}"#;
|
||||||
|
assert_eq!(serde_json::to_string(&metaspace).unwrap(), metaspace_s);
|
||||||
|
assert_eq!(
|
||||||
|
serde_json::from_str::<Metaspace>(metaspace_s).unwrap(),
|
||||||
|
metaspace
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn basic() {
|
fn basic() {
|
||||||
let pretok = Metaspace::new('▁', true);
|
let pretok = Metaspace::new('▁', true);
|
||||||
|
|||||||
Reference in New Issue
Block a user