Mirror of https://github.com/mii443/tokenizers.git, synced 2025-08-23 00:35:35 +00:00
Remove Tokenizer::normalize
This is a legacy function that no longer really makes sense and is getting difficult to maintain, so we remove it.
@@ -4,6 +4,7 @@ use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::*;
 
+use crate::error::ToPyResult;
 use serde::ser::SerializeStruct;
 use serde::{Deserialize, Serialize, Serializer};
 use tk::normalizers::{BertNormalizer, Lowercase, NormalizerWrapper, Strip, NFC, NFD, NFKC, NFKD};
@@ -81,6 +82,12 @@ impl PyNormalizer {
             Err(e) => Err(e),
         }
     }
+
+    fn normalize_str(&self, sequence: &str) -> PyResult<String> {
+        let mut normalized = NormalizedString::from(sequence);
+        ToPyResult(self.normalizer.normalize(&mut normalized)).into_py()?;
+        Ok(normalized.get().to_owned())
+    }
 }
 
 #[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=BertNormalizer)]
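For context, the new normalize_str is a thin wrapper over the Rust-side normalizer: it wraps the input in a NormalizedString, normalizes it in place, and returns the resulting text. Below is a minimal sketch of the same flow against the tokenizers crate directly (the choice of the Lowercase normalizer and the crate paths are illustrative assumptions, not part of this commit; in the bindings, tk refers to the same tokenizers crate).

// Sketch only: mirrors the body of `normalize_str` from the diff, but calls the
// Rust `tokenizers` crate directly instead of going through the PyO3 bindings.
// Crate paths and the choice of `Lowercase` are assumptions for illustration.
use tokenizers::normalizers::Lowercase;
use tokenizers::{NormalizedString, Normalizer};

fn main() -> tokenizers::Result<()> {
    let normalizer = Lowercase;

    // Same three steps as `normalize_str`: wrap the input, normalize in place,
    // then read the normalized text back out.
    let mut normalized = NormalizedString::from("Hello WORLD");
    normalizer.normalize(&mut normalized)?;
    assert_eq!(normalized.get(), "hello world");

    Ok(())
}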