Remove Tokenizer::normalize

This is a legacy function that no longer really makes sense and is becoming increasingly difficult to maintain, so we remove it.
This commit is contained in:
Anthony MOI
2020-08-18 17:52:25 -04:00
committed by Anthony MOI
parent 18e3799b1d
commit 504d8c85d8
8 changed files with 25 additions and 104 deletions

View File

@ -4,6 +4,7 @@ use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::*;
use crate::error::ToPyResult;
use serde::ser::SerializeStruct;
use serde::{Deserialize, Serialize, Serializer};
use tk::normalizers::{BertNormalizer, Lowercase, NormalizerWrapper, Strip, NFC, NFD, NFKC, NFKD};
@ -81,6 +82,12 @@ impl PyNormalizer {
Err(e) => Err(e),
}
}
/// Normalize a plain string with this normalizer and return the result.
///
/// `sequence` is wrapped in a fresh `NormalizedString`, the wrapped
/// normalizer is run over it in place, and an owned copy of the content
/// exposed by `get()` is returned.
///
/// # Errors
///
/// Any error reported by the underlying normalizer is converted through
/// `ToPyResult` and propagated to the caller.
fn normalize_str(&self, sequence: &str) -> PyResult<String> {
    let mut buffer = NormalizedString::from(sequence);

    // Run the normalizer first, then translate its result into a `PyResult`
    // so that a failure surfaces before anything is read back.
    let outcome = self.normalizer.normalize(&mut buffer);
    ToPyResult(outcome).into_py()?;

    let text = buffer.get().to_owned();
    Ok(text)
}
}
#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=BertNormalizer)]