Add from_bytes approach for creating tokenizers (#1024)

Signed-off-by: HaoboGu <haobogu@outlook.com>
Author: HaoboGu (committed by GitHub)
Date: 2022-07-18 22:25:45 +08:00
parent adf90dcd72
commit 3564f24311


@@ -402,6 +402,10 @@ impl Tokenizer {
         let tokenizer = serde_json::from_str(&content)?;
         Ok(tokenizer)
     }
+    pub fn from_bytes<P: AsRef<[u8]>>(bytes: P) -> Result<Self> {
+        let tokenizer = serde_json::from_slice(bytes.as_ref())?;
+        Ok(tokenizer)
+    }
     #[cfg(feature = "http")]
     pub fn from_pretrained<S: AsRef<str>>(
         identifier: S,
@@ -1133,6 +1137,21 @@ where
     }
 }
 
+impl<M, N, PT, PP, D> TokenizerImpl<M, N, PT, PP, D>
+where
+    M: DeserializeOwned + Model,
+    N: DeserializeOwned + Normalizer,
+    PT: DeserializeOwned + PreTokenizer,
+    PP: DeserializeOwned + PostProcessor,
+    D: DeserializeOwned + Decoder,
+{
+    /// Instantiate a new Tokenizer from bytes
+    pub fn from_bytes<P: AsRef<[u8]>>(bytes: P) -> Result<Self> {
+        let tokenizer = serde_json::from_slice(bytes.as_ref())?;
+        Ok(tokenizer)
+    }
+}
+
 impl<M, N, PT, PP, D> TokenizerImpl<M, N, PT, PP, D>
 where
     M: DeserializeOwned + Model,
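
For context, the new method deserializes a tokenizer from an in-memory JSON buffer rather than a file path, which is useful when the serialized tokenizer arrives over the network or is embedded in the binary. Below is a minimal usage sketch, not part of this commit; the tokenizer.json path and the sample sentence are placeholders:

use tokenizers::Tokenizer;

fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // The bytes can come from anywhere: a file read into memory (as here),
    // a network response, or an asset embedded with include_bytes!.
    let data: Vec<u8> = std::fs::read("tokenizer.json")?;

    // Deserialize directly from the byte slice, instead of going through
    // a filesystem path the way Tokenizer::from_file does.
    let tokenizer = Tokenizer::from_bytes(data)?;

    // Sanity check: encode a sample sentence and print the tokens.
    let encoding = tokenizer.encode("Hello, world!", false)?;
    println!("{:?}", encoding.get_tokens());
    Ok(())
}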