Add some new merging capability on Encoding

This commit is contained in:
Anthony MOI
2020-03-25 16:25:18 -04:00
parent f79ae40d88
commit 9ce895550b
4 changed files with 80 additions and 0 deletions

View File

@@ -38,6 +38,18 @@ impl PySequenceProtocol for Encoding {
#[pymethods]
impl Encoding {
// Build a single `Encoding` out of several existing ones.
//
// Every wrapper in `encodings` has its inner encoding cloned; the clones are
// handed as a slice to `tk::tokenizer::Encoding::merge` along with
// `growing_offsets`, and the result is wrapped back via `Encoding::new`.
// NOTE(review): the exact meaning of `growing_offsets` is defined by the
// underlying `merge` implementation, which is not visible here.
#[staticmethod]
fn merge(encodings: Vec<&Encoding>, growing_offsets: bool) -> Encoding {
    // The underlying merge is given a slice of owned encodings, so copy each
    // wrapped value out of its borrowed Python-side wrapper first.
    let mut owned = Vec::with_capacity(encodings.len());
    for wrapper in encodings {
        owned.push(wrapper.encoding.clone());
    }

    let merged = tk::tokenizer::Encoding::merge(owned.as_slice(), growing_offsets);
    Encoding::new(merged)
}
#[getter]
fn get_ids(&self) -> Vec<u32> {
self.encoding.get_ids().to_vec()