mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Add new merging capability to Encoding
This commit is contained in:
@ -38,6 +38,18 @@ impl PySequenceProtocol for Encoding {
|
||||
|
||||
#[pymethods]
|
||||
impl Encoding {
|
||||
#[staticmethod]
|
||||
fn merge(encodings: Vec<&Encoding>, growing_offsets: bool) -> Encoding {
|
||||
Encoding::new(tk::tokenizer::Encoding::merge(
|
||||
encodings
|
||||
.into_iter()
|
||||
.map(|e| e.encoding.clone())
|
||||
.collect::<Vec<_>>()
|
||||
.as_slice(),
|
||||
growing_offsets,
|
||||
))
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_ids(&self) -> Vec<u32> {
|
||||
self.encoding.get_ids().to_vec()
|
||||
|
Reference in New Issue
Block a user