mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Python - Add some missing Encoding bindings
This commit is contained in:
@ -56,4 +56,19 @@ impl Encoding {
|
|||||||
fn get_offsets(&self) -> Vec<(usize, usize)> {
|
fn get_offsets(&self) -> Vec<(usize, usize)> {
|
||||||
self.encoding.get_offsets().to_vec()
|
self.encoding.get_offsets().to_vec()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn get_special_tokens_mask(&self) -> Vec<u32> {
|
||||||
|
self.encoding.get_special_tokens_mask().to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn get_attention_mask(&self) -> Vec<u32> {
|
||||||
|
self.encoding.get_attention_mask().to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn get_overflowing(&self) -> Option<Encoding> {
|
||||||
|
self.encoding.get_overflowing().cloned().map(Encoding::new)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/// The Encoding struct represents the output of the Tokenizer
|
/// The Encoding struct represents the output of the Tokenizer
|
||||||
#[derive(Default, PartialEq, Debug)]
|
#[derive(Default, PartialEq, Debug, Clone)]
|
||||||
pub struct Encoding {
|
pub struct Encoding {
|
||||||
original: String,
|
original: String,
|
||||||
normalized: String,
|
normalized: String,
|
||||||
@ -69,6 +69,10 @@ impl Encoding {
|
|||||||
&self.attention_mask
|
&self.attention_mask
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_overflowing(&self) -> Option<&Encoding> {
|
||||||
|
self.overflowing.as_ref().map(|b| &**b)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn take_overflowing(&mut self) -> Option<Box<Encoding>> {
|
pub fn take_overflowing(&mut self) -> Option<Box<Encoding>> {
|
||||||
self.overflowing.take()
|
self.overflowing.take()
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user