mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 16:49:27 +00:00
Python - Add some missing Encoding bindings
This commit is contained in:
@ -56,4 +56,19 @@ impl Encoding {
|
||||
fn get_offsets(&self) -> Vec<(usize, usize)> {
|
||||
self.encoding.get_offsets().to_vec()
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_special_tokens_mask(&self) -> Vec<u32> {
|
||||
self.encoding.get_special_tokens_mask().to_vec()
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_attention_mask(&self) -> Vec<u32> {
|
||||
self.encoding.get_attention_mask().to_vec()
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn get_overflowing(&self) -> Option<Encoding> {
|
||||
self.encoding.get_overflowing().cloned().map(Encoding::new)
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/// The Encoding struct represents the output of the Tokenizer
|
||||
#[derive(Default, PartialEq, Debug)]
|
||||
#[derive(Default, PartialEq, Debug, Clone)]
|
||||
pub struct Encoding {
|
||||
original: String,
|
||||
normalized: String,
|
||||
@ -69,6 +69,10 @@ impl Encoding {
|
||||
&self.attention_mask
|
||||
}
|
||||
|
||||
/// Borrows the overflowing encoding, if there is one.
///
/// Returns `None` when no overflowing encoding is stored. Ownership stays
/// with `self`; use `take_overflowing` to move the value out instead.
pub fn get_overflowing(&self) -> Option<&Encoding> {
    // `as_deref` turns `&Option<Box<Encoding>>` into `Option<&Encoding>`,
    // replacing the manual `as_ref().map(|b| &**b)` re-borrow.
    self.overflowing.as_deref()
}
|
||||
|
||||
/// Moves the overflowing encoding out of `self`, leaving `None` behind.
///
/// Returns `None` if no overflowing encoding was stored.
pub fn take_overflowing(&mut self) -> Option<Box<Encoding>> {
    // The default value of an `Option` is `None`, so this is the same as
    // calling `.take()` on the field.
    std::mem::take(&mut self.overflowing)
}
|
||||
|
Reference in New Issue
Block a user