Python - Bind new Encoding's mappings

This commit is contained in:
Anthony MOI
2020-03-14 16:56:29 -04:00
parent b699fe4eb1
commit 8de6ef5a37
2 changed files with 37 additions and 0 deletions

View File

@ -48,6 +48,11 @@ impl Encoding {
self.encoding.get_tokens().to_vec()
}
#[getter]
fn get_words(&self) -> Vec<u32> {
self.encoding.get_words().to_vec()
}
#[getter]
fn get_type_ids(&self) -> Vec<u32> {
self.encoding.get_type_ids().to_vec()
@ -78,6 +83,22 @@ impl Encoding {
.collect()
}
fn word_boundaries(&self, index: usize) -> Option<(usize, usize)> {
self.encoding.word_boundaries(index)
}
fn char_to_word(&self, pos: usize) -> Option<Offsets> {
self.encoding.char_to_word(pos)
}
fn char_to_token(&self, pos: usize) -> Option<Offsets> {
self.encoding.char_to_token(pos)
}
fn token_to_word(&self, index: usize) -> Option<Offsets> {
self.encoding.token_to_word(index)
}
#[args(kwargs = "**")]
fn pad(&mut self, length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
let mut pad_id = 0;