Python - Bindings for pad_to_multiple_of

This commit is contained in:
Anthony MOI
2020-05-29 20:31:59 -04:00
parent 7f7a2059a7
commit 0934fe5803
3 changed files with 15 additions and 0 deletions

View File

@@ -337,6 +337,7 @@ impl Tokenizer {
#[args(kwargs = "**")]
fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
let mut direction = PaddingDirection::Right;
let mut pad_to_multiple_of: Option<usize> = None;
let mut pad_id: u32 = 0;
let mut pad_type_id: u32 = 0;
let mut pad_token = String::from("[PAD]");
@@ -359,6 +360,7 @@ impl Tokenizer {
.into_pyerr()),
}?;
}
"pad_to_multiple_of" => pad_to_multiple_of = value.extract()?,
"pad_id" => pad_id = value.extract()?,
"pad_type_id" => pad_type_id = value.extract()?,
"pad_token" => pad_token = value.extract()?,
@@ -377,6 +379,7 @@ impl Tokenizer {
self.tokenizer.with_padding(Some(PaddingParams {
strategy,
direction,
pad_to_multiple_of,
pad_id,
pad_type_id,
pad_token: pad_token.to_owned(),

View File

@@ -395,6 +395,7 @@ class Tokenizer:
def enable_padding(
self,
direction: Optional[str] = "right",
pad_to_multiple_of: Optional[int] = None,
pad_id: Optional[int] = 0,
pad_type_id: Optional[int] = 0,
pad_token: Optional[str] = "[PAD]",
@@ -406,6 +407,11 @@ class Tokenizer:
direction: (`optional`) str:
Can be one of: `right` or `left`
pad_to_multiple_of: (`optional`) unsigned int:
If specified, the padding length should always snap to the next multiple of
the given value. For example, if we were going to pad to a length of 250 but
`pad_to_multiple_of=8`, then we will pad to 256.
pad_id: (`optional`) unsigned int:
The index to be used when padding

View File

@@ -52,6 +52,7 @@ class BaseTokenizer:
def enable_padding(
self,
direction: Optional[str] = "right",
pad_to_multiple_of: Optional[int] = None,
pad_id: Optional[int] = 0,
pad_type_id: Optional[int] = 0,
pad_token: Optional[str] = "[PAD]",
@@ -63,6 +64,11 @@ class BaseTokenizer:
direction: (`optional`) str:
Can be one of: `right` or `left`
pad_to_multiple_of: (`optional`) unsigned int:
If specified, the padding length should always snap to the next multiple of
the given value. For example, if we were going to pad to a length of 250 but
`pad_to_multiple_of=8`, then we will pad to 256.
pad_id: (`optional`) unsigned int:
The index to be used when padding