mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 12:48:18 +00:00
Python - Bindings for pad_to_multiple_of
This commit is contained in:
@@ -337,6 +337,7 @@ impl Tokenizer {
|
||||
#[args(kwargs = "**")]
|
||||
fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
|
||||
let mut direction = PaddingDirection::Right;
|
||||
let mut pad_to_multiple_of: Option<usize> = None;
|
||||
let mut pad_id: u32 = 0;
|
||||
let mut pad_type_id: u32 = 0;
|
||||
let mut pad_token = String::from("[PAD]");
|
||||
@@ -359,6 +360,7 @@ impl Tokenizer {
|
||||
.into_pyerr()),
|
||||
}?;
|
||||
}
|
||||
"pad_to_multiple_of" => pad_to_multiple_of = value.extract()?,
|
||||
"pad_id" => pad_id = value.extract()?,
|
||||
"pad_type_id" => pad_type_id = value.extract()?,
|
||||
"pad_token" => pad_token = value.extract()?,
|
||||
@@ -377,6 +379,7 @@ impl Tokenizer {
|
||||
self.tokenizer.with_padding(Some(PaddingParams {
|
||||
strategy,
|
||||
direction,
|
||||
pad_to_multiple_of,
|
||||
pad_id,
|
||||
pad_type_id,
|
||||
pad_token: pad_token.to_owned(),
|
||||
|
||||
@@ -395,6 +395,7 @@ class Tokenizer:
|
||||
def enable_padding(
|
||||
self,
|
||||
direction: Optional[str] = "right",
|
||||
pad_to_multiple_of: Optional[int] = None,
|
||||
pad_id: Optional[int] = 0,
|
||||
pad_type_id: Optional[int] = 0,
|
||||
pad_token: Optional[str] = "[PAD]",
|
||||
@@ -406,6 +407,11 @@ class Tokenizer:
|
||||
direction: (`optional`) str:
|
||||
Can be one of: `right` or `left`
|
||||
|
||||
pad_to_multiple_of: (`optional`) unsigned int:
|
||||
If specified, the padding length should always snap to the next multiple of
|
||||
the given value. For example if we were going to pad with a length of 250 but
|
||||
`pad_to_multiple_of=8` then we will pad to 256.
|
||||
|
||||
pad_id: (`optional`) unsigned int:
|
||||
The index to be used when padding
|
||||
|
||||
|
||||
@@ -52,6 +52,7 @@ class BaseTokenizer:
|
||||
def enable_padding(
|
||||
self,
|
||||
direction: Optional[str] = "right",
|
||||
pad_to_multiple_of: Optional[int] = None,
|
||||
pad_id: Optional[int] = 0,
|
||||
pad_type_id: Optional[int] = 0,
|
||||
pad_token: Optional[str] = "[PAD]",
|
||||
@@ -63,6 +64,11 @@ class BaseTokenizer:
|
||||
direction: (`optional`) str:
|
||||
Can be one of: `right` or `left`
|
||||
|
||||
pad_to_multiple_of: (`optional`) unsigned int:
|
||||
If specified, the padding length should always snap to the next multiple of
|
||||
the given value. For example if we were going to pad with a length of 250 but
|
||||
`pad_to_multiple_of=8` then we will pad to 256.
|
||||
|
||||
pad_id: (`optional`) unsigned int:
|
||||
The index to be used when padding
|
||||
|
||||
|
||||
Reference in New Issue
Block a user