mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 20:58:22 +00:00
Python - Bindings for pad_to_multiple_of
This commit is contained in:
@@ -337,6 +337,7 @@ impl Tokenizer {
|
|||||||
#[args(kwargs = "**")]
|
#[args(kwargs = "**")]
|
||||||
fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
|
fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
|
||||||
let mut direction = PaddingDirection::Right;
|
let mut direction = PaddingDirection::Right;
|
||||||
|
let mut pad_to_multiple_of: Option<usize> = None;
|
||||||
let mut pad_id: u32 = 0;
|
let mut pad_id: u32 = 0;
|
||||||
let mut pad_type_id: u32 = 0;
|
let mut pad_type_id: u32 = 0;
|
||||||
let mut pad_token = String::from("[PAD]");
|
let mut pad_token = String::from("[PAD]");
|
||||||
@@ -359,6 +360,7 @@ impl Tokenizer {
|
|||||||
.into_pyerr()),
|
.into_pyerr()),
|
||||||
}?;
|
}?;
|
||||||
}
|
}
|
||||||
|
"pad_to_multiple_of" => pad_to_multiple_of = value.extract()?,
|
||||||
"pad_id" => pad_id = value.extract()?,
|
"pad_id" => pad_id = value.extract()?,
|
||||||
"pad_type_id" => pad_type_id = value.extract()?,
|
"pad_type_id" => pad_type_id = value.extract()?,
|
||||||
"pad_token" => pad_token = value.extract()?,
|
"pad_token" => pad_token = value.extract()?,
|
||||||
@@ -377,6 +379,7 @@ impl Tokenizer {
|
|||||||
self.tokenizer.with_padding(Some(PaddingParams {
|
self.tokenizer.with_padding(Some(PaddingParams {
|
||||||
strategy,
|
strategy,
|
||||||
direction,
|
direction,
|
||||||
|
pad_to_multiple_of,
|
||||||
pad_id,
|
pad_id,
|
||||||
pad_type_id,
|
pad_type_id,
|
||||||
pad_token: pad_token.to_owned(),
|
pad_token: pad_token.to_owned(),
|
||||||
|
|||||||
@@ -395,6 +395,7 @@ class Tokenizer:
|
|||||||
def enable_padding(
|
def enable_padding(
|
||||||
self,
|
self,
|
||||||
direction: Optional[str] = "right",
|
direction: Optional[str] = "right",
|
||||||
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
pad_id: Optional[int] = 0,
|
pad_id: Optional[int] = 0,
|
||||||
pad_type_id: Optional[int] = 0,
|
pad_type_id: Optional[int] = 0,
|
||||||
pad_token: Optional[str] = "[PAD]",
|
pad_token: Optional[str] = "[PAD]",
|
||||||
@@ -406,6 +407,11 @@ class Tokenizer:
|
|||||||
direction: (`optional`) str:
|
direction: (`optional`) str:
|
||||||
Can be one of: `right` or `left`
|
Can be one of: `right` or `left`
|
||||||
|
|
||||||
|
pad_to_multiple_of: (`optional`) unsigned int:
|
||||||
|
If specified, the padding length should always snap to the next multiple of
|
||||||
|
the given value. For example, if we were going to pad to a length of 250 but
|
||||||
|
`pad_to_multiple_of=8` then we will pad to 256.
|
||||||
|
|
||||||
pad_id: (`optional`) unsigned int:
|
pad_id: (`optional`) unsigned int:
|
||||||
The index to be used when padding
|
The index to be used when padding
|
||||||
|
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ class BaseTokenizer:
|
|||||||
def enable_padding(
|
def enable_padding(
|
||||||
self,
|
self,
|
||||||
direction: Optional[str] = "right",
|
direction: Optional[str] = "right",
|
||||||
|
pad_to_multiple_of: Optional[int] = None,
|
||||||
pad_id: Optional[int] = 0,
|
pad_id: Optional[int] = 0,
|
||||||
pad_type_id: Optional[int] = 0,
|
pad_type_id: Optional[int] = 0,
|
||||||
pad_token: Optional[str] = "[PAD]",
|
pad_token: Optional[str] = "[PAD]",
|
||||||
@@ -63,6 +64,11 @@ class BaseTokenizer:
|
|||||||
direction: (`optional`) str:
|
direction: (`optional`) str:
|
||||||
Can be one of: `right` or `left`
|
Can be one of: `right` or `left`
|
||||||
|
|
||||||
|
pad_to_multiple_of: (`optional`) unsigned int:
|
||||||
|
If specified, the padding length should always snap to the next multiple of
|
||||||
|
the given value. For example, if we were going to pad to a length of 250 but
|
||||||
|
`pad_to_multiple_of=8` then we will pad to 256.
|
||||||
|
|
||||||
pad_id: (`optional`) unsigned int:
|
pad_id: (`optional`) unsigned int:
|
||||||
The index to be used when padding
|
The index to be used when padding
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user