mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Python - Add bindings for PostProcessor.process
This commit is contained in:
@ -4,6 +4,7 @@ use pyo3::exceptions;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::*;
|
||||
|
||||
use crate::encoding::PyEncoding;
|
||||
use crate::error::ToPyResult;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tk::processors::bert::BertProcessing;
|
||||
@ -89,6 +90,22 @@ impl PyPostProcessor {
|
||||
fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
|
||||
self.processor.added_tokens(is_pair)
|
||||
}
|
||||
|
||||
#[args(pair = "None", add_special_tokens = "true")]
|
||||
fn process(
|
||||
&self,
|
||||
encoding: &PyEncoding,
|
||||
pair: Option<&PyEncoding>,
|
||||
add_special_tokens: bool,
|
||||
) -> PyResult<PyEncoding> {
|
||||
let final_encoding = ToPyResult(self.processor.process(
|
||||
encoding.encoding.clone(),
|
||||
pair.map(|e| e.encoding.clone()),
|
||||
add_special_tokens,
|
||||
))
|
||||
.into_py()?;
|
||||
Ok(final_encoding.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name=BertProcessing)]
|
||||
|
Reference in New Issue
Block a user