mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 04:38:23 +00:00
Roberta PostProcessor (#111)
* Added RobertaProcessor on Rust side. Required to match the double separator token in the middle of pairs. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
* Fix typo in RobertaProcessing method declaration. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
* Correctly include RobertaProcessor in the Python binding. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
* Roberta doesn't use token_type_ids, so let's set everything to 0. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
* Attempt to make it work on Node side too. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
* Fix js bindings / `npm run lint`
* Make RustFmt happy. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
Co-authored-by: Pierric Cistac <Pierrci@users.noreply.github.com>
This commit is contained in:
@@ -59,6 +59,7 @@ fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||
/// Registers the post-processor classes on the module `m`.
///
/// Adds `PostProcessor`, `BertProcessing` and `RobertaProcessing` (all taken
/// from the `processors` module) to `m`, in that order. An error returned by
/// any `add_class` call is propagated to the caller through `?`; when all
/// three succeed the function returns `Ok(())`. The `_py` argument is not used
/// in the body.
fn processors(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||
m.add_class::<processors::PostProcessor>()?;
|
||||
m.add_class::<processors::BertProcessing>()?;
|
||||
m.add_class::<processors::RobertaProcessing>()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user