Python - Add ByteLevel PostProcessor

This commit is contained in:
Anthony MOI
2020-03-05 17:20:33 -05:00
parent 8dcbc8377e
commit 52180a9179
4 changed files with 28 additions and 0 deletions

View File

@@ -62,6 +62,7 @@ fn processors(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<processors::PostProcessor>()?; m.add_class::<processors::PostProcessor>()?;
m.add_class::<processors::BertProcessing>()?; m.add_class::<processors::BertProcessing>()?;
m.add_class::<processors::RobertaProcessing>()?; m.add_class::<processors::RobertaProcessing>()?;
m.add_class::<processors::ByteLevel>()?;
Ok(()) Ok(())
} }

View File

@@ -42,3 +42,17 @@ impl RobertaProcessing {
})) }))
} }
} }
// Python-facing `ByteLevel` post-processor, declared as a subclass of the
// `PostProcessor` Python class. It carries no state of its own: everything
// lives in the base-class value that the constructor initializes.
#[pyclass(extends=PostProcessor)]
pub struct ByteLevel {}

#[pymethods]
impl ByteLevel {
    // Constructor exposed to Python. Builds the tokenizers-side byte-level
    // processor and stores it, boxed and owned, in the base `PostProcessor`.
    #[new]
    fn new(obj: &PyRawObject) -> PyResult<()> {
        // NOTE(review): the meaning of the `false` flag is not visible from
        // this file — confirm against `byte_level::ByteLevel::new`.
        let byte_level = tk::processors::byte_level::ByteLevel::new(false);
        let processor = Container::Owned(Box::new(byte_level));
        obj.init(PostProcessor { processor });
        Ok(())
    }
}

View File

@@ -3,3 +3,4 @@ from .. import processors
PostProcessor = processors.PostProcessor PostProcessor = processors.PostProcessor
BertProcessing = processors.BertProcessing BertProcessing = processors.BertProcessing
RobertaProcessing = processors.RobertaProcessing RobertaProcessing = processors.RobertaProcessing
# Expose the ByteLevel class from the `processors` module under this module's namespace.
ByteLevel = processors.ByteLevel

View File

@@ -62,3 +62,15 @@ class RobertaProcessing(PostProcessor):
PostProcessor PostProcessor
""" """
pass pass
class ByteLevel(PostProcessor):
    """ ByteLevel Post processing

    This post-processor takes care of fixing the offsets after the BPE Model may have
    produced some bad offsets while merging. This happens for any unicode character that
    gets split up in many byte-level characters.
    """

    def __init__(self) -> None:
        """ Instantiate a new ByteLevel """
        pass