mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-23 00:35:35 +00:00
Add Strip normalizer (#140)
* WIP strip. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Rust StripNormalizer Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Allow to specify strip direction Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Renamed StripNormalizer to Strip Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Added Python binding. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Makes Strip python compatible with pythonic constructor. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Run RustFmt Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Clippy next ofc. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Move lstrip and rstrip on NormalizedString Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * implement strip() for normalizer + unittests. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Add some more unittests on edge cases. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * clippy and fmt. Signed-off-by: Morgan Funtowicz <morgan@huggingface.co> * Simplify strip and fix offsets * Python - Update strip bindings with default values Co-authored-by: MOI Anthony <xn1t0x@gmail.com>
This commit is contained in:
@ -133,3 +133,29 @@ impl Lowercase {
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub struct Strip {}
|
||||
#[pymethods]
|
||||
impl Strip {
|
||||
#[new]
|
||||
#[args(kwargs = "**")]
|
||||
fn new(obj: &PyRawObject, kwargs: Option<&PyDict>) -> PyResult<()> {
|
||||
let mut left = true;
|
||||
let mut right = true;
|
||||
|
||||
if let Some(kwargs) = kwargs {
|
||||
if let Some(l) = kwargs.get_item("left") {
|
||||
left = l.extract()?;
|
||||
}
|
||||
if let Some(r) = kwargs.get_item("right") {
|
||||
right = r.extract()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(obj.init(Normalizer {
|
||||
normalizer: Container::Owned(Box::new(tk::normalizers::strip::Strip::new(left, right))),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user