mirror of
https://github.com/mii443/tokenizers.git
synced 2025-12-06 04:38:23 +00:00
Char based delimiter splitting - TransfoXL (#114)
* WIP delimiter splitter

  Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* Bind on Python side.

  Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* Add missing delimiter parameter in CharDelimiterSplit constructor.

  Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* Attempt to provide CharDelimiterSplit for node.

  Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* Apply Rust formatting.

  Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* fix bindings node

Co-authored-by: Pierric Cistac <Pierrci@users.noreply.github.com>
This commit is contained in:
@@ -40,6 +40,7 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<pre_tokenizers::WhitespaceSplit>()?;
     m.add_class::<pre_tokenizers::BertPreTokenizer>()?;
     m.add_class::<pre_tokenizers::Metaspace>()?;
+    m.add_class::<pre_tokenizers::CharDelimiterSplit>()?;
     Ok(())
 }
Reference in a new issue · Block a user