Remove Container, changes to PyDecoder, cloneable Tokenizer.

* Derive Clone on Tokenizer and AddedVocabulary.
* Replace Container with Arc wrapper for Decoders.
* Prefix Rust Decoder types with Py.
* Rename PyDecoder to CustomDecoder.
* Raise an exception instead of panicking when serializing a custom decoder.
* Re-enable training with cloneable Tokenizer.
* Remove unsound Container, use Arc wrappers instead.
This commit is contained in:
Sebastian Pütz
2020-07-25 20:07:24 +02:00
committed by Anthony MOI
parent ece6ad9149
commit d62adf7195
6 changed files with 117 additions and 169 deletions

View File

@@ -10,7 +10,6 @@ mod processors;
mod token;
mod tokenizer;
mod trainers;
mod utils;
use pyo3::prelude::*;
use pyo3::wrap_pymodule;
@@ -71,11 +70,11 @@ fn pre_tokenizers(_py: Python, m: &PyModule) -> PyResult<()> {
// Registers the decoder classes on the `PyModule` passed as `m`, propagating
// the first registration error via `?` and returning `Ok(())` otherwise.
//
// NOTE(review): this listing comes from a diff view whose +/- markers were
// lost. It appears to show both the pre-change registrations (the unprefixed
// `Decoder`, `ByteLevel`, `WordPiece`, `Metaspace`, `BPEDecoder`) and their
// `Py`-prefixed replacements; presumably only the latter remain after the
// commit. Confirm against the actual file before treating this as compilable.
/// Decoders Module
#[pymodule]
fn decoders(_py: Python, m: &PyModule) -> PyResult<()> {
// Unprefixed names: presumably the lines removed by the commit (TODO confirm).
m.add_class::<decoders::Decoder>()?;
m.add_class::<decoders::ByteLevel>()?;
m.add_class::<decoders::WordPiece>()?;
m.add_class::<decoders::Metaspace>()?;
m.add_class::<decoders::BPEDecoder>()?;
// `Py`-prefixed names: presumably the lines added by the commit (TODO confirm).
m.add_class::<decoders::PyDecoder>()?;
m.add_class::<decoders::PyByteLevelDec>()?;
m.add_class::<decoders::PyWordPieceDec>()?;
m.add_class::<decoders::PyMetaspaceDec>()?;
m.add_class::<decoders::PyBPEDecoder>()?;
Ok(())
}