mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
* current updates * simplify * set_item works, but `tokenizer._tokenizer.post_processor[1].single = ["$0", "</s>"]` does not ! * fix: `normalizers` deserialization and other refactoring * fix: `pre_tokenizer` deserialization * feat: add `__len__` implementation for `normalizer::PySequence` * feat: add `__setitem__` impl for `normalizers::PySequence` * feat: add `__setitem__` impl to `pre_tokenizer::PySequence` * feat: add `__setitem__` impl to `post_processor::PySequence` * test: add normalizer sequence setter check * refactor: allow unused `processors::setter` macro * test: add `__setitem__` test for processors & pretok * refactor: `unwrap` -> `PyException::new_err()?` * refactor: fmt * refactor: remove unnecessary `pub` * feat(bindings): add missing getters & setters for pretoks * feat(bindings): add missing getters & setters for processors * refactor(bindings): rewrite RwLock poison error msg * refactor: remove debug print * feat(bindings): add description as to why custom deser is needed * feat: make post proc sequence elements mutable * fix(binding): serialization --------- Co-authored-by: Luc Georges <luc.sydney.georges@gmail.com>
31 lines
645 B
TOML
31 lines
645 B
TOML
# Crate metadata for the Python bindings of the `tokenizers` library.
# The `-dev.0` pre-release suffix marks this as an unreleased development version.
[package]
name = "tokenizers-python"
version = "0.21.0-dev.0"
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
edition = "2021"

# Build a C-compatible dynamic library (`cdylib`) so Python can load the
# extension module; the importable module name is `tokenizers`.
[lib]
name = "tokenizers"
crate-type = ["cdylib"]

# Runtime dependencies, sorted alphabetically per Rust style conventions.
[dependencies]
env_logger = "0.11"
itertools = "0.12"
libc = "0.2"
ndarray = "0.16"
numpy = "0.23"
# abi3-py39: build against the stable CPython ABI (min Python 3.9) so one
# wheel works across Python versions; py-clone enables Clone for Py<T>.
pyo3 = { version = "0.23", features = ["abi3", "abi3-py39", "py-clone"] }
rayon = "1.10"
serde = { version = "1.0", features = ["rc", "derive"] }
serde_json = "1.0"

# Core Rust library, consumed from the sibling workspace directory rather
# than crates.io so the bindings always track the in-repo sources.
[dependencies.tokenizers]
path = "../../tokenizers"

# Test-only dependencies, sorted alphabetically.
[dev-dependencies]
# auto-initialize: let Rust test binaries start an embedded Python
# interpreter themselves (the inverse of the extension-module setup).
pyo3 = { version = "0.23", features = ["auto-initialize"] }
tempfile = "3.10"

[features]
# Fix: was misspelled `defaut`, which Cargo treats as an ordinary (never
# implicitly enabled) feature — so `pyo3/extension-module` was not active on a
# default build. `default` is the feature set Cargo enables automatically;
# extension-module tells pyo3 not to link libpython, as required for a
# loadable Python extension.
default = ["pyo3/extension-module"]