# Mirror of https://github.com/mii443/tokenizers.git
# Synced 2025-08-22 16:25:30 +00:00
# 89 lines, 2.2 KiB, TOML
# Crate metadata for the `tokenizers` package.
[package]
name = "tokenizers"
version = "0.15.2-dev.0"
authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
edition = "2018"
homepage = "https://github.com/huggingface/tokenizers"
repository = "https://github.com/huggingface/tokenizers"
documentation = "https://docs.rs/tokenizers/"
license = "Apache-2.0"
keywords = ["tokenizer", "NLP", "huggingface", "BPE", "WordPiece"]
readme = "./README.md"
description = """
Provides an implementation of today's most used tokenizers,
with a focus on performances and versatility.
"""
# Paths left out of the packaged crate (toolchain pin, build output,
# lock file, benchmark fixtures and data files).
exclude = [
    "rust-toolchain",
    "target/*",
    "Cargo.lock",
    "benches/*.txt",
    "benches/*.json",
    "data/*",
]

# Library target, rooted at src/lib.rs.
[lib]
name = "tokenizers"
path = "src/lib.rs"
# Opt this target out of `cargo bench`; benchmarks are declared in the
# separate [[bench]] targets.
bench = false

# Command-line binary, rooted at src/cli.rs.
[[bin]]
name = "cli"
path = "src/cli.rs"
bench = false
# Only built when the `cli` feature is enabled.
required-features = ["cli"]

[[bench]]
name = "bpe_benchmark"
# Disable the built-in libtest harness; the benchmark supplies its own entry
# point (presumably Criterion, which is listed under [dev-dependencies]).
harness = false

[[bench]]
name = "bert_benchmark"
# Disable the built-in libtest harness; the benchmark supplies its own entry
# point (presumably Criterion, which is listed under [dev-dependencies]).
harness = false

[[bench]]
name = "layout_benchmark"
# Disable the built-in libtest harness; the benchmark supplies its own entry
# point (presumably Criterion, which is listed under [dev-dependencies]).
harness = false

[[bench]]
name = "unigram_benchmark"
# Disable the built-in libtest harness; the benchmark supplies its own entry
# point (presumably Criterion, which is listed under [dev-dependencies]).
harness = false

# Runtime dependencies. Entries marked `optional = true` are only compiled
# when a feature from the [features] table enables them.
[dependencies]
lazy_static = "1.4"
rand = "0.8"
# Optional; enabled through the `onig` entry in the default feature set.
onig = { version = "6.4", default-features = false, optional = true }
regex = "1.9"
regex-syntax = "0.7"
rayon = "1.8"
rayon-cond = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
# Optional; enabled by the `cli` feature.
clap = { version = "4.4", features = ["derive"], optional = true }
unicode-normalization-alignments = "0.1"
unicode_categories = "0.1"
unicode-segmentation = "1.10"
# Optional; enabled by the `progressbar` feature.
indicatif = { version = "0.17", optional = true }
itertools = "0.11"
log = "0.4"
derive_builder = "0.12"
spm_precompiled = "0.1"
# Optional; enabled by the `http` feature.
hf-hub = { version = "0.3.2", optional = true }
aho-corasick = "1.1"
paste = "1.0.14"
macro_rules_attribute = "0.2.0"
thiserror = "1.0.49"
# Optional; enabled by the `unstable_wasm` feature.
fancy-regex = { version = "0.11", optional = true }
# The `unstable_wasm` feature turns on this crate's `js` feature.
getrandom = { version = "0.2.10" }
# Default features are off; the `esaxx_fast` feature turns on `cpp`.
esaxx-rs = { version = "0.1.10", default-features = false, features = [] }
monostate = "0.1.9"

# Cargo features. Each entry lists the optional dependencies or
# dependency features it switches on.
[features]
default = ["progressbar", "cli", "onig", "esaxx_fast"]
# Enables the `cpp` feature of the esaxx-rs dependency.
esaxx_fast = ["esaxx-rs/cpp"]
progressbar = ["indicatif"]
http = ["hf-hub"]
# Required by the `cli` binary target (see its `required-features`).
cli = ["clap"]
# Pulls in fancy-regex and the `js` feature of getrandom.
unstable_wasm = ["fancy-regex", "getrandom/js"]

# Dependencies used only by tests, examples and benchmarks.
[dev-dependencies]
criterion = "0.5"
tempfile = "3.8"
assert_approx_eq = "1.1"

# Release builds use "fat" link-time optimization, i.e. LTO across all crates
# in the dependency graph.
[profile.release]
lto = "fat"