mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Python bindings backbone
This commit is contained in:
323
bindings/python/Cargo.lock
generated
Normal file
323
bindings/python/Cargo.lock
generated
Normal file
@ -0,0 +1,323 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ghost"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indoc"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"indoc-impl 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indoc-impl"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unindent 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inventory"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ghost 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"inventory-impl 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inventory-impl"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste-impl"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"indoc 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"inventory 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pyo3cls 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unindent 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-derive-backend"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3cls"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pyo3-derive-backend 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.102"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"pyo3 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokenizers-lib 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers-lib"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unindent"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
|
||||
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
|
||||
"checksum ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd8ce37ad4184ab2ce004c33bf6379185d3b1c95801cab51026bd271bf68eedc"
|
||||
"checksum ghost 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a36606a68532b5640dc86bb1f33c64b45c4682aad4c50f3937b317ea387f3d6"
|
||||
"checksum indoc 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9553c1e16c114b8b77ebeb329e5f2876eed62a8d51178c8bc6bff0d65f98f8"
|
||||
"checksum indoc-impl 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b714fc08d0961716390977cdff1536234415ac37b509e34e5a983def8340fb75"
|
||||
"checksum inventory 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f4cece20baea71d9f3435e7bbe9adf4765f091c5fe404975f844006964a71299"
|
||||
"checksum inventory-impl 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2869bf972e998977b1cb87e60df70341d48e48dca0823f534feb91ea44adaf9"
|
||||
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
|
||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
"checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8"
|
||||
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
|
||||
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
|
||||
"checksum paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "423a519e1c6e828f1e73b720f9d9ed2fa643dce8a7737fb43235ce0b41eeaa49"
|
||||
"checksum paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "4214c9e912ef61bf42b81ba9a47e8aad1b2ffaf739ab162bf96d1e011f54e6c5"
|
||||
"checksum proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5"
|
||||
"checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
|
||||
"checksum pyo3 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e00968c74d07a0d2f18ce7a7594b1baedb1dfe2c56b4f32a74c4ea7548f37468"
|
||||
"checksum pyo3-derive-backend 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e458cac2fb0ffe261e46b5a3c254daba5e1b0fdb6955dbf9cb7286be0053659a"
|
||||
"checksum pyo3cls 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "cf4a59e5c95f7a355d3f471e9e97a72bdd9bd355686f2e069748773ffb4bbc42"
|
||||
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
|
||||
"checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
|
||||
"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
|
||||
"checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8"
|
||||
"checksum serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4b39bd9b0b087684013a792c59e3e07a46a01d2322518d8a1104641a0b1be0"
|
||||
"checksum serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "ca13fc1a832f793322228923fbb3aba9f3f44444898f835d31ad1b74fa0a2bf8"
|
||||
"checksum serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)" = "2f72eb2a68a7dc3f9a691bfda9305a1c017a6215e5a4545c258500d2099a37c2"
|
||||
"checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
"checksum syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0e7bedb3320d0f3035594b0b723c8a28d7d336a3eda3881db79e61d676fb644c"
|
||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
"checksum unindent 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "63f18aa3b0e35fed5a0048f029558b1518095ffe2a0a31fb87c93dece93a4993"
|
||||
"checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce"
|
23
bindings/python/Cargo.toml
Normal file
23
bindings/python/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
||||
[package]
|
||||
name = "tokenizers"
|
||||
version = "0.1.0"
|
||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "tokenizers"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies.pyo3]
|
||||
version = "0.8.2"
|
||||
features = ["extension-module"]
|
||||
|
||||
[dependencies.tokenizers-lib]
|
||||
version = "*"
|
||||
path = "../../tokenizers"
|
||||
|
||||
[target.x86_64-apple-darwin]
|
||||
rustflags = [
|
||||
"-C", "link-arg=-undefined",
|
||||
"-C", "link-arg=dynamic_lookup",
|
||||
]
|
25
bindings/python/src/lib.rs
Normal file
25
bindings/python/src/lib.rs
Normal file
@ -0,0 +1,25 @@
|
||||
extern crate tokenizers as tk;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::wrap_pyfunction;
|
||||
|
||||
#[pyfunction]
|
||||
/// Tokenize
|
||||
fn tokenize(a: String) -> PyResult<Vec<u32>> {
|
||||
println!("Tokenize in rust");
|
||||
tk::test();
|
||||
Ok(vec![1, 2, 3])
|
||||
}
|
||||
|
||||
#[pymodule]
|
||||
fn tokenizers(py: Python, m: &PyModule) -> PyResult<()> {
|
||||
m.add_wrapped(wrap_pyfunction!(tokenize))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn it_works() {
|
||||
assert_eq!(2 + 2, 4);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user