Mirror of https://github.com/mii443/tokenizers.git (last synced 2025-08-22 16:25:30 +00:00)
Enabling static interpreter embedding for manylinux. (#1064)
* Removing dead file.
* Checking that we can distribute with static python embedding for manylinux.
* Manylinux embed interpreter.
* Building wheels manylinux with static embedding.
* Better script.
* Typo.
* Using a dummy feature?
* Default features?
* Back into order.
* Fixing manylinux??
* Local dir.
* Missing star.
* Makedir?
* Monkey coding this.
* Extension module?
* Building with default features `RustExtension`.
* bdist_wheel + rustextension any better?
* Update rust-py version.
* Forcing extension module.
* No default features.
* Remove py37 out of spite.
* Revert "Remove py37 out of spite". This reverts commit 6ab7facd792b59c2e30be82fe42816d24c32cf0d.
* Really extraneous feature.
* Fix build wheels.
* Putting things back in place.
This commit is contained in:
30
bindings/python/Cargo.lock
generated
30
bindings/python/Cargo.lock
generated
@ -1200,9 +1200,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.16.2"
|
||||
version = "0.16.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a378727d5fdcaafd15b5afe9842cff1c25fdc43f62a162ffda2263c57ad98703"
|
||||
checksum = "0220c44442c9b239dd4357aa856ac468a4f5e1f0df19ddb89b2522952eb4c6ca"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"indoc",
|
||||
@ -1216,18 +1216,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.16.2"
|
||||
version = "0.16.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fbb27a3e96edd34c13d97d0feefccc90a79270c577c66e19d95af8323823dfc"
|
||||
checksum = "9c819d397859445928609d0ec5afc2da5204e0d0f73d6bf9e153b04e83c9cdc2"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"target-lexicon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.16.2"
|
||||
version = "0.16.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b719fff844bcf3f911132112ec06527eb195f6a98e0c42cf97e1118929fd4ea"
|
||||
checksum = "ca882703ab55f54702d7bfe1189b41b0af10272389f04cae38fe4cd56c65f75f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config",
|
||||
@ -1235,9 +1236,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.16.2"
|
||||
version = "0.16.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f795e52d3320abb349ca28b501a7112154a87f353fae1c811deecd58e99cfa9b"
|
||||
checksum = "568749402955ad7be7bad9a09b8593851cd36e549ac90bfd44079cea500f3f21"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
@ -1247,12 +1248,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.16.2"
|
||||
version = "0.16.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39e03aa57a3bb7b96982958088df38302a139df4eef54671bc595f26556cb75b"
|
||||
checksum = "611f64e82d98f447787e82b8e7b0ebc681e1eb78fc1252668b2c605ffb4e1eb8"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-build-config",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
@ -1611,6 +1611,12 @@ dependencies = [
|
||||
"xattr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "target-lexicon"
|
||||
version = "0.12.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.3.0"
|
||||
@ -1733,7 +1739,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers-python"
|
||||
version = "0.11.0"
|
||||
version = "0.13.0"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"itertools 0.9.0",
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tokenizers-python"
|
||||
version = "0.11.0"
|
||||
version = "0.13.0"
|
||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
@ -14,7 +14,7 @@ serde = { version = "1.0", features = [ "rc", "derive" ]}
|
||||
serde_json = "1.0"
|
||||
libc = "0.2"
|
||||
env_logger = "0.7.1"
|
||||
pyo3 = "0.16.2"
|
||||
pyo3 = { version = "0.16.2", features = ["extension-module"] }
|
||||
numpy = "0.16.2"
|
||||
ndarray = "0.13"
|
||||
onig = { version = "6.0", default-features = false }
|
||||
@ -26,7 +26,7 @@ path = "../../tokenizers"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.1"
|
||||
pyo3 = { version = "0.16.2", features = ["auto-initialize"] }
|
||||
|
||||
[features]
|
||||
default = ["pyo3/extension-module"]
|
||||
test = ["pyo3/auto-initialize"]
|
||||
|
||||
|
@ -20,7 +20,7 @@ TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json
|
||||
test: $(TESTS_RESOURCES)
|
||||
pip install pytest requests setuptools_rust numpy pyarrow datasets
|
||||
python -m pytest -s -v tests
|
||||
cargo test --no-default-features
|
||||
cargo test --features test
|
||||
|
||||
$(DATA_DIR)/big.txt :
|
||||
$(dir_guard)
|
||||
|
@ -1,23 +1,28 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
if ! command -v cargo &> /dev/null
|
||||
then
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
fi
|
||||
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
|
||||
for PYBIN in /opt/python/{cp37-cp37m,cp38-cp38,cp39-cp39,cp310-cp310}/bin; do
|
||||
for PYBIN in /opt/python/cp{37,38,39,310}*/bin; do
|
||||
export PYTHON_SYS_EXECUTABLE="$PYBIN/python"
|
||||
|
||||
"${PYBIN}/pip" install -U setuptools-rust==0.11.3
|
||||
"${PYBIN}/pip" install -U setuptools-rust setuptools wheel
|
||||
"${PYBIN}/python" setup.py bdist_wheel
|
||||
rm -rf build/*
|
||||
done
|
||||
|
||||
for whl in dist/*.whl; do
|
||||
for whl in ./dist/*.whl; do
|
||||
auditwheel repair "$whl" -w dist/
|
||||
done
|
||||
|
||||
# Keep only manylinux wheels
|
||||
rm dist/*-linux_*
|
||||
rm ./dist/*-linux_*
|
||||
|
||||
|
||||
# Upload wheels
|
||||
/opt/python/cp37-cp37m/bin/pip install -U awscli
|
||||
|
@ -1,4 +1,4 @@
|
||||
__version__ = "0.13.0.dev0"
|
||||
__version__ = "0.13.0"
|
||||
|
||||
from typing import Tuple, Union, Tuple, List
|
||||
from enum import Enum
|
||||
|
@ -8,7 +8,7 @@ extras["dev"] = extras["testing"]
|
||||
|
||||
setup(
|
||||
name="tokenizers",
|
||||
version="0.13.0.dev0",
|
||||
version="0.13.0",
|
||||
description="Fast and Customizable Tokenizers",
|
||||
long_description=open("README.md", "r", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
|
@ -1,4 +0,0 @@
|
||||
from tokenizers import Tokenizer
|
||||
|
||||
|
||||
tokenizer = Tokenizer.from_file("/home/nicolas/Downloads/tokenizer-wiki.txt")
|
Reference in New Issue
Block a user