Enabling static interpreter embedding for manylinux. (#1064)

* Removing dead file.

* Checking that we can distribute with static Python embedding for manylinux

* Manylinux embed interpreter.

* Building manylinux wheels with static embedding

* Better script.

* typo.

* Using a dummy feature?

* default features ?

* Back into order.

* Fixing manylinux ??.

* Local dir.

* Missing star.

* Makedir ?

* Monkey coding this.

* extension module ?

* Building with default features `RustExtension`.

* bdist_wheel + rustextension any better ?

* update rust-py version.

* Forcing extension module.

* No default features.

* Remove py37 out of spite

* Revert "Remove py37 out of spite"

This reverts commit 6ab7facd792b59c2e30be82fe42816d24c32cf0d.

* Really extraneous feature.

* Fix build wheels.

* Putting things back in place.
This commit is contained in:
Nicolas Patry
2022-09-21 12:18:46 +02:00
committed by GitHub
parent 655f4057b7
commit 63082c4d11
7 changed files with 35 additions and 28 deletions

View File

@ -1200,9 +1200,9 @@ dependencies = [
[[package]]
name = "pyo3"
version = "0.16.2"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a378727d5fdcaafd15b5afe9842cff1c25fdc43f62a162ffda2263c57ad98703"
checksum = "0220c44442c9b239dd4357aa856ac468a4f5e1f0df19ddb89b2522952eb4c6ca"
dependencies = [
"cfg-if",
"indoc",
@ -1216,18 +1216,19 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
version = "0.16.2"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fbb27a3e96edd34c13d97d0feefccc90a79270c577c66e19d95af8323823dfc"
checksum = "9c819d397859445928609d0ec5afc2da5204e0d0f73d6bf9e153b04e83c9cdc2"
dependencies = [
"once_cell",
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.16.2"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b719fff844bcf3f911132112ec06527eb195f6a98e0c42cf97e1118929fd4ea"
checksum = "ca882703ab55f54702d7bfe1189b41b0af10272389f04cae38fe4cd56c65f75f"
dependencies = [
"libc",
"pyo3-build-config",
@ -1235,9 +1236,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.16.2"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f795e52d3320abb349ca28b501a7112154a87f353fae1c811deecd58e99cfa9b"
checksum = "568749402955ad7be7bad9a09b8593851cd36e549ac90bfd44079cea500f3f21"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@ -1247,12 +1248,11 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.16.2"
version = "0.16.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e03aa57a3bb7b96982958088df38302a139df4eef54671bc595f26556cb75b"
checksum = "611f64e82d98f447787e82b8e7b0ebc681e1eb78fc1252668b2c605ffb4e1eb8"
dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
@ -1611,6 +1611,12 @@ dependencies = [
"xattr",
]
[[package]]
name = "target-lexicon"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1"
[[package]]
name = "tempfile"
version = "3.3.0"
@ -1733,7 +1739,7 @@ dependencies = [
[[package]]
name = "tokenizers-python"
version = "0.11.0"
version = "0.13.0"
dependencies = [
"env_logger",
"itertools 0.9.0",

View File

@ -1,6 +1,6 @@
[package]
name = "tokenizers-python"
version = "0.11.0"
version = "0.13.0"
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
edition = "2018"
@ -14,7 +14,7 @@ serde = { version = "1.0", features = [ "rc", "derive" ]}
serde_json = "1.0"
libc = "0.2"
env_logger = "0.7.1"
pyo3 = "0.16.2"
pyo3 = { version = "0.16.2", features = ["extension-module"] }
numpy = "0.16.2"
ndarray = "0.13"
onig = { version = "6.0", default-features = false }
@ -26,7 +26,7 @@ path = "../../tokenizers"
[dev-dependencies]
tempfile = "3.1"
pyo3 = { version = "0.16.2", features = ["auto-initialize"] }
[features]
default = ["pyo3/extension-module"]
test = ["pyo3/auto-initialize"]

View File

@ -20,7 +20,7 @@ TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json
test: $(TESTS_RESOURCES)
pip install pytest requests setuptools_rust numpy pyarrow datasets
python -m pytest -s -v tests
cargo test --no-default-features
cargo test --features test
$(DATA_DIR)/big.txt :
$(dir_guard)

View File

@ -1,23 +1,28 @@
#!/bin/bash
set -ex
curl https://sh.rustup.rs -sSf | sh -s -- -y
if ! command -v cargo &> /dev/null
then
curl https://sh.rustup.rs -sSf | sh -s -- -y
fi
export PATH="$HOME/.cargo/bin:$PATH"
for PYBIN in /opt/python/{cp37-cp37m,cp38-cp38,cp39-cp39,cp310-cp310}/bin; do
for PYBIN in /opt/python/cp{37,38,39,310}*/bin; do
export PYTHON_SYS_EXECUTABLE="$PYBIN/python"
"${PYBIN}/pip" install -U setuptools-rust==0.11.3
"${PYBIN}/pip" install -U setuptools-rust setuptools wheel
"${PYBIN}/python" setup.py bdist_wheel
rm -rf build/*
done
for whl in dist/*.whl; do
for whl in ./dist/*.whl; do
auditwheel repair "$whl" -w dist/
done
# Keep only manylinux wheels
rm dist/*-linux_*
rm ./dist/*-linux_*
# Upload wheels
/opt/python/cp37-cp37m/bin/pip install -U awscli

View File

@ -1,4 +1,4 @@
__version__ = "0.13.0.dev0"
__version__ = "0.13.0"
from typing import Tuple, Union, Tuple, List
from enum import Enum

View File

@ -8,7 +8,7 @@ extras["dev"] = extras["testing"]
setup(
name="tokenizers",
version="0.13.0.dev0",
version="0.13.0",
description="Fast and Customizable Tokenizers",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",

View File

@ -1,4 +0,0 @@
from tokenizers import Tokenizer
tokenizer = Tokenizer.from_file("/home/nicolas/Downloads/tokenizer-wiki.txt")