mirror of
https://github.com/mii443/tokenizers.git
synced 2025-08-22 16:25:30 +00:00
Update the release builds following 0.21.1. (#1746)
* Update the release builds following 0.21.1. * Clippy fix.
This commit is contained in:
129
.github/workflows/python-release-conda.yml
vendored
129
.github/workflows/python-release-conda.yml
vendored
@ -1,129 +0,0 @@
|
|||||||
name: Python Release - Conda
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- v*
|
|
||||||
|
|
||||||
env:
|
|
||||||
ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build_and_package:
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
os: [windows-latest, macos-latest]
|
|
||||||
python: ["3.8", "3.9", "3.10", "3.11"]
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Install miniconda
|
|
||||||
uses: conda-incubator/setup-miniconda@v3
|
|
||||||
with:
|
|
||||||
auto-update-conda: true
|
|
||||||
python-version: ${{ matrix.python }}
|
|
||||||
|
|
||||||
- name: Conda info
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: conda info
|
|
||||||
|
|
||||||
- name: Install Rust
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
|
|
||||||
- name: Setup conda env
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
conda install setuptools-rust
|
|
||||||
conda install -c defaults anaconda-client conda-build
|
|
||||||
|
|
||||||
- name: Extract version
|
|
||||||
shell: bash -l {0}
|
|
||||||
working-directory: ./bindings/python
|
|
||||||
run: echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Build conda packages
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
conda info
|
|
||||||
conda list
|
|
||||||
conda-build .github/conda --python=${{ matrix.python }}
|
|
||||||
|
|
||||||
- name: Upload to Anaconda
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
anaconda upload `conda-build .github/conda --output` --force
|
|
||||||
|
|
||||||
build_and_package_linux:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
container: quay.io/pypa/manylinux2014_x86_64
|
|
||||||
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
python: [38, 39, 310, 311]
|
|
||||||
include:
|
|
||||||
- python: 38
|
|
||||||
checksum: e2a4438671e0e42c5bba14cb51de6ce9763938184d6ca2967340bbe972bbe7e6
|
|
||||||
- python: 39
|
|
||||||
checksum: 9829d95f639bd0053b2ed06d1204e60644617bf37dd5cc57523732e0e8d64516
|
|
||||||
- python: 310
|
|
||||||
checksum: ea5e6e8a3d5a0247b9df85382d27220fac8e59b5778fd313c5913879cd9baafc
|
|
||||||
- python: 311
|
|
||||||
checksum: 634d76df5e489c44ade4085552b97bebc786d49245ed1a830022b0b406de5817
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Install miniconda
|
|
||||||
run: |
|
|
||||||
yum install -y wget openssl-devel
|
|
||||||
export FILENAME=Miniconda3-py${{ matrix.python }}_23.5.2-0-Linux-x86_64.sh
|
|
||||||
wget https://repo.anaconda.com/miniconda/$FILENAME
|
|
||||||
sha256sum $FILENAME | awk '$1=="${{ matrix.checksum}}"{print"good to go"}'
|
|
||||||
bash $FILENAME -b -p $HOME/miniconda
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
|
|
||||||
- name: Show glibc information
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: ldd --version
|
|
||||||
|
|
||||||
- name: Conda info
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
conda info
|
|
||||||
|
|
||||||
- name: Install Rust
|
|
||||||
uses: dtolnay/rust-toolchain@stable
|
|
||||||
|
|
||||||
- name: Setup conda env
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
conda install setuptools-rust
|
|
||||||
conda install -c defaults anaconda-client conda-build
|
|
||||||
|
|
||||||
- name: Extract version
|
|
||||||
shell: bash -l {0}
|
|
||||||
working-directory: ./bindings/python
|
|
||||||
run: |
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Build conda packages
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
conda info
|
|
||||||
conda list
|
|
||||||
conda-build .github/conda --python=${{ matrix.python }}
|
|
||||||
|
|
||||||
- name: Upload to Anaconda
|
|
||||||
shell: bash -l {0}
|
|
||||||
run: |
|
|
||||||
source $HOME/miniconda/bin/activate
|
|
||||||
anaconda upload `conda-build .github/conda --output` --force
|
|
4
.github/workflows/python-release.yml
vendored
4
.github/workflows/python-release.yml
vendored
@ -37,7 +37,7 @@ jobs:
|
|||||||
platform: linux
|
platform: linux
|
||||||
- os: windows
|
- os: windows
|
||||||
ls: dir
|
ls: dir
|
||||||
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10
|
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10
|
||||||
- os: windows
|
- os: windows
|
||||||
ls: dir
|
ls: dir
|
||||||
target: x86_64
|
target: x86_64
|
||||||
@ -54,7 +54,7 @@ jobs:
|
|||||||
# interpreter: 3.11 3.12
|
# interpreter: 3.11 3.12
|
||||||
- os: macos
|
- os: macos
|
||||||
target: aarch64
|
target: aarch64
|
||||||
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10
|
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10
|
||||||
- os: ubuntu
|
- os: ubuntu
|
||||||
platform: linux
|
platform: linux
|
||||||
target: i686
|
target: i686
|
||||||
|
5
.github/workflows/python.yml
vendored
5
.github/workflows/python.yml
vendored
@ -56,6 +56,7 @@ jobs:
|
|||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
|
||||||
- name: Install Rust
|
- name: Install Rust
|
||||||
uses: actions-rs/toolchain@v1
|
uses: actions-rs/toolchain@v1
|
||||||
with:
|
with:
|
||||||
@ -76,7 +77,7 @@ jobs:
|
|||||||
|
|
||||||
|
|
||||||
- name: Cache Cargo Registry
|
- name: Cache Cargo Registry
|
||||||
uses: actions/cache@v1
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: ~/.cargo/registry
|
path: ~/.cargo/registry
|
||||||
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
||||||
@ -107,7 +108,7 @@ jobs:
|
|||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
command: audit
|
command: audit
|
||||||
args: -D warnings -f ./bindings/python/Cargo.lock
|
args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014
|
||||||
|
|
||||||
- name: Install
|
- name: Install
|
||||||
working-directory: ./bindings/python
|
working-directory: ./bindings/python
|
||||||
|
2
.github/workflows/rust.yml
vendored
2
.github/workflows/rust.yml
vendored
@ -94,7 +94,7 @@ jobs:
|
|||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
command: audit
|
command: audit
|
||||||
args: -D warnings -f ./tokenizers/Cargo.lock
|
args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014
|
||||||
|
|
||||||
# Verify that Readme.md is up to date.
|
# Verify that Readme.md is up to date.
|
||||||
- name: Make sure, Readme generated from lib.rs matches actual Readme
|
- name: Make sure, Readme generated from lib.rs matches actual Readme
|
||||||
|
5
.github/workflows/trufflehog.yml
vendored
5
.github/workflows/trufflehog.yml
vendored
@ -12,4 +12,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Secret Scanning
|
- name: Secret Scanning
|
||||||
uses: trufflesecurity/trufflehog@main
|
uses: trufflesecurity/trufflehog@853e1e8d249fd1e29d0fcc7280d29b03df3d643d
|
||||||
|
with:
|
||||||
|
# exclude buggy postgres detector that is causing false positives and not relevant to our codebase
|
||||||
|
extra_args: --results=verified,unknown --exclude-detectors=postgres
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
name = "node"
|
name = "node"
|
||||||
version = "0.21.0-dev.0"
|
version = "0.21.2-dev.0"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tokenizers-python"
|
name = "tokenizers-python"
|
||||||
version = "0.21.0-dev.0"
|
version = "0.21.2-dev.0"
|
||||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
|
authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.21.0-dev.0"
|
version = "0.21.2-dev.0"
|
||||||
homepage = "https://github.com/huggingface/tokenizers"
|
homepage = "https://github.com/huggingface/tokenizers"
|
||||||
repository = "https://github.com/huggingface/tokenizers"
|
repository = "https://github.com/huggingface/tokenizers"
|
||||||
documentation = "https://docs.rs/tokenizers/"
|
documentation = "https://docs.rs/tokenizers/"
|
||||||
|
@ -466,7 +466,7 @@ impl TemplateProcessingBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn validate(&self) -> std::result::Result<(), String> {
|
fn validate(&self) -> std::result::Result<(), String> {
|
||||||
let pair_has_both = self.pair.as_ref().map_or(true, |pair| {
|
let pair_has_both = self.pair.as_ref().is_none_or(|pair| {
|
||||||
let mut has_a = false;
|
let mut has_a = false;
|
||||||
let mut has_b = false;
|
let mut has_b = false;
|
||||||
for piece in &pair.0 {
|
for piece in &pair.0 {
|
||||||
|
Reference in New Issue
Block a user