diff --git a/.github/workflows/python-release-conda.yml b/.github/workflows/python-release-conda.yml deleted file mode 100644 index eb9c1d6b..00000000 --- a/.github/workflows/python-release-conda.yml +++ /dev/null @@ -1,129 +0,0 @@ -name: Python Release - Conda - -on: - push: - tags: - - v* - -env: - ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} - -jobs: - build_and_package: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [windows-latest, macos-latest] - python: ["3.8", "3.9", "3.10", "3.11"] - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install miniconda - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: ${{ matrix.python }} - - - name: Conda info - shell: bash -l {0} - run: conda info - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Setup conda env - shell: bash -l {0} - run: | - conda install setuptools-rust - conda install -c defaults anaconda-client conda-build - - - name: Extract version - shell: bash -l {0} - working-directory: ./bindings/python - run: echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV - - - name: Build conda packages - shell: bash -l {0} - run: | - conda info - conda list - conda-build .github/conda --python=${{ matrix.python }} - - - name: Upload to Anaconda - shell: bash -l {0} - run: | - anaconda upload `conda-build .github/conda --output` --force - - build_and_package_linux: - runs-on: ubuntu-latest - container: quay.io/pypa/manylinux2014_x86_64 - - strategy: - fail-fast: false - matrix: - python: [38, 39, 310, 311] - include: - - python: 38 - checksum: e2a4438671e0e42c5bba14cb51de6ce9763938184d6ca2967340bbe972bbe7e6 - - python: 39 - checksum: 9829d95f639bd0053b2ed06d1204e60644617bf37dd5cc57523732e0e8d64516 - - python: 310 - checksum: ea5e6e8a3d5a0247b9df85382d27220fac8e59b5778fd313c5913879cd9baafc - - python: 311 - checksum: 634d76df5e489c44ade4085552b97bebc786d49245ed1a830022b0b406de5817 - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install miniconda - run: | - yum install -y wget openssl-devel - export FILENAME=Miniconda3-py${{ matrix.python }}_23.5.2-0-Linux-x86_64.sh - wget https://repo.anaconda.com/miniconda/$FILENAME - sha256sum $FILENAME | awk '$1=="${{ matrix.checksum}}"{print"good to go"}' - bash $FILENAME -b -p $HOME/miniconda - source $HOME/miniconda/bin/activate - - - name: Show glibc information - shell: bash -l {0} - run: ldd --version - - - name: Conda info - shell: bash -l {0} - run: | - source $HOME/miniconda/bin/activate - conda info - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Setup conda env - shell: bash -l {0} - run: | - source $HOME/miniconda/bin/activate - conda install setuptools-rust - conda install -c defaults anaconda-client conda-build - - - name: Extract version - shell: bash -l {0} - working-directory: ./bindings/python - run: | - source $HOME/miniconda/bin/activate - echo "TOKENIZERS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV - - - name: Build conda packages - shell: bash -l {0} - run: | - source $HOME/miniconda/bin/activate - conda info - conda list - conda-build .github/conda --python=${{ matrix.python }} - - - name: Upload to Anaconda - shell: bash -l {0} - run: | - source $HOME/miniconda/bin/activate - anaconda upload `conda-build .github/conda --output` --force diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index 7f254f28..4251ef99 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -37,7 +37,7 @@ jobs: platform: linux - os: windows ls: dir - interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10 + interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10 - os: windows ls: dir target: x86_64 @@ -54,7 +54,7 @@ jobs: # interpreter: 3.11 3.12 - os: macos target: aarch64 - interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10 + interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10 - os: ubuntu platform: linux target: i686 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 367ded9e..ef8bf441 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -55,6 +55,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + - name: Install Rust uses: actions-rs/toolchain@v1 @@ -76,7 +77,7 @@ jobs: - name: Cache Cargo Registry - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/.cargo/registry key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} @@ -107,7 +108,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: audit - args: -D warnings -f ./bindings/python/Cargo.lock + args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014 - name: Install working-directory: ./bindings/python diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 366a625a..9b9dd8e4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -94,7 +94,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: audit - args: -D warnings -f ./tokenizers/Cargo.lock + args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014 # Verify that Readme.md is up to date. - name: Make sure, Readme generated from lib.rs matches actual Readme diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml index 9cbbf680..5595aee2 100644 --- a/.github/workflows/trufflehog.yml +++ b/.github/workflows/trufflehog.yml @@ -12,4 +12,7 @@ jobs: with: fetch-depth: 0 - name: Secret Scanning - uses: trufflesecurity/trufflehog@main + uses: trufflesecurity/trufflehog@853e1e8d249fd1e29d0fcc7280d29b03df3d643d + with: + # exclude buggy postgres detector that is causing false positives and not relevant to our codebase + extra_args: --results=verified,unknown --exclude-detectors=postgres diff --git a/bindings/node/Cargo.toml b/bindings/node/Cargo.toml index bcf6e3e7..cf1e51e9 100644 --- a/bindings/node/Cargo.toml +++ b/bindings/node/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Nicolas Patry "] edition = "2021" name = "node" -version = "0.21.0-dev.0" +version = "0.21.2-dev.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 98bf2d69..6e8b0c34 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tokenizers-python" -version = "0.21.0-dev.0" +version = "0.21.2-dev.0" authors = ["Anthony MOI "] edition = "2021" diff --git a/tokenizers/Cargo.toml b/tokenizers/Cargo.toml index 0633b8ef..cd31bdba 100644 --- a/tokenizers/Cargo.toml +++ b/tokenizers/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Anthony MOI ", "Nicolas Patry "] edition = "2018" name = "tokenizers" -version = "0.21.0-dev.0" +version = "0.21.2-dev.0" homepage = "https://github.com/huggingface/tokenizers" repository = "https://github.com/huggingface/tokenizers" documentation = "https://docs.rs/tokenizers/" diff --git a/tokenizers/src/processors/template.rs b/tokenizers/src/processors/template.rs index 74b4fe1c..6c9cf9a7 100644 --- a/tokenizers/src/processors/template.rs +++ b/tokenizers/src/processors/template.rs @@ -466,7 +466,7 @@ impl TemplateProcessingBuilder { } fn validate(&self) -> std::result::Result<(), String> { - let pair_has_both = self.pair.as_ref().map_or(true, |pair| { + let pair_has_both = self.pair.as_ref().is_none_or(|pair| { let mut has_a = false; let mut has_b = false; for piece in &pair.0 {