Adding rust audit. (#1099)
* Adding rust audit.
* Update clap version + derive_builder (they clashed).
* Ignoring specific CVE which can be ignored: https://github.com/Azure/iot-identity-service/issues/481
* Updating python lock.
* Revert `derive-builder` update.
* Adding back help msg.

.github/workflows/python.yml (vendored, 8 changed lines)

@@ -95,6 +95,14 @@ jobs:
           command: clippy
           args: --manifest-path ./bindings/python/Cargo.toml --all-targets --all-features -- -D warnings
 
+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071
+
       - name: Install
         working-directory: ./bindings/python
         run: |
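
The new step runs cargo-audit over the Python binding's lock file on every CI run. As a sketch for reproducing it locally (assuming cargo-audit is installed, e.g. via `cargo install cargo-audit`; the flags below are taken verbatim from the step):

    cargo audit -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071

Here `-D warnings` promotes audit warnings to hard errors, `-f` points at the Cargo.lock to scan, and `--ignore` skips the one advisory called out in the comments.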
|
.github/workflows/rust.yml (vendored, 8 changed lines)

@@ -81,6 +81,14 @@ jobs:
           command: test
           args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc
 
+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2020-0071
+
       # Verify that Readme.md is up to date.
       - name: Make sure, Readme generated from lib.rs matches actual Readme
         if: matrix.os == 'ubuntu-latest'
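
For background on the ignored advisory (context, not part of the diff): RUSTSEC-2020-0071 is the potential-segfault advisory against the old time 0.1 series, which chrono 0.4 pulls in as a transitive dependency; the linked chrono issue tracks that dependency, and as the comment says, the affected code path probably isn't reachable from this crate.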
|
bindings/node/native/Cargo.lock (generated, 870 changed lines): file diff suppressed because it is too large.

bindings/python/Cargo.lock (generated, 878 changed lines): file diff suppressed because it is too large.

tokenizers/Cargo.toml

@@ -1,5 +1,5 @@
 [package]
-authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
+authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
 edition = "2018"
 name = "tokenizers"
 version = "0.13.2"
@@ -48,7 +48,7 @@ rayon = "1.3"
 rayon-cond = "0.1"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
-clap = { version = "2.33", optional = true }
+clap = { version = "4.0", features=["derive"], optional = true }
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
 unicode-segmentation = "1.6"
@@ -77,6 +77,6 @@ cli = ["clap"]
 unstable_wasm = ["fancy-regex", "getrandom/js"]
 
 [dev-dependencies]
-criterion = "0.3"
+criterion = "0.4"
 tempfile = "3.1"
 assert_approx_eq = "1.1"
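
Two dependency bumps do the real work here: clap 2.33 to 4.0 with the `derive` feature (the CLI rewrite below is the matching code change), and criterion 0.3 to 0.4 in dev-dependencies. The criterion bump is routine, since the macro-based bench entry points keep the same shape across these versions; a minimal self-contained sketch of that API (a hypothetical no-op bench, not one from this repo):

    use criterion::{black_box, criterion_group, criterion_main, Criterion};

    // Hypothetical no-op benchmark; a real bench would call into tokenizers.
    fn bench_noop(c: &mut Criterion) {
        c.bench_function("noop", |b| b.iter(|| black_box(1 + 1)));
    }

    criterion_group!(benches, bench_noop);
    criterion_main!(benches);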
|

tokenizers/src/cli.rs

@@ -2,21 +2,32 @@
 //! This is the CLI binary for the Tokenizers project
 //!
 
-use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
+use clap::{Parser, Subcommand};
 use std::io::{self, BufRead, Write};
 use tokenizers::models::bpe::BPE;
 use tokenizers::pre_tokenizers::byte_level::ByteLevel;
 use tokenizers::tokenizer::{AddedToken, Result};
 use tokenizers::Tokenizer;
 
-fn shell(matches: &ArgMatches) -> Result<()> {
-    let vocab = matches
-        .value_of("vocab")
-        .expect("Must give a vocab.json file");
-    let merges = matches
-        .value_of("merges")
-        .expect("Must give a merges.txt file");
-
+/// Generate custom Tokenizers or use existing ones
+#[derive(Parser, Debug)]
+#[command(author, version)]
+struct Args {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    Shell {
+        /// Path to the vocab.json file
+        vocab: String,
+        /// Path to the merges.txt file
+        merges: String,
+    },
+}
+
+fn shell(vocab: &str, merges: &str) -> Result<()> {
     let bpe = BPE::from_file(vocab, merges).build()?;
     let mut tokenizer = Tokenizer::new(bpe);
     tokenizer
@@ -55,33 +66,8 @@ fn shell(matches: &ArgMatches) -> Result<()> {
 }
 
 fn main() -> Result<()> {
-    let matches = App::new("tokenizers")
-        .version("0.0.1")
-        .author("Anthony M. <anthony@huggingface.co>")
-        .about("Generate custom Tokenizers or use existing ones")
-        .setting(AppSettings::SubcommandRequiredElseHelp)
-        .subcommand(
-            SubCommand::with_name("shell")
-                .about("Interactively test a tokenizer")
-                .arg(
-                    Arg::with_name("vocab")
-                        .long("vocab")
-                        .value_name("VOCAB_FILE")
-                        .help("Path to the vocab.json file")
-                        .required(true),
-                )
-                .arg(
-                    Arg::with_name("merges")
-                        .long("merges")
-                        .value_name("MERGES_FILE")
-                        .help("Path to the merges.txt file")
-                        .required(true),
-                ),
-        )
-        .get_matches();
-
-    match matches.subcommand() {
-        ("shell", matches) => shell(matches.unwrap()),
-        (subcommand, _) => panic!("Unknown subcommand {}", subcommand),
-    }
+    let args = Args::parse();
+    match args.command {
+        Command::Shell { vocab, merges } => shell(&vocab, &merges),
+    }
 }
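
Two practical consequences of the derive rewrite are worth spelling out. First, doc comments on the fields (`/// Path to the vocab.json file`) become the generated help text, which is how the help messages survive the migration (the "Adding back help msg" bullet above). Second, bare `String` fields in a clap derive struct are required positional arguments, so the old `--vocab`/`--merges` flags become positionals: the binary is now invoked as `shell vocab.json merges.txt` instead of `shell --vocab vocab.json --merges merges.txt`. A standalone sketch of the same pattern (compilable without the tokenizers crate, assuming clap 4 with the `derive` feature; names mirror the diff):

    use clap::{Parser, Subcommand};

    /// Generate custom Tokenizers or use existing ones
    #[derive(Parser, Debug)]
    #[command(author, version)]
    struct Args {
        #[command(subcommand)]
        command: Command,
    }

    #[derive(Subcommand, Debug)]
    enum Command {
        /// Interactively test a tokenizer
        Shell {
            /// Path to the vocab.json file
            vocab: String,
            /// Path to the merges.txt file
            merges: String,
        },
    }

    fn main() {
        let args = Args::parse();
        match args.command {
            // `vocab` and `merges` are positional: `shell vocab.json merges.txt`
            Command::Shell { vocab, merges } => {
                println!("shell: vocab={vocab}, merges={merges}");
            }
        }
    }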
|