Mirror of https://github.com/mii443/tokenizers.git (synced 2025-08-22 16:25:30 +00:00)
Adding rust audit. (#1099)

* Adding rust audit.
* Update clap version + derive_builder (they clashed).
* Ignoring specific CVE which can be ignored https://github.com/Azure/iot-identity-service/issues/481
* Updating python lock.
* Revert `derive-builder` update.
* Adding back help msg.
.github/workflows/python.yml (vendored, 8 changed lines):
@@ -95,6 +95,14 @@ jobs:
           command: clippy
           args: --manifest-path ./bindings/python/Cargo.toml --all-targets --all-features -- -D warnings

+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071
+
       - name: Install
         working-directory: ./bindings/python
         run: |
.github/workflows/rust.yml (vendored, 8 changed lines):
@@ -81,6 +81,14 @@ jobs:
           command: test
           args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc

+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2020-0071
+
       # Verify that Readme.md is up to date.
       - name: Make sure, Readme generated from lib.rs matches actual Readme
         if: matrix.os == 'ubuntu-latest'
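Both workflows gain the same audit step: `cargo audit` is run against the pinned lock file, `-D warnings` turns any advisory into a hard failure, and `--ignore RUSTSEC-2020-0071` whitelists the one advisory the comments flag as likely not affecting this crate (it enters the tree through chrono, per the linked issue). Assuming `cargo-audit` is installed locally (`cargo install cargo-audit`), the same check can be reproduced with the exact `args` shown, e.g. `cargo audit -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2020-0071`.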
bindings/node/native/Cargo.lock (generated, 870 changed lines): diff suppressed because it is too large.
bindings/python/Cargo.lock (generated, 878 changed lines): diff suppressed because it is too large.
tokenizers/Cargo.toml:

@@ -1,5 +1,5 @@
 [package]
-authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
+authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
 edition = "2018"
 name = "tokenizers"
 version = "0.13.2"
@@ -48,7 +48,7 @@ rayon = "1.3"
 rayon-cond = "0.1"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
-clap = { version = "2.33", optional = true }
+clap = { version = "4.0", features=["derive"], optional = true }
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
 unicode-segmentation = "1.6"
@@ -77,6 +77,6 @@ cli = ["clap"]
 unstable_wasm = ["fancy-regex", "getrandom/js"]

 [dev-dependencies]
-criterion = "0.3"
+criterion = "0.4"
 tempfile = "3.1"
 assert_approx_eq = "1.1"
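The criterion bump from 0.3 to 0.4 is a dev-only change affecting the bench suite. As a reference point, a minimal criterion 0.4 benchmark looks like the sketch below; the benchmark name and workload are made up for illustration, not taken from this repository.

use criterion::{criterion_group, criterion_main, Criterion};

// Hypothetical benchmark for illustration; not part of this commit.
fn bench_split(c: &mut Criterion) {
    c.bench_function("split_whitespace", |b| {
        b.iter(|| "the quick brown fox".split_whitespace().count())
    });
}

criterion_group!(benches, bench_split);
criterion_main!(benches);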
tokenizers/src/cli.rs:

@@ -2,21 +2,32 @@
 //! This is the CLI binary for the Tokenizers project
 //!

-use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
+use clap::{Parser, Subcommand};
 use std::io::{self, BufRead, Write};
 use tokenizers::models::bpe::BPE;
 use tokenizers::pre_tokenizers::byte_level::ByteLevel;
 use tokenizers::tokenizer::{AddedToken, Result};
 use tokenizers::Tokenizer;

-fn shell(matches: &ArgMatches) -> Result<()> {
-    let vocab = matches
-        .value_of("vocab")
-        .expect("Must give a vocab.json file");
-    let merges = matches
-        .value_of("merges")
-        .expect("Must give a merges.txt file");
-
+/// Generate custom Tokenizers or use existing ones
+#[derive(Parser, Debug)]
+#[command(author, version)]
+struct Args {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    Shell {
+        /// Path to the vocab.json file
+        vocab: String,
+        /// Path to the merges.txt file
+        merges: String,
+    },
+}
+
+fn shell(vocab: &str, merges: &str) -> Result<()> {
     let bpe = BPE::from_file(vocab, merges).build()?;
     let mut tokenizer = Tokenizer::new(bpe);
     tokenizer
@@ -55,33 +66,8 @@ fn shell(matches: &ArgMatches) -> Result<()> {
 }

 fn main() -> Result<()> {
-    let matches = App::new("tokenizers")
-        .version("0.0.1")
-        .author("Anthony M. <anthony@huggingface.co>")
-        .about("Generate custom Tokenizers or use existing ones")
-        .setting(AppSettings::SubcommandRequiredElseHelp)
-        .subcommand(
-            SubCommand::with_name("shell")
-                .about("Interactively test a tokenizer")
-                .arg(
-                    Arg::with_name("vocab")
-                        .long("vocab")
-                        .value_name("VOCAB_FILE")
-                        .help("Path to the vocab.json file")
-                        .required(true),
-                )
-                .arg(
-                    Arg::with_name("merges")
-                        .long("merges")
-                        .value_name("MERGES_FILE")
-                        .help("Path to the merges.txt file")
-                        .required(true),
-                ),
-        )
-        .get_matches();
-
-    match matches.subcommand() {
-        ("shell", matches) => shell(matches.unwrap()),
-        (subcommand, _) => panic!("Unknown subcommand {}", subcommand),
-    }
+    let args = Args::parse();
+    match args.command {
+        Command::Shell { vocab, merges } => shell(&vocab, &merges),
+    }
 }
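One behavioral consequence of the derive rewrite above: bare String fields in a clap 4 subcommand become required positional arguments, whereas the old builder code declared named --vocab/--merges flags, so an invocation would now look like `tokenizers shell vocab.json merges.txt`. A minimal sketch, assuming one wanted to keep the old flag-style interface instead, would add #[arg(long)] to each field:

// Hypothetical variant, not part of this commit: keeping the clap 2 style
// named flags (--vocab / --merges) under the clap 4 derive API.
use clap::{Parser, Subcommand};

#[derive(Parser, Debug)]
#[command(author, version)]
struct Args {
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand, Debug)]
enum Command {
    /// Interactively test a tokenizer
    Shell {
        /// Path to the vocab.json file
        #[arg(long)]
        vocab: String,
        /// Path to the merges.txt file
        #[arg(long)]
        merges: String,
    },
}

fn main() {
    let args = Args::parse();
    match args.command {
        Command::Shell { vocab, merges } => {
            println!("would load {vocab} and {merges}");
        }
    }
}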