Adding Rust audit. (#1099)

* Adding Rust audit.

* Update clap version + derive_builder (their version requirements clashed).

* Ignoring a specific CVE that most likely does not affect this crate:

https://github.com/Azure/iot-identity-service/issues/481

* Updating Python lock file.

* Revert `derive-builder` update.

* Adding back the help messages.
Nicolas Patry
2022-11-09 12:59:36 +01:00
committed by GitHub
parent 99c06c82e0
commit bbae829a72
6 changed files with 980 additions and 848 deletions


@@ -95,6 +95,14 @@ jobs:
           command: clippy
           args: --manifest-path ./bindings/python/Cargo.toml --all-targets --all-features -- -D warnings
+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071
       - name: Install
         working-directory: ./bindings/python
         run: |
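
The same check can be reproduced locally before pushing. A minimal sketch, assuming cargo-audit is installed and the command is run from the repository root (the flags mirror the step above):

    cargo install cargo-audit
    cargo audit -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071

Here -D warnings turns audit warnings into hard failures, -f points at a specific lock file, and --ignore skips the one advisory called out in the comment.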


@@ -81,6 +81,14 @@ jobs:
           command: test
           args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc
+      - name: Run Audit
+        uses: actions-rs/cargo@v1
+        with:
+          command: audit
+          # ignoring specific CVE which probably isn't affecting this crate
+          # https://github.com/chronotope/chrono/issues/602
+          args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2020-0071
       # Verify that Readme.md is up to date.
       - name: Make sure, Readme generated from lib.rs matches actual Readme
         if: matrix.os == 'ubuntu-latest'

File diff suppressed because it is too large

File diff suppressed because it is too large
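
These two suppressed diffs are most likely the regenerated Cargo.lock files for the Python bindings and the core crate, which account for the bulk of the 980 additions and 848 deletions. A sketch of how they would be refreshed locally after editing the manifests (assuming each manifest drives its own lock file):

    cargo update --manifest-path ./bindings/python/Cargo.toml
    cargo update --manifest-path ./tokenizers/Cargo.toml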


@@ -1,5 +1,5 @@
 [package]
-authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
+authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
 edition = "2018"
 name = "tokenizers"
 version = "0.13.2"
@@ -48,7 +48,7 @@ rayon = "1.3"
 rayon-cond = "0.1"
 serde = { version = "1.0", features = [ "derive" ] }
 serde_json = "1.0"
-clap = { version = "2.33", optional = true }
+clap = { version = "4.0", features=["derive"], optional = true }
 unicode-normalization-alignments = "0.1"
 unicode_categories = "0.1"
 unicode-segmentation = "1.6"
@@ -77,6 +77,6 @@ cli = ["clap"]
 unstable_wasm = ["fancy-regex", "getrandom/js"]
 
 [dev-dependencies]
-criterion = "0.3"
+criterion = "0.4"
 tempfile = "3.1"
 assert_approx_eq = "1.1"
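
Since clap is optional and only enters the build through the cli feature (cli = ["clap"] in the hunk context above), the version bump is easy to miss in a default build. A quick local check that the clap 4 upgrade still compiles, assuming it is run from the repository root:

    cargo check --manifest-path ./tokenizers/Cargo.toml --features cli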


@@ -2,21 +2,32 @@
 //! This is the CLI binary for the Tokenizers project
 //!
 
-use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
+use clap::{Parser, Subcommand};
 use std::io::{self, BufRead, Write};
 use tokenizers::models::bpe::BPE;
 use tokenizers::pre_tokenizers::byte_level::ByteLevel;
 use tokenizers::tokenizer::{AddedToken, Result};
 use tokenizers::Tokenizer;
 
-fn shell(matches: &ArgMatches) -> Result<()> {
-    let vocab = matches
-        .value_of("vocab")
-        .expect("Must give a vocab.json file");
-    let merges = matches
-        .value_of("merges")
-        .expect("Must give a merges.txt file");
-
+/// Generate custom Tokenizers or use existing ones
+#[derive(Parser, Debug)]
+#[command(author, version)]
+struct Args {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    Shell {
+        /// Path to the vocab.json file
+        vocab: String,
+        /// Path to the merges.txt file
+        merges: String,
+    },
+}
+
+fn shell(vocab: &str, merges: &str) -> Result<()> {
     let bpe = BPE::from_file(vocab, merges).build()?;
     let mut tokenizer = Tokenizer::new(bpe);
     tokenizer
@@ -55,33 +66,8 @@ fn shell(matches: &ArgMatches) -> Result<()> {
 }
 
 fn main() -> Result<()> {
-    let matches = App::new("tokenizers")
-        .version("0.0.1")
-        .author("Anthony M. <anthony@huggingface.co>")
-        .about("Generate custom Tokenizers or use existing ones")
-        .setting(AppSettings::SubcommandRequiredElseHelp)
-        .subcommand(
-            SubCommand::with_name("shell")
-                .about("Interactively test a tokenizer")
-                .arg(
-                    Arg::with_name("vocab")
-                        .long("vocab")
-                        .value_name("VOCAB_FILE")
-                        .help("Path to the vocab.json file")
-                        .required(true),
-                )
-                .arg(
-                    Arg::with_name("merges")
-                        .long("merges")
-                        .value_name("MERGES_FILE")
-                        .help("Path to the merges.txt file")
-                        .required(true),
-                ),
-        )
-        .get_matches();
-
-    match matches.subcommand() {
-        ("shell", matches) => shell(matches.unwrap()),
-        (subcommand, _) => panic!("Unknown subcommand {}", subcommand),
+    let args = Args::parse();
+    match args.command {
+        Command::Shell { vocab, merges } => shell(&vocab, &merges),
     }
 }
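
One behavioral consequence of the migration is worth noting: the clap 2 code declared --vocab and --merges as required named flags, whereas bare String fields in a clap derive struct become required positional arguments. Assuming the binary is named cli (hypothetical; use whatever [[bin]] name the crate actually defines), the invocation changes roughly like this:

    # before (clap 2, named flags)
    cli shell --vocab vocab.json --merges merges.txt

    # after (clap 4 derive, positional arguments)
    cli shell vocab.json merges.txt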