Adding rust audit. (#1099)

* Adding rust audit.

* Update clap version + derive_builder (they clashed).

* Ignoring a specific advisory (RUSTSEC-2020-0071) that probably does not affect this crate

https://github.com/Azure/iot-identity-service/issues/481

* Updating python lock.

* Revert `derive-builder` update.

* Adding back help msg.
This commit is contained in:
Nicolas Patry
2022-11-09 12:59:36 +01:00
committed by GitHub
parent 99c06c82e0
commit bbae829a72
6 changed files with 980 additions and 848 deletions

View File

@ -95,6 +95,14 @@ jobs:
command: clippy
args: --manifest-path ./bindings/python/Cargo.toml --all-targets --all-features -- -D warnings
- name: Run Audit
uses: actions-rs/cargo@v1
with:
command: audit
# Ignore advisory RUSTSEC-2020-0071, which probably does not affect this crate. See:
# https://github.com/chronotope/chrono/issues/602
args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2020-0071
- name: Install
working-directory: ./bindings/python
run: |

View File

@ -81,6 +81,14 @@ jobs:
command: test
args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc
- name: Run Audit
uses: actions-rs/cargo@v1
with:
command: audit
# Ignore advisory RUSTSEC-2020-0071, which probably does not affect this crate. See:
# https://github.com/chronotope/chrono/issues/602
args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2020-0071
# Verify that Readme.md is up to date.
- name: Make sure, Readme generated from lib.rs matches actual Readme
if: matrix.os == 'ubuntu-latest'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
[package]
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
edition = "2018"
name = "tokenizers"
version = "0.13.2"
@ -48,7 +48,7 @@ rayon = "1.3"
rayon-cond = "0.1"
serde = { version = "1.0", features = [ "derive" ] }
serde_json = "1.0"
clap = { version = "2.33", optional = true }
clap = { version = "4.0", features=["derive"], optional = true }
unicode-normalization-alignments = "0.1"
unicode_categories = "0.1"
unicode-segmentation = "1.6"
@ -77,6 +77,6 @@ cli = ["clap"]
unstable_wasm = ["fancy-regex", "getrandom/js"]
[dev-dependencies]
criterion = "0.3"
criterion = "0.4"
tempfile = "3.1"
assert_approx_eq = "1.1"

View File

@ -2,21 +2,32 @@
//! This is the CLI binary for the Tokenizers project
//!
use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
use clap::{Parser, Subcommand};
use std::io::{self, BufRead, Write};
use tokenizers::models::bpe::BPE;
use tokenizers::pre_tokenizers::byte_level::ByteLevel;
use tokenizers::tokenizer::{AddedToken, Result};
use tokenizers::Tokenizer;
fn shell(matches: &ArgMatches) -> Result<()> {
let vocab = matches
.value_of("vocab")
.expect("Must give a vocab.json file");
let merges = matches
.value_of("merges")
.expect("Must give a merges.txt file");
// Top-level command-line arguments of the CLI binary, parsed in `main` via
// `Args::parse()`.
//
// NOTE: the `///` line below is not just documentation. It carries the same
// text the CLI previously supplied as its "about" string, so it appears to be
// user-facing help output. Reword it only if a help-text change is intended;
// use plain `//` comments for maintainer notes.
/// Generate custom Tokenizers or use existing ones
#[derive(Parser, Debug)]
// NOTE(review): `author` and `version` are given without values, so they are
// presumably taken from the crate's Cargo metadata - confirm against the clap
// derive reference.
#[command(author, version)]
struct Args {
// The subcommand chosen on the command line; `main` matches on this field.
#[command(subcommand)]
command: Command,
}
// Subcommands accepted by the CLI. `Shell` is the only variant.
#[derive(Subcommand, Debug)]
enum Command {
// `main` forwards both fields to `shell(&vocab, &merges)`.
//
// NOTE(review): the earlier builder-based CLI declared these two values as
// `--vocab` / `--merges` long options and described the subcommand as
// "Interactively test a tokenizer". Neither a `long` attribute nor a
// variant-level `///` description is declared here - confirm that the
// arguments are meant to be positional and that the description was dropped
// on purpose.
//
// The `///` lines on the fields match the previous per-argument help strings,
// so treat them as user-facing text rather than free-form comments.
Shell {
/// Path to the vocab.json file
vocab: String,
/// Path to the merges.txt file
merges: String,
},
}
fn shell(vocab: &str, merges: &str) -> Result<()> {
let bpe = BPE::from_file(vocab, merges).build()?;
let mut tokenizer = Tokenizer::new(bpe);
tokenizer
@ -55,33 +66,8 @@ fn shell(matches: &ArgMatches) -> Result<()> {
}
fn main() -> Result<()> {
let matches = App::new("tokenizers")
.version("0.0.1")
.author("Anthony M. <anthony@huggingface.co>")
.about("Generate custom Tokenizers or use existing ones")
.setting(AppSettings::SubcommandRequiredElseHelp)
.subcommand(
SubCommand::with_name("shell")
.about("Interactively test a tokenizer")
.arg(
Arg::with_name("vocab")
.long("vocab")
.value_name("VOCAB_FILE")
.help("Path to the vocab.json file")
.required(true),
)
.arg(
Arg::with_name("merges")
.long("merges")
.value_name("MERGES_FILE")
.help("Path to the merges.txt file")
.required(true),
),
)
.get_matches();
match matches.subcommand() {
("shell", matches) => shell(matches.unwrap()),
(subcommand, _) => panic!("Unknown subcommand {}", subcommand),
let args = Args::parse();
match args.command {
Command::Shell { vocab, merges } => shell(&vocab, &merges),
}
}