diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml new file mode 100644 index 0000000..366b451 --- /dev/null +++ b/.github/workflows/rust-ci.yml @@ -0,0 +1,78 @@ +name: Rust + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + check: + name: Check + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + rust: [stable] + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.rust }} + override: true + - uses: actions-rs/cargo@v1 + with: + command: check + args: --workspace --examples + + test: + name: Test Suite + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + rust: [stable] + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.rust }} + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + args: --workspace --examples + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add clippy + - uses: actions-rs/cargo@v1 + with: + command: clippy + args: --workspace --tests --examples --all-targets --all-features -- -Dwarnings diff --git a/.gitignore b/.gitignore index 6985cf1..b99985e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,9 @@ Cargo.lock # MSVC Windows builds of rustc generate these, which store debugging information *.pdb + + +.debug +.vscode +runs/ +.DS_Store diff --git a/Cargo.toml 
b/Cargo.toml new file mode 100644 index 0000000..6b0cbc9 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "usls" +version = "0.0.1" +edition = "2021" +description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models." +repository = "https://github.com/jamjamjon/usls" +authors = ["Jamjamjon "] +license = "MIT" +readme = "README.md" + +[dependencies] +clap = { version = "4.2.4", features = ["derive"] } +image = { version = "0.24.7", default-features = false, features = [ + "jpeg", + "png", + "tiff", + "webp", + "webp-encoder", + "bmp" +]} +imageproc = { version = "0.23.0", default-features = false } +ndarray = { version = "0.15.6" } +# ort-sys = { version = "2.0.0-alpha.4" } +# ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "half", "ndarray", "cuda", "tensorrt", "coreml", "openvino"] } +ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "profiling", "half", "ndarray", "cuda", "tensorrt", "coreml", "ureq", "openvino"] } +rusttype = { version = "0.9", default-features = false } +anyhow = { version = "1.0.75" } +regex = { version = "1.5.4" } +rand = { version = "0.8.5" } +chrono = { version = "0.4.30" } +half = { version = "2.3.1" } +dirs = { version = "5.0.1" } +ureq = { version = "2.9.1", default-features = true, features = [ "socks-proxy" ] } +walkdir = { version = "2.5.0" } +tokenizers = { version = "0.15.2" } +itertools = { version = "0.12.1" } +usearch = { version = "2.9.1" } +rayon = "1.10.0" +indicatif = "0.17.8" diff --git a/README.md b/README.md index 52b605b..9861d56 100644 --- a/README.md +++ b/README.md @@ -1 +1,105 @@ -# usls \ No newline at end of file +# usls + +A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv8](https://github.com/ultralytics/ultralytics) `(Classification, Segmentation, Detection and Pose 
Detection)`, [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), and others. Many execution providers are supported, such as `CUDA`, `TensorRT` and `CoreML`. + + +## Supported Models + +| Model | Example | CUDA(f32) | CUDA(f16) | TensorRT(f32) | TensorRT(f16) | +| :-------------------: | :----------------------: | :----------------: | :----------------: | :------------------------: | :-----------------------: | +| YOLOv8-detection | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-pose | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-classification | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-segmentation | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ | +| YOLOv8-OBB | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | | +| YOLOv9 | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ | +| RT-DETR | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ | +| FastSAM | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ | +| YOLO-World | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ | +| DINOv2 | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ | +| CLIP | [demo](examples/clip) | ✅ | ✅ | ✅ visual
❌ textual | ✅ visual
❌ textual | +| BLIP | [demo](examples/blip) | ✅ | ✅ | ✅ visual
❌ textual | ✅ visual
❌ textual | +| OCR(DB, SVTR) | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | | + +## Solution Models +Additionally, this repo also provides some solution models such as pedestrian `fall detection`, `head detection`, `trash detection`, and more. + +| Model | Example | Result | +| :---------------------------: | :------------------------------: | :--------------------------------------------------------------------------: | +| face-landmark detection | [demo](examples/yolov8-face) | | +| head detection | [demo](examples/yolov8-head) | | +| fall detection | [demo](examples/yolov8-falldown) | | +| trash detection | [demo](examples/yolov8-plastic-bag) | | + +## Demo + +``` +cargo run -r --example yolov8 # fastsam, yolov9, blip, clip, dinov2, yolo-world... +``` + +## Integrate into your own project + +#### 1. Install [ort](https://github.com/pykeio/ort) + +check **[ort guide](https://ort.pyke.io/setup/linking)** + +
+For Linux or macOS users + +- First, download the latest release from [ONNXRuntime Releases](https://github.com/microsoft/onnxruntime/releases) +- Then link it by setting `ORT_DYLIB_PATH` to the downloaded library + ```shell + export ORT_DYLIB_PATH=/Users/qweasd/Desktop/onnxruntime-osx-arm64-1.17.1/lib/libonnxruntime.1.17.1.dylib + ``` +
+ +#### 2. Add `usls` as a dependency to your project's `Cargo.toml`: + +``` +[dependencies] +usls = "0.0.1" +``` + +#### 3. Set model `Options` and build `model`, then you're ready to go. + +```Rust +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + // 1.build model + let options = Options::default() + .with_model("../models/yolov8m-seg-dyn-f16.onnx") + .with_trt(0) // using cuda(0) by default + // when model with dynamic shapes + .with_i00((1, 2, 4).into()) // dynamic batch + .with_i02((416, 640, 800).into()) // dynamic height + .with_i03((416, 640, 800).into()) // dynamic width + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_saveout("YOLOv8"); // save results + let mut model = YOLO::new(&options)?; + + // 2.build dataloader + let dl = DataLoader::default() + .with_batch(model.batch.opt as usize) + .load("./assets/")?; + + // 3.run + for (xs, _paths) in dl { + let _y = model.run(&xs)?; + } + Ok(()) +} +``` + +## Script: convert ONNX model from `float32` to `float16` + +```python +import onnx +from pathlib import Path +from onnxconverter_common import float16 + +model_f32 = "onnx_model.onnx" +model_f16 = float16.convert_float_to_float16(onnx.load(model_f32)) +saveout = Path(model_f32).with_name(Path(model_f32).stem + "-f16.onnx") +onnx.save(model_f16, saveout) +``` diff --git a/assets/bus.jpg b/assets/bus.jpg new file mode 100644 index 0000000..40eaaf5 Binary files /dev/null and b/assets/bus.jpg differ diff --git a/assets/falldown.jpg b/assets/falldown.jpg new file mode 100644 index 0000000..1492401 Binary files /dev/null and b/assets/falldown.jpg differ diff --git a/assets/kids.jpg b/assets/kids.jpg new file mode 100644 index 0000000..7eda4f3 Binary files /dev/null and b/assets/kids.jpg differ diff --git a/assets/trash.jpg b/assets/trash.jpg new file mode 100644 index 0000000..2ead8d5 Binary files /dev/null and b/assets/trash.jpg differ diff --git a/examples/blip/README.md b/examples/blip/README.md new file mode 100644 
index 0000000..823fdb5 --- /dev/null +++ b/examples/blip/README.md @@ -0,0 +1,53 @@ +This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do conditional or unconditional image captioning. + + +## Quick Start + +```shell +cargo run -r --example blip +``` + +## Or you can do it manually + + +### 1. Download BLIP ONNX Model + +[blip-visual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-visual-base.onnx) +[blip-textual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-textual-base.onnx) + + +### 2. Specify the ONNX model path in `main.rs` + +```Rust + // visual + let options_visual = Options::default() + .with_model("VISUAL_MODEL") // <= modify this + .with_profile(false); + + // textual + let options_textual = Options::default() + .with_model("TEXTUAL_MODEL") // <= modify this + .with_profile(false); + +``` + +### 3. Then, run + +```bash +cargo run -r --example blip +``` + + +## Results + +```shell +[Unconditional image captioning]: a group of people walking around a bus +[Conditional image captioning]: three man walking in front of a bus +``` + +## TODO + +* [ ] text decode with Top-p sample +* [ ] VQA +* [ ] Retrieval +* [ ] TensorRT support for textual model diff --git a/examples/blip/main.rs b/examples/blip/main.rs new file mode 100644 index 0000000..a21c89a --- /dev/null +++ b/examples/blip/main.rs @@ -0,0 +1,29 @@ +use usls::{models::Blip, Options}; + +fn main() -> Result<(), Box> { + // visual + let options_visual = Options::default() + .with_model("../models/blip-visual-base.onnx") + .with_i00((1, 1, 4).into()) + .with_profile(false); + + // textual + let options_textual = Options::default() + .with_model("../models/blip-textual-base.onnx") + .with_i00((1, 1, 4).into()) // input_id: batch + .with_i01((1, 1, 4).into()) // input_id: seq_len + .with_i10((1, 1, 4).into()) // attention_mask: batch + .with_i11((1, 1, 4).into()) // attention_mask: seq_len + .with_i20((1, 1, 4).into()) // encoder_hidden_states: 
batch + .with_i30((1, 1, 4).into()) // encoder_attention_mask: batch + .with_profile(false); + + // build model + let mut model = Blip::new(options_visual, options_textual)?; + + // image caption + model.caption("./assets/bus.jpg", None)?; // unconditional + model.caption("./assets/bus.jpg", Some("three man"))?; // conditional + + Ok(()) +} diff --git a/examples/clip/README.md b/examples/clip/README.md new file mode 100644 index 0000000..230e6e7 --- /dev/null +++ b/examples/clip/README.md @@ -0,0 +1,58 @@ +This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute similarity between texts and images, which can be employed for image-to-text or text-to-image retrieval tasks. + +## Quick Start + +```shell +cargo run -r --example clip +``` + +## Or you can manully + + +### 1.Donwload CLIP ONNX Model + +[clip-b32-visual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-visual.onnx) +[clip-b32-textual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-textual.onnx) + + +### 2. Specify the ONNX model path in `main.rs` + +```Rust + // visual + let options_visual = Options::default() + .with_model("VISUAL_MODEL") // <= modify this + .with_i00((1, 1, 4).into()) + .with_profile(false); + + // textual + let options_textual = Options::default() + .with_model("TEXTUAL_MODEL") // <= modify this + .with_i00((1, 1, 4).into()) + .with_profile(false); +``` + +### 3. 
Then, run + +```bash +cargo run -r --example clip +``` + + + +## Results + +```shell +(82.24775%) ./examples/clip/images/carrot.jpg => 几个胡萝卜 +[0.06708972, 0.0067733657, 0.0019306632, 0.8224775, 0.003044935, 0.083962336, 0.014721389] + +(85.56889%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table +[0.0786363, 0.0004783095, 0.00060898095, 0.06286741, 0.0006842306, 0.8556889, 0.0010357979] + +(90.03625%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant +[0.07473288, 0.0027821448, 0.0075673857, 0.010874652, 0.003041679, 0.0006387719, 0.9003625] +``` + + +## TODO + +* [ ] TensorRT support for textual model diff --git a/examples/clip/images/carrot.jpg b/examples/clip/images/carrot.jpg new file mode 100644 index 0000000..dd51810 Binary files /dev/null and b/examples/clip/images/carrot.jpg differ diff --git a/examples/clip/images/doll.jpg b/examples/clip/images/doll.jpg new file mode 100644 index 0000000..0a3935a Binary files /dev/null and b/examples/clip/images/doll.jpg differ diff --git a/examples/clip/images/peoples.jpg b/examples/clip/images/peoples.jpg new file mode 100644 index 0000000..3953afa Binary files /dev/null and b/examples/clip/images/peoples.jpg differ diff --git a/examples/clip/main.rs b/examples/clip/main.rs new file mode 100644 index 0000000..2b0aaf8 --- /dev/null +++ b/examples/clip/main.rs @@ -0,0 +1,63 @@ +use usls::{models::Clip, ops, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // visual + let options_visual = Options::default() + .with_model("../models/clip-b32-visual-dyn.onnx") + .with_i00((1, 1, 4).into()) + .with_profile(false); + + // textual + let options_textual = Options::default() + .with_model("../models/clip-b32-textual-dyn.onnx") + .with_i00((1, 1, 4).into()) + .with_profile(false); + + // build model + let model = Clip::new(options_visual, options_textual)?; + + // texts + let texts = vec![ + "A photo of a dinosaur ".to_string(), + "A photo of a 
cat".to_string(), + "A photo of a dog".to_string(), + "几个胡萝卜".to_string(), + "There are some playing cards on a striped table cloth".to_string(), + "There is a doll with red hair and a clock on a table".to_string(), + "Some people holding wine glasses in a restaurant".to_string(), + ]; + let feats_text = model.encode_texts(&texts)?; // [n, ndim] + + // load image + let dl = DataLoader::default() + .with_batch(model.batch_visual()) + .load("./examples/clip/images")?; + + // loop + for (images, paths) in dl { + let feats_image = model.encode_images(&images).unwrap(); + + // use image to query texts + let matrix = ops::dot2(&feats_image, &feats_text)?; // [m, n] + + // summary + for i in 0..paths.len() { + let probs = &matrix[i]; + let (id, &score) = probs + .iter() + .enumerate() + .reduce(|max, x| if x.1 > max.1 { x } else { max }) + .unwrap(); + + println!( + "({:?}%) {} => {} ", + score * 100.0, + paths[i].display(), + &texts[id] + ); + println!("{:?}\n", probs); + } + } + + Ok(()) +} diff --git a/examples/dinov2/README.md b/examples/dinov2/README.md new file mode 100644 index 0000000..8a30ecb --- /dev/null +++ b/examples/dinov2/README.md @@ -0,0 +1,50 @@ +This demo showcases how to use `DINOv2` to compute image similarity, applicable for image-to-image retrieval tasks. 
+ +## Quick Start + +```shell +cargo run -r --example dinov2 +``` + +## Or you can manully + +### 1.Donwload DINOv2 ONNX Model + +[dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx) +[dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx) +[dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx) + +[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx) +[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx) +[dinov2-b14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_profile(false); + +// build index +let options = IndexOptions { + dimensions: 384, // 768 for vitb; 384 for vits + metric: MetricKind::L2sq, + quantization: ScalarKind::F16, + ..Default::default() +}; +``` + +### 3. 
Then, run + +```bash +cargo run -r --example dinov2 +``` + +## Results + +```shell +Top-1 distance: 0.0 => "./examples/dinov2/images/bus.jpg" +Top-2 distance: 1.8332717 => "./examples/dinov2/images/dog.png" +Top-3 distance: 1.9672602 => "./examples/dinov2/images/cat.png" +Top-4 distance: 1.978817 => "./examples/dinov2/images/carrot.jpg" +``` diff --git a/examples/dinov2/images/GlqO.jpg b/examples/dinov2/images/GlqO.jpg new file mode 100644 index 0000000..394f48f Binary files /dev/null and b/examples/dinov2/images/GlqO.jpg differ diff --git a/examples/dinov2/images/JasD.jpg b/examples/dinov2/images/JasD.jpg new file mode 100644 index 0000000..4e6f31a Binary files /dev/null and b/examples/dinov2/images/JasD.jpg differ diff --git a/examples/dinov2/images/bus.jpg b/examples/dinov2/images/bus.jpg new file mode 100644 index 0000000..40eaaf5 Binary files /dev/null and b/examples/dinov2/images/bus.jpg differ diff --git a/examples/dinov2/main.rs b/examples/dinov2/main.rs new file mode 100644 index 0000000..a969bc9 --- /dev/null +++ b/examples/dinov2/main.rs @@ -0,0 +1,57 @@ +use usearch::ffi::{IndexOptions, MetricKind, ScalarKind}; +use usls::{models::Dinov2, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/dinov2-s14-dyn-f16.onnx") + .with_i00((1, 1, 1).into()) + .with_i02((224, 224, 224).into()) + .with_i03((224, 224, 224).into()); + let mut model = Dinov2::new(&options)?; + + // build dataloader + let dl = DataLoader::default() + .with_batch(model.batch.opt as usize) + .load("./examples/dinov2/images")?; + + // load query + let query = image::io::Reader::open("./assets/bus.jpg")?.decode()?; + let query = model.run(&[query])?; + + // build index + let options = IndexOptions { + dimensions: 384, // 768 for vitb; 384 for vits + metric: MetricKind::L2sq, + quantization: ScalarKind::F16, + ..Default::default() + }; + let index = usearch::new_index(&options)?; + 
index.reserve(dl.clone().count())?; + + // load feats + for (idx, (image, _path)) in dl.clone().enumerate() { + let y = model.run(&image)?; + index.add(idx as u64, &y.into_raw_vec())?; + } + + // output + let topk = 10; + let matches = index.search(&query.into_raw_vec(), topk)?; + let paths = dl.paths; + for (idx, (k, score)) in matches + .keys + .into_iter() + .zip(matches.distances.into_iter()) + .enumerate() + { + println!( + "Top-{} distance: {:?} => {:?}", + idx + 1, + score, + paths[k as usize] + ); + } + + Ok(()) +} diff --git a/examples/fastsam/README.md b/examples/fastsam/README.md new file mode 100644 index 0000000..d2ecc03 --- /dev/null +++ b/examples/fastsam/README.md @@ -0,0 +1,41 @@ +## Quick Start + +```shell +cargo run -r --example fastsam +``` + +## Or you can manully + + +### 1.Donwload or export ONNX Model + +- **Export** + + ```bash + pip install -U ultralytics + yolo export model=FastSAM-s.pt format=onnx simplify dynamic + ``` + +- **Download** + + [FastSAM-s-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/FastSAM-s-dyn-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("../models/FastSAM-s-dyn-f16.onnx") // <= modify this + .with_saveout("FastSAM") + .with_profile(false); +let mut model = YOLO::new(&options)?; +``` + +### 3. 
Then, run + +```bash +cargo run -r --example fastsam +``` + +## Results + +![](./demo.jpg) diff --git a/examples/fastsam/demo.jpg b/examples/fastsam/demo.jpg new file mode 100644 index 0000000..d60a5d6 Binary files /dev/null and b/examples/fastsam/demo.jpg differ diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs new file mode 100644 index 0000000..50a2f33 --- /dev/null +++ b/examples/fastsam/main.rs @@ -0,0 +1,22 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/FastSAM-s-dyn-f16.onnx") + .with_i00((1, 1, 4).into()) + .with_i02((416, 640, 800).into()) + .with_i03((416, 640, 800).into()) + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_saveout("FastSAM") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/bus.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md new file mode 100644 index 0000000..9d5921a --- /dev/null +++ b/examples/rtdetr/README.md @@ -0,0 +1,37 @@ +## Quick Start + +```shell +cargo run -r --example rtdetr +``` + +## Or you can manully + +### 1. Donwload or export ONNX Model + +- Export + + ```bash + pip install -U ultralytics + yolo export model=rtdetr-l.pt format=onnx simplify dynamic opset=16 + ``` +- Download + + [rtdetr-l-f16 model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtdetr-l-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_MODEL") // <= modify this + .with_saveout("RT-DETR"); +``` + +### 3. 
Then, run + +```bash +cargo run -r --example rtdetr +``` + +## Results + +![](./demo.jpg) diff --git a/examples/rtdetr/demo.jpg b/examples/rtdetr/demo.jpg new file mode 100644 index 0000000..e0df576 Binary files /dev/null and b/examples/rtdetr/demo.jpg differ diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs new file mode 100644 index 0000000..6f50ced --- /dev/null +++ b/examples/rtdetr/main.rs @@ -0,0 +1,19 @@ +use usls::{models::RTDETR, DataLoader, Options, COCO_NAMES_80}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/rtdetr-l-f16.onnx") + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_names(&COCO_NAMES_80) + .with_saveout("RT-DETR"); + let mut model = RTDETR::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/bus.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolo-world/README.md b/examples/yolo-world/README.md new file mode 100644 index 0000000..f3081a2 --- /dev/null +++ b/examples/yolo-world/README.md @@ -0,0 +1,58 @@ +## Quick Start + +```shell +cargo run -r --example yolo-world +``` + +## Or you can manully + +### 1. Donwload or Export ONNX Model + +- Download + + [yolov8s-world-v2-shoes](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8s-world-v2-shoes.onnx) +- Or generate your own `yolo-world` model and then Export + + - Installation + + ```shell + pip install -U ultralytics + ``` + + - Generate + + ```python + from ultralytics import YOLO + + # Initialize a YOLO-World model + model = YOLO('yolov8m-worldv2.pt') + + # Define custom classes + model.set_classes(["shoes"]) + + # Save the model with the defined offline vocabulary + model.save("custom_yolov8m-world-v2.pt") + ``` + + - Export + + ```shell + yolo export model=custom_yolov8m-world-v2.pt format=onnx simplify dynamic + ``` + +### 2. 
Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH"); // <= modify this +``` + +### 3. Then, run + +``` +cargo run -r --example yolo-world +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolo-world/demo.jpg b/examples/yolo-world/demo.jpg new file mode 100644 index 0000000..5242d7d Binary files /dev/null and b/examples/yolo-world/demo.jpg differ diff --git a/examples/yolo-world/main.rs b/examples/yolo-world/main.rs new file mode 100644 index 0000000..ed1bfb5 --- /dev/null +++ b/examples/yolo-world/main.rs @@ -0,0 +1,22 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/yolov8s-world-v2-shoes.onnx") + .with_i00((1, 1, 4).into()) + .with_i02((416, 640, 800).into()) + .with_i03((416, 640, 800).into()) + .with_confs(&[0.3]) // shoes: 0.2 + .with_saveout("YOLO-World") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/bus.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolov8-face/README.md b/examples/yolov8-face/README.md new file mode 100644 index 0000000..8b741df --- /dev/null +++ b/examples/yolov8-face/README.md @@ -0,0 +1,30 @@ +## Quick Start + +```shell +cargo run -r --example yolov8-face +``` + +## Or you can manully + +### 1. Donwload ONNX Model + +[yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-face-dyn-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_profile(false); +let mut model = YOLO::new(&options)?; +``` + +### 3. 
Then, run + +```bash +cargo run -r --example yolov8-face +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolov8-face/demo.jpg b/examples/yolov8-face/demo.jpg new file mode 100644 index 0000000..6180994 Binary files /dev/null and b/examples/yolov8-face/demo.jpg differ diff --git a/examples/yolov8-face/main.rs b/examples/yolov8-face/main.rs new file mode 100644 index 0000000..e7469c1 --- /dev/null +++ b/examples/yolov8-face/main.rs @@ -0,0 +1,22 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/yolov8n-face-dyn-f16.onnx") + .with_i00((1, 1, 4).into()) + .with_i02((416, 640, 800).into()) + .with_i03((416, 640, 800).into()) + .with_confs(&[0.15]) + .with_saveout("YOLOv8-Face") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/kids.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolov8-falldown/README.md b/examples/yolov8-falldown/README.md new file mode 100644 index 0000000..1cc6699 --- /dev/null +++ b/examples/yolov8-falldown/README.md @@ -0,0 +1,30 @@ +## Quick Start + +```shell +cargo run -r --example yolov8-falldown +``` + +## Or you can manully + +### 1.Donwload ONNX Model + +[yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-falldown-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_profile(false); +let mut model = YOLO::new(&options)? +``` + +### 3. 
Then, run + +```bash +cargo run -r --example yolov8-falldown +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolov8-falldown/demo.jpg b/examples/yolov8-falldown/demo.jpg new file mode 100644 index 0000000..2b0a9f5 Binary files /dev/null and b/examples/yolov8-falldown/demo.jpg differ diff --git a/examples/yolov8-falldown/main.rs b/examples/yolov8-falldown/main.rs new file mode 100644 index 0000000..3e8e84f --- /dev/null +++ b/examples/yolov8-falldown/main.rs @@ -0,0 +1,19 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/yolov8-falldown-f16.onnx") + .with_confs(&[0.3]) + .with_saveout("YOLOv8-Falldown") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/falldown.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolov8-head/README.md b/examples/yolov8-head/README.md new file mode 100644 index 0000000..2ef3bd7 --- /dev/null +++ b/examples/yolov8-head/README.md @@ -0,0 +1,30 @@ +## Quick Start + +```shell +cargo run -r --example yolov8-head +``` + +## Or you can manully + +### 1. Donwload ONNX Model + +[yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-head-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_profile(false); +let mut model = YOLO::new(&options)?; +``` + +### 3. 
Then, run + +```bash +cargo run -r --example yolov8-head +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolov8-head/demo.jpg b/examples/yolov8-head/demo.jpg new file mode 100644 index 0000000..ca5ac88 Binary files /dev/null and b/examples/yolov8-head/demo.jpg differ diff --git a/examples/yolov8-head/main.rs b/examples/yolov8-head/main.rs new file mode 100644 index 0000000..d5a750d --- /dev/null +++ b/examples/yolov8-head/main.rs @@ -0,0 +1,19 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/yolov8-head-f16.onnx") + .with_confs(&[0.3]) + .with_saveout("YOLOv8-Head") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/kids.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolov8-trash/README.md b/examples/yolov8-trash/README.md new file mode 100644 index 0000000..27c8c1c --- /dev/null +++ b/examples/yolov8-trash/README.md @@ -0,0 +1,32 @@ +Model for detecting plastic bag. + +## Quick Start + +```shell +cargo run -r --example yolov8-trash +``` + +## Or you can manully + +### 1. Donwload ONNX Model + +[yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-plastic-bag-f16.onnx) + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_profile(false); +let mut model = YOLO::new(&options)?; +``` + +### 3. 
Then, run + +```bash +cargo run -r --example yolov8-trash +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolov8-trash/demo.jpg b/examples/yolov8-trash/demo.jpg new file mode 100644 index 0000000..747b5f2 Binary files /dev/null and b/examples/yolov8-trash/demo.jpg differ diff --git a/examples/yolov8-trash/main.rs b/examples/yolov8-trash/main.rs new file mode 100644 index 0000000..a172778 --- /dev/null +++ b/examples/yolov8-trash/main.rs @@ -0,0 +1,20 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // 1.build model + let options = Options::default() + .with_model("../models/yolov8-plastic-bag-f16.onnx") + .with_confs(&[0.3]) + .with_saveout("YOLOv8-Trash") + .with_names(&["trash"]) + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/trash.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/examples/yolov8/README.md b/examples/yolov8/README.md new file mode 100644 index 0000000..8b65881 --- /dev/null +++ b/examples/yolov8/README.md @@ -0,0 +1,58 @@ +## Features + +- Support `Classification`, `Segmentation`, `Detection`, `Pose(Keypoints)-Detection` tasks. +- Support `FP16` & `FP32` ONNX models. +- Support `CoreML`, `CUDA` and `TensorRT` execution provider to accelerate computation. +- Support dynamic input shapes(`batch`, `width`, `height`). +- Support dynamic confidence(`DynConf`) for each class in Detection task. + +## Quick Start + +```shell +cargo run -r --example yolov8 +``` + +## Or you can manully + +### 1. 
Export `YOLOv8` ONNX Models + +```bash +pip install -U ultralytics + +# export onnx model with dynamic shapes +yolo export model=yolov8m.pt format=onnx simplify dynamic +yolo export model=yolov8m-cls.pt format=onnx simplify dynamic +yolo export model=yolov8m-pose.pt format=onnx simplify dynamic +yolo export model=yolov8m-seg.pt format=onnx simplify dynamic + +# export onnx model with fixed shapes +yolo export model=yolov8m.pt format=onnx simplify +yolo export model=yolov8m-cls.pt format=onnx simplify +yolo export model=yolov8m-pose.pt format=onnx simplify +yolo export model=yolov8m-seg.pt format=onnx simplify +``` + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_saveout("YOLOv8"); +let mut model = YOLO::new(&options)?; +``` + +### 3. Then, run + +``` +cargo run -r --example yolov8 +``` + +## Result + +| Task | Annotated image | +| :-------------------: | --------------------- | +| Instance Segmentation | ![img](./demo-seg.jpg) | +| Classification | ![img](./demo-cls.jpg) | +| Detection | ![img](./demo-det.jpg) | +| Pose | ![img](./demo-pose.jpg) | diff --git a/examples/yolov8/demo-cls.jpg b/examples/yolov8/demo-cls.jpg new file mode 100644 index 0000000..55df7eb Binary files /dev/null and b/examples/yolov8/demo-cls.jpg differ diff --git a/examples/yolov8/demo-det.jpg b/examples/yolov8/demo-det.jpg new file mode 100644 index 0000000..35af574 Binary files /dev/null and b/examples/yolov8/demo-det.jpg differ diff --git a/examples/yolov8/demo-pose.jpg b/examples/yolov8/demo-pose.jpg new file mode 100644 index 0000000..efdc1fb Binary files /dev/null and b/examples/yolov8/demo-pose.jpg differ diff --git a/examples/yolov8/demo-seg.jpg b/examples/yolov8/demo-seg.jpg new file mode 100644 index 0000000..cd71b0f Binary files /dev/null and b/examples/yolov8/demo-seg.jpg differ diff --git a/examples/yolov8/main.rs 
b/examples/yolov8/main.rs new file mode 100644 index 0000000..d5df089 --- /dev/null +++ b/examples/yolov8/main.rs @@ -0,0 +1,29 @@ +use usls::{models::YOLO, DataLoader, Options, COCO_SKELETON_17}; + +fn main() -> Result<(), Box> { + // 1.build model + let options = Options::default() + .with_model("../models/yolov8m-dyn-f16.onnx") + .with_trt(0) // cuda by default + .with_fp16(true) + .with_i00((1, 1, 4).into()) + .with_i02((416, 640, 800).into()) + .with_i03((416, 640, 800).into()) + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_profile(true) + .with_dry_run(5) + .with_skeletons(&COCO_SKELETON_17) + .with_saveout("YOLOv8"); + let mut model = YOLO::new(&options)?; + + // 2.build dataloader + let dl = DataLoader::default() + .with_batch(1) + .load("./assets/bus.jpg")?; + + // 3.run + for (xs, _paths) in dl { + let _y = model.run(&xs)?; + } + Ok(()) +} diff --git a/examples/yolov9/README.md b/examples/yolov9/README.md new file mode 100644 index 0000000..5ce2bfb --- /dev/null +++ b/examples/yolov9/README.md @@ -0,0 +1,45 @@ +## Quick Start + +```shell +cargo run -r --example yolov9 +``` + +## Or you can do it manually + +### 1. Download or Export ONNX Model + +- **Download** + + [yolov9-c-dyn-fp16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov9-c-dyn-f16.onnx) +- **Export** + + ```shell + # clone repo and install dependencies + git clone https://github.com/WongKinYiu/yolov9.git + cd yolov9 + pip install -r requirements.txt + + # download `pt` weights + wget https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c.pt + + # export ONNX model + python export.py --weights yolov9-c.pt --include onnx --simplify --dynamic + ``` + +### 2. Specify the ONNX model path in `main.rs` + +```Rust +let options = Options::default() + .with_model("ONNX_PATH") // <= modify this + .with_saveout("YOLOv9"); +``` + +### 3.
Run + +``` +cargo run -r --example yolov9 +``` + +## Results + +![](./demo.jpg) diff --git a/examples/yolov9/demo.jpg b/examples/yolov9/demo.jpg new file mode 100644 index 0000000..6c9f6b2 Binary files /dev/null and b/examples/yolov9/demo.jpg differ diff --git a/examples/yolov9/main.rs b/examples/yolov9/main.rs new file mode 100644 index 0000000..0c6ed85 --- /dev/null +++ b/examples/yolov9/main.rs @@ -0,0 +1,22 @@ +use usls::{models::YOLO, DataLoader, Options}; + +fn main() -> Result<(), Box> { + // build model + let options = Options::default() + .with_model("../models/yolov9-c-dyn-f16.onnx") + .with_i00((1, 1, 4).into()) + .with_i02((416, 640, 800).into()) + .with_i03((416, 640, 800).into()) + .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15 + .with_saveout("YOLOv9") + .with_profile(false); + let mut model = YOLO::new(&options)?; + + // build dataloader + let mut dl = DataLoader::default().load("./assets/bus.jpg")?; + + // run + model.run(&dl.next().unwrap().0)?; + + Ok(()) +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..6d833ff --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "1.75" diff --git a/src/annotator.rs b/src/annotator.rs new file mode 100644 index 0000000..4d3337c --- /dev/null +++ b/src/annotator.rs @@ -0,0 +1,228 @@ +use anyhow::Result; +use image::{ImageBuffer, RgbImage}; + +use crate::{auto_load, string_now, Results, CHECK_MARK, CROSS_MARK}; + +#[derive(Debug)] +pub struct Annotator { + font: rusttype::Font<'static>, + skeletons: Option>, + hide_conf: bool, +} + +impl Default for Annotator { + fn default() -> Self { + Self { + font: Self::load_font(None).unwrap(), + skeletons: None, + hide_conf: false, + } + } +} + +impl Annotator { + pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self { + self.skeletons = Some(skeletons.to_vec()); + self + } + + pub fn with_font(mut self, path: &str) -> Self { + self.font = Self::load_font(Some(path)).unwrap(); + 
self + } + + pub fn save(&self, image: &RgbImage, saveout: &str) { + let mut saveout = std::path::PathBuf::from("runs").join(saveout); + if !saveout.exists() { + std::fs::create_dir_all(&saveout).unwrap(); + } + saveout.push(string_now("-")); + let saveout = format!("{}.jpg", saveout.to_str().unwrap()); + match image.save(&saveout) { + Err(err) => println!("{} Saving failed: {:?}", CROSS_MARK, err), + Ok(_) => println!("{} Annotated image saved at: {}", CHECK_MARK, saveout), + } + } + + fn load_font(path: Option<&str>) -> Result> { + let path_font = match path { + None => auto_load("Arial.ttf")?, + Some(p) => p.into(), + }; + let buffer = std::fs::read(path_font)?; + Ok(rusttype::Font::try_from_vec(buffer).unwrap()) + } + + pub fn get_color(&self, n: usize) -> (u8, u8, u8) { + Self::color_palette()[n % Self::color_palette().len()] + } + + pub fn plot(&self, img: &mut RgbImage, y: &Results) { + // masks and polygons + if let Some(masks) = y.masks() { + for mask in masks.iter() { + let mask_nd: ImageBuffer, Vec> = + ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec()) + .expect("can not crate image from ndarray"); + // masks + for _x in 0..img.width() { + for _y in 0..img.height() { + let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y); + if mask_p.0[0] > 0 { + let mut img_p = imageproc::drawing::Canvas::get_pixel(img, _x, _y); + img_p.0[0] /= 2; + img_p.0[1] = 255 - (255 - img_p.0[1]) / 3; + img_p.0[2] /= 2; + imageproc::drawing::Canvas::draw_pixel(img, _x, _y, img_p) + } + } + } + // contours + let contours: Vec> = + imageproc::contours::find_contours(&mask_nd); + for contour in contours.iter() { + for point in contour.points.iter() { + imageproc::drawing::draw_filled_circle_mut( + img, + (point.x, point.y), + 1, + image::Rgb([255, 255, 255]), + ); + } + } + } + } + + // probs + if let Some(probs) = y.probs() { + let topk = 5usize; + let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20); + for k in probs.topk(topk).iter() 
{ + let legend = format!("{}: {:.2}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1); + let scale = img.width().max(img.height()) as f32 / 30.0; + let scale = rusttype::Scale::uniform(scale); + let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend); + y += text_h; + imageproc::drawing::draw_filled_rect_mut( + img, + imageproc::rect::Rect::at(x, y).of_size(text_w as u32, text_h as u32), + image::Rgb(self.get_color(k.0).into()), + ); + imageproc::drawing::draw_text_mut( + img, + image::Rgb((0, 0, 0).into()), + x, + y, + scale, + &self.font, + &legend, + ); + } + } + + // bboxes + if let Some(bboxes) = y.bboxes() { + for bbox in bboxes.iter() { + imageproc::drawing::draw_hollow_rect_mut( + img, + imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32) + .of_size(bbox.width() as u32, bbox.height() as u32), + image::Rgb(self.get_color(bbox.id()).into()), + ); + let legend = if self.hide_conf { + bbox.name().unwrap_or(&bbox.id().to_string()).to_string() + } else { + format!( + "{}: {:.4}", + bbox.name().unwrap_or(&bbox.id().to_string()), + bbox.confidence() + ) + }; + let scale = img.width().max(img.height()) as f32 / 45.0; + let scale = rusttype::Scale::uniform(scale); + let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend); + let text_y = if bbox.ymin() as i32 > text_h { + bbox.ymin() as i32 - text_h + } else { + text_h - bbox.ymin() as i32 + }; + imageproc::drawing::draw_filled_rect_mut( + img, + imageproc::rect::Rect::at(bbox.xmin() as i32, text_y) + .of_size(text_w as u32, text_h as u32), + image::Rgb(self.get_color(bbox.id()).into()), + ); + imageproc::drawing::draw_text_mut( + img, + image::Rgb((0, 0, 0).into()), + bbox.xmin() as i32, + text_y, + scale, + &self.font, + &legend, + ); + } + } + + // keypoints + if let Some(keypoints) = y.keypoints() { + let radius = 3; + for kpts in keypoints.iter() { + for (i, kpt) in kpts.iter().enumerate() { + if kpt.confidence() == 0.0 { + continue; + } + // draw 
point + imageproc::drawing::draw_filled_circle_mut( + img, + (kpt.x() as i32, kpt.y() as i32), + radius, + image::Rgb(self.get_color(i + 10).into()), + ); + } + + // draw skeleton + if let Some(skeletons) = &self.skeletons { + for &(i, ii) in skeletons.iter() { + let kpt1 = &kpts[i]; + let kpt2 = &kpts[ii]; + if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 { + continue; + } + imageproc::drawing::draw_line_segment_mut( + img, + (kpt1.x(), kpt1.y()), + (kpt2.x(), kpt2.y()), + image::Rgb([255, 51, 255]), + ); + } + } + } + } + } + + fn color_palette() -> Vec<(u8, u8, u8)> { + vec![ + (0, 255, 0), + (255, 128, 0), + (0, 0, 255), + (255, 153, 51), + (255, 0, 0), + (255, 51, 255), + (102, 178, 255), + (51, 153, 255), + (255, 51, 51), + (153, 255, 153), + (102, 255, 102), + (153, 204, 255), + (255, 153, 153), + (255, 178, 102), + (230, 230, 0), + (255, 153, 255), + (255, 102, 255), + (255, 102, 102), + (51, 255, 51), + (255, 255, 255), + ] + } +} diff --git a/src/bbox.rs b/src/bbox.rs new file mode 100644 index 0000000..98db2b6 --- /dev/null +++ b/src/bbox.rs @@ -0,0 +1,77 @@ +use crate::Rect; + +#[derive(Clone, PartialEq, Default)] +pub struct Bbox { + rect: Rect, + id: usize, + confidence: f32, + name: Option, +} + +impl std::fmt::Debug for Bbox { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Bbox") + .field("xmin", &self.rect.xmin()) + .field("ymin", &self.rect.ymin()) + .field("xmax", &self.rect.xmax()) + .field("ymax", &self.rect.ymax()) + .field("id", &self.id) + .field("name", &self.name) + .field("confidence", &self.confidence) + .finish() + } +} + +impl Bbox { + pub fn new(rect: Rect, id: usize, confidence: f32, name: Option) -> Self { + Self { + rect, + id, + confidence, + name, + } + } + + pub fn width(&self) -> f32 { + self.rect.width() + } + + pub fn height(&self) -> f32 { + self.rect.height() + } + + pub fn xmin(&self) -> f32 { + self.rect.xmin() + } + + pub fn ymin(&self) -> f32 { + self.rect.ymin() + } + + 
pub fn xmax(&self) -> f32 { + self.rect.xmax() + } + + pub fn ymax(&self) -> f32 { + self.rect.ymax() + } + + pub fn id(&self) -> usize { + self.id + } + pub fn name(&self) -> Option<&String> { + self.name.as_ref() + } + + pub fn confidence(&self) -> f32 { + self.confidence + } + + pub fn area(&self) -> f32 { + self.rect.area() + } + + pub fn iou(&self, other: &Bbox) -> f32 { + self.rect.intersect(&other.rect) / self.rect.union(&other.rect) + } +} diff --git a/src/dataloader.rs b/src/dataloader.rs new file mode 100644 index 0000000..e0a769a --- /dev/null +++ b/src/dataloader.rs @@ -0,0 +1,120 @@ +use crate::{CHECK_MARK, CROSS_MARK, SAFE_CROSS_MARK}; +use anyhow::Result; +use image::DynamicImage; +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; +use walkdir::{DirEntry, WalkDir}; + +#[derive(Debug, Clone)] +pub struct DataLoader { + // source could be single image, folder with images (TODO: video, stream) + pub source: PathBuf, + pub batch: usize, + pub recursive: bool, + pub paths: VecDeque, +} + +impl Iterator for DataLoader { + type Item = (Vec, Vec); + + /// Pops queued paths and decodes them until `batch` images are collected or the queue is empty. + /// Files that fail to open or decode are reported and skipped, so a batch may hold fewer images (possibly none). + fn next(&mut self) -> Option { + if self.paths.is_empty() { + None + } else { + let mut yis: Vec = Vec::new(); + let mut yps: Vec = Vec::new(); + loop { + let path = self.paths.pop_front().unwrap(); + match image::io::Reader::open(&path) { + Err(err) => { + // report the path just popped: `self.paths[0]` is the next entry and panics once the queue is empty + println!( + "{SAFE_CROSS_MARK} Faild to load image: {:?} -> {:?}", + path, err + ); + } + Ok(p) => match p.decode() { + Err(err) => { + println!( + "{SAFE_CROSS_MARK} Fail to load image: {:?} -> {:?}", + path, err + ); + } + Ok(x) => { + yis.push(x); + yps.push(path); + } + }, + } + if self.paths.is_empty() || yis.len() == self.batch { + break; + } + } + Some((yis, yps)) + } + } +} + +impl Default for DataLoader { + fn default() -> Self { + Self { + batch: 1, + recursive: false, + source: Default::default(), + paths: Default::default(), + } + } +} + +impl DataLoader { + pub fn load>(&self, source: P) -> Result { + let source
= source.as_ref(); + let mut paths = VecDeque::new(); + + match source { + s if s.is_file() => paths.push_back(s.to_path_buf()), + s if s.is_dir() => { + for entry in WalkDir::new(s) + .into_iter() + .filter_entry(|e| !Self::_is_hidden(e)) + { + let entry = entry.unwrap(); + if entry.file_type().is_dir() { + continue; + } + if !self.recursive && entry.depth() > 1 { + continue; + } + paths.push_back(entry.path().to_path_buf()); + } + } + // s if s.starts_with("rtsp://") || s.starts_with("rtmp://") || s.starts_with("http://")|| s.starts_with("https://") => todo!(), + s if !s.exists() => panic!("{CROSS_MARK} File not found: {s:?}"), + _ => todo!(), + } + println!("{CHECK_MARK} {} files found\n", &paths.len()); + Ok(Self { + paths, + source: source.into(), + batch: self.batch, + recursive: self.recursive, + }) + } + + pub fn with_batch(mut self, x: usize) -> Self { + self.batch = x; + self + } + + pub fn with_recursive(mut self, x: bool) -> Self { + self.recursive = x; + self + } + + fn _is_hidden(entry: &DirEntry) -> bool { + entry + .file_name() + .to_str() + .map(|s| s.starts_with('.')) + .unwrap_or(false) + } +} diff --git a/src/device.rs b/src/device.rs new file mode 100644 index 0000000..3181bd4 --- /dev/null +++ b/src/device.rs @@ -0,0 +1,13 @@ +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Device { + Cpu(usize), + Cuda(usize), + Trt(usize), + CoreML(usize), + Cann(usize), + // Acl(usize), + // Rocm(usize), + // Rknpu(usize), + // Openvino(usize), + // Onednn(usize), +} diff --git a/src/dynconf.rs b/src/dynconf.rs new file mode 100644 index 0000000..8dccdf0 --- /dev/null +++ b/src/dynconf.rs @@ -0,0 +1,54 @@ +use std::ops::Index; + +#[derive(Clone, PartialEq, PartialOrd)] +pub struct DynConf { + confs: Vec, +} + +impl Default for DynConf { + fn default() -> Self { + Self { + confs: vec![0.4f32], + } + } +} + +impl std::fmt::Debug for DynConf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
f.debug_struct("").field("DynConf", &self.confs).finish() + } +} + +impl std::fmt::Display for DynConf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.confs.iter()).finish() + } +} + +impl Index for DynConf { + type Output = f32; + + fn index(&self, i: usize) -> &Self::Output { + &self.confs[i] + } +} + +impl DynConf { + pub fn new(confs: &[f32], n: usize) -> Self { + if confs.is_empty() && n != 0 { + panic!("Error: No value found in confs") + } + let confs = if confs.len() >= n { + confs[..n].to_vec() + } else { + let val = confs.last().unwrap(); + let mut confs = confs.to_vec(); + for _ in 0..(n - confs.len()) { + confs.push(*val); + } + confs + }; + + Self { confs } + } +} diff --git a/src/embedding.rs b/src/embedding.rs new file mode 100644 index 0000000..714272f --- /dev/null +++ b/src/embedding.rs @@ -0,0 +1,51 @@ +use ndarray::{Array, Axis, IxDyn}; + +#[derive(Clone, PartialEq, Default)] +pub struct Embedding { + data: Array, + names: Option>, +} + +impl std::fmt::Debug for Embedding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("").field("Top5", &self.topk(5)).finish() + } +} + +impl Embedding { + pub fn new(data: Array, names: Option>) -> Self { + Self { data, names } + } + + pub fn data(&self) -> &Array { + &self.data + } + + pub fn topk(&self, k: usize) -> Vec<(usize, f32, Option)> { + let mut probs = self + .data + .iter() + .enumerate() + .map(|(a, b)| (a, *b)) + .collect::>(); + probs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + let mut topk = Vec::new(); + for &(id, confidence) in probs.iter().take(k) { + topk.push(( + id, + confidence, + self.names.as_ref().map(|names| names[id].to_owned()), + )); + } + topk + } + + pub fn norm(&self) -> Array { + let std_ = self.data.mapv(|x| x * x).sum_axis(Axis(0)).mapv(f32::sqrt); + self.data.clone() / std_ + } + + pub fn top1(&self) -> (usize, f32, Option) { + self.topk(1)[0].to_owned() + } +} diff --git 
a/src/engine.rs b/src/engine.rs new file mode 100644 index 0000000..59f5d6f --- /dev/null +++ b/src/engine.rs @@ -0,0 +1,370 @@ +use anyhow::Result; +use half::f16; +use ndarray::{Array, IxDyn}; +use ort::{ + ExecutionProvider, ExecutionProviderDispatch, Session, SessionBuilder, TensorElementType, + TensorRTExecutionProvider, ValueType, +}; + +use crate::{config_dir, Device, MinOptMax, Options, CHECK_MARK, CROSS_MARK, SAFE_CROSS_MARK}; + +#[derive(Debug)] +pub struct OrtEngine { + session: Session, + device: Device, + inputs_minoptmax: Vec>, + inames: Vec, + ishapes: Vec>, + idtypes: Vec, + onames: Vec, + oshapes: Vec>, + odtypes: Vec, + profile: bool, + num_dry_run: usize, +} + +impl OrtEngine { + pub fn dry_run(&self) -> Result<()> { + if self.num_dry_run == 0 { + println!("{SAFE_CROSS_MARK} No dry run count specified, skipping the dry run."); + return Ok(()); + } + let mut xs: Vec> = Vec::new(); + for i in self.inputs_minoptmax.iter() { + let mut x: Vec = Vec::new(); + for i_ in i.iter() { + x.push(i_.opt as usize); + } + let x: Array = Array::ones(x).into_dyn(); + xs.push(x); + } + for _ in 0..self.num_dry_run { + self.run(xs.as_ref())?; + } + println!("{CHECK_MARK} Dry run x{}", self.num_dry_run); + Ok(()) + } + + pub fn new(config: &Options) -> Result { + ort::init().commit()?; + let session = Session::builder()?.with_model_from_file(&config.onnx_path)?; + + // inputs + let mut ishapes = Vec::new(); + let mut idtypes = Vec::new(); + let mut inames = Vec::new(); + for x in session.inputs.iter() { + inames.push(x.name.to_owned()); + if let ValueType::Tensor { ty, dimensions } = &x.input_type { + ishapes.push(dimensions.iter().map(|x| *x as isize).collect::>()); + idtypes.push(*ty); + } else { + ishapes.push(vec![-1_isize]); + idtypes.push(ort::TensorElementType::Float32); + } + } + // outputs + let mut oshapes = Vec::new(); + let mut odtypes = Vec::new(); + let mut onames = Vec::new(); + for x in session.outputs.iter() { + onames.push(x.name.to_owned()); + if 
let ValueType::Tensor { ty, dimensions } = &x.output_type { + oshapes.push(dimensions.iter().map(|x| *x as isize).collect::>()); + odtypes.push(*ty); + } else { + oshapes.push(vec![-1_isize]); + odtypes.push(ort::TensorElementType::Float32); + } + } + let mut inputs_minoptmax: Vec> = Vec::new(); + for (i, dims) in ishapes.iter().enumerate() { + let mut v_: Vec = Vec::new(); + for (ii, &x) in dims.iter().enumerate() { + let x_default: MinOptMax = (ishapes[i][ii], ishapes[i][ii], ishapes[i][ii]).into(); + let x: MinOptMax = match (i, ii) { + (0, 0) => Self::_set_ixx(x, &config.i00, i, ii).unwrap_or(x_default), + (0, 1) => Self::_set_ixx(x, &config.i01, i, ii).unwrap_or(x_default), + (0, 2) => Self::_set_ixx(x, &config.i02, i, ii).unwrap_or(x_default), + (0, 3) => Self::_set_ixx(x, &config.i03, i, ii).unwrap_or(x_default), + (0, 4) => Self::_set_ixx(x, &config.i04, i, ii).unwrap_or(x_default), + (0, 5) => Self::_set_ixx(x, &config.i05, i, ii).unwrap_or(x_default), + (1, 0) => Self::_set_ixx(x, &config.i10, i, ii).unwrap_or(x_default), + (1, 1) => Self::_set_ixx(x, &config.i11, i, ii).unwrap_or(x_default), + (1, 2) => Self::_set_ixx(x, &config.i12, i, ii).unwrap_or(x_default), + (1, 3) => Self::_set_ixx(x, &config.i13, i, ii).unwrap_or(x_default), + (1, 4) => Self::_set_ixx(x, &config.i14, i, ii).unwrap_or(x_default), + (1, 5) => Self::_set_ixx(x, &config.i15, i, ii).unwrap_or(x_default), + (2, 0) => Self::_set_ixx(x, &config.i20, i, ii).unwrap_or(x_default), + (2, 1) => Self::_set_ixx(x, &config.i21, i, ii).unwrap_or(x_default), + (2, 2) => Self::_set_ixx(x, &config.i22, i, ii).unwrap_or(x_default), + (2, 3) => Self::_set_ixx(x, &config.i23, i, ii).unwrap_or(x_default), + (2, 4) => Self::_set_ixx(x, &config.i24, i, ii).unwrap_or(x_default), + (2, 5) => Self::_set_ixx(x, &config.i25, i, ii).unwrap_or(x_default), + (3, 0) => Self::_set_ixx(x, &config.i30, i, ii).unwrap_or(x_default), + (3, 1) => Self::_set_ixx(x, &config.i31, i, ii).unwrap_or(x_default), + (3, 2) => 
Self::_set_ixx(x, &config.i32_, i, ii).unwrap_or(x_default), + (3, 3) => Self::_set_ixx(x, &config.i33, i, ii).unwrap_or(x_default), + (3, 4) => Self::_set_ixx(x, &config.i34, i, ii).unwrap_or(x_default), + (3, 5) => Self::_set_ixx(x, &config.i35, i, ii).unwrap_or(x_default), + _ => todo!(), + }; + v_.push(x); + } + inputs_minoptmax.push(v_); + } + + // build again + let builder = Session::builder()?; + let device = config.device.to_owned(); + let _ep = match device { + Device::Trt(device_id) => Self::build_trt( + &inames, + &inputs_minoptmax, + &builder, + device_id, + config.trt_int8_enable, + config.trt_fp16_enable, + config.trt_engine_cache_enable, + )?, + Device::Cuda(device_id) => Self::build_cuda(&builder, device_id)?, + Device::CoreML(_) => { + let coreml = ort::CoreMLExecutionProvider::default() + .with_subgraphs() + // .with_ane_only() + .build(); + if coreml.is_available()? && coreml.register(&builder).is_ok() { + println!("{CHECK_MARK} Using CoreML"); + coreml + } else { + println!("{CROSS_MARK} CoreML initialization failed"); + println!("{CHECK_MARK} Using CPU"); + ort::CPUExecutionProvider::default().build() + } + } + Device::Cpu(_) => { + println!("{CHECK_MARK} Using CPU"); + ort::CPUExecutionProvider::default().build() + } + _ => todo!(), + }; + let session = builder + .with_optimization_level(ort::GraphOptimizationLevel::Level3)? 
+ .with_model_from_file(&config.onnx_path)?; + + Ok(Self { + session, + device, + inputs_minoptmax, + inames, + ishapes, + idtypes, + onames, + oshapes, + odtypes, + profile: config.profile, + num_dry_run: config.num_dry_run, + }) + } + + fn build_trt( + inames: &[String], + inputs_minoptmax: &[Vec], + builder: &SessionBuilder, + device_id: usize, + int8_enable: bool, + fp16_enable: bool, + engine_cache_enable: bool, + ) -> Result { + // auto generate shapes + let mut spec_min = String::new(); + let mut spec_opt = String::new(); + let mut spec_max = String::new(); + for (i, name) in inames.iter().enumerate() { + if i != 0 { + spec_min.push(','); + spec_opt.push(','); + spec_max.push(','); + } + let mut s_min = format!("{}:", name); + let mut s_opt = format!("{}:", name); + let mut s_max = format!("{}:", name); + for d in inputs_minoptmax[i].iter() { + let min_ = &format!("{}x", d.min); + let opt_ = &format!("{}x", d.opt); + let max_ = &format!("{}x", d.max); + s_min += min_; + s_opt += opt_; + s_max += max_; + } + s_min.pop(); + s_opt.pop(); + s_max.pop(); + spec_min += &s_min; + spec_opt += &s_opt; + spec_max += &s_max; + } + let trt = TensorRTExecutionProvider::default() + .with_device_id(device_id as i32) + .with_int8(int8_enable) + .with_fp16(fp16_enable) + .with_engine_cache(engine_cache_enable) + // .with_engine_cache_path(config_dir().to_str().unwrap()) + .with_engine_cache_path(format!( + "{}/{}", + config_dir().to_str().unwrap(), + "trt-cache" + )) + .with_timing_cache(false) + .with_profile_min_shapes(spec_min) + .with_profile_opt_shapes(spec_opt) + .with_profile_max_shapes(spec_max) + .build(); + if trt.is_available()? && trt.register(builder).is_ok() { + println!("{CHECK_MARK} Using TensorRT"); + Ok(trt) + } else { + println!("{CROSS_MARK} TensorRT initialization failed. 
Try CUDA..."); + Self::build_cuda(builder, device_id) + } + } + + fn build_cuda(builder: &SessionBuilder, device_id: usize) -> Result { + let cuda = ort::CUDAExecutionProvider::default() + .with_device_id(device_id as i32) + .build(); + if cuda.is_available()? && cuda.register(builder).is_ok() { + println!("{CHECK_MARK} Using CUDA"); + Ok(cuda) + } else { + println!("{CROSS_MARK} CUDA initialization failed"); + println!("{CHECK_MARK} Using CPU"); + Ok(ort::CPUExecutionProvider::default().build()) + } + } + + pub fn run(&self, xs: &[Array]) -> Result>> { + // input + let mut xs_ = Vec::new(); + let t_pre = std::time::Instant::now(); + for (idtype, x) in self.idtypes.iter().zip(xs.iter()) { + let x_ = match idtype { + TensorElementType::Float32 => ort::Value::from_array(x.view())?, + TensorElementType::Float16 => ort::Value::from_array(x.mapv(f16::from_f32).view())?, + TensorElementType::Int32 => ort::Value::from_array(x.mapv(|x_| x_ as i32).view())?, + TensorElementType::Int64 => ort::Value::from_array(x.mapv(|x_| x_ as i64).view())?, + _ => todo!(), + }; + xs_.push(x_); + } + let t_pre = t_pre.elapsed(); + + // inference + let t_run = std::time::Instant::now(); + let ys = self.session.run(xs_.as_ref())?; + let t_run = t_run.elapsed(); + + // oputput + let mut ys_ = Vec::new(); + let t_post = std::time::Instant::now(); + for ((_, y), dtype) in ys.iter().zip(self.odtypes.iter()) { + let y_ = match &dtype { + TensorElementType::Float32 => y.extract_tensor::()?.view().to_owned(), + TensorElementType::Float16 => y.extract_tensor::()?.view().mapv(f16::to_f32), + _ => todo!(), + }; + ys_.push(y_); + } + let t_post = t_post.elapsed(); + if self.profile { + println!( + "[Profile] batch: {:?} => {:.4?} (i: {t_pre:.4?}, run: {t_run:.4?}, o: {t_post:.4?})", + self.batch().opt, + t_pre + t_run + t_post + ); + } + Ok(ys_) + } + + pub fn _set_ixx(x: isize, ixx: &Option, i: usize, ii: usize) -> Option { + match x { + -1 => { + match ixx { + None => panic!( + "{CROSS_MARK} Using 
dynamic shapes in inputs without specifying it: the {}-th input, the {}-th dimension.", + i + 1, + ii + 1 + ), + Some(ixx) => Some(ixx.to_owned()), // customized + } + } + _ => Some((x, x, x).into()), // customized, but not dynamic + } + } + + pub fn oshapes(&self) -> &Vec> { + &self.oshapes + } + + pub fn onames(&self) -> &Vec { + &self.onames + } + + pub fn odtypes(&self) -> &Vec { + &self.odtypes + } + + pub fn ishapes(&self) -> &Vec> { + &self.ishapes + } + + pub fn inames(&self) -> &Vec { + &self.inames + } + + pub fn idtypes(&self) -> &Vec { + &self.idtypes + } + + pub fn device(&self) -> &Device { + &self.device + } + + pub fn inputs_minoptmax(&self) -> &Vec> { + &self.inputs_minoptmax + } + + pub fn batch(&self) -> &MinOptMax { + &self.inputs_minoptmax[0][0] + } + + pub fn height(&self) -> &MinOptMax { + &self.inputs_minoptmax[0][2] + } + + /// Min/opt/max range of dimension index 3 of the first input (`height()` reads index 2). + pub fn width(&self) -> &MinOptMax { + &self.inputs_minoptmax[0][3] + } + + pub fn is_batch_dyn(&self) -> bool { + self.ishapes[0][0] == -1 + } + + pub fn try_fetch(&self, key: &str) -> Option { + match self.session.metadata() { + Err(_) => None, + Ok(metadata) => match metadata.custom(key) { + Err(_) => None, + Ok(value) => value, + }, + } + } + + pub fn session(&self) -> &Session { + &self.session + } + + pub fn version(&self) -> Option { + self.try_fetch("version") + } +} diff --git a/src/keypoint.rs b/src/keypoint.rs new file mode 100644 index 0000000..06e386d --- /dev/null +++ b/src/keypoint.rs @@ -0,0 +1,35 @@ +use crate::Point; + +#[derive(PartialEq, Clone, Default)] +pub struct Keypoint { + pub point: Point, + confidence: f32, +} + +impl std::fmt::Debug for Keypoint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Keypoint") + .field("x", &self.point.x) + .field("y", &self.point.y) + .field("confidence", &self.confidence) + .finish() + } +} + +impl Keypoint { + pub fn new(point: Point, confidence: f32) -> Self { + Self { point, confidence } + } + + pub fn x(&self) -> f32 {
+ self.point.x + } + + pub fn y(&self) -> f32 { + self.point.y + } + + pub fn confidence(&self) -> f32 { + self.confidence + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..741f29b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,43 @@ +mod annotator; +mod bbox; +mod dataloader; +mod device; +mod dynconf; +mod embedding; +mod engine; +mod keypoint; +mod min_opt_max; +pub mod models; +pub mod ops; +mod options; +mod point; +mod rect; +mod results; +mod rotated_rect; +mod tokenizer_stream; +mod utils; + +pub use annotator::Annotator; +pub use bbox::Bbox; +pub use dataloader::DataLoader; +pub use device::Device; +pub use dynconf::DynConf; +pub use embedding::Embedding; +pub use engine::OrtEngine; +pub use keypoint::Keypoint; +pub use min_opt_max::MinOptMax; +pub use options::Options; +pub use point::Point; +pub use rect::Rect; +pub use results::Results; +pub use rotated_rect::RotatedRect; +pub use tokenizer_stream::TokenizerStream; +pub use utils::{ + auto_load, config_dir, download, non_max_suppression, string_now, COCO_NAMES_80, + COCO_SKELETON_17, +}; + +const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1"; +const CHECK_MARK: &str = "✅"; +const CROSS_MARK: &str = "❌"; +const SAFE_CROSS_MARK: &str = "❎"; diff --git a/src/min_opt_max.rs b/src/min_opt_max.rs new file mode 100644 index 0000000..b79d0c3 --- /dev/null +++ b/src/min_opt_max.rs @@ -0,0 +1,42 @@ +#[derive(Debug, Clone)] +pub struct MinOptMax { + pub min: isize, + pub opt: isize, + pub max: isize, +} + +impl Default for MinOptMax { + fn default() -> Self { + Self { + min: -1, + opt: -1, + max: -1, + } + } +} + +impl From<(isize, isize, isize)> for MinOptMax { + fn from((min, opt, max): (isize, isize, isize)) -> Self { + let min = min.min(opt); + let max = max.max(opt); + Self { min, opt, max } + } +} + +impl From<[isize; 3]> for MinOptMax { + fn from([min, opt, max]: [isize; 3]) -> Self { + let min = min.min(opt); + let max = max.max(opt); + Self { 
min, opt, max } + } +} + +impl MinOptMax { + pub fn new(opt: isize) -> Self { + Self { + min: opt, + opt, + max: opt, + } + } +} diff --git a/src/models/blip.rs b/src/models/blip.rs new file mode 100644 index 0000000..f814d47 --- /dev/null +++ b/src/models/blip.rs @@ -0,0 +1,135 @@ +use anyhow::Result; +use image::DynamicImage; +use ndarray::{s, Array, Axis, IxDyn}; +use std::io::Write; +use tokenizers::Tokenizer; + +use crate::{auto_load, ops, MinOptMax, Options, OrtEngine, TokenizerStream}; + +#[derive(Debug)] +pub struct Blip { + pub textual: OrtEngine, + pub visual: OrtEngine, + pub height: MinOptMax, + pub width: MinOptMax, + pub batch_visual: MinOptMax, + pub batch_textual: MinOptMax, + tokenizer: TokenizerStream, +} + +impl Blip { + pub fn new(options_visual: Options, options_textual: Options) -> Result { + let visual = OrtEngine::new(&options_visual)?; + let textual = OrtEngine::new(&options_textual)?; + let (batch_visual, batch_textual, height, width) = ( + visual.batch().to_owned(), + textual.batch().to_owned(), + visual.height().to_owned(), + visual.width().to_owned(), + ); + let tokenizer = match &options_textual.tokenizer { + None => auto_load("tokenizer-blip.json")?, + Some(tokenizer) => tokenizer.into(), + }; + let tokenizer = Tokenizer::from_file(tokenizer).unwrap(); + let tokenizer = TokenizerStream::new(tokenizer); + visual.dry_run()?; + textual.dry_run()?; + Ok(Self { + textual, + visual, + batch_visual, + batch_textual, + height, + width, + tokenizer, + }) + } + + pub fn encode_images(&self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?; + let ys: Vec> = self.visual.run(&[xs_])?; + let ys = ys[0].to_owned(); + Ok(ys) + } + + pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> { + // this demo use batch_size=1 + let x = image::io::Reader::open(path)?.decode()?; + let image_embeds = self.encode_images(&[x])?; + let image_embeds_attn_mask: Array = + 
Array::ones((1, image_embeds.shape()[1])).into_dyn(); + + // conditional + let mut input_ids = match prompt { + None => { + print!("[Unconditional image captioning]: "); + vec![0.0f32] + } + + Some(prompt) => { + let encodings = self.tokenizer.tokenizer().encode(prompt, false); + let ids: Vec = encodings + .unwrap() + .get_ids() + .iter() + .map(|x| *x as f32) + .collect(); + print!("[Conditional image captioning]: {} ", prompt); + ids + } + }; + loop { + let input_ids_nd: Array = Array::from_vec(input_ids.to_owned()).into_dyn(); + let input_ids_nd = input_ids_nd.insert_axis(Axis(0)); + let input_ids_attn_mask: Array = + Array::ones(input_ids_nd.shape()).into_dyn(); + let y = self.textual.run(&[ + input_ids_nd, + input_ids_attn_mask, + image_embeds.to_owned(), + image_embeds_attn_mask.to_owned(), + ])?; // N, length, vocab_size + let y = y[0].to_owned(); + let y = y.slice(s!(0, -1.., ..)); + + // softmax + let exps = y.mapv(|c| c.exp()); + let stds = exps.sum_axis(Axis(1)); + let probs = exps / stds.insert_axis(Axis(1)); + let probs = probs.slice(s!(0, ..)); + + // argmax + let (token_id, _) = probs + .into_iter() + .enumerate() + .reduce(|max, x| if x.1 > max.1 { x } else { max }) + .unwrap(); + input_ids.push(token_id as f32); + + // SEP + if token_id == 102 { + break; + } + + // streaming generation + if let Some(t) = self.tokenizer.next_token(token_id as u32)? 
{ + print!("{t}"); + std::io::stdout().flush()?; + } + // sleep for test + std::thread::sleep(std::time::Duration::from_millis(10)); + } + println!(); + self.tokenizer.clear(); + Ok(()) + } + + pub fn batch_visual(&self) -> usize { + self.batch_visual.opt as usize + } + + pub fn batch_textual(&self) -> usize { + self.batch_textual.opt as usize + } +} diff --git a/src/models/clip.rs b/src/models/clip.rs new file mode 100644 index 0000000..8972b30 --- /dev/null +++ b/src/models/clip.rs @@ -0,0 +1,105 @@ +use crate::{auto_load, ops, MinOptMax, Options, OrtEngine}; +use anyhow::Result; +use image::DynamicImage; +use itertools::Itertools; +use ndarray::{Array, Array2, Axis, IxDyn}; +use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer}; + +#[derive(Debug)] +pub struct Clip { + pub textual: OrtEngine, + pub visual: OrtEngine, + pub height: MinOptMax, + pub width: MinOptMax, + pub batch_visual: MinOptMax, + pub batch_textual: MinOptMax, + tokenizer: Tokenizer, + context_length: usize, +} + +impl Clip { + pub fn new(options_visual: Options, options_textual: Options) -> Result { + let context_length = 77; + let visual = OrtEngine::new(&options_visual)?; + let textual = OrtEngine::new(&options_textual)?; + let (batch_visual, batch_textual, height, width) = ( + visual.inputs_minoptmax()[0][0].to_owned(), + textual.inputs_minoptmax()[0][0].to_owned(), + visual.inputs_minoptmax()[0][2].to_owned(), + visual.inputs_minoptmax()[0][3].to_owned(), + ); + let tokenizer = match &options_textual.tokenizer { + None => auto_load("tokenizer-clip.json").unwrap(), + Some(tokenizer) => tokenizer.into(), + }; + let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap(); + tokenizer.with_padding(Some(PaddingParams { + strategy: PaddingStrategy::Fixed(context_length), + direction: PaddingDirection::Right, + pad_to_multiple_of: None, + pad_id: 0, + pad_type_id: 0, + pad_token: "[PAD]".to_string(), + })); + + visual.dry_run()?; + textual.dry_run()?; + + Ok(Self { + 
textual, + visual, + batch_visual, + batch_textual, + height, + width, + tokenizer, + context_length, + }) + } + + pub fn encode_images(&self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?; + let ys: Vec> = self.visual.run(&[xs_])?; + let ys = ys[0].to_owned(); + Ok(ys) + } + + pub fn encode_texts(&self, texts: &[String]) -> Result> { + let encodings = self + .tokenizer + .encode_batch(texts.to_owned(), false) + .unwrap(); + let xs: Vec = encodings + .iter() + .map(|i| i.get_ids().iter().map(|b| *b as f32).collect()) + .concat(); + let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn(); + let ys = self.textual.run(&[xs])?; + let ys = ys[0].to_owned(); + // let ys = ops::norm(&ys); + Ok(ys) + } + + pub fn get_similarity( + &self, + images_feats: &Array, + texts_feats: &Array, + ) -> Result>> { + let images_feats = images_feats.clone().into_dimensionality::()?; + let texts_feats = texts_feats.clone().into_dimensionality::()?; + let matrix = images_feats.dot(&texts_feats.t()); // [M, N] + let exps = matrix.mapv(|x| x.exp()); //[M, N] + let stds = exps.sum_axis(Axis(1)); //[M, 1] + let matrix = exps / stds.insert_axis(Axis(1)); // [M, N] + let similarity: Vec> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect(); + Ok(similarity) + } + + pub fn batch_visual(&self) -> usize { + self.batch_visual.opt as usize + } + + pub fn batch_textual(&self) -> usize { + self.batch_textual.opt as usize + } +} diff --git a/src/models/dinov2.rs b/src/models/dinov2.rs new file mode 100644 index 0000000..ae8721d --- /dev/null +++ b/src/models/dinov2.rs @@ -0,0 +1,39 @@ +use crate::{ops, MinOptMax, Options, OrtEngine}; +use anyhow::Result; +use image::DynamicImage; +use ndarray::{Array, IxDyn}; + +#[derive(Debug)] +pub struct Dinov2 { + engine: OrtEngine, + pub height: MinOptMax, + pub width: MinOptMax, + pub batch: MinOptMax, +} + +impl Dinov2 { + pub fn new(options: &Options) -> 
Result { + let engine = OrtEngine::new(options)?; + let (batch, height, width) = ( + engine.inputs_minoptmax()[0][0].to_owned(), + engine.inputs_minoptmax()[0][2].to_owned(), + engine.inputs_minoptmax()[0][3].to_owned(), + ); + engine.dry_run()?; + + Ok(Self { + engine, + height, + width, + batch, + }) + } + + pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?; + let ys: Vec> = self.engine.run(&[xs_])?; + let ys = ys[0].to_owned(); + let ys = ops::norm(&ys); + Ok(ys) + } +} diff --git a/src/models/mod.rs b/src/models/mod.rs new file mode 100644 index 0000000..9dc0d3f --- /dev/null +++ b/src/models/mod.rs @@ -0,0 +1,11 @@ +mod blip; +mod clip; +mod dinov2; +mod rtdetr; +mod yolo; + +pub use blip::Blip; +pub use clip::Clip; +pub use dinov2::Dinov2; +pub use rtdetr::RTDETR; +pub use yolo::YOLO; diff --git a/src/models/rtdetr.rs b/src/models/rtdetr.rs new file mode 100644 index 0000000..a908299 --- /dev/null +++ b/src/models/rtdetr.rs @@ -0,0 +1,154 @@ +use anyhow::Result; +use image::DynamicImage; +use ndarray::{s, Array, Axis, IxDyn}; +use regex::Regex; + +use crate::{ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Results}; + +#[derive(Debug)] +pub struct RTDETR { + engine: OrtEngine, + height: MinOptMax, + width: MinOptMax, + batch: MinOptMax, + annotator: Annotator, + confs: DynConf, + saveout: Option, + nc: usize, + names: Option>, +} + +impl RTDETR { + pub fn new(options: &Options) -> Result { + let engine = OrtEngine::new(options)?; + let (batch, height, width) = ( + engine.inputs_minoptmax()[0][0].to_owned(), + engine.inputs_minoptmax()[0][2].to_owned(), + engine.inputs_minoptmax()[0][3].to_owned(), + ); + let names: Option<_> = match &options.names { + None => engine.try_fetch("names").map(|names| { + let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap(); + let mut names_ = vec![]; + for (_, [_, name, _]) in re.captures_iter(&names).map(|x| 
x.extract()) { + names_.push(name.to_string()); + } + names_ + }), + Some(names) => Some(names.to_owned()), + }; + let nc = options.nc.unwrap_or( + names + .as_ref() + .expect("Failed to get num_classes, make it explicit with `--nc`") + .len(), + ); + let annotator = Annotator::default(); + let confs = DynConf::new(&options.confs, nc); + engine.dry_run()?; + + Ok(Self { + engine, + confs, + nc, + height, + width, + batch, + saveout: options.saveout.to_owned(), + annotator, + names, + }) + } + + pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?; + let ys = self.engine.run(&[xs_])?; + let ys = self.postprocess(ys, xs)?; + match &self.saveout { + None => {} + Some(saveout) => { + for (img0, y) in xs.iter().zip(ys.iter()) { + let mut img = img0.to_rgb8(); + self.annotator.plot(&mut img, y); + self.annotator.save(&img, saveout); + } + } + } + Ok(ys) + } + + pub fn postprocess( + &self, + xs: Vec>, + xs0: &[DynamicImage], + ) -> Result> { + const CXYWH_OFFSET: usize = 4; // cxcywh + let preds = &xs[0]; + + let mut ys = Vec::new(); + for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() { + // [bs, num_query, 4 + nc] + let width_original = xs0[idx].width() as f32; + let height_original = xs0[idx].height() as f32; + let ratio = + (self.width() as f32 / width_original).min(self.height() as f32 / height_original); + + // save each result + let mut y_bboxes = Vec::new(); + for pred in anchor.axis_iter(Axis(0)) { + let bbox = pred.slice(s![0..CXYWH_OFFSET]); + let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc]); + + // confidence & id + let (id, &confidence) = clss + .into_iter() + .enumerate() + .reduce(|max, x| if x.1 > max.1 { x } else { max }) + .unwrap(); + + // confs filter + if confidence < self.confs[id] { + continue; + } + + // bbox -> input size scale -> rescale + let x = (bbox[0] - bbox[2] / 2.) * self.width() as f32 / ratio; + let y = (bbox[1] - bbox[3] / 2.) 
* self.height() as f32 / ratio; + let w = bbox[2] * self.width() as f32 / ratio; + let h = bbox[3] * self.height() as f32 / ratio; + let y_bbox = Bbox::new( + Rect::from_xywh( + x.max(0.0f32).min(width_original), + y.max(0.0f32).min(height_original), + w, + h, + ), + id, + confidence, + self.names.as_ref().map(|names| names[id].clone()), + ); + y_bboxes.push(y_bbox) + } + let y = Results { + probs: None, + bboxes: Some(y_bboxes), + keypoints: None, + masks: None, + }; + ys.push(y); + } + Ok(ys) + } + + pub fn batch(&self) -> isize { + self.batch.opt + } + + pub fn width(&self) -> isize { + self.width.opt + } + + pub fn height(&self) -> isize { + self.height.opt + } +} diff --git a/src/models/yolo.rs b/src/models/yolo.rs new file mode 100644 index 0000000..e783bda --- /dev/null +++ b/src/models/yolo.rs @@ -0,0 +1,387 @@ +use anyhow::Result; +use clap::ValueEnum; +use image::{DynamicImage, ImageBuffer}; +use ndarray::{s, Array, Axis, IxDyn}; +use regex::Regex; + +use crate::{ + non_max_suppression, ops, Annotator, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options, + OrtEngine, Point, Rect, Results, +}; + +const CXYWH_OFFSET: usize = 4; +const KPT_STEP: usize = 3; + +#[derive(Debug, Clone, ValueEnum)] +enum YOLOTask { + Classify, + Detect, + Pose, + Segment, + Obb, // TODO +} + +#[derive(Debug)] +pub struct YOLO { + engine: OrtEngine, + nc: usize, + nk: usize, + nm: usize, + height: MinOptMax, + width: MinOptMax, + batch: MinOptMax, + task: YOLOTask, + confs: DynConf, + kconfs: DynConf, + iou: f32, + saveout: Option, + annotator: Annotator, + names: Option>, + apply_nms: bool, + anchors_first: bool, +} + +impl YOLO { + pub fn new(options: &Options) -> Result { + let engine = OrtEngine::new(options)?; + let (batch, height, width) = ( + engine.batch().to_owned(), + engine.height().to_owned(), + engine.width().to_owned(), + ); + let task = match engine + .try_fetch("task") + .unwrap_or("detect".to_string()) + .as_str() + { + "classify" => YOLOTask::Classify, + 
"detect" => YOLOTask::Detect, + "pose" => YOLOTask::Pose, + "segment" => YOLOTask::Segment, + x => todo!("{:?} is not supported for now!", x), + }; + + // try from custom class names, and then model metadata + let mut names = options.names.to_owned().or(Self::fetch_names(&engine)); + let nc = match options.nc { + Some(nc) => { + match &names { + None => names = Some((0..nc).map(|x| x.to_string()).collect::>()), + Some(names) => { + assert_eq!( + nc, + names.len(), + "the length of `nc` and `class names` is not equal." + ); + } + } + nc + } + None => match &names { + Some(names) => names.len(), + None => panic!( + "Can not parse model without `nc` and `class names`. Try to make it explicit." + ), + }, + }; + + // try from model metadata + let nk = engine + .try_fetch("kpt_shape") + .map(|kpt_string| { + let re = Regex::new(r"([0-9]+), ([0-9]+)").unwrap(); + let caps = re.captures(&kpt_string).unwrap(); + caps.get(1).unwrap().as_str().parse::().unwrap() + }) + .unwrap_or(0_usize); + let nm = if let YOLOTask::Segment = task { + engine.oshapes()[1][1] as usize + } else { + 0_usize + }; + let confs = DynConf::new(&options.confs, nc); + let kconfs = DynConf::new(&options.kconfs, nk); + let mut annotator = Annotator::default(); + if let Some(skeletons) = &options.skeletons { + annotator = annotator.with_skeletons(skeletons); + } + let saveout = options.saveout.to_owned(); + engine.dry_run()?; + + Ok(Self { + engine, + confs, + kconfs, + iou: options.iou, + apply_nms: options.apply_nms, + nc, + nk, + nm, + height, + width, + batch, + task, + saveout, + annotator, + names, + anchors_first: options.anchors_first, + }) + } + + // pub fn run_with_dl(&mut self, dl: &Dataloader) -> Result> { + // for (images, paths) in dataloader { + // self.run(&images) + // } + // Ok(()) + // } + + pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?; + let ys = self.engine.run(&[xs_])?; + let ys = 
self.postprocess(ys, xs)?; + match &self.saveout { + None => println!("{ys:?}"), + Some(saveout) => { + for (img0, y) in xs.iter().zip(ys.iter()) { + let mut img = img0.to_rgb8(); + self.annotator.plot(&mut img, y); + self.annotator.save(&img, saveout); + } + } + } + Ok(ys) + } + + pub fn postprocess( + &self, + xs: Vec>, + xs0: &[DynamicImage], + ) -> Result> { + if let YOLOTask::Classify = self.task { + let mut ys = Vec::new(); + for batch in xs[0].axis_iter(Axis(0)) { + ys.push(Results::new( + Some(Embedding::new(batch.into_owned(), self.names.to_owned())), + None, + None, + None, + )); + } + Ok(ys) + } else { + let (preds, protos) = if xs.len() == 2 { + if xs[0].ndim() == 3 { + (&xs[0], Some(&xs[1])) + } else { + (&xs[1], Some(&xs[0])) + } + } else { + (&xs[0], None) + }; + + let mut ys = Vec::new(); + for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() { + // [b, 4 + nc + nm, na] + // input image + let width_original = xs0[idx].width() as f32; + let height_original = xs0[idx].height() as f32; + let ratio = (self.width() as f32 / width_original) + .min(self.height() as f32 / height_original); + + #[allow(clippy::type_complexity)] + let mut data: Vec<(Bbox, Option>, Option>)> = Vec::new(); + for pred in anchor.axis_iter(if self.anchors_first { Axis(0) } else { Axis(1) }) { + // split preds for different tasks + let bbox = pred.slice(s![0..CXYWH_OFFSET]); + let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc]); + let kpts = { + if let YOLOTask::Pose = self.task { + Some(pred.slice(s![pred.len() - KPT_STEP * self.nk..])) + } else { + None + } + }; + let coefs = { + if let YOLOTask::Segment = self.task { + Some(pred.slice(s![pred.len() - self.nm..]).to_vec()) + } else { + None + } + }; + + // confidence and index + let (id, &confidence) = clss + .into_iter() + .enumerate() + .reduce(|max, x| if x.1 > max.1 { x } else { max }) + .unwrap(); + + // confidence filter + if confidence < self.confs[id] { + continue; + } + + // bbox re-scale + let cx = 
bbox[0] / ratio; + let cy = bbox[1] / ratio; + let w = bbox[2] / ratio; + let h = bbox[3] / ratio; + let x = cx - w / 2.; + let y = cy - h / 2.; + let y_bbox = Bbox::new( + Rect::from_xywh( + x.max(0.0f32).min(width_original), + y.max(0.0f32).min(height_original), + w, + h, + ), + id, + confidence, + self.names.as_ref().map(|names| names[id].to_owned()), + ); + + // kpts + let y_kpts = { + if let Some(kpts) = kpts { + let mut kpts_ = Vec::new(); + for i in 0..self.nk { + let kx = kpts[KPT_STEP * i] / ratio; + let ky = kpts[KPT_STEP * i + 1] / ratio; + let kconf = kpts[KPT_STEP * i + 2]; + if kconf < self.kconfs[i] { + kpts_.push(Keypoint::default()); + } else { + kpts_.push(Keypoint::new( + Point::new( + kx.max(0.0f32).min(width_original), + ky.max(0.0f32).min(height_original), + ), + kconf, + )); + } + } + Some(kpts_) + } else { + None + } + }; + + // merged + data.push((y_bbox, y_kpts, coefs)); + } + + // nms + if self.apply_nms { + non_max_suppression(&mut data, self.iou); + } + + // decode + let mut y_bboxes: Vec = Vec::new(); + let mut y_kpts: Vec> = Vec::new(); + let mut y_masks: Vec> = Vec::new(); + for elem in data.into_iter() { + if let Some(kpts) = elem.1 { + y_kpts.push(kpts) + } + + // decode masks + if let Some(coefs) = elem.2 { + let proto = protos.unwrap().slice(s![idx, .., .., ..]); + let (nm, nh, nw) = proto.dim(); + + // coefs * proto -> mask + let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm) + let proto = proto.to_owned().into_shape((nm, nh * nw))?; // (nm, nh*nw) + let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n) + + // build image from ndarray + let mask_im: ImageBuffer, Vec> = + match ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec()) { + Some(image) => image, + None => panic!("can not create image from ndarray"), + }; + let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn + + // rescale masks + let (_, w_mask, h_mask) = + ops::scale_wh(width_original, height_original, nw as f32, nh 
as f32); + let mask_cropped = mask_im.crop(0, 0, w_mask as u32, h_mask as u32); + let mask_original = mask_cropped.resize_exact( + width_original as u32, + height_original as u32, + image::imageops::FilterType::Triangle, + ); + + // crop-mask with bbox + let mut mask_original_cropped = mask_original.into_luma8(); + for y in 0..height_original as usize { + for x in 0..width_original as usize { + if x < elem.0.xmin() as usize + || x > elem.0.xmax() as usize + || y < elem.0.ymin() as usize + || y > elem.0.ymax() as usize + { + mask_original_cropped.put_pixel( + x as u32, + y as u32, + image::Luma([0u8]), + ); + } + } + } + y_masks.push(mask_original_cropped.into_raw()); + } + y_bboxes.push(elem.0); + } + + // save each result + let y = Results { + probs: None, + bboxes: if !y_bboxes.is_empty() { + Some(y_bboxes) + } else { + None + }, + keypoints: if !y_kpts.is_empty() { + Some(y_kpts) + } else { + None + }, + masks: if !y_masks.is_empty() { + Some(y_masks) + } else { + None + }, + }; + ys.push(y); + } + + Ok(ys) + } + } + + fn fetch_names(engine: &OrtEngine) -> Option> { + // fetch class names from onnx metadata + // String format: `{0: 'person', 1: 'bicycle', 2: 'sports ball', ..., 27: "yellow_lady's_slipper"}` + engine.try_fetch("names").map(|names| { + let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap(); + let mut names_ = vec![]; + for (_, [_, name, _]) in re.captures_iter(&names).map(|x| x.extract()) { + names_.push(name.to_string()); + } + names_ + }) + } + + pub fn batch(&self) -> isize { + self.batch.opt + } + + pub fn width(&self) -> isize { + self.width.opt + } + + pub fn height(&self) -> isize { + self.height.opt + } +} diff --git a/src/ops.rs b/src/ops.rs new file mode 100644 index 0000000..945e7f0 --- /dev/null +++ b/src/ops.rs @@ -0,0 +1,95 @@ +use anyhow::Result; +use image::{DynamicImage, GenericImageView}; +use ndarray::{Array, Axis, Ix2, IxDyn}; + +pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) { + let r = (w1 / 
w0).min(h1 / h0); + (r, (w0 * r).round(), (h0 * r).round()) +} + +pub fn resize( + xs: &[DynamicImage], + height: u32, + width: u32, + norm_imagenet: bool, +) -> Result> { + let norm = 255.0; + let mut ys = Array::ones(vec![xs.len(), 3, height as usize, width as usize]).into_dyn(); + // let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); + for (idx, x) in xs.iter().enumerate() { + let (w0, h0) = x.dimensions(); + let w0 = w0 as f32; + let h0 = h0 as f32; + let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round + let img = x.resize_exact( + w_new as u32, + h_new as u32, + image::imageops::FilterType::Triangle, + ); + for (x, y, rgb) in img.pixels() { + let x = x as usize; + let y = y as usize; + let [r, g, b, _] = rgb.0; + ys[[idx, 0, y, x]] = (r as f32) / norm; + ys[[idx, 1, y, x]] = (g as f32) / norm; + ys[[idx, 2, y, x]] = (b as f32) / norm; + } + } + + if norm_imagenet { + let mean = + Array::from_shape_vec((1, 3, 1, 1), vec![0.48145466, 0.4578275, 0.40821073]).unwrap(); + let std = Array::from_shape_vec((1, 3, 1, 1), vec![0.26862954, 0.261_302_6, 0.275_777_1]) + .unwrap(); + ys = (ys - mean) / std; + } + Ok(ys) +} + +pub fn letterbox(xs: &[DynamicImage], height: u32, width: u32) -> Result> { + let norm = 255.0; + let bg = 144.0; + let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); + ys.fill(bg / norm); + for (idx, x) in xs.iter().enumerate() { + let (w0, h0) = x.dimensions(); + let w0 = w0 as f32; + let h0 = h0 as f32; + let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round + let img = x.resize_exact( + w_new as u32, + h_new as u32, + image::imageops::FilterType::Triangle, + ); + for (x, y, rgb) in img.pixels() { + let x = x as usize; + let y = y as usize; + let [r, g, b, _] = rgb.0; + ys[[idx, 0, y, x]] = (r as f32) / norm; + ys[[idx, 1, y, x]] = (g as f32) / norm; + ys[[idx, 2, y, x]] = (b as f32) / norm; + } + } + Ok(ys) +} + +pub fn 
norm(xs: &Array) -> Array { + let std_ = xs + .mapv(|x| x * x) + .sum_axis(Axis(1)) + .mapv(f32::sqrt) + .insert_axis(Axis(1)); + xs / std_ +} + +pub fn dot2(query: &Array, gallery: &Array) -> Result>> { + // (m, ndim) * (n, ndim).t => (m, n) + let query = query.to_owned().into_dimensionality::()?; + let gallery = gallery.to_owned().into_dimensionality::()?; + let matrix = query.dot(&gallery.t()); + let exps = matrix.mapv(|x| x.exp()); + let stds = exps.sum_axis(Axis(1)); + let matrix = exps / stds.insert_axis(Axis(1)); + let matrix: Vec> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect(); + Ok(matrix) +} diff --git a/src/options.rs b/src/options.rs new file mode 100644 index 0000000..f2ff5dc --- /dev/null +++ b/src/options.rs @@ -0,0 +1,321 @@ +use crate::{auto_load, Device, MinOptMax}; + +#[derive(Debug, Clone)] +pub struct Options { + pub onnx_path: String, + pub device: Device, + pub profile: bool, + pub num_dry_run: usize, + pub i00: Option, // 1st input, axis 0, batch usually + pub i01: Option, // 1st input, axis 1 + pub i02: Option, + pub i03: Option, + pub i04: Option, + pub i05: Option, + pub i10: Option, // 2nd input, axis 0 + pub i11: Option, + pub i12: Option, + pub i13: Option, + pub i14: Option, + pub i15: Option, + pub i20: Option, // 3rd input, axis 0 + pub i21: Option, + pub i22: Option, + pub i23: Option, + pub i24: Option, + pub i25: Option, + pub i30: Option, // 4th input, axis 0 + pub i31: Option, + pub i32_: Option, + pub i33: Option, + pub i34: Option, + pub i35: Option, + + // trt ep + pub trt_engine_cache_enable: bool, + pub trt_int8_enable: bool, + pub trt_fp16_enable: bool, + + // options for Vision and Language models + pub nc: Option, + pub nk: Option, + pub nm: Option, + pub confs: Vec, + pub kconfs: Vec, + pub iou: f32, + pub apply_nms: bool, + pub saveout: Option, + pub tokenizer: Option, + pub vocab: Option, + pub names: Option>, // class names + pub anchors_first: bool, // output format: [bs, anchors/na, pos+nc+nm] + pub 
skeletons: Option>, +} + +impl Default for Options { + fn default() -> Self { + Self { + onnx_path: String::new(), + device: Device::Cuda(0), + profile: false, + num_dry_run: 3, + i00: None, + i01: None, + i02: None, + i03: None, + i04: None, + i05: None, + i10: None, + i11: None, + i12: None, + i13: None, + i14: None, + i15: None, + i20: None, + i21: None, + i22: None, + i23: None, + i24: None, + i25: None, + i30: None, + i31: None, + i32_: None, + i33: None, + i34: None, + i35: None, + trt_engine_cache_enable: true, + trt_int8_enable: false, + trt_fp16_enable: false, + nc: None, + nk: None, + nm: None, + confs: vec![0.4f32], + kconfs: vec![0.5f32], + iou: 0.45f32, + apply_nms: true, + saveout: None, + tokenizer: None, + vocab: None, + names: None, + anchors_first: false, + skeletons: None, + } + } +} + +impl Options { + pub fn with_model(mut self, onnx_path: &str) -> Self { + self.onnx_path = auto_load(onnx_path).unwrap(); + self + } + + pub fn with_dry_run(mut self, n: usize) -> Self { + self.num_dry_run = n; + self + } + + pub fn with_cuda(mut self, id: usize) -> Self { + self.device = Device::Cuda(id); + self + } + + pub fn with_trt(mut self, id: usize) -> Self { + self.device = Device::Trt(id); + self + } + + pub fn with_cpu(mut self) -> Self { + self.device = Device::Cpu(0); + self + } + + pub fn with_coreml(mut self, id: usize) -> Self { + self.device = Device::CoreML(id); + self + } + + pub fn with_fp16(mut self, x: bool) -> Self { + self.trt_fp16_enable = x; + self + } + + pub fn with_profile(mut self, profile: bool) -> Self { + self.profile = profile; + self + } + + pub fn with_saveout(mut self, saveout: &str) -> Self { + self.saveout = Some(saveout.to_string()); + self + } + + pub fn with_names(mut self, names: &[&str]) -> Self { + self.names = Some(names.iter().map(|x| x.to_string()).collect::>()); + self + } + + pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self { + self.skeletons = Some(skeletons.to_vec()); + self + } + + pub fn 
with_anchors_first(mut self) -> Self { + self.anchors_first = true; + self + } + + pub fn with_nms(mut self, apply_nms: bool) -> Self { + self.apply_nms = apply_nms; + self + } + + pub fn with_nc(mut self, nc: usize) -> Self { + self.nc = Some(nc); + self + } + + pub fn with_nk(mut self, nk: usize) -> Self { + self.nk = Some(nk); + self + } + + pub fn with_iou(mut self, x: f32) -> Self { + self.iou = x; + self + } + + pub fn with_confs(mut self, confs: &[f32]) -> Self { + self.confs = confs.to_vec(); + self + } + + pub fn with_kconfs(mut self, kconfs: &[f32]) -> Self { + self.kconfs = kconfs.to_vec(); + self + } + + pub fn with_tokenizer(mut self, tokenizer: String) -> Self { + self.tokenizer = Some(tokenizer); + self + } + + pub fn with_i00(mut self, x: MinOptMax) -> Self { + self.i00 = Some(x); + self + } + + pub fn with_i01(mut self, x: MinOptMax) -> Self { + self.i01 = Some(x); + self + } + + pub fn with_i02(mut self, x: MinOptMax) -> Self { + self.i02 = Some(x); + self + } + + pub fn with_i03(mut self, x: MinOptMax) -> Self { + self.i03 = Some(x); + self + } + + pub fn with_i04(mut self, x: MinOptMax) -> Self { + self.i04 = Some(x); + self + } + + pub fn with_i05(mut self, x: MinOptMax) -> Self { + self.i05 = Some(x); + self + } + + pub fn with_i10(mut self, x: MinOptMax) -> Self { + self.i10 = Some(x); + self + } + + pub fn with_i11(mut self, x: MinOptMax) -> Self { + self.i11 = Some(x); + self + } + + pub fn with_i12(mut self, x: MinOptMax) -> Self { + self.i12 = Some(x); + self + } + + pub fn with_i13(mut self, x: MinOptMax) -> Self { + self.i13 = Some(x); + self + } + + pub fn with_i14(mut self, x: MinOptMax) -> Self { + self.i14 = Some(x); + self + } + + pub fn with_i15(mut self, x: MinOptMax) -> Self { + self.i15 = Some(x); + self + } + + pub fn with_i20(mut self, x: MinOptMax) -> Self { + self.i20 = Some(x); + self + } + + pub fn with_i21(mut self, x: MinOptMax) -> Self { + self.i21 = Some(x); + self + } + + pub fn with_i22(mut self, x: MinOptMax) -> 
Self { + self.i22 = Some(x); + self + } + + pub fn with_i23(mut self, x: MinOptMax) -> Self { + self.i23 = Some(x); + self + } + + pub fn with_i24(mut self, x: MinOptMax) -> Self { + self.i24 = Some(x); + self + } + + pub fn with_i25(mut self, x: MinOptMax) -> Self { + self.i25 = Some(x); + self + } + + pub fn with_i30(mut self, x: MinOptMax) -> Self { + self.i30 = Some(x); + self + } + + pub fn with_i31(mut self, x: MinOptMax) -> Self { + self.i31 = Some(x); + self + } + + pub fn with_i32_(mut self, x: MinOptMax) -> Self { + self.i32_ = Some(x); + self + } + + pub fn with_i33(mut self, x: MinOptMax) -> Self { + self.i33 = Some(x); + self + } + + pub fn with_i34(mut self, x: MinOptMax) -> Self { + self.i34 = Some(x); + self + } + + pub fn with_i35(mut self, x: MinOptMax) -> Self { + self.i35 = Some(x); + self + } +} diff --git a/src/point.rs b/src/point.rs new file mode 100644 index 0000000..d53aae0 --- /dev/null +++ b/src/point.rs @@ -0,0 +1,182 @@ +use std::ops::{Add, Div, Mul, Sub}; + +#[derive(Default, Debug, PartialOrd, PartialEq, Clone, Copy)] +pub struct Point { + pub x: f32, + pub y: f32, +} + +impl Add for Point { + type Output = Self; + + fn add(self, other: Self) -> Self::Output { + Self { + x: self.x + other.x, + y: self.y + other.y, + } + } +} + +impl Add for Point { + type Output = Self; + + fn add(self, other: f32) -> Self::Output { + Self { + x: self.x + other, + y: self.y + other, + } + } +} + +impl Sub for Point { + type Output = Self; + + fn sub(self, other: Self) -> Self::Output { + Self { + x: self.x - other.x, + y: self.y - other.y, + } + } +} + +impl Sub for Point { + type Output = Self; + + fn sub(self, other: f32) -> Self::Output { + Self { + x: self.x - other, // scalar offset is subtracted from each coordinate (mirrors the scalar `add`) + y: self.y - other, + } + } +} + +impl Mul for Point { + type Output = Self; + + fn mul(self, other: f32) -> Self::Output { + Self { + x: self.x * other, + y: self.y * other, + } + } +} + +impl Mul for Point { + type Output = Self; + + fn mul(self, other: Self) -> Self::Output { + 
Self { + x: self.x * other.x, + y: self.y * other.y, + } + } +} + +impl Div for Point { + type Output = Self; + + fn div(self, other: Self) -> Self::Output { + Self { + x: self.x / other.x, + y: self.y / other.y, + } + } +} + +impl Div for Point { + type Output = Self; + + fn div(self, other: f32) -> Self::Output { + Self { + x: self.x / other, + y: self.y / other, + } + } +} + +impl From<(f32, f32)> for Point { + fn from((x, y): (f32, f32)) -> Self { + Self { x, y } + } +} + +impl From for (f32, f32) { + fn from(Point { x, y }: Point) -> Self { + (x, y) + } +} + +impl From<[f32; 2]> for Point { + fn from([x, y]: [f32; 2]) -> Self { + Self { x, y } + } +} + +impl From for [f32; 2] { + fn from(Point { x, y }: Point) -> Self { + [x, y] + } +} + +impl Point { + pub fn new(x: f32, y: f32) -> Self { + Self { x, y } + } + + pub fn coord(&self) -> [f32; 2] { + [self.x, self.y] + } + + pub fn is_origin(&self) -> bool { + self.x == 0.0_f32 && self.y == 0.0_f32 + } + + pub fn distance_from(&self, other: &Point) -> f32 { + ((self.x - other.x).powf(2.0) + (self.y - other.y).powf(2.0)).sqrt() + } + + pub fn distance_from_origin(&self) -> f32 { + (self.x.powf(2.0) + self.y.powf(2.0)).sqrt() + } + + pub fn sum(&self) -> f32 { + self.x + self.y + } +} + +#[cfg(test)] +mod tests_points { + use super::Point; + + #[test] + fn new() { + let origin1 = Point::from((0.0f32, 0.0f32)); + let origin2 = Point::from([0.0f32, 0.0f32]); + let origin3 = (0.0f32, 0.0f32).into(); + let origin4 = [0.0f32, 0.0f32].into(); + let origin5 = Point::new(1.0f32, 2.0f32); + let origin6 = Point { + x: 1.0f32, + y: 2.0f32, + }; + assert_eq!(origin1, origin2); + assert_eq!(origin2, origin3); + assert_eq!(origin3, origin4); + assert_eq!(origin5, origin6); + assert!(origin1.is_origin()); + assert!(origin2.is_origin()); + assert!(origin3.is_origin()); + assert!(origin4.is_origin()); + assert!(!origin5.is_origin()); + assert!(!origin6.is_origin()); + } + + #[test] + fn into_tuple_array() { + let point = 
Point::from((1.0, 2.0)); + let tuple: (f32, f32) = point.into(); + let array: [f32; 2] = point.into(); + assert_eq!(tuple, (1.0, 2.0)); + assert_eq!(array, [1.0, 2.0]); + } +} diff --git a/src/rect.rs b/src/rect.rs new file mode 100644 index 0000000..8ce25f7 --- /dev/null +++ b/src/rect.rs @@ -0,0 +1,193 @@ +use crate::Point; + +#[derive(Default, PartialOrd, PartialEq, Clone, Copy)] +pub struct Rect { + top_left: Point, + bottom_right: Point, +} + +impl std::fmt::Debug for Rect { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Rectangle") + .field("xmin", &self.xmin()) + .field("ymin", &self.ymin()) + .field("xmax", &self.xmax()) + .field("ymax", &self.ymax()) + .finish() + } +} + +impl> From<(P, P)> for Rect { + fn from((top_left, bottom_right): (P, P)) -> Self { + Self { + top_left: top_left.into(), + bottom_right: bottom_right.into(), + } + } +} + +impl> From<[P; 2]> for Rect { + fn from([top_left, bottom_right]: [P; 2]) -> Self { + Self { + top_left: top_left.into(), + bottom_right: bottom_right.into(), + } + } +} + +impl Rect { + pub fn new(top_left: Point, bottom_right: Point) -> Self { + Self { + top_left, + bottom_right, + } + } + + pub fn from_xywh(x: f32, y: f32, w: f32, h: f32) -> Self { + Self { + top_left: Point::new(x, y), + bottom_right: Point::new(x + w, y + h), + } + } + + pub fn from_xyxy(x1: f32, y1: f32, x2: f32, y2: f32) -> Self { + Self { + top_left: Point::new(x1, y1), + bottom_right: Point::new(x2, y2), + } + } + + pub fn from_cxywh(cx: f32, cy: f32, w: f32, h: f32) -> Self { + Self { + top_left: Point::new(cx - w / 2.0, cy - h / 2.0), + bottom_right: Point::new(cx + w / 2.0, cy + h / 2.0), + } + } + + pub fn width(&self) -> f32 { + (self.bottom_right - self.top_left).x + } + + pub fn height(&self) -> f32 { + (self.bottom_right - self.top_left).y + } + + pub fn xmin(&self) -> f32 { + self.top_left.x + } + + pub fn ymin(&self) -> f32 { + self.top_left.y + } + + pub fn xmax(&self) -> f32 { + 
self.bottom_right.x + } + + pub fn ymax(&self) -> f32 { + self.bottom_right.y + } + + pub fn cx(&self) -> f32 { + (self.bottom_right.x + self.top_left.x) / 2.0 // x of the center point, consistent with `center()` + } + + pub fn cy(&self) -> f32 { + (self.bottom_right.y + self.top_left.y) / 2.0 // y of the center point, consistent with `center()` + } + + pub fn tl(&self) -> Point { + self.top_left + } + + pub fn br(&self) -> Point { + self.bottom_right + } + + pub fn tr(&self) -> Point { + Point::new(self.bottom_right.x, self.top_left.y) + } + + pub fn bl(&self) -> Point { + Point::new(self.top_left.x, self.bottom_right.y) + } + + pub fn center(&self) -> Point { + (self.bottom_right + self.top_left) / 2.0 + } + + pub fn area(&self) -> f32 { + self.height() * self.width() + } + + pub fn is_empty(&self) -> bool { + self.area() == 0.0 + } + + pub fn is_squre(&self) -> bool { + self.width() == self.height() + } + + pub fn intersect(&self, other: &Rect) -> f32 { + let l = self.xmin().max(other.xmin()); + let r = (self.xmin() + self.width()).min(other.xmin() + other.width()); + let t = self.ymin().max(other.ymin()); + let b = (self.ymin() + self.height()).min(other.ymin() + other.height()); + (r - l).max(0.) * (b - t).max(0.) 
+ } + + pub fn union(&self, other: &Rect) -> f32 { + self.area() + other.area() - self.intersect(other) + } + + pub fn iou(&self, other: &Rect) -> f32 { + self.intersect(other) / self.union(other) + } + + pub fn contains(&self, other: &Rect) -> bool { + self.xmin() <= other.xmin() + && self.xmax() >= other.xmax() + && self.ymin() <= other.ymin() + && self.ymax() >= other.ymax() + } +} + +#[cfg(test)] +mod tests { + use super::Rect; + use crate::Point; + + #[test] + fn new() { + let rect1 = Rect { + top_left: Point { + x: 0.0f32, + y: 0.0f32, + }, + bottom_right: Point { + x: 5.0f32, + y: 5.0f32, + }, + }; + let rect2 = Rect { + top_left: (0.0f32, 0.0f32).into(), + bottom_right: [5.0f32, 5.0f32].into(), + }; + let rect3 = Rect::new([0.0, 0.0].into(), [5.0, 5.0].into()); + let rect4: Rect = ((0.0, 0.0), (5.0, 5.0)).into(); + let rect5: Rect = [(0.0, 0.0), (5.0, 5.0)].into(); + let rect6: Rect = ([0.0, 0.0], [5.0, 5.0]).into(); + let rect7: Rect = Rect::from(([0.0, 0.0], [5.0, 5.0])); + let rect8: Rect = Rect::from([[0.0, 0.0], [5.0, 5.0]]); + let rect9: Rect = Rect::from([(0.0, 0.0), (5.0, 5.0)]); + let rect10: Rect = Rect::from_xyxy(0.0, 0.0, 5.0, 5.0); + let rect11: Rect = Rect::from_xywh(0.0, 0.0, 5.0, 5.0); + + assert_eq!(rect1, rect2); + assert_eq!(rect3, rect4); + assert_eq!(rect5, rect6); + assert_eq!(rect7, rect8); + assert_eq!(rect9, rect8); + assert_eq!(rect10, rect11); + } +} diff --git a/src/results.rs b/src/results.rs new file mode 100644 index 0000000..9e5c9f0 --- /dev/null +++ b/src/results.rs @@ -0,0 +1,59 @@ +use crate::{Bbox, Embedding, Keypoint}; + +#[derive(Clone, PartialEq, Default)] +pub struct Results { + pub probs: Option, + pub bboxes: Option>, + pub keypoints: Option>>, + pub masks: Option>>, +} + +impl std::fmt::Debug for Results { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Results") + .field("Probabilities", &self.probs) + .field("BoundingBoxes", &self.bboxes) + .field("Keypoints", 
&self.keypoints) + .field( + "Masks", + &format_args!("{:?}", self.masks().map(|masks| masks.len())), + ) + .finish() + } +} + +impl Results { + pub fn new( + probs: Option, + bboxes: Option>, + keypoints: Option>>, + masks: Option>>, + ) -> Self { + Self { + probs, + bboxes, + keypoints, + masks, + } + } + + pub fn probs(&self) -> Option<&Embedding> { + self.probs.as_ref() + } + + pub fn keypoints(&self) -> Option<&Vec>> { + self.keypoints.as_ref() + } + + pub fn masks(&self) -> Option<&Vec>> { + self.masks.as_ref() + } + + pub fn bboxes(&self) -> Option<&Vec> { + self.bboxes.as_ref() + } + + pub fn bboxes_mut(&mut self) -> Option<&mut Vec> { + self.bboxes.as_mut() + } +} diff --git a/src/rotated_rect.rs b/src/rotated_rect.rs new file mode 100644 index 0000000..ab878d5 --- /dev/null +++ b/src/rotated_rect.rs @@ -0,0 +1,155 @@ +use crate::Point; + +#[derive(Default, PartialOrd, PartialEq, Clone, Copy)] +pub struct RotatedRect { + center: Point, + width: f32, + height: f32, + rotation: f32, // (0, 90) radians +} + +impl std::fmt::Debug for RotatedRect { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RotatedRectangle") + .field("height", &self.height) + .field("width", &self.width) + .field("center", &self.center) + .field("rotation", &self.rotation) + .field("vertices", &self.vertices()) + .finish() + } +} + +impl RotatedRect { + pub fn new(center: Point, width: f32, height: f32, rotation: f32) -> Self { + Self { + center, + width, + height, + rotation, + } + } + + pub fn vertices(&self) -> [Point; 4] { + // [cos -sin] + // [sin cos] + let m = [ + [ + self.rotation.cos() * 0.5 * self.width, + -self.rotation.sin() * 0.5 * self.height, + ], + [ + self.rotation.sin() * 0.5 * self.width, + self.rotation.cos() * 0.5 * self.height, + ], + ]; + let v1 = self.center + Point::new(m[0][0] + m[0][1], m[1][0] + m[1][1]); + let v2 = self.center + Point::new(m[0][0] - m[0][1], m[1][0] - m[1][1]); + let v3 = self.center * 2.0 - v1; + let 
v4 = self.center * 2.0 - v2; + [v1, v2, v3, v4] + } + + pub fn height(&self) -> f32 { + self.height + } + + pub fn width(&self) -> f32 { + self.width + } + + pub fn center(&self) -> Point { + self.center + } + + pub fn area(&self) -> f32 { + self.height * self.width + } + + // pub fn contain_point(&self, point: Point) -> bool { + // // ray casting + // todo!() + // } +} + +#[test] +fn test1() { + let pi = std::f32::consts::PI; + let rt = RotatedRect::new( + Point::new(0.0f32, 0.0f32), + 2.0f32, + 4.0f32, + pi / 180.0 * 90.0, + ); + + assert_eq!( + rt.vertices(), + [ + Point { + x: -2.0, + y: 0.99999994, + }, + Point { + x: 2.0, + y: 1.0000001, + }, + Point { + x: 2.0, + y: -0.99999994, + }, + Point { + x: -2.0, + y: -1.0000001, + }, + ] + ); +} + +#[test] +fn test2() { + let pi = std::f32::consts::PI; + let rt = RotatedRect::new( + Point::new(0.0f32, 0.0f32), + 2.0f32.sqrt(), + 2.0f32.sqrt(), + pi / 180.0 * 45.0, + ); + + assert_eq!( + rt.vertices(), + [ + Point { + x: 0.0, + y: 0.99999994 + }, + Point { + x: 0.99999994, + y: 0.0 + }, + Point { + x: 0.0, + y: -0.99999994 + }, + Point { + x: -0.99999994, + y: 0.0 + }, + ] + ); +} + +// #[test] +// fn contain_point() { +// let pi = std::f32::consts::PI; +// let rt = RotatedRect::new( +// Point::new(0.0f32, 0.0f32), +// 1.0f32.sqrt(), +// 1.0f32.sqrt(), +// pi / 180.0 * 45.0, +// ); + +// assert!(rt.contain_point(Point::new(0.0, 0.0))); +// assert!(rt.contain_point(Point::new(0.5, 0.0))); +// assert!(rt.contain_point(Point::new(0.0, 0.5))); + +// } diff --git a/src/tokenizer_stream.rs b/src/tokenizer_stream.rs new file mode 100644 index 0000000..5fb8025 --- /dev/null +++ b/src/tokenizer_stream.rs @@ -0,0 +1,88 @@ +// https://github.com/huggingface/candle/blob/2a8679509eb55232b37378442c4366343f6dcb11/candle-examples/src/token_output_stream.rs#L5 +use anyhow::Result; + +/// This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a +/// streaming way rather than having to wait for the 
full decoding. +#[derive(Debug)] +pub struct TokenizerStream { + tokenizer: tokenizers::Tokenizer, + tokens: Vec, + prev_index: usize, + current_index: usize, +} + +impl TokenizerStream { + pub fn new(tokenizer: tokenizers::Tokenizer) -> Self { + Self { + tokenizer, + tokens: Vec::new(), + prev_index: 0, + current_index: 0, + } + } + + pub fn into_inner(self) -> tokenizers::Tokenizer { + self.tokenizer + } + + fn decode(&self, tokens: &[u32]) -> Result { + match self.tokenizer.decode(tokens, true) { + Ok(str) => Ok(str), + Err(err) => anyhow::bail!("cannot decode: {err}"), + } + } + + // https://github.com/huggingface/text-generation-inference/blob/5ba53d44a18983a4de32d122f4cb46f4a17d9ef6/server/text_generation_server/models/model.py#L68 + pub fn next_token(&mut self, token: u32) -> Result> { + let prev_text = if self.tokens.is_empty() { + String::new() + } else { + let tokens = &self.tokens[self.prev_index..self.current_index]; + self.decode(tokens)? + }; + self.tokens.push(token); + let text = self.decode(&self.tokens[self.prev_index..])?; + if text.len() > prev_text.len() && text.chars().last().unwrap().is_alphanumeric() { + let text = text.split_at(prev_text.len()); + self.prev_index = self.current_index; + self.current_index = self.tokens.len(); + Ok(Some(text.1.to_string())) + } else { + Ok(None) + } + } + + pub fn decode_rest(&self) -> Result> { + let prev_text = if self.tokens.is_empty() { + String::new() + } else { + let tokens = &self.tokens[self.prev_index..self.current_index]; + self.decode(tokens)? 
+ }; + let text = self.decode(&self.tokens[self.prev_index..])?; + if text.len() > prev_text.len() { + let text = text.split_at(prev_text.len()); + Ok(Some(text.1.to_string())) + } else { + Ok(None) + } + } + + pub fn decode_all(&self) -> Result { + self.decode(&self.tokens) + } + + pub fn get_token(&self, token_s: &str) -> Option { + self.tokenizer.get_vocab(true).get(token_s).copied() + } + + pub fn tokenizer(&self) -> &tokenizers::Tokenizer { + &self.tokenizer + } + + pub fn clear(&mut self) { + self.tokens.clear(); + self.prev_index = 0; + self.current_index = 0; + } +} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..aba3625 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,220 @@ +use crate::{Bbox, Keypoint, GITHUB_ASSETS}; +use anyhow::Result; +use indicatif::{ProgressBar, ProgressStyle}; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +pub fn auto_load>(src: P) -> Result { + // check if input file exists + let src = src.as_ref(); + let p = if src.is_file() { + src.into() + } else { + let sth = src.file_name().unwrap().to_str().unwrap(); + let mut p = config_dir(); + p.push(sth); + // download from github assets if not exists in config directory + if !p.is_file() { + download( + &format!("{}/{}", GITHUB_ASSETS, sth), + &p, + Some(sth.to_string().as_str()), + ) + .unwrap_or_else(|err| panic!("Fail to load {:?}: {err}", src)); + } + p + }; + Ok(p.to_str().unwrap().to_string()) +} + +pub fn download + std::fmt::Debug>( + src: &str, + dst: P, + prompt: Option<&str>, +) -> Result<()> { + let resp = ureq::AgentBuilder::new() + .try_proxy_from_env(true) + .build() + .get(src) + .timeout(std::time::Duration::from_secs(2000)) + .call() + .unwrap_or_else(|err| panic!("Failed to GET: {}", err)); + let ntotal = resp + .header("Content-Length") + .and_then(|s| s.parse::().ok()) + .expect("Content-Length header should be present on archive response"); + let pb = ProgressBar::new(ntotal); + pb.set_style( + ProgressStyle::with_template( 
+ "{prefix:.bold} {msg:.dim} [{bar:.blue.bright/white.dim}] {binary_bytes}/{binary_total_bytes} ({binary_bytes_per_sec}, {percent_precise}%, {elapsed})" + ) + .unwrap() + .progress_chars("#>-")); + pb.set_prefix(String::from("\n🐢 Downloading")); + pb.set_message(prompt.unwrap_or_default().to_string()); + let mut reader = resp.into_reader(); + let mut buffer = [0; 256]; + let mut downloaded_bytes = 0usize; + let mut f = std::fs::File::create(&dst).expect("Failed to create file"); + loop { + let bytes_read = reader.read(&mut buffer)?; + if bytes_read == 0 { + break; + } + pb.inc(bytes_read as u64); + f.write_all(&buffer[..bytes_read])?; + downloaded_bytes += bytes_read; + } + assert_eq!(downloaded_bytes as u64, ntotal); + pb.finish(); + Ok(()) +} + +pub fn string_now(delimiter: &str) -> String { + let t_now = chrono::Local::now(); + let fmt = format!( + "%Y{}%m{}%d{}%H{}%M{}%S{}%f", + delimiter, delimiter, delimiter, delimiter, delimiter, delimiter + ); + t_now.format(&fmt).to_string() +} + +pub fn config_dir() -> PathBuf { + match dirs::config_dir() { + Some(mut d) => { + d.push("usls"); + if !d.exists() { + std::fs::create_dir_all(&d).expect("Failed to create config directory."); + } + d + } + None => panic!("Unsupported operating system. 
Now support Linux, MacOS, Windows."), + } +} + +#[allow(clippy::type_complexity)] +pub fn non_max_suppression( + xs: &mut Vec<(Bbox, Option>, Option>)>, + iou_threshold: f32, +) { + xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap()); + + let mut current_index = 0; + for index in 0..xs.len() { + let mut drop = false; + for prev_index in 0..current_index { + let iou = xs[prev_index].0.iou(&xs[index].0); + if iou > iou_threshold { + drop = true; + break; + } + } + if !drop { + xs.swap(current_index, index); + current_index += 1; + } + } + xs.truncate(current_index); +} + +pub const COCO_SKELETON_17: [(usize, usize); 16] = [ + (0, 1), + (0, 2), + (1, 3), + (2, 4), + (5, 6), + (5, 11), + (6, 12), + (11, 12), + (5, 7), + (6, 8), + (7, 9), + (8, 10), + (11, 13), + (12, 14), + (13, 15), + (14, 16), +]; + +pub const COCO_NAMES_80: [&str; 80] = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +];