This commit is contained in:
jamjamjon
2024-03-29 15:54:24 +08:00
parent abaf8c0d65
commit af934086bb
75 changed files with 4272 additions and 1 deletions

78
.github/workflows/rust-ci.yml vendored Normal file
View File

@ -0,0 +1,78 @@
name: Rust
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
jobs:
check:
name: Check
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust }}
override: true
- uses: actions-rs/cargo@v1
with:
command: check
args: --workspace --examples
test:
name: Test Suite
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust }}
override: true
- uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --examples
fmt:
name: Rustfmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- run: rustup component add rustfmt
- uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
clippy:
name: Clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- run: rustup component add clippy
- uses: actions-rs/cargo@v1
with:
command: clippy
args: --workspace --tests --examples --all-targets --all-features -- -Dwarnings

6
.gitignore vendored
View File

@ -12,3 +12,9 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
.debug
.vscode
runs/
.DS_Store

39
Cargo.toml Normal file
View File

@ -0,0 +1,39 @@
[package]
name = "usls"
version = "0.0.1"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
authors = ["Jamjamjon <jamjamjon.usls@gmail.com>"]
license = "MIT"
readme = "README.md"
[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
image = { version = "0.24.7", default-features = false, features = [
"jpeg",
"png",
"tiff",
"webp",
"webp-encoder",
"bmp"
]}
imageproc = { version = "0.23.0", default-features = false }
ndarray = { version = "0.15.6" }
# ort-sys = { version = "2.0.0-alpha.4" }
# ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "half", "ndarray", "cuda", "tensorrt", "coreml", "openvino"] }
ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "profiling", "half", "ndarray", "cuda", "tensorrt", "coreml", "ureq", "openvino"] }
rusttype = { version = "0.9", default-features = false }
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
chrono = { version = "0.4.30" }
half = { version = "2.3.1" }
dirs = { version = "5.0.1" }
ureq = { version = "2.9.1", default-features = true, features = [ "socks-proxy" ] }
walkdir = { version = "2.5.0" }
tokenizers = { version = "0.15.2" }
itertools = { version = "0.12.1" }
usearch = { version = "2.9.1" }
rayon = "1.10.0"
indicatif = "0.17.8"

106
README.md
View File

@ -1 +1,105 @@
# usls
# usls
A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv8](https://github.com/ultralytics/ultralytics) `(Classification, Segmentation, Detection and Pose Detection)`, [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), and others. Many execution providers are supported, such as `CUDA`, `TensorRT` and `CoreML`.
## Supported Models
| Model | Example | CUDA(f32) | CUDA(f16) | TensorRT(f32) | TensorRT(f16) |
| :-------------------: | :----------------------: | :----------------: | :----------------: | :------------------------: | :-----------------------: |
| YOLOv8-detection | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| YOLOv8-pose | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| YOLOv8-classification | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| YOLOv8-segmentation | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| YOLOv8-OBB | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** |
| YOLOv9 | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ |
| RT-DETR | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ |
| FastSAM | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ |
| YOLO-World | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ |
| DINOv2 | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
| CLIP | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| BLIP | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| OCR(DB, SVTR) | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** |
## Solution Models
Additionally, this repo also provides some solution models such as pedestrian `fall detection`, `head detection`, `trash detection`, and more.
| Model | Example | Result |
| :---------------------------: | :------------------------------: | :--------------------------------------------------------------------------: |
| face-landmark detection | [demo](examples/yolov8-face) | <img src="./examples/yolov8-face/demo.jpg" width="400" height="300"> |
| head detection | [demo](examples/yolov8-head) | <img src="./examples/yolov8-head/demo.jpg" width="400" height="300"> |
| fall detection | [demo](examples/yolov8-falldown) | <img src="./examples/yolov8-falldown/demo.jpg" width="400" height="300"> |
| trash detection | [demo](examples/yolov8-trash) | <img src="./examples/yolov8-trash/demo.jpg" width="400" height="260"> |
## Demo
```
cargo run -r --example yolov8 # fastsam, yolov9, blip, clip, dinov2, yolo-world...
```
## Integrate into your own project
#### 1. Install [ort](https://github.com/pykeio/ort)
check **[ort guide](https://ort.pyke.io/setup/linking)**
<details close>
<summary>For Linux or MacOS users</summary>
- First, download the latest release from [ONNXRuntime Releases](https://github.com/microsoft/onnxruntime/releases)
- Then link it by pointing `ORT_DYLIB_PATH` at the downloaded library
```shell
export ORT_DYLIB_PATH=/Users/qweasd/Desktop/onnxruntime-osx-arm64-1.17.1/lib/libonnxruntime.1.17.1.dylib
```
</details>
#### 2. Add `usls` as a dependency to your project's `Cargo.toml`:
```
[dependencies]
usls = "0.0.1"
```
#### 3. Set model `Options` and build `model`, then you're ready to go.
```Rust
use usls::{models::YOLO, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1.build model
let options = Options::default()
.with_model("../models/yolov8m-seg-dyn-f16.onnx")
.with_trt(0) // using cuda(0) by default
// when model with dynamic shapes
.with_i00((1, 2, 4).into()) // dynamic batch
.with_i02((416, 640, 800).into()) // dynamic height
.with_i03((416, 640, 800).into()) // dynamic width
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_saveout("YOLOv8"); // save results
let mut model = YOLO::new(&options)?;
// 2.build dataloader
let dl = DataLoader::default()
.with_batch(model.batch.opt as usize)
.load("./assets/")?;
// 3.run
for (xs, _paths) in dl {
let _y = model.run(&xs)?;
}
Ok(())
}
```
## Script: convert ONNX model from `float32` to `float16`
```python
import onnx
from pathlib import Path
from onnxconverter_common import float16
model_f32 = "onnx_model.onnx"
model_f16 = float16.convert_float_to_float16(onnx.load(model_f32))
saveout = Path(model_f32).with_name(Path(model_f32).stem + "-f16.onnx")
onnx.save(model_f16, saveout)
```

BIN
assets/bus.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

BIN
assets/falldown.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

BIN
assets/kids.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

BIN
assets/trash.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 272 KiB

53
examples/blip/README.md Normal file
View File

@ -0,0 +1,53 @@
This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do conditional or unconditional image captioning.
## Quick Start
```shell
cargo run -r --example blip
```
## Or you can manually
### 1. Download BLIP ONNX Model
[blip-visual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-visual-base.onnx)
[blip-textual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-textual-base.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
// visual
let options_visual = Options::default()
.with_model("VISUAL_MODEL") // <= modify this
.with_profile(false);
// textual
let options_textual = Options::default()
.with_model("TEXTUAL_MODEL") // <= modify this
.with_profile(false);
```
### 3. Then, run
```bash
cargo run -r --example blip
```
## Results
```shell
[Unconditional image captioning]: a group of people walking around a bus
[Conditional image captioning]: three man walking in front of a bus
```
## TODO
* [ ] text decode with Top-p sample
* [ ] VQA
* [ ] Retrieval
* [ ] TensorRT support for textual model

29
examples/blip/main.rs Normal file
View File

@ -0,0 +1,29 @@
use usls::{models::Blip, Options};
/// Runs the BLIP captioning demo on `./assets/bus.jpg`.
///
/// Builds one set of options for the visual model and one for the textual
/// model, constructs `Blip` from both, then requests an unconditional and a
/// conditional caption for the same image.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // options for the visual model
    let visual = Options::default()
        .with_model("../models/blip-visual-base.onnx")
        .with_i00((1, 1, 4).into())
        .with_profile(false);

    // options for the textual model
    let textual = Options::default()
        .with_model("../models/blip-textual-base.onnx")
        .with_i00((1, 1, 4).into()) // input_id: batch
        .with_i01((1, 1, 4).into()) // input_id: seq_len
        .with_i10((1, 1, 4).into()) // attention_mask: batch
        .with_i11((1, 1, 4).into()) // attention_mask: seq_len
        .with_i20((1, 1, 4).into()) // encoder_hidden_states: batch
        .with_i30((1, 1, 4).into()) // encoder_attention_mask: batch
        .with_profile(false);

    // combine both halves into a single model
    let mut blip = Blip::new(visual, textual)?;

    // caption the same image twice: without and with a text prompt
    let image = "./assets/bus.jpg";
    blip.caption(image, None)?; // unconditional
    blip.caption(image, Some("three man"))?; // conditional

    Ok(())
}

58
examples/clip/README.md Normal file
View File

@ -0,0 +1,58 @@
This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute similarity between texts and images, which can be employed for image-to-text or text-to-image retrieval tasks.
## Quick Start
```shell
cargo run -r --example clip
```
## Or you can manually
### 1. Download CLIP ONNX Model
[clip-b32-visual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-visual.onnx)
[clip-b32-textual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-textual.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
// visual
let options_visual = Options::default()
.with_model("VISUAL_MODEL") // <= modify this
.with_i00((1, 1, 4).into())
.with_profile(false);
// textual
let options_textual = Options::default()
.with_model("TEXTUAL_MODEL") // <= modify this
.with_i00((1, 1, 4).into())
.with_profile(false);
```
### 3. Then, run
```bash
cargo run -r --example clip
```
## Results
```shell
(82.24775%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
[0.06708972, 0.0067733657, 0.0019306632, 0.8224775, 0.003044935, 0.083962336, 0.014721389]
(85.56889%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
[0.0786363, 0.0004783095, 0.00060898095, 0.06286741, 0.0006842306, 0.8556889, 0.0010357979]
(90.03625%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
[0.07473288, 0.0027821448, 0.0075673857, 0.010874652, 0.003041679, 0.0006387719, 0.9003625]
```
## TODO
* [ ] TensorRT support for textual model

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 297 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

63
examples/clip/main.rs Normal file
View File

@ -0,0 +1,63 @@
use usls::{models::Clip, ops, DataLoader, Options};
/// Runs the CLIP demo: encodes a fixed list of texts once, then for every
/// batch of images under `./examples/clip/images` prints the best-matching
/// text together with the full row of scores.
///
/// # Errors
///
/// Returns an error if building the model, encoding texts or images, loading
/// the images, or computing the similarity matrix fails, or if a score row is
/// empty.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // visual
    let options_visual = Options::default()
        .with_model("../models/clip-b32-visual-dyn.onnx")
        .with_i00((1, 1, 4).into())
        .with_profile(false);

    // textual
    let options_textual = Options::default()
        .with_model("../models/clip-b32-textual-dyn.onnx")
        .with_i00((1, 1, 4).into())
        .with_profile(false);

    // build model
    let model = Clip::new(options_visual, options_textual)?;

    // texts
    let texts = vec![
        "A photo of a dinosaur ".to_string(),
        "A photo of a cat".to_string(),
        "A photo of a dog".to_string(),
        "几个胡萝卜".to_string(),
        "There are some playing cards on a striped table cloth".to_string(),
        "There is a doll with red hair and a clock on a table".to_string(),
        "Some people holding wine glasses in a restaurant".to_string(),
    ];
    let feats_text = model.encode_texts(&texts)?; // [n, ndim]

    // load image
    let dl = DataLoader::default()
        .with_batch(model.batch_visual())
        .load("./examples/clip/images")?;

    // loop
    for (images, paths) in dl {
        // propagate encoding failures to the caller instead of panicking
        let feats_image = model.encode_images(&images)?;

        // use image to query texts
        let matrix = ops::dot2(&feats_image, &feats_text)?; // [m, n]

        // summary
        for (i, path) in paths.iter().enumerate() {
            let probs = &matrix[i];

            // index and value of the highest score in this row
            let (id, &score) = probs
                .iter()
                .enumerate()
                .reduce(|max, x| if x.1 > max.1 { x } else { max })
                .ok_or("similarity row is empty: no text features to compare against")?;

            println!(
                "({:?}%) {} => {} ",
                score * 100.0,
                path.display(),
                &texts[id]
            );
            println!("{:?}\n", probs);
        }
    }

    Ok(())
}

50
examples/dinov2/README.md Normal file
View File

@ -0,0 +1,50 @@
This demo showcases how to use `DINOv2` to compute image similarity, applicable for image-to-image retrieval tasks.
## Quick Start
```shell
cargo run -r --example dinov2
```
## Or you can manually
### 1. Download DINOv2 ONNX Model
[dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
[dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
[dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
[dinov2-b14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
// build index
let options = IndexOptions {
dimensions: 384, // 768 for vitb; 384 for vits
metric: MetricKind::L2sq,
quantization: ScalarKind::F16,
..Default::default()
};
```
### 3. Then, run
```bash
cargo run -r --example dinov2
```
## Results
```shell
Top-1 distance: 0.0 => "./examples/dinov2/images/bus.jpg"
Top-2 distance: 1.8332717 => "./examples/dinov2/images/dog.png"
Top-3 distance: 1.9672602 => "./examples/dinov2/images/cat.png"
Top-4 distance: 1.978817 => "./examples/dinov2/images/carrot.jpg"
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

57
examples/dinov2/main.rs Normal file
View File

@ -0,0 +1,57 @@
use usearch::ffi::{IndexOptions, MetricKind, ScalarKind};
use usls::{models::Dinov2, DataLoader, Options};
/// Runs the DINOv2 image-retrieval demo.
///
/// Encodes `./assets/bus.jpg` as the query, adds the features of every item
/// yielded by the dataloader over `./examples/dinov2/images` to a `usearch`
/// index, then prints up to 10 matches with their distances and paths.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // model
    let model_options = Options::default()
        .with_model("../models/dinov2-s14-dyn-f16.onnx")
        .with_i00((1, 1, 1).into())
        .with_i02((224, 224, 224).into())
        .with_i03((224, 224, 224).into());
    let mut model = Dinov2::new(&model_options)?;

    // dataloader over the gallery images
    let dl = DataLoader::default()
        .with_batch(model.batch.opt as usize)
        .load("./examples/dinov2/images")?;

    // query image -> query features
    let query_image = image::io::Reader::open("./assets/bus.jpg")?.decode()?;
    let query = model.run(&[query_image])?;

    // index
    let index_options = IndexOptions {
        dimensions: 384, // 768 for vitb; 384 for vits
        metric: MetricKind::L2sq,
        quantization: ScalarKind::F16,
        ..Default::default()
    };
    let index = usearch::new_index(&index_options)?;
    // the dataloader is cloned so the original keeps its `paths` for the report below
    index.reserve(dl.clone().count())?;

    // add gallery features, keyed by their position in the dataloader
    for (idx, (image, _path)) in dl.clone().enumerate() {
        let feats = model.run(&image)?;
        index.add(idx as u64, &feats.into_raw_vec())?;
    }

    // report
    let topk = 10;
    let matches = index.search(&query.into_raw_vec(), topk)?;
    let paths = dl.paths;
    let ranked = matches.keys.into_iter().zip(matches.distances);
    for (rank, (key, distance)) in ranked.enumerate() {
        println!(
            "Top-{} distance: {:?} => {:?}",
            rank + 1,
            distance,
            paths[key as usize]
        );
    }

    Ok(())
}

View File

@ -0,0 +1,41 @@
## Quick Start
```shell
cargo run -r --example fastsam
```
## Or you can manually
### 1. Download or export ONNX Model
- **Export**
```bash
pip install -U ultralytics
yolo export model=FastSAM-s.pt format=onnx simplify dynamic
```
- **Download**
[FastSAM-s-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/FastSAM-s-dyn-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("../models/FastSAM-s-dyn-f16.onnx") // <= modify this
.with_saveout("FastSAM")
.with_profile(false);
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```bash
cargo run -r --example fastsam
```
## Results
![](./demo.jpg)

BIN
examples/fastsam/demo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 KiB

22
examples/fastsam/main.rs Normal file
View File

@ -0,0 +1,22 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the FastSAM demo on `./assets/bus.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/FastSAM-s-dyn-f16.onnx")
        .with_i00((1, 1, 4).into())
        .with_i02((416, 640, 800).into())
        .with_i03((416, 640, 800).into())
        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
        .with_saveout("FastSAM")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/bus.jpg")?;
    model.run(&images)?;

    Ok(())
}

37
examples/rtdetr/README.md Normal file
View File

@ -0,0 +1,37 @@
## Quick Start
```shell
cargo run -r --example rtdetr
```
## Or you can manually
### 1. Download or export ONNX Model
- Export
```bash
pip install -U ultralytics
yolo export model=rtdetr-l.pt format=onnx simplify dynamic opset=16
```
- Download
[rtdetr-l-f16 model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtdetr-l-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_MODEL") // <= modify this
.with_saveout("RT-DETR");
```
### 3. Then, run
```bash
cargo run -r --example rtdetr
```
## Results
![](./demo.jpg)

BIN
examples/rtdetr/demo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

19
examples/rtdetr/main.rs Normal file
View File

@ -0,0 +1,19 @@
use usls::{models::RTDETR, DataLoader, Options, COCO_NAMES_80};
/// Runs the RT-DETR demo on `./assets/bus.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/rtdetr-l-f16.onnx")
        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
        .with_names(&COCO_NAMES_80)
        .with_saveout("RT-DETR");
    let mut model = RTDETR::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/bus.jpg")?;
    model.run(&images)?;

    Ok(())
}

View File

@ -0,0 +1,58 @@
## Quick Start
```shell
cargo run -r --example yolo-world
```
## Or you can manually
### 1. Download or Export ONNX Model
- Download
[yolov8s-world-v2-shoes](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8s-world-v2-shoes.onnx)
- Or generate your own `yolo-world` model and then Export
- Installation
```shell
pip install -U ultralytics
```
- Generate
```python
from ultralytics import YOLO
# Initialize a YOLO-World model
model = YOLO('yolov8m-worldv2.pt')
# Define custom classes
model.set_classes(["shoes"])
# Save the model with the defined offline vocabulary
model.save("custom_yolov8m-world-v2.pt")
```
- Export
```shell
yolo export model=custom_yolov8m-world-v2.pt format=onnx simplify dynamic
```
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH"); // <= modify this
```
### 3. Then, run
```
cargo run -r --example yolo-world
```
## Results
![](./demo.jpg)

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 KiB

View File

@ -0,0 +1,22 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLO-World demo (custom "shoes" vocabulary) on `./assets/bus.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov8s-world-v2-shoes.onnx")
        .with_i00((1, 1, 4).into())
        .with_i02((416, 640, 800).into())
        .with_i03((416, 640, 800).into())
        .with_confs(&[0.3]) // shoes: 0.3
        .with_saveout("YOLO-World")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/bus.jpg")?;
    model.run(&images)?;

    Ok(())
}

View File

@ -0,0 +1,30 @@
## Quick Start
```shell
cargo run -r --example yolov8-face
```
## Or you can manually
### 1. Download ONNX Model
[yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-face-dyn-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```bash
cargo run -r --example yolov8-face
```
## Results
![](./demo.jpg)

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

View File

@ -0,0 +1,22 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLOv8 face demo on `./assets/kids.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov8n-face-dyn-f16.onnx")
        .with_i00((1, 1, 4).into())
        .with_i02((416, 640, 800).into())
        .with_i03((416, 640, 800).into())
        .with_confs(&[0.15])
        .with_saveout("YOLOv8-Face")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/kids.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/kids.jpg")?;
    model.run(&images)?;

    Ok(())
}

View File

@ -0,0 +1,30 @@
## Quick Start
```shell
cargo run -r --example yolov8-falldown
```
## Or you can manually
### 1. Download ONNX Model
[yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-falldown-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```bash
cargo run -r --example yolov8-falldown
```
## Results
![](./demo.jpg)

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

View File

@ -0,0 +1,19 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLOv8 fall-detection demo on `./assets/falldown.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov8-falldown-f16.onnx")
        .with_confs(&[0.3])
        .with_saveout("YOLOv8-Falldown")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/falldown.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/falldown.jpg")?;
    model.run(&images)?;

    Ok(())
}

View File

@ -0,0 +1,30 @@
## Quick Start
```shell
cargo run -r --example yolov8-head
```
## Or you can manually
### 1. Download ONNX Model
[yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-head-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```bash
cargo run -r --example yolov8-head
```
## Results
![](./demo.jpg)

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

View File

@ -0,0 +1,19 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLOv8 head-detection demo on `./assets/kids.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov8-head-f16.onnx")
        .with_confs(&[0.3])
        .with_saveout("YOLOv8-Head")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/kids.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/kids.jpg")?;
    model.run(&images)?;

    Ok(())
}

View File

@ -0,0 +1,32 @@
Model for detecting plastic bag.
## Quick Start
```shell
cargo run -r --example yolov8-trash
```
## Or you can manually
### 1. Download ONNX Model
[yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-plastic-bag-f16.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```bash
cargo run -r --example yolov8-trash
```
## Results
![](./demo.jpg)

Binary file not shown.

After

Width:  |  Height:  |  Size: 214 KiB

View File

@ -0,0 +1,20 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLOv8 trash-detection demo on `./assets/trash.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov8-plastic-bag-f16.onnx")
        .with_confs(&[0.3])
        .with_saveout("YOLOv8-Trash")
        .with_names(&["trash"])
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/trash.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/trash.jpg")?;
    model.run(&images)?;

    Ok(())
}

58
examples/yolov8/README.md Normal file
View File

@ -0,0 +1,58 @@
## Features
- Support `Classification`, `Segmentation`, `Detection`, `Pose(Keypoints)-Detection` tasks.
- Support `FP16` & `FP32` ONNX models.
- Support `CoreML`, `CUDA` and `TensorRT` execution provider to accelerate computation.
- Support dynamic input shapes(`batch`, `width`, `height`).
- Support dynamic confidence(`DynConf`) for each class in Detection task.
## Quick Start
```shell
cargo run -r --example yolov8
```
## Or you can manually
### 1. Export `YOLOv8` ONNX Models
```bash
pip install -U ultralytics
# export onnx model with dynamic shapes
yolo export model=yolov8m.pt format=onnx simplify dynamic
yolo export model=yolov8m-cls.pt format=onnx simplify dynamic
yolo export model=yolov8m-pose.pt format=onnx simplify dynamic
yolo export model=yolov8m-seg.pt format=onnx simplify dynamic
# export onnx model with fixed shapes
yolo export model=yolov8m.pt format=onnx simplify
yolo export model=yolov8m-cls.pt format=onnx simplify
yolo export model=yolov8m-pose.pt format=onnx simplify
yolo export model=yolov8m-seg.pt format=onnx simplify
```
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_saveout("YOLOv8");
let mut model = YOLO::new(&options)?;
```
### 3. Then, run
```
cargo run -r --example yolov8
```
## Result
| Task | Annotated image |
| :-------------------: | --------------------- |
| Instance Segmentation | ![img](./demo-seg.jpg) |
| Classification | ![img](./demo-cls.jpg) |
| Detection | ![img](./demo-det.jpg) |
| Pose | ![img](./demo-pose.jpg) |

Binary file not shown.

After

Width:  |  Height:  |  Size: 219 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 234 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 239 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 233 KiB

29
examples/yolov8/main.rs Normal file
View File

@ -0,0 +1,29 @@
use usls::{models::YOLO, DataLoader, Options, COCO_SKELETON_17};
/// Runs the YOLOv8 demo on `./assets/bus.jpg`.
///
/// Configures the model through the `Options` builder, loads the input with a
/// batch size of 1, and runs the model on every batch the dataloader yields.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // 1. model
    let opts = Options::default()
        .with_model("../models/yolov8m-dyn-f16.onnx")
        .with_trt(0) // cuda by default
        .with_fp16(true)
        .with_i00((1, 1, 4).into())
        .with_i02((416, 640, 800).into())
        .with_i03((416, 640, 800).into())
        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
        .with_profile(true)
        .with_dry_run(5)
        .with_skeletons(&COCO_SKELETON_17)
        .with_saveout("YOLOv8");
    let mut yolo = YOLO::new(&opts)?;

    // 2. dataloader
    let loader = DataLoader::default()
        .with_batch(1)
        .load("./assets/bus.jpg")?;

    // 3. inference, one batch at a time; the result itself is not used here
    for (images, _) in loader {
        let _ = yolo.run(&images)?;
    }

    Ok(())
}

45
examples/yolov9/README.md Normal file
View File

@ -0,0 +1,45 @@
## Quick Start
```shell
cargo run -r --example yolov9
```
## Or you can manually
### 1. Download or Export ONNX Model
- **Download**
[yolov9-c-dyn-fp16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov9-c-dyn-f16.onnx)
- **Export**
```shell
# clone repo and install dependencies
git clone https://github.com/WongKinYiu/yolov9.git
cd yolov9
pip install -r requirements.txt
# download `pt` weights
wget https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c.pt
# export ONNX model
python export.py --weights yolov9-c.pt --include onnx --simplify --dynamic
```
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_saveout("YOLOv9");
```
### 3. Run
```
cargo run -r --example yolov9
```
## Results
![](./demo.jpg)

BIN
examples/yolov9/demo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

22
examples/yolov9/main.rs Normal file
View File

@ -0,0 +1,22 @@
use usls::{models::YOLO, DataLoader, Options};
/// Runs the YOLOv9 demo on `./assets/bus.jpg`.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be
/// loaded, the dataloader yields nothing, or inference fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build model
    let options = Options::default()
        .with_model("../models/yolov9-c-dyn-f16.onnx")
        .with_i00((1, 1, 4).into())
        .with_i02((416, 640, 800).into())
        .with_i03((416, 640, 800).into())
        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
        .with_saveout("YOLOv9")
        .with_profile(false);
    let mut model = YOLO::new(&options)?;

    // build dataloader
    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;

    // run on the first batch; an empty dataloader is reported as an error
    // rather than a panic
    let (images, _paths) = dl
        .next()
        .ok_or("no image could be loaded from ./assets/bus.jpg")?;
    model.run(&images)?;

    Ok(())
}

2
rust-toolchain.toml Normal file
View File

@ -0,0 +1,2 @@
[toolchain]
channel = "1.75"

228
src/annotator.rs Normal file
View File

@ -0,0 +1,228 @@
use anyhow::Result;
use image::{ImageBuffer, RgbImage};
use crate::{auto_load, string_now, Results, CHECK_MARK, CROSS_MARK};
/// Draws inference results onto images and saves the annotated output.
#[derive(Debug)]
pub struct Annotator {
/// Font used to measure and draw text legends.
font: rusttype::Font<'static>,
/// Optional list of index pairs set through `with_skeletons`.
/// NOTE(review): presumably keypoint connections for pose results — confirm against `plot`.
skeletons: Option<Vec<(usize, usize)>>,
/// When `true`, bounding-box legends omit the confidence value.
hide_conf: bool,
}
impl Default for Annotator {
    /// Creates an annotator with the default font, no skeletons, and
    /// confidence values shown.
    ///
    /// # Panics
    ///
    /// Panics if `load_font(None)` returns an error.
    fn default() -> Self {
        let font = Self::load_font(None).unwrap();
        Self {
            font,
            skeletons: None,
            hide_conf: false,
        }
    }
}
impl Annotator {
/// Stores an owned copy of `skeletons` and returns the updated annotator.
pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self {
    let pairs = skeletons.to_vec();
    self.skeletons = Some(pairs);
    self
}
/// Replaces the font with the one loaded from `path` and returns the
/// updated annotator.
///
/// # Panics
///
/// Panics if `load_font` returns an error for `path`.
pub fn with_font(mut self, path: &str) -> Self {
    let font = Self::load_font(Some(path)).unwrap();
    self.font = font;
    self
}
/// Saves `image` as `runs/<saveout>/<name>.jpg`, where `<name>` is the string
/// returned by `string_now("-")`.
///
/// The target directory is created if needed. Any failure (creating the
/// directory or writing the image) is reported on stdout; the method never
/// panics on I/O errors.
pub fn save(&self, image: &RgbImage, saveout: &str) {
    let dir = std::path::PathBuf::from("runs").join(saveout);

    // `create_dir_all` is a no-op for an existing directory, so no separate
    // existence check is needed.
    if let Err(err) = std::fs::create_dir_all(&dir) {
        println!("{} Saving failed: {:?}", CROSS_MARK, err);
        return;
    }

    // Append the extension to the full file name instead of using
    // `set_extension`, which would replace anything after the last dot.
    let mut file = dir.join(string_now("-")).into_os_string();
    file.push(".jpg");
    let file = std::path::PathBuf::from(file);

    match image.save(&file) {
        Err(err) => println!("{} Saving failed: {:?}", CROSS_MARK, err),
        Ok(_) => println!(
            "{} Annotated image saved at: {}",
            CHECK_MARK,
            file.display()
        ),
    }
}
fn load_font(path: Option<&str>) -> Result<rusttype::Font<'static>> {
let path_font = match path {
None => auto_load("Arial.ttf")?,
Some(p) => p.into(),
};
let buffer = std::fs::read(path_font)?;
Ok(rusttype::Font::try_from_vec(buffer).unwrap())
}
/// Returns the palette colour for index `n`, wrapping around when `n`
/// exceeds the palette length.
pub fn get_color(&self, n: usize) -> (u8, u8, u8) {
    // Fetch the palette once rather than once for the length and once for
    // the lookup.
    let palette = Self::color_palette();
    palette[n % palette.len()]
}
/// Draws every result kind present in `y` onto `img`, in this order: masks (plus their
/// contours), classification probabilities, bounding boxes, then keypoints (plus skeleton).
pub fn plot(&self, img: &mut RgbImage, y: &Results) {
// masks and polygons
if let Some(masks) = y.masks() {
for mask in masks.iter() {
// Reinterpret the mask data as a single-channel image with the dimensions of `img`;
// the `expect` fires when `from_vec` cannot build an image of that size from the data.
let mask_nd: ImageBuffer<image::Luma<_>, Vec<u8>> =
ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec())
.expect("can not crate image from ndarray");
// masks
for _x in 0..img.width() {
for _y in 0..img.height() {
let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y);
if mask_p.0[0] > 0 {
// Tint covered pixels green: halve red and blue, pull green toward 255.
let mut img_p = imageproc::drawing::Canvas::get_pixel(img, _x, _y);
img_p.0[0] /= 2;
img_p.0[1] = 255 - (255 - img_p.0[1]) / 3;
img_p.0[2] /= 2;
imageproc::drawing::Canvas::draw_pixel(img, _x, _y, img_p)
}
}
}
// contours
let contours: Vec<imageproc::contours::Contour<i32>> =
imageproc::contours::find_contours(&mask_nd);
for contour in contours.iter() {
// Each contour point is drawn as a small white dot.
for point in contour.points.iter() {
imageproc::drawing::draw_filled_circle_mut(
img,
(point.x, point.y),
1,
image::Rgb([255, 255, 255]),
);
}
}
}
}
// probs
if let Some(probs) = y.probs() {
let topk = 5usize;
// Legends start 1/20 of the image size away from the top-left corner.
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
for k in probs.topk(topk).iter() {
// Use the class name when available, otherwise the numeric id.
let legend = format!("{}: {:.2}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
let scale = img.width().max(img.height()) as f32 / 30.0;
let scale = rusttype::Scale::uniform(scale);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
// Move down one text height per entry so the legends stack vertically.
y += text_h;
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(x, y).of_size(text_w as u32, text_h as u32),
image::Rgb(self.get_color(k.0).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb((0, 0, 0).into()),
x,
y,
scale,
&self.font,
&legend,
);
}
}
// bboxes
if let Some(bboxes) = y.bboxes() {
for bbox in bboxes.iter() {
// NOTE(review): `bbox.width() as u32` / `bbox.height() as u32` truncate and can be 0 for
// sub-pixel boxes — confirm `Rect::of_size` accepts zero sizes.
imageproc::drawing::draw_hollow_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32)
.of_size(bbox.width() as u32, bbox.height() as u32),
image::Rgb(self.get_color(bbox.id()).into()),
);
let legend = if self.hide_conf {
bbox.name().unwrap_or(&bbox.id().to_string()).to_string()
} else {
format!(
"{}: {:.4}",
bbox.name().unwrap_or(&bbox.id().to_string()),
bbox.confidence()
)
};
let scale = img.width().max(img.height()) as f32 / 45.0;
let scale = rusttype::Scale::uniform(scale);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
// Place the legend above the box when there is room for it, otherwise fall back
// to `text_h - ymin` so the y coordinate stays non-negative.
let text_y = if bbox.ymin() as i32 > text_h {
bbox.ymin() as i32 - text_h
} else {
text_h - bbox.ymin() as i32
};
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin() as i32, text_y)
.of_size(text_w as u32, text_h as u32),
image::Rgb(self.get_color(bbox.id()).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb((0, 0, 0).into()),
bbox.xmin() as i32,
text_y,
scale,
&self.font,
&legend,
);
}
}
// keypoints
if let Some(keypoints) = y.keypoints() {
let radius = 3;
for kpts in keypoints.iter() {
for (i, kpt) in kpts.iter().enumerate() {
// A confidence of exactly 0.0 marks a keypoint that must not be drawn.
if kpt.confidence() == 0.0 {
continue;
}
// draw point
imageproc::drawing::draw_filled_circle_mut(
img,
(kpt.x() as i32, kpt.y() as i32),
radius,
image::Rgb(self.get_color(i + 10).into()),
);
}
// draw skeleton
if let Some(skeletons) = &self.skeletons {
for &(i, ii) in skeletons.iter() {
// NOTE(review): skeleton indices are used unchecked; presumably they must all be
// smaller than the number of keypoints per instance — verify against callers.
let kpt1 = &kpts[i];
let kpt2 = &kpts[ii];
// Skip the segment when either endpoint is not drawn.
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
continue;
}
imageproc::drawing::draw_line_segment_mut(
img,
(kpt1.x(), kpt1.y()),
(kpt2.x(), kpt2.y()),
image::Rgb([255, 51, 255]),
);
}
}
}
}
}
/// Returns the fixed list of 20 colours, as `(u8, u8, u8)` triples, that `get_color` cycles through.
///
/// A fresh `Vec` is allocated on every call.
fn color_palette() -> Vec<(u8, u8, u8)> {
vec![
(0, 255, 0),
(255, 128, 0),
(0, 0, 255),
(255, 153, 51),
(255, 0, 0),
(255, 51, 255),
(102, 178, 255),
(51, 153, 255),
(255, 51, 51),
(153, 255, 153),
(102, 255, 102),
(153, 204, 255),
(255, 153, 153),
(255, 178, 102),
(230, 230, 0),
(255, 153, 255),
(255, 102, 255),
(255, 102, 102),
(51, 255, 51),
(255, 255, 255),
]
}
}

77
src/bbox.rs Normal file
View File

@ -0,0 +1,77 @@
use crate::Rect;
/// An axis-aligned detection box with a class id, confidence and optional class name.
#[derive(Clone, PartialEq, Default)]
pub struct Bbox {
// Geometry of the box.
rect: Rect,
// Class index of the detection.
id: usize,
// Confidence attached to the detection.
confidence: f32,
// Human-readable class name, when known.
name: Option<String>,
}
impl std::fmt::Debug for Bbox {
/// Formats the box with its corner coordinates flattened next to id, name and confidence,
/// instead of printing the nested `Rect`.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Bbox")
.field("xmin", &self.rect.xmin())
.field("ymin", &self.rect.ymin())
.field("xmax", &self.rect.xmax())
.field("ymax", &self.rect.ymax())
.field("id", &self.id)
.field("name", &self.name)
.field("confidence", &self.confidence)
.finish()
}
}
impl Bbox {
/// Creates a box from its geometry, class id, confidence and optional class name.
pub fn new(rect: Rect, id: usize, confidence: f32, name: Option<String>) -> Self {
Self {
rect,
id,
confidence,
name,
}
}
/// Width of the underlying rectangle.
pub fn width(&self) -> f32 {
self.rect.width()
}
/// Height of the underlying rectangle.
pub fn height(&self) -> f32 {
self.rect.height()
}
/// Minimum x coordinate of the underlying rectangle.
pub fn xmin(&self) -> f32 {
self.rect.xmin()
}
/// Minimum y coordinate of the underlying rectangle.
pub fn ymin(&self) -> f32 {
self.rect.ymin()
}
/// Maximum x coordinate of the underlying rectangle.
pub fn xmax(&self) -> f32 {
self.rect.xmax()
}
/// Maximum y coordinate of the underlying rectangle.
pub fn ymax(&self) -> f32 {
self.rect.ymax()
}
/// Class index of the detection.
pub fn id(&self) -> usize {
self.id
}
/// Class name of the detection, if one was provided.
pub fn name(&self) -> Option<&String> {
self.name.as_ref()
}
/// Confidence of the detection.
pub fn confidence(&self) -> f32 {
self.confidence
}
/// Area of the underlying rectangle.
pub fn area(&self) -> f32 {
self.rect.area()
}
/// Intersection-over-union with `other`, computed as `intersect / union` of the two rectangles.
// NOTE(review): no guard for a zero union — presumably two degenerate boxes yield NaN; confirm.
pub fn iou(&self, other: &Bbox) -> f32 {
self.rect.intersect(&other.rect) / self.rect.union(&other.rect)
}
}

120
src/dataloader.rs Normal file
View File

@ -0,0 +1,120 @@
use crate::{CHECK_MARK, CROSS_MARK, SAFE_CROSS_MARK};
use anyhow::Result;
use image::DynamicImage;
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};
/// Iterates over images found at a source path, yielding them in batches.
#[derive(Debug, Clone)]
pub struct DataLoader {
// source could be single image, folder with images (TODO: video, stream)
pub source: PathBuf,
// Number of successfully decoded images that ends a batch.
pub batch: usize,
// When false, only files directly inside a source directory are collected.
pub recursive: bool,
// Files still waiting to be loaded, consumed from the front.
pub paths: VecDeque<PathBuf>,
}
impl Iterator for DataLoader {
    type Item = (Vec<DynamicImage>, Vec<PathBuf>);

    /// Yields the next batch of decoded images together with their paths.
    ///
    /// Files that cannot be opened or decoded are reported on stdout and skipped, so a
    /// batch may be shorter than `self.batch` when the queue runs out. Returns `None` once
    /// the queue is exhausted and nothing could be loaded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.paths.is_empty() {
            return None;
        }
        let mut yis: Vec<DynamicImage> = Vec::new();
        let mut yps: Vec<PathBuf> = Vec::new();
        while let Some(path) = self.paths.pop_front() {
            // Report the path that was actually attempted: after `pop_front` the front of
            // the queue is a different file, and the queue may even be empty.
            match image::io::Reader::open(&path) {
                Err(err) => {
                    println!(
                        "{SAFE_CROSS_MARK} Failed to load image: {:?} -> {:?}",
                        path, err
                    );
                }
                Ok(reader) => match reader.decode() {
                    Err(err) => {
                        println!(
                            "{SAFE_CROSS_MARK} Failed to load image: {:?} -> {:?}",
                            path, err
                        );
                    }
                    Ok(image) => {
                        yis.push(image);
                        yps.push(path);
                    }
                },
            }
            if yis.len() == self.batch {
                break;
            }
        }
        if yis.is_empty() && self.paths.is_empty() {
            // Every remaining file failed to load: there is no batch to hand out.
            return None;
        }
        Some((yis, yps))
    }
}
impl Default for DataLoader {
/// An empty loader: batch size 1, non-recursive, no source and no queued paths.
fn default() -> Self {
Self {
batch: 1,
recursive: false,
source: Default::default(),
paths: Default::default(),
}
}
}
impl DataLoader {
pub fn load<P: AsRef<Path>>(&self, source: P) -> Result<Self> {
let source = source.as_ref();
let mut paths = VecDeque::new();
match source {
s if s.is_file() => paths.push_back(s.to_path_buf()),
s if s.is_dir() => {
for entry in WalkDir::new(s)
.into_iter()
.filter_entry(|e| !Self::_is_hidden(e))
{
let entry = entry.unwrap();
if entry.file_type().is_dir() {
continue;
}
if !self.recursive && entry.depth() > 1 {
continue;
}
paths.push_back(entry.path().to_path_buf());
}
}
// s if s.starts_with("rtsp://") || s.starts_with("rtmp://") || s.starts_with("http://")|| s.starts_with("https://") => todo!(),
s if !s.exists() => panic!("{CROSS_MARK} File not found: {s:?}"),
_ => todo!(),
}
println!("{CHECK_MARK} {} files found\n", &paths.len());
Ok(Self {
paths,
source: source.into(),
batch: self.batch,
recursive: self.recursive,
})
}
/// Sets the number of images per batch.
pub fn with_batch(mut self, x: usize) -> Self {
self.batch = x;
self
}
/// Sets whether `load` descends into sub-directories.
pub fn with_recursive(mut self, x: bool) -> Self {
self.recursive = x;
self
}
/// Tells whether the entry's file name starts with a dot.
///
/// Names that are not valid UTF-8 are treated as not hidden.
fn _is_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => name.starts_with('.'),
        None => false,
    }
}
}

13
src/device.rs Normal file
View File

@ -0,0 +1,13 @@
/// Execution backend requested for inference; each variant carries a `usize` payload
/// (used as the device id for the CUDA and TensorRT backends).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Device {
Cpu(usize),
Cuda(usize),
Trt(usize),
CoreML(usize),
Cann(usize),
// Backends not available yet:
// Acl(usize),
// Rocm(usize),
// Rknpu(usize),
// Openvino(usize),
// Onednn(usize),
}

54
src/dynconf.rs Normal file
View File

@ -0,0 +1,54 @@
use std::ops::Index;
/// A list of confidence thresholds, one per class, addressable by class index.
#[derive(Clone, PartialEq, PartialOrd)]
pub struct DynConf {
    confs: Vec<f32>,
}

impl Default for DynConf {
    /// A single threshold of `0.4`.
    fn default() -> Self {
        let confs = vec![0.4f32];
        Self { confs }
    }
}

impl std::fmt::Debug for DynConf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("");
        out.field("DynConf", &self.confs);
        out.finish()
    }
}

impl std::fmt::Display for DynConf {
    /// Prints the thresholds as a plain list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_list();
        for conf in &self.confs {
            out.entry(conf);
        }
        out.finish()
    }
}

impl Index<usize> for DynConf {
    type Output = f32;

    /// Threshold at position `i`; panics when `i` is out of range.
    fn index(&self, i: usize) -> &Self::Output {
        &self.confs[i]
    }
}

impl DynConf {
    /// Builds exactly `n` thresholds from `confs`: extra values are dropped, and a list
    /// that is too short is padded by repeating its last value.
    ///
    /// # Panics
    /// Panics if `confs` is empty while `n` is non-zero.
    pub fn new(confs: &[f32], n: usize) -> Self {
        if n != 0 && confs.is_empty() {
            panic!("Error: No value found in confs")
        }
        let mut filled: Vec<f32> = confs.iter().copied().take(n).collect();
        if let Some(&last) = confs.last() {
            // No-op when `confs` already supplied `n` values.
            filled.resize(n, last);
        }
        Self { confs: filled }
    }
}

51
src/embedding.rs Normal file
View File

@ -0,0 +1,51 @@
use ndarray::{Array, Axis, IxDyn};
/// A model output tensor with optional per-index names.
#[derive(Clone, PartialEq, Default)]
pub struct Embedding {
// Raw output values.
data: Array<f32, IxDyn>,
// Optional names looked up by element index in `topk`.
names: Option<Vec<String>>,
}
impl std::fmt::Debug for Embedding {
/// Prints only the five highest-valued entries rather than the whole tensor.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("").field("Top5", &self.topk(5)).finish()
}
}
impl Embedding {
    /// Wraps `data`, optionally pairing each element index with a name.
    pub fn new(data: Array<f32, IxDyn>, names: Option<Vec<String>>) -> Self {
        Self { data, names }
    }

    /// Borrows the raw tensor.
    pub fn data(&self) -> &Array<f32, IxDyn> {
        &self.data
    }

    /// Returns up to `k` `(index, value, name)` triples, ordered by descending value.
    ///
    /// NaN values sort last instead of aborting the sort. The name is `None` when no
    /// names were supplied or when the index has no matching name.
    pub fn topk(&self, k: usize) -> Vec<(usize, f32, Option<String>)> {
        // Map NaN to the lowest possible key so the comparison is a total order.
        let key = |v: f32| if v.is_nan() { f32::NEG_INFINITY } else { v };
        let mut probs: Vec<(usize, f32)> = self.data.iter().copied().enumerate().collect();
        probs.sort_by(|a, b| key(b.1).total_cmp(&key(a.1)));
        probs
            .into_iter()
            .take(k)
            .map(|(id, confidence)| {
                let name = self
                    .names
                    .as_ref()
                    .and_then(|names| names.get(id).cloned());
                (id, confidence, name)
            })
            .collect()
    }

    /// Divides the data by the square root of its sum of squares taken along axis 0.
    pub fn norm(&self) -> Array<f32, IxDyn> {
        let std_ = self.data.mapv(|x| x * x).sum_axis(Axis(0)).mapv(f32::sqrt);
        self.data.clone() / std_
    }

    /// Returns the highest-valued `(index, value, name)` triple.
    ///
    /// # Panics
    /// Panics if the embedding holds no data.
    pub fn top1(&self) -> (usize, f32, Option<String>) {
        self.topk(1)
            .into_iter()
            .next()
            .expect("Embedding::top1 called on an embedding without data")
    }
}

370
src/engine.rs Normal file
View File

@ -0,0 +1,370 @@
use anyhow::Result;
use half::f16;
use ndarray::{Array, IxDyn};
use ort::{
ExecutionProvider, ExecutionProviderDispatch, Session, SessionBuilder, TensorElementType,
TensorRTExecutionProvider, ValueType,
};
use crate::{config_dir, Device, MinOptMax, Options, CHECK_MARK, CROSS_MARK, SAFE_CROSS_MARK};
/// An ONNX Runtime session together with the input/output metadata read from the model.
#[derive(Debug)]
pub struct OrtEngine {
// Session used for inference.
session: Session,
// Device requested in the options.
device: Device,
// For every input, the min/opt/max value of each dimension.
inputs_minoptmax: Vec<Vec<MinOptMax>>,
// Input names, shapes (-1 marks a dynamic dimension) and element types.
inames: Vec<String>,
ishapes: Vec<Vec<isize>>,
idtypes: Vec<TensorElementType>,
// Output names, shapes and element types.
onames: Vec<String>,
oshapes: Vec<Vec<isize>>,
odtypes: Vec<TensorElementType>,
// When true, `run` prints timing information.
profile: bool,
// Number of warm-up inferences performed by `dry_run`.
num_dry_run: usize,
}
impl OrtEngine {
/// Runs `num_dry_run` warm-up inferences on all-ones inputs shaped by each dimension's `opt` value.
///
/// Does nothing (besides printing a notice) when `num_dry_run` is zero.
pub fn dry_run(&self) -> Result<()> {
    if self.num_dry_run == 0 {
        println!("{SAFE_CROSS_MARK} No dry run count specified, skipping the dry run.");
        return Ok(());
    }
    // One all-ones tensor per model input.
    let xs: Vec<Array<f32, IxDyn>> = self
        .inputs_minoptmax
        .iter()
        .map(|dims| {
            let shape: Vec<usize> = dims.iter().map(|d| d.opt as usize).collect();
            Array::ones(shape).into_dyn()
        })
        .collect();
    for _ in 0..self.num_dry_run {
        self.run(xs.as_ref())?;
    }
    println!("{CHECK_MARK} Dry run x{}", self.num_dry_run);
    Ok(())
}
/// Builds an engine for the model at `config.onnx_path`.
///
/// The model is loaded twice: first with a plain session to read input/output metadata,
/// then again after registering the execution provider selected by `config.device`.
///
/// # Panics
/// Panics (via `todo!`) for inputs beyond the 4th, dimensions beyond the 6th, or a device
/// without a match arm; and (via `_set_ixx`) for a dynamic dimension with no configured value.
pub fn new(config: &Options) -> Result<Self> {
ort::init().commit()?;
// First session: only used to inspect the model's inputs and outputs.
let session = Session::builder()?.with_model_from_file(&config.onnx_path)?;
// inputs
let mut ishapes = Vec::new();
let mut idtypes = Vec::new();
let mut inames = Vec::new();
for x in session.inputs.iter() {
inames.push(x.name.to_owned());
if let ValueType::Tensor { ty, dimensions } = &x.input_type {
ishapes.push(dimensions.iter().map(|x| *x as isize).collect::<Vec<_>>());
idtypes.push(*ty);
} else {
// Non-tensor input: record a single dynamic dimension and f32 as placeholders.
ishapes.push(vec![-1_isize]);
idtypes.push(ort::TensorElementType::Float32);
}
}
// outputs
let mut oshapes = Vec::new();
let mut odtypes = Vec::new();
let mut onames = Vec::new();
for x in session.outputs.iter() {
onames.push(x.name.to_owned());
if let ValueType::Tensor { ty, dimensions } = &x.output_type {
oshapes.push(dimensions.iter().map(|x| *x as isize).collect::<Vec<_>>());
odtypes.push(*ty);
} else {
// Non-tensor output: same placeholders as for inputs.
oshapes.push(vec![-1_isize]);
odtypes.push(ort::TensorElementType::Float32);
}
}
// Resolve min/opt/max for every dimension of every input: `config.iXY` is the
// user-provided setting for input X, dimension Y.
let mut inputs_minoptmax: Vec<Vec<MinOptMax>> = Vec::new();
for (i, dims) in ishapes.iter().enumerate() {
let mut v_: Vec<MinOptMax> = Vec::new();
for (ii, &x) in dims.iter().enumerate() {
let x_default: MinOptMax = (ishapes[i][ii], ishapes[i][ii], ishapes[i][ii]).into();
let x: MinOptMax = match (i, ii) {
(0, 0) => Self::_set_ixx(x, &config.i00, i, ii).unwrap_or(x_default),
(0, 1) => Self::_set_ixx(x, &config.i01, i, ii).unwrap_or(x_default),
(0, 2) => Self::_set_ixx(x, &config.i02, i, ii).unwrap_or(x_default),
(0, 3) => Self::_set_ixx(x, &config.i03, i, ii).unwrap_or(x_default),
(0, 4) => Self::_set_ixx(x, &config.i04, i, ii).unwrap_or(x_default),
(0, 5) => Self::_set_ixx(x, &config.i05, i, ii).unwrap_or(x_default),
(1, 0) => Self::_set_ixx(x, &config.i10, i, ii).unwrap_or(x_default),
(1, 1) => Self::_set_ixx(x, &config.i11, i, ii).unwrap_or(x_default),
(1, 2) => Self::_set_ixx(x, &config.i12, i, ii).unwrap_or(x_default),
(1, 3) => Self::_set_ixx(x, &config.i13, i, ii).unwrap_or(x_default),
(1, 4) => Self::_set_ixx(x, &config.i14, i, ii).unwrap_or(x_default),
(1, 5) => Self::_set_ixx(x, &config.i15, i, ii).unwrap_or(x_default),
(2, 0) => Self::_set_ixx(x, &config.i20, i, ii).unwrap_or(x_default),
(2, 1) => Self::_set_ixx(x, &config.i21, i, ii).unwrap_or(x_default),
(2, 2) => Self::_set_ixx(x, &config.i22, i, ii).unwrap_or(x_default),
(2, 3) => Self::_set_ixx(x, &config.i23, i, ii).unwrap_or(x_default),
(2, 4) => Self::_set_ixx(x, &config.i24, i, ii).unwrap_or(x_default),
(2, 5) => Self::_set_ixx(x, &config.i25, i, ii).unwrap_or(x_default),
(3, 0) => Self::_set_ixx(x, &config.i30, i, ii).unwrap_or(x_default),
(3, 1) => Self::_set_ixx(x, &config.i31, i, ii).unwrap_or(x_default),
(3, 2) => Self::_set_ixx(x, &config.i32_, i, ii).unwrap_or(x_default),
(3, 3) => Self::_set_ixx(x, &config.i33, i, ii).unwrap_or(x_default),
(3, 4) => Self::_set_ixx(x, &config.i34, i, ii).unwrap_or(x_default),
(3, 5) => Self::_set_ixx(x, &config.i35, i, ii).unwrap_or(x_default),
_ => todo!(),
};
v_.push(x);
}
inputs_minoptmax.push(v_);
}
// build again
let builder = Session::builder()?;
let device = config.device.to_owned();
// The provider is registered on `builder` as a side effect; the returned value is unused.
let _ep = match device {
Device::Trt(device_id) => Self::build_trt(
&inames,
&inputs_minoptmax,
&builder,
device_id,
config.trt_int8_enable,
config.trt_fp16_enable,
config.trt_engine_cache_enable,
)?,
Device::Cuda(device_id) => Self::build_cuda(&builder, device_id)?,
Device::CoreML(_) => {
let coreml = ort::CoreMLExecutionProvider::default()
.with_subgraphs()
// .with_ane_only()
.build();
if coreml.is_available()? && coreml.register(&builder).is_ok() {
println!("{CHECK_MARK} Using CoreML");
coreml
} else {
println!("{CROSS_MARK} CoreML initialization failed");
println!("{CHECK_MARK} Using CPU");
ort::CPUExecutionProvider::default().build()
}
}
Device::Cpu(_) => {
println!("{CHECK_MARK} Using CPU");
ort::CPUExecutionProvider::default().build()
}
_ => todo!(),
};
// Second session: the one actually used for inference.
let session = builder
.with_optimization_level(ort::GraphOptimizationLevel::Level3)?
.with_model_from_file(&config.onnx_path)?;
Ok(Self {
session,
device,
inputs_minoptmax,
inames,
ishapes,
idtypes,
onames,
oshapes,
odtypes,
profile: config.profile,
num_dry_run: config.num_dry_run,
})
}
/// Registers the TensorRT execution provider on `builder`, falling back to CUDA
/// (see `build_cuda`) when TensorRT is unavailable or fails to register.
///
/// The min/opt/max profile shapes are generated from `inputs_minoptmax` as
/// comma-separated `name:d0xd1x...` entries, one per input.
fn build_trt(
    inames: &[String],
    inputs_minoptmax: &[Vec<MinOptMax>],
    builder: &SessionBuilder,
    device_id: usize,
    int8_enable: bool,
    fp16_enable: bool,
    engine_cache_enable: bool,
) -> Result<ExecutionProviderDispatch> {
    // auto generate shapes
    let profile_spec = |pick: fn(&MinOptMax) -> isize| -> String {
        inames
            .iter()
            .enumerate()
            .map(|(i, name)| {
                let mut entry = format!("{}:", name);
                for d in inputs_minoptmax[i].iter() {
                    entry += &format!("{}x", pick(d));
                }
                // Drop the trailing separator.
                entry.pop();
                entry
            })
            .collect::<Vec<_>>()
            .join(",")
    };
    let spec_min = profile_spec(|d| d.min);
    let spec_opt = profile_spec(|d| d.opt);
    let spec_max = profile_spec(|d| d.max);
    let cache_path = format!("{}/{}", config_dir().to_str().unwrap(), "trt-cache");
    let trt = TensorRTExecutionProvider::default()
        .with_device_id(device_id as i32)
        .with_int8(int8_enable)
        .with_fp16(fp16_enable)
        .with_engine_cache(engine_cache_enable)
        .with_engine_cache_path(cache_path)
        .with_timing_cache(false)
        .with_profile_min_shapes(spec_min)
        .with_profile_opt_shapes(spec_opt)
        .with_profile_max_shapes(spec_max)
        .build();
    let registered = trt.is_available()? && trt.register(builder).is_ok();
    if !registered {
        println!("{CROSS_MARK} TensorRT initialization failed. Try CUDA...");
        return Self::build_cuda(builder, device_id);
    }
    println!("{CHECK_MARK} Using TensorRT");
    Ok(trt)
}
/// Registers the CUDA execution provider for `device_id` on `builder`; when CUDA is
/// unavailable or registration fails, reports it and returns the CPU provider instead.
fn build_cuda(builder: &SessionBuilder, device_id: usize) -> Result<ExecutionProviderDispatch> {
let cuda = ort::CUDAExecutionProvider::default()
.with_device_id(device_id as i32)
.build();
if cuda.is_available()? && cuda.register(builder).is_ok() {
println!("{CHECK_MARK} Using CUDA");
Ok(cuda)
} else {
println!("{CROSS_MARK} CUDA initialization failed");
println!("{CHECK_MARK} Using CPU");
Ok(ort::CPUExecutionProvider::default().build())
}
}
/// Runs one inference: converts each `f32` input to the element type the model declares,
/// executes the session, and converts every output back to `f32`.
///
/// Inputs are paired with the declared input types by position. When `profile` is set,
/// the time spent in each of the three phases is printed.
///
/// # Panics
/// Panics (via `todo!`) on input types other than f32/f16/i32/i64 and on output types
/// other than f32/f16.
pub fn run(&self, xs: &[Array<f32, IxDyn>]) -> Result<Vec<Array<f32, IxDyn>>> {
// input
let mut xs_ = Vec::new();
let t_pre = std::time::Instant::now();
for (idtype, x) in self.idtypes.iter().zip(xs.iter()) {
let x_ = match idtype {
TensorElementType::Float32 => ort::Value::from_array(x.view())?,
TensorElementType::Float16 => ort::Value::from_array(x.mapv(f16::from_f32).view())?,
TensorElementType::Int32 => ort::Value::from_array(x.mapv(|x_| x_ as i32).view())?,
TensorElementType::Int64 => ort::Value::from_array(x.mapv(|x_| x_ as i64).view())?,
_ => todo!(),
};
xs_.push(x_);
}
let t_pre = t_pre.elapsed();
// inference
let t_run = std::time::Instant::now();
let ys = self.session.run(xs_.as_ref())?;
let t_run = t_run.elapsed();
// output
let mut ys_ = Vec::new();
let t_post = std::time::Instant::now();
for ((_, y), dtype) in ys.iter().zip(self.odtypes.iter()) {
let y_ = match &dtype {
TensorElementType::Float32 => y.extract_tensor::<f32>()?.view().to_owned(),
TensorElementType::Float16 => y.extract_tensor::<f16>()?.view().mapv(f16::to_f32),
_ => todo!(),
};
ys_.push(y_);
}
let t_post = t_post.elapsed();
if self.profile {
println!(
"[Profile] batch: {:?} => {:.4?} (i: {t_pre:.4?}, run: {t_run:.4?}, o: {t_post:.4?})",
self.batch().opt,
t_pre + t_run + t_post
);
}
Ok(ys_)
}
/// Resolves the min/opt/max of one input dimension.
///
/// A fixed dimension (`x != -1`) yields `(x, x, x)`; a dynamic one (`x == -1`) yields a
/// copy of the user-provided `ixx`. The result is always `Some`.
///
/// # Panics
/// Panics when the dimension is dynamic and `ixx` is `None`; `i` and `ii` (zero-based
/// input and dimension indices) are only used for that message.
pub fn _set_ixx(x: isize, ixx: &Option<MinOptMax>, i: usize, ii: usize) -> Option<MinOptMax> {
    if x != -1 {
        // customized, but not dynamic
        return Some((x, x, x).into());
    }
    match ixx {
        // customized
        Some(custom) => Some(custom.to_owned()),
        None => panic!(
            "{CROSS_MARK} Using dynamic shapes in inputs without specifying it: the {}-th input, the {}-th dimension.",
            i + 1,
            ii + 1
        ),
    }
}
/// Shapes of the model outputs (-1 marks a dynamic dimension).
pub fn oshapes(&self) -> &Vec<Vec<isize>> {
&self.oshapes
}
/// Names of the model outputs.
pub fn onames(&self) -> &Vec<String> {
&self.onames
}
/// Element types of the model outputs.
pub fn odtypes(&self) -> &Vec<ort::TensorElementType> {
&self.odtypes
}
/// Shapes of the model inputs (-1 marks a dynamic dimension).
pub fn ishapes(&self) -> &Vec<Vec<isize>> {
&self.ishapes
}
/// Names of the model inputs.
pub fn inames(&self) -> &Vec<String> {
&self.inames
}
/// Element types of the model inputs.
pub fn idtypes(&self) -> &Vec<ort::TensorElementType> {
&self.idtypes
}
/// Device requested when the engine was built.
pub fn device(&self) -> &Device {
&self.device
}
/// Min/opt/max of every dimension of every input.
pub fn inputs_minoptmax(&self) -> &Vec<Vec<MinOptMax>> {
&self.inputs_minoptmax
}
/// Min/opt/max of dimension 0 of the first input.
pub fn batch(&self) -> &MinOptMax {
&self.inputs_minoptmax[0][0]
}
/// Min/opt/max of dimension 2 of the first input.
// NOTE(review): presumably assumes an NCHW image input — confirm for non-image models.
pub fn height(&self) -> &MinOptMax {
&self.inputs_minoptmax[0][2]
}
/// Min/opt/max of dimension 3 of the first input.
///
/// The model wrappers in this crate read height from index 2 and width from index 3;
/// returning index 2 here made `width()` report the height.
///
/// # Panics
/// Panics if the first input has fewer than four dimensions.
pub fn width(&self) -> &MinOptMax {
    &self.inputs_minoptmax[0][3]
}
/// Tells whether dimension 0 of the first input is declared dynamic (-1) in the model.
pub fn is_batch_dyn(&self) -> bool {
self.ishapes[0][0] == -1
}
/// Looks up `key` in the model's custom metadata.
///
/// Returns `None` when the metadata cannot be read, the lookup fails, or the key is absent.
pub fn try_fetch(&self, key: &str) -> Option<String> {
    let metadata = self.session.metadata().ok()?;
    metadata.custom(key).ok().flatten()
}
/// Borrows the underlying ONNX Runtime session.
pub fn session(&self) -> &Session {
&self.session
}
/// Value of the `version` entry in the model's custom metadata, if any.
pub fn version(&self) -> Option<String> {
self.try_fetch("version")
}
}

35
src/keypoint.rs Normal file
View File

@ -0,0 +1,35 @@
use crate::Point;
/// A 2-D point with an associated confidence.
#[derive(PartialEq, Clone, Default)]
pub struct Keypoint {
// Location of the keypoint.
pub point: Point,
// Confidence attached to the keypoint.
confidence: f32,
}
impl std::fmt::Debug for Keypoint {
/// Formats the keypoint with `x` and `y` flattened next to the confidence.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Keypoint")
.field("x", &self.point.x)
.field("y", &self.point.y)
.field("confidence", &self.confidence)
.finish()
}
}
impl Keypoint {
/// Creates a keypoint from its location and confidence.
pub fn new(point: Point, confidence: f32) -> Self {
Self { point, confidence }
}
/// X coordinate of the keypoint.
pub fn x(&self) -> f32 {
self.point.x
}
/// Y coordinate of the keypoint.
pub fn y(&self) -> f32 {
self.point.y
}
/// Confidence of the keypoint.
pub fn confidence(&self) -> f32 {
self.confidence
}
}

43
src/lib.rs Normal file
View File

@ -0,0 +1,43 @@
mod annotator;
mod bbox;
mod dataloader;
mod device;
mod dynconf;
mod embedding;
mod engine;
mod keypoint;
mod min_opt_max;
pub mod models;
pub mod ops;
mod options;
mod point;
mod rect;
mod results;
mod rotated_rect;
mod tokenizer_stream;
mod utils;
pub use annotator::Annotator;
pub use bbox::Bbox;
pub use dataloader::DataLoader;
pub use device::Device;
pub use dynconf::DynConf;
pub use embedding::Embedding;
pub use engine::OrtEngine;
pub use keypoint::Keypoint;
pub use min_opt_max::MinOptMax;
pub use options::Options;
pub use point::Point;
pub use rect::Rect;
pub use results::Results;
pub use rotated_rect::RotatedRect;
pub use tokenizer_stream::TokenizerStream;
pub use utils::{
auto_load, config_dir, download, non_max_suppression, string_now, COCO_NAMES_80,
COCO_SKELETON_17,
};
// Base URL of a GitHub release; presumably where downloadable assets are fetched from — confirm in `utils`.
const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1";
// Markers prefixed to console status messages.
// NOTE(review): the three literals below are empty here — presumably glyphs lost in transit; confirm against the repository.
const CHECK_MARK: &str = "";
const CROSS_MARK: &str = "";
const SAFE_CROSS_MARK: &str = "";

42
src/min_opt_max.rs Normal file
View File

@ -0,0 +1,42 @@
/// The minimum, optimal and maximum value of one tensor dimension.
#[derive(Debug, Clone)]
pub struct MinOptMax {
    pub min: isize,
    pub opt: isize,
    pub max: isize,
}

impl Default for MinOptMax {
    /// All three values set to `-1`.
    fn default() -> Self {
        Self::new(-1)
    }
}

impl From<(isize, isize, isize)> for MinOptMax {
    /// Builds from `(min, opt, max)`, widening the range where needed so that
    /// `min <= opt <= max` always holds.
    fn from(triple: (isize, isize, isize)) -> Self {
        let (lower, opt, upper) = triple;
        Self {
            min: lower.min(opt),
            opt,
            max: upper.max(opt),
        }
    }
}

impl From<[isize; 3]> for MinOptMax {
    /// Same as the tuple conversion, for a `[min, opt, max]` array.
    fn from(values: [isize; 3]) -> Self {
        let [lower, opt, upper] = values;
        Self::from((lower, opt, upper))
    }
}

impl MinOptMax {
    /// A fixed dimension: min, opt and max are all `opt`.
    pub fn new(opt: isize) -> Self {
        Self {
            min: opt,
            opt,
            max: opt,
        }
    }
}

135
src/models/blip.rs Normal file
View File

@ -0,0 +1,135 @@
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Axis, IxDyn};
use std::io::Write;
use tokenizers::Tokenizer;
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine, TokenizerStream};
/// BLIP image-captioning model made of a visual encoder and a textual decoder.
#[derive(Debug)]
pub struct Blip {
// Engine that generates caption tokens.
pub textual: OrtEngine,
// Engine that encodes images.
pub visual: OrtEngine,
// Input height and width taken from the visual engine.
pub height: MinOptMax,
pub width: MinOptMax,
// Batch dimension of the visual and textual engines.
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
// Tokenizer wrapper used for prompt encoding and streamed decoding.
tokenizer: TokenizerStream,
}
impl Blip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.batch().to_owned(),
textual.batch().to_owned(),
visual.height().to_owned(),
visual.width().to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-blip.json")?,
Some(tokenizer) => tokenizer.into(),
};
let tokenizer = Tokenizer::from_file(tokenizer).unwrap();
let tokenizer = TokenizerStream::new(tokenizer);
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
})
}
/// Resizes `xs` to the model's `opt` height and width, runs the visual engine and
/// returns its first output.
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
    let (height, width) = (self.height.opt as u32, self.width.opt as u32);
    let inputs = ops::resize(xs, height, width, true)?;
    let outputs: Vec<Array<f32, IxDyn>> = self.visual.run(&[inputs])?;
    Ok(outputs[0].to_owned())
}
pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> {
// this demo use batch_size=1
let x = image::io::Reader::open(path)?.decode()?;
let image_embeds = self.encode_images(&[x])?;
let image_embeds_attn_mask: Array<f32, IxDyn> =
Array::ones((1, image_embeds.shape()[1])).into_dyn();
// conditional
let mut input_ids = match prompt {
None => {
print!("[Unconditional image captioning]: ");
vec![0.0f32]
}
Some(prompt) => {
let encodings = self.tokenizer.tokenizer().encode(prompt, false);
let ids: Vec<f32> = encodings
.unwrap()
.get_ids()
.iter()
.map(|x| *x as f32)
.collect();
print!("[Conditional image captioning]: {} ", prompt);
ids
}
};
loop {
let input_ids_nd: Array<f32, IxDyn> = Array::from_vec(input_ids.to_owned()).into_dyn();
let input_ids_nd = input_ids_nd.insert_axis(Axis(0));
let input_ids_attn_mask: Array<f32, IxDyn> =
Array::ones(input_ids_nd.shape()).into_dyn();
let y = self.textual.run(&[
input_ids_nd,
input_ids_attn_mask,
image_embeds.to_owned(),
image_embeds_attn_mask.to_owned(),
])?; // N, length, vocab_size
let y = y[0].to_owned();
let y = y.slice(s!(0, -1.., ..));
// softmax
let exps = y.mapv(|c| c.exp());
let stds = exps.sum_axis(Axis(1));
let probs = exps / stds.insert_axis(Axis(1));
let probs = probs.slice(s!(0, ..));
// argmax
let (token_id, _) = probs
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
input_ids.push(token_id as f32);
// SEP
if token_id == 102 {
break;
}
// streaming generation
if let Some(t) = self.tokenizer.next_token(token_id as u32)? {
print!("{t}");
std::io::stdout().flush()?;
}
// sleep for test
std::thread::sleep(std::time::Duration::from_millis(10));
}
println!();
self.tokenizer.clear();
Ok(())
}
/// `opt` batch size of the visual engine.
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
/// `opt` batch size of the textual engine.
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}

105
src/models/clip.rs Normal file
View File

@ -0,0 +1,105 @@
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
use itertools::Itertools;
use ndarray::{Array, Array2, Axis, IxDyn};
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
/// CLIP model made of a visual encoder and a textual encoder.
#[derive(Debug)]
pub struct Clip {
// Engine that encodes texts.
pub textual: OrtEngine,
// Engine that encodes images.
pub visual: OrtEngine,
// Input height and width taken from the visual engine.
pub height: MinOptMax,
pub width: MinOptMax,
// Batch dimension of the visual and textual engines.
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
// Tokenizer configured to pad every text to `context_length` ids.
tokenizer: Tokenizer,
// Fixed number of token ids per text.
context_length: usize,
}
impl Clip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let context_length = 77;
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.inputs_minoptmax()[0][0].to_owned(),
textual.inputs_minoptmax()[0][0].to_owned(),
visual.inputs_minoptmax()[0][2].to_owned(),
visual.inputs_minoptmax()[0][3].to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-clip.json").unwrap(),
Some(tokenizer) => tokenizer.into(),
};
let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap();
tokenizer.with_padding(Some(PaddingParams {
strategy: PaddingStrategy::Fixed(context_length),
direction: PaddingDirection::Right,
pad_to_multiple_of: None,
pad_id: 0,
pad_type_id: 0,
pad_token: "[PAD]".to_string(),
}));
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
context_length,
})
}
/// Resizes `xs` to the model's `opt` height and width, runs the visual engine and
/// returns its first output.
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
    let (height, width) = (self.height.opt as u32, self.width.opt as u32);
    let inputs = ops::resize(xs, height, width, true)?;
    let outputs: Vec<Array<f32, IxDyn>> = self.visual.run(&[inputs])?;
    Ok(outputs[0].to_owned())
}
pub fn encode_texts(&self, texts: &[String]) -> Result<Array<f32, IxDyn>> {
let encodings = self
.tokenizer
.encode_batch(texts.to_owned(), false)
.unwrap();
let xs: Vec<f32> = encodings
.iter()
.map(|i| i.get_ids().iter().map(|b| *b as f32).collect())
.concat();
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
let ys = self.textual.run(&[xs])?;
let ys = ys[0].to_owned();
// let ys = ops::norm(&ys);
Ok(ys)
}
/// Computes, for every image feature row, a softmax over its dot products with all
/// text feature rows. The result has one inner `Vec` per image and one value per text.
///
/// The softmax subtracts each row's maximum before exponentiating, so large dot
/// products (e.g. from unnormalised features) do not overflow to infinity/NaN.
///
/// # Errors
/// Returns an error if either input is not two-dimensional.
pub fn get_similarity(
    &self,
    images_feats: &Array<f32, IxDyn>,
    texts_feats: &Array<f32, IxDyn>,
) -> Result<Vec<Vec<f32>>> {
    // Views are enough here; no need to copy the feature matrices.
    let images_feats = images_feats.view().into_dimensionality::<ndarray::Ix2>()?;
    let texts_feats = texts_feats.view().into_dimensionality::<ndarray::Ix2>()?;
    let mut matrix = images_feats.dot(&texts_feats.t()); // [M, N]
    for mut row in matrix.axis_iter_mut(Axis(0)) {
        let max = row.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        row.mapv_inplace(|x| (x - max).exp());
        let sum = row.sum();
        row.mapv_inplace(|x| x / sum);
    }
    let similarity: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
    Ok(similarity)
}
/// `opt` batch size of the visual engine.
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
/// `opt` batch size of the textual engine.
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}

39
src/models/dinov2.rs Normal file
View File

@ -0,0 +1,39 @@
use crate::{ops, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{Array, IxDyn};
/// DINOv2 feature extractor backed by a single engine.
#[derive(Debug)]
pub struct Dinov2 {
engine: OrtEngine,
// Min/opt/max of dimensions 2, 3 and 0 of the engine's first input.
pub height: MinOptMax,
pub width: MinOptMax,
pub batch: MinOptMax,
}
impl Dinov2 {
/// Builds the engine from `options`, reads batch/height/width from its first input
/// and performs the configured dry runs.
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.inputs_minoptmax()[0][0].to_owned(),
engine.inputs_minoptmax()[0][2].to_owned(),
engine.inputs_minoptmax()[0][3].to_owned(),
);
engine.dry_run()?;
Ok(Self {
engine,
height,
width,
batch,
})
}
/// Resizes `xs` to the model's `opt` height and width, runs the engine and returns
/// its first output after `ops::norm`.
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
let ys = ys[0].to_owned();
let ys = ops::norm(&ys);
Ok(ys)
}
}

11
src/models/mod.rs Normal file
View File

@ -0,0 +1,11 @@
mod blip;
mod clip;
mod dinov2;
mod rtdetr;
mod yolo;
pub use blip::Blip;
pub use clip::Clip;
pub use dinov2::Dinov2;
pub use rtdetr::RTDETR;
pub use yolo::YOLO;

154
src/models/rtdetr.rs Normal file
View File

@ -0,0 +1,154 @@
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Axis, IxDyn};
use regex::Regex;
use crate::{ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Results};
/// RT-DETR object detector.
#[derive(Debug)]
pub struct RTDETR {
engine: OrtEngine,
// Min/opt/max of dimensions 2, 3 and 0 of the engine's first input.
height: MinOptMax,
width: MinOptMax,
batch: MinOptMax,
// Draws and saves results when `saveout` is set.
annotator: Annotator,
// Per-class confidence thresholds.
confs: DynConf,
// Sub-directory name passed to `Annotator::save`; `None` disables saving.
saveout: Option<String>,
// Number of classes.
nc: usize,
// Class names indexed by class id, when known.
names: Option<Vec<String>>,
}
impl RTDETR {
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.inputs_minoptmax()[0][0].to_owned(),
engine.inputs_minoptmax()[0][2].to_owned(),
engine.inputs_minoptmax()[0][3].to_owned(),
);
let names: Option<_> = match &options.names {
None => engine.try_fetch("names").map(|names| {
let re = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap();
let mut names_ = vec![];
for (_, [_, name, _]) in re.captures_iter(&names).map(|x| x.extract()) {
names_.push(name.to_string());
}
names_
}),
Some(names) => Some(names.to_owned()),
};
let nc = options.nc.unwrap_or(
names
.as_ref()
.expect("Failed to get num_classes, make it explicit with `--nc`")
.len(),
);
let annotator = Annotator::default();
let confs = DynConf::new(&options.confs, nc);
engine.dry_run()?;
Ok(Self {
engine,
confs,
nc,
height,
width,
batch,
saveout: options.saveout.to_owned(),
annotator,
names,
})
}
/// Detects objects in `xs`: letterboxes the images to the model's input size, runs the
/// engine and post-processes the outputs into one `Results` per image.
///
/// When `saveout` is set, every image is also annotated and saved.
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
    let inputs = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
    let outputs = self.engine.run(&[inputs])?;
    let results = self.postprocess(outputs, xs)?;
    if let Some(saveout) = &self.saveout {
        for (image, result) in xs.iter().zip(results.iter()) {
            let mut canvas = image.to_rgb8();
            self.annotator.plot(&mut canvas, result);
            self.annotator.save(&canvas, saveout);
        }
    }
    Ok(results)
}
/// Converts the raw engine output `xs[0]` into one `Results` (bounding boxes only) per image.
///
/// `xs0` holds the original images, indexed in the same order as the first axis of the
/// predictions; their sizes are used to scale boxes back to original-image coordinates.
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<Results>> {
const CXYWH_OFFSET: usize = 4; // cxcywh
let preds = &xs[0];
let mut ys = Vec::new();
for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() {
// [bs, num_query, 4 + nc]
// NOTE(review): `xs0[idx]` is unchecked — presumably the output batch never exceeds the
// number of input images; confirm.
let width_original = xs0[idx].width() as f32;
let height_original = xs0[idx].height() as f32;
// Scale factor between the original image and the model input size (the smaller of the two axis ratios).
let ratio =
(self.width() as f32 / width_original).min(self.height() as f32 / height_original);
// save each result
let mut y_bboxes = Vec::new();
for pred in anchor.axis_iter(Axis(0)) {
// The first 4 values are the box, the next `nc` values the per-class scores.
let bbox = pred.slice(s![0..CXYWH_OFFSET]);
let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc]);
// confidence & id
let (id, &confidence) = clss
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
// confs filter
if confidence < self.confs[id] {
continue;
}
// bbox -> input size scale -> rescale
let x = (bbox[0] - bbox[2] / 2.) * self.width() as f32 / ratio;
let y = (bbox[1] - bbox[3] / 2.) * self.height() as f32 / ratio;
let w = bbox[2] * self.width() as f32 / ratio;
let h = bbox[3] * self.height() as f32 / ratio;
// Only the top-left corner is clamped to the image; width and height are kept as computed.
let y_bbox = Bbox::new(
Rect::from_xywh(
x.max(0.0f32).min(width_original),
y.max(0.0f32).min(height_original),
w,
h,
),
id,
confidence,
self.names.as_ref().map(|names| names[id].clone()),
);
y_bboxes.push(y_bbox)
}
let y = Results {
probs: None,
bboxes: Some(y_bboxes),
keypoints: None,
masks: None,
};
ys.push(y);
}
Ok(ys)
}
pub fn batch(&self) -> isize {
self.batch.opt
}
pub fn width(&self) -> isize {
self.width.opt
}
pub fn height(&self) -> isize {
self.height.opt
}
}

387
src/models/yolo.rs Normal file
View File

@ -0,0 +1,387 @@
use anyhow::Result;
use clap::ValueEnum;
use image::{DynamicImage, ImageBuffer};
use ndarray::{s, Array, Axis, IxDyn};
use regex::Regex;
use crate::{
non_max_suppression, ops, Annotator, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options,
OrtEngine, Point, Rect, Results,
};
// Number of leading box values in each prediction; `postprocess` reads them as (cx, cy, w, h).
const CXYWH_OFFSET: usize = 4;
// Number of values stored per keypoint; `postprocess` reads them as (x, y, confidence).
const KPT_STEP: usize = 3;
/// Task performed by a YOLO model.
///
/// `YOLO::new` selects the variant from the model's `task` metadata entry
/// ("classify", "detect", "pose", "segment"), defaulting to detection when
/// the entry is missing.
#[derive(Debug, Clone, ValueEnum)]
enum YOLOTask {
Classify,
Detect,
Pose,
Segment,
Obb, // TODO: declared but not yet selectable; `YOLO::new` has no "obb" arm
}
/// YOLO model wrapper: an ONNX Runtime engine plus the settings used to
/// pre-process inputs and decode outputs.
#[derive(Debug)]
pub struct YOLO {
engine: OrtEngine,
// number of classes (from `Options::nc` or the number of class names)
nc: usize,
// number of keypoints (first number of the `kpt_shape` metadata, 0 if absent)
nk: usize,
// number of mask coefficients (read from the second output shape for segment models, else 0)
nm: usize,
height: MinOptMax,
width: MinOptMax,
batch: MinOptMax,
task: YOLOTask,
// per-class confidence thresholds
confs: DynConf,
// per-keypoint confidence thresholds
kconfs: DynConf,
// IoU threshold passed to non-max suppression
iou: f32,
// when set, `run` annotates and saves each image; otherwise it prints the results
saveout: Option<String>,
annotator: Annotator,
names: Option<Vec<String>>,
apply_nms: bool,
// when true, predictions are iterated along axis 0 of each image's output, otherwise along axis 1
anchors_first: bool,
}
impl YOLO {
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.batch().to_owned(),
engine.height().to_owned(),
engine.width().to_owned(),
);
let task = match engine
.try_fetch("task")
.unwrap_or("detect".to_string())
.as_str()
{
"classify" => YOLOTask::Classify,
"detect" => YOLOTask::Detect,
"pose" => YOLOTask::Pose,
"segment" => YOLOTask::Segment,
x => todo!("{:?} is not supported for now!", x),
};
// try from custom class names, and then model metadata
let mut names = options.names.to_owned().or(Self::fetch_names(&engine));
let nc = match options.nc {
Some(nc) => {
match &names {
None => names = Some((0..nc).map(|x| x.to_string()).collect::<Vec<String>>()),
Some(names) => {
assert_eq!(
nc,
names.len(),
"the length of `nc` and `class names` is not equal."
);
}
}
nc
}
None => match &names {
Some(names) => names.len(),
None => panic!(
"Can not parse model without `nc` and `class names`. Try to make it explicit."
),
},
};
// try from model metadata
let nk = engine
.try_fetch("kpt_shape")
.map(|kpt_string| {
let re = Regex::new(r"([0-9]+), ([0-9]+)").unwrap();
let caps = re.captures(&kpt_string).unwrap();
caps.get(1).unwrap().as_str().parse::<usize>().unwrap()
})
.unwrap_or(0_usize);
let nm = if let YOLOTask::Segment = task {
engine.oshapes()[1][1] as usize
} else {
0_usize
};
let confs = DynConf::new(&options.confs, nc);
let kconfs = DynConf::new(&options.kconfs, nk);
let mut annotator = Annotator::default();
if let Some(skeletons) = &options.skeletons {
annotator = annotator.with_skeletons(skeletons);
}
let saveout = options.saveout.to_owned();
engine.dry_run()?;
Ok(Self {
engine,
confs,
kconfs,
iou: options.iou,
apply_nms: options.apply_nms,
nc,
nk,
nm,
height,
width,
batch,
task,
saveout,
annotator,
names,
anchors_first: options.anchors_first,
})
}
// pub fn run_with_dl(&mut self, dl: &Dataloader) -> Result<Vec<Results>> {
// for (images, paths) in dataloader {
// self.run(&images)
// }
// Ok(())
// }
/// Runs the full pipeline on `xs`: letterbox, inference, post-processing.
///
/// With `saveout` set, each image is annotated with its result and saved;
/// without it, the results are printed to stdout.
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
    let input = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
    let outputs = self.engine.run(&[input])?;
    let ys = self.postprocess(outputs, xs)?;

    if let Some(saveout) = &self.saveout {
        for (source, y) in xs.iter().zip(ys.iter()) {
            let mut canvas = source.to_rgb8();
            self.annotator.plot(&mut canvas, y);
            self.annotator.save(&canvas, saveout);
        }
    } else {
        println!("{ys:?}");
    }

    Ok(ys)
}
/// Decodes raw model outputs `xs` into one `Results` per input image.
///
/// * Classification: every entry along axis 0 of `xs[0]` is wrapped in an
///   `Embedding` together with the class names.
/// * Other tasks: every prediction is split into box values, class scores
///   and (depending on the task) keypoints or mask coefficients, filtered
///   by confidence, rescaled to the original image in `xs0`, optionally
///   passed through non-max suppression and finally collected into boxes,
///   keypoints and masks.
///
/// Errors from reshaping the mask tensors are propagated.
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<Results>> {
if let YOLOTask::Classify = self.task {
let mut ys = Vec::new();
for batch in xs[0].axis_iter(Axis(0)) {
ys.push(Results::new(
Some(Embedding::new(batch.into_owned(), self.names.to_owned())),
None,
None,
None,
));
}
Ok(ys)
} else {
// With two outputs, the 3-dimensional one holds the predictions and the
// other one the mask prototypes; with a single output there are no prototypes.
let (preds, protos) = if xs.len() == 2 {
if xs[0].ndim() == 3 {
(&xs[0], Some(&xs[1]))
} else {
(&xs[1], Some(&xs[0]))
}
} else {
(&xs[0], None)
};
let mut ys = Vec::new();
for (idx, anchor) in preds.axis_iter(Axis(0)).enumerate() {
// [b, 4 + nc + nm, na]
// input image
let width_original = xs0[idx].width() as f32;
let height_original = xs0[idx].height() as f32;
// scale factor between the original image and the model input
let ratio = (self.width() as f32 / width_original)
.min(self.height() as f32 / height_original);
#[allow(clippy::type_complexity)]
let mut data: Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)> = Vec::new();
// one `pred` per prediction; the axis depends on the output layout
for pred in anchor.axis_iter(if self.anchors_first { Axis(0) } else { Axis(1) }) {
// split preds for different tasks
let bbox = pred.slice(s![0..CXYWH_OFFSET]);
let clss = pred.slice(s![CXYWH_OFFSET..CXYWH_OFFSET + self.nc]);
// pose: the last `KPT_STEP * nk` values are the keypoints
let kpts = {
if let YOLOTask::Pose = self.task {
Some(pred.slice(s![pred.len() - KPT_STEP * self.nk..]))
} else {
None
}
};
// segment: the last `nm` values are the mask coefficients
let coefs = {
if let YOLOTask::Segment = self.task {
Some(pred.slice(s![pred.len() - self.nm..]).to_vec())
} else {
None
}
};
// confidence and index
let (id, &confidence) = clss
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
// confidence filter
if confidence < self.confs[id] {
continue;
}
// bbox re-scale
let cx = bbox[0] / ratio;
let cy = bbox[1] / ratio;
let w = bbox[2] / ratio;
let h = bbox[3] / ratio;
let x = cx - w / 2.;
let y = cy - h / 2.;
// only the top-left corner is clamped to the image; `w` and `h` are passed through unchanged
let y_bbox = Bbox::new(
Rect::from_xywh(
x.max(0.0f32).min(width_original),
y.max(0.0f32).min(height_original),
w,
h,
),
id,
confidence,
self.names.as_ref().map(|names| names[id].to_owned()),
);
// kpts
let y_kpts = {
if let Some(kpts) = kpts {
let mut kpts_ = Vec::new();
for i in 0..self.nk {
let kx = kpts[KPT_STEP * i] / ratio;
let ky = kpts[KPT_STEP * i + 1] / ratio;
let kconf = kpts[KPT_STEP * i + 2];
// keypoints below their threshold are replaced by a default keypoint
if kconf < self.kconfs[i] {
kpts_.push(Keypoint::default());
} else {
kpts_.push(Keypoint::new(
Point::new(
kx.max(0.0f32).min(width_original),
ky.max(0.0f32).min(height_original),
),
kconf,
));
}
}
Some(kpts_)
} else {
None
}
};
// merged
data.push((y_bbox, y_kpts, coefs));
}
// nms
if self.apply_nms {
non_max_suppression(&mut data, self.iou);
}
// decode
let mut y_bboxes: Vec<Bbox> = Vec::new();
let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
let mut y_masks: Vec<Vec<u8>> = Vec::new();
for elem in data.into_iter() {
if let Some(kpts) = elem.1 {
y_kpts.push(kpts)
}
// decode masks
if let Some(coefs) = elem.2 {
// NOTE(review): panics if a segment model yields no prototype output
let proto = protos.unwrap().slice(s![idx, .., .., ..]);
let (nm, nh, nw) = proto.dim();
// coefs * proto -> mask
let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm)
let proto = proto.to_owned().into_shape((nm, nh * nw))?; // (nm, nh*nw)
let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n)
// build image from ndarray
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
match ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec()) {
Some(image) => image,
None => panic!("can not create image from ndarray"),
};
let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn
// rescale masks: keep the top-left region covered by the scaled image, then resize it to the original size
let (_, w_mask, h_mask) =
ops::scale_wh(width_original, height_original, nw as f32, nh as f32);
let mask_cropped = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
let mask_original = mask_cropped.resize_exact(
width_original as u32,
height_original as u32,
image::imageops::FilterType::Triangle,
);
// crop-mask with bbox: zero every pixel outside the box
let mut mask_original_cropped = mask_original.into_luma8();
for y in 0..height_original as usize {
for x in 0..width_original as usize {
if x < elem.0.xmin() as usize
|| x > elem.0.xmax() as usize
|| y < elem.0.ymin() as usize
|| y > elem.0.ymax() as usize
{
mask_original_cropped.put_pixel(
x as u32,
y as u32,
image::Luma([0u8]),
);
}
}
}
y_masks.push(mask_original_cropped.into_raw());
}
y_bboxes.push(elem.0);
}
// save each result; empty collections are reported as `None`
let y = Results {
probs: None,
bboxes: if !y_bboxes.is_empty() {
Some(y_bboxes)
} else {
None
},
keypoints: if !y_kpts.is_empty() {
Some(y_kpts)
} else {
None
},
masks: if !y_masks.is_empty() {
Some(y_masks)
} else {
None
},
};
ys.push(y);
}
Ok(ys)
}
}
/// Fetches the class names from the model's `names` metadata entry.
///
/// Expected string format:
/// `{0: 'person', 1: 'bicycle', 2: 'sports ball', ..., 27: "yellow_lady's_slipper"}`
///
/// Returns `None` when the metadata has no `names` entry.
fn fetch_names(engine: &OrtEngine) -> Option<Vec<String>> {
    let raw = engine.try_fetch("names")?;
    let pattern = Regex::new(r#"(['"])([-()\w '"]+)(['"])"#).unwrap();
    let names = pattern
        .captures_iter(&raw)
        .map(|caps| {
            // the middle group is the name without its surrounding quotes
            let (_, [_, name, _]) = caps.extract();
            name.to_string()
        })
        .collect();
    Some(names)
}
pub fn batch(&self) -> isize {
self.batch.opt
}
pub fn width(&self) -> isize {
self.width.opt
}
pub fn height(&self) -> isize {
self.height.opt
}
}

95
src/ops.rs Normal file
View File

@ -0,0 +1,95 @@
use anyhow::Result;
use image::{DynamicImage, GenericImageView};
use ndarray::{Array, Axis, Ix2, IxDyn};
/// Computes the aspect-preserving scale that fits a `w0` x `h0` image into
/// a `w1` x `h1` target.
///
/// Returns `(ratio, scaled_width, scaled_height)`, where the ratio is the
/// smaller of the two axis ratios and both scaled sizes are rounded.
pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
    let ratio_w = w1 / w0;
    let ratio_h = h1 / h0;
    let ratio = ratio_w.min(ratio_h);
    let scaled_w = (w0 * ratio).round();
    let scaled_h = (h0 * ratio).round();
    (ratio, scaled_w, scaled_h)
}
/// Resizes every image to fit `width` x `height` (aspect ratio preserved)
/// and packs the batch into an array of shape `[n, 3, height, width]`.
///
/// Pixel values are divided by 255; the resized image is written starting
/// at the top-left corner and the remaining area keeps the initial value
/// of 1.0. With `norm_imagenet`, the array is additionally normalised per
/// channel with the hard-coded mean/std vectors.
pub fn resize(
    xs: &[DynamicImage],
    height: u32,
    width: u32,
    norm_imagenet: bool,
) -> Result<Array<f32, IxDyn>> {
    let norm = 255.0;
    let mut ys = Array::ones(vec![xs.len(), 3, height as usize, width as usize]).into_dyn();
    for (idx, source) in xs.iter().enumerate() {
        let (w0, h0) = source.dimensions();
        let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32); // f32 round
        let resized = source.resize_exact(
            w_new as u32,
            h_new as u32,
            image::imageops::FilterType::Triangle,
        );
        for (col, row, pixel) in resized.pixels() {
            let (col, row) = (col as usize, row as usize);
            // copy the R, G and B channels; alpha is ignored
            for (channel, &value) in pixel.0.iter().take(3).enumerate() {
                ys[[idx, channel, row, col]] = (value as f32) / norm;
            }
        }
    }
    if norm_imagenet {
        let mean =
            Array::from_shape_vec((1, 3, 1, 1), vec![0.48145466, 0.4578275, 0.40821073]).unwrap();
        let std = Array::from_shape_vec((1, 3, 1, 1), vec![0.26862954, 0.261_302_6, 0.275_777_1])
            .unwrap();
        ys = (ys - mean) / std;
    }
    Ok(ys)
}
/// Letterboxes every image into `width` x `height` and packs the batch into
/// an array of shape `[n, 3, height, width]`.
///
/// The canvas is pre-filled with the grey value 144/255; each image is
/// resized with its aspect ratio preserved and written starting at the
/// top-left corner, with pixel values divided by 255.
pub fn letterbox(xs: &[DynamicImage], height: u32, width: u32) -> Result<Array<f32, IxDyn>> {
    let norm = 255.0;
    let bg = 144.0;
    let mut ys =
        Array::from_elem((xs.len(), 3, height as usize, width as usize), bg / norm).into_dyn();
    for (idx, source) in xs.iter().enumerate() {
        let (w0, h0) = source.dimensions();
        let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32); // f32 round
        let resized = source.resize_exact(
            w_new as u32,
            h_new as u32,
            image::imageops::FilterType::Triangle,
        );
        for (col, row, pixel) in resized.pixels() {
            let (col, row) = (col as usize, row as usize);
            // copy the R, G and B channels; alpha is ignored
            for (channel, &value) in pixel.0.iter().take(3).enumerate() {
                ys[[idx, channel, row, col]] = (value as f32) / norm;
            }
        }
    }
    Ok(ys)
}
/// Divides `xs` by the square root of its sum of squares along axis 1
/// (i.e. by the Euclidean length taken over that axis).
pub fn norm(xs: &Array<f32, IxDyn>) -> Array<f32, IxDyn> {
    let squared = xs.mapv(|v| v * v);
    let lengths = squared.sum_axis(Axis(1)).mapv(f32::sqrt);
    // re-insert the reduced axis so the division broadcasts over it
    xs / lengths.insert_axis(Axis(1))
}
/// Computes the row-wise softmax of `query · galleryᵀ`.
///
/// Both inputs must be two-dimensional, `(m, ndim)` and `(n, ndim)`; the
/// result has `m` rows of `n` values, each row summing to one.
///
/// The largest value of each row is subtracted before exponentiation. This
/// leaves the softmax mathematically unchanged but keeps `exp` from
/// overflowing to infinity (and the row from turning into NaN) for large
/// dot products.
///
/// # Errors
///
/// Returns an error when either input is not two-dimensional.
pub fn dot2(query: &Array<f32, IxDyn>, gallery: &Array<f32, IxDyn>) -> Result<Vec<Vec<f32>>> {
    // (m, ndim) * (n, ndim).t => (m, n); views avoid copying the inputs
    let query = query.view().into_dimensionality::<Ix2>()?;
    let gallery = gallery.view().into_dimensionality::<Ix2>()?;
    let logits = query.dot(&gallery.t());

    let row_max = logits.map_axis(Axis(1), |row| {
        row.fold(f32::NEG_INFINITY, |acc, &v| acc.max(v))
    });
    let exps = (logits - row_max.insert_axis(Axis(1))).mapv(f32::exp);
    let sums = exps.sum_axis(Axis(1));
    let probs = exps / sums.insert_axis(Axis(1));

    Ok(probs.axis_iter(Axis(0)).map(|row| row.to_vec()).collect())
}

321
src/options.rs Normal file
View File

@ -0,0 +1,321 @@
use crate::{auto_load, Device, MinOptMax};
/// Configuration used to build an engine and the models on top of it.
///
/// The `iXY` fields hold an optional `MinOptMax` for axis `Y` of input `X`.
#[derive(Debug, Clone)]
pub struct Options {
pub onnx_path: String,
pub device: Device,
pub profile: bool,
pub num_dry_run: usize,
pub i00: Option<MinOptMax>, // 1st input, axis 0, batch usually
pub i01: Option<MinOptMax>, // 1st input, axis 1
pub i02: Option<MinOptMax>,
pub i03: Option<MinOptMax>,
pub i04: Option<MinOptMax>,
pub i05: Option<MinOptMax>,
pub i10: Option<MinOptMax>, // 2nd input, axis 0
pub i11: Option<MinOptMax>,
pub i12: Option<MinOptMax>,
pub i13: Option<MinOptMax>,
pub i14: Option<MinOptMax>,
pub i15: Option<MinOptMax>,
pub i20: Option<MinOptMax>, // 3rd input, axis 0
pub i21: Option<MinOptMax>,
pub i22: Option<MinOptMax>,
pub i23: Option<MinOptMax>,
pub i24: Option<MinOptMax>,
pub i25: Option<MinOptMax>,
pub i30: Option<MinOptMax>, // 4th input, axis 0
pub i31: Option<MinOptMax>,
pub i32_: Option<MinOptMax>, // trailing underscore avoids the primitive type name `i32`
pub i33: Option<MinOptMax>,
pub i34: Option<MinOptMax>,
pub i35: Option<MinOptMax>,
// trt ep
pub trt_engine_cache_enable: bool,
pub trt_int8_enable: bool,
pub trt_fp16_enable: bool,
// options for Vision and Language models
pub nc: Option<usize>,
pub nk: Option<usize>,
pub nm: Option<usize>,
pub confs: Vec<f32>,
pub kconfs: Vec<f32>,
pub iou: f32,
pub apply_nms: bool,
pub saveout: Option<String>,
pub tokenizer: Option<String>,
pub vocab: Option<String>,
pub names: Option<Vec<String>>, // class names
pub anchors_first: bool, // output format: [bs, anchors/na, pos+nc+nm]
pub skeletons: Option<Vec<(usize, usize)>>,
}
/// Default configuration: CUDA device 0, three dry runs, no explicit input
/// shapes, TensorRT engine cache enabled, confidence threshold 0.4,
/// keypoint confidence threshold 0.5, IoU threshold 0.45 and NMS enabled.
impl Default for Options {
fn default() -> Self {
Self {
onnx_path: String::new(),
device: Device::Cuda(0),
profile: false,
num_dry_run: 3,
// input shapes: unset by default
i00: None,
i01: None,
i02: None,
i03: None,
i04: None,
i05: None,
i10: None,
i11: None,
i12: None,
i13: None,
i14: None,
i15: None,
i20: None,
i21: None,
i22: None,
i23: None,
i24: None,
i25: None,
i30: None,
i31: None,
i32_: None,
i33: None,
i34: None,
i35: None,
// trt ep
trt_engine_cache_enable: true,
trt_int8_enable: false,
trt_fp16_enable: false,
// model options
nc: None,
nk: None,
nm: None,
confs: vec![0.4f32],
kconfs: vec![0.5f32],
iou: 0.45f32,
apply_nms: true,
saveout: None,
tokenizer: None,
vocab: None,
names: None,
anchors_first: false,
skeletons: None,
}
}
}
/// Builder-style setters: each method consumes the options, replaces one
/// field and returns the updated value so calls can be chained.
impl Options {
    /// Sets the model path, resolved through `auto_load`.
    ///
    /// Panics when `auto_load` fails.
    pub fn with_model(self, onnx_path: &str) -> Self {
        Self {
            onnx_path: auto_load(onnx_path).unwrap(),
            ..self
        }
    }

    /// Sets the number of dry runs.
    pub fn with_dry_run(self, n: usize) -> Self {
        Self {
            num_dry_run: n,
            ..self
        }
    }

    /// Selects the CUDA device with the given id.
    pub fn with_cuda(self, id: usize) -> Self {
        Self {
            device: Device::Cuda(id),
            ..self
        }
    }

    /// Selects the TensorRT device with the given id.
    pub fn with_trt(self, id: usize) -> Self {
        Self {
            device: Device::Trt(id),
            ..self
        }
    }

    /// Selects the CPU device.
    pub fn with_cpu(self) -> Self {
        Self {
            device: Device::Cpu(0),
            ..self
        }
    }

    /// Selects the CoreML device with the given id.
    pub fn with_coreml(self, id: usize) -> Self {
        Self {
            device: Device::CoreML(id),
            ..self
        }
    }

    /// Sets the `trt_fp16_enable` flag.
    pub fn with_fp16(self, x: bool) -> Self {
        Self {
            trt_fp16_enable: x,
            ..self
        }
    }

    /// Sets the `profile` flag.
    pub fn with_profile(self, profile: bool) -> Self {
        Self { profile, ..self }
    }

    /// Sets the `saveout` value.
    pub fn with_saveout(self, saveout: &str) -> Self {
        Self {
            saveout: Some(saveout.to_string()),
            ..self
        }
    }

    /// Sets custom class names.
    pub fn with_names(self, names: &[&str]) -> Self {
        Self {
            names: Some(names.iter().map(|name| name.to_string()).collect()),
            ..self
        }
    }

    /// Sets the skeleton connections as pairs of indices.
    pub fn with_skeletons(self, skeletons: &[(usize, usize)]) -> Self {
        Self {
            skeletons: Some(skeletons.to_vec()),
            ..self
        }
    }

    /// Marks the model output as anchors-first.
    pub fn with_anchors_first(self) -> Self {
        Self {
            anchors_first: true,
            ..self
        }
    }

    /// Enables or disables non-max suppression.
    pub fn with_nms(self, apply_nms: bool) -> Self {
        Self { apply_nms, ..self }
    }

    /// Sets the number of classes.
    pub fn with_nc(self, nc: usize) -> Self {
        Self {
            nc: Some(nc),
            ..self
        }
    }

    /// Sets the number of keypoints.
    pub fn with_nk(self, nk: usize) -> Self {
        Self {
            nk: Some(nk),
            ..self
        }
    }

    /// Sets the IoU threshold.
    pub fn with_iou(self, x: f32) -> Self {
        Self { iou: x, ..self }
    }

    /// Sets the class confidence thresholds.
    pub fn with_confs(self, confs: &[f32]) -> Self {
        Self {
            confs: confs.to_vec(),
            ..self
        }
    }

    /// Sets the keypoint confidence thresholds.
    pub fn with_kconfs(self, kconfs: &[f32]) -> Self {
        Self {
            kconfs: kconfs.to_vec(),
            ..self
        }
    }

    /// Sets the `tokenizer` value.
    pub fn with_tokenizer(self, tokenizer: String) -> Self {
        Self {
            tokenizer: Some(tokenizer),
            ..self
        }
    }

    // ---- 1st input, axes 0..=5 ----

    /// Sets the range for axis 0 of the 1st input.
    pub fn with_i00(self, x: MinOptMax) -> Self {
        Self {
            i00: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 1 of the 1st input.
    pub fn with_i01(self, x: MinOptMax) -> Self {
        Self {
            i01: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 2 of the 1st input.
    pub fn with_i02(self, x: MinOptMax) -> Self {
        Self {
            i02: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 3 of the 1st input.
    pub fn with_i03(self, x: MinOptMax) -> Self {
        Self {
            i03: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 4 of the 1st input.
    pub fn with_i04(self, x: MinOptMax) -> Self {
        Self {
            i04: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 5 of the 1st input.
    pub fn with_i05(self, x: MinOptMax) -> Self {
        Self {
            i05: Some(x),
            ..self
        }
    }

    // ---- 2nd input, axes 0..=5 ----

    /// Sets the range for axis 0 of the 2nd input.
    pub fn with_i10(self, x: MinOptMax) -> Self {
        Self {
            i10: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 1 of the 2nd input.
    pub fn with_i11(self, x: MinOptMax) -> Self {
        Self {
            i11: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 2 of the 2nd input.
    pub fn with_i12(self, x: MinOptMax) -> Self {
        Self {
            i12: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 3 of the 2nd input.
    pub fn with_i13(self, x: MinOptMax) -> Self {
        Self {
            i13: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 4 of the 2nd input.
    pub fn with_i14(self, x: MinOptMax) -> Self {
        Self {
            i14: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 5 of the 2nd input.
    pub fn with_i15(self, x: MinOptMax) -> Self {
        Self {
            i15: Some(x),
            ..self
        }
    }

    // ---- 3rd input, axes 0..=5 ----

    /// Sets the range for axis 0 of the 3rd input.
    pub fn with_i20(self, x: MinOptMax) -> Self {
        Self {
            i20: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 1 of the 3rd input.
    pub fn with_i21(self, x: MinOptMax) -> Self {
        Self {
            i21: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 2 of the 3rd input.
    pub fn with_i22(self, x: MinOptMax) -> Self {
        Self {
            i22: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 3 of the 3rd input.
    pub fn with_i23(self, x: MinOptMax) -> Self {
        Self {
            i23: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 4 of the 3rd input.
    pub fn with_i24(self, x: MinOptMax) -> Self {
        Self {
            i24: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 5 of the 3rd input.
    pub fn with_i25(self, x: MinOptMax) -> Self {
        Self {
            i25: Some(x),
            ..self
        }
    }

    // ---- 4th input, axes 0..=5 ----

    /// Sets the range for axis 0 of the 4th input.
    pub fn with_i30(self, x: MinOptMax) -> Self {
        Self {
            i30: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 1 of the 4th input.
    pub fn with_i31(self, x: MinOptMax) -> Self {
        Self {
            i31: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 2 of the 4th input.
    pub fn with_i32_(self, x: MinOptMax) -> Self {
        Self {
            i32_: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 3 of the 4th input.
    pub fn with_i33(self, x: MinOptMax) -> Self {
        Self {
            i33: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 4 of the 4th input.
    pub fn with_i34(self, x: MinOptMax) -> Self {
        Self {
            i34: Some(x),
            ..self
        }
    }

    /// Sets the range for axis 5 of the 4th input.
    pub fn with_i35(self, x: MinOptMax) -> Self {
        Self {
            i35: Some(x),
            ..self
        }
    }
}

182
src/point.rs Normal file
View File

@ -0,0 +1,182 @@
use std::ops::{Add, Div, Mul, Sub};
/// A 2-D point with `f32` coordinates.
///
/// Arithmetic operators work component-wise, either with another point or
/// with a scalar that is applied to both coordinates.
#[derive(Default, Debug, PartialOrd, PartialEq, Clone, Copy)]
pub struct Point {
    pub x: f32,
    pub y: f32,
}

/// Component-wise addition of two points.
impl Add for Point {
    type Output = Self;

    fn add(self, other: Self) -> Self::Output {
        Self {
            x: self.x + other.x,
            y: self.y + other.y,
        }
    }
}

/// Adds a scalar to both coordinates.
impl Add<f32> for Point {
    type Output = Self;

    fn add(self, other: f32) -> Self::Output {
        Self {
            x: self.x + other,
            y: self.y + other,
        }
    }
}

/// Component-wise subtraction of two points.
impl Sub for Point {
    type Output = Self;

    fn sub(self, other: Self) -> Self::Output {
        Self {
            x: self.x - other.x,
            y: self.y - other.y,
        }
    }
}

/// Subtracts a scalar from both coordinates.
impl Sub<f32> for Point {
    type Output = Self;

    fn sub(self, other: f32) -> Self::Output {
        // Fixed: this used to multiply by `other` instead of subtracting it.
        Self {
            x: self.x - other,
            y: self.y - other,
        }
    }
}

/// Multiplies both coordinates by a scalar.
impl Mul<f32> for Point {
    type Output = Self;

    fn mul(self, other: f32) -> Self::Output {
        Self {
            x: self.x * other,
            y: self.y * other,
        }
    }
}

/// Component-wise multiplication of two points.
impl Mul for Point {
    type Output = Self;

    fn mul(self, other: Self) -> Self::Output {
        Self {
            x: self.x * other.x,
            y: self.y * other.y,
        }
    }
}

/// Component-wise division of two points.
impl Div for Point {
    type Output = Self;

    fn div(self, other: Self) -> Self::Output {
        Self {
            x: self.x / other.x,
            y: self.y / other.y,
        }
    }
}

/// Divides both coordinates by a scalar.
impl Div<f32> for Point {
    type Output = Self;

    fn div(self, other: f32) -> Self::Output {
        Self {
            x: self.x / other,
            y: self.y / other,
        }
    }
}

impl From<(f32, f32)> for Point {
    fn from((x, y): (f32, f32)) -> Self {
        Self { x, y }
    }
}

impl From<Point> for (f32, f32) {
    fn from(Point { x, y }: Point) -> Self {
        (x, y)
    }
}

impl From<[f32; 2]> for Point {
    fn from([x, y]: [f32; 2]) -> Self {
        Self { x, y }
    }
}

impl From<Point> for [f32; 2] {
    fn from(Point { x, y }: Point) -> Self {
        [x, y]
    }
}

impl Point {
    /// Creates a point from its coordinates.
    pub fn new(x: f32, y: f32) -> Self {
        Self { x, y }
    }

    /// Returns the coordinates as `[x, y]`.
    pub fn coord(&self) -> [f32; 2] {
        [self.x, self.y]
    }

    /// Returns `true` when both coordinates are exactly zero.
    pub fn is_origin(&self) -> bool {
        self.x == 0.0_f32 && self.y == 0.0_f32
    }

    /// Euclidean distance to `other`.
    pub fn distance_from(&self, other: &Point) -> f32 {
        ((self.x - other.x).powf(2.0) + (self.y - other.y).powf(2.0)).sqrt()
    }

    /// Euclidean distance to the origin.
    pub fn distance_from_origin(&self) -> f32 {
        (self.x.powf(2.0) + self.y.powf(2.0)).sqrt()
    }

    /// Sum of the two coordinates.
    pub fn sum(&self) -> f32 {
        self.x + self.y
    }
}
// Unit tests for `Point` construction and conversions.
#[cfg(test)]
mod tests_points {
use super::Point;
// All ways of building a point must agree with each other.
#[test]
fn new() {
let origin1 = Point::from((0.0f32, 0.0f32));
let origin2 = Point::from([0.0f32, 0.0f32]);
let origin3 = (0.0f32, 0.0f32).into();
let origin4 = [0.0f32, 0.0f32].into();
// despite their names, the next two points are (1, 2), not the origin
let origin5 = Point::new(1.0f32, 2.0f32);
let origin6 = Point {
x: 1.0f32,
y: 2.0f32,
};
assert_eq!(origin1, origin2);
assert_eq!(origin2, origin3);
assert_eq!(origin3, origin4);
assert_eq!(origin5, origin6);
assert!(origin1.is_origin());
assert!(origin2.is_origin());
assert!(origin3.is_origin());
assert!(origin4.is_origin());
assert!(!origin5.is_origin());
assert!(!origin6.is_origin());
}
// A point converts back into a tuple and into an array.
#[test]
fn into_tuple_array() {
let point = Point::from((1.0, 2.0));
let tuple: (f32, f32) = point.into();
let array: [f32; 2] = point.into();
assert_eq!(tuple, (1.0, 2.0));
assert_eq!(array, [1.0, 2.0]);
}
}

193
src/rect.rs Normal file
View File

@ -0,0 +1,193 @@
use crate::Point;
/// Axis-aligned rectangle stored as its top-left and bottom-right corners.
#[derive(Default, PartialOrd, PartialEq, Clone, Copy)]
pub struct Rect {
// corner returned by `xmin()` / `ymin()`
top_left: Point,
// corner returned by `xmax()` / `ymax()`
bottom_right: Point,
}
/// Formats the rectangle as `Rectangle { xmin, ymin, xmax, ymax }`.
impl std::fmt::Debug for Rect {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Rectangle");
        out.field("xmin", &self.xmin());
        out.field("ymin", &self.ymin());
        out.field("xmax", &self.xmax());
        out.field("ymax", &self.ymax());
        out.finish()
    }
}
impl<P: Into<Point>> From<(P, P)> for Rect {
fn from((top_left, bottom_right): (P, P)) -> Self {
Self {
top_left: top_left.into(),
bottom_right: bottom_right.into(),
}
}
}
impl<P: Into<Point>> From<[P; 2]> for Rect {
fn from([top_left, bottom_right]: [P; 2]) -> Self {
Self {
top_left: top_left.into(),
bottom_right: bottom_right.into(),
}
}
}
impl Rect {
    /// Creates a rectangle from its top-left and bottom-right corners.
    pub fn new(top_left: Point, bottom_right: Point) -> Self {
        Self {
            top_left,
            bottom_right,
        }
    }

    /// Creates a rectangle from its top-left corner `(x, y)` and its size.
    pub fn from_xywh(x: f32, y: f32, w: f32, h: f32) -> Self {
        Self {
            top_left: Point::new(x, y),
            bottom_right: Point::new(x + w, y + h),
        }
    }

    /// Creates a rectangle from its corner coordinates.
    pub fn from_xyxy(x1: f32, y1: f32, x2: f32, y2: f32) -> Self {
        Self {
            top_left: Point::new(x1, y1),
            bottom_right: Point::new(x2, y2),
        }
    }

    /// Creates a rectangle from its centre `(cx, cy)` and its size.
    pub fn from_cxywh(cx: f32, cy: f32, w: f32, h: f32) -> Self {
        Self {
            top_left: Point::new(cx - w / 2.0, cy - h / 2.0),
            bottom_right: Point::new(cx + w / 2.0, cy + h / 2.0),
        }
    }

    /// Horizontal extent (`xmax - xmin`).
    pub fn width(&self) -> f32 {
        (self.bottom_right - self.top_left).x
    }

    /// Vertical extent (`ymax - ymin`).
    pub fn height(&self) -> f32 {
        (self.bottom_right - self.top_left).y
    }

    /// x coordinate of the top-left corner.
    pub fn xmin(&self) -> f32 {
        self.top_left.x
    }

    /// y coordinate of the top-left corner.
    pub fn ymin(&self) -> f32 {
        self.top_left.y
    }

    /// x coordinate of the bottom-right corner.
    pub fn xmax(&self) -> f32 {
        self.bottom_right.x
    }

    /// y coordinate of the bottom-right corner.
    pub fn ymax(&self) -> f32 {
        self.bottom_right.y
    }

    /// x coordinate of the centre.
    ///
    /// Fixed: this used to return `xmax - xmin`, i.e. the width.
    pub fn cx(&self) -> f32 {
        (self.bottom_right.x + self.top_left.x) / 2.0
    }

    /// y coordinate of the centre.
    ///
    /// Fixed: this used to return `ymax - ymin`, i.e. the height.
    pub fn cy(&self) -> f32 {
        (self.bottom_right.y + self.top_left.y) / 2.0
    }

    /// Top-left corner.
    pub fn tl(&self) -> Point {
        self.top_left
    }

    /// Bottom-right corner.
    pub fn br(&self) -> Point {
        self.bottom_right
    }

    /// Top-right corner.
    pub fn tr(&self) -> Point {
        Point::new(self.bottom_right.x, self.top_left.y)
    }

    /// Bottom-left corner.
    pub fn bl(&self) -> Point {
        Point::new(self.top_left.x, self.bottom_right.y)
    }

    /// Centre point.
    pub fn center(&self) -> Point {
        (self.bottom_right + self.top_left) / 2.0
    }

    /// Area (`height * width`).
    pub fn area(&self) -> f32 {
        self.height() * self.width()
    }

    /// Returns `true` when the area is exactly zero.
    pub fn is_empty(&self) -> bool {
        self.area() == 0.0
    }

    /// Returns `true` when width and height are equal.
    ///
    /// The misspelt name ("squre") is kept for backward compatibility.
    pub fn is_squre(&self) -> bool {
        self.width() == self.height()
    }

    /// Area of the overlap between `self` and `other` (zero when disjoint).
    pub fn intersect(&self, other: &Rect) -> f32 {
        let l = self.xmin().max(other.xmin());
        let r = (self.xmin() + self.width()).min(other.xmin() + other.width());
        let t = self.ymin().max(other.ymin());
        let b = (self.ymin() + self.height()).min(other.ymin() + other.height());
        (r - l).max(0.) * (b - t).max(0.)
    }

    /// Area covered by `self` and `other` together.
    pub fn union(&self, other: &Rect) -> f32 {
        self.area() + other.area() - self.intersect(other)
    }

    /// Intersection over union of `self` and `other`.
    pub fn iou(&self, other: &Rect) -> f32 {
        self.intersect(other) / self.union(other)
    }

    /// Returns `true` when `other` lies entirely inside `self` (edges included).
    pub fn contains(&self, other: &Rect) -> bool {
        self.xmin() <= other.xmin()
            && self.xmax() >= other.xmax()
            && self.ymin() <= other.ymin()
            && self.ymax() >= other.ymax()
    }
}
// Unit tests for `Rect` construction.
#[cfg(test)]
mod tests {
use super::Rect;
use crate::Point;
// Every constructor and conversion must describe the same rectangle (0, 0)-(5, 5).
#[test]
fn new() {
let rect1 = Rect {
top_left: Point {
x: 0.0f32,
y: 0.0f32,
},
bottom_right: Point {
x: 5.0f32,
y: 5.0f32,
},
};
let rect2 = Rect {
top_left: (0.0f32, 0.0f32).into(),
bottom_right: [5.0f32, 5.0f32].into(),
};
let rect3 = Rect::new([0.0, 0.0].into(), [5.0, 5.0].into());
let rect4: Rect = ((0.0, 0.0), (5.0, 5.0)).into();
let rect5: Rect = [(0.0, 0.0), (5.0, 5.0)].into();
let rect6: Rect = ([0.0, 0.0], [5.0, 5.0]).into();
let rect7: Rect = Rect::from(([0.0, 0.0], [5.0, 5.0]));
let rect8: Rect = Rect::from([[0.0, 0.0], [5.0, 5.0]]);
let rect9: Rect = Rect::from([(0.0, 0.0), (5.0, 5.0)]);
let rect10: Rect = Rect::from_xyxy(0.0, 0.0, 5.0, 5.0);
let rect11: Rect = Rect::from_xywh(0.0, 0.0, 5.0, 5.0);
assert_eq!(rect1, rect2);
assert_eq!(rect3, rect4);
assert_eq!(rect5, rect6);
assert_eq!(rect7, rect8);
assert_eq!(rect9, rect8);
assert_eq!(rect10, rect11);
}
}

59
src/results.rs Normal file
View File

@ -0,0 +1,59 @@
use crate::{Bbox, Embedding, Keypoint};
/// Output of a model for a single input; every part is optional.
#[derive(Clone, PartialEq, Default)]
pub struct Results {
pub probs: Option<Embedding>,
pub bboxes: Option<Vec<Bbox>>,
// one list of keypoints per entry
pub keypoints: Option<Vec<Vec<Keypoint>>>,
// one raw byte buffer per mask
pub masks: Option<Vec<Vec<u8>>>,
}
/// Debug output that prints only the number of masks instead of their raw
/// bytes; all other fields are printed in full.
impl std::fmt::Debug for Results {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mask_count = self.masks().map(|masks| masks.len());
        let mut out = f.debug_struct("Results");
        out.field("Probabilities", &self.probs);
        out.field("BoundingBoxes", &self.bboxes);
        out.field("Keypoints", &self.keypoints);
        out.field("Masks", &format_args!("{:?}", mask_count));
        out.finish()
    }
}
impl Results {
    /// Bundles the given parts into a `Results` value.
    pub fn new(
        probs: Option<Embedding>,
        bboxes: Option<Vec<Bbox>>,
        keypoints: Option<Vec<Vec<Keypoint>>>,
        masks: Option<Vec<Vec<u8>>>,
    ) -> Self {
        Self {
            probs,
            bboxes,
            keypoints,
            masks,
        }
    }

    /// Borrows the probabilities, if present.
    pub fn probs(&self) -> Option<&Embedding> {
        let Self { probs, .. } = self;
        probs.as_ref()
    }

    /// Borrows the keypoints, if present.
    pub fn keypoints(&self) -> Option<&Vec<Vec<Keypoint>>> {
        let Self { keypoints, .. } = self;
        keypoints.as_ref()
    }

    /// Borrows the masks, if present.
    pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
        let Self { masks, .. } = self;
        masks.as_ref()
    }

    /// Borrows the bounding boxes, if present.
    pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
        let Self { bboxes, .. } = self;
        bboxes.as_ref()
    }

    /// Mutably borrows the bounding boxes, if present.
    pub fn bboxes_mut(&mut self) -> Option<&mut Vec<Bbox>> {
        let Self { bboxes, .. } = self;
        bboxes.as_mut()
    }
}

155
src/rotated_rect.rs Normal file
View File

@ -0,0 +1,155 @@
use crate::Point;
/// Rectangle described by its centre, size and rotation.
#[derive(Default, PartialOrd, PartialEq, Clone, Copy)]
pub struct RotatedRect {
center: Point,
width: f32,
height: f32,
// Angle in radians: `vertices()` passes it straight to `sin`/`cos`.
// NOTE(review): the original note read "(0, 90) radians" — presumably the
// intended range is 0 to 90 degrees expressed in radians; confirm.
rotation: f32,
}
/// Debug output listing the stored fields plus the computed vertices.
impl std::fmt::Debug for RotatedRect {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let vertices = self.vertices();
        let mut out = f.debug_struct("RotatedRectangle");
        out.field("height", &self.height);
        out.field("width", &self.width);
        out.field("center", &self.center);
        out.field("rotation", &self.rotation);
        out.field("vertices", &vertices);
        out.finish()
    }
}
impl RotatedRect {
    /// Creates a rotated rectangle from its centre, size and rotation angle.
    pub fn new(center: Point, width: f32, height: f32, rotation: f32) -> Self {
        Self {
            center,
            width,
            height,
            rotation,
        }
    }

    /// Computes the four corner points.
    ///
    /// The half-width and half-height vectors are rotated with the matrix
    /// `[cos -sin; sin cos]`; the first two corners are centre plus/minus
    /// combinations of them, the other two are their mirror images through
    /// the centre.
    pub fn vertices(&self) -> [Point; 4] {
        // rotated half-width vector
        let half_w_x = self.rotation.cos() * 0.5 * self.width;
        let half_w_y = self.rotation.sin() * 0.5 * self.width;
        // rotated half-height vector
        let half_h_x = -self.rotation.sin() * 0.5 * self.height;
        let half_h_y = self.rotation.cos() * 0.5 * self.height;

        let v1 = self.center + Point::new(half_w_x + half_h_x, half_w_y + half_h_y);
        let v2 = self.center + Point::new(half_w_x - half_h_x, half_w_y - half_h_y);
        // mirror v1 and v2 through the centre
        let v3 = self.center * 2.0 - v1;
        let v4 = self.center * 2.0 - v2;
        [v1, v2, v3, v4]
    }

    /// Height of the rectangle.
    pub fn height(&self) -> f32 {
        let Self { height, .. } = self;
        *height
    }

    /// Width of the rectangle.
    pub fn width(&self) -> f32 {
        let Self { width, .. } = self;
        *width
    }

    /// Centre of the rectangle.
    pub fn center(&self) -> Point {
        let Self { center, .. } = self;
        *center
    }

    /// Area (`height * width`).
    pub fn area(&self) -> f32 {
        self.height * self.width
    }

    // pub fn contain_point(&self, point: Point) -> bool {
    // // ray casting
    // todo!()
    // }
}
// A 2 x 4 rectangle centred at the origin and rotated by 90 degrees.
// The expected values are exact `f32` results, including rounding error.
#[test]
fn test1() {
let pi = std::f32::consts::PI;
let rt = RotatedRect::new(
Point::new(0.0f32, 0.0f32),
2.0f32,
4.0f32,
pi / 180.0 * 90.0,
);
assert_eq!(
rt.vertices(),
[
Point {
x: -2.0,
y: 0.99999994,
},
Point {
x: 2.0,
y: 1.0000001,
},
Point {
x: 2.0,
y: -0.99999994,
},
Point {
x: -2.0,
y: -1.0000001,
},
]
);
}
// A square with side sqrt(2) centred at the origin and rotated by 45 degrees:
// its corners land on the axes. Expected values are exact `f32` results.
#[test]
fn test2() {
let pi = std::f32::consts::PI;
let rt = RotatedRect::new(
Point::new(0.0f32, 0.0f32),
2.0f32.sqrt(),
2.0f32.sqrt(),
pi / 180.0 * 45.0,
);
assert_eq!(
rt.vertices(),
[
Point {
x: 0.0,
y: 0.99999994
},
Point {
x: 0.99999994,
y: 0.0
},
Point {
x: 0.0,
y: -0.99999994
},
Point {
x: -0.99999994,
y: 0.0
},
]
);
}
// #[test]
// fn contain_point() {
// let pi = std::f32::consts::PI;
// let rt = RotatedRect::new(
// Point::new(0.0f32, 0.0f32),
// 1.0f32.sqrt(),
// 1.0f32.sqrt(),
// pi / 180.0 * 45.0,
// );
// assert!(rt.contain_point(Point::new(0.0, 0.0)));
// assert!(rt.contain_point(Point::new(0.5, 0.0)));
// assert!(rt.contain_point(Point::new(0.0, 0.5)));
// }

88
src/tokenizer_stream.rs Normal file
View File

@ -0,0 +1,88 @@
// https://github.com/huggingface/candle/blob/2a8679509eb55232b37378442c4366343f6dcb11/candle-examples/src/token_output_stream.rs#L5
use anyhow::Result;
/// This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a
/// streaming way rather than having to wait for the full decoding.
#[derive(Debug)]
pub struct TokenizerStream {
tokenizer: tokenizers::Tokenizer,
// every token pushed through `next_token` since the last `clear`
tokens: Vec<u32>,
// start of the token window that is decoded again on each call
prev_index: usize,
// end of the token window whose text has already been returned
current_index: usize,
}
impl TokenizerStream {
    /// Wraps `tokenizer` with an empty token buffer.
    pub fn new(tokenizer: tokenizers::Tokenizer) -> Self {
        Self {
            tokenizer,
            tokens: Vec::new(),
            prev_index: 0,
            current_index: 0,
        }
    }

    /// Consumes the stream and returns the wrapped tokenizer.
    pub fn into_inner(self) -> tokenizers::Tokenizer {
        self.tokenizer
    }

    /// Decodes `tokens`, turning a tokenizer failure into an `anyhow` error.
    fn decode(&self, tokens: &[u32]) -> Result<String> {
        self.tokenizer
            .decode(tokens, true)
            .map_err(|err| anyhow::anyhow!("cannot decode: {err}"))
    }

    /// Decodes the window `prev_index..current_index`, i.e. the text that
    /// was already handed out; empty when no token has been pushed yet.
    fn decode_window(&self) -> Result<String> {
        if self.tokens.is_empty() {
            return Ok(String::new());
        }
        self.decode(&self.tokens[self.prev_index..self.current_index])
    }

    // https://github.com/huggingface/text-generation-inference/blob/5ba53d44a18983a4de32d122f4cb46f4a17d9ef6/server/text_generation_server/models/model.py#L68
    /// Pushes `token` and returns the newly decoded text, if any.
    ///
    /// Text is only returned once the decoded window grew and ends in an
    /// alphanumeric character; otherwise `None` is returned and the token
    /// stays buffered.
    pub fn next_token(&mut self, token: u32) -> Result<Option<String>> {
        let prev_text = self.decode_window()?;
        self.tokens.push(token);
        let text = self.decode(&self.tokens[self.prev_index..])?;

        let grew = text.len() > prev_text.len();
        if grew && text.chars().last().unwrap().is_alphanumeric() {
            let (_, fresh) = text.split_at(prev_text.len());
            self.prev_index = self.current_index;
            self.current_index = self.tokens.len();
            Ok(Some(fresh.to_string()))
        } else {
            Ok(None)
        }
    }

    /// Returns the text of the buffered tokens that has not been handed out
    /// yet, or `None` when there is none.
    pub fn decode_rest(&self) -> Result<Option<String>> {
        let prev_text = self.decode_window()?;
        let text = self.decode(&self.tokens[self.prev_index..])?;

        if text.len() > prev_text.len() {
            let (_, rest) = text.split_at(prev_text.len());
            Ok(Some(rest.to_string()))
        } else {
            Ok(None)
        }
    }

    /// Decodes every token pushed so far.
    pub fn decode_all(&self) -> Result<String> {
        self.decode(&self.tokens)
    }

    /// Looks up the id of `token_s` in the tokenizer vocabulary.
    pub fn get_token(&self, token_s: &str) -> Option<u32> {
        let vocab = self.tokenizer.get_vocab(true);
        vocab.get(token_s).copied()
    }

    /// Borrows the wrapped tokenizer.
    pub fn tokenizer(&self) -> &tokenizers::Tokenizer {
        &self.tokenizer
    }

    /// Drops all buffered tokens and resets both indices.
    pub fn clear(&mut self) {
        self.tokens.clear();
        self.prev_index = 0;
        self.current_index = 0;
    }
}

220
src/utils.rs Normal file
View File

@ -0,0 +1,220 @@
use crate::{Bbox, Keypoint, GITHUB_ASSETS};
use anyhow::Result;
use indicatif::{ProgressBar, ProgressStyle};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
pub fn auto_load<P: AsRef<Path>>(src: P) -> Result<String> {
// check if input file exists
let src = src.as_ref();
let p = if src.is_file() {
src.into()
} else {
let sth = src.file_name().unwrap().to_str().unwrap();
let mut p = config_dir();
p.push(sth);
// download from github assets if not exists in config directory
if !p.is_file() {
download(
&format!("{}/{}", GITHUB_ASSETS, sth),
&p,
Some(sth.to_string().as_str()),
)
.unwrap_or_else(|err| panic!("Fail to load {:?}: {err}", src));
}
p
};
Ok(p.to_str().unwrap().to_string())
}
pub fn download<P: AsRef<Path> + std::fmt::Debug>(
src: &str,
dst: P,
prompt: Option<&str>,
) -> Result<()> {
let resp = ureq::AgentBuilder::new()
.try_proxy_from_env(true)
.build()
.get(src)
.timeout(std::time::Duration::from_secs(2000))
.call()
.unwrap_or_else(|err| panic!("Failed to GET: {}", err));
let ntotal = resp
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.expect("Content-Length header should be present on archive response");
let pb = ProgressBar::new(ntotal);
pb.set_style(
ProgressStyle::with_template(
"{prefix:.bold} {msg:.dim} [{bar:.blue.bright/white.dim}] {binary_bytes}/{binary_total_bytes} ({binary_bytes_per_sec}, {percent_precise}%, {elapsed})"
)
.unwrap()
.progress_chars("#>-"));
pb.set_prefix(String::from("\n🐢 Downloading"));
pb.set_message(prompt.unwrap_or_default().to_string());
let mut reader = resp.into_reader();
let mut buffer = [0; 256];
let mut downloaded_bytes = 0usize;
let mut f = std::fs::File::create(&dst).expect("Failed to create file");
loop {
let bytes_read = reader.read(&mut buffer)?;
if bytes_read == 0 {
break;
}
pb.inc(bytes_read as u64);
f.write_all(&buffer[..bytes_read])?;
downloaded_bytes += bytes_read;
}
assert_eq!(downloaded_bytes as u64, ntotal);
pb.finish();
Ok(())
}
/// Formats the current local time as year, month, day, hour, minute, second
/// and the `%f` fractional field, with `delimiter` placed between the fields.
pub fn string_now(delimiter: &str) -> String {
    let pattern = ["%Y", "%m", "%d", "%H", "%M", "%S", "%f"].join(delimiter);
    chrono::Local::now().format(&pattern).to_string()
}
/// Returns the `usls` directory inside the platform configuration directory
/// reported by `dirs::config_dir()`, creating it when it does not exist yet.
///
/// # Panics
///
/// Panics when `dirs::config_dir()` yields no directory, or when the `usls`
/// directory is missing and cannot be created.
pub fn config_dir() -> PathBuf {
    let dir = dirs::config_dir()
        .expect("Unsupported operating system. Now support Linux, MacOS, Windows.")
        .join("usls");
    if !dir.exists() {
        std::fs::create_dir_all(&dir).expect("Failed to create config directory.");
    }
    dir
}
/// Greedy non-maximum suppression, performed in place.
///
/// Entries are ordered by descending box confidence; each entry is then kept
/// only if its box's IoU with every previously kept box is at most
/// `iou_threshold`. On return `xs` holds the survivors, highest confidence
/// first. The optional keypoints and `f32` vector travel with their box.
///
/// Confidences are ordered with `total_cmp`, so a NaN confidence can no longer
/// cause a panic during sorting.
// The tuple element type is spelled out in the public signature; introducing
// an alias only to satisfy the lint would not make it clearer.
#[allow(clippy::type_complexity)]
pub fn non_max_suppression(
    xs: &mut Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)>,
    iou_threshold: f32,
) {
    xs.sort_by(|a, b| b.0.confidence().total_cmp(&a.0.confidence()));

    // `xs[..kept]` always holds the survivors found so far.
    let mut kept = 0;
    for index in 0..xs.len() {
        let candidate = &xs[index].0;
        let suppressed = xs[..kept]
            .iter()
            .any(|winner| winner.0.iou(candidate) > iou_threshold);
        if !suppressed {
            // Move the survivor to the end of the kept prefix.
            xs.swap(kept, index);
            kept += 1;
        }
    }
    xs.truncate(kept);
}
/// Sixteen index pairs over the range `0..=16`.
///
/// NOTE(review): going by the name, each pair is presumably one skeleton edge
/// joining two of the 17 COCO pose keypoints — confirm against the keypoint
/// ordering produced by the pose models.
pub const COCO_SKELETON_17: [(usize, usize); 16] = [
(0, 1),
(0, 2),
(1, 3),
(2, 4),
(5, 6),
(5, 11),
(6, 12),
(11, 12),
(5, 7),
(6, 8),
(7, 9),
(8, 10),
(11, 13),
(12, 14),
(13, 15),
(14, 16),
];
/// Eighty class-name strings, from `"person"` to `"toothbrush"`.
///
/// NOTE(review): going by the name, these are presumably the COCO detection
/// labels, with the array index serving as the class id — confirm against the
/// models that consume this table.
pub const COCO_NAMES_80: [&str; 80] = [
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush",
];