* Update imageproc crates

* Add top-p method for sampling

* Add SVTR for text recognition & bug fix
Jamjamjon
2024-04-06 16:16:53 +08:00
committed by GitHub
parent ce9a416b71
commit a0d410b46d
48 changed files with 1621 additions and 990 deletions

View File

@ -11,30 +11,33 @@ exclude = ["assets/*", "examples/*"]
[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
image = { version = "0.24.7", default-features = false, features = [
"jpeg",
"png",
"tiff",
"webp",
"webp-encoder",
"bmp"
]}
imageproc = { version = "0.23.0", default-features = false }
ndarray = { version = "0.15.6" }
# ort-sys = { version = "2.0.0-alpha.4" }
# ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "half", "ndarray", "cuda", "tensorrt", "coreml", "openvino"] }
ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "profiling", "half", "ndarray", "cuda", "tensorrt", "coreml", "ureq", "openvino"] }
rusttype = { version = "0.9", default-features = false }
ort = { version = "2.0.0-alpha.4", default-features = false, features = [
"load-dynamic",
"copy-dylibs",
"profiling",
"half",
"ndarray",
"cuda",
"tensorrt",
"coreml",
"ureq",
"openvino",
] }
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
chrono = { version = "0.4.30" }
half = { version = "2.3.1" }
dirs = { version = "5.0.1" }
ureq = { version = "2.9.1", default-features = true, features = [ "socks-proxy" ] }
ureq = { version = "2.9.1", default-features = true, features = [
"socks-proxy",
] }
walkdir = { version = "2.5.0" }
tokenizers = { version = "0.15.2" }
itertools = { version = "0.12.1" }
usearch = { version = "2.9.1" }
usearch = { version = "2.10.4" }
rayon = "1.10.0"
indicatif = "0.17.8"
image = "0.25.1"
imageproc = { version = "0.24" }
ab_glyph = "0.2.23"

View File

@ -4,34 +4,35 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp
## Supported Models
| Model | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
| :-----------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
| **YOLOv8-detection** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-pose** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-classification** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-segmentation** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-OBB** | TODO | TODO | TODO | TODO | TODO |
| **YOLOv9** | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ |
| **RT-DETR** | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ |
| **FastSAM** | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ |
| **YOLO-World** | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ |
| **DINOv2** | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
| **CLIP** | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| **BLIP** | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947) | [demo](examples/db) | ✅ | ❌ | ✅ | ✅ |
| **SVTR, TROCR** | TODO | TODO | TODO | TODO | TODO |
| Model | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
| :---------------------------------------------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
| **YOLOv8-detection** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-pose** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-classification** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-segmentation** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
| **YOLOv8-OBB** | TODO | TODO | TODO | TODO | TODO |
| **YOLOv9** | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ |
| **RT-DETR** | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ |
| **FastSAM** | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ |
| **YOLO-World** | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ |
| **DINOv2** | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
| **CLIP** | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| **BLIP** | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947) | [demo](examples/db) | ✅ | ❌ | ✅ | ✅ |
| [**SVTR(Text Recognition)**](https://arxiv.org/abs/2205.00159) | [demo](examples/svtr) | | | | |
## Solution Models
This repo also provides solution models such as pedestrian `fall detection`, `head detection`, `trash detection`, and more.
| Model | Example |
| :-------------------------------------------------------: | :------------------------------: |
| **face-landmark detection**<br />**人脸 & 关键点检测** | [demo](examples/yolov8-face) |
| **head detection**<br /> **人头检测** | [demo](examples/yolov8-head) |
| **fall detection**<br /> **摔倒检测** | [demo](examples/yolov8-falldown) |
| **trash detection**<br /> **垃圾检测** | [demo](examples/yolov8-plastic-bag) |
| **text detection(PPOCR-det v3, v4)**<br />**PPOCR文本检测** | [demo](examples/db) |
| Model | Example |
| :--------------------------------------------------------------------------------: | :------------------------------: |
| **text detection<br />(PPOCR-det v3, v4)**<br />**通用文本检测** | [demo](examples/db) |
| **text recognition<br />(PPOCR-rec v3, v4)**<br />**中英文-文本识别** | [demo](examples/svtr) |
| **face-landmark detection**<br />**人脸 & 关键点检测** | [demo](examples/yolov8-face) |
| **head detection**<br /> **人头检测** | [demo](examples/yolov8-head) |
| **fall detection**<br /> **摔倒检测** | [demo](examples/yolov8-falldown) |
| **trash detection**<br /> **垃圾检测** | [demo](examples/yolov8-plastic-bag) |
## Demo
@ -60,27 +61,42 @@ check **[ort guide](https://ort.pyke.io/setup/linking)**
```shell
cargo add --git https://github.com/jamjamjon/usls
# or
cargo add usls
```
#### 3. Set `Options` and build model
```Rust
let options = Options::default()
.with_model("../models/yolov8m-seg-dyn-f16.onnx")
.with_trt(0) // using cuda(0) by default
// when model with dynamic shapes
.with_i00((1, 2, 4).into()) // dynamic batch
.with_i02((416, 640, 800).into()) // dynamic height
.with_i03((416, 640, 800).into()) // dynamic width
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_dry_run(3)
.with_saveout("YOLOv8"); // save results
.with_model("../models/yolov8m-seg-dyn-f16.onnx");
let mut model = YOLO::new(&options)?;
```
- If you want to run your model with TensorRT or CoreML
```Rust
let options = Options::default()
.with_trt(0) // using cuda by default
// .with_coreml(0)
```
- If your model has dynamic shapes
```Rust
let options = Options::default()
.with_i00((1, 2, 4).into()) // dynamic batch
.with_i02((416, 640, 800).into()) // dynamic height
.with_i03((416, 640, 800).into()) // dynamic width
```
- If you want to set a confidence level for each category
```Rust
let options = Options::default()
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
```
- Go check [Options](src/options.rs) for more model options.
#### 4. Prepare inputs, and then you're ready to go
- Build `DataLoader` to load images
@ -98,10 +114,17 @@ for (xs, _paths) in dl {
- Or simply read one image
```Rust
let x = DataLoader::try_read("./assets/bus.jpg")?;
let _y = model.run(&[x])?;
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
let y = model.run(&x)?;
```
#### 5. Annotate and save results
```Rust
let annotator = Annotator::default().with_saveout("YOLOv8");
annotator.annotate(&x, &y);
```
## Script: convert ONNX model from `float32` to `float16`
```python
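# The script body is not included in this hunk; the lines below are a sketch of
# how such a conversion is commonly done with `onnx` plus the
# `onnxconverter-common` package, not necessarily the exact script in this repo.
import onnx
from onnxconverter_common import float16

# illustrative paths only
model_fp32 = onnx.load("yolov8m-seg-dyn.onnx")
model_fp16 = float16.convert_float_to_float16(model_fp32)
onnx.save(model_fp16, "yolov8m-seg-dyn-f16.onnx")
```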

BIN  assets/db.png (new file, 100 KiB)
BIN  examples/assets/bus.jpg (new file, 134 KiB)
BIN  examples/assets/kids.jpg (new file, 85 KiB)
BIN  examples/assets/trash.jpg (new file, 272 KiB)

View File

@ -42,14 +42,14 @@ cargo run -r --example clip
## Results
```shell
(82.24775%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
[0.06708972, 0.0067733657, 0.0019306632, 0.8224775, 0.003044935, 0.083962336, 0.014721389]
(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
(85.56889%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
[0.0786363, 0.0004783095, 0.00060898095, 0.06286741, 0.0006842306, 0.8556889, 0.0010357979]
(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
(90.03625%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
[0.07473288, 0.0027821448, 0.0075673857, 0.010874652, 0.003041679, 0.0006387719, 0.9003625]
(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
```

View File

@ -16,7 +16,6 @@ cargo run -r --example db
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
```
### 3. Run
@ -27,10 +26,10 @@ cargo run -r --example db
### Speed test
| Model | Image size | TensorRT<br />f16 | TensorRT<br />f32 | CUDA<br />f32 |
| --------------- | ---------- | ----------------- | ----------------- | ------------- |
| ppocr-v3-db-dyn | 640x640 | 1.8585ms | 2.5739ms | 4.3314ms |
| ppocr-v4-db-dyn | 640x640 | 2.0507ms | 2.8264ms | 6.6064ms |
| Model | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
| --------------- | ---------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------ |
| ppocr-v3-db-dyn | 640x640 | 1.8585 | 2.5739 | 4.3314 |
| ppocr-v4-db-dyn | 640x640 | 2.0507 | 2.8264 | 6.6064 |
***Test on RTX3060***


View File

@ -1,25 +1,33 @@
use usls::{models::DB, DataLoader, Options};
use usls::{models::DB, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/ppocr-v4-db-dyn.onnx")
.with_i00((1, 1, 4).into())
.with_i02((608, 640, 960).into())
.with_i03((608, 640, 960).into())
.with_confs(&[0.7])
.with_saveout("DB-Text-Detection")
.with_dry_run(5)
.with_i00((1, 4, 8).into())
.with_i02((608, 960, 1280).into())
.with_i03((608, 960, 1280).into())
.with_confs(&[0.4])
.with_min_width(5.0)
.with_min_height(12.0)
// .with_trt(0)
// .with_fp16(true)
.with_profile(true);
.with_model("../models/ppocr-v4-db-dyn.onnx");
let mut model = DB::new(&options)?;
// load image
let x = DataLoader::try_read("./assets/math.jpg")?;
let x = vec![DataLoader::try_read("./assets/db.png")?];
// run
let _y = model.run(&[x])?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default()
.with_polygon_color([255u8, 0u8, 0u8])
.without_name(true)
.without_polygons(false)
.without_bboxes(false)
.with_saveout("DB-Text-Detection");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,4 +1,4 @@
use usls::{models::Dinov2, Metric, Options};
use usls::{models::Dinov2, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -8,31 +8,32 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 1).into())
.with_i02((224, 224, 224).into())
.with_i03((224, 224, 224).into());
let mut model = Dinov2::new(&options)?;
let _model = Dinov2::new(&options)?;
println!("TODO...");
// query from vector
let ys = model.query_from_vec(
"./assets/bus.jpg",
&[
"./examples/dinov2/images/bus.jpg",
"./examples/dinov2/images/1.jpg",
"./examples/dinov2/images/2.jpg",
],
Metric::L2,
)?;
// let ys = model.query_from_vec(
// "./assets/bus.jpg",
// &[
// "./examples/dinov2/images/bus.jpg",
// "./examples/dinov2/images/1.jpg",
// "./examples/dinov2/images/2.jpg",
// ],
// Metric::L2,
// )?;
// or query from folder
// let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;
// results
for (i, y) in ys.iter().enumerate() {
println!(
"Top-{:<3}{:.7} {}",
i + 1,
y.1,
y.2.canonicalize()?.display()
);
}
// for (i, y) in ys.iter().enumerate() {
// println!(
// "Top-{:<3}{:.7} {}",
// i + 1,
// y.1,
// y.2.canonicalize()?.display()
// );
// }
Ok(())
}

View File

@ -1,4 +1,4 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -7,16 +7,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_saveout("FastSAM")
.with_profile(false);
.with_confs(&[0.4]);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("FastSAM");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,19 +1,22 @@
use usls::{models::RTDETR, DataLoader, Options, COCO_NAMES_80};
use usls::{models::RTDETR, Annotator, DataLoader, Options, COCO_NAMES_80};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/rtdetr-l-f16.onnx")
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_names(&COCO_NAMES_80)
.with_saveout("RT-DETR");
.with_names(&COCO_NAMES_80);
let mut model = RTDETR::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("RT-DETR");
annotator.annotate(&x, &y);
Ok(())
}

44
examples/svtr/README.md Normal file
View File

@ -0,0 +1,44 @@
## Quick Start
```shell
cargo run -r --example svtr
```
## Or you can do it manually
### 1. Download ONNX Model
[ppocr-v4-server-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-server-svtr-ch-dyn.onnx)
[ppocr-v4-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-svtr-ch-dyn.onnx)
[ppocr-v3-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v3-svtr-ch-dyn.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
```
### 3. Run
```bash
cargo run -r --example svtr
```
### Speed test
| Model | Width | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
| --------------------------- | :---: | :--------------------------------------: | :--------------------------------------: | :----------------------------------: |
| ppocr-v4-server-svtr-ch-dyn | 1500 | 4.2116 | 13.0013 | 20.8673 |
| ppocr-v4-svtr-ch-dyn | 1500 | 2.0435 | 3.1959 | 10.1750 |
| ppocr-v3-svtr-ch-dyn | 1500 | 1.8596 | 2.9401 | 6.8210 |
***Test on RTX3060***
## Results
```shell
[Texts] from the background, but also separate text instances which
[Texts] are closely jointed. Some examples are ilustrated in Fig.7.
[Texts] 你有这么高速运转的机械进入中国,记住我给出的原理
```

24
examples/svtr/main.rs Normal file
View File

@ -0,0 +1,24 @@
use usls::{models::SVTR, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_i00((1, 2, 8).into())
.with_i03((320, 1500, 2200).into())
.with_confs(&[0.6])
.with_vocab("../ppocr_rec_vocab.txt")
.with_model("../models/ppocr-v4-svtr-ch-dyn.onnx");
let mut model = SVTR::new(&options)?;
// load image
let xs = vec![
DataLoader::try_read("./examples/svtr/text1.png")?,
DataLoader::try_read("./examples/svtr/text2.png")?,
DataLoader::try_read("./examples/svtr/text3.png")?,
];
// run
model.run(&xs)?;
Ok(())
}

BIN  examples/svtr/text1.png (new file, 9.0 KiB)
BIN  examples/svtr/text2.png (new file, 17 KiB)
BIN  examples/svtr/text3.png (new file, 14 KiB)

View File

@ -1,4 +1,4 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -8,15 +8,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.3]) // shoes: 0.2
.with_saveout("YOLO-World")
.with_profile(false);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLO-World");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,4 +1,4 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -7,16 +7,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.15])
.with_saveout("YOLOv8-Face")
.with_profile(false);
.with_confs(&[0.15]);
let mut model = YOLO::new(&options)?;
// load image
let x = DataLoader::try_read("./assets/kids.jpg")?;
let x = vec![DataLoader::try_read("./assets/kids.jpg")?];
// run
let _y = model.run(&[x])?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLOv8-Face");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,19 +1,21 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/yolov8-falldown-f16.onnx")
.with_confs(&[0.3])
.with_saveout("YOLOv8-Falldown")
.with_profile(false);
.with_confs(&[0.3]);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/falldown.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/falldown.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLOv8-Falldown");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,19 +1,21 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/yolov8-head-f16.onnx")
.with_confs(&[0.3])
.with_saveout("YOLOv8-Head")
.with_profile(false);
.with_confs(&[0.3]);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/kids.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/kids.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLOv8-Head");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,20 +1,22 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1.build model
let options = Options::default()
.with_model("../models/yolov8-plastic-bag-f16.onnx")
.with_confs(&[0.3])
.with_saveout("YOLOv8-Trash")
.with_names(&["trash"])
.with_profile(false);
.with_names(&["trash"]);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/trash.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/trash.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLOv8-Trash");
annotator.annotate(&x, &y);
Ok(())
}


View File

@ -1,27 +1,40 @@
use usls::{models::YOLO, DataLoader, Options, COCO_SKELETON_17};
use usls::{models::YOLO, Annotator, DataLoader, Options, COCO_SKELETON_17};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1.build model
// build model
let options = Options::default()
.with_model("../models/yolov8m-dyn-f16.onnx")
// .with_model("../models/yolov8m-seg-dyn-f16.onnx")
.with_model("../models/yolov8m-cls.onnx")
// .with_trt(0) // cuda by default
// .with_fp16(true)
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_i02((224, 224, 800).into())
.with_i03((224, 224, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_profile(false)
.with_dry_run(3)
.with_skeletons(&COCO_SKELETON_17)
.with_saveout("YOLOv8");
.with_dry_run(3);
let mut model = YOLO::new(&options)?;
// 2.build dataloader
let dl = DataLoader::default().with_batch(1).load("./assets")?;
// build dataloader
let dl = DataLoader::default()
.with_batch(1)
.load("./assets/bus.jpg")?;
// 3.run
// build annotate
let annotator = Annotator::default()
.with_skeletons(&COCO_SKELETON_17)
.without_conf(false)
.without_name(false)
.without_masks(false)
.without_polygons(false)
.without_bboxes(false)
.with_saveout("YOLOv8");
// run & annotate
for (xs, _paths) in dl {
let _y = model.run(&xs)?;
let ys = model.run(&xs)?;
annotator.annotate(&xs, &ys);
}
Ok(())
}

View File

@ -1,4 +1,4 @@
use usls::{models::YOLO, DataLoader, Options};
use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -8,15 +8,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_saveout("YOLOv9")
.with_profile(false);
let mut model = YOLO::new(&options)?;
// build dataloader
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
// load image
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
// run
model.run(&dl.next().unwrap().0)?;
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default().with_saveout("YOLOv9");
annotator.annotate(&x, &y);
Ok(())
}

View File

@ -1,26 +1,83 @@
use crate::{
auto_load, string_now, Bbox, Embedding, Keypoint, Polygon, Ys, CHECK_MARK, CROSS_MARK,
};
use ab_glyph::{FontVec, PxScale};
use anyhow::Result;
use image::{ImageBuffer, RgbImage};
use crate::{auto_load, string_now, Results, CHECK_MARK, CROSS_MARK};
use image::{DynamicImage, GrayImage, ImageBuffer, Rgb, RgbImage};
#[derive(Debug)]
pub struct Annotator {
font: rusttype::Font<'static>,
font: ab_glyph::FontVec,
scale_: f32, // Cope with ab_glyph & imageproc=0.24.0
skeletons: Option<Vec<(usize, usize)>>,
hide_conf: bool,
polygon_color: Rgb<u8>,
saveout: Option<String>,
without_conf: bool,
without_name: bool,
without_bboxes: bool,
without_masks: bool,
without_polygons: bool,
without_keypoints: bool,
}
impl Default for Annotator {
fn default() -> Self {
Self {
font: Self::load_font(None).unwrap(),
scale_: 6.666667,
skeletons: None,
hide_conf: false,
polygon_color: Rgb([255, 255, 255]),
saveout: None,
without_conf: false,
without_name: false,
without_bboxes: false,
without_masks: false,
without_polygons: false,
without_keypoints: false,
}
}
}
impl Annotator {
pub fn without_conf(mut self, x: bool) -> Self {
self.without_conf = x;
self
}
pub fn without_name(mut self, x: bool) -> Self {
self.without_name = x;
self
}
pub fn without_bboxes(mut self, x: bool) -> Self {
self.without_bboxes = x;
self
}
pub fn without_masks(mut self, x: bool) -> Self {
self.without_masks = x;
self
}
pub fn without_polygons(mut self, x: bool) -> Self {
self.without_polygons = x;
self
}
pub fn without_keypoints(mut self, x: bool) -> Self {
self.without_keypoints = x;
self
}
pub fn with_saveout(mut self, saveout: &str) -> Self {
self.saveout = Some(saveout.to_string());
self
}
pub fn with_polygon_color(mut self, rgb: [u8; 3]) -> Self {
self.polygon_color = Rgb(rgb);
self
}
pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self {
self.skeletons = Some(skeletons.to_vec());
self
@ -44,163 +101,217 @@ impl Annotator {
}
}
fn load_font(path: Option<&str>) -> Result<rusttype::Font<'static>> {
pub fn annotate(&self, imgs: &[DynamicImage], ys: &[Ys]) {
for (img, y) in imgs.iter().zip(ys.iter()) {
let mut img_rgb = img.to_rgb8();
// masks
if !self.without_masks {
if let Some(masks) = &y.masks {
self.plot_masks(&mut img_rgb, masks)
}
}
// polygons
if !self.without_polygons {
if let Some(polygons) = &y.polygons {
self.plot_polygons(&mut img_rgb, polygons)
}
}
// bboxes
if !self.without_bboxes {
if let Some(bboxes) = &y.bboxes {
self.plot_bboxes(&mut img_rgb, bboxes)
}
}
// keypoints
if !self.without_keypoints {
if let Some(keypoints) = &y.keypoints {
self.plot_keypoints(&mut img_rgb, keypoints)
}
}
// probs
if let Some(probs) = &y.probs {
self.plot_probs(&mut img_rgb, probs)
}
if let Some(saveout) = &self.saveout {
self.save(&img_rgb, saveout);
}
}
}
pub fn plot_masks(&self, img: &mut RgbImage, masks: &[Vec<u8>]) {
for mask in masks.iter() {
let mask_nd: GrayImage =
ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec())
.expect("can not crate image from ndarray");
for _x in 0..img.width() {
for _y in 0..img.height() {
let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y);
if mask_p.0[0] > 0 {
let mut img_p = imageproc::drawing::Canvas::get_pixel(img, _x, _y);
img_p.0[0] /= 2;
img_p.0[1] = 255 - (255 - img_p.0[1]) / 3;
img_p.0[2] /= 2;
imageproc::drawing::Canvas::draw_pixel(img, _x, _y, img_p)
}
}
}
}
}
pub fn plot_bboxes(&self, img: &mut RgbImage, bboxes: &[Bbox]) {
for bbox in bboxes.iter() {
imageproc::drawing::draw_hollow_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin().round() as i32, bbox.ymin().round() as i32)
.of_size(bbox.width().round() as u32, bbox.height().round() as u32),
image::Rgb(self.get_color(bbox.id()).into()),
);
let mut legend = String::new();
if !self.without_name {
legend.push_str(&bbox.name().unwrap_or(&bbox.id().to_string()).to_string());
}
if !self.without_conf {
if !self.without_name {
legend.push_str(&format!(": {:.4}", bbox.confidence()));
} else {
legend.push_str(&format!("{:.4}", bbox.confidence()));
}
}
let scale_dy = img.width().max(img.height()) as f32 / 40.0;
let scale = PxScale::from(scale_dy);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend); // u32
let text_h = text_h + text_h / 3;
let top = if bbox.ymin() > text_h as f32 {
(bbox.ymin().round() as u32 - text_h) as i32
} else {
(text_h - bbox.ymin().round() as u32) as i32
};
// text
if !legend.is_empty() {
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin() as i32, top).of_size(text_w, text_h),
image::Rgb(self.get_color(bbox.id()).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb([0, 0, 0]),
bbox.xmin() as i32,
top - (scale_dy / self.scale_).floor() as i32 + 2,
scale,
&self.font,
&legend,
);
}
}
}
pub fn plot_polygons(&self, img: &mut RgbImage, polygons: &[Polygon]) {
for polygon in polygons.iter() {
// option: draw polygon
let polygon = polygon
.points
.iter()
.map(|p| imageproc::point::Point::new(p.x, p.y))
.collect::<Vec<_>>();
imageproc::drawing::draw_hollow_polygon_mut(img, &polygon, self.polygon_color);
// option: draw circle
// polygon.points.iter().for_each(|point| {
// imageproc::drawing::draw_filled_circle_mut(
// img,
// (point.x as i32, point.y as i32),
// 1,
// // image::Rgb([255, 255, 255]),
// self.polygon_color,
// );
// });
}
}
pub fn plot_probs(&self, img: &mut RgbImage, probs: &Embedding) {
let topk = 5usize;
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
for k in probs.topk(topk).iter() {
let legend = format!("{}: {:.4}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
let scale_dy = img.width().max(img.height()) as f32 / 30.0;
let scale = PxScale::from(scale_dy);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
let text_h = text_h + text_h / 3;
y += text_h as i32;
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(x, y).of_size(text_w, text_h),
image::Rgb(self.get_color(k.0).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb((0, 0, 0).into()),
x,
y - (scale_dy / self.scale_).floor() as i32 + 2,
scale,
&self.font,
&legend,
);
}
}
pub fn plot_keypoints(&self, img: &mut RgbImage, keypoints: &[Vec<Keypoint>]) {
let radius = 3;
for kpts in keypoints.iter() {
for (i, kpt) in kpts.iter().enumerate() {
if kpt.confidence() == 0.0 {
continue;
}
// draw point
imageproc::drawing::draw_filled_circle_mut(
img,
(kpt.x() as i32, kpt.y() as i32),
radius,
image::Rgb(self.get_color(i + 10).into()),
);
}
// draw skeleton
if let Some(skeletons) = &self.skeletons {
for &(i, ii) in skeletons.iter() {
let kpt1 = &kpts[i];
let kpt2 = &kpts[ii];
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
continue;
}
imageproc::drawing::draw_line_segment_mut(
img,
(kpt1.x(), kpt1.y()),
(kpt2.x(), kpt2.y()),
image::Rgb([255, 51, 255]),
);
}
}
}
}
fn load_font(path: Option<&str>) -> Result<FontVec> {
let path_font = match path {
None => auto_load("Arial.ttf")?,
Some(p) => p.into(),
};
let buffer = std::fs::read(path_font)?;
Ok(rusttype::Font::try_from_vec(buffer).unwrap())
Ok(FontVec::try_from_vec(buffer.to_owned()).unwrap())
}
pub fn get_color(&self, n: usize) -> (u8, u8, u8) {
Self::color_palette()[n % Self::color_palette().len()]
}
pub fn plot(&self, img: &mut RgbImage, y: &Results) {
// masks and polygons
if let Some(masks) = y.masks() {
for mask in masks.iter() {
let mask_nd: ImageBuffer<image::Luma<_>, Vec<u8>> =
ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec())
.expect("can not crate image from ndarray");
// masks
for _x in 0..img.width() {
for _y in 0..img.height() {
let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y);
if mask_p.0[0] > 0 {
let mut img_p = imageproc::drawing::Canvas::get_pixel(img, _x, _y);
img_p.0[0] /= 2;
img_p.0[1] = 255 - (255 - img_p.0[1]) / 3;
img_p.0[2] /= 2;
imageproc::drawing::Canvas::draw_pixel(img, _x, _y, img_p)
}
}
}
// contours
let contours: Vec<imageproc::contours::Contour<i32>> =
imageproc::contours::find_contours(&mask_nd);
for contour in contours.iter() {
for point in contour.points.iter() {
imageproc::drawing::draw_filled_circle_mut(
img,
(point.x, point.y),
1,
image::Rgb([255, 255, 255]),
);
}
}
}
}
// probs
if let Some(probs) = y.probs() {
let topk = 5usize;
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
for k in probs.topk(topk).iter() {
let legend = format!("{}: {:.2}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
let scale = img.width().max(img.height()) as f32 / 30.0;
let scale = rusttype::Scale::uniform(scale);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
y += text_h;
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(x, y).of_size(text_w as u32, text_h as u32),
image::Rgb(self.get_color(k.0).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb((0, 0, 0).into()),
x,
y,
scale,
&self.font,
&legend,
);
}
}
// bboxes
if let Some(bboxes) = y.bboxes() {
for bbox in bboxes.iter() {
imageproc::drawing::draw_hollow_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32)
.of_size(bbox.width() as u32, bbox.height() as u32),
image::Rgb(self.get_color(bbox.id()).into()),
);
let legend = if self.hide_conf {
bbox.name().unwrap_or(&bbox.id().to_string()).to_string()
} else {
format!(
"{}: {:.4}",
bbox.name().unwrap_or(&bbox.id().to_string()),
bbox.confidence()
)
};
let scale = img.width().max(img.height()) as f32 / 45.0;
let scale = rusttype::Scale::uniform(scale);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
let text_y = if bbox.ymin() as i32 > text_h {
bbox.ymin() as i32 - text_h
} else {
text_h - bbox.ymin() as i32
};
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(bbox.xmin() as i32, text_y)
.of_size(text_w as u32, text_h as u32),
image::Rgb(self.get_color(bbox.id()).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgb((0, 0, 0).into()),
bbox.xmin() as i32,
text_y,
scale,
&self.font,
&legend,
);
}
}
// keypoints
if let Some(keypoints) = y.keypoints() {
let radius = 3;
for kpts in keypoints.iter() {
for (i, kpt) in kpts.iter().enumerate() {
if kpt.confidence() == 0.0 {
continue;
}
// draw point
imageproc::drawing::draw_filled_circle_mut(
img,
(kpt.x() as i32, kpt.y() as i32),
radius,
image::Rgb(self.get_color(i + 10).into()),
);
}
// draw skeleton
if let Some(skeletons) = &self.skeletons {
for &(i, ii) in skeletons.iter() {
let kpt1 = &kpts[i];
let kpt2 = &kpts[ii];
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
continue;
}
imageproc::drawing::draw_line_segment_mut(
img,
(kpt1.x(), kpt1.y()),
(kpt2.x(), kpt2.y()),
image::Rgb([255, 51, 255]),
);
}
}
}
}
}
fn color_palette() -> Vec<(u8, u8, u8)> {
vec![
(0, 255, 0),

View File

@ -209,7 +209,6 @@ impl OrtEngine {
.with_int8(int8_enable)
.with_fp16(fp16_enable)
.with_engine_cache(engine_cache_enable)
// .with_engine_cache_path(config_dir().to_str().unwrap())
.with_engine_cache_path(format!(
"{}/{}",
config_dir().to_str().unwrap(),
@ -221,7 +220,9 @@ impl OrtEngine {
.with_profile_max_shapes(spec_max)
.build();
if trt.is_available()? && trt.register(builder).is_ok() {
println!("{CHECK_MARK} Using TensorRT");
println!(
"{CHECK_MARK} Using TensorRT (Initial model serialization may require a wait)"
);
Ok(trt)
} else {
println!("{CROSS_MARK} TensorRT initialization failed. Try CUDA...");
@ -343,7 +344,7 @@ impl OrtEngine {
}
pub fn width(&self) -> &MinOptMax {
&self.inputs_minoptmax[0][2]
&self.inputs_minoptmax[0][3]
}
pub fn is_batch_dyn(&self) -> bool {

View File

@ -6,6 +6,7 @@ mod dynconf;
mod embedding;
mod engine;
mod keypoint;
mod logits_sampler;
mod metric;
mod min_opt_max;
pub mod models;
@ -14,10 +15,10 @@ mod options;
mod point;
mod polygon;
mod rect;
mod results;
mod rotated_rect;
mod tokenizer_stream;
mod utils;
mod ys;
pub use annotator::Annotator;
pub use bbox::Bbox;
@ -27,19 +28,17 @@ pub use dynconf::DynConf;
pub use embedding::Embedding;
pub use engine::OrtEngine;
pub use keypoint::Keypoint;
pub use logits_sampler::LogitsSampler;
pub use metric::Metric;
pub use min_opt_max::MinOptMax;
pub use options::Options;
pub use point::Point;
pub use polygon::Polygon;
pub use rect::Rect;
pub use results::Results;
pub use rotated_rect::RotatedRect;
pub use tokenizer_stream::TokenizerStream;
pub use utils::{
auto_load, config_dir, download, non_max_suppression, string_now, COCO_NAMES_80,
COCO_SKELETON_17,
};
pub use utils::{auto_load, config_dir, download, string_now, COCO_NAMES_80, COCO_SKELETON_17};
pub use ys::Ys;
const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1";
const CHECK_MARK: &str = "";

94
src/logits_sampler.rs Normal file
View File

@ -0,0 +1,94 @@
use anyhow::Result;
use rand::distributions::{Distribution, WeightedIndex};
#[derive(Debug)]
pub struct LogitsSampler {
temperature: f32,
p: f32,
}
impl Default for LogitsSampler {
fn default() -> Self {
Self {
temperature: 1.0,
p: 0.0,
}
}
}
impl LogitsSampler {
pub fn new() -> Self {
Self::default()
}
pub fn with_topp(mut self, p: f32) -> Self {
self.p = p.max(0.0).min(1.0);
self
}
pub fn with_temperature(mut self, temperature: f32) -> Self {
self.temperature = temperature.max(1e-7);
self
}
pub fn decode(&mut self, logits: &[f32]) -> Result<u32> {
if self.p == 0.0 {
self.search_by_argmax(logits)
} else {
self.sample_by_topp(logits)
}
}
fn search_by_argmax(&mut self, logits: &[f32]) -> Result<u32> {
// no need to do softmax
let (token_id, _) = logits
.iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
Ok(token_id as u32)
}
fn sample_by_topp(&mut self, logits: &[f32]) -> Result<u32> {
let logits = self.softmax(logits);
let mut logits: Vec<(usize, f32)> = logits
.iter()
.enumerate()
.map(|(i, &prob)| (i, prob))
.collect();
logits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
// candidates
let mut candidates: Vec<(usize, f32)> = Vec::new();
let mut acc_prob: f32 = 0.0;
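// keep the highest-probability tokens until their cumulative mass first reaches p (the nucleus)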
for (idx, prob) in logits.iter() {
acc_prob += prob;
if acc_prob >= self.p {
if candidates.is_empty() {
candidates.push((*idx, acc_prob));
}
break;
}
candidates.push((*idx, acc_prob));
}
// sample
let choices: Vec<usize> = candidates.iter().map(|&(idx, _)| idx).collect();
let probs: Vec<f32> = candidates.iter().map(|&(_, prob)| prob).collect();
let dist = WeightedIndex::new(probs)?;
let mut rng = rand::thread_rng();
let token_id = choices[dist.sample(&mut rng)];
Ok(token_id as u32)
}
fn softmax(&self, logits: &[f32]) -> Vec<f32> {
let logits_t = logits
.iter()
.map(|&x| x / self.temperature)
.collect::<Vec<f32>>();
let max_logit = logits_t.iter().fold(f32::MIN, |a, &b| a.max(b));
let exps: Vec<f32> = logits_t.iter().map(|&x| (x - max_logit).exp()).collect();
let sum_exps: f32 = exps.iter().sum();
exps.iter().map(|&exp| exp / sum_exps).collect()
}
}
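Below is a minimal usage sketch for the new sampler (not taken from the repo's examples; the logits values are made up for illustration): with the default `p == 0.0` it decodes by plain argmax, and `with_topp` switches it to nucleus sampling over the temperature-scaled softmax.
```Rust
use usls::LogitsSampler;

fn main() -> anyhow::Result<()> {
    // made-up logits over a tiny 4-token vocabulary
    let logits = vec![2.0f32, 0.5, 0.1, -1.0];

    // p == 0.0 => greedy argmax decoding
    let mut greedy = LogitsSampler::new();
    println!("argmax token: {}", greedy.decode(&logits)?);

    // top-p (nucleus) sampling with temperature
    let mut sampler = LogitsSampler::new().with_topp(0.9).with_temperature(0.7);
    println!("sampled token: {}", sampler.decode(&logits)?);
    Ok(())
}
```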

View File

@ -1,135 +1,132 @@
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Axis, IxDyn};
use std::io::Write;
use tokenizers::Tokenizer;
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine, TokenizerStream};
#[derive(Debug)]
pub struct Blip {
pub textual: OrtEngine,
pub visual: OrtEngine,
pub height: MinOptMax,
pub width: MinOptMax,
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
tokenizer: TokenizerStream,
}
impl Blip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.batch().to_owned(),
textual.batch().to_owned(),
visual.height().to_owned(),
visual.width().to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-blip.json")?,
Some(tokenizer) => tokenizer.into(),
};
let tokenizer = Tokenizer::from_file(tokenizer).unwrap();
let tokenizer = TokenizerStream::new(tokenizer);
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
})
}
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
let ys = ys[0].to_owned();
Ok(ys)
}
pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> {
// this demo use batch_size=1
let x = image::io::Reader::open(path)?.decode()?;
let image_embeds = self.encode_images(&[x])?;
let image_embeds_attn_mask: Array<f32, IxDyn> =
Array::ones((1, image_embeds.shape()[1])).into_dyn();
// conditional
let mut input_ids = match prompt {
None => {
print!("[Unconditional image captioning]: ");
vec![0.0f32]
}
Some(prompt) => {
let encodings = self.tokenizer.tokenizer().encode(prompt, false);
let ids: Vec<f32> = encodings
.unwrap()
.get_ids()
.iter()
.map(|x| *x as f32)
.collect();
print!("[Conditional image captioning]: {} ", prompt);
ids
}
};
loop {
let input_ids_nd: Array<f32, IxDyn> = Array::from_vec(input_ids.to_owned()).into_dyn();
let input_ids_nd = input_ids_nd.insert_axis(Axis(0));
let input_ids_attn_mask: Array<f32, IxDyn> =
Array::ones(input_ids_nd.shape()).into_dyn();
let y = self.textual.run(&[
input_ids_nd,
input_ids_attn_mask,
image_embeds.to_owned(),
image_embeds_attn_mask.to_owned(),
])?; // N, length, vocab_size
let y = y[0].to_owned();
let y = y.slice(s!(0, -1.., ..));
// softmax
let exps = y.mapv(|c| c.exp());
let stds = exps.sum_axis(Axis(1));
let probs = exps / stds.insert_axis(Axis(1));
let probs = probs.slice(s!(0, ..));
// argmax
let (token_id, _) = probs
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
input_ids.push(token_id as f32);
// SEP
if token_id == 102 {
break;
}
// streaming generation
if let Some(t) = self.tokenizer.next_token(token_id as u32)? {
print!("{t}");
std::io::stdout().flush()?;
}
// sleep for test
std::thread::sleep(std::time::Duration::from_millis(10));
}
println!();
self.tokenizer.clear();
Ok(())
}
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Axis, IxDyn};
use std::io::Write;
use tokenizers::Tokenizer;
use crate::{auto_load, ops, LogitsSampler, MinOptMax, Options, OrtEngine, TokenizerStream};
#[derive(Debug)]
pub struct Blip {
pub textual: OrtEngine,
pub visual: OrtEngine,
pub height: MinOptMax,
pub width: MinOptMax,
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
tokenizer: TokenizerStream,
}
impl Blip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.batch().to_owned(),
textual.batch().to_owned(),
visual.height().to_owned(),
visual.width().to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-blip.json")?,
Some(tokenizer) => tokenizer.into(),
};
let tokenizer = Tokenizer::from_file(tokenizer).unwrap();
let tokenizer = TokenizerStream::new(tokenizer);
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
})
}
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::standardize(
xs_,
&[0.48145466, 0.4578275, 0.40821073],
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
let ys = ys[0].to_owned();
Ok(ys)
}
pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> {
// this demo uses batch_size=1
let x = image::io::Reader::open(path)?.decode()?;
let image_embeds = self.encode_images(&[x])?;
let image_embeds_attn_mask: Array<f32, IxDyn> =
Array::ones((1, image_embeds.shape()[1])).into_dyn();
// conditional
let mut input_ids = match prompt {
None => {
print!("[Unconditional]: ");
vec![0.0f32]
}
Some(prompt) => {
let encodings = self.tokenizer.tokenizer().encode(prompt, false);
let ids: Vec<f32> = encodings
.unwrap()
.get_ids()
.iter()
.map(|x| *x as f32)
.collect();
print!("[Conditional]: {} ", prompt);
ids
}
};
let mut logits_sampler = LogitsSampler::new();
loop {
let input_ids_nd: Array<f32, IxDyn> = Array::from_vec(input_ids.to_owned()).into_dyn();
let input_ids_nd = input_ids_nd.insert_axis(Axis(0));
let input_ids_attn_mask: Array<f32, IxDyn> =
Array::ones(input_ids_nd.shape()).into_dyn();
let y = self.textual.run(&[
input_ids_nd,
input_ids_attn_mask,
image_embeds.to_owned(),
image_embeds_attn_mask.to_owned(),
])?; // N, length, vocab_size
let y = y[0].slice(s!(0, -1.., ..));
let logits = y.slice(s!(0, ..)).to_vec();
let token_id = logits_sampler.decode(&logits)?;
input_ids.push(token_id as f32);
// SEP
if token_id == 102 {
break;
}
// streaming generation
if let Some(t) = self.tokenizer.next_token(token_id as u32)? {
print!("{t}");
std::io::stdout().flush()?;
}
// sleep for test
std::thread::sleep(std::time::Duration::from_millis(5));
}
println!();
self.tokenizer.clear();
Ok(())
}
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}

View File

@ -1,105 +1,110 @@
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
use itertools::Itertools;
use ndarray::{Array, Array2, Axis, IxDyn};
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
#[derive(Debug)]
pub struct Clip {
pub textual: OrtEngine,
pub visual: OrtEngine,
pub height: MinOptMax,
pub width: MinOptMax,
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
tokenizer: Tokenizer,
context_length: usize,
}
impl Clip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let context_length = 77;
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.inputs_minoptmax()[0][0].to_owned(),
textual.inputs_minoptmax()[0][0].to_owned(),
visual.inputs_minoptmax()[0][2].to_owned(),
visual.inputs_minoptmax()[0][3].to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-clip.json").unwrap(),
Some(tokenizer) => tokenizer.into(),
};
let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap();
tokenizer.with_padding(Some(PaddingParams {
strategy: PaddingStrategy::Fixed(context_length),
direction: PaddingDirection::Right,
pad_to_multiple_of: None,
pad_id: 0,
pad_type_id: 0,
pad_token: "[PAD]".to_string(),
}));
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
context_length,
})
}
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
let ys = ys[0].to_owned();
Ok(ys)
}
pub fn encode_texts(&self, texts: &[String]) -> Result<Array<f32, IxDyn>> {
let encodings = self
.tokenizer
.encode_batch(texts.to_owned(), false)
.unwrap();
let xs: Vec<f32> = encodings
.iter()
.map(|i| i.get_ids().iter().map(|b| *b as f32).collect())
.concat();
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
let ys = self.textual.run(&[xs])?;
let ys = ys[0].to_owned();
// let ys = ops::norm(&ys);
Ok(ys)
}
pub fn get_similarity(
&self,
images_feats: &Array<f32, IxDyn>,
texts_feats: &Array<f32, IxDyn>,
) -> Result<Vec<Vec<f32>>> {
let images_feats = images_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
let texts_feats = texts_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
let matrix = images_feats.dot(&texts_feats.t()); // [M, N]
let exps = matrix.mapv(|x| x.exp()); //[M, N]
let stds = exps.sum_axis(Axis(1)); //[M, 1]
let matrix = exps / stds.insert_axis(Axis(1)); // [M, N]
let similarity: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
Ok(similarity)
}
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
// use itertools::Itertools;
use ndarray::{Array, Array2, Axis, IxDyn};
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
#[derive(Debug)]
pub struct Clip {
pub textual: OrtEngine,
pub visual: OrtEngine,
pub height: MinOptMax,
pub width: MinOptMax,
pub batch_visual: MinOptMax,
pub batch_textual: MinOptMax,
tokenizer: Tokenizer,
context_length: usize,
}
impl Clip {
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
let context_length = 77;
let visual = OrtEngine::new(&options_visual)?;
let textual = OrtEngine::new(&options_textual)?;
let (batch_visual, batch_textual, height, width) = (
visual.inputs_minoptmax()[0][0].to_owned(),
textual.inputs_minoptmax()[0][0].to_owned(),
visual.inputs_minoptmax()[0][2].to_owned(),
visual.inputs_minoptmax()[0][3].to_owned(),
);
let tokenizer = match &options_textual.tokenizer {
None => auto_load("tokenizer-clip.json").unwrap(),
Some(tokenizer) => tokenizer.into(),
};
let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap();
tokenizer.with_padding(Some(PaddingParams {
strategy: PaddingStrategy::Fixed(context_length),
direction: PaddingDirection::Right,
pad_to_multiple_of: None,
pad_id: 0,
pad_type_id: 0,
pad_token: "[PAD]".to_string(),
}));
visual.dry_run()?;
textual.dry_run()?;
Ok(Self {
textual,
visual,
batch_visual,
batch_textual,
height,
width,
tokenizer,
context_length,
})
}
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::standardize(
xs_,
&[0.48145466, 0.4578275, 0.40821073],
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
let ys = ys[0].to_owned();
Ok(ys)
}
pub fn encode_texts(&self, texts: &[String]) -> Result<Array<f32, IxDyn>> {
let encodings = self
.tokenizer
.encode_batch(texts.to_owned(), false)
.unwrap();
let xs: Vec<f32> = encodings
.iter()
.flat_map(|i| i.get_ids().iter().map(|&b| b as f32))
.collect();
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
let ys = self.textual.run(&[xs])?;
let ys = ys[0].to_owned();
Ok(ys)
}
pub fn get_similarity(
&self,
images_feats: &Array<f32, IxDyn>,
texts_feats: &Array<f32, IxDyn>,
) -> Result<Vec<Vec<f32>>> {
let images_feats = images_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
let texts_feats = texts_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
let matrix = images_feats.dot(&texts_feats.t()); // [M, N]
let exps = matrix.mapv(|x| x.exp()); //[M, N]
let stds = exps.sum_axis(Axis(1)); //[M, 1]
let matrix = exps / stds.insert_axis(Axis(1)); // [M, N]
let similarity: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
Ok(similarity)
}
pub fn batch_visual(&self) -> usize {
self.batch_visual.opt as usize
}
pub fn batch_textual(&self) -> usize {
self.batch_textual.opt as usize
}
}

View File

@ -1,6 +1,4 @@
use crate::{
ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Point, Polygon, Results,
};
use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Polygon, Ys};
use anyhow::Result;
use image::{DynamicImage, ImageBuffer};
use ndarray::{Array, Axis, IxDyn};
@ -11,131 +9,121 @@ pub struct DB {
height: MinOptMax,
width: MinOptMax,
batch: MinOptMax,
annotator: Annotator,
confs: DynConf,
saveout: Option<String>,
names: Option<Vec<String>>,
unclip_ratio: f32,
binary_thresh: f32,
min_width: f32,
min_height: f32,
}
impl DB {
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.inputs_minoptmax()[0][0].to_owned(),
engine.inputs_minoptmax()[0][2].to_owned(),
engine.inputs_minoptmax()[0][3].to_owned(),
engine.batch().to_owned(),
engine.height().to_owned(),
engine.width().to_owned(),
);
let annotator = Annotator::default();
let names = Some(vec!["Text".to_string()]);
let confs = DynConf::new(&options.confs, 1);
let unclip_ratio = options.unclip_ratio;
let binary_thresh = 0.2;
let min_width = options.min_width.unwrap_or(0.0);
let min_height = options.min_height.unwrap_or(0.0);
engine.dry_run()?;
Ok(Self {
engine,
names,
confs,
height,
width,
batch,
saveout: options.saveout.to_owned(),
annotator,
min_width,
min_height,
unclip_ratio,
binary_thresh,
})
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
let xs_ = ops::letterbox(xs, self.height.opt as u32, self.width.opt as u32)?;
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
let xs_ = ops::letterbox(xs, self.height.opt as u32, self.width.opt as u32, 144.0)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]);
let ys = self.engine.run(&[xs_])?;
let ys = self.postprocess(ys, xs)?;
match &self.saveout {
None => {}
Some(saveout) => {
for (img0, y) in xs.iter().zip(ys.iter()) {
let mut img = img0.to_rgb8();
self.annotator.plot(&mut img, y);
self.annotator.save(&img, saveout);
}
}
}
Ok(ys)
}
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<Results>> {
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
let mut ys = Vec::new();
for (idx, mask) in xs[0].axis_iter(Axis(0)).enumerate() {
let mut ys_bbox = Vec::new();
for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() {
let mut y_bbox = Vec::new();
// reshape
let h = luma.dim()[1];
let w = luma.dim()[2];
let luma = luma.into_shape((h, w, 1))?.into_owned();
// build image from ndarray
let raw_vec = luma
.into_raw_vec()
.iter()
.map(|x| if x <= &self.binary_thresh { 0.0 } else { *x })
.collect::<Vec<_>>();
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
ImageBuffer::from_raw(w as u32, h as u32, raw_vec)
.expect("Faild to create image from ndarray");
let mut mask_im = image::DynamicImage::from(mask_im);
// input image
let image_width = xs0[idx].width() as f32;
let image_height = xs0[idx].height() as f32;
// h,w,1
let h = mask.dim()[1];
let w = mask.dim()[2];
let mask = mask.into_shape((h, w, 1))?.into_owned();
// build image from ndarray
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
ImageBuffer::from_raw(w as u32, h as u32, mask.into_raw_vec())
.expect("Faild to create image from ndarray");
let mut mask_im = image::DynamicImage::from(mask_im);
// rescale
let (_, w_mask, h_mask) = ops::scale_wh(image_width, image_height, w as f32, h as f32);
let mask_original = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
let mask_original = mask_original.resize_exact(
// rescale mask image
let (ratio, w_mask, h_mask) =
ops::scale_wh(image_width, image_height, w as f32, h as f32);
let mask_im = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
let mask_im = mask_im.resize_exact(
image_width as u32,
image_height as u32,
image::imageops::FilterType::Triangle,
);
let mask_im = mask_im.into_luma8();
// contours
let contours: Vec<imageproc::contours::Contour<i32>> =
imageproc::contours::find_contours(&mask_original.into_luma8());
imageproc::contours::find_contours_with_threshold(&mask_im, 1);
// loop
let mut y_polygons: Vec<Polygon> = Vec::new();
for contour in contours.iter() {
// polygon
let points: Vec<Point> = contour
.points
.iter()
.map(|p| Point::new(p.x as f32, p.y as f32))
.collect();
let polygon = Polygon::new(&points);
let mut rect = polygon.find_min_rect();
// min size filter
if rect.height() < 3.0 || rect.width() < 3.0 {
if contour.points.len() <= 1 {
continue;
}
let polygon = Polygon::from_imageproc_points(&contour.points);
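// DB unclip step: expand the shrunk text polygon outward by delta = area * ratio * unclip_ratio / perimeter, then resample and take its convex hull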
let perimeter = polygon.perimeter();
let delta = polygon.area() * ratio.round() * self.unclip_ratio / perimeter;
let polygon = polygon
// .simplify(6e-4 * perimeter)
.offset(delta, image_width, image_height)
.resample(50)
.convex_hull();
let rect = polygon.find_min_rect();
if rect.height() < self.min_height || rect.width() < self.min_width {
continue;
}
// confs filter
let confidence = polygon.area() / rect.area();
if confidence < self.confs[0] {
continue;
}
// TODO: expand polygon
let unclip_ratio = 1.5;
let delta = rect.area() * unclip_ratio / rect.perimeter();
// save
let y_bbox = Bbox::new(
rect.expand(delta, delta, image_width, image_height),
0,
confidence,
self.names.as_ref().map(|names| names[0].clone()),
);
ys_bbox.push(y_bbox);
let bbox = Bbox::new(rect, 0, confidence, None);
y_bbox.push(bbox);
y_polygons.push(polygon);
}
let y = Results {
probs: None,
bboxes: Some(ys_bbox),
keypoints: None,
masks: None,
};
ys.push(y);
ys.push(
Ys::default()
.with_bboxes(&y_bbox)
.with_polygons(&y_polygons),
);
}
Ok(ys)

View File

@ -1,9 +1,9 @@
use crate::{ops, DataLoader, Metric, MinOptMax, Options, OrtEngine};
use crate::{ops, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{Array, IxDyn};
use std::path::PathBuf;
use usearch::ffi::{IndexOptions, MetricKind, ScalarKind};
// use std::path::PathBuf;
// use usearch::ffi::{IndexOptions, MetricKind, ScalarKind};
#[derive(Debug)]
pub enum Model {
@ -49,102 +49,107 @@ impl Dinov2 {
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::standardize(
xs_,
&[0.48145466, 0.4578275, 0.40821073],
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
let ys = ys[0].to_owned();
let ys = ops::norm(&ys);
Ok(ys)
}
pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
let metric = match metric {
Metric::IP => MetricKind::IP,
Metric::L2 => MetricKind::L2sq,
Metric::Cos => MetricKind::Cos,
};
let options = IndexOptions {
metric,
dimensions: self.hidden_size,
quantization: ScalarKind::F16,
..Default::default()
};
Ok(usearch::new_index(&options)?)
}
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
// let metric = match metric {
// Metric::IP => MetricKind::IP,
// Metric::L2 => MetricKind::L2sq,
// Metric::Cos => MetricKind::Cos,
// };
// let options = IndexOptions {
// metric,
// dimensions: self.hidden_size,
// quantization: ScalarKind::F16,
// ..Default::default()
// };
// Ok(usearch::new_index(&options)?)
// }
pub fn query_from_folder(
&mut self,
qurey: &str,
gallery: &str,
metric: Metric,
) -> Result<Vec<(usize, f32, PathBuf)>> {
// load query
let query = DataLoader::try_read(qurey)?;
let query = self.run(&[query])?;
// pub fn query_from_folder(
// &mut self,
// qurey: &str,
// gallery: &str,
// metric: Metric,
// ) -> Result<Vec<(usize, f32, PathBuf)>> {
// // load query
// let query = DataLoader::try_read(qurey)?;
// let query = self.run(&[query])?;
// build index & gallery
let index = self.build_index(metric)?;
let dl = DataLoader::default()
.with_batch(self.batch.opt as usize)
.load(gallery)?;
let paths = dl.paths().to_owned();
index.reserve(paths.len())?;
// // build index & gallery
// let index = self.build_index(metric)?;
// let dl = DataLoader::default()
// .with_batch(self.batch.opt as usize)
// .load(gallery)?;
// let paths = dl.paths().to_owned();
// index.reserve(paths.len())?;
// load feats
for (idx, (x, _path)) in dl.enumerate() {
let y = self.run(&x)?;
index.add(idx as u64, &y.into_raw_vec())?;
}
// // load feats
// for (idx, (x, _path)) in dl.enumerate() {
// let y = self.run(&x)?;
// index.add(idx as u64, &y.into_raw_vec())?;
// }
// output
let matches = index.search(&query.into_raw_vec(), index.size())?;
let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
matches
.keys
.into_iter()
.zip(matches.distances)
.for_each(|(k, score)| {
results.push((k as usize, score, paths[k as usize].to_owned()));
});
// // output
// let matches = index.search(&query.into_raw_vec(), index.size())?;
// let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
// matches
// .keys
// .into_iter()
// .zip(matches.distances)
// .for_each(|(k, score)| {
// results.push((k as usize, score, paths[k as usize].to_owned()));
// });
Ok(results)
}
// Ok(results)
// }
pub fn query_from_vec(
&mut self,
qurey: &str,
gallery: &[&str],
metric: Metric,
) -> Result<Vec<(usize, f32, PathBuf)>> {
// load query
let query = DataLoader::try_read(qurey)?;
let query = self.run(&[query])?;
// pub fn query_from_vec(
// &mut self,
// qurey: &str,
// gallery: &[&str],
// metric: Metric,
// ) -> Result<Vec<(usize, f32, PathBuf)>> {
// // load query
// let query = DataLoader::try_read(qurey)?;
// let query = self.run(&[query])?;
// build index & gallery
let index = self.build_index(metric)?;
index.reserve(gallery.len())?;
let mut dl = DataLoader::default().with_batch(self.batch.opt as usize);
gallery.iter().for_each(|x| {
dl.load(x).unwrap();
});
// // build index & gallery
// let index = self.build_index(metric)?;
// index.reserve(gallery.len())?;
// let mut dl = DataLoader::default().with_batch(self.batch.opt as usize);
// gallery.iter().for_each(|x| {
// dl.load(x).unwrap();
// });
// load feats
let paths = dl.paths().to_owned();
for (idx, (x, _path)) in dl.enumerate() {
let y = self.run(&x)?;
index.add(idx as u64, &y.into_raw_vec())?;
}
// // load feats
// let paths = dl.paths().to_owned();
// for (idx, (x, _path)) in dl.enumerate() {
// let y = self.run(&x)?;
// index.add(idx as u64, &y.into_raw_vec())?;
// }
// output
let matches = index.search(&query.into_raw_vec(), index.size())?;
let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
matches
.keys
.into_iter()
.zip(matches.distances)
.for_each(|(k, score)| {
results.push((k as usize, score, paths[k as usize].to_owned()));
});
// // output
// let matches = index.search(&query.into_raw_vec(), index.size())?;
// let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
// matches
// .keys
// .into_iter()
// .zip(matches.distances)
// .for_each(|(k, score)| {
// results.push((k as usize, score, paths[k as usize].to_owned()));
// });
Ok(results)
}
// Ok(results)
// }
}
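With the usearch-backed gallery search commented out above, a standalone sketch of the same retrieval idea may help: rank gallery embeddings by cosine similarity against a query embedding, which is what the normalized DINOv2 features are typically used for. Everything below is illustrative and not part of the crate.

```rust
// Rank gallery vectors by cosine similarity against a query (toy values).
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (na * nb)
}

fn main() {
    let query = vec![0.1f32, 0.9, 0.2];
    let gallery = vec![vec![0.1f32, 0.8, 0.3], vec![0.9f32, 0.1, 0.1]];
    let mut ranked: Vec<(usize, f32)> = gallery
        .iter()
        .enumerate()
        .map(|(i, g)| (i, cosine(&query, g)))
        .collect();
    ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    println!("{ranked:?}"); // best match first
}
```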

View File

@ -3,6 +3,7 @@ mod clip;
mod db;
mod dinov2;
mod rtdetr;
mod svtr;
mod yolo;
pub use blip::Blip;
@ -10,4 +11,5 @@ pub use clip::Clip;
pub use db::DB;
pub use dinov2::Dinov2;
pub use rtdetr::RTDETR;
pub use svtr::SVTR;
pub use yolo::YOLO;

View File

@ -3,7 +3,7 @@ use image::DynamicImage;
use ndarray::{s, Array, Axis, IxDyn};
use regex::Regex;
use crate::{ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Results};
use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Ys};
#[derive(Debug)]
pub struct RTDETR {
@ -11,9 +11,7 @@ pub struct RTDETR {
height: MinOptMax,
width: MinOptMax,
batch: MinOptMax,
annotator: Annotator,
confs: DynConf,
saveout: Option<String>,
nc: usize,
names: Option<Vec<String>>,
}
@ -43,7 +41,7 @@ impl RTDETR {
.expect("Failed to get num_classes, make it explicit with `--nc`")
.len(),
);
let annotator = Annotator::default();
// let annotator = Annotator::default();
let confs = DynConf::new(&options.confs, nc);
engine.dry_run()?;
@ -54,34 +52,19 @@ impl RTDETR {
height,
width,
batch,
saveout: options.saveout.to_owned(),
annotator,
names,
})
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 144.0)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let ys = self.engine.run(&[xs_])?;
let ys = self.postprocess(ys, xs)?;
match &self.saveout {
None => {}
Some(saveout) => {
for (img0, y) in xs.iter().zip(ys.iter()) {
let mut img = img0.to_rgb8();
self.annotator.plot(&mut img, y);
self.annotator.save(&img, saveout);
}
}
}
Ok(ys)
}
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<Results>> {
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
const CXYWH_OFFSET: usize = 4; // cxcywh
let preds = &xs[0];
@ -129,11 +112,12 @@ impl RTDETR {
);
y_bboxes.push(y_bbox)
}
let y = Results {
let y = Ys {
probs: None,
bboxes: Some(y_bboxes),
keypoints: None,
masks: None,
polygons: None,
};
ys.push(y);
}

src/models/svtr.rs (new file, 86 lines)
View File

@ -0,0 +1,86 @@
use crate::{ops, DynConf, MinOptMax, Options, OrtEngine};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{Array, Axis, IxDyn};
#[derive(Debug)]
pub struct SVTR {
engine: OrtEngine,
pub height: MinOptMax,
pub width: MinOptMax,
pub batch: MinOptMax,
confs: DynConf,
vocab: Vec<String>,
}
impl SVTR {
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.batch().to_owned(),
engine.height().to_owned(),
engine.width().to_owned(),
);
let confs = DynConf::new(&options.confs, 1);
let mut vocab: Vec<_> =
std::fs::read_to_string(options.vocab.as_ref().expect("No vocabulary found"))?
.lines()
.map(|line| line.to_string())
.collect();
vocab.push(" ".to_string());
vocab.insert(0, "Blank".to_string());
engine.dry_run()?;
Ok(Self {
engine,
height,
width,
batch,
vocab,
confs,
})
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<()> {
let xs_ =
ops::resize_with_fixed_height(xs, self.height.opt as u32, self.width.opt as u32, 0.0)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
let ys = ys[0].to_owned();
self.postprocess(&ys)?;
Ok(())
}
pub fn postprocess(&self, xs: &Array<f32, IxDyn>) -> Result<()> {
for batch in xs.axis_iter(Axis(0)) {
let mut texts: Vec<String> = Vec::new();
for (i, seq) in batch.axis_iter(Axis(0)).enumerate() {
let (id, &confidence) = seq
.into_iter()
.enumerate()
.reduce(|max, x| if x.1 > max.1 { x } else { max })
.unwrap();
if id == 0 || confidence < self.confs[0] {
continue;
}
if i == 0 && id == self.vocab.len() - 1 {
continue;
}
texts.push(self.vocab[id].to_owned());
}
texts.dedup();
print!("[Texts] ");
if texts.is_empty() {
println!("Nothing detected!");
} else {
for text in texts.into_iter() {
print!("{text}");
}
println!();
}
}
Ok(())
}
}
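A standalone sketch of the greedy decoding that `SVTR::postprocess` performs above: per time step, take the argmax over the vocabulary, drop the blank token (index 0) and low-confidence steps, then collapse adjacent repeats. The vocabulary and logits below are toy values.

```rust
// Greedy argmax decoding with blank removal and repeat collapsing (toy data).
fn greedy_decode(logits: &[Vec<f32>], vocab: &[&str], min_conf: f32) -> String {
    let mut out: Vec<&str> = Vec::new();
    for step in logits {
        let (id, &conf) = step
            .iter()
            .enumerate()
            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
            .unwrap();
        if id == 0 || conf < min_conf {
            continue; // blank or too uncertain
        }
        out.push(vocab[id]);
    }
    out.dedup(); // collapse adjacent repeats
    out.concat()
}

fn main() {
    let vocab = ["Blank", "h", "i"];
    let logits = vec![
        vec![0.1, 0.8, 0.1],   // "h"
        vec![0.1, 0.7, 0.2],   // "h" again -> collapsed
        vec![0.9, 0.05, 0.05], // blank -> skipped
        vec![0.1, 0.1, 0.8],   // "i"
    ];
    println!("{}", greedy_decode(&logits, &vocab, 0.5)); // "hi"
}
```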

View File

@ -5,8 +5,8 @@ use ndarray::{s, Array, Axis, IxDyn};
use regex::Regex;
use crate::{
non_max_suppression, ops, Annotator, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options,
OrtEngine, Point, Rect, Results,
ops, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options, OrtEngine, Point, Polygon, Rect,
Ys,
};
const CXYWH_OFFSET: usize = 4;
@ -34,8 +34,7 @@ pub struct YOLO {
confs: DynConf,
kconfs: DynConf,
iou: f32,
saveout: Option<String>,
annotator: Annotator,
// saveout: Option<String>,
names: Option<Vec<String>>,
apply_nms: bool,
anchors_first: bool,
@ -101,11 +100,6 @@ impl YOLO {
};
let confs = DynConf::new(&options.confs, nc);
let kconfs = DynConf::new(&options.kconfs, nk);
let mut annotator = Annotator::default();
if let Some(skeletons) = &options.skeletons {
annotator = annotator.with_skeletons(skeletons);
}
let saveout = options.saveout.to_owned();
engine.dry_run()?;
Ok(Self {
@ -121,44 +115,27 @@ impl YOLO {
width,
batch,
task,
saveout,
annotator,
names,
anchors_first: options.anchors_first,
})
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 144.0)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let ys = self.engine.run(&[xs_])?;
let ys = self.postprocess(ys, xs)?;
match &self.saveout {
None => println!("{ys:?}"),
Some(saveout) => {
for (img0, y) in xs.iter().zip(ys.iter()) {
let mut img = img0.to_rgb8();
self.annotator.plot(&mut img, y);
self.annotator.save(&img, saveout);
}
}
}
Ok(ys)
}
pub fn postprocess(
&self,
xs: Vec<Array<f32, IxDyn>>,
xs0: &[DynamicImage],
) -> Result<Vec<Results>> {
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
if let YOLOTask::Classify = self.task {
let mut ys = Vec::new();
for batch in xs[0].axis_iter(Axis(0)) {
ys.push(Results::new(
Some(Embedding::new(batch.into_owned(), self.names.to_owned())),
None,
None,
None,
));
ys.push(
Ys::default()
.with_probs(Embedding::new(batch.into_owned(), self.names.to_owned())),
);
}
Ok(ys)
} else {
@ -265,13 +242,16 @@ impl YOLO {
// nms
if self.apply_nms {
non_max_suppression(&mut data, self.iou);
Self::non_max_suppression(&mut data, self.iou);
}
// decode
let mut y_bboxes: Vec<Bbox> = Vec::new();
let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
let mut y_masks: Vec<Vec<u8>> = Vec::new();
let mut y_polygons: Vec<Polygon> = Vec::new();
for elem in data.into_iter() {
if let Some(kpts) = elem.1 {
y_kpts.push(kpts)
@ -291,7 +271,6 @@ impl YOLO {
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec())
.expect("Faild to create image from ndarray");
let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn
// rescale masks
@ -305,7 +284,7 @@ impl YOLO {
);
// crop-mask with bbox
let mut mask_original_cropped = mask_original.into_luma8();
let mut mask_object_cropped = mask_original.into_luma8(); // gray image
for y in 0..height_original as usize {
for x in 0..width_original as usize {
if x < elem.0.xmin() as usize
@ -313,7 +292,7 @@ impl YOLO {
|| y < elem.0.ymin() as usize
|| y > elem.0.ymax() as usize
{
mask_original_cropped.put_pixel(
mask_object_cropped.put_pixel(
x as u32,
y as u32,
image::Luma([0u8]),
@ -321,31 +300,37 @@ impl YOLO {
}
}
}
y_masks.push(mask_original_cropped.into_raw());
// mask -> contours
let contours: Vec<imageproc::contours::Contour<i32>> =
imageproc::contours::find_contours_with_threshold(
&mask_object_cropped,
1,
);
// contours -> polygons
contours.iter().for_each(|contour| {
if let imageproc::contours::BorderType::Outer = contour.border_type {
if contour.points.len() > 1 {
y_polygons.push(Polygon::from_contour(contour));
}
}
});
// save each mask
y_masks.push(mask_object_cropped.into_raw());
}
y_bboxes.push(elem.0);
}
// save each result
let y = Results {
probs: None,
bboxes: if !y_bboxes.is_empty() {
Some(y_bboxes)
} else {
None
},
keypoints: if !y_kpts.is_empty() {
Some(y_kpts)
} else {
None
},
masks: if !y_masks.is_empty() {
Some(y_masks)
} else {
None
},
};
ys.push(y);
// save result
ys.push(
Ys::default()
.with_bboxes(&y_bboxes)
.with_keypoints(&y_kpts)
.with_masks(&y_masks)
.with_polygons(&y_polygons),
);
}
Ok(ys)
@ -376,4 +361,29 @@ impl YOLO {
pub fn height(&self) -> isize {
self.height.opt
}
#[allow(clippy::type_complexity)]
fn non_max_suppression(
xs: &mut Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)>,
iou_threshold: f32,
) {
xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap());
let mut current_index = 0;
for index in 0..xs.len() {
let mut drop = false;
for prev_index in 0..current_index {
let iou = xs[prev_index].0.iou(&xs[index].0);
if iou > iou_threshold {
drop = true;
break;
}
}
if !drop {
xs.swap(current_index, index);
current_index += 1;
}
}
xs.truncate(current_index);
}
}
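A standalone sketch of the greedy NMS now kept inside `YOLO` above: sort detections by confidence, then keep a box only while its IoU with every already-kept box stays at or below the threshold. `BoxF` below is a stand-in for the crate's `Bbox`; all values are toy data.

```rust
// Greedy non-maximum suppression over axis-aligned boxes (illustrative only).
#[derive(Clone, Copy, Debug)]
struct BoxF { x1: f32, y1: f32, x2: f32, y2: f32, conf: f32 }

fn iou(a: &BoxF, b: &BoxF) -> f32 {
    let ix = (a.x2.min(b.x2) - a.x1.max(b.x1)).max(0.0);
    let iy = (a.y2.min(b.y2) - a.y1.max(b.y1)).max(0.0);
    let inter = ix * iy;
    let union = (a.x2 - a.x1) * (a.y2 - a.y1) + (b.x2 - b.x1) * (b.y2 - b.y1) - inter;
    inter / union
}

fn nms(mut boxes: Vec<BoxF>, iou_thresh: f32) -> Vec<BoxF> {
    boxes.sort_by(|a, b| b.conf.partial_cmp(&a.conf).unwrap());
    let mut kept: Vec<BoxF> = Vec::new();
    for b in boxes {
        if kept.iter().all(|k| iou(k, &b) <= iou_thresh) {
            kept.push(b);
        }
    }
    kept
}

fn main() {
    let boxes = vec![
        BoxF { x1: 0.0, y1: 0.0, x2: 10.0, y2: 10.0, conf: 0.9 },
        BoxF { x1: 1.0, y1: 1.0, x2: 11.0, y2: 11.0, conf: 0.8 }, // overlaps the first -> dropped
        BoxF { x1: 50.0, y1: 50.0, x2: 60.0, y2: 60.0, conf: 0.7 },
    ];
    println!("{:?}", nms(boxes, 0.45)); // two boxes survive
}
```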

View File

@ -2,78 +2,17 @@ use anyhow::Result;
use image::{DynamicImage, GenericImageView};
use ndarray::{Array, Axis, Ix2, IxDyn};
pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
let r = (w1 / w0).min(h1 / h0);
(r, (w0 * r).round(), (h0 * r).round())
pub fn standardize(xs: Array<f32, IxDyn>, mean: &[f32], std: &[f32]) -> Array<f32, IxDyn> {
let mean = Array::from_shape_vec((1, mean.len(), 1, 1), mean.to_vec()).unwrap();
let std = Array::from_shape_vec((1, std.len(), 1, 1), std.to_vec()).unwrap();
(xs - mean) / std
}
pub fn resize(
xs: &[DynamicImage],
height: u32,
width: u32,
norm_imagenet: bool,
) -> Result<Array<f32, IxDyn>> {
let norm = 255.0;
let mut ys = Array::ones(vec![xs.len(), 3, height as usize, width as usize]).into_dyn();
// let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
for (idx, x) in xs.iter().enumerate() {
let (w0, h0) = x.dimensions();
let w0 = w0 as f32;
let h0 = h0 as f32;
let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round
let img = x.resize_exact(
w_new as u32,
h_new as u32,
image::imageops::FilterType::Triangle,
);
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = (r as f32) / norm;
ys[[idx, 1, y, x]] = (g as f32) / norm;
ys[[idx, 2, y, x]] = (b as f32) / norm;
}
}
if norm_imagenet {
let mean =
Array::from_shape_vec((1, 3, 1, 1), vec![0.48145466, 0.4578275, 0.40821073]).unwrap();
let std = Array::from_shape_vec((1, 3, 1, 1), vec![0.26862954, 0.261_302_6, 0.275_777_1])
.unwrap();
ys = (ys - mean) / std;
}
Ok(ys)
pub fn normalize(xs: Array<f32, IxDyn>, min_: f32, max_: f32) -> Array<f32, IxDyn> {
(xs - min_) / (max_ - min_)
}
pub fn letterbox(xs: &[DynamicImage], height: u32, width: u32) -> Result<Array<f32, IxDyn>> {
let norm = 255.0;
let bg = 144.0;
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
ys.fill(bg / norm);
for (idx, x) in xs.iter().enumerate() {
let (w0, h0) = x.dimensions();
let w0 = w0 as f32;
let h0 = h0 as f32;
let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round
let img = x.resize_exact(
w_new as u32,
h_new as u32,
image::imageops::FilterType::Triangle,
);
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = (r as f32) / norm;
ys[[idx, 1, y, x]] = (g as f32) / norm;
ys[[idx, 2, y, x]] = (b as f32) / norm;
}
}
Ok(ys)
}
pub fn norm(xs: &Array<f32, IxDyn>) -> Array<f32, IxDyn> {
pub fn norm2(xs: &Array<f32, IxDyn>) -> Array<f32, IxDyn> {
let std_ = xs
.mapv(|x| x * x)
.sum_axis(Axis(1))
@ -93,3 +32,77 @@ pub fn dot2(query: &Array<f32, IxDyn>, gallery: &Array<f32, IxDyn>) -> Result<Ve
let matrix: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
Ok(matrix)
}
pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
let r = (w1 / w0).min(h1 / h0);
(r, (w0 * r).round(), (h0 * r).round())
}
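A quick worked example of `scale_wh`: it picks the single ratio that fits the image inside the target while preserving aspect ratio, then rounds the scaled size. The numbers below are illustrative.

```rust
// scale_wh(1280.0, 720.0, 640.0, 640.0) worked out by hand.
fn main() {
    let r = (640.0f32 / 1280.0).min(640.0 / 720.0); // 0.5
    let (w_new, h_new) = ((1280.0 * r).round(), (720.0 * r).round()); // (640, 360)
    println!("r = {r}, resized to {w_new}x{h_new}");
}
```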
pub fn resize(xs: &[DynamicImage], height: u32, width: u32) -> Result<Array<f32, IxDyn>> {
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
for (idx, x) in xs.iter().enumerate() {
let img = x.resize_exact(width, height, image::imageops::FilterType::Triangle);
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = r as f32;
ys[[idx, 1, y, x]] = g as f32;
ys[[idx, 2, y, x]] = b as f32;
}
}
Ok(ys)
}
pub fn letterbox(
xs: &[DynamicImage],
height: u32,
width: u32,
bg: f32,
) -> Result<Array<f32, IxDyn>> {
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
ys.fill(bg);
for (idx, x) in xs.iter().enumerate() {
let (w0, h0) = x.dimensions();
let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32);
let img = x.resize_exact(
w_new as u32,
h_new as u32,
image::imageops::FilterType::CatmullRom,
);
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = r as f32;
ys[[idx, 1, y, x]] = g as f32;
ys[[idx, 2, y, x]] = b as f32;
}
}
Ok(ys)
}
pub fn resize_with_fixed_height(
xs: &[DynamicImage],
height: u32,
width: u32,
bg: f32,
) -> Result<Array<f32, IxDyn>> {
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
ys.fill(bg);
for (idx, x) in xs.iter().enumerate() {
let (w0, h0) = x.dimensions();
let h_new = height;
let w_new = height * w0 / h0;
let img = x.resize_exact(w_new, h_new, image::imageops::FilterType::CatmullRom);
for (x, y, rgb) in img.pixels() {
let x = x as usize;
let y = y as usize;
let [r, g, b, _] = rgb.0;
ys[[idx, 0, y, x]] = r as f32;
ys[[idx, 1, y, x]] = g as f32;
ys[[idx, 2, y, x]] = b as f32;
}
}
Ok(ys)
}
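A standalone ndarray sketch of how the split-out helpers compose: what the old `resize(..., norm_imagenet = true)` did in one pass is now an explicit normalize (0..255 to 0..1) followed by standardize with per-channel mean/std broadcast over NCHW, mirroring `ops::normalize` and `ops::standardize` above. The batch below is a dummy tensor, not real image data.

```rust
use ndarray::{Array, IxDyn};

fn main() {
    // dummy 1x3x2x2 "image" batch with every pixel at 128
    let xs: Array<f32, IxDyn> = Array::from_elem((1, 3, 2, 2), 128.0f32).into_dyn();
    // normalize: 0..255 -> 0..1
    let xs = (xs - 0.0) / (255.0 - 0.0);
    // standardize: per-channel ImageNet mean/std, broadcast over (N, C, H, W)
    let mean = Array::from_shape_vec((1, 3, 1, 1), vec![0.48145466, 0.4578275, 0.40821073]).unwrap();
    let std = Array::from_shape_vec((1, 3, 1, 1), vec![0.26862954, 0.2613026, 0.2757771]).unwrap();
    let xs = (xs - mean) / std;
    println!("{:?}", xs.shape()); // [1, 3, 2, 2]
}
```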

View File

@ -13,25 +13,25 @@ pub struct Options {
pub i04: Option<MinOptMax>,
pub i05: Option<MinOptMax>,
pub i10: Option<MinOptMax>, // 2nd input, axis 0
pub i11: Option<MinOptMax>,
pub i11: Option<MinOptMax>, // 2nd input, axis 1
pub i12: Option<MinOptMax>,
pub i13: Option<MinOptMax>,
pub i14: Option<MinOptMax>,
pub i15: Option<MinOptMax>,
pub i20: Option<MinOptMax>, // 2nd input, axis 0
pub i20: Option<MinOptMax>,
pub i21: Option<MinOptMax>,
pub i22: Option<MinOptMax>,
pub i23: Option<MinOptMax>,
pub i24: Option<MinOptMax>,
pub i25: Option<MinOptMax>,
pub i30: Option<MinOptMax>, // 2nd input, axis 0
pub i30: Option<MinOptMax>,
pub i31: Option<MinOptMax>,
pub i32_: Option<MinOptMax>,
pub i33: Option<MinOptMax>,
pub i34: Option<MinOptMax>,
pub i35: Option<MinOptMax>,
// trt ep
// trt related
pub trt_engine_cache_enable: bool,
pub trt_int8_enable: bool,
pub trt_fp16_enable: bool,
@ -44,12 +44,13 @@ pub struct Options {
pub kconfs: Vec<f32>,
pub iou: f32,
pub apply_nms: bool,
pub saveout: Option<String>,
pub tokenizer: Option<String>,
pub vocab: Option<String>,
pub names: Option<Vec<String>>, // class names
pub anchors_first: bool, // output format: [bs, anchors/na, pos+nc+nm]
pub skeletons: Option<Vec<(usize, usize)>>,
pub min_width: Option<f32>,
pub min_height: Option<f32>,
pub unclip_ratio: f32, // DB
}
impl Default for Options {
@ -93,12 +94,13 @@ impl Default for Options {
kconfs: vec![0.5f32],
iou: 0.45f32,
apply_nms: true,
saveout: None,
tokenizer: None,
vocab: None,
names: None,
anchors_first: false,
skeletons: None,
min_width: None,
min_height: None,
unclip_ratio: 1.5,
}
}
}
@ -144,18 +146,28 @@ impl Options {
self
}
pub fn with_saveout(mut self, saveout: &str) -> Self {
self.saveout = Some(saveout.to_string());
self
}
pub fn with_names(mut self, names: &[&str]) -> Self {
self.names = Some(names.iter().map(|x| x.to_string()).collect::<Vec<String>>());
self
}
pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self {
self.skeletons = Some(skeletons.to_vec());
pub fn with_vocab(mut self, vocab: &str) -> Self {
self.vocab = Some(auto_load(vocab).unwrap());
self
}
pub fn with_unclip_ratio(mut self, x: f32) -> Self {
self.unclip_ratio = x;
self
}
pub fn with_min_width(mut self, x: f32) -> Self {
self.min_width = Some(x);
self
}
pub fn with_min_height(mut self, x: f32) -> Self {
self.min_height = Some(x);
self
}

View File

@ -142,6 +142,18 @@ impl Point {
pub fn sum(&self) -> f32 {
self.x + self.y
}
pub fn perpendicular_distance(&self, start: &Point, end: &Point) -> f32 {
let numerator = ((end.y - start.y) * self.x - (end.x - start.x) * self.y + end.x * start.y
- end.y * start.x)
.abs();
let denominator = ((end.y - start.y).powi(2) + (end.x - start.x).powi(2)).sqrt();
numerator / denominator
}
pub fn cross(&self, other: &Point) -> f32 {
self.x * other.y - self.y * other.x
}
}
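A standalone check of the two `Point` helpers added above, using the same formulas with scalar arguments and toy values.

```rust
// Perpendicular distance from a point to the line through (start, end),
// matching the formula in Point::perpendicular_distance above.
fn perpendicular_distance(px: f32, py: f32, sx: f32, sy: f32, ex: f32, ey: f32) -> f32 {
    let numerator = ((ey - sy) * px - (ex - sx) * py + ex * sy - ey * sx).abs();
    let denominator = ((ey - sy).powi(2) + (ex - sx).powi(2)).sqrt();
    numerator / denominator
}

fn main() {
    // distance from (3, 4) to the line through (0, 0) and (10, 0) is 4.0
    println!("{}", perpendicular_distance(3.0, 4.0, 0.0, 0.0, 10.0, 0.0));
    // 2D cross product: (1, 0) x (0, 1) = 1.0 (counter-clockwise turn)
    let (ax, ay, bx, by) = (1.0f32, 0.0, 0.0, 1.0);
    println!("{}", ax * by - ay * bx);
}
```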
#[cfg(test)]

View File

@ -1,16 +1,53 @@
use crate::{Point, Rect, RotatedRect};
use crate::{Point, Rect};
#[derive(Default, Debug, PartialOrd, PartialEq, Clone)]
#[derive(Default, Debug, Clone, PartialEq)]
pub struct Polygon {
points: Vec<Point>,
pub points: Vec<Point>,
}
impl From<Vec<Point>> for Polygon {
fn from(points: Vec<Point>) -> Self {
Self { points }
}
}
impl Polygon {
pub fn new(points: &[Point]) -> Self {
// TODO: refactor
Self {
points: points.to_vec(),
}
// pub fn new(points: &[Point]) -> Self {
// Self {
// points: points.to_vec(),
// }
// }
pub fn new() -> Self {
Self::default()
}
pub fn from_contour(contour: &imageproc::contours::Contour<i32>) -> Self {
let points = contour
.points
.iter()
.map(|p| Point::new(p.x as f32, p.y as f32))
.collect::<Vec<_>>();
Self { points }
}
pub fn to_imageproc_points(&self) -> Vec<imageproc::point::Point<i32>> {
self.points
.iter()
.map(|p| imageproc::point::Point::new(p.x as i32, p.y as i32))
.collect::<Vec<_>>()
}
pub fn from_imageproc_points(points: &[imageproc::point::Point<i32>]) -> Self {
let points = points
.iter()
.map(|p| Point::new(p.x as f32, p.y as f32))
.collect::<Vec<_>>();
Self { points }
}
pub fn with_points(mut self, points: &[Point]) -> Self {
self.points = points.to_vec();
self
}
pub fn area(&self) -> f32 {
@ -41,14 +78,163 @@ impl Polygon {
max_y = point.y
}
}
((min_x, min_y), (max_x, max_y)).into()
((min_x - 1.0, min_y - 1.0), (max_x + 1.0, max_y + 1.0)).into()
}
pub fn find_min_rotated_rect() -> RotatedRect {
todo!()
pub fn perimeter(&self) -> f32 {
let mut perimeter = 0.0;
let n = self.points.len();
for i in 0..n {
let j = (i + 1) % n;
perimeter += self.points[i].distance_from(&self.points[j]);
}
perimeter
}
pub fn expand(&mut self) -> Self {
todo!()
pub fn offset(&self, delta: f32, width: f32, height: f32) -> Self {
let num_points = self.points.len();
let mut new_points = Vec::with_capacity(self.points.len());
for i in 0..num_points {
let prev_idx = if i == 0 { num_points - 1 } else { i - 1 };
let next_idx = (i + 1) % num_points;
let edge_vector = Point {
x: self.points[next_idx].x - self.points[prev_idx].x,
y: self.points[next_idx].y - self.points[prev_idx].y,
};
let normal_vector = Point {
x: -edge_vector.y,
y: edge_vector.x,
};
let normal_length = (normal_vector.x.powi(2) + normal_vector.y.powi(2)).sqrt();
if normal_length.abs() < 1e-6 {
new_points.push(self.points[i]);
} else {
let normalized_normal = Point {
x: normal_vector.x / normal_length,
y: normal_vector.y / normal_length,
};
let new_x = self.points[i].x + normalized_normal.x * delta;
let new_y = self.points[i].y + normalized_normal.y * delta;
let new_x = new_x.max(0.0).min(width);
let new_y = new_y.max(0.0).min(height);
new_points.push(Point { x: new_x, y: new_y });
}
}
Self { points: new_points }
}
pub fn resample(&self, num_samples: usize) -> Polygon {
let mut points = Vec::new();
for i in 0..self.points.len() {
let start_point = self.points[i];
let end_point = self.points[(i + 1) % self.points.len()];
points.push(start_point);
let dx = end_point.x - start_point.x;
let dy = end_point.y - start_point.y;
for j in 1..num_samples {
let t = (j as f32) / (num_samples as f32);
let new_x = start_point.x + t * dx;
let new_y = start_point.y + t * dy;
points.push(Point { x: new_x, y: new_y });
}
}
Self { points }
}
pub fn simplify(&self, epsilon: f32) -> Self {
let mask = self.rdp_iter(epsilon);
let points = self
.points
.iter()
.enumerate()
.filter_map(|(i, &point)| if mask[i] { Some(point) } else { None })
.collect();
Self { points }
}
#[allow(clippy::needless_range_loop)]
fn rdp_iter(&self, epsilon: f32) -> Vec<bool> {
let mut stk = Vec::new();
let mut indices = vec![true; self.points.len()];
stk.push((0, self.points.len() - 1));
while let Some((start_index, last_index)) = stk.pop() {
let mut dmax = 0.0;
let mut index = start_index;
for i in (start_index + 1)..last_index {
let d = self.points[i]
.perpendicular_distance(&self.points[start_index], &self.points[last_index]);
if d > dmax {
index = i;
dmax = d;
}
}
if dmax > epsilon {
stk.push((start_index, index));
stk.push((index, last_index));
} else {
for j in (start_index + 1)..last_index {
indices[j] = false;
}
}
}
indices
}
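A hedged usage sketch (not a test from this commit) of the Ramer-Douglas-Peucker simplification above: a nearly collinear point falls within epsilon of its chord and is dropped, while a real corner survives.

```rust
// Toy coordinates; relies on the Polygon/Point API shown in this file.
let poly: Polygon = vec![
    Point::new(0.0, 0.0),
    Point::new(5.0, 0.2),  // ~0.2 from the chord (0,0)-(10,0): removed at eps = 1.0
    Point::new(10.0, 0.0),
    Point::new(10.0, 8.0), // a real corner: kept
]
.into();
let simplified = poly.simplify(1.0);
assert_eq!(simplified.points.len(), 3);
```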
pub fn convex_hull(&self) -> Self {
let mut points = self.points.clone();
points.sort_by(|a, b| {
a.x.partial_cmp(&b.x)
.unwrap()
.then(a.y.partial_cmp(&b.y).unwrap())
});
let mut hull: Vec<Point> = Vec::new();
// Lower hull
for &point in &points {
while hull.len() >= 2 {
let last = hull.len() - 1;
let second_last = hull.len() - 2;
let vec_a = hull[last] - hull[second_last];
let vec_b = point - hull[second_last];
if vec_a.cross(&vec_b) <= 0.0 {
hull.pop();
} else {
break;
}
}
hull.push(point);
}
// Upper hull
let lower_hull_size = hull.len();
for &point in points.iter().rev().skip(1) {
while hull.len() > lower_hull_size {
let last = hull.len() - 1;
let second_last = hull.len() - 2;
let vec_a: Point = hull[last] - hull[second_last];
let vec_b = point - hull[second_last];
if vec_a.cross(&vec_b) <= 0.0 {
hull.pop();
} else {
break;
}
}
hull.push(point);
}
// Remove duplicate points
hull.dedup();
if hull.len() > 1 && hull.first() == hull.last() {
hull.pop();
}
Self { points: hull }
}
}
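A hedged usage sketch of the monotone-chain convex hull above: a point interior to a square is discarded and the four corners remain. Coordinates are toy values.

```rust
// Relies on the Polygon/Point API shown in this file.
let poly: Polygon = vec![
    Point::new(0.0, 0.0),
    Point::new(4.0, 0.0),
    Point::new(4.0, 4.0),
    Point::new(0.0, 4.0),
    Point::new(2.0, 2.0), // interior point, removed by the hull
]
.into();
assert_eq!(poly.convex_hull().points.len(), 4);
```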

View File

@ -1,59 +0,0 @@
use crate::{Bbox, Embedding, Keypoint};
#[derive(Clone, PartialEq, Default)]
pub struct Results {
pub probs: Option<Embedding>,
pub bboxes: Option<Vec<Bbox>>,
pub keypoints: Option<Vec<Vec<Keypoint>>>,
pub masks: Option<Vec<Vec<u8>>>,
}
impl std::fmt::Debug for Results {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Results")
.field("Probabilities", &self.probs)
.field("BoundingBoxes", &self.bboxes)
.field("Keypoints", &self.keypoints)
.field(
"Masks",
&format_args!("{:?}", self.masks().map(|masks| masks.len())),
)
.finish()
}
}
impl Results {
pub fn new(
probs: Option<Embedding>,
bboxes: Option<Vec<Bbox>>,
keypoints: Option<Vec<Vec<Keypoint>>>,
masks: Option<Vec<Vec<u8>>>,
) -> Self {
Self {
probs,
bboxes,
keypoints,
masks,
}
}
pub fn probs(&self) -> Option<&Embedding> {
self.probs.as_ref()
}
pub fn keypoints(&self) -> Option<&Vec<Vec<Keypoint>>> {
self.keypoints.as_ref()
}
pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
self.masks.as_ref()
}
pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
self.bboxes.as_ref()
}
pub fn bboxes_mut(&mut self) -> Option<&mut Vec<Bbox>> {
self.bboxes.as_mut()
}
}

View File

@ -1,4 +1,4 @@
use crate::{Bbox, Keypoint, GITHUB_ASSETS};
use crate::GITHUB_ASSETS;
use anyhow::Result;
use indicatif::{ProgressBar, ProgressStyle};
use std::io::{Read, Write};
@ -92,31 +92,6 @@ pub fn config_dir() -> PathBuf {
}
}
#[allow(clippy::type_complexity)]
pub fn non_max_suppression(
xs: &mut Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)>,
iou_threshold: f32,
) {
xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap());
let mut current_index = 0;
for index in 0..xs.len() {
let mut drop = false;
for prev_index in 0..current_index {
let iou = xs[prev_index].0.iou(&xs[index].0);
if iou > iou_threshold {
drop = true;
break;
}
}
if !drop {
xs.swap(current_index, index);
current_index += 1;
}
}
xs.truncate(current_index);
}
pub const COCO_SKELETON_17: [(usize, usize); 16] = [
(0, 1),
(0, 2),

src/ys.rs (new file, 76 lines)
View File

@ -0,0 +1,76 @@
use crate::{Bbox, Embedding, Keypoint, Polygon};
#[derive(Clone, PartialEq, Default)]
pub struct Ys {
// Results for each frame
pub probs: Option<Embedding>,
pub bboxes: Option<Vec<Bbox>>,
pub keypoints: Option<Vec<Vec<Keypoint>>>,
pub masks: Option<Vec<Vec<u8>>>,
pub polygons: Option<Vec<Polygon>>,
}
impl std::fmt::Debug for Ys {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Results")
.field("Probabilities", &self.probs)
.field("BoundingBoxes", &self.bboxes)
.field("Keypoints", &self.keypoints)
.field(
"Masks",
&format_args!("{:?}", self.masks().map(|masks| masks.len())),
)
.field(
"Polygons",
&format_args!("{:?}", self.polygons().map(|polygons| polygons.len())),
)
.finish()
}
}
impl Ys {
pub fn with_probs(mut self, probs: Embedding) -> Self {
self.probs = Some(probs);
self
}
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
self.bboxes = Some(bboxes.to_vec());
self
}
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
self.keypoints = Some(keypoints.to_vec());
self
}
pub fn with_masks(mut self, masks: &[Vec<u8>]) -> Self {
self.masks = Some(masks.to_vec());
self
}
pub fn with_polygons(mut self, polygons: &[Polygon]) -> Self {
self.polygons = Some(polygons.to_vec());
self
}
pub fn probs(&self) -> Option<&Embedding> {
self.probs.as_ref()
}
pub fn keypoints(&self) -> Option<&Vec<Vec<Keypoint>>> {
self.keypoints.as_ref()
}
pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
self.masks.as_ref()
}
pub fn polygons(&self) -> Option<&Vec<Polygon>> {
self.polygons.as_ref()
}
pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
self.bboxes.as_ref()
}
}
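A hedged sketch of assembling one per-image `Ys` with the new builder API; the rectangle, class id, and confidence are placeholders, and the `Rect` tuple conversion and `Bbox::new` call mirror what appears elsewhere in this commit.

```rust
// Illustrative values only; relies on the crate's Rect, Bbox, and Ys types.
let rect: Rect = ((10.0f32, 20.0), (110.0, 60.0)).into();
let y = Ys::default().with_bboxes(&[Bbox::new(rect, 0, 0.87, None)]);
println!("{y:?}");
```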