Dev (#1)
* Update imageproc crates * Add top-p method for sampling * Add SVTR for text recognition & bug fix
35
Cargo.toml
@ -11,30 +11,33 @@ exclude = ["assets/*", "examples/*"]
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.2.4", features = ["derive"] }
|
||||
image = { version = "0.24.7", default-features = false, features = [
|
||||
"jpeg",
|
||||
"png",
|
||||
"tiff",
|
||||
"webp",
|
||||
"webp-encoder",
|
||||
"bmp"
|
||||
]}
|
||||
imageproc = { version = "0.23.0", default-features = false }
|
||||
ndarray = { version = "0.15.6" }
|
||||
# ort-sys = { version = "2.0.0-alpha.4" }
|
||||
# ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "half", "ndarray", "cuda", "tensorrt", "coreml", "openvino"] }
|
||||
ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "profiling", "half", "ndarray", "cuda", "tensorrt", "coreml", "ureq", "openvino"] }
|
||||
rusttype = { version = "0.9", default-features = false }
|
||||
ort = { version = "2.0.0-alpha.4", default-features = false, features = [
|
||||
"load-dynamic",
|
||||
"copy-dylibs",
|
||||
"profiling",
|
||||
"half",
|
||||
"ndarray",
|
||||
"cuda",
|
||||
"tensorrt",
|
||||
"coreml",
|
||||
"ureq",
|
||||
"openvino",
|
||||
] }
|
||||
anyhow = { version = "1.0.75" }
|
||||
regex = { version = "1.5.4" }
|
||||
rand = { version = "0.8.5" }
|
||||
chrono = { version = "0.4.30" }
|
||||
half = { version = "2.3.1" }
|
||||
dirs = { version = "5.0.1" }
|
||||
ureq = { version = "2.9.1", default-features = true, features = [ "socks-proxy" ] }
|
||||
ureq = { version = "2.9.1", default-features = true, features = [
|
||||
"socks-proxy",
|
||||
] }
|
||||
walkdir = { version = "2.5.0" }
|
||||
tokenizers = { version = "0.15.2" }
|
||||
itertools = { version = "0.12.1" }
|
||||
usearch = { version = "2.9.1" }
|
||||
usearch = { version = "2.10.4" }
|
||||
rayon = "1.10.0"
|
||||
indicatif = "0.17.8"
|
||||
image = "0.25.1"
|
||||
imageproc = { version = "0.24" }
|
||||
ab_glyph = "0.2.23"
|
||||
|
97
README.md
@ -4,34 +4,35 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp
|
||||
|
||||
## Supported Models
|
||||
|
||||
| Model | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
|
||||
| :-----------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
|
||||
| **YOLOv8-detection** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-pose** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-classification** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-segmentation** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-OBB** | TODO | TODO | TODO | TODO | TODO |
|
||||
| **YOLOv9** | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **RT-DETR** | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **FastSAM** | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLO-World** | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **DINOv2** | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **CLIP** | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
|
||||
| **BLIP** | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
|
||||
| [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947) | [demo](examples/db) | ✅ | ❌ | ✅ | ✅ |
|
||||
| **SVTR, TROCR** | TODO | TODO | TODO | TODO | TODO |
|
||||
| Model | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
|
||||
| :---------------------------------------------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
|
||||
| **YOLOv8-detection** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-pose** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-classification** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-segmentation** | [demo](examples/yolov8) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLOv8-OBB** | TODO | TODO | TODO | TODO | TODO |
|
||||
| **YOLOv9** | [demo](examples/yolov9) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **RT-DETR** | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **FastSAM** | [demo](examples/fastsam) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **YOLO-World** | [demo](examples/yolo-world) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **DINOv2** | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
|
||||
| **CLIP** | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
|
||||
| **BLIP** | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
|
||||
| [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947) | [demo](examples/db) | ✅ | ❌ | ✅ | ✅ |
|
||||
| [**SVTR(Text Recognition)**](https://arxiv.org/abs/2205.00159) | [demo](examples/svtr) | ✅ | ❌ | ✅ | ✅ |
|
||||
|
||||
## Solution Models
|
||||
|
||||
Additionally, this repo also provides some solution models such as pedestrian `fall detection`, `head detection`, `trash detection`, and more.
|
||||
|
||||
| Model | Example |
|
||||
| :-------------------------------------------------------: | :------------------------------: |
|
||||
| **face-landmark detection**<br />**人脸 & 关键点检测** | [demo](examples/yolov8-face) |
|
||||
| **head detection**<br /> **人头检测** | [demo](examples/yolov8-head) |
|
||||
| **fall detection**<br /> **摔倒检测** | [demo](examples/yolov8-falldown) |
|
||||
| **trash detection**<br /> **垃圾检测** | [demo](examples/yolov8-plastic-bag) |
|
||||
| **text detection(PPOCR-det v3, v4)**<br />**PPOCR文本检测** | [demo](examples/db) |
|
||||
| Model | Example |
|
||||
| :--------------------------------------------------------------------------------: | :------------------------------: |
|
||||
| **text detection<br />(PPOCR-det v3, v4)**<br />**通用文本检测** | [demo](examples/db) |
|
||||
| **text recognition<br />(PPOCR-rec v3, v4)**<br />**中英文-文本识别** | [demo](examples/svtr) |
|
||||
| **face-landmark detection**<br />**人脸 & 关键点检测** | [demo](examples/yolov8-face) |
|
||||
| **head detection**<br /> **人头检测** | [demo](examples/yolov8-head) |
|
||||
| **fall detection**<br /> **摔倒检测** | [demo](examples/yolov8-falldown) |
|
||||
| **trash detection**<br /> **垃圾检测** | [demo](examples/yolov8-plastic-bag) |
|
||||
|
||||
## Demo
|
||||
|
||||
@ -60,27 +61,42 @@ check **[ort guide](https://ort.pyke.io/setup/linking)**
|
||||
|
||||
```shell
|
||||
cargo add --git https://github.com/jamjamjon/usls
|
||||
|
||||
# or
|
||||
cargo add usls
|
||||
```
|
||||
|
||||
#### 3. Set `Options` and build model
|
||||
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_model("../models/yolov8m-seg-dyn-f16.onnx")
|
||||
.with_trt(0) // using cuda(0) by default
|
||||
// when model with dynamic shapes
|
||||
.with_i00((1, 2, 4).into()) // dynamic batch
|
||||
.with_i02((416, 640, 800).into()) // dynamic height
|
||||
.with_i03((416, 640, 800).into()) // dynamic width
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
.with_dry_run(3)
|
||||
.with_saveout("YOLOv8"); // save results
|
||||
.with_model("../models/yolov8m-seg-dyn-f16.onnx");
|
||||
let mut model = YOLO::new(&options)?;
|
||||
```
|
||||
|
||||
- If you want to run your model with TensorRT or CoreML
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_trt(0) // using cuda by default
|
||||
// .with_coreml(0)
|
||||
```
|
||||
|
||||
|
||||
- If your model has dynamic shapes
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_i00((1, 2, 4).into()) // dynamic batch
|
||||
.with_i02((416, 640, 800).into()) // dynamic height
|
||||
.with_i03((416, 640, 800).into()) // dynamic width
|
||||
```
|
||||
|
||||
- If you want to set a confidence level for each category
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
```
|
||||
|
||||
- Go check [Options](src/options.rs) for more model options.
|
||||
|
||||
|
||||
|
||||
#### 4. Prepare inputs, and then you're ready to go
|
||||
|
||||
- Build `DataLoader` to load images
|
||||
@ -98,10 +114,17 @@ for (xs, _paths) in dl {
|
||||
- Or simply read one image
|
||||
|
||||
```Rust
|
||||
let x = DataLoader::try_read("./assets/bus.jpg")?;
|
||||
let _y = model.run(&[x])?;
|
||||
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
|
||||
let y = model.run(&x)?;
|
||||
```
|
||||
|
||||
#### 5. Annotate and save results
|
||||
```Rust
|
||||
let annotator = Annotator::default().with_saveout("YOLOv8");
|
||||
annotator.annotate(&x, &y);
|
||||
```
|
||||
|
||||
|
||||
## Script: convert ONNX model from `float32` to `float16`
|
||||
|
||||
```python
|
||||
|
BIN
assets/db.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
examples/assets/bus.jpg
Normal file
After Width: | Height: | Size: 134 KiB |
BIN
examples/assets/falldown.jpg
Normal file
After Width: | Height: | Size: 68 KiB |
BIN
examples/assets/kids.jpg
Normal file
After Width: | Height: | Size: 85 KiB |
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
BIN
examples/assets/trash.jpg
Normal file
After Width: | Height: | Size: 272 KiB |
@ -42,14 +42,14 @@ cargo run -r --example clip
|
||||
## Results
|
||||
|
||||
```shell
|
||||
(82.24775%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
|
||||
[0.06708972, 0.0067733657, 0.0019306632, 0.8224775, 0.003044935, 0.083962336, 0.014721389]
|
||||
(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
|
||||
[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
|
||||
|
||||
(85.56889%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
|
||||
[0.0786363, 0.0004783095, 0.00060898095, 0.06286741, 0.0006842306, 0.8556889, 0.0010357979]
|
||||
(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
|
||||
[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
|
||||
|
||||
(90.03625%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
|
||||
[0.07473288, 0.0027821448, 0.0075673857, 0.010874652, 0.003041679, 0.0006387719, 0.9003625]
|
||||
(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
|
||||
[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
|
||||
```
|
||||
|
||||
|
||||
|
@ -16,7 +16,6 @@ cargo run -r --example db
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_model("ONNX_PATH") // <= modify this
|
||||
.with_profile(false);
|
||||
```
|
||||
|
||||
### 3. Run
|
||||
@ -27,10 +26,10 @@ cargo run -r --example db
|
||||
|
||||
### Speed test
|
||||
|
||||
| Model | Image size | TensorRT<br />f16 | TensorRT<br />f32 | CUDA<br />f32 |
|
||||
| --------------- | ---------- | ----------------- | ----------------- | ------------- |
|
||||
| ppocr-v3-db-dyn | 640x640 | 1.8585ms | 2.5739ms | 4.3314ms |
|
||||
| ppocr-v4-db-dyn | 640x640 | 2.0507ms | 2.8264ms | 6.6064ms |
|
||||
| Model | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
|
||||
| --------------- | ---------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------ |
|
||||
| ppocr-v3-db-dyn | 640x640 | 1.8585 | 2.5739 | 4.3314 |
|
||||
| ppocr-v4-db-dyn | 640x640 | 2.0507 | 2.8264 | 6.6064 |
|
||||
|
||||
***Test on RTX3060***
|
||||
|
||||
|
Before Width: | Height: | Size: 68 KiB After Width: | Height: | Size: 165 KiB |
@ -1,25 +1,33 @@
|
||||
use usls::{models::DB, DataLoader, Options};
|
||||
use usls::{models::DB, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/ppocr-v4-db-dyn.onnx")
|
||||
.with_i00((1, 1, 4).into())
|
||||
.with_i02((608, 640, 960).into())
|
||||
.with_i03((608, 640, 960).into())
|
||||
.with_confs(&[0.7])
|
||||
.with_saveout("DB-Text-Detection")
|
||||
.with_dry_run(5)
|
||||
.with_i00((1, 4, 8).into())
|
||||
.with_i02((608, 960, 1280).into())
|
||||
.with_i03((608, 960, 1280).into())
|
||||
.with_confs(&[0.4])
|
||||
.with_min_width(5.0)
|
||||
.with_min_height(12.0)
|
||||
// .with_trt(0)
|
||||
// .with_fp16(true)
|
||||
.with_profile(true);
|
||||
.with_model("../models/ppocr-v4-db-dyn.onnx");
|
||||
|
||||
let mut model = DB::new(&options)?;
|
||||
|
||||
// load image
|
||||
let x = DataLoader::try_read("./assets/math.jpg")?;
|
||||
let x = vec![DataLoader::try_read("./assets/db.png")?];
|
||||
|
||||
// run
|
||||
let _y = model.run(&[x])?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default()
|
||||
.with_polygon_color([255u8, 0u8, 0u8])
|
||||
.without_name(true)
|
||||
.without_polygons(false)
|
||||
.without_bboxes(false)
|
||||
.with_saveout("DB-Text-Detection");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use usls::{models::Dinov2, Metric, Options};
|
||||
use usls::{models::Dinov2, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -8,31 +8,32 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i00((1, 1, 1).into())
|
||||
.with_i02((224, 224, 224).into())
|
||||
.with_i03((224, 224, 224).into());
|
||||
let mut model = Dinov2::new(&options)?;
|
||||
let _model = Dinov2::new(&options)?;
|
||||
println!("TODO...");
|
||||
|
||||
// query from vector
|
||||
let ys = model.query_from_vec(
|
||||
"./assets/bus.jpg",
|
||||
&[
|
||||
"./examples/dinov2/images/bus.jpg",
|
||||
"./examples/dinov2/images/1.jpg",
|
||||
"./examples/dinov2/images/2.jpg",
|
||||
],
|
||||
Metric::L2,
|
||||
)?;
|
||||
// let ys = model.query_from_vec(
|
||||
// "./assets/bus.jpg",
|
||||
// &[
|
||||
// "./examples/dinov2/images/bus.jpg",
|
||||
// "./examples/dinov2/images/1.jpg",
|
||||
// "./examples/dinov2/images/2.jpg",
|
||||
// ],
|
||||
// Metric::L2,
|
||||
// )?;
|
||||
|
||||
// or query from folder
|
||||
// let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;
|
||||
|
||||
// results
|
||||
for (i, y) in ys.iter().enumerate() {
|
||||
println!(
|
||||
"Top-{:<3}{:.7} {}",
|
||||
i + 1,
|
||||
y.1,
|
||||
y.2.canonicalize()?.display()
|
||||
);
|
||||
}
|
||||
// for (i, y) in ys.iter().enumerate() {
|
||||
// println!(
|
||||
// "Top-{:<3}{:.7} {}",
|
||||
// i + 1,
|
||||
// y.1,
|
||||
// y.2.canonicalize()?.display()
|
||||
// );
|
||||
// }
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -7,16 +7,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i00((1, 1, 4).into())
|
||||
.with_i02((416, 640, 800).into())
|
||||
.with_i03((416, 640, 800).into())
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
.with_saveout("FastSAM")
|
||||
.with_profile(false);
|
||||
.with_confs(&[0.4]);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("FastSAM");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,19 +1,22 @@
|
||||
use usls::{models::RTDETR, DataLoader, Options, COCO_NAMES_80};
|
||||
use usls::{models::RTDETR, Annotator, DataLoader, Options, COCO_NAMES_80};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/rtdetr-l-f16.onnx")
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
.with_names(&COCO_NAMES_80)
|
||||
.with_saveout("RT-DETR");
|
||||
.with_names(&COCO_NAMES_80);
|
||||
let mut model = RTDETR::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("RT-DETR");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
44
examples/svtr/README.md
Normal file
@ -0,0 +1,44 @@
|
||||
## Quick Start
|
||||
|
||||
```shell
|
||||
cargo run -r --example svtr
|
||||
```
|
||||
|
||||
## Or you can do it manually
|
||||
|
||||
### 1. Download ONNX Model
|
||||
|
||||
[ppocr-v4-server-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-server-svtr-ch-dyn.onnx)
|
||||
[ppocr-v4-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-svtr-ch-dyn.onnx)
|
||||
[ppocr-v3-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v3-svtr-ch-dyn.onnx)
|
||||
|
||||
### 2. Specify the ONNX model path in `main.rs`
|
||||
|
||||
```Rust
|
||||
let options = Options::default()
|
||||
.with_model("ONNX_PATH") // <= modify this
|
||||
```
|
||||
|
||||
### 3. Run
|
||||
|
||||
```bash
|
||||
cargo run -r --example svtr
|
||||
```
|
||||
|
||||
### Speed test
|
||||
|
||||
| Model | Width | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
|
||||
| --------------------------- | :---: | :--------------------------------------: | :--------------------------------------: | :----------------------------------: |
|
||||
| ppocr-v4-server-svtr-ch-dyn | 1500 | 4.2116 | 13.0013 | 20.8673 |
|
||||
| ppocr-v4-svtr-ch-dyn | 1500 | 2.0435 | 3.1959 | 10.1750 |
|
||||
| ppocr-v3-svtr-ch-dyn | 1500 | 1.8596 | 2.9401 | 6.8210 |
|
||||
|
||||
***Test on RTX3060***
|
||||
|
||||
## Results
|
||||
|
||||
```shell
|
||||
[Texts] from the background, but also separate text instances which
|
||||
[Texts] are closely jointed. Some examples are ilustrated in Fig.7.
|
||||
[Texts] 你有这么高速运转的机械进入中国,记住我给出的原理
|
||||
```
|
24
examples/svtr/main.rs
Normal file
@ -0,0 +1,24 @@
|
||||
use usls::{models::SVTR, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_i00((1, 2, 8).into())
|
||||
.with_i03((320, 1500, 2200).into())
|
||||
.with_confs(&[0.6])
|
||||
.with_vocab("../ppocr_rec_vocab.txt")
|
||||
.with_model("../models/ppocr-v4-svtr-ch-dyn.onnx");
|
||||
let mut model = SVTR::new(&options)?;
|
||||
|
||||
// load image
|
||||
let xs = vec![
|
||||
DataLoader::try_read("./examples/svtr/text1.png")?,
|
||||
DataLoader::try_read("./examples/svtr/text2.png")?,
|
||||
DataLoader::try_read("./examples/svtr/text3.png")?,
|
||||
];
|
||||
|
||||
// run
|
||||
model.run(&xs)?;
|
||||
|
||||
Ok(())
|
||||
}
|
BIN
examples/svtr/text1.png
Normal file
After Width: | Height: | Size: 9.0 KiB |
BIN
examples/svtr/text2.png
Normal file
After Width: | Height: | Size: 17 KiB |
BIN
examples/svtr/text3.png
Normal file
After Width: | Height: | Size: 14 KiB |
@ -1,4 +1,4 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -8,15 +8,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i02((416, 640, 800).into())
|
||||
.with_i03((416, 640, 800).into())
|
||||
.with_confs(&[0.3]) // shoes: 0.2
|
||||
.with_saveout("YOLO-World")
|
||||
.with_profile(false);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLO-World");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -7,16 +7,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i00((1, 1, 4).into())
|
||||
.with_i02((416, 640, 800).into())
|
||||
.with_i03((416, 640, 800).into())
|
||||
.with_confs(&[0.15])
|
||||
.with_saveout("YOLOv8-Face")
|
||||
.with_profile(false);
|
||||
.with_confs(&[0.15]);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// load image
|
||||
let x = DataLoader::try_read("./assets/kids.jpg")?;
|
||||
let x = vec![DataLoader::try_read("./assets/kids.jpg")?];
|
||||
|
||||
// run
|
||||
let _y = model.run(&[x])?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLOv8-Face");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,19 +1,21 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/yolov8-falldown-f16.onnx")
|
||||
.with_confs(&[0.3])
|
||||
.with_saveout("YOLOv8-Falldown")
|
||||
.with_profile(false);
|
||||
.with_confs(&[0.3]);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/falldown.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/falldown.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLOv8-Falldown");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,19 +1,21 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/yolov8-head-f16.onnx")
|
||||
.with_confs(&[0.3])
|
||||
.with_saveout("YOLOv8-Head")
|
||||
.with_profile(false);
|
||||
.with_confs(&[0.3]);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/kids.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/kids.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLOv8-Head");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,20 +1,22 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// 1.build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/yolov8-plastic-bag-f16.onnx")
|
||||
.with_confs(&[0.3])
|
||||
.with_saveout("YOLOv8-Trash")
|
||||
.with_names(&["trash"])
|
||||
.with_profile(false);
|
||||
.with_names(&["trash"]);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/trash.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/trash.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLOv8-Trash");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Before Width: | Height: | Size: 219 KiB After Width: | Height: | Size: 221 KiB |
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 237 KiB |
@ -1,27 +1,40 @@
|
||||
use usls::{models::YOLO, DataLoader, Options, COCO_SKELETON_17};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options, COCO_SKELETON_17};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// 1.build model
|
||||
// build model
|
||||
let options = Options::default()
|
||||
.with_model("../models/yolov8m-dyn-f16.onnx")
|
||||
// .with_model("../models/yolov8m-seg-dyn-f16.onnx")
|
||||
.with_model("../models/yolov8m-cls.onnx")
|
||||
// .with_trt(0) // cuda by default
|
||||
// .with_fp16(true)
|
||||
.with_i00((1, 1, 4).into())
|
||||
.with_i02((416, 640, 800).into())
|
||||
.with_i03((416, 640, 800).into())
|
||||
.with_i02((224, 224, 800).into())
|
||||
.with_i03((224, 224, 800).into())
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
.with_profile(false)
|
||||
.with_dry_run(3)
|
||||
.with_skeletons(&COCO_SKELETON_17)
|
||||
.with_saveout("YOLOv8");
|
||||
.with_dry_run(3);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// 2.build dataloader
|
||||
let dl = DataLoader::default().with_batch(1).load("./assets")?;
|
||||
// build dataloader
|
||||
let dl = DataLoader::default()
|
||||
.with_batch(1)
|
||||
.load("./assets/bus.jpg")?;
|
||||
|
||||
// 3.run
|
||||
// build annotate
|
||||
let annotator = Annotator::default()
|
||||
.with_skeletons(&COCO_SKELETON_17)
|
||||
.without_conf(false)
|
||||
.without_name(false)
|
||||
.without_masks(false)
|
||||
.without_polygons(false)
|
||||
.without_bboxes(false)
|
||||
.with_saveout("YOLOv8");
|
||||
|
||||
// run & annotate
|
||||
for (xs, _paths) in dl {
|
||||
let _y = model.run(&xs)?;
|
||||
let ys = model.run(&xs)?;
|
||||
annotator.annotate(&xs, &ys);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use usls::{models::YOLO, DataLoader, Options};
|
||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -8,15 +8,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i02((416, 640, 800).into())
|
||||
.with_i03((416, 640, 800).into())
|
||||
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
|
||||
.with_saveout("YOLOv9")
|
||||
.with_profile(false);
|
||||
let mut model = YOLO::new(&options)?;
|
||||
|
||||
// build dataloader
|
||||
let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
|
||||
// load image
|
||||
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
|
||||
|
||||
// run
|
||||
model.run(&dl.next().unwrap().0)?;
|
||||
let y = model.run(&x)?;
|
||||
|
||||
// annotate
|
||||
let annotator = Annotator::default().with_saveout("YOLOv9");
|
||||
annotator.annotate(&x, &y);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
415
src/annotator.rs
@ -1,26 +1,83 @@
|
||||
use crate::{
|
||||
auto_load, string_now, Bbox, Embedding, Keypoint, Polygon, Ys, CHECK_MARK, CROSS_MARK,
|
||||
};
|
||||
use ab_glyph::{FontVec, PxScale};
|
||||
use anyhow::Result;
|
||||
use image::{ImageBuffer, RgbImage};
|
||||
|
||||
use crate::{auto_load, string_now, Results, CHECK_MARK, CROSS_MARK};
|
||||
use image::{DynamicImage, GrayImage, ImageBuffer, Rgb, RgbImage};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Annotator {
|
||||
font: rusttype::Font<'static>,
|
||||
font: ab_glyph::FontVec,
|
||||
scale_: f32, // Cope with ab_glyph & imageproc=0.24.0
|
||||
skeletons: Option<Vec<(usize, usize)>>,
|
||||
hide_conf: bool,
|
||||
polygon_color: Rgb<u8>,
|
||||
saveout: Option<String>,
|
||||
without_conf: bool,
|
||||
without_name: bool,
|
||||
without_bboxes: bool,
|
||||
without_masks: bool,
|
||||
without_polygons: bool,
|
||||
without_keypoints: bool,
|
||||
}
|
||||
|
||||
impl Default for Annotator {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
font: Self::load_font(None).unwrap(),
|
||||
scale_: 6.666667,
|
||||
skeletons: None,
|
||||
hide_conf: false,
|
||||
polygon_color: Rgb([255, 255, 255]),
|
||||
saveout: None,
|
||||
without_conf: false,
|
||||
without_name: false,
|
||||
without_bboxes: false,
|
||||
without_masks: false,
|
||||
without_polygons: false,
|
||||
without_keypoints: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Annotator {
|
||||
pub fn without_conf(mut self, x: bool) -> Self {
|
||||
self.without_conf = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn without_name(mut self, x: bool) -> Self {
|
||||
self.without_name = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn without_bboxes(mut self, x: bool) -> Self {
|
||||
self.without_bboxes = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn without_masks(mut self, x: bool) -> Self {
|
||||
self.without_masks = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn without_polygons(mut self, x: bool) -> Self {
|
||||
self.without_polygons = x;
|
||||
self
|
||||
}
|
||||
pub fn without_keypoints(mut self, x: bool) -> Self {
|
||||
self.without_keypoints = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_saveout(mut self, saveout: &str) -> Self {
|
||||
self.saveout = Some(saveout.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_polygon_color(mut self, rgb: [u8; 3]) -> Self {
|
||||
self.polygon_color = Rgb(rgb);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self {
|
||||
self.skeletons = Some(skeletons.to_vec());
|
||||
self
|
||||
@ -44,163 +101,217 @@ impl Annotator {
|
||||
}
|
||||
}
|
||||
|
||||
fn load_font(path: Option<&str>) -> Result<rusttype::Font<'static>> {
|
||||
/// Draws each result onto an RGB copy of its paired image and optionally saves it.
///
/// Images and results are paired positionally with `zip`, so surplus entries on
/// either side are ignored. Layers are drawn in a fixed order: masks, polygons,
/// bboxes, keypoints, then probs. The first four are skipped when the matching
/// `without_*` flag is set; probs are always drawn when present. The annotated
/// copy is handed to `save` only when a save-out name has been configured.
pub fn annotate(&self, imgs: &[DynamicImage], ys: &[Ys]) {
    for (image, result) in imgs.iter().zip(ys) {
        let mut canvas = image.to_rgb8();

        // masks
        match &result.masks {
            Some(masks) if !self.without_masks => self.plot_masks(&mut canvas, masks),
            _ => {}
        }

        // polygons
        match &result.polygons {
            Some(polygons) if !self.without_polygons => {
                self.plot_polygons(&mut canvas, polygons)
            }
            _ => {}
        }

        // bboxes
        match &result.bboxes {
            Some(bboxes) if !self.without_bboxes => self.plot_bboxes(&mut canvas, bboxes),
            _ => {}
        }

        // keypoints
        match &result.keypoints {
            Some(keypoints) if !self.without_keypoints => {
                self.plot_keypoints(&mut canvas, keypoints)
            }
            _ => {}
        }

        // probs have no opt-out flag
        if let Some(probs) = &result.probs {
            self.plot_probs(&mut canvas, probs)
        }

        if let Some(saveout) = &self.saveout {
            self.save(&canvas, saveout);
        }
    }
}
|
||||
|
||||
/// Tints every pixel covered by a mask green, in place.
///
/// Each mask is read as a row-major, one-byte-per-pixel buffer with the same
/// dimensions as `img`; a non-zero byte marks a covered pixel. Covered pixels
/// have their red and blue channels halved and their green channel moved two
/// thirds of the way towards 255. Masks are applied in order, so a pixel
/// covered by several masks is tinted once per mask.
///
/// # Panics
///
/// Panics if a mask holds fewer than `img.width() * img.height()` bytes.
/// Extra trailing bytes are ignored.
pub fn plot_masks(&self, img: &mut RgbImage, masks: &[Vec<u8>]) {
    let pixel_count = (img.width() as usize) * (img.height() as usize);
    for mask in masks {
        assert!(
            mask.len() >= pixel_count,
            "can not create image from mask: expected at least {pixel_count} bytes, got {}",
            mask.len()
        );
        // `pixels_mut` visits pixels row by row, the same layout the mask uses,
        // so the two can be zipped directly without copying the mask into a
        // temporary image or looking pixels up by coordinate.
        for (pixel, &covered) in img.pixels_mut().zip(mask.iter()) {
            if covered > 0 {
                let [r, g, b] = &mut pixel.0;
                *r /= 2;
                *g = 255 - (255 - *g) / 3;
                *b /= 2;
            }
        }
    }
}
|
||||
|
||||
pub fn plot_bboxes(&self, img: &mut RgbImage, bboxes: &[Bbox]) {
|
||||
for bbox in bboxes.iter() {
|
||||
imageproc::drawing::draw_hollow_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(bbox.xmin().round() as i32, bbox.ymin().round() as i32)
|
||||
.of_size(bbox.width().round() as u32, bbox.height().round() as u32),
|
||||
image::Rgb(self.get_color(bbox.id()).into()),
|
||||
);
|
||||
let mut legend = String::new();
|
||||
if !self.without_name {
|
||||
legend.push_str(&bbox.name().unwrap_or(&bbox.id().to_string()).to_string());
|
||||
}
|
||||
if !self.without_conf {
|
||||
if !self.without_name {
|
||||
legend.push_str(&format!(": {:.4}", bbox.confidence()));
|
||||
} else {
|
||||
legend.push_str(&format!("{:.4}", bbox.confidence()));
|
||||
}
|
||||
}
|
||||
let scale_dy = img.width().max(img.height()) as f32 / 40.0;
|
||||
let scale = PxScale::from(scale_dy);
|
||||
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend); // u32
|
||||
let text_h = text_h + text_h / 3;
|
||||
let top = if bbox.ymin() > text_h as f32 {
|
||||
(bbox.ymin().round() as u32 - text_h) as i32
|
||||
} else {
|
||||
(text_h - bbox.ymin().round() as u32) as i32
|
||||
};
|
||||
|
||||
// text
|
||||
if !legend.is_empty() {
|
||||
imageproc::drawing::draw_filled_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(bbox.xmin() as i32, top).of_size(text_w, text_h),
|
||||
image::Rgb(self.get_color(bbox.id()).into()),
|
||||
);
|
||||
imageproc::drawing::draw_text_mut(
|
||||
img,
|
||||
image::Rgb([0, 0, 0]),
|
||||
bbox.xmin() as i32,
|
||||
top - (scale_dy / self.scale_).floor() as i32 + 2,
|
||||
scale,
|
||||
&self.font,
|
||||
&legend,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn plot_polygons(&self, img: &mut RgbImage, polygons: &[Polygon]) {
|
||||
for polygon in polygons.iter() {
|
||||
// option: draw polygon
|
||||
let polygon = polygon
|
||||
.points
|
||||
.iter()
|
||||
.map(|p| imageproc::point::Point::new(p.x, p.y))
|
||||
.collect::<Vec<_>>();
|
||||
imageproc::drawing::draw_hollow_polygon_mut(img, &polygon, self.polygon_color);
|
||||
|
||||
// option: draw circle
|
||||
// polygon.points.iter().for_each(|point| {
|
||||
// imageproc::drawing::draw_filled_circle_mut(
|
||||
// img,
|
||||
// (point.x as i32, point.y as i32),
|
||||
// 1,
|
||||
// // image::Rgb([255, 255, 255]),
|
||||
// self.polygon_color,
|
||||
// );
|
||||
// });
|
||||
}
|
||||
}
|
||||
|
||||
pub fn plot_probs(&self, img: &mut RgbImage, probs: &Embedding) {
|
||||
let topk = 5usize;
|
||||
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
|
||||
for k in probs.topk(topk).iter() {
|
||||
let legend = format!("{}: {:.4}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
|
||||
let scale_dy = img.width().max(img.height()) as f32 / 30.0;
|
||||
let scale = PxScale::from(scale_dy);
|
||||
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
|
||||
let text_h = text_h + text_h / 3;
|
||||
y += text_h as i32;
|
||||
imageproc::drawing::draw_filled_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(x, y).of_size(text_w, text_h),
|
||||
image::Rgb(self.get_color(k.0).into()),
|
||||
);
|
||||
imageproc::drawing::draw_text_mut(
|
||||
img,
|
||||
image::Rgb((0, 0, 0).into()),
|
||||
x,
|
||||
y - (scale_dy / self.scale_).floor() as i32 + 2,
|
||||
scale,
|
||||
&self.font,
|
||||
&legend,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn plot_keypoints(&self, img: &mut RgbImage, keypoints: &[Vec<Keypoint>]) {
|
||||
let radius = 3;
|
||||
for kpts in keypoints.iter() {
|
||||
for (i, kpt) in kpts.iter().enumerate() {
|
||||
if kpt.confidence() == 0.0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// draw point
|
||||
imageproc::drawing::draw_filled_circle_mut(
|
||||
img,
|
||||
(kpt.x() as i32, kpt.y() as i32),
|
||||
radius,
|
||||
image::Rgb(self.get_color(i + 10).into()),
|
||||
);
|
||||
}
|
||||
|
||||
// draw skeleton
|
||||
if let Some(skeletons) = &self.skeletons {
|
||||
for &(i, ii) in skeletons.iter() {
|
||||
let kpt1 = &kpts[i];
|
||||
let kpt2 = &kpts[ii];
|
||||
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
|
||||
continue;
|
||||
}
|
||||
imageproc::drawing::draw_line_segment_mut(
|
||||
img,
|
||||
(kpt1.x(), kpt1.y()),
|
||||
(kpt2.x(), kpt2.y()),
|
||||
image::Rgb([255, 51, 255]),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_font(path: Option<&str>) -> Result<FontVec> {
|
||||
let path_font = match path {
|
||||
None => auto_load("Arial.ttf")?,
|
||||
Some(p) => p.into(),
|
||||
};
|
||||
let buffer = std::fs::read(path_font)?;
|
||||
Ok(rusttype::Font::try_from_vec(buffer).unwrap())
|
||||
Ok(FontVec::try_from_vec(buffer.to_owned()).unwrap())
|
||||
}
|
||||
|
||||
pub fn get_color(&self, n: usize) -> (u8, u8, u8) {
|
||||
Self::color_palette()[n % Self::color_palette().len()]
|
||||
}
|
||||
|
||||
pub fn plot(&self, img: &mut RgbImage, y: &Results) {
|
||||
// masks and polygons
|
||||
if let Some(masks) = y.masks() {
|
||||
for mask in masks.iter() {
|
||||
let mask_nd: ImageBuffer<image::Luma<_>, Vec<u8>> =
|
||||
ImageBuffer::from_vec(img.width(), img.height(), mask.to_vec())
|
||||
.expect("can not crate image from ndarray");
|
||||
// masks
|
||||
for _x in 0..img.width() {
|
||||
for _y in 0..img.height() {
|
||||
let mask_p = imageproc::drawing::Canvas::get_pixel(&mask_nd, _x, _y);
|
||||
if mask_p.0[0] > 0 {
|
||||
let mut img_p = imageproc::drawing::Canvas::get_pixel(img, _x, _y);
|
||||
img_p.0[0] /= 2;
|
||||
img_p.0[1] = 255 - (255 - img_p.0[1]) / 3;
|
||||
img_p.0[2] /= 2;
|
||||
imageproc::drawing::Canvas::draw_pixel(img, _x, _y, img_p)
|
||||
}
|
||||
}
|
||||
}
|
||||
// contours
|
||||
let contours: Vec<imageproc::contours::Contour<i32>> =
|
||||
imageproc::contours::find_contours(&mask_nd);
|
||||
for contour in contours.iter() {
|
||||
for point in contour.points.iter() {
|
||||
imageproc::drawing::draw_filled_circle_mut(
|
||||
img,
|
||||
(point.x, point.y),
|
||||
1,
|
||||
image::Rgb([255, 255, 255]),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// probs
|
||||
if let Some(probs) = y.probs() {
|
||||
let topk = 5usize;
|
||||
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
|
||||
for k in probs.topk(topk).iter() {
|
||||
let legend = format!("{}: {:.2}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
|
||||
let scale = img.width().max(img.height()) as f32 / 30.0;
|
||||
let scale = rusttype::Scale::uniform(scale);
|
||||
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
|
||||
y += text_h;
|
||||
imageproc::drawing::draw_filled_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(x, y).of_size(text_w as u32, text_h as u32),
|
||||
image::Rgb(self.get_color(k.0).into()),
|
||||
);
|
||||
imageproc::drawing::draw_text_mut(
|
||||
img,
|
||||
image::Rgb((0, 0, 0).into()),
|
||||
x,
|
||||
y,
|
||||
scale,
|
||||
&self.font,
|
||||
&legend,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// bboxes
|
||||
if let Some(bboxes) = y.bboxes() {
|
||||
for bbox in bboxes.iter() {
|
||||
imageproc::drawing::draw_hollow_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(bbox.xmin() as i32, bbox.ymin() as i32)
|
||||
.of_size(bbox.width() as u32, bbox.height() as u32),
|
||||
image::Rgb(self.get_color(bbox.id()).into()),
|
||||
);
|
||||
let legend = if self.hide_conf {
|
||||
bbox.name().unwrap_or(&bbox.id().to_string()).to_string()
|
||||
} else {
|
||||
format!(
|
||||
"{}: {:.4}",
|
||||
bbox.name().unwrap_or(&bbox.id().to_string()),
|
||||
bbox.confidence()
|
||||
)
|
||||
};
|
||||
let scale = img.width().max(img.height()) as f32 / 45.0;
|
||||
let scale = rusttype::Scale::uniform(scale);
|
||||
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
|
||||
let text_y = if bbox.ymin() as i32 > text_h {
|
||||
bbox.ymin() as i32 - text_h
|
||||
} else {
|
||||
text_h - bbox.ymin() as i32
|
||||
};
|
||||
imageproc::drawing::draw_filled_rect_mut(
|
||||
img,
|
||||
imageproc::rect::Rect::at(bbox.xmin() as i32, text_y)
|
||||
.of_size(text_w as u32, text_h as u32),
|
||||
image::Rgb(self.get_color(bbox.id()).into()),
|
||||
);
|
||||
imageproc::drawing::draw_text_mut(
|
||||
img,
|
||||
image::Rgb((0, 0, 0).into()),
|
||||
bbox.xmin() as i32,
|
||||
text_y,
|
||||
scale,
|
||||
&self.font,
|
||||
&legend,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// keypoints
|
||||
if let Some(keypoints) = y.keypoints() {
|
||||
let radius = 3;
|
||||
for kpts in keypoints.iter() {
|
||||
for (i, kpt) in kpts.iter().enumerate() {
|
||||
if kpt.confidence() == 0.0 {
|
||||
continue;
|
||||
}
|
||||
// draw point
|
||||
imageproc::drawing::draw_filled_circle_mut(
|
||||
img,
|
||||
(kpt.x() as i32, kpt.y() as i32),
|
||||
radius,
|
||||
image::Rgb(self.get_color(i + 10).into()),
|
||||
);
|
||||
}
|
||||
|
||||
// draw skeleton
|
||||
if let Some(skeletons) = &self.skeletons {
|
||||
for &(i, ii) in skeletons.iter() {
|
||||
let kpt1 = &kpts[i];
|
||||
let kpt2 = &kpts[ii];
|
||||
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
|
||||
continue;
|
||||
}
|
||||
imageproc::drawing::draw_line_segment_mut(
|
||||
img,
|
||||
(kpt1.x(), kpt1.y()),
|
||||
(kpt2.x(), kpt2.y()),
|
||||
image::Rgb([255, 51, 255]),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn color_palette() -> Vec<(u8, u8, u8)> {
|
||||
vec![
|
||||
(0, 255, 0),
|
||||
|
@ -209,7 +209,6 @@ impl OrtEngine {
|
||||
.with_int8(int8_enable)
|
||||
.with_fp16(fp16_enable)
|
||||
.with_engine_cache(engine_cache_enable)
|
||||
// .with_engine_cache_path(config_dir().to_str().unwrap())
|
||||
.with_engine_cache_path(format!(
|
||||
"{}/{}",
|
||||
config_dir().to_str().unwrap(),
|
||||
@ -221,7 +220,9 @@ impl OrtEngine {
|
||||
.with_profile_max_shapes(spec_max)
|
||||
.build();
|
||||
if trt.is_available()? && trt.register(builder).is_ok() {
|
||||
println!("{CHECK_MARK} Using TensorRT");
|
||||
println!(
|
||||
"{CHECK_MARK} Using TensorRT (Initial model serialization may require a wait)"
|
||||
);
|
||||
Ok(trt)
|
||||
} else {
|
||||
println!("{CROSS_MARK} TensorRT initialization failed. Try CUDA...");
|
||||
@ -343,7 +344,7 @@ impl OrtEngine {
|
||||
}
|
||||
|
||||
pub fn width(&self) -> &MinOptMax {
|
||||
&self.inputs_minoptmax[0][2]
|
||||
&self.inputs_minoptmax[0][3]
|
||||
}
|
||||
|
||||
pub fn is_batch_dyn(&self) -> bool {
|
||||
|
11
src/lib.rs
@ -6,6 +6,7 @@ mod dynconf;
|
||||
mod embedding;
|
||||
mod engine;
|
||||
mod keypoint;
|
||||
mod logits_sampler;
|
||||
mod metric;
|
||||
mod min_opt_max;
|
||||
pub mod models;
|
||||
@ -14,10 +15,10 @@ mod options;
|
||||
mod point;
|
||||
mod polygon;
|
||||
mod rect;
|
||||
mod results;
|
||||
mod rotated_rect;
|
||||
mod tokenizer_stream;
|
||||
mod utils;
|
||||
mod ys;
|
||||
|
||||
pub use annotator::Annotator;
|
||||
pub use bbox::Bbox;
|
||||
@ -27,19 +28,17 @@ pub use dynconf::DynConf;
|
||||
pub use embedding::Embedding;
|
||||
pub use engine::OrtEngine;
|
||||
pub use keypoint::Keypoint;
|
||||
pub use logits_sampler::LogitsSampler;
|
||||
pub use metric::Metric;
|
||||
pub use min_opt_max::MinOptMax;
|
||||
pub use options::Options;
|
||||
pub use point::Point;
|
||||
pub use polygon::Polygon;
|
||||
pub use rect::Rect;
|
||||
pub use results::Results;
|
||||
pub use rotated_rect::RotatedRect;
|
||||
pub use tokenizer_stream::TokenizerStream;
|
||||
pub use utils::{
|
||||
auto_load, config_dir, download, non_max_suppression, string_now, COCO_NAMES_80,
|
||||
COCO_SKELETON_17,
|
||||
};
|
||||
pub use utils::{auto_load, config_dir, download, string_now, COCO_NAMES_80, COCO_SKELETON_17};
|
||||
pub use ys::Ys;
|
||||
|
||||
const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1";
|
||||
const CHECK_MARK: &str = "✅";
|
||||
|
94
src/logits_sampler.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use anyhow::Result;
|
||||
use rand::distributions::{Distribution, WeightedIndex};
|
||||
|
||||
/// Turns a vector of raw logits into a single token id.
///
/// The decoding strategy is selected by `p`: when it is exactly `0.0` the
/// sampler is greedy (arg-max), otherwise it performs top-p sampling over the
/// temperature-scaled softmax of the logits.
#[derive(Debug)]
pub struct LogitsSampler {
    /// Divisor applied to every logit before the softmax used by top-p sampling.
    temperature: f32,
    /// Cumulative-probability threshold for top-p sampling; `0.0` means greedy decoding.
    p: f32,
}

impl Default for LogitsSampler {
    /// Greedy decoding (`p == 0.0`) with a neutral temperature of `1.0`.
    fn default() -> Self {
        Self {
            temperature: 1.0,
            p: 0.0,
        }
    }
}
|
||||
|
||||
impl LogitsSampler {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn with_topp(mut self, p: f32) -> Self {
|
||||
self.p = p.max(0.0).min(1.0);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_temperature(mut self, temperature: f32) -> Self {
|
||||
self.temperature = temperature.max(1e-7);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn decode(&mut self, logits: &[f32]) -> Result<u32> {
|
||||
if self.p == 0.0 {
|
||||
self.search_by_argmax(logits)
|
||||
} else {
|
||||
self.sample_by_topp(logits)
|
||||
}
|
||||
}
|
||||
|
||||
fn search_by_argmax(&mut self, logits: &[f32]) -> Result<u32> {
|
||||
// no need to do softmax
|
||||
let (token_id, _) = logits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.reduce(|max, x| if x.1 > max.1 { x } else { max })
|
||||
.unwrap();
|
||||
Ok(token_id as u32)
|
||||
}
|
||||
|
||||
fn sample_by_topp(&mut self, logits: &[f32]) -> Result<u32> {
|
||||
let logits = self.softmax(logits);
|
||||
let mut logits: Vec<(usize, f32)> = logits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &prob)| (i, prob))
|
||||
.collect();
|
||||
logits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
|
||||
// candidates
|
||||
let mut candidates: Vec<(usize, f32)> = Vec::new();
|
||||
let mut acc_prob: f32 = 0.0;
|
||||
for (idx, prob) in logits.iter() {
|
||||
acc_prob += prob;
|
||||
if acc_prob >= self.p {
|
||||
if candidates.is_empty() {
|
||||
candidates.push((*idx, acc_prob));
|
||||
}
|
||||
break;
|
||||
}
|
||||
candidates.push((*idx, acc_prob));
|
||||
}
|
||||
|
||||
// sample
|
||||
let choices: Vec<usize> = candidates.iter().map(|&(idx, _)| idx).collect();
|
||||
let probs: Vec<f32> = candidates.iter().map(|&(_, prob)| prob).collect();
|
||||
let dist = WeightedIndex::new(probs)?;
|
||||
let mut rng = rand::thread_rng();
|
||||
let token_id = choices[dist.sample(&mut rng)];
|
||||
Ok(token_id as u32)
|
||||
}
|
||||
|
||||
fn softmax(&self, logits: &[f32]) -> Vec<f32> {
|
||||
let logits_t = logits
|
||||
.iter()
|
||||
.map(|&x| x / self.temperature)
|
||||
.collect::<Vec<f32>>();
|
||||
let max_logit = logits_t.iter().fold(f32::MIN, |a, &b| a.max(b));
|
||||
let exps: Vec<f32> = logits_t.iter().map(|&x| (x - max_logit).exp()).collect();
|
||||
let sum_exps: f32 = exps.iter().sum();
|
||||
exps.iter().map(|&exp| exp / sum_exps).collect()
|
||||
}
|
||||
}
|
@ -1,135 +1,132 @@
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{s, Array, Axis, IxDyn};
|
||||
use std::io::Write;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine, TokenizerStream};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Blip {
|
||||
pub textual: OrtEngine,
|
||||
pub visual: OrtEngine,
|
||||
pub height: MinOptMax,
|
||||
pub width: MinOptMax,
|
||||
pub batch_visual: MinOptMax,
|
||||
pub batch_textual: MinOptMax,
|
||||
tokenizer: TokenizerStream,
|
||||
}
|
||||
|
||||
impl Blip {
|
||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
||||
let visual = OrtEngine::new(&options_visual)?;
|
||||
let textual = OrtEngine::new(&options_textual)?;
|
||||
let (batch_visual, batch_textual, height, width) = (
|
||||
visual.batch().to_owned(),
|
||||
textual.batch().to_owned(),
|
||||
visual.height().to_owned(),
|
||||
visual.width().to_owned(),
|
||||
);
|
||||
let tokenizer = match &options_textual.tokenizer {
|
||||
None => auto_load("tokenizer-blip.json")?,
|
||||
Some(tokenizer) => tokenizer.into(),
|
||||
};
|
||||
let tokenizer = Tokenizer::from_file(tokenizer).unwrap();
|
||||
let tokenizer = TokenizerStream::new(tokenizer);
|
||||
visual.dry_run()?;
|
||||
textual.dry_run()?;
|
||||
Ok(Self {
|
||||
textual,
|
||||
visual,
|
||||
batch_visual,
|
||||
batch_textual,
|
||||
height,
|
||||
width,
|
||||
tokenizer,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> {
|
||||
// this demo use batch_size=1
|
||||
let x = image::io::Reader::open(path)?.decode()?;
|
||||
let image_embeds = self.encode_images(&[x])?;
|
||||
let image_embeds_attn_mask: Array<f32, IxDyn> =
|
||||
Array::ones((1, image_embeds.shape()[1])).into_dyn();
|
||||
|
||||
// conditional
|
||||
let mut input_ids = match prompt {
|
||||
None => {
|
||||
print!("[Unconditional image captioning]: ");
|
||||
vec![0.0f32]
|
||||
}
|
||||
|
||||
Some(prompt) => {
|
||||
let encodings = self.tokenizer.tokenizer().encode(prompt, false);
|
||||
let ids: Vec<f32> = encodings
|
||||
.unwrap()
|
||||
.get_ids()
|
||||
.iter()
|
||||
.map(|x| *x as f32)
|
||||
.collect();
|
||||
print!("[Conditional image captioning]: {} ", prompt);
|
||||
ids
|
||||
}
|
||||
};
|
||||
loop {
|
||||
let input_ids_nd: Array<f32, IxDyn> = Array::from_vec(input_ids.to_owned()).into_dyn();
|
||||
let input_ids_nd = input_ids_nd.insert_axis(Axis(0));
|
||||
let input_ids_attn_mask: Array<f32, IxDyn> =
|
||||
Array::ones(input_ids_nd.shape()).into_dyn();
|
||||
let y = self.textual.run(&[
|
||||
input_ids_nd,
|
||||
input_ids_attn_mask,
|
||||
image_embeds.to_owned(),
|
||||
image_embeds_attn_mask.to_owned(),
|
||||
])?; // N, length, vocab_size
|
||||
let y = y[0].to_owned();
|
||||
let y = y.slice(s!(0, -1.., ..));
|
||||
|
||||
// softmax
|
||||
let exps = y.mapv(|c| c.exp());
|
||||
let stds = exps.sum_axis(Axis(1));
|
||||
let probs = exps / stds.insert_axis(Axis(1));
|
||||
let probs = probs.slice(s!(0, ..));
|
||||
|
||||
// argmax
|
||||
let (token_id, _) = probs
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.reduce(|max, x| if x.1 > max.1 { x } else { max })
|
||||
.unwrap();
|
||||
input_ids.push(token_id as f32);
|
||||
|
||||
// SEP
|
||||
if token_id == 102 {
|
||||
break;
|
||||
}
|
||||
|
||||
// streaming generation
|
||||
if let Some(t) = self.tokenizer.next_token(token_id as u32)? {
|
||||
print!("{t}");
|
||||
std::io::stdout().flush()?;
|
||||
}
|
||||
// sleep for test
|
||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||
}
|
||||
println!();
|
||||
self.tokenizer.clear();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn batch_visual(&self) -> usize {
|
||||
self.batch_visual.opt as usize
|
||||
}
|
||||
|
||||
pub fn batch_textual(&self) -> usize {
|
||||
self.batch_textual.opt as usize
|
||||
}
|
||||
}
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{s, Array, Axis, IxDyn};
|
||||
use std::io::Write;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
use crate::{auto_load, ops, LogitsSampler, MinOptMax, Options, OrtEngine, TokenizerStream};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Blip {
|
||||
pub textual: OrtEngine,
|
||||
pub visual: OrtEngine,
|
||||
pub height: MinOptMax,
|
||||
pub width: MinOptMax,
|
||||
pub batch_visual: MinOptMax,
|
||||
pub batch_textual: MinOptMax,
|
||||
tokenizer: TokenizerStream,
|
||||
}
|
||||
|
||||
impl Blip {
|
||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
||||
let visual = OrtEngine::new(&options_visual)?;
|
||||
let textual = OrtEngine::new(&options_textual)?;
|
||||
let (batch_visual, batch_textual, height, width) = (
|
||||
visual.batch().to_owned(),
|
||||
textual.batch().to_owned(),
|
||||
visual.height().to_owned(),
|
||||
visual.width().to_owned(),
|
||||
);
|
||||
let tokenizer = match &options_textual.tokenizer {
|
||||
None => auto_load("tokenizer-blip.json")?,
|
||||
Some(tokenizer) => tokenizer.into(),
|
||||
};
|
||||
let tokenizer = Tokenizer::from_file(tokenizer).unwrap();
|
||||
let tokenizer = TokenizerStream::new(tokenizer);
|
||||
visual.dry_run()?;
|
||||
textual.dry_run()?;
|
||||
Ok(Self {
|
||||
textual,
|
||||
visual,
|
||||
batch_visual,
|
||||
batch_textual,
|
||||
height,
|
||||
width,
|
||||
tokenizer,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let xs_ = ops::standardize(
|
||||
xs_,
|
||||
&[0.48145466, 0.4578275, 0.40821073],
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn caption(&mut self, path: &str, prompt: Option<&str>) -> Result<()> {
|
||||
// this demo use batch_size=1
|
||||
let x = image::io::Reader::open(path)?.decode()?;
|
||||
let image_embeds = self.encode_images(&[x])?;
|
||||
let image_embeds_attn_mask: Array<f32, IxDyn> =
|
||||
Array::ones((1, image_embeds.shape()[1])).into_dyn();
|
||||
|
||||
// conditional
|
||||
let mut input_ids = match prompt {
|
||||
None => {
|
||||
print!("[Unconditional]: ");
|
||||
vec![0.0f32]
|
||||
}
|
||||
|
||||
Some(prompt) => {
|
||||
let encodings = self.tokenizer.tokenizer().encode(prompt, false);
|
||||
let ids: Vec<f32> = encodings
|
||||
.unwrap()
|
||||
.get_ids()
|
||||
.iter()
|
||||
.map(|x| *x as f32)
|
||||
.collect();
|
||||
print!("[Conditional]: {} ", prompt);
|
||||
ids
|
||||
}
|
||||
};
|
||||
|
||||
let mut logits_sampler = LogitsSampler::new();
|
||||
loop {
|
||||
let input_ids_nd: Array<f32, IxDyn> = Array::from_vec(input_ids.to_owned()).into_dyn();
|
||||
let input_ids_nd = input_ids_nd.insert_axis(Axis(0));
|
||||
let input_ids_attn_mask: Array<f32, IxDyn> =
|
||||
Array::ones(input_ids_nd.shape()).into_dyn();
|
||||
let y = self.textual.run(&[
|
||||
input_ids_nd,
|
||||
input_ids_attn_mask,
|
||||
image_embeds.to_owned(),
|
||||
image_embeds_attn_mask.to_owned(),
|
||||
])?; // N, length, vocab_size
|
||||
let y = y[0].slice(s!(0, -1.., ..));
|
||||
let logits = y.slice(s!(0, ..)).to_vec();
|
||||
let token_id = logits_sampler.decode(&logits)?;
|
||||
input_ids.push(token_id as f32);
|
||||
|
||||
// SEP
|
||||
if token_id == 102 {
|
||||
break;
|
||||
}
|
||||
|
||||
// streaming generation
|
||||
if let Some(t) = self.tokenizer.next_token(token_id as u32)? {
|
||||
print!("{t}");
|
||||
std::io::stdout().flush()?;
|
||||
}
|
||||
|
||||
// sleep for test
|
||||
std::thread::sleep(std::time::Duration::from_millis(5));
|
||||
}
|
||||
println!();
|
||||
self.tokenizer.clear();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn batch_visual(&self) -> usize {
|
||||
self.batch_visual.opt as usize
|
||||
}
|
||||
|
||||
pub fn batch_textual(&self) -> usize {
|
||||
self.batch_textual.opt as usize
|
||||
}
|
||||
}
|
||||
|
@ -1,105 +1,110 @@
|
||||
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use itertools::Itertools;
|
||||
use ndarray::{Array, Array2, Axis, IxDyn};
|
||||
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Clip {
|
||||
pub textual: OrtEngine,
|
||||
pub visual: OrtEngine,
|
||||
pub height: MinOptMax,
|
||||
pub width: MinOptMax,
|
||||
pub batch_visual: MinOptMax,
|
||||
pub batch_textual: MinOptMax,
|
||||
tokenizer: Tokenizer,
|
||||
context_length: usize,
|
||||
}
|
||||
|
||||
impl Clip {
|
||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
||||
let context_length = 77;
|
||||
let visual = OrtEngine::new(&options_visual)?;
|
||||
let textual = OrtEngine::new(&options_textual)?;
|
||||
let (batch_visual, batch_textual, height, width) = (
|
||||
visual.inputs_minoptmax()[0][0].to_owned(),
|
||||
textual.inputs_minoptmax()[0][0].to_owned(),
|
||||
visual.inputs_minoptmax()[0][2].to_owned(),
|
||||
visual.inputs_minoptmax()[0][3].to_owned(),
|
||||
);
|
||||
let tokenizer = match &options_textual.tokenizer {
|
||||
None => auto_load("tokenizer-clip.json").unwrap(),
|
||||
Some(tokenizer) => tokenizer.into(),
|
||||
};
|
||||
let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap();
|
||||
tokenizer.with_padding(Some(PaddingParams {
|
||||
strategy: PaddingStrategy::Fixed(context_length),
|
||||
direction: PaddingDirection::Right,
|
||||
pad_to_multiple_of: None,
|
||||
pad_id: 0,
|
||||
pad_type_id: 0,
|
||||
pad_token: "[PAD]".to_string(),
|
||||
}));
|
||||
|
||||
visual.dry_run()?;
|
||||
textual.dry_run()?;
|
||||
|
||||
Ok(Self {
|
||||
textual,
|
||||
visual,
|
||||
batch_visual,
|
||||
batch_textual,
|
||||
height,
|
||||
width,
|
||||
tokenizer,
|
||||
context_length,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn encode_texts(&self, texts: &[String]) -> Result<Array<f32, IxDyn>> {
|
||||
let encodings = self
|
||||
.tokenizer
|
||||
.encode_batch(texts.to_owned(), false)
|
||||
.unwrap();
|
||||
let xs: Vec<f32> = encodings
|
||||
.iter()
|
||||
.map(|i| i.get_ids().iter().map(|b| *b as f32).collect())
|
||||
.concat();
|
||||
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
|
||||
let ys = self.textual.run(&[xs])?;
|
||||
let ys = ys[0].to_owned();
|
||||
// let ys = ops::norm(&ys);
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn get_similarity(
|
||||
&self,
|
||||
images_feats: &Array<f32, IxDyn>,
|
||||
texts_feats: &Array<f32, IxDyn>,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
let images_feats = images_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
|
||||
let texts_feats = texts_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
|
||||
let matrix = images_feats.dot(&texts_feats.t()); // [M, N]
|
||||
let exps = matrix.mapv(|x| x.exp()); //[M, N]
|
||||
let stds = exps.sum_axis(Axis(1)); //[M, 1]
|
||||
let matrix = exps / stds.insert_axis(Axis(1)); // [M, N]
|
||||
let similarity: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
|
||||
Ok(similarity)
|
||||
}
|
||||
|
||||
pub fn batch_visual(&self) -> usize {
|
||||
self.batch_visual.opt as usize
|
||||
}
|
||||
|
||||
pub fn batch_textual(&self) -> usize {
|
||||
self.batch_textual.opt as usize
|
||||
}
|
||||
}
|
||||
use crate::{auto_load, ops, MinOptMax, Options, OrtEngine};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
// use itertools::Itertools;
|
||||
use ndarray::{Array, Array2, Axis, IxDyn};
|
||||
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Clip {
|
||||
pub textual: OrtEngine,
|
||||
pub visual: OrtEngine,
|
||||
pub height: MinOptMax,
|
||||
pub width: MinOptMax,
|
||||
pub batch_visual: MinOptMax,
|
||||
pub batch_textual: MinOptMax,
|
||||
tokenizer: Tokenizer,
|
||||
context_length: usize,
|
||||
}
|
||||
|
||||
impl Clip {
|
||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
||||
let context_length = 77;
|
||||
let visual = OrtEngine::new(&options_visual)?;
|
||||
let textual = OrtEngine::new(&options_textual)?;
|
||||
let (batch_visual, batch_textual, height, width) = (
|
||||
visual.inputs_minoptmax()[0][0].to_owned(),
|
||||
textual.inputs_minoptmax()[0][0].to_owned(),
|
||||
visual.inputs_minoptmax()[0][2].to_owned(),
|
||||
visual.inputs_minoptmax()[0][3].to_owned(),
|
||||
);
|
||||
let tokenizer = match &options_textual.tokenizer {
|
||||
None => auto_load("tokenizer-clip.json").unwrap(),
|
||||
Some(tokenizer) => tokenizer.into(),
|
||||
};
|
||||
let mut tokenizer = Tokenizer::from_file(tokenizer).unwrap();
|
||||
tokenizer.with_padding(Some(PaddingParams {
|
||||
strategy: PaddingStrategy::Fixed(context_length),
|
||||
direction: PaddingDirection::Right,
|
||||
pad_to_multiple_of: None,
|
||||
pad_id: 0,
|
||||
pad_type_id: 0,
|
||||
pad_token: "[PAD]".to_string(),
|
||||
}));
|
||||
|
||||
visual.dry_run()?;
|
||||
textual.dry_run()?;
|
||||
|
||||
Ok(Self {
|
||||
textual,
|
||||
visual,
|
||||
batch_visual,
|
||||
batch_textual,
|
||||
height,
|
||||
width,
|
||||
tokenizer,
|
||||
context_length,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let xs_ = ops::standardize(
|
||||
xs_,
|
||||
&[0.48145466, 0.4578275, 0.40821073],
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn encode_texts(&self, texts: &[String]) -> Result<Array<f32, IxDyn>> {
|
||||
let encodings = self
|
||||
.tokenizer
|
||||
.encode_batch(texts.to_owned(), false)
|
||||
.unwrap();
|
||||
let xs: Vec<f32> = encodings
|
||||
.iter()
|
||||
.flat_map(|i| i.get_ids().iter().map(|&b| b as f32))
|
||||
.collect();
|
||||
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
|
||||
let ys = self.textual.run(&[xs])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn get_similarity(
|
||||
&self,
|
||||
images_feats: &Array<f32, IxDyn>,
|
||||
texts_feats: &Array<f32, IxDyn>,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
let images_feats = images_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
|
||||
let texts_feats = texts_feats.clone().into_dimensionality::<ndarray::Ix2>()?;
|
||||
let matrix = images_feats.dot(&texts_feats.t()); // [M, N]
|
||||
let exps = matrix.mapv(|x| x.exp()); //[M, N]
|
||||
let stds = exps.sum_axis(Axis(1)); //[M, 1]
|
||||
let matrix = exps / stds.insert_axis(Axis(1)); // [M, N]
|
||||
let similarity: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
|
||||
Ok(similarity)
|
||||
}
|
||||
|
||||
pub fn batch_visual(&self) -> usize {
|
||||
self.batch_visual.opt as usize
|
||||
}
|
||||
|
||||
pub fn batch_textual(&self) -> usize {
|
||||
self.batch_textual.opt as usize
|
||||
}
|
||||
}
|
||||
|
152
src/models/db.rs
@ -1,6 +1,4 @@
|
||||
use crate::{
|
||||
ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Point, Polygon, Results,
|
||||
};
|
||||
use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Polygon, Ys};
|
||||
use anyhow::Result;
|
||||
use image::{DynamicImage, ImageBuffer};
|
||||
use ndarray::{Array, Axis, IxDyn};
|
||||
@ -11,131 +9,121 @@ pub struct DB {
|
||||
height: MinOptMax,
|
||||
width: MinOptMax,
|
||||
batch: MinOptMax,
|
||||
annotator: Annotator,
|
||||
confs: DynConf,
|
||||
saveout: Option<String>,
|
||||
names: Option<Vec<String>>,
|
||||
unclip_ratio: f32,
|
||||
binary_thresh: f32,
|
||||
min_width: f32,
|
||||
min_height: f32,
|
||||
}
|
||||
|
||||
impl DB {
|
||||
pub fn new(options: &Options) -> Result<Self> {
|
||||
let engine = OrtEngine::new(options)?;
|
||||
let (batch, height, width) = (
|
||||
engine.inputs_minoptmax()[0][0].to_owned(),
|
||||
engine.inputs_minoptmax()[0][2].to_owned(),
|
||||
engine.inputs_minoptmax()[0][3].to_owned(),
|
||||
engine.batch().to_owned(),
|
||||
engine.height().to_owned(),
|
||||
engine.width().to_owned(),
|
||||
);
|
||||
let annotator = Annotator::default();
|
||||
let names = Some(vec!["Text".to_string()]);
|
||||
let confs = DynConf::new(&options.confs, 1);
|
||||
let unclip_ratio = options.unclip_ratio;
|
||||
let binary_thresh = 0.2;
|
||||
let min_width = options.min_width.unwrap_or(0.0);
|
||||
let min_height = options.min_height.unwrap_or(0.0);
|
||||
engine.dry_run()?;
|
||||
|
||||
Ok(Self {
|
||||
engine,
|
||||
names,
|
||||
confs,
|
||||
height,
|
||||
width,
|
||||
batch,
|
||||
saveout: options.saveout.to_owned(),
|
||||
annotator,
|
||||
min_width,
|
||||
min_height,
|
||||
unclip_ratio,
|
||||
binary_thresh,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
|
||||
let xs_ = ops::letterbox(xs, self.height.opt as u32, self.width.opt as u32)?;
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
let xs_ = ops::letterbox(xs, self.height.opt as u32, self.width.opt as u32, 144.0)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]);
|
||||
let ys = self.engine.run(&[xs_])?;
|
||||
let ys = self.postprocess(ys, xs)?;
|
||||
match &self.saveout {
|
||||
None => {}
|
||||
Some(saveout) => {
|
||||
for (img0, y) in xs.iter().zip(ys.iter()) {
|
||||
let mut img = img0.to_rgb8();
|
||||
self.annotator.plot(&mut img, y);
|
||||
self.annotator.save(&img, saveout);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn postprocess(
|
||||
&self,
|
||||
xs: Vec<Array<f32, IxDyn>>,
|
||||
xs0: &[DynamicImage],
|
||||
) -> Result<Vec<Results>> {
|
||||
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
let mut ys = Vec::new();
|
||||
for (idx, mask) in xs[0].axis_iter(Axis(0)).enumerate() {
|
||||
let mut ys_bbox = Vec::new();
|
||||
for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() {
|
||||
let mut y_bbox = Vec::new();
|
||||
|
||||
// reshape
|
||||
let h = luma.dim()[1];
|
||||
let w = luma.dim()[2];
|
||||
let luma = luma.into_shape((h, w, 1))?.into_owned();
|
||||
|
||||
// build image from ndarray
|
||||
let raw_vec = luma
|
||||
.into_raw_vec()
|
||||
.iter()
|
||||
.map(|x| if x <= &self.binary_thresh { 0.0 } else { *x })
|
||||
.collect::<Vec<_>>();
|
||||
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
|
||||
ImageBuffer::from_raw(w as u32, h as u32, raw_vec)
|
||||
.expect("Faild to create image from ndarray");
|
||||
let mut mask_im = image::DynamicImage::from(mask_im);
|
||||
|
||||
// input image
|
||||
let image_width = xs0[idx].width() as f32;
|
||||
let image_height = xs0[idx].height() as f32;
|
||||
|
||||
// h,w,1
|
||||
let h = mask.dim()[1];
|
||||
let w = mask.dim()[2];
|
||||
let mask = mask.into_shape((h, w, 1))?.into_owned();
|
||||
|
||||
// build image from ndarray
|
||||
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
|
||||
ImageBuffer::from_raw(w as u32, h as u32, mask.into_raw_vec())
|
||||
.expect("Faild to create image from ndarray");
|
||||
let mut mask_im = image::DynamicImage::from(mask_im);
|
||||
|
||||
// rescale
|
||||
let (_, w_mask, h_mask) = ops::scale_wh(image_width, image_height, w as f32, h as f32);
|
||||
let mask_original = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
|
||||
let mask_original = mask_original.resize_exact(
|
||||
// rescale mask image
|
||||
let (ratio, w_mask, h_mask) =
|
||||
ops::scale_wh(image_width, image_height, w as f32, h as f32);
|
||||
let mask_im = mask_im.crop(0, 0, w_mask as u32, h_mask as u32);
|
||||
let mask_im = mask_im.resize_exact(
|
||||
image_width as u32,
|
||||
image_height as u32,
|
||||
image::imageops::FilterType::Triangle,
|
||||
);
|
||||
let mask_im = mask_im.into_luma8();
|
||||
|
||||
// contours
|
||||
let contours: Vec<imageproc::contours::Contour<i32>> =
|
||||
imageproc::contours::find_contours(&mask_original.into_luma8());
|
||||
imageproc::contours::find_contours_with_threshold(&mask_im, 1);
|
||||
|
||||
// loop
|
||||
let mut y_polygons: Vec<Polygon> = Vec::new();
|
||||
for contour in contours.iter() {
|
||||
// polygon
|
||||
let points: Vec<Point> = contour
|
||||
.points
|
||||
.iter()
|
||||
.map(|p| Point::new(p.x as f32, p.y as f32))
|
||||
.collect();
|
||||
let polygon = Polygon::new(&points);
|
||||
let mut rect = polygon.find_min_rect();
|
||||
|
||||
// min size filter
|
||||
if rect.height() < 3.0 || rect.width() < 3.0 {
|
||||
if contour.points.len() <= 1 {
|
||||
continue;
|
||||
}
|
||||
let polygon = Polygon::from_imageproc_points(&contour.points);
|
||||
let perimeter = polygon.perimeter();
|
||||
let delta = polygon.area() * ratio.round() * self.unclip_ratio / perimeter;
|
||||
let polygon = polygon
|
||||
// .simplify(6e-4 * perimeter)
|
||||
.offset(delta, image_width, image_height)
|
||||
.resample(50)
|
||||
.convex_hull();
|
||||
let rect = polygon.find_min_rect();
|
||||
if rect.height() < self.min_height || rect.width() < self.min_width {
|
||||
continue;
|
||||
}
|
||||
|
||||
// confs filter
|
||||
let confidence = polygon.area() / rect.area();
|
||||
if confidence < self.confs[0] {
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: expand polygon
|
||||
let unclip_ratio = 1.5;
|
||||
let delta = rect.area() * unclip_ratio / rect.perimeter();
|
||||
|
||||
// save
|
||||
let y_bbox = Bbox::new(
|
||||
rect.expand(delta, delta, image_width, image_height),
|
||||
0,
|
||||
confidence,
|
||||
self.names.as_ref().map(|names| names[0].clone()),
|
||||
);
|
||||
ys_bbox.push(y_bbox);
|
||||
let bbox = Bbox::new(rect, 0, confidence, None);
|
||||
y_bbox.push(bbox);
|
||||
y_polygons.push(polygon);
|
||||
}
|
||||
let y = Results {
|
||||
probs: None,
|
||||
bboxes: Some(ys_bbox),
|
||||
keypoints: None,
|
||||
masks: None,
|
||||
};
|
||||
ys.push(y);
|
||||
ys.push(
|
||||
Ys::default()
|
||||
.with_bboxes(&y_bbox)
|
||||
.with_polygons(&y_polygons),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(ys)
|
||||
|
@ -1,9 +1,9 @@
|
||||
use crate::{ops, DataLoader, Metric, MinOptMax, Options, OrtEngine};
|
||||
use crate::{ops, MinOptMax, Options, OrtEngine};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, IxDyn};
|
||||
use std::path::PathBuf;
|
||||
use usearch::ffi::{IndexOptions, MetricKind, ScalarKind};
|
||||
// use std::path::PathBuf;
|
||||
// use usearch::ffi::{IndexOptions, MetricKind, ScalarKind};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Model {
|
||||
@ -49,102 +49,107 @@ impl Dinov2 {
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32, true)?;
|
||||
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let xs_ = ops::standardize(
|
||||
xs_,
|
||||
&[0.48145466, 0.4578275, 0.40821073],
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
let ys = ops::norm(&ys);
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
|
||||
let metric = match metric {
|
||||
Metric::IP => MetricKind::IP,
|
||||
Metric::L2 => MetricKind::L2sq,
|
||||
Metric::Cos => MetricKind::Cos,
|
||||
};
|
||||
let options = IndexOptions {
|
||||
metric,
|
||||
dimensions: self.hidden_size,
|
||||
quantization: ScalarKind::F16,
|
||||
..Default::default()
|
||||
};
|
||||
Ok(usearch::new_index(&options)?)
|
||||
}
|
||||
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
|
||||
// let metric = match metric {
|
||||
// Metric::IP => MetricKind::IP,
|
||||
// Metric::L2 => MetricKind::L2sq,
|
||||
// Metric::Cos => MetricKind::Cos,
|
||||
// };
|
||||
// let options = IndexOptions {
|
||||
// metric,
|
||||
// dimensions: self.hidden_size,
|
||||
// quantization: ScalarKind::F16,
|
||||
// ..Default::default()
|
||||
// };
|
||||
// Ok(usearch::new_index(&options)?)
|
||||
// }
|
||||
|
||||
pub fn query_from_folder(
|
||||
&mut self,
|
||||
qurey: &str,
|
||||
gallery: &str,
|
||||
metric: Metric,
|
||||
) -> Result<Vec<(usize, f32, PathBuf)>> {
|
||||
// load query
|
||||
let query = DataLoader::try_read(qurey)?;
|
||||
let query = self.run(&[query])?;
|
||||
// pub fn query_from_folder(
|
||||
// &mut self,
|
||||
// qurey: &str,
|
||||
// gallery: &str,
|
||||
// metric: Metric,
|
||||
// ) -> Result<Vec<(usize, f32, PathBuf)>> {
|
||||
// // load query
|
||||
// let query = DataLoader::try_read(qurey)?;
|
||||
// let query = self.run(&[query])?;
|
||||
|
||||
// build index & gallery
|
||||
let index = self.build_index(metric)?;
|
||||
let dl = DataLoader::default()
|
||||
.with_batch(self.batch.opt as usize)
|
||||
.load(gallery)?;
|
||||
let paths = dl.paths().to_owned();
|
||||
index.reserve(paths.len())?;
|
||||
// // build index & gallery
|
||||
// let index = self.build_index(metric)?;
|
||||
// let dl = DataLoader::default()
|
||||
// .with_batch(self.batch.opt as usize)
|
||||
// .load(gallery)?;
|
||||
// let paths = dl.paths().to_owned();
|
||||
// index.reserve(paths.len())?;
|
||||
|
||||
// load feats
|
||||
for (idx, (x, _path)) in dl.enumerate() {
|
||||
let y = self.run(&x)?;
|
||||
index.add(idx as u64, &y.into_raw_vec())?;
|
||||
}
|
||||
// // load feats
|
||||
// for (idx, (x, _path)) in dl.enumerate() {
|
||||
// let y = self.run(&x)?;
|
||||
// index.add(idx as u64, &y.into_raw_vec())?;
|
||||
// }
|
||||
|
||||
// output
|
||||
let matches = index.search(&query.into_raw_vec(), index.size())?;
|
||||
let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
|
||||
matches
|
||||
.keys
|
||||
.into_iter()
|
||||
.zip(matches.distances)
|
||||
.for_each(|(k, score)| {
|
||||
results.push((k as usize, score, paths[k as usize].to_owned()));
|
||||
});
|
||||
// // output
|
||||
// let matches = index.search(&query.into_raw_vec(), index.size())?;
|
||||
// let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
|
||||
// matches
|
||||
// .keys
|
||||
// .into_iter()
|
||||
// .zip(matches.distances)
|
||||
// .for_each(|(k, score)| {
|
||||
// results.push((k as usize, score, paths[k as usize].to_owned()));
|
||||
// });
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
// Ok(results)
|
||||
// }
|
||||
|
||||
pub fn query_from_vec(
|
||||
&mut self,
|
||||
qurey: &str,
|
||||
gallery: &[&str],
|
||||
metric: Metric,
|
||||
) -> Result<Vec<(usize, f32, PathBuf)>> {
|
||||
// load query
|
||||
let query = DataLoader::try_read(qurey)?;
|
||||
let query = self.run(&[query])?;
|
||||
// pub fn query_from_vec(
|
||||
// &mut self,
|
||||
// qurey: &str,
|
||||
// gallery: &[&str],
|
||||
// metric: Metric,
|
||||
// ) -> Result<Vec<(usize, f32, PathBuf)>> {
|
||||
// // load query
|
||||
// let query = DataLoader::try_read(qurey)?;
|
||||
// let query = self.run(&[query])?;
|
||||
|
||||
// build index & gallery
|
||||
let index = self.build_index(metric)?;
|
||||
index.reserve(gallery.len())?;
|
||||
let mut dl = DataLoader::default().with_batch(self.batch.opt as usize);
|
||||
gallery.iter().for_each(|x| {
|
||||
dl.load(x).unwrap();
|
||||
});
|
||||
// // build index & gallery
|
||||
// let index = self.build_index(metric)?;
|
||||
// index.reserve(gallery.len())?;
|
||||
// let mut dl = DataLoader::default().with_batch(self.batch.opt as usize);
|
||||
// gallery.iter().for_each(|x| {
|
||||
// dl.load(x).unwrap();
|
||||
// });
|
||||
|
||||
// load feats
|
||||
let paths = dl.paths().to_owned();
|
||||
for (idx, (x, _path)) in dl.enumerate() {
|
||||
let y = self.run(&x)?;
|
||||
index.add(idx as u64, &y.into_raw_vec())?;
|
||||
}
|
||||
// // load feats
|
||||
// let paths = dl.paths().to_owned();
|
||||
// for (idx, (x, _path)) in dl.enumerate() {
|
||||
// let y = self.run(&x)?;
|
||||
// index.add(idx as u64, &y.into_raw_vec())?;
|
||||
// }
|
||||
|
||||
// output
|
||||
let matches = index.search(&query.into_raw_vec(), index.size())?;
|
||||
let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
|
||||
matches
|
||||
.keys
|
||||
.into_iter()
|
||||
.zip(matches.distances)
|
||||
.for_each(|(k, score)| {
|
||||
results.push((k as usize, score, paths[k as usize].to_owned()));
|
||||
});
|
||||
// // output
|
||||
// let matches = index.search(&query.into_raw_vec(), index.size())?;
|
||||
// let mut results: Vec<(usize, f32, PathBuf)> = Vec::new();
|
||||
// matches
|
||||
// .keys
|
||||
// .into_iter()
|
||||
// .zip(matches.distances)
|
||||
// .for_each(|(k, score)| {
|
||||
// results.push((k as usize, score, paths[k as usize].to_owned()));
|
||||
// });
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
// Ok(results)
|
||||
// }
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ mod clip;
|
||||
mod db;
|
||||
mod dinov2;
|
||||
mod rtdetr;
|
||||
mod svtr;
|
||||
mod yolo;
|
||||
|
||||
pub use blip::Blip;
|
||||
@ -10,4 +11,5 @@ pub use clip::Clip;
|
||||
pub use db::DB;
|
||||
pub use dinov2::Dinov2;
|
||||
pub use rtdetr::RTDETR;
|
||||
pub use svtr::SVTR;
|
||||
pub use yolo::YOLO;
|
||||
|
@ -3,7 +3,7 @@ use image::DynamicImage;
|
||||
use ndarray::{s, Array, Axis, IxDyn};
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{ops, Annotator, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Results};
|
||||
use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Rect, Ys};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RTDETR {
|
||||
@ -11,9 +11,7 @@ pub struct RTDETR {
|
||||
height: MinOptMax,
|
||||
width: MinOptMax,
|
||||
batch: MinOptMax,
|
||||
annotator: Annotator,
|
||||
confs: DynConf,
|
||||
saveout: Option<String>,
|
||||
nc: usize,
|
||||
names: Option<Vec<String>>,
|
||||
}
|
||||
@ -43,7 +41,7 @@ impl RTDETR {
|
||||
.expect("Failed to get num_classes, make it explicit with `--nc`")
|
||||
.len(),
|
||||
);
|
||||
let annotator = Annotator::default();
|
||||
// let annotator = Annotator::default();
|
||||
let confs = DynConf::new(&options.confs, nc);
|
||||
engine.dry_run()?;
|
||||
|
||||
@ -54,34 +52,19 @@ impl RTDETR {
|
||||
height,
|
||||
width,
|
||||
batch,
|
||||
saveout: options.saveout.to_owned(),
|
||||
annotator,
|
||||
names,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
|
||||
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 144.0)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let ys = self.engine.run(&[xs_])?;
|
||||
let ys = self.postprocess(ys, xs)?;
|
||||
match &self.saveout {
|
||||
None => {}
|
||||
Some(saveout) => {
|
||||
for (img0, y) in xs.iter().zip(ys.iter()) {
|
||||
let mut img = img0.to_rgb8();
|
||||
self.annotator.plot(&mut img, y);
|
||||
self.annotator.save(&img, saveout);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn postprocess(
|
||||
&self,
|
||||
xs: Vec<Array<f32, IxDyn>>,
|
||||
xs0: &[DynamicImage],
|
||||
) -> Result<Vec<Results>> {
|
||||
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
const CXYWH_OFFSET: usize = 4; // cxcywh
|
||||
let preds = &xs[0];
|
||||
|
||||
@ -129,11 +112,12 @@ impl RTDETR {
|
||||
);
|
||||
y_bboxes.push(y_bbox)
|
||||
}
|
||||
let y = Results {
|
||||
let y = Ys {
|
||||
probs: None,
|
||||
bboxes: Some(y_bboxes),
|
||||
keypoints: None,
|
||||
masks: None,
|
||||
polygons: None,
|
||||
};
|
||||
ys.push(y);
|
||||
}
|
||||
|
86
src/models/svtr.rs
Normal file
@ -0,0 +1,86 @@
|
||||
use crate::{ops, DynConf, MinOptMax, Options, OrtEngine};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, Axis, IxDyn};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SVTR {
|
||||
engine: OrtEngine,
|
||||
pub height: MinOptMax,
|
||||
pub width: MinOptMax,
|
||||
pub batch: MinOptMax,
|
||||
confs: DynConf,
|
||||
vocab: Vec<String>,
|
||||
}
|
||||
|
||||
impl SVTR {
|
||||
pub fn new(options: &Options) -> Result<Self> {
|
||||
let engine = OrtEngine::new(options)?;
|
||||
let (batch, height, width) = (
|
||||
engine.batch().to_owned(),
|
||||
engine.height().to_owned(),
|
||||
engine.width().to_owned(),
|
||||
);
|
||||
let confs = DynConf::new(&options.confs, 1);
|
||||
let mut vocab: Vec<_> =
|
||||
std::fs::read_to_string(options.vocab.as_ref().expect("No vocabulary found"))?
|
||||
.lines()
|
||||
.map(|line| line.to_string())
|
||||
.collect();
|
||||
vocab.push(" ".to_string());
|
||||
vocab.insert(0, "Blank".to_string());
|
||||
engine.dry_run()?;
|
||||
|
||||
Ok(Self {
|
||||
engine,
|
||||
height,
|
||||
width,
|
||||
batch,
|
||||
vocab,
|
||||
confs,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<()> {
|
||||
let xs_ =
|
||||
ops::resize_with_fixed_height(xs, self.height.opt as u32, self.width.opt as u32, 0.0)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
self.postprocess(&ys)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn postprocess(&self, xs: &Array<f32, IxDyn>) -> Result<()> {
|
||||
for batch in xs.axis_iter(Axis(0)) {
|
||||
let mut texts: Vec<String> = Vec::new();
|
||||
for (i, seq) in batch.axis_iter(Axis(0)).enumerate() {
|
||||
let (id, &confidence) = seq
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.reduce(|max, x| if x.1 > max.1 { x } else { max })
|
||||
.unwrap();
|
||||
if id == 0 || confidence < self.confs[0] {
|
||||
continue;
|
||||
}
|
||||
if i == 0 && id == self.vocab.len() - 1 {
|
||||
continue;
|
||||
}
|
||||
texts.push(self.vocab[id].to_owned());
|
||||
}
|
||||
texts.dedup();
|
||||
|
||||
print!("[Texts] ");
|
||||
if texts.is_empty() {
|
||||
println!("Nothing detected!");
|
||||
} else {
|
||||
for text in texts.into_iter() {
|
||||
print!("{text}");
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -5,8 +5,8 @@ use ndarray::{s, Array, Axis, IxDyn};
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{
|
||||
non_max_suppression, ops, Annotator, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options,
|
||||
OrtEngine, Point, Rect, Results,
|
||||
ops, Bbox, DynConf, Embedding, Keypoint, MinOptMax, Options, OrtEngine, Point, Polygon, Rect,
|
||||
Ys,
|
||||
};
|
||||
|
||||
const CXYWH_OFFSET: usize = 4;
|
||||
@ -34,8 +34,7 @@ pub struct YOLO {
|
||||
confs: DynConf,
|
||||
kconfs: DynConf,
|
||||
iou: f32,
|
||||
saveout: Option<String>,
|
||||
annotator: Annotator,
|
||||
// saveout: Option<String>,
|
||||
names: Option<Vec<String>>,
|
||||
apply_nms: bool,
|
||||
anchors_first: bool,
|
||||
@ -101,11 +100,6 @@ impl YOLO {
|
||||
};
|
||||
let confs = DynConf::new(&options.confs, nc);
|
||||
let kconfs = DynConf::new(&options.kconfs, nk);
|
||||
let mut annotator = Annotator::default();
|
||||
if let Some(skeletons) = &options.skeletons {
|
||||
annotator = annotator.with_skeletons(skeletons);
|
||||
}
|
||||
let saveout = options.saveout.to_owned();
|
||||
engine.dry_run()?;
|
||||
|
||||
Ok(Self {
|
||||
@ -121,44 +115,27 @@ impl YOLO {
|
||||
width,
|
||||
batch,
|
||||
task,
|
||||
saveout,
|
||||
annotator,
|
||||
names,
|
||||
anchors_first: options.anchors_first,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Results>> {
|
||||
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32)?;
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
let xs_ = ops::letterbox(xs, self.height() as u32, self.width() as u32, 144.0)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let ys = self.engine.run(&[xs_])?;
|
||||
let ys = self.postprocess(ys, xs)?;
|
||||
match &self.saveout {
|
||||
None => println!("{ys:?}"),
|
||||
Some(saveout) => {
|
||||
for (img0, y) in xs.iter().zip(ys.iter()) {
|
||||
let mut img = img0.to_rgb8();
|
||||
self.annotator.plot(&mut img, y);
|
||||
self.annotator.save(&img, saveout);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn postprocess(
|
||||
&self,
|
||||
xs: Vec<Array<f32, IxDyn>>,
|
||||
xs0: &[DynamicImage],
|
||||
) -> Result<Vec<Results>> {
|
||||
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Ys>> {
|
||||
if let YOLOTask::Classify = self.task {
|
||||
let mut ys = Vec::new();
|
||||
for batch in xs[0].axis_iter(Axis(0)) {
|
||||
ys.push(Results::new(
|
||||
Some(Embedding::new(batch.into_owned(), self.names.to_owned())),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
));
|
||||
ys.push(
|
||||
Ys::default()
|
||||
.with_probs(Embedding::new(batch.into_owned(), self.names.to_owned())),
|
||||
);
|
||||
}
|
||||
Ok(ys)
|
||||
} else {
|
||||
@ -265,13 +242,16 @@ impl YOLO {
|
||||
|
||||
// nms
|
||||
if self.apply_nms {
|
||||
non_max_suppression(&mut data, self.iou);
|
||||
Self::non_max_suppression(&mut data, self.iou);
|
||||
}
|
||||
|
||||
// decode
|
||||
let mut y_bboxes: Vec<Bbox> = Vec::new();
|
||||
let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
|
||||
|
||||
let mut y_masks: Vec<Vec<u8>> = Vec::new();
|
||||
let mut y_polygons: Vec<Polygon> = Vec::new();
|
||||
|
||||
for elem in data.into_iter() {
|
||||
if let Some(kpts) = elem.1 {
|
||||
y_kpts.push(kpts)
|
||||
@ -291,7 +271,6 @@ impl YOLO {
|
||||
let mask_im: ImageBuffer<image::Luma<_>, Vec<f32>> =
|
||||
ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec())
|
||||
.expect("Faild to create image from ndarray");
|
||||
|
||||
let mut mask_im = image::DynamicImage::from(mask_im); // -> dyn
|
||||
|
||||
// rescale masks
|
||||
@ -305,7 +284,7 @@ impl YOLO {
|
||||
);
|
||||
|
||||
// crop-mask with bbox
|
||||
let mut mask_original_cropped = mask_original.into_luma8();
|
||||
let mut mask_object_cropped = mask_original.into_luma8(); // gray image
|
||||
for y in 0..height_original as usize {
|
||||
for x in 0..width_original as usize {
|
||||
if x < elem.0.xmin() as usize
|
||||
@ -313,7 +292,7 @@ impl YOLO {
|
||||
|| y < elem.0.ymin() as usize
|
||||
|| y > elem.0.ymax() as usize
|
||||
{
|
||||
mask_original_cropped.put_pixel(
|
||||
mask_object_cropped.put_pixel(
|
||||
x as u32,
|
||||
y as u32,
|
||||
image::Luma([0u8]),
|
||||
@ -321,31 +300,37 @@ impl YOLO {
|
||||
}
|
||||
}
|
||||
}
|
||||
y_masks.push(mask_original_cropped.into_raw());
|
||||
|
||||
// mask -> contours
|
||||
let contours: Vec<imageproc::contours::Contour<i32>> =
|
||||
imageproc::contours::find_contours_with_threshold(
|
||||
&mask_object_cropped,
|
||||
1,
|
||||
);
|
||||
|
||||
// contours -> polygons
|
||||
contours.iter().for_each(|contour| {
|
||||
if let imageproc::contours::BorderType::Outer = contour.border_type {
|
||||
if contour.points.len() > 1 {
|
||||
y_polygons.push(Polygon::from_contour(contour));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// save each mask
|
||||
y_masks.push(mask_object_cropped.into_raw());
|
||||
}
|
||||
y_bboxes.push(elem.0);
|
||||
}
|
||||
|
||||
// save each result
|
||||
let y = Results {
|
||||
probs: None,
|
||||
bboxes: if !y_bboxes.is_empty() {
|
||||
Some(y_bboxes)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
keypoints: if !y_kpts.is_empty() {
|
||||
Some(y_kpts)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
masks: if !y_masks.is_empty() {
|
||||
Some(y_masks)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
};
|
||||
ys.push(y);
|
||||
// save result
|
||||
ys.push(
|
||||
Ys::default()
|
||||
.with_bboxes(&y_bboxes)
|
||||
.with_keypoints(&y_kpts)
|
||||
.with_masks(&y_masks)
|
||||
.with_polygons(&y_polygons),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(ys)
|
||||
@ -376,4 +361,29 @@ impl YOLO {
|
||||
pub fn height(&self) -> isize {
|
||||
self.height.opt
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn non_max_suppression(
|
||||
xs: &mut Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)>,
|
||||
iou_threshold: f32,
|
||||
) {
|
||||
xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap());
|
||||
|
||||
let mut current_index = 0;
|
||||
for index in 0..xs.len() {
|
||||
let mut drop = false;
|
||||
for prev_index in 0..current_index {
|
||||
let iou = xs[prev_index].0.iou(&xs[index].0);
|
||||
if iou > iou_threshold {
|
||||
drop = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !drop {
|
||||
xs.swap(current_index, index);
|
||||
current_index += 1;
|
||||
}
|
||||
}
|
||||
xs.truncate(current_index);
|
||||
}
|
||||
}
|
||||
|
149
src/ops.rs
@ -2,78 +2,17 @@ use anyhow::Result;
|
||||
use image::{DynamicImage, GenericImageView};
|
||||
use ndarray::{Array, Axis, Ix2, IxDyn};
|
||||
|
||||
pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
|
||||
let r = (w1 / w0).min(h1 / h0);
|
||||
(r, (w0 * r).round(), (h0 * r).round())
|
||||
pub fn standardize(xs: Array<f32, IxDyn>, mean: &[f32], std: &[f32]) -> Array<f32, IxDyn> {
|
||||
let mean = Array::from_shape_vec((1, mean.len(), 1, 1), mean.to_vec()).unwrap();
|
||||
let std = Array::from_shape_vec((1, std.len(), 1, 1), std.to_vec()).unwrap();
|
||||
(xs - mean) / std
|
||||
}
|
||||
|
||||
pub fn resize(
|
||||
xs: &[DynamicImage],
|
||||
height: u32,
|
||||
width: u32,
|
||||
norm_imagenet: bool,
|
||||
) -> Result<Array<f32, IxDyn>> {
|
||||
let norm = 255.0;
|
||||
let mut ys = Array::ones(vec![xs.len(), 3, height as usize, width as usize]).into_dyn();
|
||||
// let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
|
||||
for (idx, x) in xs.iter().enumerate() {
|
||||
let (w0, h0) = x.dimensions();
|
||||
let w0 = w0 as f32;
|
||||
let h0 = h0 as f32;
|
||||
let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round
|
||||
let img = x.resize_exact(
|
||||
w_new as u32,
|
||||
h_new as u32,
|
||||
image::imageops::FilterType::Triangle,
|
||||
);
|
||||
for (x, y, rgb) in img.pixels() {
|
||||
let x = x as usize;
|
||||
let y = y as usize;
|
||||
let [r, g, b, _] = rgb.0;
|
||||
ys[[idx, 0, y, x]] = (r as f32) / norm;
|
||||
ys[[idx, 1, y, x]] = (g as f32) / norm;
|
||||
ys[[idx, 2, y, x]] = (b as f32) / norm;
|
||||
}
|
||||
}
|
||||
|
||||
if norm_imagenet {
|
||||
let mean =
|
||||
Array::from_shape_vec((1, 3, 1, 1), vec![0.48145466, 0.4578275, 0.40821073]).unwrap();
|
||||
let std = Array::from_shape_vec((1, 3, 1, 1), vec![0.26862954, 0.261_302_6, 0.275_777_1])
|
||||
.unwrap();
|
||||
ys = (ys - mean) / std;
|
||||
}
|
||||
Ok(ys)
|
||||
/// Rescales every element of `xs` as `(x - min_) / (max_ - min_)`.
pub fn normalize(xs: Array<f32, IxDyn>, min_: f32, max_: f32) -> Array<f32, IxDyn> {
    let span = max_ - min_;
    let shifted = xs - min_;
    shifted / span
}
|
||||
|
||||
pub fn letterbox(xs: &[DynamicImage], height: u32, width: u32) -> Result<Array<f32, IxDyn>> {
|
||||
let norm = 255.0;
|
||||
let bg = 144.0;
|
||||
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
|
||||
ys.fill(bg / norm);
|
||||
for (idx, x) in xs.iter().enumerate() {
|
||||
let (w0, h0) = x.dimensions();
|
||||
let w0 = w0 as f32;
|
||||
let h0 = h0 as f32;
|
||||
let (_, w_new, h_new) = scale_wh(w0, h0, width as f32, height as f32); // f32 round
|
||||
let img = x.resize_exact(
|
||||
w_new as u32,
|
||||
h_new as u32,
|
||||
image::imageops::FilterType::Triangle,
|
||||
);
|
||||
for (x, y, rgb) in img.pixels() {
|
||||
let x = x as usize;
|
||||
let y = y as usize;
|
||||
let [r, g, b, _] = rgb.0;
|
||||
ys[[idx, 0, y, x]] = (r as f32) / norm;
|
||||
ys[[idx, 1, y, x]] = (g as f32) / norm;
|
||||
ys[[idx, 2, y, x]] = (b as f32) / norm;
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn norm(xs: &Array<f32, IxDyn>) -> Array<f32, IxDyn> {
|
||||
pub fn norm2(xs: &Array<f32, IxDyn>) -> Array<f32, IxDyn> {
|
||||
let std_ = xs
|
||||
.mapv(|x| x * x)
|
||||
.sum_axis(Axis(1))
|
||||
@ -93,3 +32,77 @@ pub fn dot2(query: &Array<f32, IxDyn>, gallery: &Array<f32, IxDyn>) -> Result<Ve
|
||||
let matrix: Vec<Vec<f32>> = matrix.axis_iter(Axis(0)).map(|row| row.to_vec()).collect();
|
||||
Ok(matrix)
|
||||
}
|
||||
|
||||
/// Computes the uniform scale that fits a `w0` x `h0` box inside `w1` x `h1`.
///
/// Returns `(ratio, scaled_width, scaled_height)`, where `ratio` is the
/// smaller of `w1 / w0` and `h1 / h0` and the scaled sizes are rounded to the
/// nearest whole number.
pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) {
    let ratio = f32::min(w1 / w0, h1 / h0);
    let scaled_w = (w0 * ratio).round();
    let scaled_h = (h0 * ratio).round();
    (ratio, scaled_w, scaled_h)
}
|
||||
|
||||
pub fn resize(xs: &[DynamicImage], height: u32, width: u32) -> Result<Array<f32, IxDyn>> {
|
||||
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
|
||||
for (idx, x) in xs.iter().enumerate() {
|
||||
let img = x.resize_exact(width, height, image::imageops::FilterType::Triangle);
|
||||
for (x, y, rgb) in img.pixels() {
|
||||
let x = x as usize;
|
||||
let y = y as usize;
|
||||
let [r, g, b, _] = rgb.0;
|
||||
ys[[idx, 0, y, x]] = r as f32;
|
||||
ys[[idx, 1, y, x]] = g as f32;
|
||||
ys[[idx, 2, y, x]] = b as f32;
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn letterbox(
|
||||
xs: &[DynamicImage],
|
||||
height: u32,
|
||||
width: u32,
|
||||
bg: f32,
|
||||
) -> Result<Array<f32, IxDyn>> {
|
||||
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
|
||||
ys.fill(bg);
|
||||
for (idx, x) in xs.iter().enumerate() {
|
||||
let (w0, h0) = x.dimensions();
|
||||
let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32);
|
||||
let img = x.resize_exact(
|
||||
w_new as u32,
|
||||
h_new as u32,
|
||||
image::imageops::FilterType::CatmullRom,
|
||||
);
|
||||
for (x, y, rgb) in img.pixels() {
|
||||
let x = x as usize;
|
||||
let y = y as usize;
|
||||
let [r, g, b, _] = rgb.0;
|
||||
ys[[idx, 0, y, x]] = r as f32;
|
||||
ys[[idx, 1, y, x]] = g as f32;
|
||||
ys[[idx, 2, y, x]] = b as f32;
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
||||
pub fn resize_with_fixed_height(
|
||||
xs: &[DynamicImage],
|
||||
height: u32,
|
||||
width: u32,
|
||||
bg: f32,
|
||||
) -> Result<Array<f32, IxDyn>> {
|
||||
let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn();
|
||||
ys.fill(bg);
|
||||
for (idx, x) in xs.iter().enumerate() {
|
||||
let (w0, h0) = x.dimensions();
|
||||
let h_new = height;
|
||||
let w_new = height * w0 / h0;
|
||||
let img = x.resize_exact(w_new, h_new, image::imageops::FilterType::CatmullRom);
|
||||
for (x, y, rgb) in img.pixels() {
|
||||
let x = x as usize;
|
||||
let y = y as usize;
|
||||
let [r, g, b, _] = rgb.0;
|
||||
ys[[idx, 0, y, x]] = r as f32;
|
||||
ys[[idx, 1, y, x]] = g as f32;
|
||||
ys[[idx, 2, y, x]] = b as f32;
|
||||
}
|
||||
}
|
||||
Ok(ys)
|
||||
}
|
||||
|
@ -13,25 +13,25 @@ pub struct Options {
|
||||
pub i04: Option<MinOptMax>,
|
||||
pub i05: Option<MinOptMax>,
|
||||
pub i10: Option<MinOptMax>, // 2nd input, axis 0
|
||||
pub i11: Option<MinOptMax>,
|
||||
pub i11: Option<MinOptMax>, // 2nd input, axis 1
|
||||
pub i12: Option<MinOptMax>,
|
||||
pub i13: Option<MinOptMax>,
|
||||
pub i14: Option<MinOptMax>,
|
||||
pub i15: Option<MinOptMax>,
|
||||
pub i20: Option<MinOptMax>, // 2nd input, axis 0
|
||||
pub i20: Option<MinOptMax>,
|
||||
pub i21: Option<MinOptMax>,
|
||||
pub i22: Option<MinOptMax>,
|
||||
pub i23: Option<MinOptMax>,
|
||||
pub i24: Option<MinOptMax>,
|
||||
pub i25: Option<MinOptMax>,
|
||||
pub i30: Option<MinOptMax>, // 2nd input, axis 0
|
||||
pub i30: Option<MinOptMax>,
|
||||
pub i31: Option<MinOptMax>,
|
||||
pub i32_: Option<MinOptMax>,
|
||||
pub i33: Option<MinOptMax>,
|
||||
pub i34: Option<MinOptMax>,
|
||||
pub i35: Option<MinOptMax>,
|
||||
|
||||
// trt ep
|
||||
// trt related
|
||||
pub trt_engine_cache_enable: bool,
|
||||
pub trt_int8_enable: bool,
|
||||
pub trt_fp16_enable: bool,
|
||||
@ -44,12 +44,13 @@ pub struct Options {
|
||||
pub kconfs: Vec<f32>,
|
||||
pub iou: f32,
|
||||
pub apply_nms: bool,
|
||||
pub saveout: Option<String>,
|
||||
pub tokenizer: Option<String>,
|
||||
pub vocab: Option<String>,
|
||||
pub names: Option<Vec<String>>, // class names
|
||||
pub anchors_first: bool, // otuput format: [bs, anchors/na, pos+nc+nm]
|
||||
pub skeletons: Option<Vec<(usize, usize)>>,
|
||||
pub min_width: Option<f32>,
|
||||
pub min_height: Option<f32>,
|
||||
pub unclip_ratio: f32, // DB
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
@ -93,12 +94,13 @@ impl Default for Options {
|
||||
kconfs: vec![0.5f32],
|
||||
iou: 0.45f32,
|
||||
apply_nms: true,
|
||||
saveout: None,
|
||||
tokenizer: None,
|
||||
vocab: None,
|
||||
names: None,
|
||||
anchors_first: false,
|
||||
skeletons: None,
|
||||
min_width: None,
|
||||
min_height: None,
|
||||
unclip_ratio: 1.5,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -144,18 +146,28 @@ impl Options {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_saveout(mut self, saveout: &str) -> Self {
|
||||
self.saveout = Some(saveout.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_names(mut self, names: &[&str]) -> Self {
|
||||
self.names = Some(names.iter().map(|x| x.to_string()).collect::<Vec<String>>());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_skeletons(mut self, skeletons: &[(usize, usize)]) -> Self {
|
||||
self.skeletons = Some(skeletons.to_vec());
|
||||
pub fn with_vocab(mut self, vocab: &str) -> Self {
|
||||
self.vocab = Some(auto_load(vocab).unwrap());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_unclip_ratio(mut self, x: f32) -> Self {
|
||||
self.unclip_ratio = x;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_min_width(mut self, x: f32) -> Self {
|
||||
self.min_width = Some(x);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_min_height(mut self, x: f32) -> Self {
|
||||
self.min_height = Some(x);
|
||||
self
|
||||
}
|
||||
|
||||
|
12
src/point.rs
@ -142,6 +142,18 @@ impl Point {
|
||||
pub fn sum(&self) -> f32 {
|
||||
self.x + self.y
|
||||
}
|
||||
|
||||
/// Returns the perpendicular distance from this point to the infinite line
/// passing through `start` and `end`.
///
/// When `start` and `end` coincide the line is undefined and the usual
/// formula evaluates to `0.0 / 0.0` (NaN). In that case the Euclidean
/// distance from this point to `start` is returned instead, so callers that
/// compare distances (such as polyline simplification of a closed contour)
/// still get a meaningful value.
pub fn perpendicular_distance(&self, start: &Point, end: &Point) -> f32 {
    let dx = end.x - start.x;
    let dy = end.y - start.y;
    let denominator = (dy.powi(2) + dx.powi(2)).sqrt();

    // Degenerate segment: fall back to the point-to-point distance.
    if denominator == 0.0 {
        return ((self.x - start.x).powi(2) + (self.y - start.y).powi(2)).sqrt();
    }

    let numerator = (dy * self.x - dx * self.y + end.x * start.y - end.y * start.x).abs();
    numerator / denominator
}
|
||||
|
||||
/// Returns the 2D cross product `self.x * other.y - self.y * other.x`.
pub fn cross(&self, other: &Point) -> f32 {
    let forward = self.x * other.y;
    let backward = self.y * other.x;
    forward - backward
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
212
src/polygon.rs
@ -1,16 +1,53 @@
|
||||
use crate::{Point, Rect, RotatedRect};
|
||||
use crate::{Point, Rect};
|
||||
|
||||
#[derive(Default, Debug, PartialOrd, PartialEq, Clone)]
|
||||
#[derive(Default, Debug, Clone, PartialEq)]
|
||||
pub struct Polygon {
|
||||
points: Vec<Point>,
|
||||
pub points: Vec<Point>,
|
||||
}
|
||||
|
||||
impl From<Vec<Point>> for Polygon {
|
||||
fn from(points: Vec<Point>) -> Self {
|
||||
Self { points }
|
||||
}
|
||||
}
|
||||
|
||||
impl Polygon {
|
||||
pub fn new(points: &[Point]) -> Self {
|
||||
// TODO: refactor
|
||||
Self {
|
||||
points: points.to_vec(),
|
||||
}
|
||||
// pub fn new(points: &[Point]) -> Self {
|
||||
// Self {
|
||||
// points: points.to_vec(),
|
||||
// }
|
||||
// }
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn from_contour(contour: &imageproc::contours::Contour<i32>) -> Self {
|
||||
let points = contour
|
||||
.points
|
||||
.iter()
|
||||
.map(|p| Point::new(p.x as f32, p.y as f32))
|
||||
.collect::<Vec<_>>();
|
||||
Self { points }
|
||||
}
|
||||
|
||||
/// Converts the vertices to `imageproc` integer points.
///
/// Coordinates are converted with `as i32`, i.e. the fractional part is
/// discarded rather than rounded.
pub fn to_imageproc_points(&self) -> Vec<imageproc::point::Point<i32>> {
    let mut converted = Vec::with_capacity(self.points.len());
    for vertex in &self.points {
        converted.push(imageproc::point::Point::new(vertex.x as i32, vertex.y as i32));
    }
    converted
}
|
||||
|
||||
pub fn from_imageproc_points(points: &[imageproc::point::Point<i32>]) -> Self {
|
||||
let points = points
|
||||
.iter()
|
||||
.map(|p| Point::new(p.x as f32, p.y as f32))
|
||||
.collect::<Vec<_>>();
|
||||
Self { points }
|
||||
}
|
||||
|
||||
pub fn with_points(mut self, points: &[Point]) {
|
||||
self.points = points.to_vec();
|
||||
}
|
||||
|
||||
pub fn area(&self) -> f32 {
|
||||
@ -41,14 +78,163 @@ impl Polygon {
|
||||
max_y = point.y
|
||||
}
|
||||
}
|
||||
((min_x, min_y), (max_x, max_y)).into()
|
||||
((min_x - 1.0, min_y - 1.0), (max_x + 1.0, max_y + 1.0)).into()
|
||||
}
|
||||
|
||||
pub fn find_min_rotated_rect() -> RotatedRect {
|
||||
todo!()
|
||||
pub fn perimeter(&self) -> f32 {
|
||||
let mut perimeter = 0.0;
|
||||
let n = self.points.len();
|
||||
for i in 0..n {
|
||||
let j = (i + 1) % n;
|
||||
perimeter += self.points[i].distance_from(&self.points[j]);
|
||||
}
|
||||
perimeter
|
||||
}
|
||||
|
||||
pub fn expand(&mut self) -> Self {
|
||||
todo!()
|
||||
pub fn offset(&self, delta: f32, width: f32, height: f32) -> Self {
|
||||
let num_points = self.points.len();
|
||||
let mut new_points = Vec::with_capacity(self.points.len());
|
||||
for i in 0..num_points {
|
||||
let prev_idx = if i == 0 { num_points - 1 } else { i - 1 };
|
||||
let next_idx = (i + 1) % num_points;
|
||||
|
||||
let edge_vector = Point {
|
||||
x: self.points[next_idx].x - self.points[prev_idx].x,
|
||||
y: self.points[next_idx].y - self.points[prev_idx].y,
|
||||
};
|
||||
let normal_vector = Point {
|
||||
x: -edge_vector.y,
|
||||
y: edge_vector.x,
|
||||
};
|
||||
|
||||
let normal_length = (normal_vector.x.powi(2) + normal_vector.y.powi(2)).sqrt();
|
||||
if normal_length.abs() < 1e-6 {
|
||||
new_points.push(self.points[i]);
|
||||
} else {
|
||||
let normalized_normal = Point {
|
||||
x: normal_vector.x / normal_length,
|
||||
y: normal_vector.y / normal_length,
|
||||
};
|
||||
|
||||
let new_x = self.points[i].x + normalized_normal.x * delta;
|
||||
let new_y = self.points[i].y + normalized_normal.y * delta;
|
||||
let new_x = new_x.max(0.0).min(width);
|
||||
let new_y = new_y.max(0.0).min(height);
|
||||
new_points.push(Point { x: new_x, y: new_y });
|
||||
}
|
||||
}
|
||||
Self { points: new_points }
|
||||
}
|
||||
|
||||
pub fn resample(&self, num_samples: usize) -> Polygon {
|
||||
let mut points = Vec::new();
|
||||
for i in 0..self.points.len() {
|
||||
let start_point = self.points[i];
|
||||
let end_point = self.points[(i + 1) % self.points.len()];
|
||||
points.push(start_point);
|
||||
let dx = end_point.x - start_point.x;
|
||||
let dy = end_point.y - start_point.y;
|
||||
for j in 1..num_samples {
|
||||
let t = (j as f32) / (num_samples as f32);
|
||||
let new_x = start_point.x + t * dx;
|
||||
let new_y = start_point.y + t * dy;
|
||||
points.push(Point { x: new_x, y: new_y });
|
||||
}
|
||||
}
|
||||
Self { points }
|
||||
}
|
||||
|
||||
pub fn simplify(&self, epsilon: f32) -> Self {
|
||||
let mask = self.rdp_iter(epsilon);
|
||||
let points = self
|
||||
.points
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, &point)| if mask[i] { Some(point) } else { None })
|
||||
.collect();
|
||||
Self { points }
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
fn rdp_iter(&self, epsilon: f32) -> Vec<bool> {
|
||||
let mut stk = Vec::new();
|
||||
let mut indices = vec![true; self.points.len()];
|
||||
stk.push((0, self.points.len() - 1));
|
||||
while let Some((start_index, last_index)) = stk.pop() {
|
||||
let mut dmax = 0.0;
|
||||
let mut index = start_index;
|
||||
for i in (start_index + 1)..last_index {
|
||||
let d = self.points[i]
|
||||
.perpendicular_distance(&self.points[start_index], &self.points[last_index]);
|
||||
if d > dmax {
|
||||
index = i;
|
||||
dmax = d;
|
||||
}
|
||||
}
|
||||
|
||||
if dmax > epsilon {
|
||||
stk.push((start_index, index));
|
||||
stk.push((index, last_index));
|
||||
} else {
|
||||
for j in (start_index + 1)..last_index {
|
||||
indices[j] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
indices
|
||||
}
|
||||
|
||||
pub fn convex_hull(&self) -> Self {
|
||||
let mut points = self.points.clone();
|
||||
points.sort_by(|a, b| {
|
||||
a.x.partial_cmp(&b.x)
|
||||
.unwrap()
|
||||
.then(a.y.partial_cmp(&b.y).unwrap())
|
||||
});
|
||||
let mut hull: Vec<Point> = Vec::new();
|
||||
|
||||
// Lower hull
|
||||
for &point in &points {
|
||||
while hull.len() >= 2 {
|
||||
let last = hull.len() - 1;
|
||||
let second_last = hull.len() - 2;
|
||||
let vec_a = hull[last] - hull[second_last];
|
||||
let vec_b = point - hull[second_last];
|
||||
|
||||
if vec_a.cross(&vec_b) <= 0.0 {
|
||||
hull.pop();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
hull.push(point);
|
||||
}
|
||||
|
||||
// Upper hull
|
||||
let lower_hull_size = hull.len();
|
||||
for &point in points.iter().rev().skip(1) {
|
||||
while hull.len() > lower_hull_size {
|
||||
let last = hull.len() - 1;
|
||||
let second_last = hull.len() - 2;
|
||||
let vec_a: Point = hull[last] - hull[second_last];
|
||||
let vec_b = point - hull[second_last];
|
||||
|
||||
if vec_a.cross(&vec_b) <= 0.0 {
|
||||
hull.pop();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
hull.push(point);
|
||||
}
|
||||
|
||||
// Remove duplicate points
|
||||
hull.dedup();
|
||||
if hull.len() > 1 && hull.first() == hull.last() {
|
||||
hull.pop();
|
||||
}
|
||||
|
||||
Self { points: hull }
|
||||
}
|
||||
}
|
||||
|
@ -1,59 +0,0 @@
|
||||
use crate::{Bbox, Embedding, Keypoint};
|
||||
|
||||
#[derive(Clone, PartialEq, Default)]
|
||||
pub struct Results {
|
||||
pub probs: Option<Embedding>,
|
||||
pub bboxes: Option<Vec<Bbox>>,
|
||||
pub keypoints: Option<Vec<Vec<Keypoint>>>,
|
||||
pub masks: Option<Vec<Vec<u8>>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Results {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Results")
|
||||
.field("Probabilities", &self.probs)
|
||||
.field("BoundingBoxes", &self.bboxes)
|
||||
.field("Keypoints", &self.keypoints)
|
||||
.field(
|
||||
"Masks",
|
||||
&format_args!("{:?}", self.masks().map(|masks| masks.len())),
|
||||
)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Results {
|
||||
pub fn new(
|
||||
probs: Option<Embedding>,
|
||||
bboxes: Option<Vec<Bbox>>,
|
||||
keypoints: Option<Vec<Vec<Keypoint>>>,
|
||||
masks: Option<Vec<Vec<u8>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
probs,
|
||||
bboxes,
|
||||
keypoints,
|
||||
masks,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn probs(&self) -> Option<&Embedding> {
|
||||
self.probs.as_ref()
|
||||
}
|
||||
|
||||
pub fn keypoints(&self) -> Option<&Vec<Vec<Keypoint>>> {
|
||||
self.keypoints.as_ref()
|
||||
}
|
||||
|
||||
pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
|
||||
self.masks.as_ref()
|
||||
}
|
||||
|
||||
pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
|
||||
self.bboxes.as_ref()
|
||||
}
|
||||
|
||||
pub fn bboxes_mut(&mut self) -> Option<&mut Vec<Bbox>> {
|
||||
self.bboxes.as_mut()
|
||||
}
|
||||
}
|
27
src/utils.rs
@ -1,4 +1,4 @@
|
||||
use crate::{Bbox, Keypoint, GITHUB_ASSETS};
|
||||
use crate::GITHUB_ASSETS;
|
||||
use anyhow::Result;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use std::io::{Read, Write};
|
||||
@ -92,31 +92,6 @@ pub fn config_dir() -> PathBuf {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn non_max_suppression(
|
||||
xs: &mut Vec<(Bbox, Option<Vec<Keypoint>>, Option<Vec<f32>>)>,
|
||||
iou_threshold: f32,
|
||||
) {
|
||||
xs.sort_by(|b1, b2| b2.0.confidence().partial_cmp(&b1.0.confidence()).unwrap());
|
||||
|
||||
let mut current_index = 0;
|
||||
for index in 0..xs.len() {
|
||||
let mut drop = false;
|
||||
for prev_index in 0..current_index {
|
||||
let iou = xs[prev_index].0.iou(&xs[index].0);
|
||||
if iou > iou_threshold {
|
||||
drop = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !drop {
|
||||
xs.swap(current_index, index);
|
||||
current_index += 1;
|
||||
}
|
||||
}
|
||||
xs.truncate(current_index);
|
||||
}
|
||||
|
||||
pub const COCO_SKELETON_17: [(usize, usize); 16] = [
|
||||
(0, 1),
|
||||
(0, 2),
|
||||
|
76
src/ys.rs
Normal file
@ -0,0 +1,76 @@
|
||||
use crate::{Bbox, Embedding, Keypoint, Polygon};
|
||||
|
||||
#[derive(Clone, PartialEq, Default)]
|
||||
pub struct Ys {
|
||||
// Results for each frame
|
||||
pub probs: Option<Embedding>,
|
||||
pub bboxes: Option<Vec<Bbox>>,
|
||||
pub keypoints: Option<Vec<Vec<Keypoint>>>,
|
||||
pub masks: Option<Vec<Vec<u8>>>,
|
||||
pub polygons: Option<Vec<Polygon>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Ys {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Results")
|
||||
.field("Probabilities", &self.probs)
|
||||
.field("BoundingBoxes", &self.bboxes)
|
||||
.field("Keypoints", &self.keypoints)
|
||||
.field(
|
||||
"Masks",
|
||||
&format_args!("{:?}", self.masks().map(|masks| masks.len())),
|
||||
)
|
||||
.field(
|
||||
"Polygons",
|
||||
&format_args!("{:?}", self.polygons().map(|polygons| polygons.len())),
|
||||
)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Ys {
|
||||
pub fn with_probs(mut self, probs: Embedding) -> Self {
|
||||
self.probs = Some(probs);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
|
||||
self.bboxes = Some(bboxes.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
|
||||
self.keypoints = Some(keypoints.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_masks(mut self, masks: &[Vec<u8>]) -> Self {
|
||||
self.masks = Some(masks.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_polygons(mut self, polygons: &[Polygon]) -> Self {
|
||||
self.polygons = Some(polygons.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn probs(&self) -> Option<&Embedding> {
|
||||
self.probs.as_ref()
|
||||
}
|
||||
|
||||
pub fn keypoints(&self) -> Option<&Vec<Vec<Keypoint>>> {
|
||||
self.keypoints.as_ref()
|
||||
}
|
||||
|
||||
pub fn masks(&self) -> Option<&Vec<Vec<u8>>> {
|
||||
self.masks.as_ref()
|
||||
}
|
||||
|
||||
pub fn polygons(&self) -> Option<&Vec<Polygon>> {
|
||||
self.polygons.as_ref()
|
||||
}
|
||||
|
||||
pub fn bboxes(&self) -> Option<&Vec<Bbox>> {
|
||||
self.bboxes.as_ref()
|
||||
}
|
||||
}
|