Add YOLOv8-OBB and some bug fixes (#9)

* Add YOLOv8-Obb & Refactor outputs

* Update README.md
This commit is contained in:
Jamjamjon
2024-04-21 17:06:58 +08:00
committed by GitHub
parent 91049fc18a
commit beda8ef803
109 changed files with 2542 additions and 1940 deletions

View File

@@ -17,10 +17,12 @@ cargo run -r --example blip
```shell
[Unconditional image captioning]: a group of people walking around a bus
[Conditional image captioning]: three man walking in front of a bus
Some(["three man walking in front of a bus"])
```
## TODO
* [ ] Multi-batch inference for image caption
* [ ] VQA
* [ ] Retrieval
* [ ] TensorRT support for textual model

View File

@@ -1,4 +1,4 @@
use usls::{models::Blip, Options};
use usls::{models::Blip, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
@@ -22,9 +22,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let mut model = Blip::new(options_visual, options_textual)?;
// image caption
model.caption("./assets/bus.jpg", None)?; // unconditional
model.caption("./assets/bus.jpg", Some("three man"))?; // conditional
// image caption (this demo uses batch_size=1)
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
let _y = model.caption(&x, None, true)?; // unconditional
let y = model.caption(&x, Some("three man"), true)?; // conditional
println!("{:?}", y[0].texts());
Ok(())
}

View File

@@ -1,4 +1,4 @@
use usls::{models::Clip, ops, DataLoader, Options};
use usls::{models::Clip, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
@@ -39,7 +39,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let feats_image = model.encode_images(&images).unwrap();
// use image to query texts
let matrix = ops::dot2(&feats_image, &feats_text)?; // [m, n]
let matrix = feats_image.dot2(&feats_text)?;
// summary
for i in 0..paths.len() {

View File

@@ -20,4 +20,4 @@ cargo run -r --example db
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 165 KiB

BIN
examples/db/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

View File

@@ -15,18 +15,21 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut model = DB::new(&options)?;
// load image
let x = vec![DataLoader::try_read("./assets/db.png")?];
let x = vec![
DataLoader::try_read("./assets/db.png")?,
// DataLoader::try_read("./assets/2.jpg")?,
];
// run
let y = model.run(&x)?;
// annotate
let annotator = Annotator::default()
.without_name(true)
.without_polygons(false)
.with_mask_alpha(0)
.without_bboxes(false)
.with_saveout("DB-Text-Detection");
.without_bboxes(true)
.with_masks_alpha(60)
.with_polygon_color([255, 105, 180, 255])
.without_mbrs(true)
.with_saveout("DB");
annotator.annotate(&x, &y);
Ok(())

Binary file not shown.

Before

Width:  |  Height:  |  Size: 448 KiB

After

Width:  |  Height:  |  Size: 105 KiB

View File

@@ -9,7 +9,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i03((416, 640, 800).into())
// .with_trt(0)
// .with_fp16(true)
// .with_dry_run(10)
.with_confs(&[0.5]);
let mut model = YOLO::new(&options)?;
@@ -21,10 +20,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// annotate
let annotator = Annotator::default()
.without_conf(true)
.without_name(true)
.without_polygons(false)
.without_bboxes(true)
.without_bboxes_conf(true)
.without_bboxes_name(true)
.without_polygons(false)
.with_masks_name(false)
.with_saveout("Face-Parsing");
annotator.annotate(&x, &y);

View File

@@ -20,4 +20,4 @@ cargo run -r --example fastsam
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 302 KiB

BIN
examples/fastsam/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 321 KiB

View File

@@ -18,4 +18,4 @@ cargo run -r --example rtdetr
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 258 KiB

BIN
examples/rtdetr/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 439 KiB

View File

@@ -1,11 +1,11 @@
use usls::{models::RTDETR, Annotator, DataLoader, Options, COCO_NAMES_80};
use usls::{coco, models::RTDETR, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/rtdetr-l-f16.onnx")
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_names(&COCO_NAMES_80);
.with_names(&coco::NAMES_80);
let mut model = RTDETR::new(&options)?;
// load image

View File

@@ -15,4 +15,4 @@ cargo run -r --example rtmo
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 242 KiB

BIN
examples/rtmo/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 455 KiB

View File

@@ -1,10 +1,10 @@
use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETON_17};
use usls::{coco, models::RTMO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../rtmo-l-dyn-f16.onnx")
.with_i00((1, 2, 8).into())
.with_model("../rtmo-s-dyn.onnx")
.with_i00((1, 1, 8).into())
.with_nk(17)
.with_confs(&[0.3])
.with_kconfs(&[0.5]);
@@ -19,7 +19,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// // annotate
let annotator = Annotator::default()
.with_saveout("RTMO")
.with_skeletons(&COCO_SKELETON_17);
.with_skeletons(&coco::SKELETONS_16);
annotator.annotate(&x, &y);
Ok(())

View File

@@ -24,9 +24,13 @@ cargo run -r --example svtr
## Results
```shell
[Texts] from the background, but also separate text instances which
[Texts] are closely jointed. Some examples are illustrated in Fig.7.
[Texts] 你有这么高速运转的机械进入中国,记住我给出的原理
[Texts] 110022345
[Texts] 冀B6G000
```
["./examples/svtr/images/5.png"]: Some(["are closely jointed. Some examples are illustrated in Fig.7."])
["./examples/svtr/images/6.png"]: Some(["小菊儿胡同71号"])
["./examples/svtr/images/4.png"]: Some(["我在南锣鼓捣猫呢"])
["./examples/svtr/images/1.png"]: Some(["你有这么高速运转的机械进入中国,记住我给出的原理"])
["./examples/svtr/images/2.png"]: Some(["冀B6G000"])
["./examples/svtr/images/9.png"]: Some(["from the background, but also separate text instances which"])
["./examples/svtr/images/8.png"]: Some(["110022345"])
["./examples/svtr/images/3.png"]: Some(["粤A·68688"])
["./examples/svtr/images/7.png"]: Some(["Please lower your volume"])
```

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

BIN
examples/svtr/images/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
examples/svtr/images/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

BIN
examples/svtr/images/4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

BIN
examples/svtr/images/6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

BIN
examples/svtr/images/7.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

View File

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 24 KiB

View File

Before

Width:  |  Height:  |  Size: 9.0 KiB

After

Width:  |  Height:  |  Size: 9.0 KiB

View File

@@ -5,23 +5,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let options = Options::default()
.with_i00((1, 2, 8).into())
.with_i03((320, 960, 1600).into())
.with_confs(&[0.4])
.with_confs(&[0.2])
.with_vocab("../ppocr_rec_vocab.txt")
.with_model("../models/ppocr-v4-svtr-ch-dyn.onnx");
let mut model = SVTR::new(&options)?;
// load image
let xs = vec![
DataLoader::try_read("./examples/svtr/text1.png")?,
DataLoader::try_read("./examples/svtr/text2.png")?,
DataLoader::try_read("./examples/svtr/text3.png")?,
DataLoader::try_read("./examples/svtr/text4.png")?,
DataLoader::try_read("./examples/svtr/text5.png")?,
];
// load images
let dl = DataLoader::default()
.with_batch(1)
.load("./examples/svtr/images")?;
// run
for text in model.run(&xs)?.into_iter() {
println!("[Texts] {text}")
for (xs, paths) in dl {
let ys = model.run(&xs)?;
println!("{paths:?}: {:?}", ys[0].texts())
}
Ok(())

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

View File

@@ -40,4 +40,4 @@ cargo run -r --example yolo-world
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 453 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 922 KiB

After

Width:  |  Height:  |  Size: 296 KiB

View File

@@ -5,8 +5,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let options = Options::default()
.with_model("../models/yolopv2-dyn-480x800.onnx")
.with_i00((1, 1, 8).into())
// .with_trt(0)
// .with_fp16(true)
.with_confs(&[0.3]);
let mut model = YOLOPv2::new(&options)?;
@@ -18,7 +16,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// annotate
let annotator = Annotator::default()
.with_masks_name(false)
.with_masks_name(true)
.with_saveout("YOLOPv2");
annotator.annotate(&x, &y);

BIN
examples/yolov5/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 395 KiB

32
examples/yolov5/main.rs Normal file
View File

@@ -0,0 +1,32 @@
use usls::{
models::{YOLOTask, YOLO},
Annotator, DataLoader, Options,
};
/// YOLOv5 example entry point.
///
/// Configures a `YOLO` model for the `YOLOTask::Segment` task from
/// `../models/yolov5s-seg.onnx`, runs it on `./assets/bus.jpg`, and hands the
/// image together with the model output to an `Annotator`.
///
/// # Errors
///
/// Propagates any error returned while building the model, reading the image,
/// or running inference.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Model configuration. The builder calls are kept in their original order.
    // NOTE(review): i00/i02/i03 look like (min, opt, max) ranges for the input
    // dimensions 0, 2 and 3 — confirm against the `Options` documentation.
    let options = Options::default()
        .with_conf_independent(true)
        .with_anchors_first(true)
        .with_yolo_task(YOLOTask::Segment)
        .with_model("../models/yolov5s-seg.onnx")
        .with_trt(0)
        .with_fp16(true)
        .with_i00((1, 1, 4).into())
        .with_i02((224, 640, 800).into())
        .with_i03((224, 640, 800).into())
        .with_dry_run(3);
    let mut yolo = YOLO::new(&options)?;

    // Input batch holding a single image.
    let images = vec![DataLoader::try_read("./assets/bus.jpg")?];

    // Inference.
    let results = yolo.run(&images)?;

    // Annotation; "YOLOv5" is the save-out name passed to the annotator.
    Annotator::default()
        .with_saveout("YOLOv5")
        .annotate(&images, &results);

    Ok(())
}

View File

@@ -10,4 +10,4 @@ cargo run -r --example yolov8-face
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 285 KiB

View File

@@ -11,4 +11,4 @@ cargo run -r --example yolov8-falldown
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

View File

@@ -2,9 +2,7 @@ use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/yolov8-falldown-f16.onnx")
.with_confs(&[0.3]);
let options = Options::default().with_model("../models/yolov8-falldown-f16.onnx");
let mut model = YOLO::new(&options)?;
// load image

View File

@@ -11,4 +11,4 @@ cargo run -r --example yolov8-head
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 291 KiB

View File

@@ -2,9 +2,7 @@ use usls::{models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/yolov8-head-f16.onnx")
.with_confs(&[0.3]);
let options = Options::default().with_model("../models/yolov8-head-f16.onnx");
let mut model = YOLO::new(&options)?;
// load image

View File

@@ -13,4 +13,4 @@ cargo run -r --example yolov8-trash
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 214 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 367 KiB

View File

@@ -4,7 +4,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1.build model
let options = Options::default()
.with_model("../models/yolov8-plastic-bag-f16.onnx")
.with_confs(&[0.3])
.with_names(&["trash"]);
let mut model = YOLO::new(&options)?;

View File

@@ -14,19 +14,22 @@ yolo export model=yolov8m.pt format=onnx simplify dynamic
yolo export model=yolov8m-cls.pt format=onnx simplify dynamic
yolo export model=yolov8m-pose.pt format=onnx simplify dynamic
yolo export model=yolov8m-seg.pt format=onnx simplify dynamic
yolo export model=yolov8m-obb.pt format=onnx simplify dynamic
# export onnx model with fixed shapes
yolo export model=yolov8m.pt format=onnx simplify
yolo export model=yolov8m-cls.pt format=onnx simplify
yolo export model=yolov8m-pose.pt format=onnx simplify
yolo export model=yolov8m-seg.pt format=onnx simplify
yolo export model=yolov8m-obb.pt format=onnx simplify
```
## Result
| Task | Annotated image |
| :-------------------: | --------------------- |
| Obb | ![img](./demo-obb.png) |
| Instance Segmentation | ![img](./demo-seg.png) |
| Classification | ![img](./demo-cls.jpg) |
| Classification | ![img](./demo-cls.png) |
| Detection | ![img](./demo-det.png) |
| Pose | ![img](./demo-pose.png) |

Binary file not shown.

Before

Width:  |  Height:  |  Size: 221 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 453 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 MiB

After

Width:  |  Height:  |  Size: 451 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 546 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 552 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 MiB

After

Width:  |  Height:  |  Size: 457 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 MiB

After

Width:  |  Height:  |  Size: 387 KiB

View File

@@ -1,38 +1,70 @@
use usls::{
models::YOLO, Annotator, DataLoader, Options, COCO_KEYPOINT_NAMES_17, COCO_SKELETON_17,
};
use usls::{coco, models::YOLO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/yolov8m-dyn-f16.onnx")
// .with_trt(0) // cuda by default
// .with_model("../models/yolov8m.onnx")
// .with_model("../models/yolov8m-dyn-f16.onnx")
// .with_model("../models/yolov8m-pose-dyn-f16.onnx")
// .with_model("../models/yolov8m-seg-dyn-f16.onnx")
.with_model("../models/yolov8s-cls.onnx")
// .with_model("../models/yolov8s-obb.onnx")
// .with_trt(0)
// .with_fp16(true)
.with_i00((1, 1, 4).into())
.with_i02((224, 640, 800).into())
.with_i03((224, 640, 800).into())
.with_i02((224, 1024, 1024).into())
.with_i03((224, 1024, 1024).into())
// .with_i02((224, 640, 800).into())
// .with_i03((224, 640, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_names2(&COCO_KEYPOINT_NAMES_17)
.with_profile(false)
.with_dry_run(3);
.with_names2(&coco::KEYPOINTS_NAMES_17)
.with_profile(true)
.with_dry_run(10);
let mut model = YOLO::new(&options)?;
// build dataloader
let dl = DataLoader::default()
.with_batch(1)
.load("./assets/bus.jpg")?;
// .load("./assets/dota.png")?;
// build annotate
let annotator = Annotator::default()
.with_skeletons(&COCO_SKELETON_17)
.without_conf(false)
.without_name(false)
.with_keypoints_name(false)
.with_keypoints_conf(false)
.with_masks_name(false)
.without_masks(false)
.without_polygons(false)
.without_bboxes(false)
// .with_probs_topk(10)
// // bboxes
// .without_bboxes(false)
// .without_bboxes_conf(false)
// .without_bboxes_name(false)
// .without_bboxes_text_bg(false)
// .with_bboxes_text_color([255, 255, 255, 255])
// .with_bboxes_text_bg_alpha(255)
// // keypoints
// .without_keypoints(false)
// .with_keypoints_palette(&COCO_KEYPOINT_COLORS_17)
.with_skeletons(&coco::SKELETONS_16)
// .with_keypoints_name(false)
// .with_keypoints_conf(false)
// .without_keypoints_text_bg(false)
// .with_keypoints_text_color([255, 255, 255, 255])
// .with_keypoints_text_bg_alpha(255)
// .with_keypoints_radius(4)
// // masks
// .without_masks(false)
// .with_masks_alpha(190)
// .without_polygons(false)
// // .with_polygon_color([0, 255, 255, 255])
// .with_masks_conf(false)
// .with_masks_name(true)
// .with_masks_text_bg(true)
// .with_masks_text_color([255, 255, 255, 255])
// .with_masks_text_bg_alpha(10)
// // mbrs
// .without_mbrs(false)
// .without_mbrs_conf(false)
// .without_mbrs_name(false)
// .without_mbrs_text_bg(false)
// .with_mbrs_text_color([255, 255, 255, 255])
// .with_mbrs_text_bg_alpha(70)
.with_saveout("YOLOv8");
// run & annotate

View File

@@ -26,4 +26,4 @@ cargo run -r --example yolov9
## Results
![](./demo.jpg)
![](./demo.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 232 KiB

BIN
examples/yolov9/demo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 450 KiB

View File

@@ -7,8 +7,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_profile(false);
.with_confs(&[0.4, 0.15]); // person: 0.4, others: 0.15
let mut model = YOLO::new(&options)?;
// load image