🐍 v0.1.0 (#53)

Author: Jamjamjon
Date: 2025-01-12 16:59:57 +08:00 (committed by GitHub)
Parent: 4e932c4910
Commit: 0f2d84b8c5
256 changed files with 12485 additions and 9088 deletions

examples/beit/README.md (new file, +6)

@@ -0,0 +1,6 @@
## Quick Start
```shell
cargo run -r -F cuda --example beit -- --device cuda --dtype fp16
```

examples/beit/main.rs (new file, +52)

@@ -0,0 +1,52 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::beit_base()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}
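
For reference, the Quick Start flags map directly onto the builder calls above; a minimal sketch with `--device cuda:0 --dtype fp16` written out as literals, using only the calls already shown in this example (the string conversions are the same `try_into()` ones the argh defaults go through):

```rust
use usls::{models::ImageClassifier, Options};

fn main() -> anyhow::Result<()> {
    // Equivalent of `--device cuda:0 --dtype fp16`, without argh;
    // the builder calls are the ones used in the example above.
    let options = Options::beit_base()
        .with_model_dtype("fp16".try_into()?)
        .with_model_device("cuda:0".try_into()?)
        .commit()?;
    let _model = ImageClassifier::try_from(options)?;
    Ok(())
}
```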


@@ -3,20 +3,12 @@ This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do condit
## Quick Start
```shell
cargo run -r --example blip
cargo run -r -F cuda --example blip -- --device cuda:0 --source images/dog.jpg --source ./assets/bus.jpg --source images/green-car.jpg
```
## Results
```shell
[Unconditional]: a group of people walking around a bus
[Conditional]: three man walking in front of a bus
Some(["three man walking in front of a bus"])
Unconditional: Ys([Y { Texts: [Text("a dog running through a field of grass")] }, Y { Texts: [Text("a group of people walking around a bus")] }, Y { Texts: [Text("a green volkswagen beetle parked in front of a yellow building")] }])
Conditional: Ys([Y { Texts: [Text("this image depicting a dog running in a field")] }, Y { Texts: [Text("this image depict a bus in barcelona")] }, Y { Texts: [Text("this image depict a blue volkswagen beetle parked in a street in havana, cuba")] }])
```
## TODO
* [ ] Multi-batch inference for image caption
* [ ] VQA
* [ ] Retrieval
* [ ] TensorRT support for textual model


@@ -1,28 +1,44 @@
use usls::{models::Blip, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
let options_visual = Options::default()
.with_model("blip/visual-base.onnx")?
// .with_ixx(0, 2, 384.into())
// .with_ixx(0, 3, 384.into())
.with_profile(false);
// textual
let options_textual = Options::default()
.with_model("blip/textual-base.onnx")?
.with_tokenizer("blip/tokenizer.json")?
.with_profile(false);
// build model
let mut model = Blip::new(options_visual, options_textual)?;
// image caption (this demo use batch_size=1)
let xs = [DataLoader::try_read("images/bus.jpg")?];
let image_embeddings = model.encode_images(&xs)?;
let _y = model.caption(&image_embeddings, None, true)?; // unconditional
let y = model.caption(&image_embeddings, Some("three man"), true)?; // conditional
println!("{:?}", y[0].texts());
Ok(())
}
use usls::{models::Blip, DataLoader, Options};
#[derive(argh::FromArgs)]
/// BLIP Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(option, default = "vec![String::from(\"./assets/bus.jpg\")]")]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options_visual = Options::blip_v1_base_caption_visual()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let options_textual = Options::blip_v1_base_caption_textual()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = Blip::new(options_visual, options_textual)?;
// image caption
let xs = DataLoader::try_read_batch(&args.source)?;
// unconditional caption
let ys = model.forward(&xs, None)?;
println!("Unconditional: {:?}", ys);
// conditional caption
let ys = model.forward(&xs, Some("this image depict"))?;
println!("Conditional: {:?}", ys);
Ok(())
}


@@ -3,18 +3,13 @@ This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute
## Quick Start
```shell
cargo run -r --example clip
cargo run -r -F cuda --example clip -- --device cuda:0
```
## Results
```shell
(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
```
(99.9675%) ./examples/clip/images/carrot.jpg => Some carrots
(99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
(100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant
```
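
Each line above reports the best entry of that image's row in the image-to-text probability matrix (computed via `Ops::dot2` in the updated example below). A plain-Rust sketch of that final row reduction, with illustrative names and made-up numbers rather than usls types:

```rust
// Reduce one image's row of probabilities to its best-matching text.
// Function name, texts, and probabilities are illustrative only.
fn top_match<'a>(probs: &[f32], texts: &[&'a str]) -> Option<(f32, &'a str)> {
    probs
        .iter()
        .copied()
        .zip(texts.iter().copied())
        .max_by(|a, b| a.0.total_cmp(&b.0))
}

fn main() {
    let texts = ["Some carrots", "A photo of a dog"];
    let probs = [0.999_675, 0.000_325];
    if let Some((score, text)) = top_match(&probs, &texts) {
        println!("({:.5}%) => {}", score * 100.0, text);
    }
}
```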

(binary image file changed: 176 KiB before, 176 KiB after)


@@ -1,43 +1,54 @@
use usls::{models::Clip, DataLoader, Options};
use anyhow::Result;
use usls::{models::Clip, DataLoader, Ops, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
let options_visual = Options::default().with_model("clip/visual-base-dyn.onnx")?;
#[derive(argh::FromArgs)]
/// CLIP Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
// textual
let options_textual = Options::default()
.with_model("clip/textual-base-dyn.onnx")?
.with_tokenizer("clip/tokenizer.json")?;
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options_visual = Options::jina_clip_v1_visual()
// clip_vit_b32_visual()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let options_textual = Options::jina_clip_v1_textual()
// clip_vit_b32_textual()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = Clip::new(options_visual, options_textual)?;
// texts
let texts = vec![
"A photo of a dinosaur ".to_string(),
"A photo of a cat".to_string(),
"A photo of a dog".to_string(),
"几个胡萝卜".to_string(),
"There are some playing cards on a striped table cloth".to_string(),
"There is a doll with red hair and a clock on a table".to_string(),
"Some people holding wine glasses in a restaurant".to_string(),
"A photo of a dinosaur",
"A photo of a cat",
"A photo of a dog",
"Some carrots",
"There are some playing cards on a striped table cloth",
"There is a doll with red hair and a clock on a table",
"Some people holding wine glasses in a restaurant",
];
let feats_text = model.encode_texts(&texts)?; // [n, ndim]
// load image
// load images
let dl = DataLoader::new("./examples/clip/images")?.build()?;
// loop
// run
for (images, paths) in dl {
let feats_image = model.encode_images(&images).unwrap();
let feats_image = model.encode_images(&images)?;
// use image to query texts
let matrix = match feats_image.embedding() {
Some(x) => x.dot2(feats_text.embedding().unwrap())?,
None => continue,
};
let matrix = Ops::dot2(&feats_image, &feats_text)?;
// summary
for i in 0..paths.len() {
let probs = &matrix[i];
let (id, &score) = probs
@@ -52,7 +63,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
paths[i].display(),
&texts[id]
);
println!("{:?}\n", probs);
}
}


@@ -0,0 +1,6 @@
## Quick Start
```shell
cargo run -r -F cuda --example convnext -- --device cuda --dtype fp16
```

examples/convnext/main.rs (new file, +52)

@@ -0,0 +1,52 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::convnext_v2_atto()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -0,0 +1,5 @@
## Quick Start
```shell
cargo run -r --example d-fine
```

examples/d-fine/main.rs (new file, +28)

@@ -0,0 +1,28 @@
use anyhow::Result;
use usls::{models::RTDETR, Annotator, DataLoader, Options};
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// options
let options = Options::d_fine_n_coco().commit()?;
let mut model = RTDETR::new(options)?;
// load
let x = [DataLoader::try_read("./assets/bus.jpg")?];
// run
let y = model.forward(&x)?;
println!("{:?}", y);
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
}


@@ -0,0 +1,5 @@
## Quick Start
```shell
cargo run -r --example dataloader
```


@@ -1,66 +1,45 @@
use usls::{
models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOTask, YOLOVersion,
};
use usls::DataLoader;
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::ERROR)
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let options = Options::new()
.with_device(Device::Cuda(0))
.with_model("yolo/v8-m-dyn.onnx")?
.with_yolo_version(YOLOVersion::V8)
.with_yolo_task(YOLOTask::Detect)
.with_batch(2)
.with_ixx(0, 2, (416, 640, 800).into())
.with_ixx(0, 3, (416, 640, 800).into())
.with_confs(&[0.2]);
let mut model = YOLO::new(options)?;
// build annotator
let annotator = Annotator::new()
.with_bboxes_thickness(4)
.with_saveout("YOLO-DataLoader");
// build dataloader
let dl = DataLoader::new(
// 1. iterator
let dl = DataLoader::try_from(
// "images/bus.jpg", // remote image
// "../images", // image folder
// "../demo.mp4", // local video
// "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
// "rtsp://admin:xyz@192.168.2.217:554/h265/ch1/", // rtsp h264 stream
// "./assets/bus.jpg", // local image
"../7.mp4",
"./assets/bus.jpg", // local image
)?
.with_batch(1)
.with_progress_bar(true)
.build()?;
let mut viewer = Viewer::new().with_delay(10).with_scale(1.).resizable(true);
// iteration
for (xs, _) in dl {
// inference & annotate
let ys = model.run(&xs)?;
let images_plotted = annotator.plot(&xs, &ys, false)?;
// show image
viewer.imshow(&images_plotted)?;
// check out window and key event
if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
break;
}
// write video
viewer.write_batch(&images_plotted)?;
for (_xs, _paths) in dl {
println!("Paths: {:?}", _paths);
}
// finish video write
viewer.finish_write()?;
// 2. read one image
let image = DataLoader::try_read("./assets/bus.jpg")?;
println!(
"Read one image. Height: {}, Width: {}",
image.height(),
image.width()
);
// images -> video
// DataLoader::is2v("runs/YOLO-DataLoader", &["runs", "is2v"], 24)?;
// 3. read several images
let images = DataLoader::try_read_batch(&[
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
])?;
println!("Read {} images.", images.len());
Ok(())
}
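
The commented-out sources above (remote images, folders, local and remote videos, RTSP streams) go through the same iterator. A minimal sketch of streaming the remote video listed in those comments in batches, assuming video sources are wired up the same way and reusing only the builder calls shown in this diff (`with_batch`, `with_progress_bar`, `build`):

```rust
use usls::DataLoader;

fn main() -> anyhow::Result<()> {
    // Same iterator pattern as case 1, fed the remote video from the
    // comments above and batched two frames at a time.
    let dl = DataLoader::try_from(
        "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4",
    )?
    .with_batch(2)
    .with_progress_bar(true)
    .build()?;

    for (_frames, paths) in dl {
        println!("Paths: {:?}", paths);
    }
    Ok(())
}
```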


@@ -4,15 +4,6 @@
cargo run -r --example db
```
### Speed test
| Model | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
| --------------- | ---------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------ |
| ppocr-v3-db-dyn | 640x640 | 1.8585 | 2.5739 | 4.3314 |
| ppocr-v4-db-dyn | 640x640 | 2.0507 | 2.8264 | 6.6064 |
***Test on RTX3060***
## Results
![](https://github.com/jamjamjon/assets/releases/download/db/demo-paper.png)


@@ -1,35 +1,48 @@
use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_ixx(0, 0, (1, 4, 8).into())
.with_ixx(0, 2, (608, 960, 1280).into())
.with_ixx(0, 3, (608, 960, 1280).into())
// .with_trt(0)
.with_confs(&[0.4])
.with_min_width(5.0)
.with_min_height(12.0)
.with_model("db/ppocr-v4-db-dyn.onnx")?;
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::ppocr_det_v4_server_ch()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = DB::new(options)?;
// load image
let x = [
DataLoader::try_read("images/db.png")?,
DataLoader::try_read("images/street.jpg")?,
];
let x = DataLoader::try_read_batch(&[
"images/table.png",
"images/table1.jpg",
"images/table2.png",
"images/table-ch.jpg",
"images/db.png",
"images/street.jpg",
])?;
// run
let y = model.run(&x)?;
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_bboxes(true)
.without_mbrs(true)
.with_polygons_alpha(60)
.with_contours_color([255, 105, 180, 255])
.without_mbrs(true)
.with_saveout("DB");
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())

examples/deim/README.md (new file, +7)

@@ -0,0 +1,7 @@
## Quick Start
```shell
cargo run -r --example deim
```

examples/deim/main.rs (new file, +28)

@@ -0,0 +1,28 @@
use anyhow::Result;
use usls::{models::RTDETR, Annotator, DataLoader, Options};
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// options
let options = Options::deim_dfine_s_coco().commit()?;
let mut model = RTDETR::new(options)?;
// load
let x = [DataLoader::try_read("./assets/bus.jpg")?];
// run
let y = model.forward(&x)?;
println!("{:?}", y);
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
}

examples/deit/README.md (new file, +7)

@@ -0,0 +1,7 @@
## Quick Start
```shell
cargo run -r -F cuda --example deit -- --device cuda --dtype fp16
```

examples/deit/main.rs (new file, +52)

@@ -0,0 +1,52 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::deit_tiny_distill()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -1,24 +1,26 @@
use anyhow::Result;
use usls::{models::DepthAnything, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// options
let options = Options::default()
// .with_model("depth-anything/v1-s-dyn.onnx")?
.with_model("depth-anything/v2-s.onnx")?
.with_ixx(0, 2, (384, 512, 1024).into())
.with_ixx(0, 3, (384, 512, 1024).into());
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// build model
let options = Options::depth_anything_v2_small().commit()?;
let mut model = DepthAnything::new(options)?;
// load
let x = [DataLoader::try_read("images/street.jpg")?];
// run
let y = model.run(&x)?;
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_colormap("Turbo")
.with_saveout("Depth-Anything");
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())


@@ -0,0 +1,10 @@
## Quick Start
```shell
cargo run -r -F cuda --example depth-pro -- --device cuda
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/depth-pro/demo-depth-pro.png)


@@ -1,25 +1,47 @@
use anyhow::Result;
use usls::{models::DepthPro, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// options
let options = Options::default()
.with_model("depth-pro/q4f16.onnx")? // bnb4, f16
.with_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
.with_ixx(0, 1, 3.into()) // channel
.with_ixx(0, 2, 1536.into()) // height
.with_ixx(0, 3, 1536.into()); // width
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// dtype
#[argh(option, default = "String::from(\"q4f16\")")]
dtype: String,
/// source image
#[argh(option, default = "String::from(\"images/street.jpg\")")]
source: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// model
let options = Options::depth_pro()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = DepthPro::new(options)?;
// load
let x = [DataLoader::try_read("images/street.jpg")?];
let x = [DataLoader::try_read(&args.source)?];
// run
let y = model.run(&x)?;
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_colormap("Turbo")
.with_saveout("Depth-Pro");
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())


@@ -1,40 +1,25 @@
use usls::{models::Dinov2, DataLoader, Options};
use anyhow::Result;
use usls::{models::DINOv2, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("dinov2/s-dyn.onnx")?
.with_ixx(0, 2, 224.into())
.with_ixx(0, 3, 224.into());
let mut model = Dinov2::new(options)?;
let x = [DataLoader::try_read("images/bus.jpg")?];
let y = model.run(&x)?;
println!("{y:?}");
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// TODO:
// query from vector
// let ys = model.query_from_vec(
// "./assets/bus.jpg",
// &[
// "./examples/dinov2/images/bus.jpg",
// "./examples/dinov2/images/1.jpg",
// "./examples/dinov2/images/2.jpg",
// ],
// Metric::L2,
// )?;
// images
let xs = [
DataLoader::try_read("./assets/bus.jpg")?,
DataLoader::try_read("./assets/bus.jpg")?,
];
// or query from folder
// let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;
// model
let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;
let mut model = DINOv2::new(options)?;
// results
// for (i, y) in ys.iter().enumerate() {
// println!(
// "Top-{:<3}{:.7} {}",
// i + 1,
// y.1,
// y.2.canonicalize()?.display()
// );
// }
// encode images
let y = model.encode_images(&xs)?;
println!("Feat shape: {:?}", y.shape());
Ok(())
}
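
The commented-out TODO block above sketches similarity-based querying (e.g. `Metric::L2`, `Metric::IP`) over these image embeddings. Independent of the usls types, the core of such a query is a vector similarity; a plain-Rust cosine-similarity sketch with illustrative data:

```rust
// Cosine similarity between two feature vectors; plain Rust, no usls
// types involved. The inputs in main() are illustrative only.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 {
        0.0
    } else {
        dot / (na * nb)
    }
}

fn main() {
    let a = [0.1_f32, 0.9, 0.3];
    let b = [0.1_f32, 0.8, 0.4];
    println!("cosine similarity: {:.4}", cosine_similarity(&a, &b));
}
```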


@@ -0,0 +1,10 @@
## Quick Start
```shell
cargo run -r -F cuda --example doclayout-yolo -- --device cuda
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/yolo/demo-doclayout-yolo.png)


@@ -0,0 +1,42 @@
use anyhow::Result;
use usls::{models::YOLO, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let config = Options::doclayout_yolo_docstructbench()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = YOLO::new(config)?;
// load images
let xs = [DataLoader::try_read("images/academic.jpg")?];
// run
let ys = model.forward(&xs)?;
// println!("{:?}", ys);
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout("doclayout-yolo");
annotator.annotate(&xs, &ys);
model.summary();
Ok(())
}

examples/fast/README.md (new file, +6)

@@ -0,0 +1,6 @@
## Quick Start
```shell
cargo run -r --example fast
```

examples/fast/main.rs (new file, +65)

@@ -0,0 +1,65 @@
use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options, Scale};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// scale
#[argh(option, default = "String::from(\"t\")")]
scale: String,
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = match args.scale.as_str().try_into()? {
Scale::T => Options::fast_tiny(),
Scale::S => Options::fast_small(),
Scale::B => Options::fast_base(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
};
let mut model = DB::new(
options
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?,
)?;
// load image
let x = DataLoader::try_read_batch(&[
"images/table.png",
"images/table1.jpg",
"images/table2.png",
"images/table-ch.jpg",
"images/db.png",
"images/street.jpg",
])?;
// run
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_bboxes(true)
.without_mbrs(true)
.with_polygons_alpha(60)
.with_contours_color([255, 105, 180, 255])
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
}


@@ -0,0 +1,5 @@
## Quick Start
```shell
cargo run -r -F cuda --example fastsam -- --device cuda
```

examples/fastsam/main.rs (new file, +45)

@@ -0,0 +1,45 @@
use anyhow::Result;
use usls::{models::YOLO, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"fp16\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let config = Options::fastsam_s()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = YOLO::new(config)?;
// load images
let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default()
.without_masks(true)
.with_bboxes_thickness(3)
.with_saveout("fastsam");
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -0,0 +1,13 @@
## Quick Start
```shell
cargo run -r -F cuda --example fastvit -- --device cuda --dtype fp16
```
```shell
0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
```

examples/fastvit/main.rs (new file, +57)

@@ -0,0 +1,57 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::fastvit_t8_distill()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// results
for (i, y) in ys.iter().enumerate() {
println!("{}: {:?}", i, y);
}
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -0,0 +1,30 @@
## Quick Start
```shell
cargo run -r -F cuda --example florence2 -- --device cuda --scale base --dtype fp16
```
```Shell
Task: Caption(0)
Ys([Y { Texts: [Text("A green car parked in front of a yellow building.")] }, Y { Texts: [Text("A group of people walking down a street next to a bus.")] }])
Task: Caption(1)
Ys([Y { Texts: [Text("The image shows a green car parked in front of a yellow building with two brown doors. The car is on the road, and the building has a wall and a tree in the background.")] }, Y { Texts: [Text("The image shows a group of people walking down a street next to a bus, with a building in the background. The bus is likely part of the World Electric Emission Bus, which is a new bus that will be launched in Madrid. The people are walking on the road, and there are trees and a sign board to the left of the bus.")] }])
Task: Caption(2)
Ys([Y { Texts: [Text("The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is a light blue color with silver rims and appears to be in good condition. The building has a sloping roof and is painted in a bright yellow color. The sky is blue and there are trees in the background. The overall mood of the image is peaceful and serene.")] }, Y { Texts: [Text("The image shows a blue and white bus with the logo of the Brazilian football club, Cero Emisiones, on the side. The bus is parked on a street with a building in the background. There are several people walking on the sidewalk in front of the bus, some of them are carrying bags and one person is holding a camera. The sky is blue and there are trees and a traffic light visible in the top right corner of the image. The image appears to be taken during the day.")] }])
```
## Results
| Task | Demo |
| -----| ------|
|Caption-To-Phrase-Grounding | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Caption-To-Phrase-Grounding-car.png' alt=''> |
| Ocr-With-Region | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Ocr-With-Region.png' alt=''>|
| Dense-Region-Caption | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Dense-Region-Caption-car.png' alt=''>|
| Object-Detection | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Object-Detection-car.png' alt=''>|
| Region-Proposal | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Region-Proposal.png' alt=''>|
| Referring-Expression-Segmentation | <img src='https://github.com/jamjamjon/assets/releases/download/florence2/Referring-Expression-Segmentation.png' alt=''>|


@@ -1,157 +1,176 @@
use usls::{models::Florence2, Annotator, DataLoader, Options, Task};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let batch_size = 3;
// vision encoder
let options_vision_encoder = Options::default()
.with_model("florence2/base-vision-encoder-f16.onnx")?
.with_ixx(0, 2, (512, 768, 800).into())
.with_ixx(0, 3, 768.into())
.with_ixx(0, 0, (1, batch_size as _, 8).into());
// text embed
let options_text_embed = Options::default()
.with_model("florence2/base-embed-tokens-f16.onnx")?
.with_tokenizer("florence2/tokenizer.json")?
.with_batch(batch_size);
// transformer encoder
let options_encoder = Options::default()
.with_model("florence2/base-encoder-f16.onnx")?
.with_batch(batch_size);
// transformer decoder
let options_decoder = Options::default()
.with_model("florence2/base-decoder-f16.onnx")?
.with_batch(batch_size);
// transformer decoder merged
let options_decoder_merged = Options::default()
.with_model("florence2/base-decoder-merged-f16.onnx")?
.with_batch(batch_size);
// build model
let mut model = Florence2::new(
options_vision_encoder,
options_text_embed,
options_encoder,
options_decoder,
options_decoder_merged,
)?;
// load images
let xs = [
// DataLoader::try_read("florence2/car.jpg")?, // for testing region-related tasks
DataLoader::try_read("florence2/car.jpg")?,
// DataLoader::try_read("images/db.png")?,
DataLoader::try_read("assets/bus.jpg")?,
];
// region-related tasks
let quantizer = usls::Quantizer::default();
// let coords = [449., 270., 556., 372.]; // wheel
let coords = [31., 156., 581., 373.]; // car
let (width_car, height_car) = (xs[0].width(), xs[0].height());
let quantized_coords = quantizer.quantize(&coords, (width_car as _, height_car as _));
// run with tasks
let ys = model.run_with_tasks(
&xs,
&[
// w/ inputs
Task::Caption(0),
Task::Caption(1),
Task::Caption(2),
Task::Ocr,
Task::OcrWithRegion,
Task::RegionProposal,
Task::ObjectDetection,
Task::DenseRegionCaption,
// w/o inputs
Task::OpenSetDetection("a vehicle".into()),
Task::CaptionToPhraseGrounding(
"A vehicle with two wheels parked in front of a building.".into(),
),
Task::ReferringExpressionSegmentation("a vehicle".into()),
Task::RegionToSegmentation(
quantized_coords[0],
quantized_coords[1],
quantized_coords[2],
quantized_coords[3],
),
Task::RegionToCategory(
quantized_coords[0],
quantized_coords[1],
quantized_coords[2],
quantized_coords[3],
),
Task::RegionToDescription(
quantized_coords[0],
quantized_coords[1],
quantized_coords[2],
quantized_coords[3],
),
],
)?;
// annotator
let annotator = Annotator::new()
.without_bboxes_conf(true)
.with_bboxes_thickness(3)
.with_saveout_subs(&["Florence2"]);
for (task, ys_) in ys.iter() {
match task {
Task::Caption(_)
| Task::Ocr
| Task::RegionToCategory(..)
| Task::RegionToDescription(..) => {
println!("Task: {:?}\n{:?}\n", task, ys_)
}
Task::DenseRegionCaption => {
let annotator = annotator.clone().with_saveout("Dense-Region-Caption");
annotator.annotate(&xs, ys_);
}
Task::RegionProposal => {
let annotator = annotator
.clone()
.without_bboxes_name(false)
.with_saveout("Region-Proposal");
annotator.annotate(&xs, ys_);
}
Task::ObjectDetection => {
let annotator = annotator.clone().with_saveout("Object-Detection");
annotator.annotate(&xs, ys_);
}
Task::OpenSetDetection(_) => {
let annotator = annotator.clone().with_saveout("Open-Set-Detection");
annotator.annotate(&xs, ys_);
}
Task::CaptionToPhraseGrounding(_) => {
let annotator = annotator
.clone()
.with_saveout("Caption-To-Phrase-Grounding");
annotator.annotate(&xs, ys_);
}
Task::ReferringExpressionSegmentation(_) => {
let annotator = annotator
.clone()
.with_saveout("Referring-Expression-Segmentation");
annotator.annotate(&xs, ys_);
}
Task::RegionToSegmentation(..) => {
let annotator = annotator.clone().with_saveout("Region-To-Segmentation");
annotator.annotate(&xs, ys_);
}
Task::OcrWithRegion => {
let annotator = annotator.clone().with_saveout("Ocr-With-Region");
annotator.annotate(&xs, ys_);
}
_ => (),
}
}
Ok(())
}
use anyhow::Result;
use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Task};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// scale
#[argh(option, default = "String::from(\"base\")")]
scale: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// load images
let xs = [
DataLoader::try_read("images/green-car.jpg")?,
DataLoader::try_read("assets/bus.jpg")?,
];
// build model
let (
options_vision_encoder,
options_text_embed,
options_encoder,
options_decoder,
options_decoder_merged,
) = match args.scale.as_str().try_into()? {
Scale::B => (
Options::florence2_visual_encoder_base(),
Options::florence2_textual_embed_base(),
Options::florence2_texual_encoder_base(),
Options::florence2_texual_decoder_base(),
Options::florence2_texual_decoder_merged_base(),
),
Scale::L => todo!(),
_ => anyhow::bail!("Unsupported Florence2 scale."),
};
let mut model = Florence2::new(
options_vision_encoder
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_text_embed
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_encoder
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_decoder
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_decoder_merged
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
)?;
// tasks
let tasks = [
// w inputs
Task::Caption(0),
Task::Caption(1),
Task::Caption(2),
Task::Ocr,
// Task::OcrWithRegion,
Task::RegionProposal,
Task::ObjectDetection,
Task::DenseRegionCaption,
// w/o inputs
Task::OpenSetDetection("a vehicle"),
Task::CaptionToPhraseGrounding("A vehicle with two wheels parked in front of a building."),
Task::ReferringExpressionSegmentation("a vehicle"),
Task::RegionToSegmentation(
// 31, 156, 581, 373, // car
449, 270, 556, 372, // wheel
),
Task::RegionToCategory(
// 31, 156, 581, 373,
449, 270, 556, 372,
),
Task::RegionToDescription(
// 31, 156, 581, 373,
449, 270, 556, 372,
),
];
// annotator
let annotator = Annotator::new()
.without_bboxes_conf(true)
.with_bboxes_thickness(3)
.with_saveout_subs(&["Florence2"]);
// inference
for task in tasks.iter() {
let ys = model.forward(&xs, task)?;
// annotate
match task {
Task::Caption(_)
| Task::Ocr
| Task::RegionToCategory(..)
| Task::RegionToDescription(..) => {
println!("Task: {:?}\n{:?}\n", task, &ys)
}
Task::DenseRegionCaption => {
let annotator = annotator.clone().with_saveout("Dense-Region-Caption");
annotator.annotate(&xs, &ys);
}
Task::RegionProposal => {
let annotator = annotator
.clone()
.without_bboxes_name(false)
.with_saveout("Region-Proposal");
annotator.annotate(&xs, &ys);
}
Task::ObjectDetection => {
let annotator = annotator.clone().with_saveout("Object-Detection");
annotator.annotate(&xs, &ys);
}
Task::OpenSetDetection(_) => {
let annotator = annotator.clone().with_saveout("Open-Set-Detection");
annotator.annotate(&xs, &ys);
}
Task::CaptionToPhraseGrounding(_) => {
let annotator = annotator
.clone()
.with_saveout("Caption-To-Phrase-Grounding");
annotator.annotate(&xs, &ys);
}
Task::ReferringExpressionSegmentation(_) => {
let annotator = annotator
.clone()
.with_saveout("Referring-Expression-Segmentation");
annotator.annotate(&xs, &ys);
}
Task::RegionToSegmentation(..) => {
let annotator = annotator.clone().with_saveout("Region-To-Segmentation");
annotator.annotate(&xs, &ys);
}
Task::OcrWithRegion => {
let annotator = annotator.clone().with_saveout("Ocr-With-Region");
annotator.annotate(&xs, &ys);
}
_ => (),
}
}
model.summary();
Ok(())
}


@@ -1,7 +1,7 @@
## Quick Start
```shell
cargo run -r --example grounding-dino
cargo run -r -F cuda --example grounding-dino -- --device cuda --dtype fp16
```


@@ -1,41 +1,72 @@
use anyhow::Result;
use usls::{models::GroundingDINO, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let opts = Options::default()
.with_ixx(0, 0, (1, 1, 4).into())
.with_ixx(0, 2, (640, 800, 1200).into())
.with_ixx(0, 3, (640, 1200, 1200).into())
// .with_i10((1, 1, 4).into())
// .with_i11((256, 256, 512).into())
// .with_i20((1, 1, 4).into())
// .with_i21((256, 256, 512).into())
// .with_i30((1, 1, 4).into())
// .with_i31((256, 256, 512).into())
// .with_i40((1, 1, 4).into())
// .with_i41((256, 256, 512).into())
// .with_i50((1, 1, 4).into())
// .with_i51((256, 256, 512).into())
// .with_i52((256, 256, 512).into())
.with_model("grounding-dino/swint-ogc-dyn-u8.onnx")? // TODO: current onnx model does not support bs > 1
// .with_model("grounding-dino/swint-ogc-dyn-f32.onnx")?
.with_tokenizer("grounding-dino/tokenizer.json")?
.with_confs(&[0.2])
.with_profile(false);
let mut model = GroundingDINO::new(opts)?;
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
// Load images and set class names
let x = [DataLoader::try_read("images/bus.jpg")?];
let texts = [
"person", "hand", "shoes", "bus", "dog", "cat", "sign", "tie", "monitor", "window",
"glasses", "tree", "head",
];
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
// Run and annotate
let y = model.run(&x, &texts)?;
/// source image
#[argh(option, default = "vec![String::from(\"./assets/bus.jpg\")]")]
source: Vec<String>,
/// open class names
#[argh(
option,
default = "vec![
String::from(\"person\"),
String::from(\"hand\"),
String::from(\"shoes\"),
String::from(\"bus\"),
String::from(\"dog\"),
String::from(\"cat\"),
String::from(\"sign\"),
String::from(\"tie\"),
String::from(\"monitor\"),
String::from(\"glasses\"),
String::from(\"tree\"),
String::from(\"head\"),
]"
)]
labels: Vec<String>,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
let options = Options::grounding_dino_tiny()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
.commit()?;
let mut model = GroundingDINO::new(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(4)
.with_saveout("GroundingDINO");
annotator.annotate(&x, &y);
.with_saveout(model.spec());
annotator.annotate(&xs, &ys);
// summary
model.summary();
Ok(())
}

examples/hub/README.md (new file, +5)

@@ -0,0 +1,5 @@
## Quick Start
```shell
RUST_LOG=usls=info cargo run -r --example hub
```

examples/hub/main.rs (new file, +26)

@@ -0,0 +1,26 @@
use usls::Hub;
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// 1. Download from default github release
let path = Hub::default().try_fetch("images/bus.jpg")?;
println!("Fetch one image: {:?}", path);
// 2. Download from specific github release url
let path = Hub::default()
.try_fetch("https://github.com/jamjamjon/assets/releases/download/images/bus.jpg")?;
println!("Fetch one file: {:?}", path);
// 3. Fetch tags and files
let hub = Hub::default().with_owner("jamjamjon").with_repo("usls");
for tag in hub.tags().iter() {
let files = hub.files(tag);
println!("{} => {:?}", tag, files); // Should be empty
}
Ok(())
}


@@ -0,0 +1,6 @@
## Quick Start
```shell
cargo run -r --example linknet
```

examples/linknet/main.rs (new file, +65)

@@ -0,0 +1,65 @@
use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options, Scale};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// scale
#[argh(option, default = "String::from(\"t\")")]
scale: String,
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = match args.scale.as_str().try_into()? {
Scale::T => Options::linknet_r18(),
Scale::S => Options::linknet_r34(),
Scale::B => Options::linknet_r50(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
};
let mut model = DB::new(
options
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?,
)?;
// load image
let x = DataLoader::try_read_batch(&[
"images/table.png",
"images/table1.jpg",
"images/table2.png",
"images/table-ch.jpg",
"images/db.png",
"images/street.jpg",
])?;
// run
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_bboxes(true)
.without_mbrs(true)
.with_polygons_alpha(60)
.with_contours_color([255, 105, 180, 255])
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
}


@@ -0,0 +1,13 @@
## Quick Start
```shell
cargo run -r -F cuda --example mobileone -- --device cuda --dtype fp16
```
```shell
0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
```


@@ -0,0 +1,57 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::mobileone_s0()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// results
for (i, y) in ys.iter().enumerate() {
println!("{}: {:?}", i, y);
}
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -1,22 +1,24 @@
use usls::{models::MODNet, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// build model
let options = Options::default()
.with_model("modnet/dyn-f32.onnx")?
.with_ixx(0, 2, (416, 512, 800).into())
.with_ixx(0, 3, (416, 512, 800).into());
let options = Options::modnet_photographic().commit()?;
let mut model = MODNet::new(options)?;
// load image
let x = [DataLoader::try_read("images/liuyifei.png")?];
let xs = [DataLoader::try_read("images/liuyifei.png")?];
// run
let y = model.run(&x)?;
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout("MODNet");
annotator.annotate(&x, &y);
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -0,0 +1,10 @@
## Quick Start
```shell
cargo run -r --example picodet-layout
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/picodet/demo-layout-1x.png)


@@ -0,0 +1,31 @@
use anyhow::Result;
use usls::{models::PicoDet, Annotator, DataLoader, Options};
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// options
let options = Options::picodet_layout_1x()
// picodet_l_layout_3cls()
// picodet_l_layout_17cls()
.commit()?;
let mut model = PicoDet::new(options)?;
// load
let xs = [DataLoader::try_read("images/academic.jpg")?];
// annotator
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
// run
let ys = model.forward(&xs)?;
println!("{:?}", ys);
annotator.annotate(&xs, &ys);
Ok(())
}

examples/rtdetr/README.md (new file, +17)

@@ -0,0 +1,17 @@
## Quick Start
```shell
cargo run -r --example rtdetr
```
## Results
```
[Bboxes]: Found 5 objects
0: Bbox { xyxy: [47.969677, 397.81808, 246.22426, 904.8823], class_id: 0, name: Some("person"), confidence: 0.94432133 }
1: Bbox { xyxy: [668.0796, 399.28854, 810.3779, 880.7412], class_id: 0, name: Some("person"), confidence: 0.93386495 }
2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
```

examples/rtdetr/main.rs (new file, +43)

@@ -0,0 +1,43 @@
use anyhow::Result;
use usls::{models::RTDETR, Annotator, DataLoader, Options};
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// options
let options = Options::rtdetr_v2_s_coco()
// rtdetr_v1_r18vd_coco()
// rtdetr_v2_ms_coco()
// rtdetr_v2_m_coco()
// rtdetr_v2_l_coco()
// rtdetr_v2_x_coco()
.commit()?;
let mut model = RTDETR::new(options)?;
// load
let xs = [DataLoader::try_read("./assets/bus.jpg")?];
// run
let ys = model.forward(&xs)?;
// extract bboxes
for y in ys.iter() {
if let Some(bboxes) = y.bboxes() {
println!("[Bboxes]: Found {} objects", bboxes.len());
for (i, bbox) in bboxes.iter().enumerate() {
println!("{}: {:?}", i, bbox)
}
}
}
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -1,25 +1,26 @@
use anyhow::Result;
use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETONS_16};
fn main() -> Result<(), Box<dyn std::error::Error>> {
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// build model
let options = Options::default()
.with_model("rtmo/s-dyn.onnx")?
.with_nk(17)
.with_confs(&[0.3])
.with_kconfs(&[0.5]);
let mut model = RTMO::new(options)?;
let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
// load image
let x = [DataLoader::try_read("images/bus.jpg")?];
let xs = [DataLoader::try_read("images/bus.jpg")?];
// run
let y = model.run(&x)?;
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default()
.with_saveout("RTMO")
.with_saveout(model.spec())
.with_skeletons(&COCO_SKELETONS_16);
annotator.annotate(&x, &y);
annotator.annotate(&xs, &ys);
Ok(())
}


@@ -3,19 +3,18 @@
```Shell
# SAM
cargo run -r --example sam
cargo run -r -F cuda --example sam -- --device cuda --kind sam
# MobileSAM
cargo run -r --example sam -- --kind mobile-sam
cargo run -r -F cuda --example sam -- --device cuda --kind mobile-sam
# EdgeSAM
cargo run -r --example sam -- --kind edge-sam
cargo run -r -F cuda --example sam -- --device cuda --kind edge-sam
# SAM-HQ
cargo run -r --example sam -- --kind sam-hq
cargo run -r -F cuda --example sam -- --device cuda --kind sam-hq
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-car.png)


@@ -1,97 +1,73 @@
use clap::Parser;
use anyhow::Result;
use usls::{
models::{SamKind, SamPrompt, SAM},
Annotator, DataLoader, Options,
Annotator, DataLoader, Options, Scale,
};
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
pub struct Args {
#[arg(long, value_enum, default_value_t = SamKind::Sam)]
pub kind: SamKind,
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
#[arg(long, default_value_t = 0)]
pub device_id: usize,
/// scale
#[argh(option, default = "String::from(\"t\")")]
scale: String,
#[arg(long)]
pub use_low_res_mask: bool,
/// SAM kind
#[argh(option, default = "String::from(\"sam\")")]
kind: String,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();
// Options
let (options_encoder, options_decoder, saveout) = match args.kind {
SamKind::Sam => {
let options_encoder = Options::default()
// .with_model("sam/sam-vit-b-encoder.onnx")?;
.with_model("sam/sam-vit-b-encoder-u8.onnx")?;
let options_decoder = Options::default()
.with_sam_kind(SamKind::Sam)
// .with_model("sam/sam-vit-b-decoder.onnx")?;
// .with_model("sam/sam-vit-b-decoder-singlemask.onnx")?;
.with_model("sam/sam-vit-b-decoder-u8.onnx")?;
(options_encoder, options_decoder, "SAM")
}
SamKind::Sam2 => {
let options_encoder = Options::default()
// .with_model("sam/sam2-hiera-tiny-encoder.onnx")?;
// .with_model("sam/sam2-hiera-small-encoder.onnx")?;
.with_model("sam/sam2-hiera-base-plus-encoder.onnx")?;
let options_decoder = Options::default()
.with_sam_kind(SamKind::Sam2)
// .with_model("sam/sam2-hiera-tiny-decoder.onnx")?;
// .with_model("sam/sam2-hiera-small-decoder.onnx")?;
.with_model("sam/sam2-hiera-base-plus-decoder.onnx")?;
(options_encoder, options_decoder, "SAM2")
}
SamKind::MobileSam => {
let options_encoder =
Options::default().with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_sam_kind(SamKind::MobileSam)
.with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
(options_encoder, options_decoder, "Mobile-SAM")
}
SamKind::SamHq => {
let options_encoder = Options::default().with_model("sam/sam-hq-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_sam_kind(SamKind::SamHq)
.with_model("sam/sam-hq-vit-t-decoder.onnx")?;
(options_encoder, options_decoder, "SAM-HQ")
}
SamKind::EdgeSam => {
let options_encoder = Options::default().with_model("sam/edge-sam-3x-encoder.onnx")?;
let options_decoder = Options::default()
.with_sam_kind(SamKind::EdgeSam)
.with_model("sam/edge-sam-3x-decoder.onnx")?;
(options_encoder, options_decoder, "Edge-SAM")
}
};
let options_encoder = options_encoder
.with_cuda(args.device_id)
.with_ixx(0, 2, (800, 1024, 1024).into())
.with_ixx(0, 3, (800, 1024, 1024).into());
let options_decoder = options_decoder
.with_cuda(args.device_id)
.use_low_res_mask(args.use_low_res_mask)
.with_find_contours(true);
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// Build model
let (options_encoder, options_decoder) = match args.kind.as_str().try_into()? {
SamKind::Sam => (
Options::sam_v1_base_encoder(),
Options::sam_v1_base_decoder(),
),
SamKind::Sam2 => match args.scale.as_str().try_into()? {
Scale::T => (Options::sam2_tiny_encoder(), Options::sam2_tiny_decoder()),
Scale::S => (Options::sam2_small_encoder(), Options::sam2_small_decoder()),
Scale::B => (
Options::sam2_base_plus_encoder(),
Options::sam2_base_plus_decoder(),
),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
},
SamKind::MobileSam => (
Options::mobile_sam_tiny_encoder(),
Options::mobile_sam_tiny_decoder(),
),
SamKind::SamHq => (
Options::sam_hq_tiny_encoder(),
Options::sam_hq_tiny_decoder(),
),
SamKind::EdgeSam => (
Options::edge_sam_3x_encoder(),
Options::edge_sam_3x_decoder(),
),
};
let options_encoder = options_encoder
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let options_decoder = options_decoder.commit()?;
let mut model = SAM::new(options_encoder, options_decoder)?;
// Load image
let xs = [
DataLoader::try_read("images/truck.jpg")?,
// DataLoader::try_read("images/dog.jpg")?,
];
let xs = [DataLoader::try_read("images/truck.jpg")?];
// Build annotator
let annotator = Annotator::default().with_saveout(saveout);
let annotator = Annotator::default().with_saveout(model.spec());
// Prompt
let prompts = vec![
@@ -102,7 +78,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
];
// Run & Annotate
let ys = model.run(&xs, &prompts)?;
let ys = model.forward(&xs, &prompts)?;
annotator.annotate(&xs, &ys);
Ok(())


@@ -1,10 +1,9 @@
## Quick Start
```shell
cargo run -r --example sapiens
cargo run -r -F cuda --example sapiens -- --device cuda
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/sapiens/demo.png)

View File

@@ -1,27 +1,38 @@
use usls::{
models::{Sapiens, SapiensTask},
Annotator, DataLoader, Options, BODY_PARTS_28,
};
use anyhow::Result;
use usls::{models::Sapiens, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build
let options = Options::default()
.with_model("sapiens/seg-0.3b-dyn.onnx")?
.with_sapiens_task(SapiensTask::Seg)
.with_names(&BODY_PARTS_28);
let options = Options::sapiens_seg_0_3b()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = Sapiens::new(options)?;
// load
let x = [DataLoader::try_read("images/paul-george.jpg")?];
// run
let y = model.run(&x)?;
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_masks(true)
.with_polygons_name(false)
.with_saveout("Sapiens");
.with_polygons_name(true)
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())

View File

@@ -0,0 +1,9 @@
## Quick Start
```shell
cargo run -r -F cuda --example slanet -- --device cuda
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/slanet/demo.png)

48
examples/slanet/main.rs Normal file
View File

@@ -0,0 +1,48 @@
use anyhow::Result;
use usls::{models::SLANet, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// source
#[argh(option, default = "String::from(\"images/table.png\")")]
source: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::slanet_lcnet_v2_mobile_ch()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = SLANet::new(options)?;
// load
let xs = DataLoader::try_read_batch(&[args.source])?;
// run
let ys = model.forward(&xs)?;
println!("{:?}", ys);
// annotate
let annotator = Annotator::default()
.with_keypoints_radius(2)
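// connect the four corner keypoints (0-1-2-3-0) of each detection into a closed quadrilateral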
.with_skeletons(&[(0, 1), (1, 2), (2, 3), (3, 0)])
.with_saveout(model.spec());
annotator.annotate(&xs, &ys);
// summary
model.summary();
Ok(())
}

View File

@@ -1,29 +1,21 @@
## Quick Start
```shell
cargo run -r --example svtr
cargo run -r -F cuda --example svtr -- --device cuda
```
### Speed test
| Model | Width | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
| --------------------------- | :---: | :--------------------------------------: | :--------------------------------------: | :----------------------------------: |
| ppocr-v4-server-svtr-ch-dyn | 1500 | 4.2116 | 13.0013 | 20.8673 |
| ppocr-v4-svtr-ch-dyn | 1500 | 2.0435 | 3.1959 | 10.1750 |
| ppocr-v3-svtr-ch-dyn | 1500 | 1.8596 | 2.9401 | 6.8210 |
***Tested on an RTX 3060***
## Results
```shell
["./examples/svtr/images/5.png"]: Some(["are closely jointed. Some examples are illustrated in Fig.7."])
["./examples/svtr/images/6.png"]: Some(["小菊儿胡同71号"])
["./examples/svtr/images/4.png"]: Some(["我在南锣鼓捣猫呢"])
["./examples/svtr/images/1.png"]: Some(["你有这么高速运转的机械进入中国,记住我给出的原理"])
["./examples/svtr/images/2.png"]: Some(["冀B6G000"])
["./examples/svtr/images/9.png"]: Some(["from the background, but also separate text instances which"])
["./examples/svtr/images/8.png"]: Some(["110022345"])
["./examples/svtr/images/3.png"]: Some(["粤A·68688"])
["./examples/svtr/images/7.png"]: Some(["Please lower your volume"])
["./examples/svtr/images/license-ch-2.png"]: Ys([Y { Texts: [Text("粤A·68688")] }])
["./examples/svtr/images/license-ch.png"]: Ys([Y { Texts: [Text("冀B6G000")] }])
["./examples/svtr/images/sign-ch-2.png"]: Ys([Y { Texts: [Text("我在南锣鼓捣猫呢")] }])
["./examples/svtr/images/sign-ch.png"]: Ys([Y { Texts: [Text("小菊儿胡同71号")] }])
["./examples/svtr/images/text-110022345.png"]: Ys([Y { Texts: [Text("110022345")] }])
["./examples/svtr/images/text-ch.png"]: Ys([Y { Texts: [Text("你有这么高速运转的机械进入中国,记住我给出的原理")] }])
["./examples/svtr/images/text-en-2.png"]: Ys([Y { Texts: [Text("from the background, but also separate text instances which")] }])
["./examples/svtr/images/text-en-dark.png"]: Ys([Y { Texts: [Text("Please lower your volume")] }])
["./examples/svtr/images/text-en.png"]: Ys([Y { Texts: [Text("are closely jointed. Some examples are illustrated in Fig.7.")] }])
["./examples/svtr/images/text-hello-rust-handwritten.png"]: Ys([Y { Texts: [Text("HeloRuSt")] }])
```

(Binary image diffs not shown: nine example image files changed, one added.)
View File

@@ -1,24 +1,44 @@
use anyhow::Result;
use usls::{models::SVTR, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::default()
.with_ixx(0, 0, (1, 2, 8).into())
.with_ixx(0, 2, (320, 960, 1600).into())
.with_ixx(0, 3, (320, 960, 1600).into())
.with_confs(&[0.2])
.with_vocab("svtr/ppocr_rec_vocab.txt")?
.with_model("svtr/ppocr-v4-svtr-ch-dyn.onnx")?;
let options = Options::ppocr_rec_v4_ch()
// svtr_v2_teacher_ch()
// .with_batch_size(2)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = SVTR::new(options)?;
// load images
let dl = DataLoader::new("./examples/svtr/images")?.build()?;
let dl = DataLoader::new("./examples/svtr/images")?
.with_batch(model.batch() as _)
.with_progress_bar(false)
.build()?;
// run
for (xs, paths) in dl {
let ys = model.run(&xs)?;
println!("{paths:?}: {:?}", ys[0].texts())
let ys = model.forward(&xs)?;
println!("{paths:?}: {:?}", ys)
}
// summary
model.summary();
Ok(())
}

13
examples/trocr/README.md Normal file
View File

@@ -0,0 +1,13 @@
## Quick Start
```shell
cargo run -r -F cuda --example trocr -- --device cuda --dtype fp16 --scale s --kind printed
cargo run -r -F cuda --example trocr -- --device cuda --dtype fp16 --scale s --kind hand-written
```
```shell
Ys([Y { Texts: [Text("PLEASE LOWER YOUR VOLUME")] }, Y { Texts: [Text("HELLO RUST")] }])
```

96
examples/trocr/main.rs Normal file
View File

@@ -0,0 +1,96 @@
use usls::{
models::{TrOCR, TrOCRKind},
DataLoader, Options, Scale,
};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// scale
#[argh(option, default = "String::from(\"s\")")]
scale: String,
/// kind
#[argh(option, default = "String::from(\"printed\")")]
kind: String,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// load images
let xs = DataLoader::try_read_batch(&[
"images/text-en-dark.png",
"images/text-hello-rust-handwritten.png",
])?;
// build model
let (options_encoder, options_decoder, options_decoder_merged) =
match args.scale.as_str().try_into()? {
Scale::S => match args.kind.as_str().try_into()? {
TrOCRKind::Printed => (
Options::trocr_encoder_small_printed(),
Options::trocr_decoder_small_printed(),
Options::trocr_decoder_merged_small_printed(),
),
TrOCRKind::HandWritten => (
Options::trocr_encoder_small_handwritten(),
Options::trocr_decoder_small_handwritten(),
Options::trocr_decoder_merged_small_handwritten(),
),
},
Scale::B => match args.kind.as_str().try_into()? {
TrOCRKind::Printed => (
Options::trocr_encoder_base_printed(),
Options::trocr_decoder_base_printed(),
Options::trocr_decoder_merged_base_printed(),
),
TrOCRKind::HandWritten => (
Options::trocr_encoder_base_handwritten(),
Options::trocr_decoder_base_handwritten(),
Options::trocr_decoder_merged_base_handwritten(),
),
},
x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
};
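// all three sub-models (encoder, decoder, merged decoder) share the same dtype, device and batch size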
let mut model = TrOCR::new(
options_encoder
.with_model_device(args.device.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_decoder
.with_model_device(args.device.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
options_decoder_merged
.with_model_device(args.device.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_batch_size(xs.len())
.commit()?,
)?;
// inference
let ys = model.forward(&xs)?;
println!("{:?}", ys);
// summary
model.summary();
Ok(())
}

View File

@@ -0,0 +1,5 @@
## Quick Start
```shell
RUST_LOG=usls=info cargo run -F ffmpeg -r --example viewer
```

43
examples/viewer/main.rs Normal file
View File

@@ -0,0 +1,43 @@
use usls::{DataLoader, Key, Viewer};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// source
#[argh(
option,
default = "String::from(\"http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4\")"
)]
source: String,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
let dl = DataLoader::new(&args.source)?.with_batch(1).build()?;
let mut viewer = Viewer::new().with_delay(5).with_scale(1.).resizable(true);
// run & annotate
for (xs, _paths) in dl {
// show image
viewer.imshow(&xs)?;
// check out window and key event
if !viewer.is_open() || viewer.is_key_pressed(Key::Escape) {
break;
}
// write video
viewer.write_batch(&xs)?
}
// finish video write
viewer.finish_write()?;
Ok(())
}

View File

@@ -1,7 +1,7 @@
## Quick Start
```shell
cargo run -r --example yolo-sam
cargo run -r -F cuda --example yolo-sam -- --device cuda
```
## Results

View File

@@ -1,31 +1,42 @@
use anyhow::Result;
use usls::{
models::{SamKind, SamPrompt, YOLOTask, YOLOVersion, SAM, YOLO},
Annotator, DataLoader, Options, Vision,
models::{SamPrompt, SAM, YOLO},
Annotator, DataLoader, Options, Scale,
};
fn main() -> Result<(), Box<dyn std::error::Error>> {
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build SAM
let options_encoder = Options::default().with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_find_contours(true)
.with_sam_kind(SamKind::Sam)
.with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
let (options_encoder, options_decoder) = (
Options::mobile_sam_tiny_encoder().commit()?,
Options::mobile_sam_tiny_decoder().commit()?,
);
let mut sam = SAM::new(options_encoder, options_decoder)?;
// build YOLOv8-Det
let options_yolo = Options::default()
.with_yolo_version(YOLOVersion::V8)
.with_yolo_task(YOLOTask::Detect)
.with_model("yolo/v8-m-dyn.onnx")?
.with_cuda(0)
.with_ixx(0, 2, (416, 640, 800).into())
.with_ixx(0, 3, (416, 640, 800).into())
.with_find_contours(false)
.with_confs(&[0.45]);
// build YOLOv8
let options_yolo = Options::yolo_detect()
.with_model_scale(Scale::N)
.with_model_version(8.0.into())
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut yolo = YOLO::new(options_yolo)?;
// load one image
let xs = [DataLoader::try_read("images/dog.jpg")?];
let xs = DataLoader::try_read_batch(&["images/dog.jpg"])?;
// build annotator
let annotator = Annotator::default()
@@ -36,11 +47,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_saveout("YOLO-SAM");
// run & annotate
let ys_det = yolo.run(&xs)?;
for y_det in ys_det {
let ys_det = yolo.forward(&xs)?;
for y_det in ys_det.iter() {
if let Some(bboxes) = y_det.bboxes() {
for bbox in bboxes {
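// use each detected box as a SAM box prompt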
let ys_sam = sam.run(
let ys_sam = sam.forward(
&xs,
&[SamPrompt::default().with_bbox(
bbox.xmin(),

View File

@@ -1,175 +1,65 @@
<h1 align='center'>YOLO-Series</h1>
| Detection | Instance Segmentation | Pose |
| :----------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------: |
| `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-det.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-seg.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-pose.png' width="300px">` |
| Detection | Instance Segmentation | Pose |
| :---------------: | :------------------------: |:---------------: |
| <img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-det.png' width="300px"> | <img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-seg.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-pose.png' width="300px"> |
| Classification | Obb |
| :------------------------: |:------------------------: |
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-cls.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-obb-2.png' width="628px">
| Head Detection | Fall Detection | Trash Detection |
| :------------------------: |:------------------------: |:------------------------: |
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-head.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-falldown.png' width="300px">|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-trash.png' width="300px">
| YOLO-World | Face Parsing | FastSAM |
| :------------------------: |:------------------------: |:------------------------: |
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-yolov8-world.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-face-parsing.png' width="300px">|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-fastsam.png' width="300px">
| Classification | Obb |
| :----------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
| `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-cls.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-obb-2.png' width="628px">` |
| Head Detection | Fall Detection | Trash Detection |
| :-----------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
| `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-head.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-falldown.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-trash.png' width="300px">` |
| YOLO-World | Face Parsing | FastSAM |
| :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: |
| `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-yolov8-world.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-face-parsing.png' width="300px">` | `<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-fastsam.png' width="300px">` |
## Quick Start
```Shell
# customized
cargo run -r --example yolo -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8
# Your customized YOLOv8 model
cargo run -r --example yolo -- --task detect --ver v8 --num-classes 6 --model xxx.onnx # YOLOv8
# Classify
cargo run -r --example yolo -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5
cargo run -r --example yolo -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8
cargo run -r --example yolo -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLOv11
cargo run -r --example yolo -- --task classify --ver 5 --scale s --image-width 224 --image-height 224 --num-classes 1000 --use-imagenet-1k-classes # YOLOv5
cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 # YOLOv8
cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224 # YOLOv11
# Detect
cargo run -r --example yolo -- --task detect --ver v5 --scale n # YOLOv5
cargo run -r --example yolo -- --task detect --ver v6 --scale n # YOLOv6
cargo run -r --example yolo -- --task detect --ver v7 --scale t # YOLOv7
cargo run -r --example yolo -- --task detect --ver v8 --scale n # YOLOv8
cargo run -r --example yolo -- --task detect --ver v9 --scale t # YOLOv9
cargo run -r --example yolo -- --task detect --ver v10 --scale n # YOLOv10
cargo run -r --example yolo -- --task detect --ver v11 --scale n # YOLOv11
cargo run -r --example yolo -- --task detect --ver rtdetr --scale l # RTDETR
cargo run -r --example yolo -- --task detect --ver v8 --model yolo/v8-s-world-v2-shoes.onnx # YOLOv8-world
cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes # YOLOv5
cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes # YOLOv6
cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes # YOLOv7
cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes # YOLOv8
cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes # YOLOv9
cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes # YOLOv10
cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes # YOLOv11
cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx # YOLOv8-world
# Pose
cargo run -r --example yolo -- --task pose --ver v8 --scale n # YOLOv8-Pose
cargo run -r --example yolo -- --task pose --ver v11 --scale n # YOLOv11-Pose
cargo run -r --example yolo -- --task pose --ver 8 --scale n # YOLOv8-Pose
cargo run -r --example yolo -- --task pose --ver 11 --scale n # YOLOv11-Pose
# Segment
cargo run -r --example yolo -- --task segment --ver v5 --scale n # YOLOv5-Segment
cargo run -r --example yolo -- --task segment --ver v8 --scale n # YOLOv8-Segment
cargo run -r --example yolo -- --task segment --ver v11 --scale n # YOLOv11-Segment
cargo run -r --example yolo -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # FastSAM
cargo run -r --example yolo -- --task segment --ver 5 --scale n # YOLOv5-Segment
cargo run -r --example yolo -- --task segment --ver 8 --scale n # YOLOv8-Segment
cargo run -r --example yolo -- --task segment --ver 11 --scale n # YOLOv11-Segment
# Obb
cargo run -r --example yolo -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb
cargo run -r --example yolo -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv11-Obb
cargo run -r --example yolo -- --ver 8 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv8-Obb
cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv11-Obb
```
**`cargo run -r --example yolo -- --help` for more options**
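The flags can be combined freely; a possible GPU invocation might look like this (a sketch — flag names mirror the `Args` struct in `main.rs`, and the model file is presumably resolved from task/version/scale unless `--model` is given):
```Shell
cargo run -r -F cuda --example yolo -- --task detect --ver 11 --scale n --device cuda:0 --dtype fp16 --use-coco-80-classes
```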
## YOLOs configs with `Options`
<details open>
<summary>Use official YOLO Models</summary>
```Rust
let options = Options::default()
.with_yolo_version(YOLOVersion::V5) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
.with_yolo_task(YOLOTask::Classify) // YOLOTask: Classify, Detect, Pose, Segment, Obb
.with_model("xxxx.onnx")?;
```
</details>
<details open>
<summary>Customize your own YOLO model</summary>
```Rust
// This config is for YOLOv8-Segment
use usls::{AnchorsPosition, BoxType, ClssType, YOLOPreds};
let options = Options::default()
.with_yolo_preds(
YOLOPreds {
bbox: Some(BoxType::Cxcywh),
clss: ClssType::Clss,
coefs: Some(true),
anchors: Some(AnchorsPosition::After),
..Default::default()
}
)
// .with_nc(80)
// .with_names(&COCO_CLASS_NAMES_80)
.with_model("xxxx.onnx")?;
```
</details>
## Other YOLOv8 Solution Models
| Model | Weights | Datasets|
|:---------------------: | :--------------------------: | :-------------------------------: |
| Face-Landmark Detection | [yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-n-face-dyn-f16.onnx) | |
| Head Detection | [yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-head-f16.onnx) | |
| Fall Detection | [yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-falldown-f16.onnx) | |
| Trash Detection | [yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-plastic-bag-f16.onnx) | |
| FaceParsing | [yolov8-face-parsing-dyn](https://github.com/jamjamjon/assets/releases/download/yolo/v8-face-parsing-dyn.onnx) | [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ/tree/master/face_parsing)<br />[[Processed YOLO labels]](https://github.com/jamjamjon/assets/releases/download/yolo/CelebAMask-HQ-YOLO-Labels.zip)[[Python Script]](../../scripts/CelebAMask-HQ-To-YOLO-Labels.py) |
## Export ONNX Models
<details close>
<summary>YOLOv5</summary>
[Here](https://docs.ultralytics.com/yolov5/tutorials/model_export/)
</details>
<details close>
<summary>YOLOv6</summary>
[Here](https://github.com/meituan/YOLOv6/tree/main/deploy/ONNX)
</details>
<details close>
<summary>YOLOv7</summary>
[Here](https://github.com/WongKinYiu/yolov7?tab=readme-ov-file#export)
</details>
<details close>
<summary>YOLOv8, YOLOv11</summary>
```Shell
pip install -U ultralytics
# export onnx model with dynamic shapes
yolo export model=yolov8m.pt format=onnx simplify dynamic
yolo export model=yolov8m-cls.pt format=onnx simplify dynamic
yolo export model=yolov8m-pose.pt format=onnx simplify dynamic
yolo export model=yolov8m-seg.pt format=onnx simplify dynamic
yolo export model=yolov8m-obb.pt format=onnx simplify dynamic
# export onnx model with fixed shapes
yolo export model=yolov8m.pt format=onnx simplify
yolo export model=yolov8m-cls.pt format=onnx simplify
yolo export model=yolov8m-pose.pt format=onnx simplify
yolo export model=yolov8m-seg.pt format=onnx simplify
yolo export model=yolov8m-obb.pt format=onnx simplify
```
</details>
<details close>
<summary>YOLOv9</summary>
[Here](https://github.com/WongKinYiu/yolov9/blob/main/export.py)
</details>
<details close>
<summary>YOLOv10</summary>
[Here](https://github.com/THU-MIG/yolov10#export)
</details>
| Model | Weights |
| :---------------------: | :------------------------------------------------------: |
| Face-Landmark Detection | [yolov8-n-face](https://github.com/jamjamjon/assets/releases/download/yolo/v8-n-face-fp16.onnx) |
| Head Detection | [yolov8-head](https://github.com/jamjamjon/assets/releases/download/yolo/v8-head-fp16.onnx) |
| Fall Detection | [yolov8-falldown](https://github.com/jamjamjon/assets/releases/download/yolo/v8-falldown-fp16.onnx) |
| Trash Detection | [yolov8-plastic-bag](https://github.com/jamjamjon/assets/releases/download/yolo/v8-plastic-bag-fp16.onnx) |
| FaceParsing | [yolov8-face-parsing-seg](https://github.com/jamjamjon/assets/releases/download/yolo/v8-face-parsing.onnx) |
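Any of these weights can be passed to the YOLO example via `--model`; for instance, head detection (a sketch, assuming the weight file is available locally or fetched by the model hub):
```Shell
cargo run -r --example yolo -- --task detect --ver 8 --model v8-head-fp16.onnx
```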

View File

@@ -1,171 +1,213 @@
use anyhow::Result;
use clap::Parser;
use usls::{
models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOScale, YOLOTask,
YOLOVersion, COCO_SKELETONS_16,
models::YOLO, Annotator, DataLoader, Options, COCO_CLASS_NAMES_80, COCO_SKELETONS_16,
IMAGENET_NAMES_1K,
};
#[derive(Parser, Clone)]
#[command(author, version, about, long_about = None)]
pub struct Args {
/// Path to the model
#[arg(long)]
pub model: Option<String>,
#[derive(argh::FromArgs, Debug)]
/// Example
struct Args {
/// model file
#[argh(option)]
model: Option<String>,
/// Input source path
#[arg(long, default_value_t = String::from("./assets/bus.jpg"))]
pub source: String,
/// source
#[argh(option, default = "String::from(\"./assets/bus.jpg\")")]
source: String,
/// YOLO Task
#[arg(long, value_enum, default_value_t = YOLOTask::Detect)]
pub task: YOLOTask,
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// YOLO Version
#[arg(long, value_enum, default_value_t = YOLOVersion::V8)]
pub ver: YOLOVersion,
/// task
#[argh(option, default = "String::from(\"det\")")]
task: String,
/// YOLO Scale
#[arg(long, value_enum, default_value_t = YOLOScale::N)]
pub scale: YOLOScale,
/// version
#[argh(option, default = "8.0")]
ver: f32,
/// Batch size
#[arg(long, default_value_t = 1)]
pub batch_size: usize,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// Minimum input width
#[arg(long, default_value_t = 224)]
pub width_min: isize,
/// scale
#[argh(option, default = "String::from(\"n\")")]
scale: String,
/// Input width
#[arg(long, default_value_t = 640)]
pub width: isize,
/// trt_fp16
#[argh(option, default = "true")]
trt_fp16: bool,
/// Maximum input width
#[arg(long, default_value_t = 1024)]
pub width_max: isize,
/// find_contours
#[argh(option, default = "true")]
find_contours: bool,
/// Minimum input height
#[arg(long, default_value_t = 224)]
pub height_min: isize,
/// batch_size
#[argh(option, default = "1")]
batch_size: usize,
/// Input height
#[arg(long, default_value_t = 640)]
pub height: isize,
/// min_batch_size
#[argh(option, default = "1")]
min_batch_size: usize,
/// Maximum input height
#[arg(long, default_value_t = 1024)]
pub height_max: isize,
/// max_batch_size
#[argh(option, default = "4")]
max_batch_size: usize,
/// Number of classes
#[arg(long, default_value_t = 80)]
pub nc: usize,
/// min_image_width
#[argh(option, default = "224")]
min_image_width: isize,
/// Class confidence
#[arg(long)]
pub confs: Vec<f32>,
/// image_width
#[argh(option, default = "640")]
image_width: isize,
/// Enable TensorRT support
#[arg(long)]
pub trt: bool,
/// max_image_width
#[argh(option, default = "1280")]
max_image_width: isize,
/// Enable CUDA support
#[arg(long)]
pub cuda: bool,
/// min_image_height
#[argh(option, default = "224")]
min_image_height: isize,
/// Enable CoreML support
#[arg(long)]
pub coreml: bool,
/// image_height
#[argh(option, default = "640")]
image_height: isize,
/// Use TensorRT half precision
#[arg(long)]
pub half: bool,
/// max_image_height
#[argh(option, default = "1280")]
max_image_height: isize,
/// Device ID to use
#[arg(long, default_value_t = 0)]
pub device_id: usize,
/// num_classes
#[argh(option)]
num_classes: Option<usize>,
/// Enable performance profiling
#[arg(long)]
pub profile: bool,
/// num_keypoints
#[argh(option)]
num_keypoints: Option<usize>,
/// Disable contour drawing
#[arg(long)]
pub no_contours: bool,
/// use_coco_80_classes
#[argh(switch)]
use_coco_80_classes: bool,
/// Show result
#[arg(long)]
pub view: bool,
/// use_imagenet_1k_classes
#[argh(switch)]
use_imagenet_1k_classes: bool,
/// Do not save output
#[arg(long)]
pub nosave: bool,
/// confs
#[argh(option)]
confs: Vec<f32>,
/// keypoint_confs
#[argh(option)]
keypoint_confs: Vec<f32>,
/// exclude_classes
#[argh(option)]
exclude_classes: Vec<usize>,
/// retain_classes
#[argh(option)]
retain_classes: Vec<usize>,
/// class_names
#[argh(option)]
class_names: Vec<String>,
/// keypoint_names
#[argh(option)]
keypoint_names: Vec<String>,
}
fn main() -> Result<()> {
let args = Args::parse();
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// model path
let path = match &args.model {
None => format!(
"yolo/{}-{}-{}.onnx",
args.ver.name(),
args.scale.name(),
args.task.name()
),
Some(x) => x.to_string(),
};
let args: Args = argh::from_env();
// saveout
let saveout = match &args.model {
None => format!(
"{}-{}-{}",
args.ver.name(),
args.scale.name(),
args.task.name()
),
Some(x) => {
let p = std::path::PathBuf::from(&x);
p.file_stem().unwrap().to_str().unwrap().to_string()
}
};
// device
let device = if args.cuda {
Device::Cuda(args.device_id)
} else if args.trt {
Device::Trt(args.device_id)
} else if args.coreml {
Device::CoreML(args.device_id)
} else {
Device::Cpu(args.device_id)
};
// build options
let options = Options::new()
.with_model(&path)?
.with_yolo_version(args.ver)
.with_yolo_task(args.task)
.with_device(device)
.with_trt_fp16(args.half)
.with_ixx(0, 0, (1, args.batch_size as _, 4).into())
.with_ixx(0, 2, (args.height_min, args.height, args.height_max).into())
.with_ixx(0, 3, (args.width_min, args.width, args.width_max).into())
.with_confs(if args.confs.is_empty() {
let mut options = Options::yolo()
.with_model_file(&args.model.unwrap_or_default())
.with_model_task(args.task.as_str().try_into()?)
.with_model_version(args.ver.into())
.with_model_scale(args.scale.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_trt_fp16(args.trt_fp16)
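// dynamic input shape for input 0: axis 0 = batch, axis 2 = height, axis 3 = width, each as (min, opt, max)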
.with_model_ixx(
0,
0,
(args.min_batch_size, args.batch_size, args.max_batch_size).into(),
)
.with_model_ixx(
0,
2,
(
args.min_image_height,
args.image_height,
args.max_image_height,
)
.into(),
)
.with_model_ixx(
0,
3,
(args.min_image_width, args.image_width, args.max_image_width).into(),
)
.with_class_confs(if args.confs.is_empty() {
&[0.2, 0.15]
} else {
&args.confs
})
.with_nc(args.nc)
// .with_names(&COCO_CLASS_NAMES_80)
// .with_names2(&COCO_KEYPOINTS_17)
.with_find_contours(!args.no_contours) // find contours or not
.exclude_classes(&[0])
// .retain_classes(&[0, 5])
.with_profile(args.profile);
.with_keypoint_confs(if args.keypoint_confs.is_empty() {
&[0.5]
} else {
&args.keypoint_confs
})
.with_find_contours(args.find_contours)
.retain_classes(&args.retain_classes)
.exclude_classes(&args.exclude_classes);
if args.use_coco_80_classes {
options = options.with_class_names(&COCO_CLASS_NAMES_80);
}
if args.use_imagenet_1k_classes {
options = options.with_class_names(&IMAGENET_NAMES_1K);
}
if let Some(nc) = args.num_classes {
options = options.with_nc(nc);
}
if let Some(nk) = args.num_keypoints {
options = options.with_nk(nk);
}
if !args.class_names.is_empty() {
options = options.with_class_names(
&args
.class_names
.iter()
.map(|x| x.as_str())
.collect::<Vec<_>>(),
);
}
if !args.keypoint_names.is_empty() {
options = options.with_keypoint_names(
&args
.keypoint_names
.iter()
.map(|x| x.as_str())
.collect::<Vec<_>>(),
);
}
// build model
let mut model = YOLO::new(options)?;
let mut model = YOLO::try_from(options.commit()?)?;
// build dataloader
let dl = DataLoader::new(&args.source)?
@@ -175,56 +217,28 @@ fn main() -> Result<()> {
// build annotator
let annotator = Annotator::default()
.with_skeletons(&COCO_SKELETONS_16)
.without_masks(true) // No masks plotting when doing segment task.
.without_masks(true)
.with_bboxes_thickness(3)
.with_keypoints_name(false) // Enable keypoints names
.with_saveout_subs(&["YOLO"])
.with_saveout(&saveout);
// build viewer
let mut viewer = if args.view {
Some(Viewer::new().with_delay(5).with_scale(1.).resizable(true))
} else {
None
};
.with_saveout(model.spec());
// run & annotate
for (xs, _paths) in dl {
// let ys = model.run(&xs)?; // way one
let ys = model.forward(&xs, args.profile)?; // way two
let images_plotted = annotator.plot(&xs, &ys, !args.nosave)?;
let ys = model.forward(&xs)?;
// extract bboxes
// for y in ys.iter() {
// if let Some(bboxes) = y.bboxes() {
// println!("[Bboxes]: Found {} objects", bboxes.len());
// for (i, bbox) in bboxes.iter().enumerate() {
// println!("{}: {:?}", i, bbox)
// }
// }
// }
// show image
match &mut viewer {
Some(viewer) => viewer.imshow(&images_plotted)?,
None => continue,
}
// check out window and key event
match &mut viewer {
Some(viewer) => {
if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
break;
}
}
None => continue,
}
// write video
if !args.nosave {
match &mut viewer {
Some(viewer) => viewer.write_batch(&images_plotted)?,
None => continue,
}
}
// plot
annotator.annotate(&xs, &ys);
}
// finish video write
if !args.nosave {
if let Some(viewer) = &mut viewer {
viewer.finish_write()?;
}
}
model.summary();
Ok(())
}

View File

@@ -1,22 +1,26 @@
use anyhow::Result;
use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// build model
let options = Options::default()
.with_model("yolop/v2-dyn-480x800.onnx")?
.with_confs(&[0.3]);
let options = Options::yolop_v2_480x800().commit()?;
let mut model = YOLOPv2::new(options)?;
// load image
let x = [DataLoader::try_read("images/car.jpg")?];
let x = [DataLoader::try_read("images/car-view.jpg")?];
// run
let y = model.run(&x)?;
let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_polygons_name(true)
.with_saveout("YOLOPv2");
.with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())

View File

@@ -0,0 +1,9 @@
## Quick Start
```shell
cargo run -r -F cuda --example yolov8-rtdetr -- --device cuda
```
```shell
Ys([Y { BBoxes: [Bbox { xyxy: [668.71356, 395.4159, 809.01587, 879.3043], class_id: 0, name: Some("person"), confidence: 0.950527 }, Bbox { xyxy: [48.866394, 399.50665, 248.22641, 904.7525], class_id: 0, name: Some("person"), confidence: 0.9504415 }, Bbox { xyxy: [20.197449, 230.00304, 805.026, 730.3445], class_id: 5, name: Some("bus"), confidence: 0.94705224 }, Bbox { xyxy: [221.3088, 405.65436, 345.44052, 860.2628], class_id: 0, name: Some("person"), confidence: 0.93062377 }, Bbox { xyxy: [0.34117508, 549.8391, 76.50758, 868.87646], class_id: 0, name: Some("person"), confidence: 0.71064234 }, Bbox { xyxy: [282.12543, 484.14166, 296.43207, 520.96246], class_id: 27, name: Some("tie"), confidence: 0.40305245 }] }])
```

View File

@@ -0,0 +1,45 @@
use anyhow::Result;
use usls::{models::YOLO, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
}
fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let config = Options::yolo_v8_rtdetr_l()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = YOLO::new(config)?;
// load images
let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?;
// run
let ys = model.forward(&xs)?;
println!("{:?}", ys);
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}