0.0.14: DataLoader now supports video and streaming

- Added `Hub` for resource management
- Updated `DataLoader` to support video and streaming
- Updated the CI workflows (per-OS build, test, and lint jobs against multiple FFmpeg versions)
- Replaced `println!` with `tracing` for logging
Jamjamjon
2024-09-16 10:41:16 +08:00
committed by GitHub
parent 826da4037e
commit 0adddd3bbd
82 changed files with 1739 additions and 583 deletions
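
As a rough illustration of the logging and resource-management changes listed above (not code from this commit), a downstream crate depending on `usls`, `anyhow`, `tracing`, and `tracing-subscriber` might exercise the new `Hub` cache and the `tracing`-based logging like this; the crate-root re-export of `Hub` is an assumption, while the `fetch(...).commit()` chain mirrors its usage inside this diff:

```rust
use usls::{DataLoader, Hub};

fn main() -> anyhow::Result<()> {
    // `println!` logging was replaced with `tracing`; install a subscriber to see it,
    // as the new examples/dataloader example does.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();

    // `Hub` resolves a release asset to a local cache path, downloading it on first use
    // (assumed to be re-exported at the crate root).
    let font = Hub::new()?.fetch("fonts/Arial.ttf")?.commit()?;
    tracing::info!("font cached at: {:?}", font);

    // `DataLoader::try_read` now falls back to the hub when the path is not a local file.
    let _img = DataLoader::try_read("images/bus.jpg")?;
    Ok(())
}
```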

View File

@ -1,78 +1,203 @@
name: Rust
name: Rust-CI
on:
push:
branches: [ "main", "dev" ]
branches: [ "main", "dev", "develop", "x", "xy" , "xyz" ]
pull_request:
branches: [ "main" ]
env:
CARGO_TERM_COLOR: always
jobs:
check:
name: Check
runs-on: ${{ matrix.os }}
build-on-linux:
name: build / linux / ffmpeg ${{ matrix.ffmpeg_version }}
runs-on: ubuntu-latest
container: jrottenberg/ffmpeg:${{ matrix.ffmpeg_version }}-ubuntu
strategy:
matrix:
os: [ubuntu-latest, macOS-latest, windows-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust }}
override: true
- uses: actions-rs/cargo@v1
with:
command: check
args: --workspace --examples
ffmpeg_version: ["4.3", "4.4", "5.0", "5.1", "6.0", "6.1", "7.0"]
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
apt update
apt install -y --no-install-recommends clang curl pkg-config
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
- name: Build
run: cargo build
build-on-macos:
name: build / macos / ffmpeg latest
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
brew install ffmpeg pkg-config
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
- name: Build
run: cargo build
build-on-windows:
name: build / windows / ffmpeg latest
runs-on: windows-latest
env:
FFMPEG_DOWNLOAD_URL: https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full-shared.7z
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
$VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath)
Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n"
Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z
7z x ffmpeg-release-full-shared.7z
mkdir ffmpeg
mv ffmpeg-*/* ffmpeg/
Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n"
Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n"
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
- name: Build
run: cargo build
test-on-linux:
name: test / linux / ffmpeg ${{ matrix.ffmpeg_version }}
runs-on: ubuntu-latest
container: jrottenberg/ffmpeg:${{ matrix.ffmpeg_version }}-ubuntu
test:
name: Test
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macOS-latest, windows-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust }}
override: true
- uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --examples
ffmpeg_version: ["4.3", "4.4", "5.0", "5.1", "6.0", "6.1", "7.0"]
fail-fast: false
fmt:
name: Rustfmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- run: rustup component add rustfmt
- uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
- name: Checkout
uses: actions/checkout@v3
clippy:
name: Clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
- name: Install dependencies
run: |
apt update
apt install -y --no-install-recommends clang curl pkg-config
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- run: rustup component add clippy
- uses: actions-rs/cargo@v1
- name: Run Tests with All Features
run: cargo test --all-features
- name: Run Tests in Release Mode
run: cargo test --release
test-on-macos:
name: test / macos / ffmpeg latest
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
brew install ffmpeg pkg-config
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
command: clippy
args: --workspace --tests --examples --all-targets --all-features -- -Dwarnings
toolchain: stable
- name: Run Tests with All Features
run: cargo test --all-features
- name: Run Tests in Release Mode
run: cargo test --release
test-on-windows:
name: test / windows / ffmpeg latest
runs-on: windows-latest
env:
FFMPEG_DOWNLOAD_URL: https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full-shared.7z
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
$VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath)
Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n"
Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z
7z x ffmpeg-release-full-shared.7z
mkdir ffmpeg
mv ffmpeg-*/* ffmpeg/
Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n"
Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n"
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
- name: Run Tests with All Features
run: cargo test --all-features
- name: Run Tests in Release Mode
run: cargo test --release
lints:
runs-on: ubuntu-latest
container: jrottenberg/ffmpeg:6-ubuntu
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install dependencies
run: |
apt update
apt install -y --no-install-recommends clang curl pkg-config
- name: Setup Rust
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
components: rustfmt, clippy
- name: Rustfmt
run: cargo fmt --all -- --check
- name: Clippy
run: cargo clippy --all --all-targets --all-features -- -D warnings

View File

@ -1,6 +1,6 @@
[package]
name = "usls"
version = "0.0.13"
version = "0.0.14"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
@ -22,7 +22,7 @@ dirs = { version = "5.0.1" }
ureq = { version = "2.9.1", default-features = true, features = [
"socks-proxy",
] }
walkdir = { version = "2.5.0" }
walkdir = { version = "2.5.0" } # TODO: remove
tokenizers = { version = "0.15.2" }
rayon = "1.10.0"
indicatif = "0.17.8"
@ -32,6 +32,13 @@ ab_glyph = "0.2.23"
geo = "0.28.0"
prost = "0.12.4"
fast_image_resize = { version = "4.2.1", features = ["image"]}
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3.12.0"
video-rs = { version = "0.9.0", features = ["ndarray"] }
natord = "1.0.9"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
[features]

View File

@ -98,7 +98,7 @@ You have two options to link the ONNXRuntime library
## 🎈 Quick Start
## 🎈 Demo
```Shell
cargo run -r --example yolo # blip, clip, yolop, svtr, db, ...
@ -106,16 +106,85 @@ cargo run -r --example yolo # blip, clip, yolop, svtr, db, ...
## 🥂 Integrate Into Your Own Project
Add `usls` as a dependency to your project's `Cargo.toml`
```Shell
cargo add usls
```
- #### Add `usls` as a dependency to your project's `Cargo.toml`
```Shell
cargo add usls
```
Or use a specific commit:
```Toml
[dependencies]
usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
```
- #### Follow the pipeline
- Build a model with the provided `models` and `Options`
- Load images, videos, and streams with `DataLoader`
- Run inference
- Annotate the inference results with `Annotator`
- Retrieve the inference results from `Vec<Y>`
```rust
use usls::{models::YOLO, Annotator, DataLoader, Nms, Options, Vision, YOLOTask, YOLOVersion};
fn main() -> anyhow::Result<()> {
// Build model with Options
let options = Options::new()
.with_trt(0)
.with_model("yolo/v8-m-dyn.onnx")?
.with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
.with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
.with_i00((1, 2, 4).into())
.with_i02((0, 640, 640).into())
.with_i03((0, 640, 640).into())
.with_confs(&[0.2]);
let mut model = YOLO::new(options)?;
// Build DataLoader to load image(s), video, stream
let dl = DataLoader::new(
// "./assets/bus.jpg", // local image
// "images/bus.jpg", // remote image
// "../images-folder", // local images (from folder)
// "../demo.mp4", // local video
// "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
"rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
)?
.with_batch(2) // iterate with batch_size = 2
.build()?;
// Build annotator
let annotator = Annotator::new()
.with_bboxes_thickness(4)
.with_saveout("YOLO-DataLoader");
// Run and annotate results
for (xs, _) in dl {
let ys = model.forward(&xs, false)?;
annotator.annotate(&xs, &ys);
// Retrieve inference results
for y in ys {
// bboxes
if let Some(bboxes) = y.bboxes() {
for bbox in bboxes {
println!(
"Bbox: {}, {}, {}, {}, {}, {}",
bbox.xmin(),
bbox.ymin(),
bbox.xmax(),
bbox.ymax(),
bbox.confidence(),
bbox.id(),
);
}
}
}
}
Ok(())
}
```
Or use a specific commit:
```Toml
[dependencies]
usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
```
## 📌 License
This project is licensed under [LICENSE](LICENSE).


View File

@ -1,7 +1,7 @@
use anyhow::Result;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use usls::{models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion, COCO_KEYPOINTS_17};
use usls::{models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
enum Stage {
Pre,
@ -52,18 +52,17 @@ pub fn benchmark_cuda(c: &mut Criterion, h: isize, w: isize) -> Result<()> {
let options = Options::default()
.with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
.with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
.with_model("yolov8m-dyn.onnx")?
.with_model("yolo/v8-m-dyn.onnx")?
.with_cuda(0)
// .with_cpu()
.with_dry_run(0)
.with_i00((1, 1, 4).into())
.with_i02((320, h, 1280).into())
.with_i03((320, w, 1280).into())
.with_confs(&[0.2, 0.15]) // class_0: 0.4, others: 0.15
.with_names2(&COCO_KEYPOINTS_17);
.with_confs(&[0.2, 0.15]);
let mut model = YOLO::new(options)?;
let xs = vec![DataLoader::try_read("./assets/bus.jpg")?];
let xs = [DataLoader::try_read("./assets/bus.jpg")?];
group.bench_function("pre-process", |b| {
b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Pre, n))

View File

@ -6,17 +6,11 @@ This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do condit
cargo run -r --example blip
```
## BLIP ONNX Model
- [blip-visual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-visual-base.onnx)
- [blip-textual-base](https://github.com/jamjamjon/assets/releases/download/v0.0.1/blip-textual-base.onnx)
## Results
```shell
[Unconditional image captioning]: a group of people walking around a bus
[Conditional image captioning]: three man walking in front of a bus
[Unconditional]: a group of people walking around a bus
[Conditional]: three man walking in front of a bus
Some(["three man walking in front of a bus"])
```

View File

@ -3,14 +3,14 @@ use usls::{models::Blip, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
let options_visual = Options::default()
.with_model("blip-visual-base.onnx")?
.with_model("blip/visual-base.onnx")?
.with_i00((1, 1, 4).into())
.with_profile(false);
// textual
let options_textual = Options::default()
.with_model("blip-textual-base.onnx")?
// .with_tokenizer("tokenizer-blip.json")?
.with_model("blip/textual-base.onnx")?
// .with_tokenizer("blip/tokenizer.json")?
.with_i00((1, 1, 4).into()) // input_id: batch
.with_i01((1, 1, 4).into()) // input_id: seq_len
.with_i10((1, 1, 4).into()) // attention_mask: batch
@ -23,7 +23,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut model = Blip::new(options_visual, options_textual)?;
// image caption (this demo use batch_size=1)
let xs = vec![DataLoader::try_read("./assets/bus.jpg")?];
let xs = [DataLoader::try_read("images/bus.jpg")?];
let image_embeddings = model.encode_images(&xs)?;
let _y = model.caption(&image_embeddings, None, true)?; // unconditional
let y = model.caption(&image_embeddings, Some("three man"), true)?; // conditional

View File

@ -6,12 +6,6 @@ This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute
cargo run -r --example clip
```
## CLIP ONNX Model
- [clip-b32-visual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-visual.onnx)
- [clip-b32-textual](https://github.com/jamjamjon/assets/releases/download/v0.0.1/clip-b32-textual.onnx)
## Results
```shell

View File

@ -3,14 +3,14 @@ use usls::{models::Clip, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// visual
let options_visual = Options::default()
.with_model("clip-b32-visual-dyn.onnx")?
.with_model("clip/visual-base-dyn.onnx")?
.with_i00((1, 1, 4).into())
.with_profile(false);
// textual
let options_textual = Options::default()
.with_model("clip-b32-textual-dyn.onnx")?
// .with_tokenizer("tokenizer-clip.json")?
.with_model("clip/textual-base-dyn.onnx")?
.with_tokenizer("clip/tokenizer.json")?
.with_i00((1, 1, 4).into())
.with_profile(false);
@ -30,9 +30,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let feats_text = model.encode_texts(&texts)?; // [n, ndim]
// load image
let dl = DataLoader::default()
.with_batch(model.batch_visual())
.load("./examples/clip/images")?;
let dl = DataLoader::new("./examples/clip/images")?.build()?;
// loop
for (images, paths) in dl {

View File

@ -0,0 +1,49 @@
use usls::{models::YOLO, Annotator, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::ERROR)
.init();
let options = Options::new()
.with_cuda(0)
.with_model("yolo/v8-m-dyn.onnx")?
.with_yolo_version(YOLOVersion::V8)
.with_yolo_task(YOLOTask::Detect)
.with_i00((1, 1, 4).into())
.with_i02((0, 640, 640).into())
.with_i03((0, 640, 640).into())
.with_confs(&[0.2]);
let mut model = YOLO::new(options)?;
// build dataloader
let dl = DataLoader::new(
"./assets/bus.jpg", // local image
// "images/bus.jpg", // remote image
// "../images", // image folder
// "../demo.mp4", // local video
// "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
// "rtsp://admin:xyz@192.168.2.217:554/h265/ch1/", // rtsp h264 stream
)?
.with_batch(1)
.with_progress_bar(true)
.with_bound(100)
.build()?;
// build annotator
let annotator = Annotator::new()
.with_bboxes_thickness(4)
.with_saveout("YOLO-DataLoader");
// run
for (xs, _) in dl {
// std::thread::sleep(std::time::Duration::from_millis(1000));
let ys = model.forward(&xs, false)?;
annotator.annotate(&xs, &ys);
}
// images -> video
// DataLoader::is2v("runs/YOLO-DataLoader", &["runs", "is2v"], 24)?;
Ok(())
}
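
The commented-out `is2v` line above points at the new images-to-video helper added in this commit. A minimal, hedged sketch of invoking it on the saved annotations (the input folder matches the `with_saveout` value above; 24 FPS is an arbitrary choice):

```rust
use usls::DataLoader;

fn main() -> anyhow::Result<()> {
    // Stitch the PNGs under ./runs/YOLO-DataLoader into ./runs/is2v/<timestamp>.mp4 at 24 FPS.
    DataLoader::is2v("runs/YOLO-DataLoader", &["runs", "is2v"], 24)?;
    Ok(())
}
```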

View File

@ -4,11 +4,6 @@
cargo run -r --example db
```
## ONNX Model
- [ppocr-v3-db-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v3-db-dyn.onnx)
- [ppocr-v4-db-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-db-dyn.onnx)
### Speed test
| Model | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
@ -20,4 +15,5 @@ cargo run -r --example db
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/db/demo-paper.png)
![](https://github.com/jamjamjon/assets/releases/download/db/demo-sign.png)


View File

@ -10,14 +10,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_confs(&[0.4])
.with_min_width(5.0)
.with_min_height(12.0)
.with_model("ppocr-v4-db-dyn.onnx")?;
.with_model("db/ppocr-v4-db-dyn.onnx")?;
let mut model = DB::new(options)?;
// load image
let x = vec![
DataLoader::try_read("./assets/db.png")?,
DataLoader::try_read("./assets/2.jpg")?,
let x = [
DataLoader::try_read("images/db.png")?,
DataLoader::try_read("images/street.jpg")?,
];
// run

View File

@ -4,14 +4,7 @@
cargo run -r --example depth-anything
```
## ONNX Model
- [depth-anything-s-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-s-dyn.onnx)
- [depth-anything-b-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-b-dyn.onnx)
- [depth-anything-l-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-l-dyn.onnx)
- [depth-anything-v2-s](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-v2-s.onnx)
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/depth-anything/demo.png)


View File

@ -3,15 +3,15 @@ use usls::{models::DepthAnything, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// options
let options = Options::default()
// .with_model("depth-anything-s-dyn.onnx")?
.with_model("depth-anything-v2-s.onnx")?
// .with_model("depth-anything/v1-s-dyn.onnx")?
.with_model("depth-anything/v2-s.onnx")?
.with_i00((1, 1, 8).into())
.with_i02((384, 512, 1024).into())
.with_i03((384, 512, 1024).into());
let mut model = DepthAnything::new(options)?;
// load
let x = [DataLoader::try_read("./assets/2.jpg")?];
let x = [DataLoader::try_read("images/2.jpg")?];
// run
let y = model.run(&x)?;

View File

@ -5,22 +5,3 @@ This demo showcases how to use `DINOv2` to compute image similarity, applicable
```shell
cargo run -r --example dinov2
```
## Download DINOv2 ONNX Model
- [dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
- [dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
- [dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
- [dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
- [dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
## Results
```shell
Top-1 0.0000000 /home/qweasd/Desktop/usls/examples/dinov2/images/bus.jpg
Top-2 1.9059424 /home/qweasd/Desktop/usls/examples/dinov2/images/1.jpg
Top-3 1.9736203 /home/qweasd/Desktop/usls/examples/dinov2/images/2.jpg
```


View File

@ -3,15 +3,16 @@ use usls::{models::Dinov2, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("dinov2-s14-dyn-f16.onnx")?
.with_model("dinov2/s-dyn.onnx")?
.with_i00((1, 1, 1).into())
.with_i02((224, 224, 224).into())
.with_i03((224, 224, 224).into());
let mut model = Dinov2::new(options)?;
let x = vec![DataLoader::try_read("./examples/dinov2/images/1.jpg")?];
let x = [DataLoader::try_read("images/bus.jpg")?];
let y = model.run(&x)?;
println!("{y:?}");
// TODO:
// query from vector
// let ys = model.query_from_vec(
// "./assets/bus.jpg",

View File

@ -0,0 +1,10 @@
## Quick Start
```shell
cargo run -r --example grounding-dino
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/grounding-dino/demo.png)

View File

@ -16,14 +16,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i50((1, 1, 4).into())
.with_i51((256, 256, 512).into())
.with_i52((256, 256, 512).into())
.with_model("groundingdino-swint-ogc-dyn-u8.onnx")? // TODO: current onnx model does not support bs > 1
// .with_model("groundingdino-swint-ogc-dyn-f32.onnx")?
.with_model("grounding-dino/swint-ogc-dyn-u8.onnx")? // TODO: current onnx model does not support bs > 1
// .with_model("grounding-dino/swint-ogc-dyn-f32.onnx")?
.with_tokenizer("grounding-dino/tokenizer.json")?
.with_confs(&[0.2])
.with_profile(false);
let mut model = GroundingDINO::new(opts)?;
// Load images and set class names
let x = [DataLoader::try_read("./assets/bus.jpg")?];
let x = [DataLoader::try_read("images/bus.jpg")?];
let texts = [
"person", "hand", "shoes", "bus", "dog", "cat", "sign", "tie", "monitor", "window",
"glasses", "tree", "head",

View File

@ -4,12 +4,7 @@
cargo run -r --example modnet
```
## ONNX Model
- [modnet-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/modnet-dyn.onnx)
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/modnet/demo.png)


View File

@ -3,14 +3,14 @@ use usls::{models::MODNet, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("modnet-dyn.onnx")?
.with_model("modnet/dyn-f32.onnx")?
.with_i00((1, 1, 4).into())
.with_i02((416, 512, 800).into())
.with_i03((416, 512, 800).into());
let mut model = MODNet::new(options)?;
// load image
let x = vec![DataLoader::try_read("./assets/liuyifei.png")?];
let x = [DataLoader::try_read("images/liuyifei.png")?];
// run
let y = model.run(&x)?;

View File

@ -3,16 +3,8 @@
```shell
cargo run -r --example rtmo
```
## ONNX Model
- [rtmo-s-dyn model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-s-dyn.onnx)
- [rtmo-m-dyn model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-m-dyn.onnx)
- [rtmo-l-dyn model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-l-dyn.onnx)
- [rtmo-s-dyn-f16 model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-s-dyn-f16.onnx)
- [rtmo-m-dyn-f16 model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-m-dyn-f16.onnx)
- [rtmo-l-dyn-f16 model](https://github.com/jamjamjon/assets/releases/download/v0.0.1/rtmo-l-dyn-f16.onnx)
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/rtmo/demo.png)


View File

@ -3,7 +3,7 @@ use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETONS_16};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("rtmo-s-dyn.onnx")?
.with_model("rtmo/s-dyn.onnx")?
.with_i00((1, 1, 8).into())
.with_nk(17)
.with_confs(&[0.3])
@ -11,7 +11,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut model = RTMO::new(options)?;
// load image
let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
let x = [DataLoader::try_read("images/bus.jpg")?];
// run
let y = model.run(&x)?;

View File

@ -18,4 +18,5 @@ cargo run -r --example sam -- --kind sam-hq
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-car.png)
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-dog.png)


View File

@ -25,63 +25,64 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let (options_encoder, options_decoder, saveout) = match args.kind {
SamKind::Sam => {
let options_encoder = Options::default()
// .with_model("sam-vit-b-encoder.onnx")?;
.with_model("sam-vit-b-encoder-u8.onnx")?;
// .with_model("sam/sam-vit-b-encoder.onnx")?;
.with_model("sam/sam-vit-b-encoder-u8.onnx")?;
let options_decoder = Options::default()
.with_i00((1, 1, 1).into())
.with_i11((1, 1, 1).into())
.with_i21((1, 1, 1).into())
.with_sam_kind(SamKind::Sam)
// .with_model("sam-vit-b-decoder.onnx")?;
// .with_model("sam-vit-b-decoder-singlemask.onnx")?;
.with_model("sam-vit-b-decoder-u8.onnx")?;
// .with_model("sam/sam-vit-b-decoder.onnx")?;
// .with_model("sam/sam-vit-b-decoder-singlemask.onnx")?;
.with_model("sam/sam-vit-b-decoder-u8.onnx")?;
(options_encoder, options_decoder, "SAM")
}
SamKind::Sam2 => {
let options_encoder = Options::default()
// .with_model("sam2-hiera-tiny-encoder.onnx")?;
// .with_model("sam2-hiera-small-encoder.onnx")?;
.with_model("sam2-hiera-base-plus-encoder.onnx")?;
// .with_model("sam/sam2-hiera-tiny-encoder.onnx")?;
// .with_model("sam/sam2-hiera-small-encoder.onnx")?;
.with_model("sam/sam2-hiera-base-plus-encoder.onnx")?;
let options_decoder = Options::default()
.with_i31((1, 1, 1).into())
.with_i41((1, 1, 1).into())
.with_sam_kind(SamKind::Sam2)
// .with_model("sam2-hiera-tiny-decoder.onnx")?;
// .with_model("sam2-hiera-small-decoder.onnx")?;
.with_model("sam2-hiera-base-plus-decoder.onnx")?;
// .with_model("sam/sam2-hiera-tiny-decoder.onnx")?;
// .with_model("sam/sam2-hiera-small-decoder.onnx")?;
.with_model("sam/sam2-hiera-base-plus-decoder.onnx")?;
(options_encoder, options_decoder, "SAM2")
}
SamKind::MobileSam => {
let options_encoder = Options::default().with_model("mobile-sam-vit-t-encoder.onnx")?;
let options_encoder =
Options::default().with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_i00((1, 1, 1).into())
.with_i11((1, 1, 1).into())
.with_i21((1, 1, 1).into())
.with_sam_kind(SamKind::MobileSam)
.with_model("mobile-sam-vit-t-decoder.onnx")?;
.with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
(options_encoder, options_decoder, "Mobile-SAM")
}
SamKind::SamHq => {
let options_encoder = Options::default().with_model("sam-hq-vit-t-encoder.onnx")?;
let options_encoder = Options::default().with_model("sam/sam-hq-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_i00((1, 1, 1).into())
.with_i21((1, 1, 1).into())
.with_i31((1, 1, 1).into())
.with_sam_kind(SamKind::SamHq)
.with_model("sam-hq-vit-t-decoder.onnx")?;
.with_model("sam/sam-hq-vit-t-decoder.onnx")?;
(options_encoder, options_decoder, "SAM-HQ")
}
SamKind::EdgeSam => {
let options_encoder = Options::default().with_model("edge-sam-3x-encoder.onnx")?;
let options_encoder = Options::default().with_model("sam/edge-sam-3x-encoder.onnx")?;
let options_decoder = Options::default()
.with_i00((1, 1, 1).into())
.with_i11((1, 1, 1).into())
.with_i21((1, 1, 1).into())
.with_sam_kind(SamKind::EdgeSam)
.with_model("edge-sam-3x-decoder.onnx")?;
.with_model("sam/edge-sam-3x-decoder.onnx")?;
(options_encoder, options_decoder, "Edge-SAM")
}
};
@ -100,8 +101,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Load image
let xs = [
DataLoader::try_read("./assets/truck.jpg")?,
// DataLoader::try_read("./assets/dog.jpg")?,
DataLoader::try_read("images/truck.jpg")?,
// DataLoader::try_read("images/dog.jpg")?,
];
// Build annotator

View File

@ -0,0 +1,10 @@
## Quick Start
```shell
cargo run -r --example sapiens
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/sapiens/demo.png)

View File

@ -6,7 +6,7 @@ use usls::{
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build
let options = Options::default()
.with_model("sapiens-seg-0.3b-dyn.onnx")?
.with_model("sapiens/seg-0.3b-dyn.onnx")?
.with_sapiens_task(SapiensTask::Seg)
.with_names(&BODY_PARTS_28)
.with_profile(false)
@ -14,7 +14,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut model = Sapiens::new(options)?;
// load
let x = [DataLoader::try_read("./assets/paul-george.jpg")?];
let x = [DataLoader::try_read("images/paul-george.jpg")?];
// run
let y = model.run(&x)?;

View File

@ -4,13 +4,6 @@
cargo run -r --example svtr
```
## ONNX Model
- [ppocr-v4-server-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-server-svtr-ch-dyn.onnx)
- [ppocr-v4-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v4-svtr-ch-dyn.onnx)
- [ppocr-v3-svtr-ch-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/ppocr-v3-svtr-ch-dyn.onnx)
### Speed test
| Model | Width | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |

View File

@ -6,14 +6,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 2, 8).into())
.with_i03((320, 960, 1600).into())
.with_confs(&[0.2])
.with_vocab("ppocr_rec_vocab.txt")?
.with_model("ppocr-v4-svtr-ch-dyn.onnx")?;
.with_vocab("svtr/ppocr_rec_vocab.txt")?
.with_model("svtr/ppocr-v4-svtr-ch-dyn.onnx")?;
let mut model = SVTR::new(options)?;
// load images
let dl = DataLoader::default()
.with_batch(1)
.load("./examples/svtr/images")?;
let dl = DataLoader::new("./examples/svtr/images")?.build()?;
// run
for (xs, paths) in dl {

View File

@ -0,0 +1,9 @@
## Quick Start
```shell
cargo run -r --example yolo-sam
```
## Results
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-yolo-sam.png)


View File

@ -7,20 +7,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// build SAM
let options_encoder = Options::default()
.with_i00((1, 1, 1).into())
.with_model("mobile-sam-vit-t-encoder.onnx")?;
.with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
let options_decoder = Options::default()
.with_i11((1, 1, 1).into())
.with_i21((1, 1, 1).into())
.with_find_contours(true)
.with_sam_kind(SamKind::Sam)
.with_model("mobile-sam-vit-t-decoder.onnx")?;
.with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
let mut sam = SAM::new(options_encoder, options_decoder)?;
// build YOLOv8-Det
let options_yolo = Options::default()
.with_yolo_version(YOLOVersion::V8)
.with_yolo_task(YOLOTask::Detect)
.with_model("yolov8m-dyn.onnx")?
.with_model("yolo/v8-m-dyn.onnx")?
.with_cuda(0)
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
@ -30,7 +30,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut yolo = YOLO::new(options_yolo)?;
// load one image
let xs = vec![DataLoader::try_read("./assets/dog.jpg")?];
let xs = [DataLoader::try_read("images/dog.jpg")?];
// build annotator
let annotator = Annotator::default()
@ -38,7 +38,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.without_bboxes_name(true)
.without_bboxes_conf(true)
.without_mbrs(true)
.with_saveout("YOLO+SAM");
.with_saveout("YOLO-SAM");
// run & annotate
let ys_det = yolo.run(&xs)?;

View File

@ -3,19 +3,19 @@
| Detection | Instance Segmentation | Pose |
| :---------------: | :------------------------: |:---------------: |
| <img src='./demos/det.png' width="300px"> | <img src='./demos/seg.png' width="300px"> |<img src='./demos/pose.png' width="300px"> |
| <img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-det.png' width="300px"> | <img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-seg.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-pose.png' width="300px"> |
| Classification | Obb |
| :------------------------: |:------------------------: |
|<img src='./demos/cls.png' width="300px"> |<img src='./demos/obb-2.png' width="628px">
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-cls.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-obb-2.png' width="628px">
| Head Detection | Fall Detection | Trash Detection |
| :------------------------: |:------------------------: |:------------------------: |
|<img src='./demos/head.png' width="300px"> |<img src='./demos/falldown.png' width="300px">|<img src='./demos/trash.png' width="300px">
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-head.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-falldown.png' width="300px">|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-trash.png' width="300px">
| YOLO-World | Face Parsing | FastSAM |
| :------------------------: |:------------------------: |:------------------------: |
|<img src='./demos/yolov8-world.png' width="300px"> |<img src='./demos/face-parsing.png' width="300px">|<img src='./demos/fastsam.png' width="300px">
|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-yolov8-world.png' width="300px"> |<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-face-parsing.png' width="300px">|<img src='https://github.com/jamjamjon/assets/releases/download/yolo/demo-fastsam.png' width="300px">
@ -104,11 +104,11 @@ let options = Options::default()
| Model | Weights | Datasets|
|:---------------------: | :--------------------------: | :-------------------------------: |
| Face-Landmark Detection | [yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-face-dyn-f16.onnx) | |
| Head Detection | [yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-head-f16.onnx) | |
| Fall Detection | [yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-falldown-f16.onnx) | |
| Trash Detection | [yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolov8-plastic-bag-f16.onnx) | |
| FaceParsing | [face-parsing-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/face-parsing-dyn.onnx) | [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ/tree/master/face_parsing)<br />[[Processed YOLO labels]](https://github.com/jamjamjon/assets/releases/download/v0.0.1/CelebAMask-HQ-YOLO-Labels.zip)[[Python Script]](https://github.com/jamjamjon/assets/releases/download/v0.0.1/CelebAMask-HQ-YOLO-Labels.zip) |
| Face-Landmark Detection | [yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-n-face-dyn-f16.onnx) | |
| Head Detection | [yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-head-f16.onnx) | |
| Fall Detection | [yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-falldown-f16.onnx) | |
| Trash Detection | [yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-plastic-bag-f16.onnx) | |
| FaceParsing | [yolov8-face-parsing-dyn](https://github.com/jamjamjon/assets/releases/download/yolo/v8-face-parsing-dyn.onnx) | [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ/tree/master/face_parsing)<br />[[Processed YOLO labels]](https://github.com/jamjamjon/assets/releases/download/yolo/CelebAMask-HQ-YOLO-Labels.zip)[[Python Script]](../../scripts/CelebAMask-HQ-To-YOLO-Labels.py) |


View File

@ -77,15 +77,15 @@ fn main() -> Result<()> {
let (options, saveout) = match args.ver {
YOLOVersion::V5 => match args.task {
YOLOTask::Classify => (
options.with_model(&args.model.unwrap_or("yolov5n-cls-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v5-n-cls-dyn.onnx".to_string()))?,
"YOLOv5-Classify",
),
YOLOTask::Detect => (
options.with_model(&args.model.unwrap_or("yolov5n-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v5-n-dyn.onnx".to_string()))?,
"YOLOv5-Detect",
),
YOLOTask::Segment => (
options.with_model(&args.model.unwrap_or("yolov5n-seg-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v5-n-seg-dyn.onnx".to_string()))?,
"YOLOv5-Segment",
),
t => anyhow::bail!("Task: {t:?} is unsupported for {:?}", args.ver),
@ -93,7 +93,7 @@ fn main() -> Result<()> {
YOLOVersion::V6 => match args.task {
YOLOTask::Detect => (
options
.with_model(&args.model.unwrap_or("yolov6n-dyn.onnx".to_string()))?
.with_model(&args.model.unwrap_or("yolo/v6-n-dyn.onnx".to_string()))?
.with_nc(args.nc),
"YOLOv6-Detect",
),
@ -102,7 +102,7 @@ fn main() -> Result<()> {
YOLOVersion::V7 => match args.task {
YOLOTask::Detect => (
options
.with_model(&args.model.unwrap_or("yolov7-tiny-dyn.onnx".to_string()))?
.with_model(&args.model.unwrap_or("yolo/v7-tiny-dyn.onnx".to_string()))?
.with_nc(args.nc),
"YOLOv7-Detect",
),
@ -110,43 +110,43 @@ fn main() -> Result<()> {
},
YOLOVersion::V8 => match args.task {
YOLOTask::Classify => (
options.with_model(&args.model.unwrap_or("yolov8m-cls-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v8-m-cls-dyn.onnx".to_string()))?,
"YOLOv8-Classify",
),
YOLOTask::Detect => (
options.with_model(&args.model.unwrap_or("yolov8m-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v8-m-dyn.onnx".to_string()))?,
"YOLOv8-Detect",
),
YOLOTask::Segment => (
options.with_model(&args.model.unwrap_or("yolov8m-seg-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v8-m-seg-dyn.onnx".to_string()))?,
"YOLOv8-Segment",
),
YOLOTask::Pose => (
options.with_model(&args.model.unwrap_or("yolov8m-pose-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v8-m-pose-dyn.onnx".to_string()))?,
"YOLOv8-Pose",
),
YOLOTask::Obb => (
options.with_model(&args.model.unwrap_or("yolov8m-obb-dyn.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v8-m-obb-dyn.onnx".to_string()))?,
"YOLOv8-Obb",
),
},
YOLOVersion::V9 => match args.task {
YOLOTask::Detect => (
options.with_model(&args.model.unwrap_or("yolov9-c-dyn-f16.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v9-c-dyn-f16.onnx".to_string()))?,
"YOLOv9-Detect",
),
t => anyhow::bail!("Task: {t:?} is unsupported for {:?}", args.ver),
},
YOLOVersion::V10 => match args.task {
YOLOTask::Detect => (
options.with_model(&args.model.unwrap_or("yolov10n.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/v10-n.onnx".to_string()))?,
"YOLOv10-Detect",
),
t => anyhow::bail!("Task: {t:?} is unsupported for {:?}", args.ver),
},
YOLOVersion::RTDETR => match args.task {
YOLOTask::Detect => (
options.with_model(&args.model.unwrap_or("rtdetr-l-f16.onnx".to_string()))?,
options.with_model(&args.model.unwrap_or("yolo/rtdetr-l-f16.onnx".to_string()))?,
"RTDETR",
),
t => anyhow::bail!("Task: {t:?} is unsupported for {:?}", args.ver),
@ -184,9 +184,9 @@ fn main() -> Result<()> {
let mut model = YOLO::new(options)?;
// build dataloader
let dl = DataLoader::default()
let dl = DataLoader::new(&args.source)?
.with_batch(model.batch() as _)
.load(args.source)?;
.build()?;
// build annotator
let annotator = Annotator::default()

View File

@ -4,11 +4,6 @@
cargo run -r --example yolop
```
## Pretrained Model
- [yolopv2-dyn-480x800](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolopv2-dyn-480x800.onnx)
- [yolopv2-dyn-736x1280](https://github.com/jamjamjon/assets/releases/download/v0.0.1/yolopv2-dyn-736x1280.onnx)
## Results
![](./demo.png)
![](https://github.com/jamjamjon/assets/releases/download/yolop/demo.png)


View File

@ -3,13 +3,13 @@ use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("yolopv2-dyn-480x800.onnx")?
.with_model("yolop/v2-dyn-480x800.onnx")?
.with_i00((1, 1, 8).into())
.with_confs(&[0.3]);
let mut model = YOLOPv2::new(options)?;
// load image
let x = vec![DataLoader::try_read("./assets/car.jpg")?];
let x = [DataLoader::try_read("images/car.jpg")?];
// run
let y = model.run(&x)?;

View File

@ -1,5 +1,5 @@
use crate::{
auto_load, colormap256, string_now, Bbox, Keypoint, Mask, Mbr, Polygon, Prob, CHECK_MARK,
colormap256, string_now, Bbox, Dir, Hub, Keypoint, Mask, Mbr, Polygon, Prob, CHECK_MARK,
CROSS_MARK, Y,
};
use ab_glyph::{FontVec, PxScale};
@ -8,11 +8,12 @@ use image::{DynamicImage, GenericImage, Rgba, RgbaImage};
use imageproc::map::map_colors;
/// Annotator for struct `Y`
#[derive(Debug)]
// #[derive(Debug)]
pub struct Annotator {
font: FontVec,
_scale: f32, // Cope with ab_glyph & imageproc=0.24.0
scale_dy: f32,
saveout_base: String,
saveout: Option<String>,
decimal_places: usize,
@ -63,11 +64,15 @@ pub struct Annotator {
impl Default for Annotator {
fn default() -> Self {
Self {
font: Self::load_font(None).unwrap(),
font: match Self::load_font(None) {
Ok(x) => x,
Err(err) => panic!("Failed to load font: {}", err),
},
_scale: 6.666667,
scale_dy: 28.,
polygons_alpha: 179,
saveout: None,
saveout_base: String::from("runs"),
decimal_places: 4,
without_bboxes: false,
without_bboxes_conf: false,
@ -104,6 +109,10 @@ impl Default for Annotator {
}
impl Annotator {
pub fn new() -> Self {
Default::default()
}
pub fn with_decimal_places(mut self, x: usize) -> Self {
self.decimal_places = x;
self
@ -304,32 +313,44 @@ impl Annotator {
self
}
pub fn with_saveout(mut self, saveout: &str) -> Self {
self.saveout = Some(saveout.to_string());
pub fn with_saveout_base(mut self, x: &str) -> Self {
self.saveout_base = x.to_string();
self
}
pub fn with_font(mut self, path: &str) -> Self {
self.font = Self::load_font(Some(path)).unwrap();
pub fn with_saveout(mut self, x: &str) -> Self {
self.saveout = Some(x.to_string());
self
}
/// Save annotated images to `runs` folder
pub fn save(&self, image: &RgbaImage, saveout: &str) {
let mut saveout = std::path::PathBuf::from("runs").join(saveout);
if !saveout.exists() {
std::fs::create_dir_all(&saveout).unwrap();
}
saveout.push(string_now("-"));
let saveout = format!("{}.png", saveout.to_str().unwrap());
match image.save(&saveout) {
Err(err) => println!("{} Saving failed: {:?}", CROSS_MARK, err),
Ok(_) => println!("{} Annotated image saved to: {}", CHECK_MARK, saveout),
}
pub fn with_font(mut self, path: &str) -> Result<Self> {
self.font = Self::load_font(Some(path))?;
Ok(self)
}
/// Annotate images
/// Create folders for saving annotated results. e.g., `./runs/xxx`
pub fn saveout(&self) -> Result<std::path::PathBuf> {
let subs = match &self.saveout {
Some(x) => vec![self.saveout_base.as_str(), x.as_str()],
None => vec![self.saveout_base.as_str()],
};
Dir::Currnet.raw_path_with_subs(&subs)
}
/// Annotate images without returning the plotted results
pub fn annotate(&self, imgs: &[DynamicImage], ys: &[Y]) {
let _ = self.plot(imgs, ys);
}
/// Plot images and return plotted images(RGBA8)
pub fn plot(&self, imgs: &[DynamicImage], ys: &[Y]) -> Result<Vec<RgbaImage>> {
let span = tracing::span!(tracing::Level::INFO, "YOLO-new");
let _guard = span.enter();
let mut vs: Vec<RgbaImage> = Vec::new();
// annotate
for (img, y) in imgs.iter().zip(ys.iter()) {
let mut img_rgba = img.to_rgba8();
@ -374,16 +395,23 @@ impl Annotator {
}
// save
if let Some(saveout) = &self.saveout {
self.save(&img_rgba, saveout);
let saveout = self.saveout()?.join(format!("{}.png", string_now("-")));
match img_rgba.save(&saveout) {
Err(err) => tracing::error!("{} Saving failed: {:?}", CROSS_MARK, err),
Ok(_) => {
tracing::info!("{} Annotated image saved to: {:?}", CHECK_MARK, saveout);
}
}
vs.push(img_rgba);
}
Ok(vs)
}
/// Plot bounding bboxes and labels
pub fn plot_bboxes(&self, img: &mut RgbaImage, bboxes: &[Bbox]) {
// bbox
for bbox in bboxes.iter() {
// bbox
let short_side_threshold =
bbox.width().min(bbox.height()) * self.bboxes_thickness_threshold;
let thickness = self.bboxes_thickness.min(short_side_threshold as usize);
@ -723,11 +751,11 @@ impl Annotator {
/// Load custom font
fn load_font(path: Option<&str>) -> Result<FontVec> {
let path_font = match path {
None => auto_load("Arial.ttf", Some("fonts"))?,
None => Hub::new()?.fetch("fonts/Arial.ttf")?.commit()?,
Some(p) => p.into(),
};
let buffer = std::fs::read(path_font)?;
Ok(FontVec::try_from_vec(buffer.to_owned()).unwrap())
Ok(FontVec::try_from_vec(buffer.to_owned())?)
}
/// Pick color from palette
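
A short, hedged sketch of the reworked `Annotator` builder shown above: `with_saveout_base` selects the output root (the default remains `runs`), `with_font` is now fallible, and `plot` returns the annotated RGBA images instead of only writing them to disk. It assumes `Annotator` and `Y` are re-exported at the crate root and that the caller also depends on `anyhow` and the `image` crate for `DynamicImage`:

```rust
use image::DynamicImage;
use usls::{Annotator, Y};

fn annotate_batch(imgs: &[DynamicImage], ys: &[Y]) -> anyhow::Result<()> {
    // `with_font` now returns a Result, e.g. Annotator::new().with_font("path/to/font.ttf")?
    let annotator = Annotator::new()
        .with_bboxes_thickness(4)
        .with_saveout_base("outputs") // write under ./outputs/... instead of the default ./runs/...
        .with_saveout("demo");        // per-run subfolder

    // Annotate and save to disk only:
    annotator.annotate(imgs, ys);

    // Or keep the plotted frames (RGBA8) for further processing:
    let plotted = annotator.plot(imgs, ys)?;
    println!("plotted {} frame(s)", plotted.len());
    Ok(())
}
```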

View File

@ -1,91 +1,336 @@
use crate::{CHECK_MARK, SAFE_CROSS_MARK};
use anyhow::{anyhow, bail, Result};
use anyhow::{anyhow, Result};
use image::DynamicImage;
use indicatif::{ProgressBar, ProgressStyle};
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};
use std::sync::mpsc;
use video_rs::{
encode::{Encoder, Settings},
time::Time,
Decoder, Url,
};
/// Dataloader for load images
#[derive(Debug, Clone)]
pub struct DataLoader {
pub paths: VecDeque<PathBuf>,
pub recursive: bool,
pub batch: usize,
use crate::{
build_progress_bar, string_now, Dir, Hub, Location, MediaType, CHECK_MARK, CROSS_MARK,
};
type TempReturnType = (Vec<DynamicImage>, Vec<PathBuf>);
pub struct DataLoaderIterator {
receiver: mpsc::Receiver<TempReturnType>,
progress_bar: Option<ProgressBar>,
}
impl Iterator for DataLoader {
type Item = (Vec<DynamicImage>, Vec<PathBuf>);
impl Iterator for DataLoaderIterator {
type Item = TempReturnType;
fn next(&mut self) -> Option<Self::Item> {
if self.paths.is_empty() {
None
} else {
let mut yis: Vec<DynamicImage> = Vec::new();
let mut yps: Vec<PathBuf> = Vec::new();
loop {
let path = self.paths.pop_front().unwrap();
match Self::try_read(&path) {
Err(err) => {
println!("{SAFE_CROSS_MARK} {err}");
}
Ok(x) => {
yis.push(x);
yps.push(path);
}
match &self.progress_bar {
None => self.receiver.recv().ok(),
Some(progress_bar) => match self.receiver.recv().ok() {
Some(item) => {
progress_bar.inc(1);
Some(item)
}
if self.paths.is_empty() || yis.len() == self.batch {
break;
None => {
progress_bar.set_prefix(" Iterated");
progress_bar.set_style(
indicatif::ProgressStyle::with_template(crate::PROGRESS_BAR_STYLE_GREEN)
.unwrap(),
);
progress_bar.finish();
None
}
}
Some((yis, yps))
},
}
}
}
impl Default for DataLoader {
fn default() -> Self {
Self {
batch: 1,
recursive: false,
paths: Default::default(),
impl IntoIterator for DataLoader {
type Item = TempReturnType;
type IntoIter = DataLoaderIterator;
fn into_iter(self) -> Self::IntoIter {
let progress_bar = if self.with_pb {
build_progress_bar(
self.nf / self.batch_size as u64,
" Iterating",
Some(&format!("{:?}", self.media_type)),
crate::PROGRESS_BAR_STYLE_CYAN,
)
.ok()
} else {
None
};
DataLoaderIterator {
receiver: self.receiver,
progress_bar,
}
}
}
/// A structure designed to load and manage image, video, or stream data.
/// It handles local file paths, remote URLs, and live streams, supporting both batch processing
/// and optional progress bar display. The structure also supports video decoding through
/// `video_rs` for video and stream data.
pub struct DataLoader {
/// Queue of paths for images.
paths: Option<VecDeque<PathBuf>>,
/// Media type of the source (image, video, stream, etc.).
media_type: MediaType,
/// Batch size for iteration, determining how many files are processed at once.
batch_size: usize,
/// Buffer size for the channel, used to manage the buffer between producer and consumer.
bound: usize,
/// Receiver for processed data.
receiver: mpsc::Receiver<TempReturnType>,
/// Video decoder for handling video or stream data.
decoder: Option<video_rs::decode::Decoder>,
/// Number of images or frames; `u64::MAX` is used for live streams (indicating no limit).
nf: u64,
/// Flag indicating whether to display a progress bar.
with_pb: bool,
}
impl DataLoader {
pub fn load<P: AsRef<Path>>(mut self, source: P) -> Result<Self> {
self.paths = match source.as_ref() {
s if s.is_file() => VecDeque::from([s.to_path_buf()]),
s if s.is_dir() => WalkDir::new(s)
.into_iter()
.filter_entry(|e| !Self::_is_hidden(e))
.filter_map(|entry| match entry {
Err(_) => None,
Ok(entry) => {
if entry.file_type().is_dir() {
return None;
}
if !self.recursive && entry.depth() > 1 {
return None;
}
Some(entry.path().to_path_buf())
}
})
.collect::<VecDeque<_>>(),
// s if s.starts_with("rtsp://") || s.starts_with("rtmp://") || s.starts_with("http://")|| s.starts_with("https://") => todo!(),
s if !s.exists() => bail!("{s:?} Not Exists"),
_ => todo!(),
pub fn new(source: &str) -> Result<Self> {
let span = tracing::span!(tracing::Level::INFO, "DataLoader-new");
let _guard = span.enter();
// Number of frames or stream
let mut nf = 0;
// paths & media_type
let source_path = Path::new(source);
let (paths, media_type) = match source_path.exists() {
false => {
// remote
nf = 1;
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_url(source),
)
}
true => {
// local
if source_path.is_file() {
nf = 1;
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_path(source_path),
)
} else if source_path.is_dir() {
let paths_sorted = Self::load_from_folder(source_path)?;
nf = paths_sorted.len() as _;
(
Some(VecDeque::from(paths_sorted)),
MediaType::Image(Location::Local),
)
} else {
(None, MediaType::Unknown)
}
}
};
println!("{CHECK_MARK} Found file x{}", self.paths.len());
if let MediaType::Unknown = media_type {
anyhow::bail!("Could not locate the source path: {:?}", source_path);
}
// video decoder
let decoder = match &media_type {
MediaType::Video(Location::Local) => Some(Decoder::new(source_path)?),
MediaType::Video(Location::Remote) | MediaType::Stream => {
let location: video_rs::location::Location = source.parse::<Url>()?.into();
Some(Decoder::new(location)?)
}
_ => None,
};
// video & stream frames
if let Some(decoder) = &decoder {
nf = match decoder.frames() {
Err(_) => u64::MAX,
Ok(0) => u64::MAX,
Ok(x) => x,
}
}
// summary
tracing::info!("{} Found {:?} x{}", CHECK_MARK, media_type, nf,);
Ok(DataLoader {
paths,
media_type,
bound: 50,
receiver: mpsc::sync_channel(1).1,
batch_size: 1,
decoder,
nf,
with_pb: true,
})
}
pub fn with_bound(mut self, x: usize) -> Self {
self.bound = x;
self
}
pub fn with_batch(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
pub fn with_progress_bar(mut self, x: bool) -> Self {
self.with_pb = x;
self
}
pub fn build(mut self) -> Result<Self> {
let (sender, receiver) = mpsc::sync_channel::<TempReturnType>(self.bound);
self.receiver = receiver;
let batch_size = self.batch_size;
let data = self.paths.take().unwrap_or_default();
let media_type = self.media_type.clone();
let decoder = self.decoder.take();
// Spawn the producer thread
std::thread::spawn(move || {
DataLoader::producer_thread(sender, data, batch_size, media_type, decoder);
});
Ok(self)
}
fn producer_thread(
sender: mpsc::SyncSender<TempReturnType>,
mut data: VecDeque<PathBuf>,
batch_size: usize,
media_type: MediaType,
mut decoder: Option<video_rs::decode::Decoder>,
) {
let span = tracing::span!(tracing::Level::INFO, "DataLoader-producer-thread");
let _guard = span.enter();
let mut yis: Vec<DynamicImage> = Vec::with_capacity(batch_size);
let mut yps: Vec<PathBuf> = Vec::with_capacity(batch_size);
match media_type {
MediaType::Image(_) => {
while let Some(path) = data.pop_front() {
match Self::try_read(&path) {
Err(err) => {
tracing::warn!("{} {:?} | {:?}", CROSS_MARK, path, err);
continue;
}
Ok(img) => {
yis.push(img);
yps.push(path);
}
}
if yis.len() == batch_size
&& sender
.send((std::mem::take(&mut yis), std::mem::take(&mut yps)))
.is_err()
{
break;
}
}
}
MediaType::Video(_) | MediaType::Stream => {
if let Some(decoder) = decoder.as_mut() {
let (w, h) = decoder.size();
let frames = decoder.decode_iter();
for frame in frames {
match frame {
Ok((ts, frame)) => {
let rgb8: image::ImageBuffer<image::Rgb<u8>, Vec<u8>> =
match image::ImageBuffer::from_raw(
w as _,
h as _,
frame.into_raw_vec_and_offset().0,
) {
Some(x) => x,
None => continue,
};
let img = image::DynamicImage::from(rgb8);
yis.push(img);
yps.push(ts.to_string().into());
if yis.len() == batch_size
&& sender
.send((std::mem::take(&mut yis), std::mem::take(&mut yps)))
.is_err()
{
break;
}
}
Err(_) => break,
}
}
}
}
_ => todo!(),
}
// Deal with remaining data
if !yis.is_empty() && sender.send((yis, yps)).is_err() {
tracing::info!("Receiver dropped, stopping production");
}
}
pub fn load_from_folder<P: AsRef<std::path::Path>>(path: P) -> Result<Vec<std::path::PathBuf>> {
let mut paths: Vec<PathBuf> = std::fs::read_dir(path)?
.filter_map(|entry| entry.ok())
.filter_map(|entry| {
let path = entry.path();
if path.is_file() {
Some(path)
} else {
None
}
})
.collect();
paths.sort_by(|a, b| {
let a_name = a.file_name().and_then(|s| s.to_str());
let b_name = b.file_name().and_then(|s| s.to_str());
match (a_name, b_name) {
(Some(a_str), Some(b_str)) => natord::compare(a_str, b_str),
_ => std::cmp::Ordering::Equal,
}
});
Ok(paths)
}
pub fn try_read<P: AsRef<Path>>(path: P) -> Result<DynamicImage> {
let img = image::ImageReader::open(&path)
let mut path = path.as_ref().to_path_buf();
// try to fetch from hub or local cache
if !path.exists() {
let p = Hub::new()?.fetch(path.to_str().unwrap())?.commit()?;
path = PathBuf::from(&p);
}
let img = Self::read_into_rgb8(path)?;
Ok(DynamicImage::from(img))
}
fn read_into_rgb8<P: AsRef<Path>>(path: P) -> Result<image::RgbImage> {
let path = path.as_ref();
let img = image::ImageReader::open(path)
.map_err(|err| {
anyhow!(
"Failed to open image at {:?}. Error: {:?}",
path.as_ref(),
path.display(),
err
)
})?
@ -93,7 +338,7 @@ impl DataLoader {
.map_err(|err| {
anyhow!(
"Failed to make a format guess based on the content: {:?}. Error: {:?}",
path.as_ref(),
path.display(),
err
)
})?
@ -101,33 +346,64 @@ impl DataLoader {
.map_err(|err| {
anyhow!(
"Failed to decode image at {:?}. Error: {:?}",
path.as_ref(),
path.display(),
err
)
})?
.into_rgb8();
Ok(DynamicImage::from(img))
Ok(img)
}
pub fn with_batch(mut self, x: usize) -> Self {
self.batch = x;
self
}
/// Convert images into a video
pub fn is2v<P: AsRef<Path>>(source: P, subs: &[&str], fps: usize) -> Result<()> {
let paths = Self::load_from_folder(source.as_ref())?;
if paths.is_empty() {
anyhow::bail!("No images found.");
}
let mut encoder = None;
let mut position = Time::zero();
let saveout = Dir::Currnet
.raw_path_with_subs(subs)?
.join(format!("{}.mp4", string_now("-")));
let pb = build_progress_bar(paths.len() as u64, " Converting", saveout.to_str(),"{prefix:.cyan.bold} {msg} |{bar}| ({percent_precise}%, {human_pos}/{human_len}, {per_sec})")?;
pub fn with_recursive(mut self, x: bool) -> Self {
self.recursive = x;
self
}
// loop
for path in paths {
pb.inc(1);
let img = Self::read_into_rgb8(path)?;
let (w, h) = img.dimensions();
pub fn paths(&self) -> &VecDeque<PathBuf> {
&self.paths
}
// build encoder at the 1st time
if encoder.is_none() {
let settings = Settings::preset_h264_yuv420p(w as _, h as _, false);
encoder = Some(Encoder::new(saveout.clone(), settings)?);
}
fn _is_hidden(entry: &DirEntry) -> bool {
entry
.file_name()
.to_str()
.map(|s| s.starts_with('.'))
.unwrap_or(false)
// write video
if let Some(encoder) = encoder.as_mut() {
let raw_data = img.into_raw();
let frame = ndarray::Array3::from_shape_vec((h as usize, w as usize, 3), raw_data)
.expect("Failed to create ndarray from raw image data");
// encode and update
encoder.encode(&frame, position)?;
position = position.aligned_with(Time::from_nth_of_a_second(fps)).add();
}
}
match &mut encoder {
Some(vencoder) => vencoder.finish()?,
None => anyhow::bail!("Found no video encoder."),
}
// update
pb.set_prefix(" Downloaded");
pb.set_prefix(" Converted");
pb.set_style(ProgressStyle::with_template(
"{prefix:.green.bold} {msg} in {elapsed}",
)?);
pb.finish();
Ok(())
}
}
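
To tie the rewritten `DataLoader` together, a hedged usage sketch follows (the video path is hypothetical). Frames are decoded on a background producer thread and handed over through a bounded channel, so `with_bound` caps how many batches can be buffered ahead of the consumer and `with_batch` sets how many frames each iteration yields:

```rust
use usls::DataLoader;

fn main() -> anyhow::Result<()> {
    let dl = DataLoader::new("../demo.mp4")? // local video; URLs and rtsp:// streams work the same way
        .with_batch(4)           // frames per yielded batch
        .with_bound(100)         // capacity of the producer/consumer channel
        .with_progress_bar(true)
        .build()?;               // spawns the producer thread

    for (frames, stamps) in dl {
        // For video and stream sources the second element carries frame timestamps.
        println!("batch of {} frame(s), first timestamp: {:?}", frames.len(), stamps.first());
    }
    Ok(())
}
```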

119
src/core/dir.rs Normal file
View File

@ -0,0 +1,119 @@
/// Represents various directories on the system, including Home, Cache, Config, and more.
#[derive(Debug)]
pub enum Dir {
Home,
Cache,
Config,
Currnet,
Document,
Data,
Download,
Desktop,
Audio,
Picture,
}
impl Dir {
/// Retrieves the base path for the specified directory type, optionally appending the `usls` subdirectory.
///
/// # Arguments
/// * `raw` - If `true`, returns the base path without adding the `usls` subdirectory.
///
/// # Returns
/// * `Result<PathBuf>` - The base path for the directory.
fn get_path(&self, raw: bool) -> anyhow::Result<std::path::PathBuf> {
let base_path = match self {
Dir::Home => dirs::home_dir(),
Dir::Cache => dirs::cache_dir(),
Dir::Config => dirs::config_dir(),
Dir::Currnet => std::env::current_dir().ok(),
_ => None,
};
let mut path = base_path.ok_or_else(|| {
anyhow::anyhow!("Unsupported operating system. Now supports Linux, MacOS, Windows.")
})?;
if !raw {
if let Dir::Home = self {
path.push(".usls");
} else {
path.push("usls");
}
}
Ok(path)
}
/// Returns the default path for the `usls` directory, creating it automatically if it does not exist.
///
/// Examples:
/// `~/.cache/usls`, `~/.config/usls`, `~/.usls`.
///
/// # Returns
/// * `Result<PathBuf>` - The default `usls` directory path.
pub fn path(&self) -> anyhow::Result<std::path::PathBuf> {
let d = self.get_path(false)?;
self.create_directory(&d)?;
Ok(d)
}
/// Returns the raw path for the directory without adding the `usls` subdirectory.
///
/// Examples:
/// `~/.cache`, `~/.config`, `~`.
///
/// # Returns
/// * `Result<PathBuf>` - The raw directory path.
pub fn raw_path(&self) -> anyhow::Result<std::path::PathBuf> {
self.get_path(true)
}
/// Constructs a path to the `usls` directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/usls/sub1/sub2/sub3`, `~/.config/usls/sub1/sub2`, `~/.usls/sub1/sub2`.
///
/// # Arguments
/// * `subs` - A slice of strings representing subdirectories to append.
///
/// # Returns
/// * `Result<PathBuf>` - The resulting directory path.
pub fn path_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.get_path(false)?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
/// Constructs a path to a specified directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/sub1/sub2/sub3`, `~/.config/sub1/sub2`, `~/sub1/sub2`.
///
/// # Arguments
/// * `subs` - A slice of strings representing subdirectories to append.
///
/// # Returns
/// * `Result<PathBuf>` - The resulting directory path.
pub fn raw_path_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.get_path(true)?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
/// Appends subdirectories to the given base path and creates the directories if they don't exist.
fn append_subs(&self, path: &mut std::path::PathBuf, subs: &[&str]) -> anyhow::Result<()> {
for sub in subs {
path.push(sub);
}
self.create_directory(path)?;
Ok(())
}
/// Creates the specified directory if it does not exist.
fn create_directory(&self, path: &std::path::PathBuf) -> anyhow::Result<()> {
if !path.exists() {
std::fs::create_dir_all(path)?;
}
Ok(())
}
}
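
A minimal usage sketch of the `Dir` API added above (not part of this commit; the paths shown in comments are illustrative):

```rust
use usls::Dir;

fn main() -> anyhow::Result<()> {
    // e.g. ~/.cache/usls, created automatically if it does not exist
    let cache = Dir::Cache.path()?;
    println!("cache dir: {}", cache.display());

    // e.g. ~/.cache/usls/yolo/v8, nested subdirectories are created as needed
    let model_dir = Dir::Cache.path_with_subs(&["yolo", "v8"])?;
    println!("model dir: {}", model_dir.display());
    Ok(())
}
```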

432
src/core/hub.rs Normal file
View File

@ -0,0 +1,432 @@
use anyhow::{Context, Result};
use indicatif::{ProgressBar, ProgressStyle};
use serde::{Deserialize, Serialize};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use crate::Dir;
/// Represents a downloadable asset in a release
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Asset {
pub name: String,
pub browser_download_url: String,
pub size: u64,
}
/// Represents a GitHub release
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Release {
pub tag_name: String,
pub assets: Vec<Asset>,
}
/// Manages interactions with a GitHub repository's releases
pub struct Hub {
/// GitHub API releases endpoint
_gh_api_release: String,
/// GitHub repository owner
owner: String,
/// GitHub repository name
repo: String,
/// Optional list of releases fetched from GitHub
releases: Option<Vec<Release>>,
/// Path to cache file
cache: PathBuf,
/// Optional release tag to be used
tag: Option<String>,
/// Filename for the asset, used in cache management
file_name: Option<String>,
file_size: Option<u64>,
/// Full URL constructed for downloading the asset
url: Option<String>,
/// Local path where the asset will be stored
path: PathBuf,
/// Directory to store the downloaded file
to: Dir,
/// Download timeout in seconds
timeout: u64,
/// Time to live (cache duration)
ttl: std::time::Duration,
/// Maximum attempts for downloading
max_attempts: u32,
}
impl std::fmt::Debug for Hub {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Hub")
.field("owner", &self.owner)
.field("repo", &self.repo)
.field("cache", &self.cache)
.field("path", &self.path)
.field("releases", &self.releases.as_ref().map(|x| x.len()))
.field("ttl", &self.ttl)
.field("max_attempts", &self.max_attempts)
.finish()
}
}
impl Default for Hub {
fn default() -> Self {
let owner = "jamjamjon".to_string();
let repo = "assets".to_string();
let _gh_api_release = format!("https://api.github.com/repos/{}/{}/releases", owner, repo);
Self {
owner,
repo,
_gh_api_release,
url: None,
path: PathBuf::new(),
to: Dir::Cache,
tag: None,
file_name: None,
file_size: None,
releases: None,
cache: PathBuf::new(),
timeout: 3000,
max_attempts: 3,
ttl: std::time::Duration::from_secs(10 * 60),
}
}
}
impl Hub {
pub fn new() -> Result<Self> {
let mut to = Dir::Cache;
let cache = to
.path()
.or_else(|_| {
to = Dir::Home;
to.path()
})?
.join("cache_releases");
Ok(Self {
to,
cache,
..Default::default()
})
}
pub fn with_owner(mut self, owner: &str) -> Self {
self.owner = owner.to_string();
self
}
pub fn with_repo(mut self, repo: &str) -> Self {
self.repo = repo.to_string();
self
}
pub fn with_ttl(mut self, x: u64) -> Self {
self.ttl = std::time::Duration::from_secs(x);
self
}
pub fn with_timeout(mut self, x: u64) -> Self {
self.timeout = x;
self
}
pub fn with_max_attempts(mut self, x: u32) -> Self {
self.max_attempts = x;
self
}
pub fn fetch(mut self, s: &str) -> Result<Self> {
// try to fetch from hub or local cache
let p = PathBuf::from(s);
match p.exists() {
true => self.path = p,
false => {
// check local cache 1st
let p_cache = self.cache.with_file_name(s);
if p_cache.exists() {
self.path = p_cache;
} else {
// check remote list then
match s.split_once('/') {
Some((tag, file_name)) => {
// Extract tag and file from input string
self.tag = Some(tag.to_string());
self.file_name = Some(file_name.to_string());
// Check if releases are already loaded in memory
if self.releases.is_none() {
self.releases = Some(self.connect_remote()?);
}
if let Some(releases) = &self.releases {
// Validate the tag
let tags: Vec<&str> =
releases.iter().map(|x| x.tag_name.as_str()).collect();
if !tags.contains(&tag) {
anyhow::bail!(
"Hub tag '{}' not found in releases. Available tags: {:?}",
tag,
tags
);
}
// Validate the file
if let Some(release) = releases.iter().find(|r| r.tag_name == tag) {
let files: Vec<&str> =
release.assets.iter().map(|x| x.name.as_str()).collect();
if !files.contains(&file_name) {
anyhow::bail!(
"Hub file '{}' not found in tag '{}'. Available files: {:?}",
file_name,
tag,
files
);
} else {
for f_ in release.assets.iter() {
if f_.name.as_str() == file_name {
self.url = Some(f_.browser_download_url.clone());
self.file_size = Some(f_.size);
break;
}
}
}
}
self.path = self.to.path_with_subs(&[tag])?.join(file_name);
}
}
_ => anyhow::bail!(
"Download failed due to invalid format. Expected: <tag>/<file>, got: {}",
s
),
}
}
}
}
Ok(self)
}
/// Fetch releases from GitHub and cache them
fn fetch_and_cache_releases(url: &str, cache_path: &Path) -> Result<String> {
let response = ureq::get(url)
.set("User-Agent", "my-app")
.call()
.context("Failed to fetch releases from remote")?;
if response.status() != 200 {
anyhow::bail!(
"Failed to fetch releases from remote ({}): status {} - {}",
url,
response.status(),
response.status_text()
);
}
let body = response
.into_string()
.context("Failed to read response body")?;
// Ensure cache directory exists
let parent_dir = cache_path
.parent()
.context("Invalid cache path; no parent directory found")?;
std::fs::create_dir_all(parent_dir)
.with_context(|| format!("Failed to create cache directory: {:?}", parent_dir))?;
// Create temporary file
let mut temp_file = tempfile::NamedTempFile::new_in(parent_dir)
.context("Failed to create temporary cache file")?;
// Write data to temporary file
temp_file
.write_all(body.as_bytes())
.context("Failed to write to temporary cache file")?;
// Persist temporary file as the cache
temp_file.persist(cache_path).with_context(|| {
format!("Failed to persist temporary cache file to {:?}", cache_path)
})?;
Ok(body)
}
pub fn tags(&mut self) -> Option<Vec<&str>> {
if self.releases.is_none() {
self.releases = self.connect_remote().ok();
}
self.releases
.as_ref()
.map(|releases| releases.iter().map(|x| x.tag_name.as_str()).collect())
}
pub fn files(&mut self, tag: &str) -> Option<Vec<&str>> {
if self.releases.is_none() {
self.releases = self.connect_remote().ok();
}
self.releases.as_ref().map(|releases| {
releases
.iter()
.find(|r| r.tag_name == tag)
.map(|a| a.assets.iter().map(|x| x.name.as_str()).collect())
})?
}
pub fn connect_remote(&mut self) -> Result<Vec<Release>> {
let span = tracing::span!(tracing::Level::INFO, "Hub-connect_remote");
let _guard = span.enter();
let should_download = if !self.cache.exists() {
tracing::info!("No cache found, fetching data from GitHub");
true
} else {
match std::fs::metadata(&self.cache)?.modified() {
Err(_) => {
tracing::info!("Cannot get file modified time, fetching new data from GitHub");
true
}
Ok(modified_time) => {
if std::time::SystemTime::now().duration_since(modified_time)? < self.ttl {
tracing::info!("Using cached data");
false
} else {
tracing::info!("Cache expired, fetching new data from GitHub");
true
}
}
}
};
let body = if should_download {
Self::fetch_and_cache_releases(&self._gh_api_release, &self.cache)?
} else {
std::fs::read_to_string(&self.cache)?
};
let releases: Vec<Release> = serde_json::from_str(&body)?;
Ok(releases)
}
/// Commit the downloaded file, downloading if necessary
pub fn commit(&self) -> Result<String> {
if let Some(url) = &self.url {
// Download if the file does not exist or if its size does not match the expected size
if !self.path.is_file()
|| self.path.is_file()
&& Some(std::fs::metadata(&self.path)?.len()) != self.file_size
{
let name = format!(
"{}/{}",
self.tag.as_ref().unwrap(),
self.file_name.as_ref().unwrap()
);
Self::download(
url.as_str(),
&self.path,
Some(&name),
Some(self.timeout),
Some(self.max_attempts),
)?;
}
}
self.path
.to_str()
.map(|s| s.to_string())
.with_context(|| format!("Failed to convert PathBuf: {:?} to String", self.path))
}
/// Download a file from a github release to a specified path with a progress bar
pub fn download<P: AsRef<Path> + std::fmt::Debug>(
src: &str,
dst: P,
prompt: Option<&str>,
timeout: Option<u64>,
max_attempts: Option<u32>,
) -> Result<()> {
// TODO: other url, not just github release page
let max_attempts = max_attempts.unwrap_or(2);
let timeout_duration = std::time::Duration::from_secs(timeout.unwrap_or(2000));
let agent = ureq::AgentBuilder::new().try_proxy_from_env(true).build();
for i_try in 0..max_attempts {
let resp = agent
.get(src)
.timeout(timeout_duration)
.call()
.with_context(|| {
format!(
"Failed to download file from {}, timeout: {:?}",
src, timeout_duration
)
})?;
let ntotal = resp
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.context("Content-Length header is missing or invalid")?;
let pb = ProgressBar::new(ntotal);
pb.set_style(
ProgressStyle::with_template(
"{prefix:.cyan.bold} {msg} |{bar}| ({percent_precise}%, {binary_bytes}/{binary_total_bytes}, {binary_bytes_per_sec})",
)?
.progress_chars("██ "),
);
pb.set_prefix(if i_try == 0 {
" Fetching"
} else {
" Re-Fetching"
});
pb.set_message(prompt.unwrap_or_default().to_string());
let mut reader = resp.into_reader();
let mut buffer = [0; 256];
let mut downloaded_bytes = 0usize;
let mut file = std::fs::File::create(&dst)
.with_context(|| format!("Failed to create destination file: {:?}", dst))?;
loop {
let bytes_read = reader.read(&mut buffer)?;
if bytes_read == 0 {
break;
}
file.write_all(&buffer[..bytes_read])
.context("Failed to write to file")?;
downloaded_bytes += bytes_read;
pb.inc(bytes_read as u64);
}
// check size
if downloaded_bytes as u64 != ntotal {
if i_try == max_attempts - 1 {
anyhow::bail!("Exceeded the maximum number of download attempts");
}
continue;
}
// update
pb.set_prefix(" Downloaded");
pb.set_style(ProgressStyle::with_template(
"{prefix:.green.bold} {msg} ({binary_total_bytes}) in {elapsed}",
)?);
pb.finish();
break;
}
Ok(())
}
}
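
A usage sketch of the `Hub` fetch/commit flow implemented above (not part of this commit; the `<tag>/<file>` name is illustrative):

```rust
use usls::Hub;

fn main() -> anyhow::Result<()> {
    // An existing local path is returned as-is; otherwise "<tag>/<file>" is
    // looked up in the GitHub releases of the default assets repository.
    let path = Hub::new()?
        .with_ttl(600)             // keep the cached release list for 10 minutes
        .fetch("images/bus.jpg")?  // illustrative <tag>/<file> name
        .commit()?;                // download only if missing or size mismatch
    println!("resolved to: {path}");
    Ok(())
}
```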

64
src/core/media.rs Normal file
View File

@ -0,0 +1,64 @@
use crate::{AUDIO_EXTENSIONS, IMAGE_EXTENSIONS, STREAM_PROTOCOLS, VIDEO_EXTENSIONS};
#[derive(Debug, Clone)]
pub enum MediaType {
Image(Location),
Video(Location),
Audio(Location),
Stream,
Unknown,
}
#[derive(Debug, Clone)]
pub enum Location {
Local,
Remote,
}
#[derive(Debug, Clone)]
pub enum StreamType {
Pre,
Live,
}
impl MediaType {
pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self {
let extension = path
.as_ref()
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("")
.to_lowercase();
if IMAGE_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Image(Location::Local)
} else if VIDEO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Video(Location::Local)
} else if AUDIO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Audio(Location::Local)
} else {
MediaType::Unknown
}
}
pub fn from_url(url: &str) -> Self {
if IMAGE_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Image(Location::Remote)
} else if VIDEO_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Video(Location::Remote)
} else if STREAM_PROTOCOLS
.iter()
.any(|&protocol| url.starts_with(protocol))
{
MediaType::Stream
} else {
MediaType::Unknown
}
}
}
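
A short sketch of how these classification helpers behave (not part of this commit; the example paths and URLs are made up):

```rust
use usls::MediaType;

fn main() {
    // classified by file extension
    let a = MediaType::from_path("./assets/bus.jpg"); // Image(Local)
    let b = MediaType::from_path("../hall.mp4");      // Video(Local)

    // classified by URL suffix or protocol prefix
    let c = MediaType::from_url("https://example.com/clip.mp4"); // Video(Remote)
    let d = MediaType::from_url("rtsp://192.168.1.10/ch1");      // Stream

    println!("{a:?} {b:?} {c:?} {d:?}");
}
```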

View File

@ -1,8 +1,11 @@
mod annotator;
mod dataloader;
mod device;
mod dir;
mod dynconf;
mod hub;
mod logits_sampler;
mod media;
mod metric;
mod min_opt_max;
pub mod onnx;
@ -19,8 +22,11 @@ mod xs;
pub use annotator::Annotator;
pub use dataloader::DataLoader;
pub use device::Device;
pub use dir::Dir;
pub use dynconf::DynConf;
pub use hub::Hub;
pub use logits_sampler::LogitsSampler;
pub use media::*;
pub use metric::Metric;
pub use min_opt_max::MinOptMax;
pub use ops::Ops;

View File

@ -3,9 +3,8 @@
use anyhow::Result;
use crate::{
auto_load,
models::{SamKind, SapiensTask, YOLOPreds, YOLOTask, YOLOVersion},
Device, MinOptMax,
Device, Hub, MinOptMax,
};
/// Options for building models
@ -15,14 +14,14 @@ pub struct Options {
pub device: Device,
pub profile: bool,
pub num_dry_run: usize,
pub i00: Option<MinOptMax>, // 1st input, axis 0, batch usually
pub i01: Option<MinOptMax>, // 1st input, axis 1
pub i00: Option<MinOptMax>, // the 1st input, axis 0, batch usually
pub i01: Option<MinOptMax>, // the 1st input, axis 1
pub i02: Option<MinOptMax>,
pub i03: Option<MinOptMax>,
pub i04: Option<MinOptMax>,
pub i05: Option<MinOptMax>,
pub i10: Option<MinOptMax>, // 2nd input, axis 0
pub i11: Option<MinOptMax>, // 2nd input, axis 1
pub i10: Option<MinOptMax>, // the 2nd input, axis 0
pub i11: Option<MinOptMax>, // the 2nd input, axis 1
pub i12: Option<MinOptMax>,
pub i13: Option<MinOptMax>,
pub i14: Option<MinOptMax>,
@ -101,7 +100,7 @@ impl Default for Options {
onnx_path: String::new(),
device: Device::Cuda(0),
profile: false,
num_dry_run: 5,
num_dry_run: 3,
i00: None,
i01: None,
i02: None,
@ -182,8 +181,12 @@ impl Default for Options {
}
impl Options {
pub fn new() -> Self {
Default::default()
}
pub fn with_model(mut self, onnx_path: &str) -> Result<Self> {
self.onnx_path = auto_load(onnx_path, Some("models"))?;
self.onnx_path = Hub::new()?.fetch(onnx_path)?.commit()?;
Ok(self)
}
@ -268,7 +271,7 @@ impl Options {
}
pub fn with_vocab(mut self, vocab: &str) -> Result<Self> {
self.vocab = Some(auto_load(vocab, Some("tokenizers"))?);
self.vocab = Some(Hub::new()?.fetch(vocab)?.commit()?);
Ok(self)
}
@ -278,7 +281,7 @@ impl Options {
}
pub fn with_tokenizer(mut self, tokenizer: &str) -> Result<Self> {
self.tokenizer = Some(auto_load(tokenizer, Some("tokenizers"))?);
self.tokenizer = Some(Hub::new()?.fetch(tokenizer)?.commit()?);
Ok(self)
}

View File

@ -8,7 +8,8 @@ use prost::Message;
use std::collections::HashSet;
use crate::{
home_dir, human_bytes, onnx, Device, MinOptMax, Ops, Options, Ts, Xs, CHECK_MARK, CROSS_MARK, X,
build_progress_bar, human_bytes, onnx, Device, Dir, MinOptMax, Ops, Options, Ts, Xs,
CHECK_MARK, CROSS_MARK, X,
};
/// Ort Tensor Attrs: name, data_type, dims
@ -37,6 +38,9 @@ pub struct OrtEngine {
impl OrtEngine {
pub fn new(config: &Options) -> Result<Self> {
let span = tracing::span!(tracing::Level::INFO, "OrtEngine-new");
let _guard = span.enter();
// onnx graph
let model_proto = Self::load_onnx(&config.onnx_path)?;
let graph = match &model_proto.graph {
@ -150,13 +154,13 @@ impl OrtEngine {
}
Device::Cuda(device_id) => {
Self::build_cuda(&builder, device_id).unwrap_or_else(|err| {
tracing::warn!("{err}, Using cpu");
device = Device::Cpu(0);
println!("{err}");
})
}
Device::CoreML(_) => Self::build_coreml(&builder).unwrap_or_else(|err| {
tracing::warn!("{err}, Using cpu");
device = Device::Cpu(0);
println!("{err}");
}),
Device::Cpu(_) => {
Self::build_cpu(&builder)?;
@ -169,7 +173,7 @@ impl OrtEngine {
.commit_from_file(&config.onnx_path)?;
// summary
println!(
tracing::info!(
"{CHECK_MARK} Backend: ONNXRuntime | Opset: {} | Device: {:?} | Params: {}",
model_proto.opset_import[0].version,
device,
@ -200,6 +204,9 @@ impl OrtEngine {
fp16_enable: bool,
engine_cache_enable: bool,
) -> Result<()> {
let span = tracing::span!(tracing::Level::INFO, "OrtEngine-build_trt");
let _guard = span.enter();
// auto generate shapes
let mut spec_min = String::new();
let mut spec_opt = String::new();
@ -228,22 +235,19 @@ impl OrtEngine {
spec_opt += &s_opt;
spec_max += &s_max;
}
let p = Dir::Cache.path_with_subs(&["trt-cache"])?;
let trt = TensorRTExecutionProvider::default()
.with_device_id(device_id as i32)
.with_int8(int8_enable)
.with_fp16(fp16_enable)
.with_engine_cache(engine_cache_enable)
.with_engine_cache_path(format!(
"{}/{}",
home_dir(None).to_str().unwrap(),
"trt-cache"
))
.with_engine_cache_path(p.to_str().unwrap())
.with_timing_cache(false)
.with_profile_min_shapes(spec_min)
.with_profile_opt_shapes(spec_opt)
.with_profile_max_shapes(spec_max);
if trt.is_available()? && trt.register(builder).is_ok() {
println!("\n🐢 Initial model serialization with TensorRT may require a wait...\n");
tracing::info!("🐢 Initial model serialization with TensorRT may require a wait...\n");
Ok(())
} else {
anyhow::bail!("{CROSS_MARK} TensorRT initialization failed")
@ -279,6 +283,15 @@ impl OrtEngine {
pub fn dry_run(&mut self) -> Result<()> {
if self.num_dry_run > 0 {
// pb
let pb = build_progress_bar(
self.num_dry_run as u64,
" DryRun",
Some(&format!("{:?}", self.device)),
crate::PROGRESS_BAR_STYLE_CYAN,
)?;
// dummy inputs
let mut xs = Vec::new();
for i in self.inputs_minoptmax.iter() {
let mut x: Vec<usize> = Vec::new();
@ -289,16 +302,27 @@ impl OrtEngine {
xs.push(X::from(x));
}
let xs = Xs::from(xs);
// run
for _ in 0..self.num_dry_run {
pb.inc(1);
self.run(xs.clone())?;
}
self.ts.clear();
println!("{CHECK_MARK} Dryrun x{}", self.num_dry_run);
// update
pb.set_style(indicatif::ProgressStyle::with_template(
crate::PROGRESS_BAR_STYLE_GREEN,
)?);
pb.finish();
}
Ok(())
}
pub fn run(&mut self, xs: Xs) -> Result<Xs> {
let span = tracing::span!(tracing::Level::INFO, "OrtEngine-run");
let _guard = span.enter();
// inputs dtype alignment
let mut xs_ = Vec::new();
let t_pre = std::time::Instant::now();
@ -369,7 +393,7 @@ impl OrtEngine {
if self.profile {
let len = 10usize;
let n = 4usize;
println!(
tracing::info!(
"[Profile] {:>len$.n$?} ({:>len$.n$?} avg) [alignment: {:>len$.n$?} ({:>len$.n$?} avg) | inference: {:>len$.n$?} ({:>len$.n$?} avg) | to_f32: {:>len$.n$?} ({:>len$.n$?} avg)]",
t_pre + t_run + t_post,
self.ts.avg(),
@ -624,10 +648,7 @@ impl OrtEngine {
pub fn try_fetch(&self, key: &str) -> Option<String> {
match self.session.metadata() {
Err(_) => None,
Ok(metadata) => match metadata.custom(key) {
Err(_) => None,
Ok(value) => value,
},
Ok(metadata) => metadata.custom(key).unwrap_or_default(),
}
}
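
A small sketch of reading custom ONNX metadata through `try_fetch` (not part of this commit; the metadata key is hypothetical and the model name follows the docs' example):

```rust
use usls::{Options, OrtEngine};

fn main() -> anyhow::Result<()> {
    let options = Options::new().with_model("yolo/v8-m-dyn.onnx")?;
    let engine = OrtEngine::new(&options)?;
    // Returns None if the key is absent from the model's custom metadata.
    if let Some(task) = engine.try_fetch("task") {
        println!("embedded task metadata: {task}");
    }
    Ok(())
}
```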

View File

@ -25,6 +25,9 @@ pub trait Vision: Sized {
/// Executes the full pipeline.
fn forward(&mut self, xs: &[Self::Input], profile: bool) -> anyhow::Result<Vec<Y>> {
let span = tracing::span!(tracing::Level::INFO, "Vision-forward");
let _guard = span.enter();
let t_pre = std::time::Instant::now();
let ys = self.preprocess(xs)?;
let t_pre = t_pre.elapsed();
@ -38,7 +41,9 @@ pub trait Vision: Sized {
let t_post = t_post.elapsed();
if profile {
println!("> Preprocess: {t_pre:?} | Execution: {t_exe:?} | Postprocess: {t_post:?}");
tracing::info!(
"> Preprocess: {t_pre:?} | Execution: {t_exe:?} | Postprocess: {t_post:?}"
);
}
Ok(ys)

View File

@ -1,42 +1,71 @@
//! A Rust library integrated with ONNXRuntime, providing a collection of **Computer Vision** and **Vision-Language** models.
//! **usls** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
//!
//! # Supported Models
//!
//! - [YOLOv5](https://github.com/ultralytics/yolov5): Object Detection, Instance Segmentation, Classification
//! - [YOLOv6](https://github.com/meituan/YOLOv6): Object Detection
//! - [YOLOv7](https://github.com/WongKinYiu/yolov7): Object Detection
//! - [YOLOv8](https://github.com/ultralytics/ultralytics): Object Detection, Instance Segmentation, Classification, Oriented Object Detection, Keypoint Detection
//! - [YOLOv9](https://github.com/WongKinYiu/yolov9): Object Detection
//! - [YOLOv10](https://github.com/THU-MIG/yolov10): Object Detection
//! - [RT-DETR](https://arxiv.org/abs/2304.08069): Object Detection
//! - [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM): Instance Segmentation
//! - [SAM](https://github.com/facebookresearch/segment-anything): Segmentation Anything
//! - [MobileSAM](https://github.com/ChaoningZhang/MobileSAM): Segmentation Anything
//! - [EdgeSAM](https://github.com/chongzhou96/EdgeSAM): Segmentation Anything
//! - [SAM-HQ](https://github.com/SysCV/sam-hq): Segmentation Anything
//! - [YOLO-World](https://github.com/AILab-CVC/YOLO-World): Object Detection
//! - [DINOv2](https://github.com/facebookresearch/dinov2): Vision-Self-Supervised
//! - [CLIP](https://github.com/openai/CLIP): Vision-Language
//! - [BLIP](https://github.com/salesforce/BLIP): Vision-Language
//! - [DB](https://arxiv.org/abs/1911.08947): Text Detection
//! - [SVTR](https://arxiv.org/abs/2205.00159): Text Recognition
//! - [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo): Keypoint Detection
//! - [YOLOPv2](https://arxiv.org/abs/2208.11434): Panoptic Driving Perception
//! - [Depth-Anything (v1, v2)](https://github.com/LiheYoung/Depth-Anything): Monocular Depth Estimation
//! - [MODNet](https://github.com/ZHKKKe/MODNet): Image Matting
//! - [Sapiens](https://arxiv.org/abs/2408.12569): Human-centric Vision Tasks
//! - **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10)
//! - **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
//! - **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569)
//! - **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World)
//!
//! # Examples
//!
//! [All Demos Here](https://github.com/jamjamjon/usls/tree/main/examples)
//! Refer to [All Runnable Demos](https://github.com/jamjamjon/usls/tree/main/examples)
//!
//! # Using Provided Models for Inference
//! # Quick Start
//!
//! #### 1. Build Model
//! Using provided [`models`] with [`Options`]
//! The following demo shows how to build a `YOLO` model with [`Options`], load images, videos, and streams with [`DataLoader`], and annotate the model's inference results with [`Annotator`].
//!
//! ```rust, no_run
//! use usls::{ models::YOLO, Annotator, DataLoader, Options, Vision, COCO_CLASS_NAMES_80};
//! ```ignore
//! use usls::{models::YOLO, Annotator, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
//!
//! fn main() -> anyhow::Result<()> {
//! // Build model with Options
//! let options = Options::new()
//! .with_trt(0)
//! .with_model("yolo/v8-m-dyn.onnx")?
//! .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
//! .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
//! .with_i00((1, 1, 4).into())
//! .with_i02((0, 640, 640).into())
//! .with_i03((0, 640, 640).into())
//! .with_confs(&[0.2]);
//! let mut model = YOLO::new(options)?;
//!
//! // Build DataLoader to load image(s), video, stream
//! let dl = DataLoader::new(
//! "./assets/bus.jpg", // local image
//! // "images/bus.jpg", // remote image
//! // "../set-negs", // local images (from folder)
//! // "../hall.mp4", // local video
//! // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
//! // "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
//! )?
//! .with_batch(3) // iterate with batch_size = 3
//! .build()?;
//!
//! // Build annotator
//! let annotator = Annotator::new().with_saveout("YOLO-Demo");
//!
//! // Run and Annotate images
//! for (xs, _) in dl {
//! let ys = model.forward(&xs, false)?;
//! annotator.annotate(&xs, &ys);
//! }
//!
//! Ok(())
//! }
//! ```
//!
//! # What's More
//!
//! This guide covers the process of using provided models for inference, including how to build a model, load data, annotate results, and retrieve the outputs. Click the sections below to expand for detailed instructions.
//!
//! <details>
//! <summary>Build the Model</summary>
//!
//! To build a model, you can use the provided [models] with [Options]:
//!
//! ```ignore
//! use usls::{models::YOLO, Annotator, DataLoader, Options, Vision};
//!
//! let options = Options::default()
//! .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
@ -45,107 +74,149 @@
//! let mut model = YOLO::new(options)?;
//! ```
//!
//! - Use `CUDA`, `TensorRT`, or `CoreML`
//! **There are many more options provided by [Options]:**
//!
//! ```rust, no_run
//! - **Choose Execution Provider:**
//! Select `CUDA` (default), `TensorRT`, or `CoreML`:
//!
//! ```ignore
//! let options = Options::default()
//! .with_cuda(0) // using CUDA by default
//! .with_cuda(0)
//! // .with_trt(0)
//! // .with_coreml(0)
//! // .with_cpu();
//! ```
//!
//! - Dynamic Input Shapes
//! - **Dynamic Input Shapes:**
//! Specify dynamic shapes with [MinOptMax]:
//!
//! ```rust, no_run
//! ```ignore
//! let options = Options::default()
//! .with_i00((1, 2, 4).into()) // dynamic batch
//! .with_i02((416, 640, 800).into()) // dynamic height
//! .with_i03((416, 640, 800).into()); // dynamic width
//! .with_i00((1, 2, 4).into()) // batch(min=1, opt=2, max=4)
//! .with_i02((416, 640, 800).into()) // height(min=416, opt=640, max=800)
//! .with_i03((416, 640, 800).into()); // width(min=416, opt=640, max=800)
//! ```
//!
//! - Set Confidence Thresholds for Each Category
//! - **Set Confidence Thresholds:**
//! Adjust thresholds for each category:
//!
//! ```rust, no_run
//! ```ignore
//! let options = Options::default()
//! .with_confs(&[0.4, 0.15]); // class_0: 0.4, others: 0.15
//! ```
//!
//! - Set Class Names
//! - **Set Class Names:**
//! Provide class names if needed:
//!
//! ```rust, no_run
//! ```ignore
//! let options = Options::default()
//! .with_names(&COCO_CLASS_NAMES_80);
//! ```
//!
//! More options can be found in the [`Options`] documentation.
//! **More options are detailed in the [Options] documentation.**
//!
//!
//! #### 2. Load Images
//! </details>
//!
//! Ensure that the input image is RGB type.
//! <details>
//! <summary>Load Images, Video and Stream</summary>
//!
//! - Using [`image::ImageReader`] or [`DataLoader`] to Load One Image
//! - **Load a Single Image**
//! Use [DataLoader::try_read] to load an image from a local file or remote source:
//!
//! ```rust, no_run
//! let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
//! // or
//! ```ignore
//! let x = DataLoader::try_read("./assets/bus.jpg")?; // from local
//! let x = DataLoader::try_read("images/bus.jpg")?; // from remote
//! ```
//!
//! Alternatively, use [image::ImageReader] directly:
//!
//! ```ignore
//! let x = image::ImageReader::open("myimage.png")?.decode()?;
//! ```
//!
//! - Using [`DataLoader`] to Load a Batch of Images
//! - **Load Multiple Images, Videos, or Streams**
//! Create a [DataLoader] instance for batch processing:
//!
//! ```rust, no_run
//! let dl = DataLoader::default()
//! .with_batch(4)
//! .load("./assets")?;
//! ```ignore
//! let dl = DataLoader::new(
//! "./assets/bus.jpg", // local image
//! // "images/bus.jpg", // remote image
//! // "../set-negs", // local images (from folder)
//! // "../hall.mp4", // local video
//! // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
//! // "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
//! )?
//! .with_batch(3) // iterate with batch_size = 3
//! .build()?;
//!
//! // Iterate through the data
//! for (xs, _) in dl {}
//! ```
//!
//! #### 3. (Optional) Annotate Results with [`Annotator`]
//! - **Convert Images to Video**
//! Use [DataLoader::is2v] to create a video from a sequence of images:
//!
//! ```rust, no_run
//! ```ignore
//! let fps = 24;
//! let image_folder = "runs/YOLO-DataLoader";
//! let saveout = ["runs", "is2v"];
//! DataLoader::is2v(image_folder, &saveout, fps)?;
//! ```
//!
//! </details>
//!
//! <details>
//! <summary>Annotate Inference Results</summary>
//!
//! - **Create an Annotator Instance**
//!
//! ```ignore
//! let annotator = Annotator::default();
//! ```
//!
//! - Set Saveout Name
//! - **Set Saveout Name:**
//!
//! ```rust, no_run
//! ```ignore
//! let annotator = Annotator::default()
//! .with_saveout("YOLOs");
//! ```
//!
//! - Set Bboxes Line Width
//!
//! ```rust, no_run
//! - **Set Bounding Box Line Width:**
//!
//! ```ignore
//! let annotator = Annotator::default()
//! .with_bboxes_thickness(4);
//! ```
//!
//! - Disable Mask Plotting
//!
//! ```rust, no_run
//!
//! - **Disable Mask Plotting**
//!
//! ```ignore
//! let annotator = Annotator::default()
//! .without_masks(true);
//! ```
//!
//! More options can be found in the [`Annotator`] documentation.
//!
//!
//! #### 4. Run and Annotate
//! - **Perform Inference and Annotate the Results**
//!
//! ```rust, no_run
//! ```ignore
//! for (xs, _paths) in dl {
//! let ys = model.run(&xs)?;
//! annotator.annotate(&xs, &ys);
//! }
//! ```
//!
//! #### 5. Get Results
//! More options are detailed in the [Annotator] documentation.
//!
//! The inference outputs of provided models will be saved to a [`Vec<Y>`].
//! </details>
//!
//! - For Example, Get Detection Bboxes with `y.bboxes()`
//! <details>
//! <summary>Retrieve Model's Inference Results</summary>
//!
//! ```rust, no_run
//! Retrieve the inference outputs, which are saved in a [`Vec<Y>`]:
//!
//! - **Get Detection Bounding Boxes**
//!
//! ```ignore
//! let ys = model.run(&xs)?;
//! for y in ys {
//! // bboxes
@ -165,11 +236,16 @@
//! }
//! ```
//!
//! # Also, You Can Implement Your Own Model with [`OrtEngine`] and [`Options`]
//! </details>
//!
//! [`OrtEngine`] provides ONNX model loading, metadata parsing, dry_run, inference, and other functions, supporting EPs such as CUDA, TensorRT, CoreML, etc. You can use it as the ONNXRuntime engine for building models.
//! <details>
//! <summary>Custom Model Implementation</summary>
//!
//! Refer to [Demo: Depth-Anything](https://github.com/jamjamjon/usls/blob/main/src/models/depth_anything.rs) for more details.
//! You can also implement your own model using [OrtEngine] and [Options]. [OrtEngine] supports ONNX model loading, metadata parsing, dry_run, inference, and other functions, with execution providers such as CUDA, TensorRT, CoreML, etc.
//!
//! For more details, refer to the [Demo: Depth-Anything](https://github.com/jamjamjon/usls/blob/main/src/models/depth_anything.rs).
//!
//! </details>
mod core;
pub mod models;

View File

@ -5,8 +5,7 @@ use std::io::Write;
use tokenizers::Tokenizer;
use crate::{
auto_load, Embedding, LogitsSampler, MinOptMax, Ops, Options, OrtEngine, TokenizerStream, Xs,
X, Y,
Embedding, LogitsSampler, MinOptMax, Ops, Options, OrtEngine, TokenizerStream, Xs, X, Y,
};
#[derive(Debug)]
@ -31,13 +30,9 @@ impl Blip {
visual.width().to_owned(),
);
let tokenizer = match options_textual.tokenizer {
Some(x) => x,
None => match auto_load("tokenizer-blip.json", Some("tokenizers")) {
Err(err) => anyhow::bail!("No tokenizer's file found: {:?}", err),
Ok(x) => x,
},
};
let tokenizer = options_textual
.tokenizer
.ok_or(anyhow::anyhow!("No tokenizer file found"))?;
let tokenizer = match Tokenizer::from_file(tokenizer) {
Err(err) => anyhow::bail!("Failed to build tokenizer: {:?}", err),
Ok(x) => x,

View File

@ -3,7 +3,7 @@ use image::DynamicImage;
use ndarray::Array2;
use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer};
use crate::{auto_load, Embedding, MinOptMax, Ops, Options, OrtEngine, Xs, X, Y};
use crate::{Embedding, MinOptMax, Ops, Options, OrtEngine, Xs, X, Y};
#[derive(Debug)]
pub struct Clip {
@ -29,13 +29,10 @@ impl Clip {
visual.inputs_minoptmax()[0][3].to_owned(),
);
let tokenizer = match options_textual.tokenizer {
Some(x) => x,
None => match auto_load("tokenizer-clip.json", Some("tokenizers")) {
Err(err) => anyhow::bail!("No tokenizer's file found: {:?}", err),
Ok(x) => x,
},
};
let tokenizer = options_textual
.tokenizer
.ok_or(anyhow::anyhow!("No tokenizer file found"))?;
let mut tokenizer = match Tokenizer::from_file(tokenizer) {
Err(err) => anyhow::bail!("Failed to build tokenizer: {:?}", err),
Ok(x) => x,

View File

@ -28,8 +28,8 @@ impl Dinov2 {
engine.inputs_minoptmax()[0][3].to_owned(),
);
let which = match options.onnx_path {
s if s.contains("b14") => Model::B,
s if s.contains("s14") => Model::S,
s if s.contains('b') => Model::B,
s if s.contains('s') => Model::S,
_ => todo!(),
};
let hidden_size = match which {

View File

@ -1,4 +1,4 @@
use crate::{auto_load, Bbox, DynConf, MinOptMax, Ops, Options, OrtEngine, Xs, X, Y};
use crate::{Bbox, DynConf, MinOptMax, Ops, Options, OrtEngine, Xs, X, Y};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Axis};
@ -27,13 +27,9 @@ impl GroundingDINO {
);
let context_length = options.context_length.unwrap_or(256);
// let special_tokens = ["[CLS]", "[SEP]", ".", "?"];
let tokenizer = match options.tokenizer {
Some(x) => x,
None => match auto_load("tokenizer-groundingdino.json", Some("tokenizers")) {
Err(err) => anyhow::bail!("No tokenizer's file found: {:?}", err),
Ok(x) => x,
},
};
let tokenizer = options
.tokenizer
.ok_or(anyhow::anyhow!("No tokenizer file found"))?;
let tokenizer = match Tokenizer::from_file(tokenizer) {
Err(err) => anyhow::bail!("Failed to build tokenizer: {:?}", err),
Ok(x) => x,

View File

@ -32,6 +32,9 @@ impl Vision for YOLO {
type Input = DynamicImage;
fn new(options: Options) -> Result<Self> {
let span = tracing::span!(tracing::Level::INFO, "YOLO-new");
let _guard = span.enter();
let mut engine = OrtEngine::new(&options)?;
let (batch, height, width) = (
engine.batch().to_owned(),
@ -49,7 +52,7 @@ impl Vision for YOLO {
"segment" => Some(YOLOTask::Segment),
"obb" => Some(YOLOTask::Obb),
s => {
println!("YOLO Task: {s:?} is unsupported");
tracing::error!("YOLO Task: {s:?} is unsupported");
None
}
}));
@ -135,7 +138,7 @@ impl Vision for YOLO {
let iou = options.iou.unwrap_or(0.45);
// Summary
println!("YOLO Task: {:?}, Version: {:?}", task, version);
tracing::info!("YOLO Task: {:?}, Version: {:?}", task, version);
engine.dry_run()?;

View File

@ -1,8 +1,7 @@
use anyhow::{anyhow, Result};
#![allow(dead_code)]
use indicatif::{ProgressBar, ProgressStyle};
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
pub mod colormap256;
pub mod names;
@ -10,11 +9,25 @@ pub mod names;
pub use colormap256::*;
pub use names::*;
pub(crate) const GITHUB_ASSETS: &str =
"https://github.com/jamjamjon/assets/releases/download/v0.0.1";
pub(crate) const CHECK_MARK: &str = "";
pub(crate) const CROSS_MARK: &str = "";
pub(crate) const SAFE_CROSS_MARK: &str = "";
pub(crate) const NETWORK_PREFIXES: &[&str] = &[
"http://", "https://", "ftp://", "ftps://", "sftp://", "rtsp://", "mms://", "mmsh://",
"rtmp://", "rtmps://", "file://",
];
pub(crate) const IMAGE_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"];
pub(crate) const VIDEO_EXTENSIONS: &[&str] = &[
"mp4", "avi", "mkv", "mov", "wmv", "flv", "webm", "mpeg", "mpg", "m4v", "m4p",
];
pub(crate) const AUDIO_EXTENSIONS: &[&str] = &["mp3", "wav", "flac", "aac", "ogg", "wma"];
pub(crate) const STREAM_PROTOCOLS: &[&str] = &[
"rtsp://", "rtsps://", "rtspu://", "rtmp://", "rtmps://", "hls://", "http://", "https://",
];
pub(crate) const PROGRESS_BAR_STYLE_CYAN: &str =
"{prefix:.cyan.bold} {msg} {human_pos}/{human_len} |{bar}| {elapsed_precise}";
pub(crate) const PROGRESS_BAR_STYLE_GREEN: &str =
"{prefix:.green.bold} {msg} {human_pos}/{human_len} |{bar}| {elapsed_precise}";
pub fn human_bytes(size: f64) -> String {
let units = ["B", "KB", "MB", "GB", "TB", "PB", "EB"];
@ -30,71 +43,6 @@ pub fn human_bytes(size: f64) -> String {
format!("{:.1} {}", size, units[unit_index])
}
pub(crate) fn auto_load<P: AsRef<Path>>(src: P, sub: Option<&str>) -> Result<String> {
let src = src.as_ref();
let p = if src.is_file() {
src.into()
} else {
let sth = src.file_name().unwrap().to_str().unwrap();
let mut p = home_dir(sub);
p.push(sth);
if !p.is_file() {
download(
&format!("{}/{}", GITHUB_ASSETS, sth),
&p,
Some(sth.to_string().as_str()),
)?;
}
p
};
Ok(p.to_str().unwrap().to_string())
}
/// `download` sth from src to dst
pub fn download<P: AsRef<Path> + std::fmt::Debug>(
src: &str,
dst: P,
prompt: Option<&str>,
) -> Result<()> {
let resp = ureq::AgentBuilder::new()
.try_proxy_from_env(true)
.build()
.get(src)
.timeout(std::time::Duration::from_secs(2000))
.call()
.map_err(|err| anyhow!("Failed to download. {err:?}"))?;
let ntotal = resp
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.expect("Content-Length header should be present on archive response");
let pb = ProgressBar::new(ntotal);
pb.set_style(
ProgressStyle::with_template(
"{prefix:.bold} {msg:.dim} [{bar:.blue.bright/white.dim}] {binary_bytes}/{binary_total_bytes} ({binary_bytes_per_sec}, {percent_precise}%, {elapsed})"
)
.unwrap()
.progress_chars("#>-"));
pb.set_prefix(String::from("\n🐢 Downloading"));
pb.set_message(prompt.unwrap_or_default().to_string());
let mut reader = resp.into_reader();
let mut buffer = [0; 256];
let mut downloaded_bytes = 0usize;
let mut f = std::fs::File::create(&dst).expect("Failed to create file");
loop {
let bytes_read = reader.read(&mut buffer)?;
if bytes_read == 0 {
break;
}
pb.inc(bytes_read as u64);
f.write_all(&buffer[..bytes_read])?;
downloaded_bytes += bytes_read;
}
assert_eq!(downloaded_bytes as u64, ntotal);
pb.finish();
println!();
Ok(())
}
pub(crate) fn string_random(n: usize) -> String {
thread_rng()
.sample_iter(&Alphanumeric)
@ -112,33 +60,16 @@ pub(crate) fn string_now(delimiter: &str) -> String {
t_now.format(&fmt).to_string()
}
#[allow(dead_code)]
pub(crate) fn config_dir() -> PathBuf {
match dirs::config_dir() {
Some(mut d) => {
d.push("usls");
if !d.exists() {
std::fs::create_dir_all(&d).expect("Failed to create usls config directory.");
}
d
}
None => panic!("Unsupported operating system. Now support Linux, MacOS, Windows."),
}
}
pub fn build_progress_bar(
n: u64,
prefix: &str,
msg: Option<&str>,
style_temp: &str,
) -> anyhow::Result<ProgressBar> {
let pb = ProgressBar::new(n);
pb.set_style(ProgressStyle::with_template(style_temp)?.progress_chars("██ "));
pb.set_prefix(prefix.to_string());
pb.set_message(msg.unwrap_or_default().to_string());
Ok(pb)
}
#[allow(dead_code)]
pub(crate) fn home_dir(sub: Option<&str>) -> PathBuf {
match dirs::home_dir() {
Some(mut d) => {
d.push(".usls");
if let Some(sub) = sub {
d.push(sub);
}
if !d.exists() {
std::fs::create_dir_all(&d).expect("Failed to create usls home directory.");
}
d
}
None => panic!("Unsupported operating system. Now support Linux, MacOS, Windows."),
}
}

View File

@ -95,13 +95,6 @@ impl Y {
/// # Returns
///
/// * `Self` - The updated struct instance with the new probabilities set.
///
/// # Examples
///
/// ```
/// let probs = Prob::default();
/// let y = Y::default().with_probs(&probs);
/// ```
pub fn with_probs(mut self, probs: &Prob) -> Self {
self.probs = Some(probs.clone());
self