0.1.0-beta.1 (#82)

This commit is contained in:
Jamjamjon
2025-04-27 13:01:51 +08:00
committed by GitHub
parent aa25854f7f
commit 80468d9c83
174 changed files with 8413 additions and 6188 deletions

View File

@@ -3,4 +3,5 @@ updates:
   - package-ecosystem: "cargo"
     directory: "/"
     schedule:
-      interval: "weekly"
+      interval: "monthly"
+    open-pull-requests-limit: 3
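For reference, a sketch of the resulting `.github/dependabot.yml` implied by this hunk; the `version: 2` preamble is assumed, since it sits above the lines shown:

```yaml
# Sketch of the updated dependabot config; the first two lines are assumed context.
version: 2
updates:
  - package-ecosystem: "cargo"
    directory: "/"
    schedule:
      interval: "monthly"          # was "weekly"
    open-pull-requests-limit: 3    # new cap on open dependency-update PRs
```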

View File

@@ -1,83 +1,102 @@
-name: Rust-CI
+name: CI

 on:
   push:
-    branches: [ "main", "dev", "develop", "x", "xy" , "xyz" ]
+    branches: [ "main", "dev", "x" ]
   pull_request:
     branches: [ "main" ]

 env:
   CARGO_TERM_COLOR: always

 jobs:
-  check:
-    name: Check
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macOS-latest, windows-latest]
-        rust: [stable]
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: ${{ matrix.rust }}
-          override: true
-      - uses: actions-rs/cargo@v1
-        with:
-          command: check
-          args: --all
-  test:
-    name: Test
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macOS-latest, windows-latest]
-        rust: [stable]
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: ${{ matrix.rust }}
-          override: true
-      - uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --all
-  fmt:
-    name: Rustfmt
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-      - run: rustup component add rustfmt
-      - uses: actions-rs/cargo@v1
-        with:
-          command: fmt
-          args: --all -- --check
-  clippy:
-    name: Clippy
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-      - run: rustup component add clippy
-      - uses: actions-rs/cargo@v1
-        with:
-          command: clippy
-          args: --all --all-targets -- -D warnings
+  lints:
+    name: Rustfmt & Clippy
+    runs-on: ubuntu-latest
+    container: jrottenberg/ffmpeg:7.1-ubuntu
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          DEBIAN_FRONTEND=noninteractive apt-get update
+          DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ca-certificates clang curl pkg-config protobuf-compiler
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt, clippy
+      - name: Rustfmt
+        run: cargo fmt --all -- --check
+      - name: Clippy
+        run: cargo clippy --all-features --all-targets -- -D warnings
+  check:
+    name: cargo-check
+    runs-on: ubuntu-latest
+    container: jrottenberg/ffmpeg:7.1-ubuntu
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          DEBIAN_FRONTEND=noninteractive apt-get update
+          DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ca-certificates clang curl pkg-config protobuf-compiler
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+      - name: Check
+        run: cargo check --all-features --all-targets
+  test:
+    name: cargo-test
+    runs-on: ubuntu-latest
+    container: jrottenberg/ffmpeg:7.1-ubuntu
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          DEBIAN_FRONTEND=noninteractive apt-get update
+          DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ca-certificates clang curl pkg-config protobuf-compiler
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@nightly
+      - name: Test
+        run: cargo +nightly test --all-features --all-targets
+  build-linux:
+    needs: test
+    name: cargo build / linux / ffmpeg ${{ matrix.ffmpeg_version }}
+    runs-on: ubuntu-latest
+    container: jrottenberg/ffmpeg:${{ matrix.ffmpeg_version }}-ubuntu
+    strategy:
+      matrix:
+        ffmpeg_version: [ "5.0", "5.1", "6.0", "6.1", "7.0", "7.1" ] # "4.3", "4.4"
+      fail-fast: false
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          DEBIAN_FRONTEND=noninteractive apt-get update
+          DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ca-certificates clang curl pkg-config protobuf-compiler
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+      - name: Build
+        run: cargo build --all-features

.gitignore (vendored, 4 changes)
View File

@@ -3,8 +3,6 @@
 debug/
 target/
-**/*.DS_Store
-
 # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 Cargo.lock
@@ -18,4 +16,4 @@ Cargo.lock
 .debug
 .vscode
 runs/
-.DS_Store
+**/*.DS_Store

View File

@@ -1,67 +1,62 @@
 [package]
 name = "usls"
-version = "0.1.0"
-rust-version = "1.82"
 edition = "2021"
+version = "0.1.0-beta.1"
+rust-version = "1.82"
 description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
 repository = "https://github.com/jamjamjon/usls"
 authors = ["Jamjamjon <xxyydzml@outlook.com>"]
 license = "MIT"
 readme = "README.md"
-exclude = ["assets/*", "examples/*", "runs/*", "benches/*"]
+exclude = ["assets/*", "examples/*", "runs/*", "benches/*", "tests/*"]

 [dependencies]
-aksr = { version = "0.0.2" }
+anyhow = { version = "1" }
+aksr = { version = "0.0.3" }
+ab_glyph = { version = "0.2.29" }
 image = { version = "0.25" }
 imageproc = { version = "0.25" }
 ndarray = { version = "0.16.1", features = ["rayon", "serde"] }
-rayon = { version = "1.10.0" }
-anyhow = { version = "1.0" }
-regex = { version = "1.11.1" }
+indicatif = { version = "0.17.11" }
+log = "0.4.26"
+minifb = { version = "0.28.0" }
 rand = { version = "0.8.5" }
-chrono = { version = "0.4.30" }
-tokenizers = { version = "0.21.0" }
-log = { version = "0.4.22" }
-indicatif = "0.17.8"
-serde_json = "1.0"
+ureq = { version = "2", default-features = true, features = [ "socks-proxy" ] }
 serde = { version = "1.0", features = ["derive"] }
-ort = { version = "2.0.0-rc.9", default-features = false}
-prost = "0.12.6"
-ab_glyph = "0.2.29"
-tempfile = "3.19.1"
+serde_json = "1.0"
+rayon = { version = "1.10.0" }
+glob = "0.3.2"
 dirs = "6.0.0"
-geo = "0.30.0"
 natord = "1.0.9"
-half = { version = "2.3.1" }
-ureq = { version = "2.12.1", default-features = false, features = [ "tls" ] }
-fast_image_resize = { version = "5.1.2", features = ["image"]}
-video-rs = { version = "0.10.3", features = ["ndarray"], optional = true }
-minifb = { version = "0.28.0", optional = true }
+geo = "0.30.0"
+chrono = "0.4.40"
+regex = "1.11.1"
 sha2 = "0.10.8"
+tempfile = "3.19.1"
+video-rs = { version = "0.10.3", features = ["ndarray"], optional = true }
+fast_image_resize = { version = "5.1.2", features = ["image"]}
 ndarray-npy = "0.9.1"
+half = { version = "2.3.1" }
+prost = "0.13.5"
+ort = { version = "2.0.0-rc.9", default-features = false, optional = true , features = [
+    "ndarray",
+    "copy-dylibs",
+    "half"
+]}
+tokenizers = { version = "0.21.1" }

+[build-dependencies]
+prost-build = "0.13.5"

 [dev-dependencies]
 argh = "0.1.13"
 tracing-subscriber = { version = "0.3.18", features = ["env-filter", "chrono"] }

+[[example]]
+name = "viewer"
+required-features = ["ffmpeg"]

 [features]
-default = [
-    "ort/ndarray",
-    "ort/copy-dylibs",
-    "ort/load-dynamic",
-    "ort/half",
-]
-auto = ["ort/download-binaries"]
-ffmpeg = ["dep:video-rs", "dep:minifb"]
-cuda = [ "ort/cuda" ]
-trt = [ "ort/tensorrt" ]
-mps = [ "ort/coreml" ]
-
-[profile.release]
-lto = true
-strip = true
-panic = "abort"
+default = ["ort-download-binaries"]
+ort-download-binaries = ["ort", "ort/download-binaries"]
+ort-load-dynamic = ["ort", "ort/load-dynamic"]
+cuda = ["ort/cuda"]
+trt = ["ort/tensorrt"]
+mps = ["ort/coreml"]
+video = ["dep:video-rs"]

README.md (253 changes)
View File

@@ -1,47 +1,72 @@
 <h2 align="center">usls</h2>
 <p align="center">
-<a href="https://github.com/jamjamjon/usls/actions/workflows/rust-ci.yml">
-<img src="https://github.com/jamjamjon/usls/actions/workflows/rust-ci.yml/badge.svg" alt="Rust Continuous Integration Badge">
-</a>
-<a href='https://crates.io/crates/usls'>
-<img src='https://img.shields.io/crates/v/usls.svg' alt='usls Version'>
-</a>
+<!-- Rust MSRV -->
 <a href='https://crates.io/crates/usls'>
 <img src='https://img.shields.io/crates/msrv/usls-yellow?' alt='Rust MSRV'>
 </a>
+<!-- ONNXRuntime MSRV -->
 <a href='https://github.com/microsoft/onnxruntime/releases'>
 <img src='https://img.shields.io/badge/onnxruntime-%3E%3D%201.19.0-3399FF' alt='ONNXRuntime MSRV'>
 </a>
+<!-- CUDA MSRV -->
 <a href='https://developer.nvidia.com/cuda-toolkit-archive'>
-<img src='https://img.shields.io/badge/cuda-%3E%3D%2012.0-green' alt='CUDA MSRV'>
+<img src='https://img.shields.io/badge/CUDA-%3E%3D%2012.0-green' alt='CUDA MSRV'>
 </a>
+<!-- cuDNN MSRV -->
+<a href='https://developer.nvidia.com/cudnn-downloads'>
+<img src='https://img.shields.io/badge/cuDNN-%3E%3D%209.0-green4' alt='cuDNN MSRV'>
+</a>
+<!-- TensorRT MSRV -->
 <a href='https://developer.nvidia.com/tensorrt'>
 <img src='https://img.shields.io/badge/TensorRT-%3E%3D%2012.0-0ABF53' alt='TensorRT MSRV'>
 </a>
-<a href="https://crates.io/crates/usls">
-<img alt="Crates.io Total Downloads" src="https://img.shields.io/crates/d/usls?&color=946CE6">
-</a>
 </p>
 <p align="center">
+<!-- Examples Link -->
 <a href="./examples">
 <img src="https://img.shields.io/badge/Examples-1A86FD?&logo=anki" alt="Examples">
 </a>
+<!-- Docs.rs Link -->
 <a href='https://docs.rs/usls'>
-<img src='https://img.shields.io/badge/Docs-usls-yellow?&logo=docs.rs&color=FFA200' alt='usls documentation'>
+<img src='https://img.shields.io/badge/Docs-usls-yellow?&logo=docs.rs&color=FFA200' alt='Documentation'>
 </a>
 </p>
+<p align="center">
+<!-- CI Badge -->
+<a href="https://github.com/jamjamjon/usls/actions/workflows/rust-ci.yml">
+<img src="https://github.com/jamjamjon/usls/actions/workflows/rust-ci.yml/badge.svg" alt="Rust CI">
+</a>
+<a href='https://crates.io/crates/usls'>
+<img src='https://img.shields.io/crates/v/usls.svg' alt='Crates.io Version'>
+</a>
+<!-- Crates.io Downloads -->
+<a href="https://crates.io/crates/usls">
+<img alt="Crates.io Downloads" src="https://img.shields.io/crates/d/usls?&color=946CE6">
+</a>
+</p>
+<p align="center">
+<strong>⭐️ Star if helpful! ⭐️</strong>
+</p>
-**usls** is a Rust library integrated with **ONNXRuntime**, offering a suite of advanced models for **Computer Vision** and **Vision-Language** tasks, including:
+**usls** is an evolving Rust library focused on inference for advanced **vision** and **vision-language** models, along with practical vision utilities.
+- **SOTA Model Inference:** Supports a wide range of state-of-the-art vision and multi-modal models (typically with fewer than 1B parameters).
+- **Multi-backend Acceleration:** Supports CPU, CUDA, TensorRT, and CoreML.
+- **Easy Data Handling:** Easily read images, video streams, and folders with iterator support.
+- **Rich Result Types:** Built-in containers for common vision outputs like bounding boxes (Hbb, Obb), polygons, masks, etc.
+- **Annotation & Visualization:** Draw and display inference results directly, similar to OpenCV's `imshow()`.
+## 🧩 Supported Models
 - **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLO11](https://github.com/ultralytics/ultralytics), [YOLOv12](https://github.com/sunsmarterjie/yolov12)
 - **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
 - **Vision Models**: [RT-DETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro), [FastViT](https://github.com/apple/ml-fastvit), [BEiT](https://github.com/microsoft/unilm/tree/master/beit), [MobileOne](https://github.com/apple/ml-mobileone)
 - **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242), [Moondream2](https://github.com/vikhyat/moondream/tree/main)
-- **OCR Models**: [FAST](https://github.com/czczup/FAST), [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947), [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
+- **OCR-Related Models**: [FAST](https://github.com/czczup/FAST), [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947), [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
 <details>
-<summary>👉 More Supported Models</summary>
+<summary>Full list of supported models (click to expand)</summary>
 | Model | Task / Description | Example | CoreML | CUDA<br />FP32 | CUDA<br />FP16 | TensorRT<br />FP32 | TensorRT<br />FP16 |
 | -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------- | ------ | -------------- | -------------- | ------------------ | ------------------ |
@ -94,73 +119,149 @@
 </details>
-## ⛳️ Cargo Features
-By default, **none of the following features are enabled**. You can enable them as needed:
-- **`auto`**: Automatically downloads prebuilt ONNXRuntime binaries from Pyke's CDN for supported platforms.
-- If disabled, you'll need to [compile `ONNXRuntime` from source](https://github.com/microsoft/onnxruntime) or [download a precompiled package](https://github.com/microsoft/onnxruntime/releases), and then [link it manually](https://ort.pyke.io/setup/linking).
-<details>
-<summary>👉 For Linux or macOS Users</summary>
-- Download from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
-- Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
-```shell
-export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1
-```
-</details>
-- **`ffmpeg`**: Adds support for video streams, real-time frame visualization, and video export.
-- Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). For any issues related to `ffmpeg` features, please refer to the issues of these two crates.
-- **`cuda`**: Enables the NVIDIA TensorRT provider.
-- **`trt`**: Enables the NVIDIA TensorRT provider.
-- **`mps`**: Enables the Apple CoreML provider.
-## 🎈 Example
-* **Using `CUDA`**
-```
-cargo run -r -F cuda --example yolo -- --device cuda:0
-```
-* **Using Apple `CoreML`**
-```
-cargo run -r -F mps --example yolo -- --device mps
-```
-* **Using `TensorRT`**
-```
-cargo run -r -F trt --example yolo -- --device trt
-```
-* **Using `CPU`**
-```
-cargo run -r --example yolo
-```
-All examples are located in the [examples](./examples/) directory.
-## 🥂 Integrate Into Your Own Project
-Add `usls` as a dependency to your project's `Cargo.toml`
-```Shell
-cargo add usls -F cuda
-```
-Or use a specific commit:
-```Toml
+## 🛠️ Installation
+**Note:** It is recommended to use the GitHub repository as the source, since the crates.io version may not be up-to-date.
+```toml
 [dependencies]
-usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
+usls = { git = "https://github.com/jamjamjon/usls" }
+# crates.io version
+usls = "latest-version"
 ```
-## 🥳 If you find this helpful, please give it a star ⭐
-## 📌 License
+## ⚡ Cargo Features
+- **ONNXRuntime-related features (enabled by default)** provide model inference and model zoo support:
+- **`ort-download-binaries`** (**default**): Automatically downloads prebuilt `ONNXRuntime` binaries for supported platforms. Provides core model loading and inference capabilities using the `CPU` execution provider.
+- **`ort-load-dynamic`**: Dynamic linking. You'll need to compile `ONNXRuntime` from [source](https://github.com/microsoft/onnxruntime) or download a [precompiled package](https://github.com/microsoft/onnxruntime/releases), and then link it manually. [See the guide here](https://ort.pyke.io/setup/linking#dynamic-linking). (A minimal linking sketch follows this feature list.)
+- **`cuda`**: Enables the NVIDIA `CUDA` provider. Requires `CUDA` toolkit and `cuDNN` installed.
+- **`trt`**: Enables the NVIDIA `TensorRT` provider. Requires `TensorRT` libraries installed.
+- **`mps`**: Enables the Apple `CoreML` provider for macOS.
+- **If you only need basic features** (such as image/video reading, result visualization, etc.), you can disable the default features to minimize dependencies:
+```toml
+usls = { git = "https://github.com/jamjamjon/usls", default-features = false }
+```
+- **`video`**: Enables video stream reading and video writing. (Note: Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). Check their repositories for potential issues.)
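As referenced in the `ort-load-dynamic` bullet above, the removed "For Linux or macOS Users" block used to show how to point `ort` at a locally installed ONNXRuntime. A minimal sketch of that setup (library path, version, and cargo flags are illustrative):

```shell
# Tell ort where to find the ONNXRuntime shared library (path and version are examples).
export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1
# Then build with dynamic linking instead of the downloaded binaries.
cargo run -r --no-default-features -F ort-load-dynamic --example yolo
```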
## ✨ Example
- Model Inference
```shell
cargo run -r --example yolo # CPU
cargo run -r -F cuda --example yolo -- --device cuda:0 # GPU
```
- Reading Images
```rust
// Read a single image
let image = DataLoader::try_read_one("./assets/bus.jpg")?;
// Read multiple images
let images = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/cat.png"])?;
// Read all images in a folder
let images = DataLoader::try_read_folder("./assets")?;
// Read images matching a pattern (glob)
let images = DataLoader::try_read_pattern("./assets/*.Jpg")?;
// Load images and iterate
let dl = DataLoader::new("./assets")?.with_batch(2).build()?;
for images in dl.iter() {
// Code here
}
```
- Reading Video
```rust
let dl = DataLoader::new("http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4")?
.with_batch(1)
.with_nf_skip(2)
.with_progress_bar(true)
.build()?;
for images in dl.iter() {
// Code here
}
```
- Annotate
```rust
let annotator = Annotator::default();
let image = DataLoader::try_read_one("./assets/bus.jpg")?;
// hbb
let hbb = Hbb::default()
.with_xyxy(669.5233, 395.4491, 809.0367, 878.81226)
.with_id(0)
.with_name("person")
.with_confidence(0.87094545);
let _ = annotator.annotate(&image, &hbb)?;
// keypoints
let keypoints: Vec<Keypoint> = vec![
Keypoint::default()
.with_xy(139.35767, 443.43655)
.with_id(0)
.with_name("nose")
.with_confidence(0.9739332),
Keypoint::default()
.with_xy(147.38545, 434.34055)
.with_id(1)
.with_name("left_eye")
.with_confidence(0.9098319),
Keypoint::default()
.with_xy(128.5701, 434.07516)
.with_id(2)
.with_name("right_eye")
.with_confidence(0.9320564),
];
let _ = annotator.annotate(&image, &keypoints)?;
```
- Visualizing Inference Results and Exporting Video
```rust
let dl = DataLoader::new(args.source.as_str())?.build()?;
let mut viewer = Viewer::default().with_window_scale(0.5);
for images in &dl {
// Check if the window exists and is open
if viewer.is_window_exist() && !viewer.is_window_open() {
break;
}
// Show image in window
viewer.imshow(&images[0])?;
// Handle key events and delay
if let Some(key) = viewer.wait_key(1) {
if key == usls::Key::Escape {
break;
}
}
// Your custom code here
// Write video frame (requires video feature)
// if args.save_video {
// viewer.write_video_frame(&images[0])?;
// }
}
```
**All examples are located in the [examples](./examples/) directory.**
## ❓ FAQ
See issues or open a new discussion.
## 🤝 Contributing
Contributions are welcome! If you have suggestions, bug reports, or want to add new features or models, feel free to open an issue or submit a pull request.
## 📜 License
This project is licensed under [LICENSE](LICENSE).

assets/cat.png (new binary file, 263 KiB; not shown)

assets/dog.jpg (new binary file, 217 KiB; not shown)

View File

@@ -1,5 +1,10 @@
-fn main() {
-    // Need this for CoreML. See: https://ort.pyke.io/perf/execution-providers#coreml
-    #[cfg(target_os = "macos")]
+use std::io::Result;
+
+fn main() -> Result<()> {
+    prost_build::compile_protos(&["src/utils/onnx.proto3"], &["src"])?;
+
+    #[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos"))]
     println!("cargo:rustc-link-arg=-fapple-link-rtlib");
+
+    Ok(())
 }
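The new build script compiles `src/utils/onnx.proto3` with `prost-build`, which is why `[build-dependencies]` gains `prost-build` and the CI jobs install `protobuf-compiler`. A hedged sketch of the usual prost consumption pattern (the module name and generated file name are assumptions, not taken from this commit):

```rust
// Conventional prost-build include; usls may wire the generated code up differently.
pub mod onnx {
    include!(concat!(env!("OUT_DIR"), "/onnx.rs"));
}
```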

View File

@ -0,0 +1,5 @@
## Quick Start
```shell
cargo run -r --example annotate
```

examples/annotate/main.rs (new file, 734 lines)
View File

@ -0,0 +1,734 @@
use usls::{Annotator, DataLoader, Hbb, Keypoint, Polygon, Prob, Style, SKELETON_COCO_19, Y};
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// load images
let image = DataLoader::try_read_one("./assets/bus.jpg")?;
println!("Read 1 images: {:?}.", image.dimensions());
let hbbs = vec![
Hbb::default()
.with_xyxy(20.81192, 229.65482, 795.1383, 751.0504)
.with_id(5)
.with_name("bus")
.with_confidence(0.8815875)
.with_style(
// individual setting
Style::hbb()
.with_thickness(5)
.with_draw_fill(true)
.with_visible(true)
.with_text_visible(true)
.show_confidence(true)
.show_id(true)
.show_name(true)
.with_text_loc(usls::TextLoc::Center)
.with_color(
usls::StyleColors::default()
.with_outline(usls::Color::white())
.with_fill(usls::Color::black().with_alpha(100))
.with_text(usls::Color::black())
.with_text_bg(usls::Color::white()),
),
),
Hbb::default()
.with_xyxy(669.5233, 395.4491, 809.0367, 878.81226)
.with_id(0)
.with_name("person")
.with_confidence(0.87094545),
Hbb::default()
.with_xyxy(48.03354, 398.6103, 245.06848, 902.5964)
.with_id(0)
.with_name("person")
.with_confidence(0.8625425),
Hbb::default()
.with_xyxy(221.26727, 405.51895, 345.14288, 857.61865)
.with_id(0)
.with_name("person")
.with_confidence(0.81437635),
Hbb::default()
.with_xyxy(0.08129883, 254.67389, 32.30627, 324.9663)
.with_id(11)
.with_name("stop sign")
.with_confidence(0.30021638),
];
let keypoints: Vec<Keypoint> = vec![
Keypoint::default()
.with_xy(139.35767, 443.43655)
.with_id(0)
.with_name("nose")
.with_confidence(0.9739332),
Keypoint::default()
.with_xy(147.38545, 434.34055)
.with_id(1)
.with_name("left_eye")
.with_confidence(0.9098319),
Keypoint::default()
.with_xy(128.5701, 434.07516)
.with_id(2)
.with_name("right_eye")
.with_confidence(0.9320564),
Keypoint::default()
.with_xy(153.24237, 442.4857)
.with_id(3)
.with_name("left_ear")
.with_confidence(0.5992247),
Keypoint::default()
.with_xy(105.74312, 441.05765)
.with_id(4)
.with_name("right_ear")
.with_confidence(0.7259705),
Keypoint::default()
.with_xy(166.55661, 498.17484)
.with_id(5)
.with_name("left_shoulder")
.with_confidence(0.9862031),
Keypoint::default()
.with_xy(89.40589, 497.6169)
.with_id(6)
.with_name("right_shoulder")
.with_confidence(0.9879458),
Keypoint::default()
.with_xy(190.7351, 575.00226)
.with_id(7)
.with_name("left_elbow")
.with_confidence(0.9521556),
Keypoint::default()
.with_xy(116.3187, 570.6441)
.with_id(8)
.with_name("right_elbow")
.with_confidence(0.9619827),
Keypoint::default()
.with_xy(140.43465, 575.80994)
.with_id(9)
.with_name("left_wrist")
.with_confidence(0.9329945),
Keypoint::default()
.with_xy(174.73381, 558.4027)
.with_id(10)
.with_name("right_wrist")
.with_confidence(0.93989426),
Keypoint::default()
.with_xy(159.16801, 652.35846)
.with_id(11)
.with_name("left_hip")
.with_confidence(0.9849887),
Keypoint::default()
.with_xy(99.27675, 653.01874)
.with_id(12)
.with_name("right_hip")
.with_confidence(0.9861814),
Keypoint::default()
.with_xy(180.95883, 759.8797)
.with_id(13)
.with_name("left_knee")
.with_confidence(0.95086014),
Keypoint::default()
.with_xy(87.09352, 762.6029)
.with_id(14)
.with_name("right_knee")
.with_confidence(0.9532267),
Keypoint::default()
.with_xy(194.39137, 860.7901)
.with_id(15)
.with_name("left_ankle")
.with_confidence(0.7986185),
Keypoint::default()
.with_xy(70.85685, 862.53253)
.with_id(16)
.with_name("right_ankle")
.with_confidence(0.79832363),
];
let probs = vec![
Prob::default()
.with_id(654)
.with_name("minibus")
.with_confidence(0.666985),
Prob::default()
.with_id(734)
.with_name("police_van")
.with_confidence(0.20067203),
Prob::default()
.with_id(874)
.with_name("trolleybus")
.with_confidence(0.024672432),
Prob::default()
.with_id(656)
.with_name("minivan")
.with_confidence(0.02395765),
Prob::default()
.with_id(757)
.with_name("recreational_vehicle")
.with_confidence(0.012205753),
];
let polygons = vec![
Polygon::from_xys(&[
[13.0, 251.0],
[12.0, 251.0],
[11.0, 251.0],
[10.0, 251.0],
[9.0, 251.0],
[8.0, 251.0],
[7.0, 251.0],
[6.0, 251.0],
[5.0, 251.0],
[4.0, 251.0],
[3.0, 251.0],
[2.0, 251.0],
[1.0, 251.0],
[0.0, 251.0],
[0.0, 252.0],
[0.0, 253.0],
[0.0, 254.0],
[0.0, 255.0],
[0.0, 256.0],
[0.0, 257.0],
[0.0, 258.0],
[0.0, 259.0],
[0.0, 260.0],
[0.0, 261.0],
[0.0, 262.0],
[0.0, 263.0],
[0.0, 264.0],
[0.0, 265.0],
[0.0, 266.0],
[0.0, 267.0],
[0.0, 268.0],
[0.0, 269.0],
[0.0, 270.0],
[0.0, 271.0],
[0.0, 272.0],
[0.0, 273.0],
[0.0, 274.0],
[0.0, 275.0],
[0.0, 276.0],
[0.0, 277.0],
[0.0, 278.0],
[0.0, 279.0],
[0.0, 280.0],
[0.0, 281.0],
[0.0, 282.0],
[0.0, 283.0],
[0.0, 284.0],
[0.0, 285.0],
[0.0, 286.0],
[0.0, 287.0],
[0.0, 288.0],
[0.0, 289.0],
[0.0, 290.0],
[0.0, 291.0],
[0.0, 292.0],
[0.0, 293.0],
[0.0, 294.0],
[0.0, 295.0],
[0.0, 296.0],
[0.0, 297.0],
[0.0, 298.0],
[0.0, 299.0],
[0.0, 300.0],
[0.0, 301.0],
[0.0, 302.0],
[0.0, 303.0],
[0.0, 304.0],
[0.0, 305.0],
[0.0, 306.0],
[0.0, 307.0],
[0.0, 308.0],
[0.0, 309.0],
[0.0, 310.0],
[0.0, 311.0],
[0.0, 312.0],
[0.0, 313.0],
[0.0, 314.0],
[0.0, 315.0],
[0.0, 316.0],
[0.0, 317.0],
[0.0, 318.0],
[0.0, 319.0],
[0.0, 320.0],
[0.0, 321.0],
[0.0, 322.0],
[0.0, 323.0],
[0.0, 324.0],
[0.0, 325.0],
[1.0, 325.0],
[2.0, 325.0],
[3.0, 325.0],
[4.0, 325.0],
[5.0, 325.0],
[6.0, 325.0],
[7.0, 325.0],
[8.0, 325.0],
[9.0, 325.0],
[10.0, 325.0],
[11.0, 325.0],
[12.0, 324.0],
[13.0, 324.0],
[14.0, 324.0],
[15.0, 323.0],
[16.0, 323.0],
[17.0, 322.0],
[18.0, 321.0],
[19.0, 321.0],
[20.0, 320.0],
[20.0, 319.0],
[21.0, 318.0],
[22.0, 317.0],
[23.0, 316.0],
[24.0, 315.0],
[24.0, 314.0],
[25.0, 313.0],
[26.0, 312.0],
[27.0, 311.0],
[28.0, 310.0],
[29.0, 309.0],
[30.0, 308.0],
[30.0, 307.0],
[31.0, 306.0],
[31.0, 305.0],
[31.0, 304.0],
[32.0, 303.0],
[32.0, 302.0],
[32.0, 301.0],
[33.0, 300.0],
[33.0, 299.0],
[33.0, 298.0],
[33.0, 297.0],
[33.0, 296.0],
[33.0, 295.0],
[33.0, 294.0],
[33.0, 293.0],
[33.0, 292.0],
[33.0, 291.0],
[33.0, 290.0],
[33.0, 289.0],
[33.0, 288.0],
[33.0, 287.0],
[33.0, 286.0],
[33.0, 285.0],
[33.0, 284.0],
[33.0, 283.0],
[33.0, 282.0],
[33.0, 281.0],
[33.0, 280.0],
[32.0, 279.0],
[32.0, 278.0],
[32.0, 277.0],
[31.0, 276.0],
[31.0, 275.0],
[31.0, 274.0],
[30.0, 273.0],
[30.0, 272.0],
[29.0, 271.0],
[28.0, 270.0],
[28.0, 269.0],
[27.0, 268.0],
[27.0, 267.0],
[26.0, 266.0],
[25.0, 265.0],
[25.0, 264.0],
[24.0, 263.0],
[23.0, 262.0],
[22.0, 261.0],
[21.0, 260.0],
[20.0, 259.0],
[20.0, 258.0],
[19.0, 257.0],
[18.0, 256.0],
[17.0, 255.0],
[16.0, 254.0],
[15.0, 254.0],
[14.0, 253.0],
[13.0, 252.0],
[13.0, 251.0],
])
.with_id(11)
.with_name("stop sign")
.with_confidence(0.5555),
Polygon::from_xys(&[
[485.0, 149.0],
[484.0, 150.0],
[484.0, 151.0],
[483.0, 152.0],
[482.0, 153.0],
[481.0, 153.0],
[480.0, 153.0],
[479.0, 153.0],
[478.0, 153.0],
[477.0, 154.0],
[476.0, 154.0],
[475.0, 154.0],
[474.0, 154.0],
[473.0, 154.0],
[472.0, 154.0],
[471.0, 154.0],
[470.0, 154.0],
[469.0, 154.0],
[468.0, 155.0],
[467.0, 155.0],
[466.0, 155.0],
[465.0, 155.0],
[464.0, 155.0],
[463.0, 155.0],
[462.0, 156.0],
[461.0, 156.0],
[460.0, 156.0],
[459.0, 156.0],
[458.0, 156.0],
[457.0, 157.0],
[456.0, 157.0],
[455.0, 157.0],
[454.0, 157.0],
[453.0, 158.0],
[452.0, 158.0],
[451.0, 158.0],
[450.0, 158.0],
[449.0, 159.0],
[448.0, 159.0],
[447.0, 159.0],
[446.0, 159.0],
[445.0, 160.0],
[444.0, 160.0],
[443.0, 160.0],
[442.0, 160.0],
[441.0, 160.0],
[440.0, 161.0],
[439.0, 161.0],
[438.0, 161.0],
[437.0, 161.0],
[436.0, 161.0],
[435.0, 162.0],
[434.0, 162.0],
[433.0, 162.0],
[432.0, 162.0],
[431.0, 162.0],
[430.0, 162.0],
[429.0, 163.0],
[428.0, 163.0],
[427.0, 163.0],
[427.0, 164.0],
[427.0, 165.0],
[427.0, 166.0],
[427.0, 167.0],
[427.0, 168.0],
[427.0, 169.0],
[427.0, 170.0],
[427.0, 171.0],
[427.0, 172.0],
[427.0, 173.0],
[427.0, 174.0],
[427.0, 175.0],
[427.0, 176.0],
[427.0, 177.0],
[427.0, 178.0],
[427.0, 179.0],
[427.0, 180.0],
[427.0, 181.0],
[427.0, 182.0],
[427.0, 183.0],
[427.0, 184.0],
[427.0, 185.0],
[427.0, 186.0],
[427.0, 187.0],
[427.0, 188.0],
[427.0, 189.0],
[427.0, 190.0],
[428.0, 190.0],
[429.0, 191.0],
[430.0, 191.0],
[431.0, 191.0],
[432.0, 191.0],
[433.0, 191.0],
[434.0, 191.0],
[435.0, 191.0],
[436.0, 191.0],
[437.0, 191.0],
[438.0, 190.0],
[439.0, 190.0],
[440.0, 190.0],
[441.0, 190.0],
[442.0, 190.0],
[443.0, 190.0],
[444.0, 190.0],
[445.0, 189.0],
[446.0, 189.0],
[447.0, 189.0],
[448.0, 189.0],
[449.0, 189.0],
[450.0, 189.0],
[451.0, 188.0],
[452.0, 188.0],
[453.0, 188.0],
[454.0, 188.0],
[455.0, 188.0],
[456.0, 188.0],
[457.0, 187.0],
[458.0, 187.0],
[459.0, 187.0],
[460.0, 187.0],
[461.0, 186.0],
[462.0, 186.0],
[463.0, 187.0],
[464.0, 188.0],
[465.0, 189.0],
[466.0, 190.0],
[467.0, 191.0],
[467.0, 192.0],
[468.0, 193.0],
[469.0, 193.0],
[470.0, 193.0],
[471.0, 193.0],
[472.0, 193.0],
[473.0, 193.0],
[474.0, 193.0],
[475.0, 193.0],
[476.0, 193.0],
[477.0, 193.0],
[478.0, 192.0],
[479.0, 191.0],
[480.0, 190.0],
[481.0, 190.0],
[482.0, 189.0],
[483.0, 189.0],
[484.0, 189.0],
[485.0, 188.0],
[486.0, 188.0],
[487.0, 188.0],
[488.0, 188.0],
[489.0, 188.0],
[490.0, 188.0],
[491.0, 188.0],
[492.0, 188.0],
[493.0, 187.0],
[494.0, 187.0],
[495.0, 187.0],
[496.0, 187.0],
[497.0, 187.0],
[498.0, 187.0],
[499.0, 187.0],
[500.0, 186.0],
[501.0, 186.0],
[502.0, 186.0],
[503.0, 186.0],
[504.0, 185.0],
[505.0, 185.0],
[506.0, 185.0],
[507.0, 184.0],
[508.0, 184.0],
[509.0, 183.0],
[510.0, 183.0],
[511.0, 183.0],
[512.0, 182.0],
[513.0, 182.0],
[514.0, 182.0],
[515.0, 181.0],
[516.0, 181.0],
[517.0, 181.0],
[518.0, 180.0],
[519.0, 180.0],
[520.0, 180.0],
[521.0, 179.0],
[522.0, 179.0],
[523.0, 178.0],
[524.0, 178.0],
[525.0, 177.0],
[526.0, 176.0],
[527.0, 175.0],
[528.0, 174.0],
[529.0, 173.0],
[530.0, 172.0],
[531.0, 172.0],
[531.0, 171.0],
[531.0, 170.0],
[531.0, 169.0],
[531.0, 168.0],
[531.0, 167.0],
[531.0, 166.0],
[531.0, 165.0],
[531.0, 164.0],
[531.0, 163.0],
[531.0, 162.0],
[531.0, 161.0],
[531.0, 160.0],
[531.0, 159.0],
[531.0, 158.0],
[531.0, 157.0],
[531.0, 156.0],
[530.0, 155.0],
[530.0, 154.0],
[529.0, 154.0],
[528.0, 153.0],
[527.0, 152.0],
[526.0, 151.0],
[525.0, 150.0],
[524.0, 149.0],
[523.0, 149.0],
[522.0, 149.0],
[521.0, 149.0],
[520.0, 149.0],
[519.0, 149.0],
[518.0, 149.0],
[517.0, 149.0],
[516.0, 149.0],
[515.0, 149.0],
[514.0, 149.0],
[513.0, 149.0],
[512.0, 149.0],
[511.0, 149.0],
[510.0, 149.0],
[509.0, 149.0],
[508.0, 149.0],
[507.0, 149.0],
[506.0, 149.0],
[505.0, 149.0],
[504.0, 149.0],
[503.0, 149.0],
[502.0, 149.0],
[501.0, 149.0],
[500.0, 149.0],
[499.0, 149.0],
[498.0, 149.0],
[497.0, 149.0],
[496.0, 149.0],
[495.0, 149.0],
[494.0, 149.0],
[493.0, 149.0],
[492.0, 149.0],
[491.0, 149.0],
[490.0, 149.0],
[489.0, 149.0],
[488.0, 149.0],
[487.0, 149.0],
[486.0, 149.0],
[485.0, 149.0],
])
.with_id(9)
.with_name("traffic light")
.with_confidence(0.777777),
];
// Build annotator
let annotator = Annotator::default()
.with_prob_style(Style::prob().with_text_loc(usls::TextLoc::InnerTopLeft))
.with_hbb_style(Style::hbb().with_thickness(5).with_draw_fill(true))
.with_keypoint_style(
Style::keypoint()
.with_skeleton(SKELETON_COCO_19.into())
.with_radius(4)
.with_text_visible(true)
.show_confidence(false)
.show_id(true)
.show_name(false),
)
.with_polygon_style(
Style::polygon()
.with_text_visible(true)
.show_confidence(true)
.show_id(true)
.show_name(true),
);
// Annotate Y
let y = Y::default()
.with_probs(&probs)
.with_hbbs(&hbbs)
.with_keypoints(&keypoints)
// .with_keypointss(&[keypoints.clone()])
.with_polygons(&polygons);
annotator.annotate(&image, &y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Y"])?
.join(usls::timestamp(None))
.display(),
))?;
// Annotate Probs
annotator.annotate(&image, &probs)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Probs"])?
.join(usls::timestamp(None))
.display(),
))?;
// Annotate Prob
for prob in &probs {
annotator.annotate(&image, prob)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Prob"])?
.join(usls::timestamp(None))
.display(),
))?;
}
// Annotate Hbbs
annotator.annotate(&image, &hbbs)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Hbbs"])?
.join(usls::timestamp(None))
.display(),
))?;
// Annotate Hbb
for hbb in &hbbs {
annotator.annotate(&image, hbb)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Hbb"])?
.join(usls::timestamp(None))
.display(),
))?;
}
// Annotate A set of Keypoint
annotator.annotate(&image, &keypoints)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Keypoints"])?
.join(usls::timestamp(None))
.display(),
))?;
// Annotate Keypoint
for keypoint in &keypoints {
annotator.annotate(&image, keypoint)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Keypoint"])?
.join(usls::timestamp(None))
.display(),
))?;
}
// Annotate Polygons
annotator.annotate(&image, &polygons)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Polygons"])?
.join(usls::timestamp(None))
.display(),
))?;
// Annotate Polygon
for polygon in &polygons {
annotator.annotate(&image, polygon)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Annotate", "Polygon"])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(())
}

View File

@ -1,6 +0,0 @@
## Quick Start
```shell
cargo run -r -F cuda --example beit -- --device cuda --dtype fp16
```

View File

@ -1,52 +0,0 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::beit_base()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}

View File

@@ -17,7 +17,6 @@ fn main() -> anyhow::Result<()> {
         .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
         .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
         .init();
-
     let args: Args = argh::from_env();

     // build model
@@ -30,7 +29,7 @@ fn main() -> anyhow::Result<()> {
     let mut model = Blip::new(options_visual, options_textual)?;

     // image caption
-    let xs = DataLoader::try_read_batch(&args.source)?;
+    let xs = DataLoader::try_read_n(&args.source)?;

     // unconditional caption
     let ys = model.forward(&xs, None)?;

View File

@ -0,0 +1,6 @@
## Quick Start
```shell
cargo run -r -F cuda --example classifier -- --device cuda --dtype fp16 --model beit # convnext, fastvit, deit, mobileone
```

View File

@@ -11,6 +11,10 @@ struct Args {
     #[argh(option, default = "String::from(\"cpu:0\")")]
     device: String,

+    /// model name
+    #[argh(option, default = "String::from(\"beit\")")]
+    model: String,
+
     /// source image
     #[argh(
         option,
@@ -25,33 +29,46 @@ struct Args {
 fn main() -> anyhow::Result<()> {
     tracing_subscriber::fmt()
-        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
         .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
         .init();
     let args: Args = argh::from_env();

     // build model
-    let options = Options::fastvit_t8_distill()
+    let options = match args.model.to_lowercase().as_str() {
+        "beit" => Options::beit_base(),
+        "convnext" => Options::convnext_v2_atto(),
+        "deit" => Options::deit_tiny_distill(),
+        "fastvit" => Options::fastvit_t8_distill(),
+        "mobileone" => Options::mobileone_s0(),
+        _ => anyhow::bail!("Unsupported model: {}", args.model),
+    };
+    let options = options
         .with_model_dtype(args.dtype.as_str().try_into()?)
         .with_model_device(args.device.as_str().try_into()?)
         .commit()?;
     let mut model = ImageClassifier::try_from(options)?;

     // load images
-    let xs = DataLoader::try_read_batch(&args.source)?;
+    let xs = DataLoader::try_read_n(&args.source)?;

     // run
     let ys = model.forward(&xs)?;
-    println!("{:?}", ys);
+    // results
+    for (i, y) in ys.iter().enumerate() {
+        println!("{}: {:?}", i, y);
+    }

     // annotate
-    let annotator = Annotator::default().with_saveout(model.spec());
-    annotator.annotate(&xs, &ys);
+    let annotator = Annotator::default();
+    for (x, y) in xs.iter().zip(ys.iter()) {
+        annotator.annotate(x, y)?.save(format!(
+            "{}.jpg",
+            usls::Dir::Current
+                .base_dir_with_subs(&["runs", "Image-Classifier", &args.model])?
+                .join(usls::timestamp(None))
+                .display(),
+        ))?;
+    }
     Ok(())
 }

View File

@@ -43,13 +43,13 @@ fn main() -> Result<()> {
     let dl = DataLoader::new("./examples/clip/images")?.build()?;

     // run
-    for (images, paths) in dl {
+    for images in dl {
         let feats_image = model.encode_images(&images)?;

         // use image to query texts
         let matrix = Ops::dot2(&feats_image, &feats_text)?;
-        for i in 0..paths.len() {
+        for i in 0..images.len() {
             let probs = &matrix[i];
             let (id, &score) = probs
                 .iter()
@@ -58,9 +58,9 @@ fn main() -> Result<()> {
                 .unwrap();
             println!(
-                "({:?}%) {} => {} ",
+                "({:?}%) {:?} => {} ",
                 score * 100.0,
-                paths[i].display(),
+                images[i].source(),
                 &texts[id]
             );
         }

View File

@ -1,6 +0,0 @@
## Quick Start
```shell
cargo run -r -F cuda --example convnext -- --device cuda --dtype fp16
```

View File

@ -1,52 +0,0 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::convnext_v2_atto()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}

View File

@@ -12,17 +12,23 @@ fn main() -> Result<()> {
     let mut model = RTDETR::new(options)?;

     // load
-    let x = [DataLoader::try_read("./assets/bus.jpg")?];
+    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;

     // run
-    let y = model.forward(&x)?;
-    println!("{:?}", y);
+    let ys = model.forward(&xs)?;
+    println!("{:?}", ys);

     // annotate
-    let annotator = Annotator::default()
-        .with_bboxes_thickness(3)
-        .with_saveout(model.spec());
-    annotator.annotate(&x, &y);
+    let annotator = Annotator::default();
+    for (x, y) in xs.iter().zip(ys.iter()) {
+        annotator.annotate(x, y)?.save(format!(
+            "{}.jpg",
+            usls::Dir::Current
+                .base_dir_with_subs(&["runs", model.spec()])?
+                .join(usls::timestamp(None))
+                .display(),
+        ))?;
+    }
     Ok(())
 }

View File

@ -1,5 +0,0 @@
## Quick Start
```shell
cargo run -r --example dataloader
```

View File

@ -1,45 +0,0 @@
use usls::DataLoader;
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// 1. iterator
let dl = DataLoader::try_from(
// "images/bus.jpg", // remote image
// "../images", // image folder
// "../demo.mp4", // local video
// "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
// "rtsp://admin:xyz@192.168.2.217:554/h265/ch1/", // rtsp h264 stream
"./assets/bus.jpg", // local image
)?
.with_batch(1)
.with_progress_bar(true)
.build()?;
for (_xs, _paths) in dl {
println!("Paths: {:?}", _paths);
}
// 2. read one image
let image = DataLoader::try_read("./assets/bus.jpg")?;
println!(
"Read one image. Height: {}, Width: {}",
image.height(),
image.width()
);
// 3. read several images
let images = DataLoader::try_read_batch(&[
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
"./assets/bus.jpg",
])?;
println!("Read {} images.", images.len());
Ok(())
}

View File

@ -1,5 +1,5 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options}; use usls::{models::DB, Annotator, DataLoader, Options, Style};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -16,21 +16,21 @@ struct Args {
#[argh(option, default = "String::from(\"auto\")")] #[argh(option, default = "String::from(\"auto\")")]
dtype: String, dtype: String,
/// show bboxes /// show hbbs
#[argh(option, default = "false")] #[argh(option, default = "false")]
show_bboxes: bool, show_hbbs: bool,
/// show mbrs /// show obbs
#[argh(option, default = "false")] #[argh(option, default = "false")]
show_mbrs: bool, show_obbs: bool,
/// show bboxes confidence /// show bboxes confidence
#[argh(option, default = "false")] #[argh(option, default = "false")]
show_bboxes_conf: bool, show_hbbs_conf: bool,
/// show mbrs confidence /// show mbrs confidence
#[argh(option, default = "false")] #[argh(option, default = "false")]
show_mbrs_conf: bool, show_obbs_conf: bool,
} }
fn main() -> Result<()> { fn main() -> Result<()> {
@ -52,7 +52,7 @@ fn main() -> Result<()> {
)?; )?;
// load image // load image
let x = DataLoader::try_read_batch(&[ let xs = DataLoader::try_read_n(&[
"images/db.png", "images/db.png",
"images/table.png", "images/table.png",
"images/table-ch.jpg", "images/table-ch.jpg",
@ -61,20 +61,46 @@ fn main() -> Result<()> {
])?; ])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default()
.without_bboxes(!args.show_bboxes) .with_polygon_style(
.without_mbrs(!args.show_mbrs) Style::polygon()
.without_bboxes_name(true) .with_visible(true)
.without_mbrs_name(true) .with_text_visible(false)
.without_bboxes_conf(!args.show_bboxes_conf) .show_confidence(true)
.without_mbrs_conf(!args.show_mbrs_conf) .show_id(true)
.with_polygons_alpha(60) .show_name(true)
.with_contours_color([255, 105, 180, 255]) .with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
.with_saveout(model.spec()); )
annotator.annotate(&x, &y); .with_hbb_style(
Style::hbb()
.with_visible(args.show_hbbs)
.with_text_visible(false)
.with_thickness(1)
.show_confidence(args.show_hbbs_conf)
.show_id(false)
.show_name(false),
)
.with_obb_style(
Style::obb()
.with_visible(args.show_obbs)
.with_text_visible(false)
.show_confidence(args.show_obbs_conf)
.show_id(false)
.show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
// summary // summary
model.summary(); model.summary();

View File

@@ -12,17 +12,23 @@ fn main() -> Result<()> {
     let mut model = RTDETR::new(options)?;

     // load
-    let x = [DataLoader::try_read("./assets/bus.jpg")?];
+    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;

     // run
-    let y = model.forward(&x)?;
-    println!("{:?}", y);
+    let ys = model.forward(&xs)?;
+    println!("{:?}", ys);

     // annotate
-    let annotator = Annotator::default()
-        .with_bboxes_thickness(3)
-        .with_saveout(model.spec());
-    annotator.annotate(&x, &y);
+    let annotator = Annotator::default();
+    for (x, y) in xs.iter().zip(ys.iter()) {
+        annotator.annotate(x, y)?.save(format!(
+            "{}.jpg",
+            usls::Dir::Current
+                .base_dir_with_subs(&["runs", model.spec()])?
+                .join(usls::timestamp(None))
+                .display(),
+        ))?;
+    }
     Ok(())
 }

View File

@ -1,7 +0,0 @@
## Quick Start
```shell
cargo run -r -F cuda --example deit -- --device cuda --dtype fp16
```

View File

@ -1,52 +0,0 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::deit_tiny_distill()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}

View File

@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::DepthAnything, Annotator, DataLoader, Options};
+use usls::{models::DepthAnything, Annotator, DataLoader, Options, Style};

 fn main() -> Result<()> {
     tracing_subscriber::fmt()
@@ -12,16 +12,23 @@ fn main() -> Result<()> {
     let mut model = DepthAnything::new(options)?;

     // load
-    let x = [DataLoader::try_read("images/street.jpg")?];
+    let xs = DataLoader::try_read_n(&["images/street.jpg"])?;

     // run
-    let y = model.forward(&x)?;
+    let ys = model.forward(&xs)?;

     // annotate
-    let annotator = Annotator::default()
-        .with_colormap("Turbo")
-        .with_saveout(model.spec());
-    annotator.annotate(&x, &y);
+    let annotator =
+        Annotator::default().with_mask_style(Style::mask().with_colormap256("turbo".into()));
+    for (x, y) in xs.iter().zip(ys.iter()) {
+        annotator.annotate(x, y)?.save(format!(
+            "{}.jpg",
+            usls::Dir::Current
+                .base_dir_with_subs(&["runs", model.spec()])?
+                .join(usls::timestamp(None))
+                .display(),
+        ))?;
+    }
     Ok(())
 }

View File

@ -1,8 +1,9 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::DepthPro, Annotator, DataLoader, Options}; use usls::DataLoader;
use usls::{models::DepthPro, Annotator, Options, Style};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// BLIP Example /// Example
struct Args { struct Args {
/// device /// device
#[argh(option, default = "String::from(\"cpu:0\")")] #[argh(option, default = "String::from(\"cpu:0\")")]
@ -11,10 +12,6 @@ struct Args {
/// dtype /// dtype
#[argh(option, default = "String::from(\"q4f16\")")] #[argh(option, default = "String::from(\"q4f16\")")]
dtype: String, dtype: String,
/// source image
#[argh(option, default = "String::from(\"images/street.jpg\")")]
source: String,
} }
fn main() -> Result<()> { fn main() -> Result<()> {
@ -33,16 +30,23 @@ fn main() -> Result<()> {
let mut model = DepthPro::new(options)?; let mut model = DepthPro::new(options)?;
// load // load
let x = [DataLoader::try_read(&args.source)?]; let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator =
.with_colormap("Turbo") Annotator::default().with_mask_style(Style::mask().with_colormap256("turbo".into()));
.with_saveout(model.spec()); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&x, &y); annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }

View File

@@ -8,10 +8,7 @@ fn main() -> Result<()> {
         .init();

     // images
-    let xs = [
-        DataLoader::try_read("./assets/bus.jpg")?,
-        DataLoader::try_read("./assets/bus.jpg")?,
-    ];
+    let xs = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/bus.jpg"])?;

     // model
     let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;

View File

@@ -24,17 +24,22 @@ fn main() -> Result<()> {
     let mut model = YOLO::new(config)?;

     // load images
-    let xs = [DataLoader::try_read("images/academic.jpg")?];
+    let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;

     // run
     let ys = model.forward(&xs)?;
-    // println!("{:?}", ys);

     // annotate
-    let annotator = Annotator::default()
-        .with_bboxes_thickness(3)
-        .with_saveout("doclayout-yolo");
-    annotator.annotate(&xs, &ys);
+    let annotator = Annotator::default();
+    for (x, y) in xs.iter().zip(ys.iter()) {
+        annotator.annotate(x, y)?.save(format!(
+            "{}.jpg",
+            usls::Dir::Current
+                .base_dir_with_subs(&["runs", "doclayout-yolo"])?
+                .join(usls::timestamp(None))
+                .display(),
+        ))?;
+    }

     model.summary();

View File

@ -1,5 +1,5 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options, Scale}; use usls::{models::DB, Annotator, DataLoader, Options, Scale, Style};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -40,7 +40,7 @@ fn main() -> Result<()> {
)?; )?;
// load image // load image
let x = DataLoader::try_read_batch(&[ let xs = DataLoader::try_read_n(&[
"images/db.png", "images/db.png",
"images/table.png", "images/table.png",
"images/table-ch.jpg", "images/table-ch.jpg",
@ -49,16 +49,46 @@ fn main() -> Result<()> {
])?; ])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default()
.without_bboxes(true) .with_polygon_style(
.without_mbrs(true) Style::polygon()
.with_polygons_alpha(60) .with_visible(true)
.with_contours_color([255, 105, 180, 255]) .with_text_visible(false)
.with_saveout(model.spec()); .show_confidence(true)
annotator.annotate(&x, &y); .show_id(true)
.show_name(true)
.with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
)
.with_hbb_style(
Style::hbb()
.with_visible(false)
.with_text_visible(false)
.with_thickness(1)
.show_confidence(false)
.show_id(false)
.show_name(false),
)
.with_obb_style(
Style::obb()
.with_visible(false)
.with_text_visible(false)
.show_confidence(false)
.show_id(false)
.show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -29,17 +29,27 @@ fn main() -> Result<()> {
let mut model = YOLO::new(config)?; let mut model = YOLO::new(config)?;
// load images // load images
let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?; let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default().with_hbb_style(
.without_masks(true) usls::Style::hbb()
.with_bboxes_thickness(3) .show_confidence(true)
.with_saveout("fastsam"); .show_id(false)
annotator.annotate(&xs, &ys); .show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "FastSAM"])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -1,13 +0,0 @@
## Quick Start
```shell
cargo run -r -F cuda --example mobileone -- --device cuda --dtype fp16
```
```shell
0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
```


@ -1,5 +1,5 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Task}; use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Style, Task};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -26,10 +26,7 @@ fn main() -> Result<()> {
let args: Args = argh::from_env(); let args: Args = argh::from_env();
// load images // load images
let xs = [ let xs = DataLoader::try_read_n(&["images/green-car.jpg", "assets/bus.jpg"])?;
DataLoader::try_read("images/green-car.jpg")?,
DataLoader::try_read("assets/bus.jpg")?,
];
// build model // build model
let ( let (
@ -109,12 +106,6 @@ fn main() -> Result<()> {
), ),
]; ];
// annotator
let annotator = Annotator::new()
.without_bboxes_conf(true)
.with_bboxes_thickness(3)
.with_saveout_subs(&["Florence2"]);
// inference // inference
for task in tasks.iter() { for task in tasks.iter() {
let ys = model.forward(&xs, task)?; let ys = model.forward(&xs, task)?;
@ -128,44 +119,122 @@ fn main() -> Result<()> {
println!("Task: {:?}\n{:?}\n", task, &ys) println!("Task: {:?}\n{:?}\n", task, &ys)
} }
Task::DenseRegionCaption => { Task::DenseRegionCaption => {
let annotator = annotator.clone().with_saveout("Dense-Region-Caption"); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); Annotator::default()
.with_hbb_style(Style::hbb().show_confidence(false))
.annotate(x, y)?
.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Florence2", "Dense-Region-Caption"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::RegionProposal => { Task::RegionProposal => {
let annotator = annotator for (x, y) in xs.iter().zip(ys.iter()) {
.clone() Annotator::default()
.without_bboxes_name(false) .with_hbb_style(Style::hbb().show_confidence(false).show_name(false))
.with_saveout("Region-Proposal"); .annotate(x, y)?
.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Florence2", "Region-Proposal"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::ObjectDetection => { Task::ObjectDetection => {
let annotator = annotator.clone().with_saveout("Object-Detection"); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); Annotator::default().annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Florence2", "Object-Detection"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::OpenSetDetection(_) => { Task::OpenSetDetection(_) => {
let annotator = annotator.clone().with_saveout("Open-Set-Detection"); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); Annotator::default().annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Florence2", "Open-Object-Detection"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::CaptionToPhraseGrounding(_) => { Task::CaptionToPhraseGrounding(_) => {
let annotator = annotator for (x, y) in xs.iter().zip(ys.iter()) {
.clone() Annotator::default()
.with_saveout("Caption-To-Phrase-Grounding"); .with_hbb_style(Style::hbb().show_confidence(false))
annotator.annotate(&xs, &ys); .annotate(x, y)?
.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&[
"runs",
"Florence2",
"Caption-To-Phrase-Grounding"
])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::ReferringExpressionSegmentation(_) => { Task::ReferringExpressionSegmentation(_) => {
let annotator = annotator for (x, y) in xs.iter().zip(ys.iter()) {
.clone() Annotator::default()
.with_saveout("Referring-Expression-Segmentation"); .with_hbb_style(Style::hbb().show_confidence(false))
annotator.annotate(&xs, &ys); .annotate(x, y)?
.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&[
"runs",
"Florence2",
"Referring-Expression-Segmentation"
])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::RegionToSegmentation(..) => { Task::RegionToSegmentation(..) => {
let annotator = annotator.clone().with_saveout("Region-To-Segmentation"); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); Annotator::default()
.with_hbb_style(Style::hbb().show_confidence(false))
.annotate(x, y)?
.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&[
"runs",
"Florence2",
"Region-To-Segmentation",
])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
Task::OcrWithRegion => { Task::OcrWithRegion => {
let annotator = annotator.clone().with_saveout("Ocr-With-Region"); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); Annotator::default()
.with_hbb_style(Style::hbb().show_confidence(false))
.annotate(x, y)?
.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "Florence2", "Ocr-With-Region",])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
_ => (), _ => (),


@ -56,16 +56,22 @@ fn main() -> Result<()> {
let mut model = GroundingDINO::new(options)?; let mut model = GroundingDINO::new(options)?;
// load images // load images
let xs = DataLoader::try_read_batch(&args.source)?; let xs = DataLoader::try_read_n(&args.source)?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_bboxes_thickness(4) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
// summary // summary
model.summary(); model.summary();


@ -1,5 +0,0 @@
## Quick Start
```shell
RUST_LOG=usls=info cargo run -r --example hub
```


@ -1,26 +0,0 @@
use usls::Hub;
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// 1. Download from default github release
let path = Hub::default().try_fetch("images/bus.jpg")?;
println!("Fetch one image: {:?}", path);
// 2. Download from specific github release url
let path = Hub::default()
.try_fetch("https://github.com/jamjamjon/assets/releases/download/images/bus.jpg")?;
println!("Fetch one file: {:?}", path);
// 3. Fetch tags and files
let hub = Hub::default().with_owner("jamjamjon").with_repo("usls");
for (i, tag) in hub.tags().iter().enumerate() {
let files = hub.files(tag);
println!("{} :: {} => {:?}", i, tag, files); // Should be empty
}
Ok(())
}

examples/imshow.rs (new file, 51 lines)

@ -0,0 +1,51 @@
use usls::{DataLoader, Viewer};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// source
#[argh(option, default = "String::from(\"./assets\")")]
source: String,
// /// record video and save
// #[argh(option, default = "false")]
// save_video: bool,
}
fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env();
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let dl = DataLoader::new(args.source.as_str())?.build()?;
let mut viewer = Viewer::default().with_window_scale(1.);
for images in &dl {
// stop if the window existed but has been closed
if viewer.is_window_exist() && !viewer.is_window_open() {
break;
}
viewer.imshow(&images[0])?;
// check for key events (Esc to quit)
if let Some(key) = viewer.wait_key(1) {
if key == usls::Key::Escape {
break;
}
}
// image info
for image in &images {
println!("## {:?}", image);
}
// // write video (requires the video feature)
// if args.save_video {
// viewer.write_video_frame(&images[0])?;
// }
}
Ok(())
}
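For reference, the new example can be launched like the other Quick Start snippets; whether an extra feature flag is needed depends on how `Viewer` is gated, so treat this as a sketch:

```shell
# feature flags may be required depending on how Viewer is gated
cargo run -r --example imshow -- --source ./assets
```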


@ -1,6 +1,6 @@
## Quick Start ## Quick Start
```shell ```shell
cargo run -r --example fast cargo run -r --example linknet
``` ```


@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options, Scale}; use usls::DataLoader;
use usls::{models::DB, Annotator, Options, Scale, Style};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -40,7 +41,7 @@ fn main() -> Result<()> {
)?; )?;
// load image // load image
let x = DataLoader::try_read_batch(&[ let xs = DataLoader::try_read_n(&[
"images/table.png", "images/table.png",
"images/table1.jpg", "images/table1.jpg",
"images/table2.png", "images/table2.png",
@ -50,16 +51,46 @@ fn main() -> Result<()> {
])?; ])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default()
.without_bboxes(true) .with_polygon_style(
.without_mbrs(true) Style::polygon()
.with_polygons_alpha(60) .with_visible(true)
.with_contours_color([255, 105, 180, 255]) .with_text_visible(false)
.with_saveout(model.spec()); .show_confidence(true)
annotator.annotate(&x, &y); .show_id(true)
.show_name(true)
.with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
)
.with_hbb_style(
Style::hbb()
.with_visible(false)
.with_text_visible(false)
.with_thickness(1)
.show_confidence(false)
.show_id(false)
.show_name(false),
)
.with_obb_style(
Style::obb()
.with_visible(false)
.with_text_visible(false)
.show_confidence(false)
.show_id(false)
.show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -1,13 +0,0 @@
## Quick Start
```shell
cargo run -r -F cuda --example mobileone -- --device cuda --dtype fp16
```
```shell
0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
```


@ -1,57 +0,0 @@
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
/// source image
#[argh(
option,
default = "vec![
String::from(\"images/dog.jpg\"),
String::from(\"images/siamese.png\"),
String::from(\"images/ailurus-fulgens.jpg\"),
]"
)]
source: Vec<String>,
}
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
let args: Args = argh::from_env();
// build model
let options = Options::mobileone_s0()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = ImageClassifier::try_from(options)?;
// load images
let xs = DataLoader::try_read_batch(&args.source)?;
// run
let ys = model.forward(&xs)?;
// results
for (i, y) in ys.iter().enumerate() {
println!("{}: {:?}", i, y);
}
// annotate
let annotator = Annotator::default().with_saveout(model.spec());
annotator.annotate(&xs, &ys);
Ok(())
}


@ -11,14 +11,22 @@ fn main() -> anyhow::Result<()> {
let mut model = MODNet::new(options)?; let mut model = MODNet::new(options)?;
// load image // load image
let xs = [DataLoader::try_read("images/liuyifei.png")?]; let xs = DataLoader::try_read_n(&["images/liuyifei.png"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default().with_saveout(model.spec()); let annotator = Annotator::default();
annotator.annotate(&xs, &ys); for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -116,7 +116,7 @@ fn main() -> Result<()> {
)?; )?;
// load images // load images
let xs = DataLoader::try_read_batch(&args.source)?; let xs = DataLoader::try_read_n(&args.source)?;
// run with task // run with task
let task: Task = args.task.as_str().try_into()?; let task: Task = args.task.as_str().try_into()?;
@ -142,13 +142,37 @@ fn main() -> Result<()> {
} }
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => { Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
println!("{:?}", ys); println!("{:?}", ys);
// let annotator = Annotator::default()
// .with_bboxes_thickness(4)
// .without_bboxes_conf(true)
// .with_keypoints_radius(6)
// .with_keypoints_name(true)
// .with_saveout("moondream2");
// annotator.annotate(&xs, &ys);
// annotate
let annotator = Annotator::default() let annotator = Annotator::default()
.with_bboxes_thickness(4) .with_hbb_style(
.without_bboxes_conf(true) usls::Style::hbb()
.with_keypoints_radius(6) .with_draw_fill(true)
.with_keypoints_name(true) .show_confidence(false),
.with_saveout("moondream2"); )
annotator.annotate(&xs, &ys); .with_keypoint_style(
usls::Style::keypoint()
.show_confidence(false)
.show_id(true)
.show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "moondream2"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
_ => unimplemented!("Unsupported moondream2 task."), _ => unimplemented!("Unsupported moondream2 task."),
} }


@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::OWLv2, Annotator, DataLoader, Options}; use usls::DataLoader;
use usls::{models::OWLv2, Annotator, Options};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -55,16 +56,22 @@ fn main() -> Result<()> {
let mut model = OWLv2::new(options)?; let mut model = OWLv2::new(options)?;
// load // load
let xs = DataLoader::try_read_batch(&args.source)?; let xs = DataLoader::try_read_n(&args.source)?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_bboxes_thickness(3) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::PicoDet, Annotator, DataLoader, Options}; use usls::DataLoader;
use usls::{models::PicoDet, Annotator, Options};
fn main() -> Result<()> { fn main() -> Result<()> {
tracing_subscriber::fmt() tracing_subscriber::fmt()
@ -15,17 +16,23 @@ fn main() -> Result<()> {
let mut model = PicoDet::new(options)?; let mut model = PicoDet::new(options)?;
// load // load
let xs = [DataLoader::try_read("images/academic.jpg")?]; let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;
// annotator
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
println!("{:?}", ys); println!("{:?}", ys);
annotator.annotate(&xs, &ys);
// annotate
let annotator = Annotator::default();
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }

examples/read_images.rs (new file, 58 lines)

@ -0,0 +1,58 @@
use usls::{DataLoader, Image, ImageVecExt};
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
// 1. Read one image
let image = Image::try_read("./assets/bus.jpg")?;
println!("Image::try_read(): {:?}", image);
// image.save("kkk.png")?;
// => To Rgba8
let _image_rgba = image.to_rgba8();
// 2. Read one image with DataLoader
let image = DataLoader::try_read_one("./assets/bus.jpg")?;
println!("DataLoader::try_read_one(): {:?}", image);
// 3. Read N images with DataLoader
let images = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/cat.png"])?;
println!("DataLoader::try_read_n():");
for image in images {
println!(" - {:?}", image);
}
// 4. Read image folder with DataLoader
let images = DataLoader::try_read_folder("./assets")?;
println!("DataLoader::try_read_folder():");
for image in images {
println!(" - {:?}", image);
}
// 5. Glob and read image folder with DataLoader
// let images = DataLoader::try_read_pattern("./assets/*.Jpg")?;
let images = DataLoader::try_read_pattern_case_insensitive("./assets/*.Jpg")?;
println!("DataLoader::try_read_pattern_case_insensitive():");
for image in images {
println!(" - {:?}", image);
}
// 6. Load images with DataLoader
let dl = DataLoader::new("./assets")?.with_batch(2).build()?;
// iterate over the dataloader
for (i, images) in dl.iter().enumerate() {
println!("## Batch-{}: {:?}", i + 1, images);
}
// 7. Vec<Image> <-> Vec<DynamicImage>
let images = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/cat.png"])?;
let dyn_images = images.into_dyns();
let _images = dyn_images.into_images();
Ok(())
}


@ -1,4 +1,4 @@
use usls::{DataLoader, Key, Viewer}; use usls::DataLoader;
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -12,32 +12,25 @@ struct Args {
} }
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env();
tracing_subscriber::fmt() tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339()) .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init(); .init();
let args: Args = argh::from_env(); // load images or video stream
let dl = DataLoader::new(&args.source)?.with_batch(1).build()?; let dl = DataLoader::new(args.source.as_str())?
.with_batch(1)
// .with_nf_skip(1)
// .with_progress_bar(true)
.build()?;
let mut viewer = Viewer::new().with_delay(5).with_scale(1.).resizable(true); // iterate over the dataloader
for images in &dl {
// run & annotate for image in &images {
for (xs, _paths) in dl { println!("## {:?}", image);
// show image
viewer.imshow(&xs)?;
// check out window and key event
if !viewer.is_open() || viewer.is_key_pressed(Key::Escape) {
break;
} }
// write video
viewer.write_batch(&xs)?
} }
// finish video write
viewer.finish_write()?;
Ok(()) Ok(())
} }


@ -7,11 +7,11 @@ cargo run -r --example rfdetr
## Results ## Results
``` ```
[Bboxes]: Found 6 objects [Bboxes]: Found 5 objects
0: Bbox { xyxy: [221.55753, 408.0652, 345.23325, 860.2527], class_id: 1, name: Some("person"), confidence: 0.93212366 } 0: Bbox { xyxy: [47.969677, 397.81808, 246.22426, 904.8823], class_id: 0, name: Some("person"), confidence: 0.94432133 }
1: Bbox { xyxy: [44.967827, 397.84177, 246.13187, 905.7567], class_id: 1, name: Some("person"), confidence: 0.93540853 } 1: Bbox { xyxy: [668.0796, 399.28854, 810.3779, 880.7412], class_id: 0, name: Some("person"), confidence: 0.93386495 }
2: Bbox { xyxy: [6.2678833, 233.208, 801.6806, 737.4714], class_id: 6, name: Some("bus"), confidence: 0.93637216 } 2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
3: Bbox { xyxy: [0.0, 555.167, 77.74801, 870.2772], class_id: 1, name: Some("person"), confidence: 0.85163206 } 3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
4: Bbox { xyxy: [133.94543, 473.6574, 149.62558, 507.99875], class_id: 32, name: Some("tie"), confidence: 0.2992424 } 4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
5: Bbox { xyxy: [669.81836, 395.28635, 813.44855, 879.9562], class_id: 1, name: Some("person"), confidence: 0.83661026 }
``` ```


@ -12,26 +12,25 @@ fn main() -> Result<()> {
let mut model = RFDETR::new(options)?; let mut model = RFDETR::new(options)?;
// load // load
let xs = [DataLoader::try_read("./assets/bus.jpg")?]; let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// extract bboxes // extract bboxes
for y in ys.iter() { println!("{:?}", ys);
if let Some(bboxes) = y.bboxes() {
println!("[Bboxes]: Found {} objects", bboxes.len());
for (i, bbox) in bboxes.iter().enumerate() {
println!("{}: {:?}", i, bbox)
}
}
}
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_bboxes_thickness(3) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -13,4 +13,5 @@ cargo run -r --example rtdetr
2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 } 2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 } 3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 } 4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
``` ```


@ -18,26 +18,23 @@ fn main() -> Result<()> {
let mut model = RTDETR::new(options)?; let mut model = RTDETR::new(options)?;
// load // load
let xs = [DataLoader::try_read("./assets/bus.jpg")?]; let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
println!("{:?}", ys);
// extract bboxes
for y in ys.iter() {
if let Some(bboxes) = y.bboxes() {
println!("[Bboxes]: Found {} objects", bboxes.len());
for (i, bbox) in bboxes.iter().enumerate() {
println!("{}: {:?}", i, bbox)
}
}
}
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_bboxes_thickness(3) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -1,5 +1,5 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETONS_16}; use usls::{models::RTMO, Annotator, DataLoader, Options, Style, SKELETON_COCO_19};
fn main() -> Result<()> { fn main() -> Result<()> {
tracing_subscriber::fmt() tracing_subscriber::fmt()
@ -11,16 +11,31 @@ fn main() -> Result<()> {
let mut model = RTMO::new(Options::rtmo_s().commit()?)?; let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
// load image // load image
let xs = [DataLoader::try_read("images/bus.jpg")?]; let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
println!("ys: {:?}", ys);
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default()
.with_saveout(model.spec()) .with_hbb_style(Style::hbb().with_draw_fill(true))
.with_skeletons(&COCO_SKELETONS_16); .with_keypoint_style(
annotator.annotate(&xs, &ys); Style::keypoint()
.with_skeleton(SKELETON_COCO_19.into())
.show_confidence(false)
.show_id(true)
.show_name(false),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -64,10 +64,7 @@ fn main() -> Result<()> {
let mut model = SAM::new(options_encoder, options_decoder)?; let mut model = SAM::new(options_encoder, options_decoder)?;
// Load image // Load image
let xs = [DataLoader::try_read("images/truck.jpg")?]; let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
// Build annotator
let annotator = Annotator::default().with_saveout(model.spec());
// Prompt // Prompt
let prompts = vec![ let prompts = vec![
@ -79,7 +76,18 @@ fn main() -> Result<()> {
// Run & Annotate // Run & Annotate
let ys = model.forward(&xs, &prompts)?; let ys = model.forward(&xs, &prompts)?;
annotator.annotate(&xs, &ys);
// annotate
let annotator = Annotator::default();
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -23,17 +23,22 @@ fn main() -> Result<()> {
let mut model = Sapiens::new(options)?; let mut model = Sapiens::new(options)?;
// load // load
let x = [DataLoader::try_read("images/paul-george.jpg")?]; let xs = DataLoader::try_read_n(&["images/paul-george.jpg"])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.without_masks(true) for (x, y) in xs.iter().zip(ys.iter()) {
.with_polygons_name(true) annotator.annotate(x, y)?.save(format!(
.with_saveout(model.spec()); "{}.jpg",
annotator.annotate(&x, &y); usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -1,5 +1,5 @@
use anyhow::Result; use anyhow::Result;
use usls::{models::SLANet, Annotator, DataLoader, Options}; use usls::{models::SLANet, Annotator, Color, DataLoader, Options};
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
/// Example /// Example
@ -33,18 +33,34 @@ fn main() -> Result<()> {
let mut model = SLANet::new(options)?; let mut model = SLANet::new(options)?;
// load // load
let xs = DataLoader::try_read_batch(&[args.source])?; let xs = DataLoader::try_read_n(&[args.source])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// println!("{:?}", ys); println!("{:?}", ys);
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default().with_keypoint_style(
.with_keypoints_radius(2) usls::Style::keypoint()
.with_skeletons(&[(0, 1), (1, 2), (2, 3), (3, 0)]) .with_text_visible(false)
.with_saveout(model.spec()); .with_skeleton(
annotator.annotate(&xs, &ys); (
[(0, 1), (1, 2), (2, 3), (3, 0)],
[Color::black(), Color::red(), Color::green(), Color::blue()],
)
.into(),
),
);
for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
// summary // summary
model.summary(); model.summary();


@ -57,7 +57,7 @@ fn main() -> Result<()> {
)?; )?;
// load images // load images
let xs = DataLoader::try_read_batch(&args.source)?; let xs = DataLoader::try_read_n(&args.source)?;
// run // run
let ys = model.forward(&xs, &args.prompt)?; let ys = model.forward(&xs, &args.prompt)?;


@ -37,9 +37,9 @@ fn main() -> Result<()> {
.build()?; .build()?;
// run // run
for (xs, paths) in dl { for xs in &dl {
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
println!("{paths:?}: {:?}", ys) println!("ys: {:?}", ys);
} }
// summary // summary


@ -32,7 +32,7 @@ fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env(); let args: Args = argh::from_env();
// load images // load images
let xs = DataLoader::try_read_batch(&[ let xs = DataLoader::try_read_n(&[
"images/text-en-dark.png", "images/text-en-dark.png",
"images/text-hello-rust-handwritten.png", "images/text-hello-rust-handwritten.png",
])?; ])?;


@ -1,5 +0,0 @@
## Quick Start
```shell
RUST_LOG=usls=info cargo run -F ffmpeg -r --example viewer
```


@ -1,7 +1,7 @@
use anyhow::Result; use anyhow::Result;
use usls::{ use usls::{
models::{SamPrompt, SAM, YOLO}, models::{SamPrompt, SAM, YOLO},
Annotator, DataLoader, Options, Scale, Annotator, DataLoader, Options, Scale, Style,
}; };
#[derive(argh::FromArgs)] #[derive(argh::FromArgs)]
@ -30,37 +30,41 @@ fn main() -> Result<()> {
// build YOLOv8 // build YOLOv8
let options_yolo = Options::yolo_detect() let options_yolo = Options::yolo_detect()
.with_model_scale(Scale::N) .with_model_scale(Scale::N)
.with_model_version(8.0.into()) .with_model_version(8.into())
.with_model_device(args.device.as_str().try_into()?) .with_model_device(args.device.as_str().try_into()?)
.commit()?; .commit()?;
let mut yolo = YOLO::new(options_yolo)?; let mut yolo = YOLO::new(options_yolo)?;
// load one image // load one image
let xs = DataLoader::try_read_batch(&["images/dog.jpg"])?; let xs = DataLoader::try_read_n(&["images/dog.jpg"])?;
// build annotator // build annotator
let annotator = Annotator::default() let annotator = Annotator::default().with_hbb_style(Style::hbb().with_draw_fill(true));
.with_bboxes_thickness(7)
.without_bboxes_name(true)
.without_bboxes_conf(true)
.without_mbrs(true)
.with_saveout("YOLO-SAM");
// run & annotate // run & annotate
let ys_det = yolo.forward(&xs)?; let ys_det = yolo.forward(&xs)?;
for y_det in ys_det.iter() { for y_det in ys_det.iter() {
if let Some(bboxes) = y_det.bboxes() { if let Some(hbbs) = y_det.hbbs() {
for bbox in bboxes { for hbb in hbbs {
let ys_sam = sam.forward( let ys_sam = sam.forward(
&xs, &xs,
&[SamPrompt::default().with_bbox( &[SamPrompt::default().with_bbox(
bbox.xmin(), hbb.xmin(),
bbox.ymin(), hbb.ymin(),
bbox.xmax(), hbb.xmax(),
bbox.ymax(), hbb.ymax(),
)], )],
)?; )?;
annotator.annotate(&xs, &ys_sam); // annotator.annotate(&xs, &ys_sam);
for (x, y) in xs.iter().zip(ys_sam.iter()) {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "YOLO-SAM"])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
} }
} }


@ -1,7 +1,7 @@
use anyhow::Result; use anyhow::Result;
use usls::{ use usls::{
models::YOLO, Annotator, DataLoader, Options, COCO_CLASS_NAMES_80, COCO_SKELETONS_16, models::YOLO, Annotator, DataLoader, Options, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
IMAGENET_NAMES_1K, NAMES_IMAGENET_1K, SKELETON_COCO_19, SKELETON_COLOR_COCO_19,
}; };
#[derive(argh::FromArgs, Debug)] #[derive(argh::FromArgs, Debug)]
@ -39,10 +39,6 @@ struct Args {
#[argh(option, default = "true")] #[argh(option, default = "true")]
trt_fp16: bool, trt_fp16: bool,
/// find_contours
#[argh(option, default = "true")]
find_contours: bool,
/// batch_size /// batch_size
#[argh(option, default = "1")] #[argh(option, default = "1")]
batch_size: usize, batch_size: usize,
@ -91,6 +87,10 @@ struct Args {
#[argh(switch)] #[argh(switch)]
use_coco_80_classes: bool, use_coco_80_classes: bool,
/// use_coco_17_keypoints_classes
#[argh(switch)]
use_coco_17_keypoints_classes: bool,
/// use_imagenet_1k_classes /// use_imagenet_1k_classes
#[argh(switch)] #[argh(switch)]
use_imagenet_1k_classes: bool, use_imagenet_1k_classes: bool,
@ -118,6 +118,10 @@ struct Args {
/// keypoint_names /// keypoint_names
#[argh(option)] #[argh(option)]
keypoint_names: Vec<String>, keypoint_names: Vec<String>,
/// topk
#[argh(option, default = "5")]
topk: usize,
} }
fn main() -> Result<()> { fn main() -> Result<()> {
@ -131,7 +135,7 @@ fn main() -> Result<()> {
let mut options = Options::yolo() let mut options = Options::yolo()
.with_model_file(&args.model.unwrap_or_default()) .with_model_file(&args.model.unwrap_or_default())
.with_model_task(args.task.as_str().try_into()?) .with_model_task(args.task.as_str().try_into()?)
.with_model_version(args.ver.into()) .with_model_version(args.ver.try_into()?)
.with_model_scale(args.scale.as_str().try_into()?) .with_model_scale(args.scale.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?) .with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?) .with_model_device(args.device.as_str().try_into()?)
@ -166,16 +170,20 @@ fn main() -> Result<()> {
} else { } else {
&args.keypoint_confs &args.keypoint_confs
}) })
.with_find_contours(args.find_contours) .with_topk(args.topk)
.retain_classes(&args.retain_classes) .retain_classes(&args.retain_classes)
.exclude_classes(&args.exclude_classes); .exclude_classes(&args.exclude_classes);
if args.use_coco_80_classes { if args.use_coco_80_classes {
options = options.with_class_names(&COCO_CLASS_NAMES_80); options = options.with_class_names(&NAMES_COCO_80);
}
if args.use_coco_17_keypoints_classes {
options = options.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
} }
if args.use_imagenet_1k_classes { if args.use_imagenet_1k_classes {
options = options.with_class_names(&IMAGENET_NAMES_1K); options = options.with_class_names(&NAMES_IMAGENET_1K);
} }
if let Some(nc) = args.num_classes { if let Some(nc) = args.num_classes {
@ -216,26 +224,35 @@ fn main() -> Result<()> {
// build annotator // build annotator
let annotator = Annotator::default() let annotator = Annotator::default()
.with_skeletons(&COCO_SKELETONS_16) .with_obb_style(Style::obb().with_draw_fill(true))
.without_masks(true) .with_hbb_style(
.with_bboxes_thickness(3) Style::hbb()
.with_saveout(model.spec()); .with_draw_fill(true)
.with_palette(&usls::Color::palette_coco_80()),
)
.with_keypoint_style(
Style::keypoint()
.with_skeleton((SKELETON_COCO_19, SKELETON_COLOR_COCO_19).into())
.show_confidence(false)
.show_id(true)
.show_name(false),
)
.with_mask_style(Style::mask().with_draw_mask_polygon_largest(true));
// run & annotate // run & annotate
for (xs, _paths) in dl { for xs in &dl {
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
// extract bboxes println!("ys: {:?}", ys);
// for y in ys.iter() {
// if let Some(bboxes) = y.bboxes() {
// println!("[Bboxes]: Found {} objects", bboxes.len());
// for (i, bbox) in bboxes.iter().enumerate() {
// println!("{}: {:?}", i, bbox)
// }
// }
// }
// plot for (x, y) in xs.iter().zip(ys.iter()) {
annotator.annotate(&xs, &ys); annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
} }
model.summary(); model.summary();


@ -12,16 +12,22 @@ fn main() -> Result<()> {
let mut model = YOLOPv2::new(options)?; let mut model = YOLOPv2::new(options)?;
// load image // load image
let x = [DataLoader::try_read("images/car-view.jpg")?]; let xs = DataLoader::try_read_n(&["images/car-view.jpg"])?;
// run // run
let y = model.forward(&x)?; let ys = model.forward(&xs)?;
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_polygons_name(true) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&x, &y); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -29,17 +29,23 @@ fn main() -> Result<()> {
let mut model = YOLO::new(config)?; let mut model = YOLO::new(config)?;
// load images // load images
let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?; let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
// run // run
let ys = model.forward(&xs)?; let ys = model.forward(&xs)?;
println!("{:?}", ys); println!("{:?}", ys);
// annotate // annotate
let annotator = Annotator::default() let annotator = Annotator::default();
.with_bboxes_thickness(3) for (x, y) in xs.iter().zip(ys.iter()) {
.with_saveout(model.spec()); annotator.annotate(x, y)?.save(format!(
annotator.annotate(&xs, &ys); "{}.jpg",
usls::Dir::Current
.base_dir_with_subs(&["runs", "YOLOv8-RT-DETR"])?
.join(usls::timestamp(None))
.display(),
))?;
}
Ok(()) Ok(())
} }


@ -3,13 +3,9 @@ use anyhow::Result;
use half::{bf16, f16}; use half::{bf16, f16};
use log::{debug, info, warn}; use log::{debug, info, warn};
use ndarray::{Array, IxDyn}; use ndarray::{Array, IxDyn};
#[allow(unused_imports)]
use ort::{ use ort::{
execution_providers::ExecutionProvider, execution_providers::ExecutionProvider,
session::{ session::{builder::GraphOptimizationLevel, Session, SessionInputValue},
builder::{GraphOptimizationLevel, SessionBuilder},
Session, SessionInputValue,
},
tensor::TensorElementType, tensor::TensorElementType,
value::{DynValue, Value}, value::{DynValue, Value},
}; };
@ -17,9 +13,31 @@ use prost::Message;
use std::collections::HashSet; use std::collections::HashSet;
use crate::{ use crate::{
build_progress_bar, elapsed, human_bytes, onnx, DType, Device, Iiix, MinOptMax, Ops, Ts, Xs, X, build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax, Ops, Ts,
Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
}; };
impl From<TensorElementType> for DType {
fn from(dtype: TensorElementType) -> Self {
match dtype {
TensorElementType::Int8 => Self::Int8,
TensorElementType::Int16 => Self::Int16,
TensorElementType::Int32 => Self::Int32,
TensorElementType::Int64 => Self::Int64,
TensorElementType::Uint8 => Self::Uint8,
TensorElementType::Uint16 => Self::Uint16,
TensorElementType::Uint32 => Self::Uint32,
TensorElementType::Uint64 => Self::Uint64,
TensorElementType::Float16 => Self::Fp16,
TensorElementType::Float32 => Self::Fp32,
TensorElementType::Float64 => Self::Fp64,
TensorElementType::Bfloat16 => Self::Bf16,
TensorElementType::String => Self::String,
TensorElementType::Bool => Self::Bool,
}
}
}
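// Sketch of this mapping in use: `let dt: DType = TensorElementType::Float16.into();` yields `DType::Fp16`.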
/// A struct for tensor attrs composed of the names, the dtypes, and the dimensions. /// A struct for tensor attrs composed of the names, the dtypes, and the dimensions.
#[derive(Builder, Debug, Clone)] #[derive(Builder, Debug, Clone)]
pub struct OrtTensorAttr { pub struct OrtTensorAttr {
@ -42,16 +60,17 @@ pub struct Engine {
pub spec: String, pub spec: String,
pub device: Device, pub device: Device,
pub trt_fp16: bool, pub trt_fp16: bool,
#[args(inc = true)] #[args(inc)]
pub iiixs: Vec<Iiix>, pub iiixs: Vec<Iiix>,
#[args(alias = "parameters")] #[args(aka = "parameters")]
pub params: Option<usize>, pub params: Option<usize>,
#[args(alias = "memory")] #[args(aka = "memory")]
pub wbmems: Option<usize>, pub wbmems: Option<usize>,
pub inputs_minoptmax: Vec<Vec<MinOptMax>>, pub inputs_minoptmax: Vec<Vec<MinOptMax>>,
pub onnx: Option<OnnxIo>, pub onnx: Option<OnnxIo>,
pub ts: Ts, pub ts: Ts,
pub num_dry_run: usize, pub num_dry_run: usize,
pub graph_opt_level: Option<u8>,
} }
impl Default for Engine { impl Default for Engine {
@ -68,6 +87,7 @@ impl Default for Engine {
inputs_minoptmax: vec![], inputs_minoptmax: vec![],
onnx: None, onnx: None,
ts: Ts::default(), ts: Ts::default(),
graph_opt_level: None,
} }
} }
} }
@ -151,7 +171,7 @@ impl Engine {
self.num_dry_run as u64, self.num_dry_run as u64,
"DryRun", "DryRun",
Some(self.spec()), Some(self.spec()),
crate::PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_CYAN_2,
)?; )?;
// dummy // dummy
@ -181,14 +201,14 @@ impl Engine {
self.spec, self.spec,
match self.params { match self.params {
Some(bytes) if bytes != 0 => { Some(bytes) if bytes != 0 => {
human_bytes(bytes as f64, true) human_bytes_binary(bytes as f64, 2)
} }
_ => "Unknown".to_string(), _ => "Unknown".to_string(),
}, },
self.device, self.device,
)); ));
pb.set_style(indicatif::ProgressStyle::with_template( pb.set_style(indicatif::ProgressStyle::with_template(
crate::PROGRESS_BAR_STYLE_FINISH, PROGRESS_BAR_STYLE_FINISH,
)?); )?);
pb.finish(); pb.finish();
} }
@ -349,7 +369,7 @@ impl Engine {
spec_max += &s_max; spec_max += &s_max;
} }
let p = crate::Dir::Cache.path_with_subs(&["trt-cache"])?; let p = crate::Dir::Cache.crate_dir_default_with_subs(&["trt-cache"])?;
let ep = ort::execution_providers::TensorRTExecutionProvider::default() let ep = ort::execution_providers::TensorRTExecutionProvider::default()
.with_device_id(id as i32) .with_device_id(id as i32)
.with_fp16(self.trt_fp16) .with_fp16(self.trt_fp16)
@ -430,8 +450,14 @@ impl Engine {
} }
// session // session
let graph_opt_level = match self.graph_opt_level {
Some(0) => GraphOptimizationLevel::Disable,
Some(1) => GraphOptimizationLevel::Level1,
Some(2) => GraphOptimizationLevel::Level2,
_ => GraphOptimizationLevel::Level3,
};
let session = builder let session = builder
.with_optimization_level(GraphOptimizationLevel::Level3)? .with_optimization_level(graph_opt_level)?
.with_intra_threads(std::thread::available_parallelism()?.get())? .with_intra_threads(std::thread::available_parallelism()?.get())?
.commit_from_file(self.file())?; .commit_from_file(self.file())?;
@ -602,6 +628,24 @@ impl Engine {
}) })
} }
// pub fn to_ort(&self) -> TensorElementType {
// match self {
// Self::Int8 => TensorElementType::Int8,
// Self::Int16 => TensorElementType::Int16,
// Self::Int32 => TensorElementType::Int32,
// Self::Int64 => TensorElementType::Int64,
// Self::Uint8 => TensorElementType::Uint8,
// Self::Uint16 => TensorElementType::Uint16,
// Self::Uint32 => TensorElementType::Uint32,
// Self::Uint64 => TensorElementType::Uint64,
// Self::Fp16 => TensorElementType::Float16,
// Self::Fp32 => TensorElementType::Float32,
// Self::Fp64 => TensorElementType::Float64,
// Self::Bf16 => TensorElementType::Bfloat16,
// _ => todo!(),
// }
// }
pub fn load_onnx<P: AsRef<std::path::Path>>(p: P) -> Result<onnx::ModelProto> { pub fn load_onnx<P: AsRef<std::path::Path>>(p: P) -> Result<onnx::ModelProto> {
let f = std::fs::read(p.as_ref())?; let f = std::fs::read(p.as_ref())?;
onnx::ModelProto::decode(f.as_slice()).map_err(|err| { onnx::ModelProto::decode(f.as_slice()).map_err(|err| {
@ -692,7 +736,7 @@ impl Engine {
x.inputs x.inputs
.dtypes() .dtypes()
.iter() .iter()
.map(DType::from_ort) .map(|x| DType::from(*x))
.collect::<Vec<DType>>() .collect::<Vec<DType>>()
.into() .into()
}) })
@ -715,7 +759,7 @@ impl Engine {
x.outputs x.outputs
.dtypes() .dtypes()
.iter() .iter()
.map(DType::from_ort) .map(|x| DType::from(*x))
.collect::<Vec<DType>>() .collect::<Vec<DType>>()
.into() .into()
}) })
@ -733,13 +777,13 @@ impl Engine {
self.device, self.device,
match self.params { match self.params {
Some(bytes) if bytes != 0 => { Some(bytes) if bytes != 0 => {
human_bytes(bytes as f64, true) human_bytes_binary(bytes as f64, 2)
} }
_ => "Unknown".to_string(), _ => "Unknown".to_string(),
}, },
match self.wbmems { match self.wbmems {
Some(bytes) if bytes != 0 => { Some(bytes) if bytes != 0 => {
human_bytes(bytes as f64, true) human_bytes_binary(bytes as f64, 2)
} }
_ => "Unknown".to_string(), _ => "Unknown".to_string(),
}, },

src/inference/hbb.rs (new file, 290 lines)

@ -0,0 +1,290 @@
use aksr::Builder;
use crate::{InstanceMeta, Keypoint, Style};
#[derive(Builder, Clone, Default)]
pub struct Hbb {
x: f32,
y: f32,
w: f32,
h: f32,
meta: InstanceMeta,
style: Option<Style>,
keypoints: Option<Vec<Keypoint>>,
}
impl std::fmt::Debug for Hbb {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Hbb")
.field("xyxy", &[self.x, self.y, self.xmax(), self.ymax()])
.field("meta", &self.meta)
.finish()
}
}
impl PartialEq for Hbb {
fn eq(&self, other: &Self) -> bool {
self.x == other.x && self.y == other.y && self.w == other.w && self.h == other.h
}
}
impl From<(f32, f32, f32, f32)> for Hbb {
/// Creates a `Hbb` from a tuple of `(x, y, w, h)`.
///
/// # Arguments
///
/// * `(x, y, w, h)` - A tuple representing the bounding box's position and size.
///
/// # Returns
///
/// A `Hbb` with the specified position and size.
fn from((x, y, w, h): (f32, f32, f32, f32)) -> Self {
Self {
x,
y,
w,
h,
..Default::default()
}
}
}
impl From<[f32; 4]> for Hbb {
/// Creates a `Hbb` from an array of `[x, y, w, h]`.
///
/// # Arguments
///
/// * `[x, y, w, h]` - An array representing the bounding box's position and size.
///
/// # Returns
///
/// A `Hbb` with the specified position and size.
fn from([x, y, w, h]: [f32; 4]) -> Self {
Self {
x,
y,
w,
h,
..Default::default()
}
}
}
impl From<Hbb> for (f32, f32, f32, f32) {
fn from(Hbb { x, y, w, h, .. }: Hbb) -> Self {
(x, y, w, h)
}
}
impl From<Hbb> for [f32; 4] {
fn from(Hbb { x, y, w, h, .. }: Hbb) -> Self {
[x, y, w, h]
}
}
impl Hbb {
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}
impl Hbb {
pub fn from_xywh(x: f32, y: f32, w: f32, h: f32) -> Self {
Self {
x,
y,
w,
h,
..Default::default()
}
}
pub fn from_xyxy(x1: f32, y1: f32, x2: f32, y2: f32) -> Self {
Self {
x: x1,
y: y1,
w: x2 - x1,
h: y2 - y1,
..Default::default()
}
}
pub fn from_cxcywh(cx: f32, cy: f32, w: f32, h: f32) -> Self {
Self {
x: cx - w / 2.0,
y: cy - h / 2.0,
w,
h,
..Default::default()
}
}
pub fn with_xyxy(mut self, x1: f32, y1: f32, x2: f32, y2: f32) -> Self {
self.x = x1;
self.y = y1;
self.w = x2 - x1;
self.h = y2 - y1;
self
}
pub fn with_xywh(mut self, x: f32, y: f32, w: f32, h: f32) -> Self {
self.x = x;
self.y = y;
self.w = w;
self.h = h;
self
}
pub fn with_cxcywh(mut self, cx: f32, cy: f32, w: f32, h: f32) -> Self {
self.x = cx - w / 2.0;
self.y = cy - h / 2.0;
self.w = w;
self.h = h;
self
}
pub fn width(&self) -> f32 {
self.w
}
pub fn height(&self) -> f32 {
self.h
}
pub fn xmin(&self) -> f32 {
self.x
}
pub fn ymin(&self) -> f32 {
self.y
}
pub fn xmax(&self) -> f32 {
self.x + self.w
}
pub fn ymax(&self) -> f32 {
self.y + self.h
}
pub fn cx(&self) -> f32 {
self.x + self.w / 2.
}
pub fn cy(&self) -> f32 {
self.y + self.h / 2.
}
pub fn xyxy(&self) -> (f32, f32, f32, f32) {
(self.x, self.y, self.x + self.w, self.y + self.h)
}
pub fn xywh(&self) -> (f32, f32, f32, f32) {
(self.x, self.y, self.w, self.h)
}
pub fn cxywh(&self) -> (f32, f32, f32, f32) {
(self.cx(), self.cy(), self.w, self.h)
}
pub fn area(&self) -> f32 {
self.h * self.w
}
pub fn perimeter(&self) -> f32 {
(self.h + self.w) * 2.0
}
pub fn is_square(&self) -> bool {
self.w == self.h
}
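/// Intersection area of the two boxes; 0.0 when they do not overlap.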
pub fn intersect(&self, other: &Hbb) -> f32 {
let l = self.xmin().max(other.xmin());
let r = (self.xmin() + self.width()).min(other.xmin() + other.width());
let t = self.ymin().max(other.ymin());
let b = (self.ymin() + self.height()).min(other.ymin() + other.height());
(r - l).max(0.) * (b - t).max(0.)
}
pub fn union(&self, other: &Hbb) -> f32 {
self.area() + other.area() - self.intersect(other)
}
pub fn iou(&self, other: &Self) -> f32 {
self.intersect(other) / self.union(other)
}
pub fn contains(&self, other: &Hbb) -> bool {
self.xmin() <= other.xmin()
&& self.xmax() >= other.xmax()
&& self.ymin() <= other.ymin()
&& self.ymax() >= other.ymax()
}
pub fn to_json() {
// Display?
todo!()
}
}
#[cfg(test)]
mod tests_bbox {
use super::Hbb;
#[test]
fn new() {
let bbox1 = Hbb::from((0., 0., 5., 5.));
let bbox2: Hbb = [0., 0., 5., 5.].into();
assert_eq!(bbox1, bbox2);
}
#[test]
fn funcs() {
let bbox1 = Hbb::from_xyxy(0., 0., 5., 5.);
let bbox2 = Hbb::from_xyxy(1., 1., 6., 6.);
assert_eq!(bbox1.intersect(&bbox2), 16.);
assert_eq!(bbox1.area(), 25.);
assert_eq!(bbox2.area(), 25.);
assert_eq!(bbox2.perimeter(), 20.);
assert!(bbox2.is_square());
assert_eq!(bbox1.union(&bbox2), 34.);
let bbox3 = Hbb::from_xyxy(2., 2., 5., 5.);
assert!(!bbox1.contains(&bbox2));
assert!(bbox1.contains(&bbox3));
assert!(bbox2.contains(&bbox3));
}
}
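As a quick orientation, here is a minimal sketch of the builder-style `Hbb` API defined above; it assumes `Hbb` is re-exported at the crate root like the other inference types:

```rust
use usls::Hbb; // assumes a crate-root re-export

fn main() {
    // Two hypothetical boxes, just to exercise the API.
    let a = Hbb::from_xyxy(0., 0., 100., 80.)
        .with_id(0)
        .with_name("person")
        .with_confidence(0.88);
    let b = Hbb::from_cxcywh(60., 50., 100., 80.);

    println!("area: {}, iou: {:.3}", a.area(), a.iou(&b));
    assert!(a.intersect(&b) > 0.);
}
```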

src/inference/image.rs (new file, 402 lines)

@ -0,0 +1,402 @@
use aksr::Builder;
use anyhow::Result;
use fast_image_resize::{
images::{CroppedImageMut, Image as FImage},
pixels::PixelType,
};
use image::{DynamicImage, GrayImage, RgbImage, RgbaImage};
use std::path::{Path, PathBuf};
use crate::{build_resizer_filter, Hub, Location, MediaType, X};
#[derive(Builder, Debug, Clone, Default)]
pub struct ImageTransformInfo {
pub width_src: u32,
pub height_src: u32,
pub width_dst: u32,
pub height_dst: u32,
pub height_scale: f32,
pub width_scale: f32,
}
#[derive(Debug, Clone, Default)]
pub enum ResizeMode {
/// StretchToFit
FitExact,
FitWidth,
FitHeight,
#[default]
FitAdaptive,
Letterbox,
}
#[derive(Builder, Clone)]
pub struct Image {
image: RgbImage,
source: Option<PathBuf>,
media_type: MediaType,
}
impl Default for Image {
fn default() -> Self {
Self {
image: RgbImage::new(0, 0),
source: None,
media_type: MediaType::Unknown,
}
}
}
impl std::fmt::Debug for Image {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Image")
.field("Height", &self.height())
.field("Width", &self.width())
.field("MediaType", &self.media_type)
.field("Source", &self.source)
.finish()
}
}
impl std::ops::Deref for Image {
type Target = RgbImage;
fn deref(&self) -> &Self::Target {
&self.image
}
}
impl std::ops::DerefMut for Image {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.image
}
}
impl From<DynamicImage> for Image {
fn from(image: DynamicImage) -> Self {
Self {
image: image.to_rgb8(),
..Default::default()
}
}
}
impl From<GrayImage> for Image {
fn from(image: GrayImage) -> Self {
Self {
image: DynamicImage::from(image).to_rgb8(),
..Default::default()
}
}
}
impl From<RgbImage> for Image {
fn from(image: RgbImage) -> Self {
Self {
image,
..Default::default()
}
}
}
impl From<RgbaImage> for Image {
fn from(image: RgbaImage) -> Self {
Self {
image: DynamicImage::from(image).to_rgb8(),
..Default::default()
}
}
}
impl From<Image> for DynamicImage {
fn from(image: Image) -> Self {
image.into_dyn()
}
}
impl From<Image> for RgbImage {
fn from(image: Image) -> Self {
image.into_rgb8()
}
}
impl From<Image> for RgbaImage {
fn from(image: Image) -> Self {
image.into_rgba8()
}
}
impl Image {
pub fn from_u8s(u8s: &[u8], width: u32, height: u32) -> Result<Self> {
let image = RgbImage::from_raw(width, height, u8s.to_vec())
.ok_or_else(|| anyhow::anyhow!("Failed to create image from raw data: buffer length might not match width * height * 3"))?;
Ok(Self {
image,
..Default::default()
})
}
pub fn try_read<P: AsRef<Path>>(path: P) -> Result<Self> {
let media_type;
let mut path = path.as_ref().to_path_buf();
// try to fetch from hub or local cache
if !path.exists() {
let p = match Hub::default()
.try_fetch(path.to_str().expect("Failed to convert path to str"))
{
Ok(p) => {
media_type = MediaType::Image(Location::Remote);
p
}
Err(err) => {
return Err(anyhow::anyhow!(
"Failed to locate path: {:?} and file also not found in hub. Error: {:?}",
path.display(),
err
));
}
};
path = PathBuf::from(&p);
} else {
media_type = MediaType::Image(Location::Local);
}
let image = image::ImageReader::open(&path)
.map_err(|err| {
anyhow::anyhow!(
"Failed to open image at {:?}. Error: {:?}",
path.display(),
err
)
})?
.with_guessed_format()
.map_err(|err| {
anyhow::anyhow!(
"Failed to make a format guess based on the content: {:?}. Error: {:?}",
path.display(),
err
)
})?
.decode()
.map_err(|err| {
anyhow::anyhow!(
"Failed to decode image at {:?}. Error: {:?}",
path.display(),
err
)
})?;
Ok(Self {
image: image.to_rgb8(),
media_type,
source: Some(path),
})
}
pub fn save<P: AsRef<Path>>(&self, p: P) -> Result<()> {
self.image
.save(p.as_ref())
.map_err(|err| anyhow::anyhow!("Failed to save image: {:?}", err))
}
/// (width, height)
pub fn dimensions(&self) -> (u32, u32) {
self.image.dimensions()
}
pub fn height(&self) -> u32 {
self.image.height()
}
pub fn width(&self) -> u32 {
self.image.width()
}
pub fn size(&self) -> u32 {
self.image.as_raw().len() as u32
}
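/// Packs each RGB pixel into a single `u32` laid out as `0x00RRGGBB`.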
pub fn to_u32s(&self) -> Vec<u32> {
use rayon::prelude::*;
self.image
.as_raw()
.par_chunks(3)
.map(|c| ((c[0] as u32) << 16) | ((c[1] as u32) << 8) | (c[2] as u32))
.collect()
}
pub fn to_f32s(&self) -> Vec<f32> {
use rayon::prelude::*;
self.image
.as_raw()
.into_par_iter()
.map(|x| *x as f32)
.collect()
}
pub fn to_dyn(&self) -> DynamicImage {
DynamicImage::from(self.image.clone())
}
pub fn to_rgb8(&self) -> RgbImage {
self.image.clone()
}
pub fn to_rgba8(&self) -> RgbaImage {
DynamicImage::from(self.image.clone()).to_rgba8()
}
pub fn to_luma8(&self) -> GrayImage {
DynamicImage::from(self.image.clone()).to_luma8()
}
pub fn into_dyn(self) -> DynamicImage {
DynamicImage::from(self.image)
}
pub fn into_rgb8(self) -> RgbImage {
self.image
}
pub fn into_rgba8(self) -> RgbaImage {
self.into_dyn().to_rgba8()
}
pub fn into_luma8(self) -> GrayImage {
self.into_dyn().to_luma8()
}
pub fn resize(
&self,
tw: u32,
th: u32,
filter: &str,
mode: &ResizeMode,
padding_value: u8,
) -> Result<Self> {
Ok(self
.resize_with_info(tw, th, filter, mode, padding_value)?
.0)
}
pub fn resize_with_info(
&self,
tw: u32,
th: u32,
filter: &str,
mode: &ResizeMode,
padding_value: u8,
) -> Result<(Self, ImageTransformInfo)> {
if tw + th == 0 {
anyhow::bail!("Invalid target height: {} or width: {}.", th, tw);
}
let (w0, h0) = self.dimensions();
let mut trans_info = ImageTransformInfo::default()
.with_width_src(w0)
.with_height_src(h0)
.with_width_dst(tw)
.with_height_dst(th);
if (w0, h0) == (tw, th) {
return Ok((
self.clone(),
trans_info.with_width_scale(1.).with_height_scale(1.),
));
}
let (mut resizer, options) = build_resizer_filter(filter)?;
let x: DynamicImage = self.to_dyn();
if let ResizeMode::FitExact = mode {
let mut dst = FImage::new(tw, th, PixelType::U8x3);
resizer.resize(&x, &mut dst, &options)?;
trans_info = trans_info
.with_height_scale(th as f32 / h0 as f32)
.with_width_scale(tw as f32 / w0 as f32);
Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
} else {
let (w, h) = match mode {
ResizeMode::Letterbox | ResizeMode::FitAdaptive => {
let r = (tw as f32 / w0 as f32).min(th as f32 / h0 as f32);
trans_info = trans_info.with_height_scale(r).with_width_scale(r);
(
(w0 as f32 * r).round() as u32,
(h0 as f32 * r).round() as u32,
)
}
ResizeMode::FitHeight => {
let r = th as f32 / h0 as f32;
trans_info = trans_info.with_height_scale(1.).with_width_scale(r);
((r * w0 as f32).round() as u32, th)
}
ResizeMode::FitWidth => {
let r = tw as f32 / w0 as f32;
trans_info = trans_info.with_height_scale(r).with_width_scale(1.);
(tw, (r * h0 as f32).round() as u32)
}
_ => unreachable!(),
};
let mut dst = FImage::from_vec_u8(
tw,
th,
vec![padding_value; 3 * th as usize * tw as usize],
PixelType::U8x3,
)?;
let (l, t) = if let ResizeMode::Letterbox = mode {
if w == tw {
(0, (th - h) / 2)
} else {
((tw - w) / 2, 0)
}
} else {
(0, 0)
};
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
resizer.resize(&x, &mut dst_cropped, &options)?;
Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
}
}
pub fn to_ndarray(&self) -> Result<X> {
X::from_shape_vec(
&[self.height() as usize, self.width() as usize, 3],
self.to_f32s(),
)
}
}
pub trait ImageVecExt {
fn into_dyns(self) -> Vec<DynamicImage>;
fn into_images(self) -> Vec<Image>;
}
impl ImageVecExt for Vec<Image> {
fn into_dyns(self) -> Vec<DynamicImage> {
self.into_iter().map(|x| x.into()).collect()
}
fn into_images(self) -> Vec<Image> {
self
}
}
impl ImageVecExt for Vec<DynamicImage> {
fn into_dyns(self) -> Vec<DynamicImage> {
self
}
fn into_images(self) -> Vec<Image> {
self.into_iter().map(|x| x.into()).collect()
}
}
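A minimal sketch (not part of this commit) of the resize API above: letterbox an image to 640x640 and inspect the recorded scale factors. The filter name "Bilinear", the crate-root re-exports, and the width_scale()/height_scale() getters (expected from aksr::Builder) are assumptions, not confirmed by this diff.
fn letterbox_example() -> anyhow::Result<()> {
use usls::{Image, ResizeMode};
// Read an RGB image from disk (or from the hub cache, as try_read above allows).
let img = Image::try_read("./assets/bus.jpg")?;
// Letterbox to 640x640 with gray (114) padding; "Bilinear" is an assumed filter name.
let (resized, info) = img.resize_with_info(640, 640, "Bilinear", &ResizeMode::Letterbox, 114)?;
assert_eq!(resized.dimensions(), (640, 640));
// Letterbox keeps the aspect ratio, so both recorded scales are equal.
println!("scale: {} x {}", info.width_scale(), info.height_scale());
Ok(())
}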

View File

@ -0,0 +1,79 @@
#[derive(aksr::Builder, Clone, PartialEq)]
pub struct InstanceMeta {
uid: usize,
id: Option<usize>,
confidence: Option<f32>,
name: Option<String>,
}
impl Default for InstanceMeta {
fn default() -> Self {
Self {
uid: {
static COUNTER: std::sync::atomic::AtomicUsize =
std::sync::atomic::AtomicUsize::new(1);
COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
},
id: None,
confidence: None,
name: None,
}
}
}
impl std::fmt::Debug for InstanceMeta {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Meta")
.field("uid", &self.uid)
.field("id", &self.id)
.field("name", &self.name)
.field("confidence", &self.confidence)
.finish()
}
}
impl InstanceMeta {
pub fn label(
&self,
show_id: bool,
show_name: bool,
show_conf: bool,
decimal_places: usize,
) -> String {
// Format: #id name confidence. e.g.: #0 Person 0.932
let mut label = String::new();
// id
if let Some(id) = self.id {
if show_id {
label.push('#');
label.push_str(id.to_string().as_str());
}
}
// name
if let Some(name) = &self.name {
if show_name {
let name = if label.is_empty() {
name.to_string()
} else {
format!(" {}", name)
};
label.push_str(&name);
}
}
// confidence
if let Some(confidence) = self.confidence {
if show_conf {
if label.is_empty() {
label.push_str(&format!("{:.decimal_places$}", confidence));
} else {
label.push_str(&format!(" {:.decimal_places$}", confidence));
}
}
}
label
}
}
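A short illustration (not from this commit) of how the label() formatter above composes its output; the with_* calls mirror the builder methods used elsewhere in this diff, and the crate-root re-export is assumed.
fn meta_label_example() {
let meta = usls::InstanceMeta::default()
.with_id(0)
.with_name("Person")
.with_confidence(0.932);
// "#0 Person 0.932": id, name and confidence joined with spaces, 3 decimal places.
assert_eq!(meta.label(true, true, true, 3), "#0 Person 0.932");
}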

View File

@ -1,39 +1,161 @@
use aksr::Builder;
use std::ops::{Add, Div, Mul, Sub};
-/// Keypoint 2D.
-#[derive(Builder, PartialEq, Clone)]
+use crate::{InstanceMeta, Style};
+/// Represents a keypoint in a 2D space with optional metadata.
+#[derive(Builder, Default, Clone)]
pub struct Keypoint {
x: f32,
y: f32,
-id: isize,
-confidence: f32,
-name: Option<String>,
+meta: InstanceMeta,
+style: Option<Style>,
}
-impl Default for Keypoint {
-fn default() -> Self {
-Self {
-x: 0.,
-y: 0.,
-confidence: 0.,
-id: -1,
-name: None,
-}
-}
-}
+// #[derive(Default, Builder, Clone)]
+// pub struct Keypoints {
+// keypoints: Vec<Keypoint>,
+// skeletons: Option<Vec<(usize, usize)>>,
+// }
impl std::fmt::Debug for Keypoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Keypoint")
.field("xy", &[self.x, self.y])
-.field("id", &self.id())
-.field("name", &self.name())
-.field("confidence", &self.confidence())
+.field("uid", &self.meta.uid())
+.field("id", &self.meta.id())
+.field("name", &self.meta.name())
+.field("confidence", &self.meta.confidence())
.finish()
}
}
impl PartialEq for Keypoint {
fn eq(&self, other: &Self) -> bool {
self.x == other.x && self.y == other.y
}
}
impl From<(f32, f32)> for Keypoint {
fn from((x, y): (f32, f32)) -> Self {
Self {
x,
y,
..Default::default()
}
}
}
impl From<[f32; 2]> for Keypoint {
fn from([x, y]: [f32; 2]) -> Self {
Self {
x,
y,
..Default::default()
}
}
}
impl From<Keypoint> for (f32, f32) {
fn from(Keypoint { x, y, .. }: Keypoint) -> Self {
(x, y)
}
}
impl From<Keypoint> for [f32; 2] {
fn from(Keypoint { x, y, .. }: Keypoint) -> Self {
[x, y]
}
}
impl Keypoint {
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}
impl Keypoint {
pub fn new(x: f32, y: f32) -> Self {
Self {
x,
y,
..Default::default()
}
}
pub fn with_xy(mut self, x: f32, y: f32) -> Self {
self.x = x;
self.y = y;
self
}
pub fn xy(&self) -> (f32, f32) {
(self.x, self.y)
}
pub fn is_origin(&self) -> bool {
self.x == 0.0_f32 && self.y == 0.0_f32
}
pub fn distance_from(&self, other: &Self) -> f32 {
((self.x - other.x).powf(2.0) + (self.y - other.y).powf(2.0)).sqrt()
}
pub fn distance_from_origin(&self) -> f32 {
(self.x.powf(2.0) + self.y.powf(2.0)).sqrt()
}
pub fn sum(&self) -> f32 {
self.x + self.y
}
/// Calculates the perpendicular distance from the current keypoint (`self`)
/// to a line segment defined by two other keypoints (`start` and `end`).
pub fn perpendicular_distance(&self, start: &Self, end: &Self) -> f32 {
let numerator = ((end.y - start.y) * self.x - (end.x - start.x) * self.y + end.x * start.y
- end.y * start.x)
.abs();
let denominator = ((end.y - start.y).powi(2) + (end.x - start.x).powi(2)).sqrt();
numerator / denominator
}
pub fn cross(&self, other: &Keypoint) -> f32 {
self.x * other.y - self.y * other.x
}
}
impl Add for Keypoint {
type Output = Self;
@ -130,119 +252,6 @@ impl Div<f32> for Keypoint {
}
}
impl From<(f32, f32)> for Keypoint {
fn from((x, y): (f32, f32)) -> Self {
Self {
x,
y,
..Default::default()
}
}
}
impl From<[f32; 2]> for Keypoint {
fn from([x, y]: [f32; 2]) -> Self {
Self {
x,
y,
..Default::default()
}
}
}
impl From<(f32, f32, isize)> for Keypoint {
fn from((x, y, id): (f32, f32, isize)) -> Self {
Self {
x,
y,
id,
confidence: 1.,
..Default::default()
}
}
}
impl From<(f32, f32, isize, f32)> for Keypoint {
fn from((x, y, id, confidence): (f32, f32, isize, f32)) -> Self {
Self {
x,
y,
id,
confidence,
..Default::default()
}
}
}
impl From<Keypoint> for (f32, f32) {
fn from(Keypoint { x, y, .. }: Keypoint) -> Self {
(x, y)
}
}
impl From<Keypoint> for [f32; 2] {
fn from(Keypoint { x, y, .. }: Keypoint) -> Self {
[x, y]
}
}
impl Keypoint {
pub fn with_xy(mut self, x: f32, y: f32) -> Self {
self.x = x;
self.y = y;
self
}
pub fn label(&self, with_name: bool, with_conf: bool, decimal_places: usize) -> String {
let mut label = String::new();
if with_name {
label.push_str(
&self
.name
.as_ref()
.unwrap_or(&self.id.to_string())
.to_string(),
);
}
if with_conf {
if with_name {
label.push_str(&format!(": {:.decimal_places$}", self.confidence));
} else {
label.push_str(&format!("{:.decimal_places$}", self.confidence));
}
}
label
}
pub fn is_origin(&self) -> bool {
self.x == 0.0_f32 && self.y == 0.0_f32
}
pub fn distance_from(&self, other: &Keypoint) -> f32 {
((self.x - other.x).powf(2.0) + (self.y - other.y).powf(2.0)).sqrt()
}
pub fn distance_from_origin(&self) -> f32 {
(self.x.powf(2.0) + self.y.powf(2.0)).sqrt()
}
pub fn sum(&self) -> f32 {
self.x + self.y
}
pub fn perpendicular_distance(&self, start: &Keypoint, end: &Keypoint) -> f32 {
let numerator = ((end.y - start.y) * self.x - (end.x - start.x) * self.y + end.x * start.y
- end.y * start.x)
.abs();
let denominator = ((end.y - start.y).powi(2) + (end.x - start.x).powi(2)).sqrt();
numerator / denominator
}
pub fn cross(&self, other: &Keypoint) -> f32 {
self.x * other.y - self.y * other.x
}
}
#[cfg(test)]
mod tests_keypoint {
use super::Keypoint;
@ -258,9 +267,9 @@ mod tests_keypoint {
assert_eq!(kpt1, kpt2);
assert_eq!(kpt2, kpt3);
assert_eq!(kpt3, kpt4);
-assert_eq!(kpt6, kpt5);
assert_eq!(kpt5.x(), 5.5);
assert_eq!(kpt5.y(), 6.6);
+assert_eq!(kpt6, kpt5);
}
#[test]
@ -314,7 +323,7 @@ mod tests_keypoint {
}
#[test]
-fn functions() {
+fn fns() {
assert!(Keypoint::from([0., 0.]).is_origin());
assert!(!Keypoint::from([0., 0.1]).is_origin());
let kpt1 = Keypoint::from((0., 0.));
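A worked example (not part of the diff) for the perpendicular_distance helper above: the distance from (0, 1) to a segment lying on the x-axis is 1. The usls:: paths assume the crate-root re-exports from src/inference/mod.rs.
fn keypoint_distance_example() {
use usls::Keypoint;
let p = Keypoint::new(0., 1.);
let start = Keypoint::new(-1., 0.);
let end = Keypoint::new(1., 0.);
// numerator = |0*0 - 2*1 + 1*0 - 0*(-1)| = 2, denominator = sqrt(0 + 4) = 2, so distance = 1.
assert_eq!(p.perpendicular_distance(&start, &end), 1.0);
assert_eq!(p.distance_from_origin(), 1.0);
}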

142
src/inference/mask.rs Normal file
View File

@ -0,0 +1,142 @@
use aksr::Builder;
use anyhow::Result;
use image::GrayImage;
use rayon::prelude::*;
use crate::{InstanceMeta, Polygon, Style};
/// Mask: Gray Image.
#[derive(Builder, Default, Clone)]
pub struct Mask {
mask: GrayImage,
meta: InstanceMeta,
style: Option<Style>,
}
// #[derive(Builder, Default, Clone)]
// pub struct Masks(Vec<Mask>);
impl std::fmt::Debug for Mask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Mask")
.field("dimensions", &self.dimensions())
.field("uid", &self.meta.uid())
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
.finish()
}
}
impl PartialEq for Mask {
fn eq(&self, other: &Self) -> bool {
self.mask == other.mask
}
}
impl Mask {
pub fn new(u8s: &[u8], width: u32, height: u32) -> Result<Self> {
let mask: image::ImageBuffer<image::Luma<_>, Vec<_>> =
image::ImageBuffer::from_raw(width, height, u8s.to_vec())
.ok_or(anyhow::anyhow!("Failed to build ImageBuffer."))?;
Ok(Self {
mask,
..Default::default()
})
}
pub fn to_vec(&self) -> Vec<u8> {
self.mask.to_vec()
}
pub fn height(&self) -> u32 {
self.mask.height()
}
pub fn width(&self) -> u32 {
self.mask.width()
}
pub fn dimensions(&self) -> (u32, u32) {
self.mask.dimensions()
}
pub fn polygon(&self) -> Option<Polygon> {
let polygons = self.polygons();
if polygons.is_empty() {
return None;
}
polygons
.into_iter()
.max_by(|x, y| x.area().total_cmp(&y.area()))
}
pub fn polygons(&self) -> Vec<Polygon> {
let contours: Vec<imageproc::contours::Contour<i32>> =
imageproc::contours::find_contours_with_threshold(self.mask(), 0);
let polygons: Vec<Polygon> = contours
.into_par_iter()
.filter_map(|contour| {
if contour.border_type == imageproc::contours::BorderType::Hole
&& contour.points.len() <= 2
{
return None;
}
let mut polygon = Polygon::default()
.with_points_imageproc(&contour.points)
.verify();
if let Some(x) = self.name() {
polygon = polygon.with_name(x);
}
if let Some(x) = self.id() {
polygon = polygon.with_id(x);
}
if let Some(x) = self.confidence() {
polygon = polygon.with_confidence(x);
}
Some(polygon)
})
.collect();
polygons
}
}
impl Mask {
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}
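A small usage sketch (not in this commit) for the Mask type above: build a 4x4 binary mask and take its largest exterior polygon, following the threshold semantics of find_contours_with_threshold as called in polygons().
fn mask_example() -> anyhow::Result<()> {
use usls::Mask;
// 4x4 gray image with a 2x2 foreground block.
let data = [
0, 0, 0, 0,
0, 255, 255, 0,
0, 255, 255, 0,
0, 0, 0, 0,
];
let mask = Mask::new(&data, 4, 4)?.with_name("blob");
assert_eq!(mask.dimensions(), (4, 4));
// The largest (here, the only) contour as a Polygon, if any was found.
if let Some(polygon) = mask.polygon() {
println!("{:?}", polygon);
}
Ok(())
}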

34
src/inference/mod.rs Normal file
View File

@ -0,0 +1,34 @@
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
mod engine;
mod hbb;
mod image;
mod instance_meta;
mod keypoint;
mod mask;
mod obb;
mod polygon;
mod prob;
mod skeleton;
mod x;
mod xs;
mod y;
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
#[allow(clippy::all)]
pub(crate) mod onnx {
include!(concat!(env!("OUT_DIR"), "/onnx.rs"));
}
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
pub use engine::*;
pub use hbb::*;
pub use image::*;
pub use instance_meta::*;
pub use keypoint::*;
pub use mask::*;
pub use obb::*;
pub use polygon::*;
pub use prob::*;
pub use skeleton::*;
pub use x::X;
pub use xs::Xs;
pub use y::*;

222
src/inference/obb.rs Normal file
View File

@ -0,0 +1,222 @@
use aksr::Builder;
use crate::{Hbb, InstanceMeta, Keypoint, Polygon, Style};
#[derive(Builder, Default, Clone, PartialEq)]
pub struct Obb {
vertices: [[f32; 2]; 4], // ordered
meta: InstanceMeta,
style: Option<Style>,
keypoints: Option<Vec<Keypoint>>,
}
impl std::fmt::Debug for Obb {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Obb")
.field("uid", &self.meta.uid())
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
.finish()
}
}
impl From<[[f32; 2]; 4]> for Obb {
fn from(vertices: [[f32; 2]; 4]) -> Self {
Self {
vertices,
..Default::default()
}
}
}
impl From<Vec<[f32; 2]>> for Obb {
fn from(vertices: Vec<[f32; 2]>) -> Self {
// Self::from(vertices[..4])
let vertices = [vertices[0], vertices[1], vertices[2], vertices[3]];
Self {
vertices,
..Default::default()
}
}
}
impl From<Obb> for [[f32; 2]; 4] {
fn from(obb: Obb) -> Self {
obb.vertices
}
}
impl Obb {
/// Build from (cx, cy, width, height, degrees)
pub fn from_cxcywhd(cx: f32, cy: f32, w: f32, h: f32, d: f32) -> Self {
Self::from_cxcywhr(cx, cy, w, h, d.to_radians())
}
/// Build from (cx, cy, width, height, radians)
pub fn from_cxcywhr(cx: f32, cy: f32, w: f32, h: f32, r: f32) -> Self {
// [[cos -sin], [sin cos]]
let m = [
[r.cos() * 0.5 * w, -r.sin() * 0.5 * h],
[r.sin() * 0.5 * w, r.cos() * 0.5 * h],
];
let c = [cx, cy];
let a_ = [m[0][0] + m[0][1], m[1][0] + m[1][1]];
let b_ = [m[0][0] - m[0][1], m[1][0] - m[1][1]];
let v1 = [c[0] + a_[0], c[1] + a_[1]];
let v2 = [c[0] + b_[0], c[1] + b_[1]];
let v3 = [c[0] * 2. - v1[0], c[1] * 2. - v1[1]];
let v4 = [c[0] * 2. - v2[0], c[1] * 2. - v2[1]];
Self {
vertices: [v1, v2, v3, v4],
..Default::default()
}
}
pub fn top(&self) -> [f32; 2] {
let mut top = self.vertices[0];
for v in &self.vertices {
if v[1] < top[1] {
top = *v;
}
}
top
}
pub fn bottom(&self) -> [f32; 2] {
let mut bottom = self.vertices[0];
for v in &self.vertices {
if v[1] > bottom[1] {
bottom = *v;
}
}
bottom
}
pub fn left(&self) -> [f32; 2] {
let mut left = self.vertices[0];
for v in &self.vertices {
if v[0] < left[0] {
left = *v;
}
}
left
}
pub fn right(&self) -> [f32; 2] {
let mut right = self.vertices[0];
for v in &self.vertices {
if v[0] > right[0] {
right = *v;
}
}
right
}
pub fn to_polygon(&self) -> Polygon {
Polygon::from_xys(&self.vertices)
}
pub fn area(&self) -> f32 {
self.to_polygon().area() as f32
}
pub fn intersect(&self, other: &Self) -> f32 {
let pa = self.to_polygon();
let pb = other.to_polygon();
pa.intersect(&pb)
}
pub fn union(&self, other: &Self) -> f32 {
let pa = self.to_polygon();
let pb = other.to_polygon();
pa.union(&pb)
}
pub fn iou(&self, other: &Self) -> f32 {
self.intersect(other) / self.union(other)
}
pub fn hbb(&self) -> Hbb {
todo!()
}
}
impl Obb {
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}
#[cfg(test)]
mod tests_mbr {
// use crate::Nms;
use super::Obb;
#[test]
fn iou1() {
let a = Obb::from([[0., 0.], [0., 2.], [2., 2.], [2., 0.]]);
let b = Obb::from_cxcywhd(1., 1., 2., 2., 0.);
assert_eq!(a.iou(&b), 1.0);
}
#[test]
fn iou2() {
let a = Obb::from([[2.5, 5.], [-2.5, 5.], [-2.5, -5.], [2.5, -5.]]);
let b = Obb::from_cxcywhd(0., 0., 10., 5., 90.);
assert_eq!(a.iou(&b), 1.0);
}
#[test]
fn intersect() {
let a = Obb::from_cxcywhr(0., 0., 2.828427, 2.828427, 45.);
let b = Obb::from_cxcywhr(1., 1., 2., 2., 0.);
assert_eq!(a.intersect(&b).round(), 2.);
}
#[test]
fn union() {
let a = Obb::from([[2., 0.], [0., 2.], [-2., 0.], [0., -2.]]);
let b = Obb::from([[0., 0.], [2., 0.], [2., 2.], [0., 2.]]);
assert_eq!(a.union(&b), 10.);
}
#[test]
fn iou() {
let a = Obb::from([[2., 0.], [0., 2.], [-2., 0.], [0., -2.]]);
let b = Obb::from([[0., 0.], [2., 0.], [2., 2.], [0., 2.]]);
assert_eq!(a.iou(&b), 0.2);
}
}
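A quick numeric check (not part of the diff) of the from_cxcywhr construction above: with r = 0 the rotation matrix is the identity, so an axis-aligned 2x2 box centered at the origin yields its half-extent corners as v1..v4.
fn obb_vertices_example() {
use usls::Obb;
let obb = Obb::from_cxcywhr(0., 0., 2., 2., 0.);
// The From<Obb> impl above exposes the ordered vertices.
let corners: [[f32; 2]; 4] = obb.into();
assert_eq!(corners, [[1., 1.], [1., -1.], [-1., -1.], [-1., 1.]]);
}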

View File

@ -1,27 +1,25 @@
use aksr::Builder;
use geo::{
-coord, point, polygon, Area, BoundingRect, Centroid, ConvexHull, Euclidean, Length, LineString,
-MinimumRotatedRect, Point, Simplify,
+coord, point, polygon, Area, BooleanOps, Centroid, ConvexHull, Euclidean, Length, LineString,
+Point, Simplify,
};
-use crate::{Bbox, Mbr};
+use crate::{Hbb, InstanceMeta, Mask, Obb, Style};
/// Polygon.
-#[derive(Builder, Clone, PartialEq)]
+#[derive(Builder, Clone)]
pub struct Polygon {
-polygon: geo::Polygon,
-id: isize,
-name: Option<String>,
-confidence: f32,
+polygon: geo::Polygon, // TODO: Vec<[f32; 2]>
+meta: InstanceMeta,
+style: Option<Style>,
}
impl Default for Polygon {
fn default() -> Self {
Self {
polygon: polygon![],
-id: -1,
-name: None,
-confidence: 0.,
+meta: InstanceMeta::default(),
+style: None,
}
}
}
@ -29,16 +27,21 @@ impl Default for Polygon {
impl std::fmt::Debug for Polygon {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Polygon")
+.field("perimeter", &self.perimeter())
+.field("area", &self.area())
.field("count", &self.count())
-.field("id", &self.id)
-.field("name", &self.name)
-.field("confidence", &self.confidence)
+.field("uid", &self.meta.uid())
+.field("id", &self.meta.id())
+.field("name", &self.meta.name())
+.field("confidence", &self.meta.confidence())
.finish()
}
}
+impl PartialEq for Polygon {
+fn eq(&self, other: &Self) -> bool {
+self.polygon == other.polygon
+}
+}
impl Polygon {
pub fn with_points_imageproc(mut self, points: &[imageproc::point::Point<i32>]) -> Self {
// exterior
@ -50,6 +53,20 @@ impl Polygon {
self
}
pub fn from_xys(xys: &[[f32; 2]]) -> Self {
// exterior
let v = xys
.iter()
.map(|p| coord! { x: p[0] as f64, y: p[1] as f64})
.collect::<Vec<_>>();
let polygon = geo::Polygon::new(LineString::from(v), vec![]);
Self {
polygon,
..Default::default()
}
}
pub fn with_points(mut self, points: &[Vec<f32>]) -> Self {
// exterior
let v = points
@ -60,27 +77,6 @@ impl Polygon {
self
}
pub fn label(&self, with_name: bool, with_conf: bool, decimal_places: usize) -> String {
let mut label = String::new();
if with_name {
label.push_str(
&self
.name
.as_ref()
.unwrap_or(&self.id.to_string())
.to_string(),
);
}
if with_conf {
if with_name {
label.push_str(&format!(": {:.decimal_places$}", self.confidence));
} else {
label.push_str(&format!("{:.decimal_places$}", self.confidence));
}
}
label
}
pub fn is_closed(&self) -> bool {
self.polygon.exterior().is_closed()
}
@ -90,7 +86,6 @@ impl Polygon {
}
pub fn perimeter(&self) -> f64 {
-// use the `line.length::<Euclidean>()` via the `Length` trait instead.
Euclidean.length(self.polygon.exterior())
}
@ -104,9 +99,30 @@ impl Polygon {
.map(|x| (x.x() as f32, x.y() as f32))
}
-pub fn bbox(&self) -> Option<Bbox> {
+pub fn intersect(&self, other: &Self) -> f32 {
self.polygon.intersection(&other.polygon).unsigned_area() as f32
}
pub fn union(&self, other: &Self) -> f32 {
self.polygon.union(&other.polygon).unsigned_area() as f32
}
pub fn points(&self) -> Vec<[f32; 2]> {
self.polygon
.exterior()
.coords()
.map(|c| [c.x as f32, c.y as f32])
.collect::<Vec<_>>()
}
pub fn mask(&self) -> Mask {
todo!()
}
pub fn hbb(&self) -> Option<Hbb> {
use geo::BoundingRect;
self.polygon.bounding_rect().map(|x| {
-Bbox::default().with_xyxy(
+Hbb::default().with_xyxy(
x.min().x as f32,
x.min().y as f32,
x.max().x as f32,
@ -115,9 +131,18 @@ impl Polygon {
})
}
-pub fn mbr(&self) -> Option<Mbr> {
-MinimumRotatedRect::minimum_rotated_rect(&self.polygon)
-.map(|x| Mbr::from_line_string(x.exterior().to_owned()))
+pub fn obb(&self) -> Option<Obb> {
+use geo::MinimumRotatedRect;
+MinimumRotatedRect::minimum_rotated_rect(&self.polygon).map(|x| {
let xy4 = x
.exterior()
.coords()
// .iter()
.map(|c| [c.x as f32, c.y as f32])
.collect::<Vec<_>>();
Obb::from(xy4)
})
}
pub fn convex_hull(mut self) -> Self {
@ -220,3 +245,40 @@ impl Polygon {
xs.retain(|point| seen.insert((point.x() as i32, point.y() as i32)));
}
}
impl Polygon {
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}

78
src/inference/prob.rs Normal file
View File

@ -0,0 +1,78 @@
use aksr::Builder;
use crate::{InstanceMeta, Style};
#[derive(Builder, Clone, PartialEq, Default, Debug)]
pub struct Prob {
meta: InstanceMeta,
style: Option<Style>,
}
// #[derive(Builder, Clone, PartialEq, Default, Debug)]
// pub struct Probs(#[args(aka = "probs")] Vec<Prob>);
impl Prob {
pub fn new_probs(probs: &[f32], names: Option<&[&str]>, k: usize) -> Vec<Self> {
let mut pairs: Vec<(usize, f32)> = probs
.iter()
.enumerate()
.map(|(id, &prob)| (id, prob))
.collect();
pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
pairs.truncate(k);
pairs
.into_iter()
.map(|(id, confidence)| {
let mut meta = InstanceMeta::default()
.with_id(id)
.with_confidence(confidence);
if let Some(names) = names {
if id < names.len() {
meta = meta.with_name(names[id]);
}
}
Prob::default().with_meta(meta)
})
.collect()
}
pub fn with_uid(mut self, uid: usize) -> Self {
self.meta = self.meta.with_uid(uid);
self
}
pub fn with_id(mut self, id: usize) -> Self {
self.meta = self.meta.with_id(id);
self
}
pub fn with_name(mut self, name: &str) -> Self {
self.meta = self.meta.with_name(name);
self
}
pub fn with_confidence(mut self, confidence: f32) -> Self {
self.meta = self.meta.with_confidence(confidence);
self
}
pub fn uid(&self) -> usize {
self.meta.uid()
}
pub fn name(&self) -> Option<&str> {
self.meta.name()
}
pub fn confidence(&self) -> Option<f32> {
self.meta.confidence()
}
pub fn id(&self) -> Option<usize> {
self.meta.id()
}
}
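An illustrative call (not from this commit) of Prob::new_probs above: the top-2 classes out of three scores, with names attached when the index is in range.
fn prob_topk_example() {
use usls::Prob;
let scores = [0.1_f32, 0.7, 0.2];
let names: &[&str] = &["cat", "dog", "bird"];
let top2 = Prob::new_probs(&scores, Some(names), 2);
assert_eq!(top2.len(), 2);
assert_eq!(top2[0].id(), Some(1)); // "dog" with 0.7
assert_eq!(top2[0].name(), Some("dog"));
assert_eq!(top2[1].id(), Some(2)); // "bird" with 0.2
}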

130
src/inference/skeleton.rs Normal file
View File

@ -0,0 +1,130 @@
use crate::Color;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Connection {
pub indices: (usize, usize),
pub color: Option<Color>,
}
impl From<(usize, usize)> for Connection {
fn from(indices: (usize, usize)) -> Self {
Self {
indices,
color: None,
}
}
}
impl From<(usize, usize, Color)> for Connection {
fn from((a, b, color): (usize, usize, Color)) -> Self {
Self {
indices: (a, b),
color: Some(color),
}
}
}
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Skeleton {
pub connections: Vec<Connection>,
}
impl std::ops::Deref for Skeleton {
type Target = Vec<Connection>;
fn deref(&self) -> &Self::Target {
&self.connections
}
}
impl Skeleton {
pub fn with_connections<C: Into<Connection> + Clone>(mut self, connections: &[C]) -> Self {
self.connections = connections.iter().cloned().map(|c| c.into()).collect();
self
}
pub fn with_colors(mut self, colors: &[Color]) -> Self {
for (i, connection) in self.connections.iter_mut().enumerate() {
if i < colors.len() {
connection.color = Some(colors[i]);
}
}
self
}
}
impl From<&[(usize, usize)]> for Skeleton {
fn from(connections: &[(usize, usize)]) -> Self {
Self {
connections: connections.iter().map(|&c| c.into()).collect(),
}
}
}
impl<const N: usize> From<[(usize, usize); N]> for Skeleton {
fn from(arr: [(usize, usize); N]) -> Self {
Self::from(arr.as_slice())
}
}
impl From<(&[(usize, usize)], &[Color])> for Skeleton {
fn from((connections, colors): (&[(usize, usize)], &[Color])) -> Self {
Self {
connections: connections
.iter()
.zip(colors.iter())
.map(|(&(a, b), &c)| (a, b, c).into())
.collect(),
}
}
}
impl<const N: usize> From<([(usize, usize); N], [Color; N])> for Skeleton {
fn from((connections, colors): ([(usize, usize); N], [Color; N])) -> Self {
Skeleton::from((&connections[..], &colors[..]))
}
}
pub const SKELETON_COCO_19: [(usize, usize); 19] = [
(15, 13),
(13, 11),
(16, 14),
(14, 12),
(11, 12),
(5, 11),
(6, 12),
(5, 6),
(5, 7),
(6, 8),
(7, 9),
(8, 10),
(1, 2),
(0, 1),
(0, 2),
(1, 3),
(2, 4),
(3, 5),
(4, 6),
];
pub const SKELETON_COLOR_COCO_19: [Color; 19] = [
Color(0x3399ffff),
Color(0x3399ffff),
Color(0x3399ffff),
Color(0x3399ffff),
Color(0xff33ffff),
Color(0xff33ffff),
Color(0xff33ffff),
Color(0xff8000ff),
Color(0xff8000ff),
Color(0xff8000ff),
Color(0xff8000ff),
Color(0xff8000ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
Color(0x00ff00ff),
];
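A brief sketch (not in this commit) of composing the COCO constants above into a Skeleton with per-connection colors, using the tuple From impl defined earlier in this file.
fn skeleton_example() {
use usls::{Skeleton, SKELETON_COCO_19, SKELETON_COLOR_COCO_19};
// Pair each of the 19 connections with its color.
let skeleton = Skeleton::from((SKELETON_COCO_19, SKELETON_COLOR_COCO_19));
assert_eq!(skeleton.len(), 19);
assert!(skeleton.connections[0].color.is_some());
}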

View File

@ -4,13 +4,12 @@ use image::DynamicImage;
use std::collections::HashMap;
use std::ops::{Deref, Index};
-use crate::{string_random, X};
+use crate::{generate_random_string, X};
#[derive(Builder, Debug, Default, Clone)]
pub struct Xs {
map: HashMap<String, X>,
names: Vec<String>,
// TODO: move to Processor
pub images: Vec<Vec<DynamicImage>>,
pub texts: Vec<Vec<DynamicImage>>,
@ -51,7 +50,7 @@ impl Xs {
pub fn push(&mut self, value: X) {
loop {
-let key = string_random(5);
+let key = generate_random_string(5);
if !self.map.contains_key(&key) {
self.names.push(key.to_string());
self.map.insert(key.to_string(), value);

66
src/inference/y.rs Normal file
View File

@ -0,0 +1,66 @@
use aksr::Builder;
use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob};
/// Container for inference results for each image.
///
/// This struct holds various possible outputs from an image inference process,
/// including text annotations, class probabilities, keypoints, horizontal bounding boxes,
/// oriented bounding boxes, polygons, and masks.
///
#[derive(Builder, Clone, Default)]
pub struct Y {
texts: Option<Vec<String>>,
probs: Option<Vec<Prob>>,
keypoints: Option<Vec<Keypoint>>,
keypointss: Option<Vec<Vec<Keypoint>>>,
hbbs: Option<Vec<Hbb>>,
obbs: Option<Vec<Obb>>,
polygons: Option<Vec<Polygon>>,
masks: Option<Vec<Mask>>,
}
impl std::fmt::Debug for Y {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut f = f.debug_struct("Y");
if let Some(xs) = &self.texts {
if !xs.is_empty() {
f.field("Texts", &xs);
}
}
if let Some(xs) = &self.probs {
f.field("Probs", &xs);
}
if let Some(xs) = &self.hbbs {
if !xs.is_empty() {
f.field("Hbbs", &xs);
}
}
if let Some(xs) = &self.obbs {
if !xs.is_empty() {
f.field("Obbs", &xs);
}
}
if let Some(xs) = &self.keypoints {
if !xs.is_empty() {
f.field("Kpts", &xs);
}
}
if let Some(xs) = &self.keypointss {
if !xs.is_empty() {
f.field("Kptss", &xs);
}
}
if let Some(xs) = &self.polygons {
if !xs.is_empty() {
f.field("Polys", &xs);
}
}
if let Some(xs) = &self.masks {
if !xs.is_empty() {
f.field("Masks", &xs);
}
}
f.finish()
}
}
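A minimal sketch (not part of the diff) of assembling a Y result and relying on the Debug impl above, which prints only populated fields. The slice-taking with_probs setter is an assumption about what aksr::Builder derives for the Option<Vec<Prob>> field.
fn y_example() {
use usls::{Prob, Y};
let names: &[&str] = &["cat", "dog", "bird"];
// Hypothetical builder call; the exact setter signature comes from aksr::Builder.
let y = Y::default().with_probs(&Prob::new_probs(&[0.1, 0.7, 0.2], Some(names), 1));
// None/empty fields are skipped by the Debug impl, so only "Probs" is shown here.
println!("{:?}", y);
}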

635
src/io/dataloader.rs Normal file
View File

@ -0,0 +1,635 @@
use anyhow::{anyhow, Result};
use glob::{glob_with, MatchOptions};
use indicatif::{ProgressBar, ProgressStyle};
use log::{info, warn};
use rayon::prelude::*;
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use std::sync::mpsc;
#[cfg(feature = "video")]
use video_rs::{Decoder, Url};
use crate::{Image, Location, MediaType};
/// A structure designed to load and manage image, video, or stream data.
pub struct DataLoader {
/// Queue of paths for images.
paths: Option<VecDeque<PathBuf>>,
/// Media type of the source (image, video, stream, etc.).
media_type: MediaType,
/// Batch size for iteration, determining how many files are processed at once.
batch_size: usize,
/// Buffer size for the channel, used to manage the buffer between producer and consumer.
bound: Option<usize>,
/// Receiver for processed data.
receiver: mpsc::Receiver<Vec<Image>>,
/// Video decoder for handling video or stream data.
#[cfg(feature = "video")]
decoder: Option<video_rs::decode::Decoder>,
/// Number of images or frames; `u64::MAX` is used for live streams (indicating no limit).
nf: u64,
/// Number of frames to be skipped.
#[cfg(feature = "video")]
nf_skip: u64,
/// Progress bar for displaying iteration progress.
progress_bar: Option<ProgressBar>,
/// Display progress bar or not.
with_progress_bar: bool,
}
impl Default for DataLoader {
fn default() -> Self {
DataLoader {
paths: None,
media_type: Default::default(),
nf: 0,
batch_size: 1,
#[cfg(feature = "video")]
nf_skip: 0,
bound: None,
receiver: mpsc::sync_channel(0).1,
progress_bar: None,
with_progress_bar: false,
#[cfg(feature = "video")]
decoder: None,
}
}
}
impl std::fmt::Debug for DataLoader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("DataLoader")
.field("paths", &self.paths)
.field("batch_size", &self.batch_size)
.field("nf", &self.nf)
// #[cfg(feature = "video")]
// .field("nf_skip", &self.nf_skip)
.field("media_type", &self.media_type)
.field("bound", &self.bound)
.field("progress_bar", &self.with_progress_bar)
.finish()
}
}
impl TryFrom<&str> for DataLoader {
type Error = anyhow::Error;
fn try_from(source: &str) -> Result<Self, Self::Error> {
Self::new(source)
}
}
impl DataLoader {
pub fn new(source: &str) -> Result<Self> {
// paths & media_type
let (paths, media_type) = Self::try_load_all(source)?;
// Number of frames or stream
#[cfg(feature = "video")]
let mut nf = match media_type {
MediaType::Image(Location::Local) => {
paths.as_ref().unwrap_or(&VecDeque::new()).len() as _
}
MediaType::Image(Location::Remote) | MediaType::Video(_) | MediaType::Stream => 1,
MediaType::Unknown => anyhow::bail!("Could not locate the source: {:?}", source),
_ => unimplemented!(),
};
#[cfg(not(feature = "video"))]
let nf = match media_type {
MediaType::Image(Location::Local) => {
paths.as_ref().unwrap_or(&VecDeque::new()).len() as _
}
MediaType::Image(Location::Remote) | MediaType::Video(_) | MediaType::Stream => 1,
MediaType::Unknown => anyhow::bail!("Could not locate the source: {:?}", source),
_ => unimplemented!(),
};
// video decoder
#[cfg(not(feature = "video"))]
{
match &media_type {
MediaType::Video(Location::Local)
| MediaType::Video(Location::Remote)
| MediaType::Stream => {
anyhow::bail!(
"Video processing requires the features: `video`. \
\nConsider enabling them by passing, e.g., `--features video`"
);
}
_ => {}
};
}
#[cfg(feature = "video")]
let decoder = match &media_type {
MediaType::Video(Location::Local) => Some(Decoder::new(Path::new(source))?),
MediaType::Video(Location::Remote) | MediaType::Stream => {
let location: video_rs::location::Location = source.parse::<Url>()?.into();
Some(Decoder::new(location)?)
}
_ => None,
};
// video & stream frames
#[cfg(feature = "video")]
if let Some(decoder) = &decoder {
nf = match decoder.frames() {
Err(_) => u64::MAX,
Ok(0) => u64::MAX,
Ok(x) => x,
}
}
// info
let info = match &media_type {
MediaType::Image(_) => format!("x{}", nf),
MediaType::Video(_) => format!("x1 ({} frames)", nf),
MediaType::Stream => "x1".to_string(),
_ => unimplemented!(),
};
info!("Found {:?} {}", media_type, info);
Ok(Self {
paths,
media_type,
nf,
#[cfg(feature = "video")]
decoder,
..Default::default()
})
}
pub fn build(mut self) -> Result<Self> {
let (sender, receiver) =
mpsc::sync_channel::<Vec<Image>>(self.bound.unwrap_or(self.batch_size * 10));
self.receiver = receiver;
let batch_size = self.batch_size;
#[cfg(feature = "video")]
let nf_skip = self.nf_skip;
let data = self.paths.take().unwrap_or_default();
let media_type = self.media_type;
#[cfg(feature = "video")]
let decoder = self.decoder.take();
// progress bar
self.progress_bar = if self.with_progress_bar {
crate::build_progress_bar(
self.nf,
"Iterating",
Some(&format!("{:?}", self.media_type)),
"{prefix:>12.cyan.bold} {human_pos}/{human_len} |{bar}| {msg}",
)
.ok()
} else {
None
};
// Spawn the producer thread
std::thread::spawn(move || {
DataLoader::producer_thread(
sender,
data,
batch_size,
#[cfg(feature = "video")]
nf_skip,
media_type,
#[cfg(feature = "video")]
decoder,
);
});
Ok(self)
}
fn producer_thread(
sender: mpsc::SyncSender<Vec<Image>>,
mut data: VecDeque<PathBuf>,
batch_size: usize,
#[cfg(feature = "video")] nf_skip: u64,
media_type: MediaType,
#[cfg(feature = "video")] mut decoder: Option<video_rs::decode::Decoder>,
) {
let mut images: Vec<Image> = Vec::with_capacity(batch_size);
match media_type {
MediaType::Image(_) => {
if data.len() < 8000 {
// TODO: fast but memory inefficient
data.par_iter()
.filter_map(|path| {
Some(
Self::try_read_one(path)
.map_err(|e| warn!("Failed: {:?}, {}", path, e))
.ok()?
.with_media_type(media_type),
)
})
.collect::<Vec<Image>>()
.chunks(batch_size)
.for_each(|chunk| {
if !chunk.is_empty() {
let _ = sender.send(chunk.to_vec());
}
});
} else {
// TODO: slower, streaming path (memory-friendly)
while let Some(path) = data.pop_front() {
match Self::try_read_one(&path) {
Err(_err) => {
continue;
}
Ok(img) => {
images.push(img.with_media_type(media_type));
}
}
if images.len() == batch_size
&& sender.send(std::mem::take(&mut images)).is_err()
{
break;
}
}
}
}
#[cfg(feature = "video")]
MediaType::Video(_) | MediaType::Stream => {
if let Some(decoder) = decoder.as_mut() {
let (w, h) = decoder.size();
let mut cnt = 0;
for frame in decoder.decode_iter() {
match frame {
Ok((ts, frame)) => {
cnt += 1;
if (cnt - 1) % (nf_skip + 1) != 0 {
continue;
}
let rgb8: image::RgbImage = match image::ImageBuffer::from_raw(
w as _,
h as _,
frame.into_raw_vec_and_offset().0,
) {
Some(x) => x,
None => continue,
};
images.push(
Image::from(rgb8)
.with_media_type(media_type)
.with_source(format!("{:?}", ts).into()),
);
if images.len() == batch_size
&& sender.send(std::mem::take(&mut images)).is_err()
{
break;
}
}
Err(_) => break,
}
}
}
}
_ => unimplemented!(),
}
// Deal with remaining data
if !images.is_empty() && sender.send(images).is_err() {
info!("Receiver dropped, stopping production");
}
}
pub fn imread<P: AsRef<Path>>(path: P) -> Result<Image> {
Image::try_read(path)
}
pub fn try_read_one<P: AsRef<Path>>(path: P) -> Result<Image> {
Image::try_read(path)
}
pub fn try_read_n<P: AsRef<Path> + std::fmt::Debug + Sync>(paths: &[P]) -> Result<Vec<Image>> {
let images: Vec<Image> = paths
.par_iter()
.filter_map(|path| match Self::try_read_one(path) {
Ok(img) => Some(img),
Err(err) => {
log::warn!("Failed to read from: {:?}. Error: {:?}", path, err);
None
}
})
.collect();
Ok(images)
}
pub fn try_read_folder<P: AsRef<Path>>(path: P) -> Result<Vec<Image>> {
let paths: Vec<PathBuf> = Self::load_image_paths_from_folder(
path.as_ref().to_str().unwrap(),
crate::IMAGE_EXTENSIONS,
)?;
let images: Vec<Image> = paths
.par_iter()
.filter_map(|path| Self::try_read_one(path).ok())
.collect();
Ok(images)
}
pub fn try_read_pattern(path: &str) -> Result<Vec<Image>> {
// case sensitive
let paths: Vec<PathBuf> = Self::glob(path, true, true)?;
let images: Vec<Image> = paths
.par_iter()
.filter_map(|path| Self::try_read_one(path).ok())
.collect();
Ok(images)
}
pub fn try_read_pattern_case_insensitive(path: &str) -> Result<Vec<Image>> {
// case insensitive
let paths: Vec<PathBuf> = Self::glob(path, true, false)?;
let images: Vec<Image> = paths
.par_iter()
.filter_map(|path| Self::try_read_one(path).ok())
.collect();
Ok(images)
}
fn load_image_paths_from_folder(source: &str, exts: &[&str]) -> Result<Vec<PathBuf>> {
let source_path = Path::new(source);
let mut paths: Vec<PathBuf> = Vec::new();
let options = MatchOptions {
case_sensitive: false,
require_literal_separator: false,
require_literal_leading_dot: false,
};
for ext in exts.iter() {
let pattern = source_path.join(format!("*.{}", ext));
let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), options)?
.filter_map(|entry| entry.ok())
.collect();
paths.extend(paths_);
}
paths.sort_by(|a, b| {
let a = a.file_name().and_then(|s| s.to_str());
let b = b.file_name().and_then(|s| s.to_str());
match (a, b) {
(Some(a), Some(b)) => natord::compare(a, b),
_ => std::cmp::Ordering::Equal,
}
});
Ok(paths)
}
fn glob(pattern: &str, sort: bool, case_sensitive: bool) -> anyhow::Result<Vec<PathBuf>> {
let options = MatchOptions {
case_sensitive,
require_literal_separator: false,
require_literal_leading_dot: false,
};
let mut paths: Vec<PathBuf> = glob_with(pattern, options)?
.filter_map(|entry| entry.ok())
.collect();
if sort {
paths.sort_by(|a, b| {
let a = a.file_name().and_then(|s| s.to_str());
let b = b.file_name().and_then(|s| s.to_str());
match (a, b) {
(Some(a), Some(b)) => natord::compare(a, b),
_ => std::cmp::Ordering::Equal,
}
});
}
Ok(paths)
}
fn try_load_all(source: &str) -> Result<(Option<VecDeque<PathBuf>>, MediaType)> {
// paths & media_type
let is_source_remote = MediaType::is_possible_remote(source);
let source_path = Path::new(source);
let (paths, media_type) = if is_source_remote {
// remote
log::debug!("DataLoader try to load source from remote");
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_url(source),
)
} else {
// local
log::debug!("DataLoader try to load source from local");
if source_path.is_file() {
log::debug!("source is file");
// image
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_path(source_path),
)
} else if source_path.is_dir() {
// directory
log::debug!("source is directory");
let paths = Self::load_image_paths_from_folder(source, crate::IMAGE_EXTENSIONS)?;
(
Some(VecDeque::from(paths)),
MediaType::Image(Location::Local),
)
} else if glob::Pattern::new(source).is_ok() {
log::debug!("Load source with glob pattern");
// glob
// - case_sensitive: true
// - sort: true
let paths = Self::glob(source, true, true)?;
(
Some(VecDeque::from(paths)),
MediaType::Image(Location::Local),
)
} else {
log::debug!("Source is unknown");
(None, MediaType::Unknown)
}
};
Ok((paths, media_type))
}
pub fn paths(&self) -> Option<&VecDeque<PathBuf>> {
self.paths.as_ref()
}
pub fn with_bound(mut self, x: usize) -> Self {
self.bound = Some(x);
self
}
pub fn with_batch(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
pub fn with_batch_size(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
pub fn nf(&self) -> u64 {
self.nf
}
#[cfg(feature = "video")]
pub fn with_nf_skip(mut self, x: u64) -> Self {
self.nf_skip = x;
self
}
#[cfg(feature = "video")]
pub fn nf_skip(&self) -> u64 {
self.nf_skip
}
pub fn with_progress_bar(mut self, x: bool) -> Self {
self.with_progress_bar = x;
self
}
pub fn iter(&self) -> DataLoaderIter<'_> {
DataLoaderIter {
receiver: &self.receiver,
progress_bar: self.progress_bar.as_ref(),
batch_size: self.batch_size as u64,
}
}
}
trait DataLoaderIterator {
type Receiver;
fn receiver(&self) -> &Self::Receiver;
fn batch_size(&self) -> u64;
fn progress_bar(&self) -> Option<&ProgressBar>;
fn next_impl(
&mut self,
recv_result: Result<Vec<Image>, mpsc::RecvError>,
) -> Option<Vec<Image>> {
match self.progress_bar() {
Some(progress_bar) => match recv_result {
Ok(item) => {
progress_bar.inc(self.batch_size());
Some(item)
}
Err(_) => {
progress_bar.set_prefix("Iterated");
progress_bar.set_style(
ProgressStyle::with_template(
crate::PROGRESS_BAR_STYLE_FINISH_2, // "{prefix:>12.green.bold} {msg} x{human_len} in {elapsed}",
)
.map_err(|e| anyhow!("Style error: {}", e))
.ok()?,
);
progress_bar.finish();
None
}
},
None => recv_result.ok(),
}
}
}
pub struct DataLoaderIntoIterator {
receiver: mpsc::Receiver<Vec<Image>>,
progress_bar: Option<ProgressBar>,
batch_size: u64,
}
impl DataLoaderIterator for DataLoaderIntoIterator {
type Receiver = mpsc::Receiver<Vec<Image>>;
fn receiver(&self) -> &Self::Receiver {
&self.receiver
}
fn batch_size(&self) -> u64 {
self.batch_size
}
fn progress_bar(&self) -> Option<&ProgressBar> {
self.progress_bar.as_ref()
}
}
impl Iterator for DataLoaderIntoIterator {
type Item = Vec<Image>;
fn next(&mut self) -> Option<Self::Item> {
self.next_impl(self.receiver().recv())
}
}
impl IntoIterator for DataLoader {
type Item = Vec<Image>;
type IntoIter = DataLoaderIntoIterator;
fn into_iter(self) -> Self::IntoIter {
DataLoaderIntoIterator {
receiver: self.receiver,
progress_bar: self.progress_bar,
batch_size: self.batch_size as u64,
}
}
}
pub struct DataLoaderIter<'a> {
receiver: &'a mpsc::Receiver<Vec<Image>>,
progress_bar: Option<&'a ProgressBar>,
batch_size: u64,
}
impl DataLoaderIterator for DataLoaderIter<'_> {
type Receiver = mpsc::Receiver<Vec<Image>>;
fn receiver(&self) -> &Self::Receiver {
self.receiver
}
fn batch_size(&self) -> u64 {
self.batch_size
}
fn progress_bar(&self) -> Option<&ProgressBar> {
self.progress_bar
}
}
impl Iterator for DataLoaderIter<'_> {
type Item = Vec<Image>;
fn next(&mut self) -> Option<Self::Item> {
self.next_impl(self.receiver().recv())
}
}
impl<'a> IntoIterator for &'a DataLoader {
type Item = Vec<Image>;
type IntoIter = DataLoaderIter<'a>;
fn into_iter(self) -> Self::IntoIter {
DataLoaderIter {
receiver: &self.receiver,
progress_bar: self.progress_bar.as_ref(),
batch_size: self.batch_size as u64,
}
}
}
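A usage sketch (not in this commit) for the DataLoader above: batched iteration over a folder of images with a progress bar. The folder path is illustrative, and the usls:: paths assume crate-root re-exports.
fn dataloader_example() -> anyhow::Result<()> {
use usls::DataLoader;
let dl = DataLoader::new("./assets")? // folder, file, glob pattern, URL or stream
.with_batch_size(4)
.with_progress_bar(true)
.build()?; // spawns the producer thread and opens the channel
for batch in &dl {
println!("got {} image(s)", batch.len());
}
Ok(())
}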

145
src/io/dir.rs Normal file
View File

@ -0,0 +1,145 @@
use anyhow::Result;
use std::path::PathBuf;
/// Represents various directories on the system: Home, Cache, Config, and Current.
#[derive(Debug)]
pub enum Dir {
Home,
Cache,
Config,
Current,
}
impl Dir {
/// Returns the raw path for the directory without adding the `crate_name` subdirectory.
///
/// Examples:
/// `~/.cache`, `~/.config`, `~`.
///
pub fn base_dir(&self) -> Result<PathBuf> {
let p = match self {
Dir::Home => dirs::home_dir(),
Dir::Cache => dirs::cache_dir(),
Dir::Config => dirs::config_dir(),
Dir::Current => std::env::current_dir().ok(),
};
let p = p.ok_or_else(|| {
anyhow::anyhow!("Failed to retrieve base path for {:?}. Unsupported operating system. Now supports Linux, MacOS, Windows.", self)
})?;
Ok(p)
}
/// Returns the default path for the `crate_name` directory, creating it automatically if it does not exist.
///
/// Examples:
/// `~/.cache/crate_name`, `~/.config/crate_name`, `~/.crate_name`.
pub fn crate_dir(&self, crate_name: &str) -> Result<PathBuf> {
let mut p = self.base_dir()?;
if let Dir::Home = self {
p.push(format!(".{}", crate_name));
} else {
p.push(crate_name);
}
self.try_create_directory(&p)?;
Ok(p)
}
/// Returns the default path for the `usls` directory, creating it automatically if it does not exist.
///
/// Examples:
/// `~/.cache/usls`, `~/.config/usls`, `~/.usls`.
pub fn crate_dir_default(&self) -> Result<PathBuf> {
self.crate_dir(crate::CRATE_NAME)
}
/// Constructs a path to a specified directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/sub1/sub2/sub3`, `~/.config/sub1/sub2`, `~/sub1/sub2`.
///
/// # Arguments
/// * `subs` - A slice of strings representing subdirectories to append.
///
/// # Returns
/// * `Result<PathBuf>` - The resulting directory path.
pub fn base_dir_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.base_dir()?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
pub fn base_dir_with_filename(&self, filename: &str) -> anyhow::Result<std::path::PathBuf> {
let d = self.base_dir()?.join(filename);
self.try_create_directory(&d)?;
Ok(d)
}
/// Constructs a path to the `crate_name` directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/crate_name/sub1/sub2/sub3`, `~/.config/crate_name/sub1/sub2`, `~/.crate_name/sub1/sub2`.
pub fn crate_dir_with_subs(
&self,
crate_name: &str,
subs: &[&str],
) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.crate_dir(crate_name)?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
/// Constructs a path to the `usls` directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/usls/sub1/sub2/sub3`, `~/.config/usls/sub1/sub2`, `~/.usls/sub1/sub2`.
pub fn crate_dir_default_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.crate_dir_default()?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
pub fn crate_dir_with_filename(
&self,
crate_name: &str,
filename: &str,
) -> anyhow::Result<std::path::PathBuf> {
let d = self.crate_dir(crate_name)?.join(filename);
self.try_create_directory(&d)?;
Ok(d)
}
pub fn crate_dir_default_with_filename(
&self,
filename: &str,
) -> anyhow::Result<std::path::PathBuf> {
let d = self.crate_dir_default()?.join(filename);
self.try_create_directory(&d)?;
Ok(d)
}
/// Appends subdirectories to the given base path and creates the directories if they don't exist.
fn append_subs(&self, path: &mut std::path::PathBuf, subs: &[&str]) -> anyhow::Result<()> {
for sub in subs {
path.push(sub);
}
self.try_create_directory(path)?;
Ok(())
}
fn try_create_directory<P: AsRef<std::path::Path>>(&self, path: P) -> anyhow::Result<()> {
let path = path.as_ref();
if let Err(err) = std::fs::create_dir_all(path) {
return Err(anyhow::anyhow!(
"Failed to create directory at {:?}: {}",
path,
err
));
}
Ok(())
}
}
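An illustrative call (not from this commit) for the Dir helpers above, mirroring how Hub composes its cache path; the subdirectory names here are made up.
fn dir_example() -> anyhow::Result<()> {
use usls::Dir;
// e.g. ~/.cache/usls/jamjamjon/assets/images (created if missing)
let d = Dir::Cache.crate_dir_default_with_subs(&["jamjamjon", "assets", "images"])?;
println!("{}", d.display());
Ok(())
}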

View File

@ -1,5 +1,5 @@
use anyhow::{Context, Result};
-use indicatif::{ProgressBar, ProgressStyle};
+use indicatif::ProgressStyle;
use regex::Regex;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
@ -7,11 +7,11 @@ use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::time::Duration;
-use crate::{retry, Dir, PREFIX_LENGTH};
+use crate::{retry, Dir};
/// Represents a downloadable asset in a release
#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Asset {
+pub(crate) struct Asset {
pub name: String,
pub browser_download_url: String,
pub size: u64,
@ -19,7 +19,7 @@ pub struct Asset {
/// Represents a GitHub release
#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Release {
+pub(crate) struct Release {
pub tag_name: String,
pub assets: Vec<Asset>,
}
@ -46,14 +46,14 @@ pub struct Release {
/// ## 1. Download from a default GitHub release
/// Download a file by specifying its path relative to the release:
/// ```rust,ignore
-/// let path = usls::Hub::default().try_fetch("images/bus.jpg")?;
+/// let path = Hub::default().try_fetch("images/bus.jpg")?;
/// println!("Fetched image to: {:?}", path);
/// ```
///
/// ## 2. Download from a specific GitHub release URL
/// Fetch a file directly using its full GitHub release URL:
/// ```rust,ignore
-/// let path = usls::Hub::default()
+/// let path = Hub::default()
/// .try_fetch("https://github.com/jamjamjon/assets/releases/download/images/bus.jpg")?;
/// println!("Fetched file to: {:?}", path);
/// ```
@ -61,7 +61,7 @@ pub struct Release {
/// ## 3. Fetch available tags and files in a repository
/// List all release tags and the files associated with each tag:
/// ```rust,ignore
-/// let hub = usls::Hub::default().with_owner("jamjamjon").with_repo("usls");
+/// let hub = Hub::default().with_owner("jamjamjon").with_repo("usls");
/// for tag in hub.tags().iter() {
/// let files = hub.files(tag);
/// println!("Tag: {}, Files: {:?}", tag, files);
@ -105,7 +105,7 @@ impl Default for Hub {
let repo = "assets".to_string();
let to = [Dir::Cache, Dir::Home, Dir::Config, Dir::Current]
.into_iter()
-.find(|dir| dir.path().is_ok())
+.find(|dir| dir.crate_dir_default().is_ok())
.expect(
"Unable to get cache directory, home directory, config directory, and current directory. Possible reason: \
\n1. Unsupported OS \
@ -192,7 +192,7 @@ impl Hub {
// keep original owner, repo and tag
let saveout = self
.to
-.path_with_subs(&[&owner_, &repo_, &tag_])?
+.crate_dir_default_with_subs(&[&owner_, &repo_, &tag_])?
.join(&file_name_);
pack = pack.with_url(s).with_tag(&tag_).with_file_name(&file_name_);
@ -249,7 +249,7 @@ impl Hub {
}
}
-self.to.path_with_subs(&[tag_])?.join(file_name_)
+self.to.crate_dir_default_with_subs(&[tag_])?.join(file_name_)
}
}
_ => anyhow::bail!(
@ -395,19 +395,15 @@ impl Hub {
.header("Content-Length")
.and_then(|s| s.parse::<u64>().ok())
.context("Content-Length header is missing or invalid")?;
-let pb = ProgressBar::new(ntotal);
-pb.set_style(
-ProgressStyle::with_template(
-"{prefix:.cyan.bold} {msg} |{bar}| ({percent_precise}%, {binary_bytes}/{binary_total_bytes}, {binary_bytes_per_sec})",
-)?
-.progress_chars("██ "),
-);
-pb.set_prefix(format!("{:>PREFIX_LENGTH$}", "Fetching"));
-pb.set_message(message.unwrap_or_default().to_string());
+let pb = crate::build_progress_bar(
+ntotal,
+"Fetching",
+Some(message.unwrap_or_default()),
+"{prefix:.cyan.bold} {msg} |{bar}| ({percent_precise}%, {binary_bytes}/{binary_total_bytes}, {binary_bytes_per_sec})"
+)?;
let mut reader = resp.into_reader();
-let mut buffer = [0; 512];
+let mut buffer = [0; 2048];
let mut downloaded_bytes = 0usize;
let mut file = std::fs::File::create(&dst)
.with_context(|| format!("Failed to create destination file: {:?}", dst))?;
@ -423,12 +419,11 @@ impl Hub {
pb.inc(bytes_read as u64);
}
-// check size
if downloaded_bytes as u64 != ntotal {
anyhow::bail!("The downloaded file is incomplete.");
}
-// update
+// Update the progress bar
pb.set_prefix("Downloaded");
pb.set_style(ProgressStyle::with_template(
crate::PROGRESS_BAR_STYLE_FINISH_3,
@ -439,10 +434,11 @@ impl Hub {
}
fn fetch_get_response(url: &str) -> anyhow::Result<ureq::Response> {
-let response = ureq::get(url)
+let agent = ureq::AgentBuilder::new().try_proxy_from_env(true).build();
+let response = agent
+.get(url)
.call()
.map_err(|err| anyhow::anyhow!("Failed to GET response from {}: {}", url, err))?;
if response.status() != 200 {
anyhow::bail!("Failed to fetch data from remote due to: {:?}", response);
}
@ -457,7 +453,7 @@ impl Hub {
}
fn get_releases(owner: &str, repo: &str, to: &Dir, ttl: &Duration) -> Result<Vec<Release>> {
-let cache = to.path()?.join(Self::cache_file(owner, repo));
+let cache = to.crate_dir_default()?.join(Self::cache_file(owner, repo));
let is_file_expired = Self::is_file_expired(&cache, ttl)?;
let body = if is_file_expired {
let gh_api_release = format!(
@ -472,9 +468,7 @@ impl Hub {
Ok(serde_json::from_str(&body)?)
}
-pub(crate) fn is_valid_github_release_url(
-url: &str,
-) -> Option<(String, String, String, String)> {
+pub fn is_valid_github_release_url(url: &str) -> Option<(String, String, String, String)> {
let re =
Regex::new(r"^https://github\.com/([^/]+)/([^/]+)/releases/download/([^/]+)/([^/]+)$")
.expect("Failed to compile the regex for GitHub release URL pattern");

131
src/io/media.rs Normal file
View File

@ -0,0 +1,131 @@
pub(crate) const IMAGE_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"];
pub(crate) const VIDEO_EXTENSIONS: &[&str] = &[
"mp4", "avi", "mkv", "mov", "wmv", "flv", "webm", "mpeg", "mpg", "m4v", "m4p",
];
pub(crate) const AUDIO_EXTENSIONS: &[&str] = &["mp3", "wav", "flac", "aac", "ogg", "wma"];
pub(crate) const REMOTE_PROTOCOLS: &[&str] = &[
"http://", "https://", "ftp://", "ftps://", "sftp://", "mms://", "mmsh://", "rtsp://",
"rtmp://", "rtmps://", "file://",
];
pub(crate) const STREAM_PROTOCOLS: &[&str] = &[
"rtsp://", "rtsps://", "rtspu://", "rtmp://", "rtmps://", "hls://",
];
#[derive(Debug, Clone, Default, Copy)]
pub enum Location {
#[default]
Local,
Remote,
}
#[derive(Debug, Clone, Copy, Default)]
pub enum StreamType {
#[default]
Pre,
Live,
}
#[derive(Debug, Clone, Copy, Default)]
pub enum MediaType {
#[default]
Unknown,
Image(Location),
Video(Location),
Audio(Location),
Stream,
}
impl MediaType {
pub fn is_possible_remote(s: &str) -> bool {
// remote
if REMOTE_PROTOCOLS.iter().any(|&p| s.starts_with(p)) {
return true;
}
// local (in case of no network connection)
if s.starts_with("./")
|| s.starts_with("../")
|| s.starts_with('/')
|| std::path::Path::new(s).exists()
{
return false;
}
// check out remote hub tags
if s.split('/').collect::<Vec<&str>>().len() == 2 {
let hub_tags = crate::Hub::default().tags();
return hub_tags.iter().any(|tag| s.starts_with(tag));
}
// default
false
}
pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self {
let extension = path
.as_ref()
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("")
.to_lowercase();
if IMAGE_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Image(Location::Local)
} else if VIDEO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Video(Location::Local)
} else if AUDIO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Audio(Location::Local)
} else {
MediaType::Unknown
}
}
pub fn from_url(url: &str) -> Self {
if IMAGE_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Image(Location::Remote)
} else if VIDEO_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Video(Location::Remote)
} else if STREAM_PROTOCOLS
.iter()
.any(|&protocol| url.starts_with(protocol))
{
MediaType::Stream
} else {
MediaType::Unknown
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_remote() {
assert!(MediaType::is_possible_remote(
"http://example.com/image.jpg"
));
assert!(MediaType::is_possible_remote("rtsp://camera.local/stream"));
// assert!(MediaType::is_possible_remote("images/image.jpg"));
// assert!(MediaType::is_possible_remote("yolo/image.jpg"));
}
#[test]
fn test_is_local() {
assert!(MediaType::is_possible_remote(
"http://example.com/image.jpg"
)); // remote
assert!(!MediaType::is_possible_remote("example.com/image.jpg"));
assert!(!MediaType::is_possible_remote("./assets/bus.jpg"));
assert!(!MediaType::is_possible_remote("assets/bus.jpg"));
assert!(!MediaType::is_possible_remote("./images/image.jpg"));
assert!(!MediaType::is_possible_remote("../images/image.jpg"));
assert!(!MediaType::is_possible_remote("../../images/image.jpg"));
}
}
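As a quick illustration of the dispatch rules above, a minimal sketch (the paths and URLs are illustrative only, not shipped assets):

```rust
// Sketch: how the extension/protocol matching above resolves a MediaType. Inputs are hypothetical.
use usls::{Location, MediaType};

fn main() {
    assert!(matches!(
        MediaType::from_path("bus.jpg"),
        MediaType::Image(Location::Local)
    ));
    assert!(matches!(
        MediaType::from_url("https://example.com/clip.mp4"),
        MediaType::Video(Location::Remote)
    ));
    assert!(matches!(
        MediaType::from_url("rtsp://camera.local/stream"),
        MediaType::Stream
    ));
}
```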

9
src/io/mod.rs Normal file

@ -0,0 +1,9 @@
mod dataloader;
mod dir;
mod hub;
mod media;
pub use dataloader::*;
pub use dir::*;
pub use hub::*;
pub use media::*;


@ -1,49 +1,14 @@
//! **usls** is a Rust library integrated with **ONNXRuntime**, offering a suite of advanced models for **Computer Vision** and **Vision-Language** tasks, including: mod inference;
//! mod io;
//! - **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLO11](https://github.com/ultralytics/ultralytics) #[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
//! - **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
//! - **Vision Models**: [RT-DETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro), [FastViT](https://github.com/apple/ml-fastvit), [BEiT](https://github.com/microsoft/unilm/tree/master/beit), [MobileOne](https://github.com/apple/ml-mobileone)
//! - **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
//! - **OCR Models**: [DB](https://arxiv.org/abs/1911.08947), [FAST](https://github.com/czczup/FAST), [SVTR](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
//! - **And more...**
//!
//! ## ⛳️ Cargo Features
//!
//! By default, **none of the following features are enabled**. You can enable them as needed:
//!
//! - **`auto`**: Automatically downloads prebuilt ONNXRuntime binaries from Pyke's CDN for supported platforms.
//!
//! - If disabled, you'll need to [compile `ONNXRuntime` from source](https://github.com/microsoft/onnxruntime) or [download a precompiled package](https://github.com/microsoft/onnxruntime/releases), and then [link it manually](https://ort.pyke.io/setup/linking).
//!
//! <details>
//! <summary>👉 For Linux or macOS Users</summary>
//!
//! - Download from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
//! - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
//! ```shell
//! export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1
//! ```
//!
//! </details>
//! - **`ffmpeg`**: Adds support for video streams, real-time frame visualization, and video export.
//!
//! - Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). For any issues related to `ffmpeg` features, please refer to the issues of these two crates.
//! - **`cuda`**: Enables the NVIDIA CUDA provider.
//! - **`trt`**: Enables the NVIDIA TensorRT provider.
//! - **`mps`**: Enables the Apple CoreML provider.
//!
//! ## 🎈 Example
//!
//! ```Shell
//! cargo run -r -F cuda --example svtr -- --device cuda
//! ```
//!
//! All examples are located in the [examples](https://github.com/jamjamjon/usls/tree/main/examples) directory.
mod misc;
pub mod models; pub mod models;
mod xy; mod utils;
mod viz;
pub use misc::*; pub use inference::*;
pub use io::*;
pub use minifb::Key;
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
pub use models::*; pub use models::*;
pub use xy::*; pub use utils::*;
pub use viz::*;


@ -1,773 +0,0 @@
use ab_glyph::{FontArc, PxScale};
use anyhow::Result;
use image::{DynamicImage, GenericImage, Rgba, RgbaImage};
use imageproc::map::map_colors;
use crate::{
string_now, Bbox, Color, ColorMap256, Dir, Hub, Keypoint, Mask, Mbr, Polygon, Prob, Y,
};
/// Annotator for struct `Y`
#[derive(Clone)]
pub struct Annotator {
font: FontArc,
_scale: f32, // Cope with ab_glyph & imageproc=0.24.0
scale_dy: f32,
saveout_base: String,
saveout: Option<String>,
saveout_subs: Vec<String>,
decimal_places: usize,
palette: Vec<Color>,
// About mbrs
without_mbrs: bool,
without_mbrs_conf: bool,
without_mbrs_name: bool,
without_mbrs_text_bg: bool,
mbrs_text_color: Rgba<u8>,
// About bboxes
without_bboxes: bool,
without_bboxes_conf: bool,
without_bboxes_name: bool,
without_bboxes_text_bg: bool,
bboxes_text_color: Rgba<u8>,
bboxes_thickness: usize,
bboxes_thickness_threshold: f32,
// About keypoints
without_keypoints: bool,
with_keypoints_conf: bool,
with_keypoints_name: bool,
without_keypoints_text_bg: bool,
keypoints_text_color: Rgba<u8>,
skeletons: Option<Vec<(usize, usize)>>,
keypoints_radius: usize,
keypoints_palette: Option<Vec<(u8, u8, u8, u8)>>,
// About polygons
without_polygons: bool,
without_contours: bool,
with_polygons_conf: bool,
with_polygons_name: bool,
with_polygons_text_bg: bool,
polygons_text_color: Rgba<u8>,
polygons_alpha: u8,
contours_color: Rgba<u8>,
// About masks
without_masks: bool,
colormap: Option<[Color; 256]>,
// About probs
probs_topk: usize,
}
impl Default for Annotator {
fn default() -> Self {
Self {
font: match Self::load_font(None) {
Ok(x) => x,
Err(err) => panic!("Failed to load font: {}", err),
},
_scale: 6.666667,
scale_dy: 28.,
polygons_alpha: 179,
palette: Color::palette_base_20(),
saveout: None,
saveout_subs: vec![],
saveout_base: String::from("runs"),
decimal_places: 4,
without_bboxes: false,
without_bboxes_conf: false,
without_bboxes_name: false,
bboxes_text_color: Rgba([0, 0, 0, 255]),
bboxes_thickness: 1,
bboxes_thickness_threshold: 0.3,
without_bboxes_text_bg: false,
without_mbrs: false,
without_mbrs_conf: false,
without_mbrs_name: false,
without_mbrs_text_bg: false,
mbrs_text_color: Rgba([0, 0, 0, 255]),
without_keypoints: false,
with_keypoints_conf: false,
with_keypoints_name: false,
keypoints_radius: 3,
skeletons: None,
keypoints_palette: None,
without_keypoints_text_bg: false,
keypoints_text_color: Rgba([0, 0, 0, 255]),
without_polygons: false,
without_contours: false,
contours_color: Rgba([255, 255, 255, 255]),
with_polygons_name: false,
with_polygons_conf: false,
with_polygons_text_bg: false,
polygons_text_color: Rgba([255, 255, 255, 255]),
probs_topk: 5usize,
without_masks: false,
colormap: None,
}
}
}
impl Annotator {
pub fn new() -> Self {
Default::default()
}
pub fn with_decimal_places(mut self, x: usize) -> Self {
self.decimal_places = x;
self
}
/// Plotting bboxes or not
pub fn without_bboxes(mut self, x: bool) -> Self {
self.without_bboxes = x;
self
}
pub fn without_bboxes_conf(mut self, x: bool) -> Self {
self.without_bboxes_conf = x;
self
}
pub fn without_bboxes_name(mut self, x: bool) -> Self {
self.without_bboxes_name = x;
self
}
pub fn without_bboxes_text_bg(mut self, x: bool) -> Self {
self.without_bboxes_text_bg = x;
self
}
pub fn with_bboxes_text_bg_alpha(mut self, x: u8) -> Self {
self.bboxes_text_color.0[3] = x;
self
}
pub fn with_bboxes_text_color(mut self, rgba: [u8; 4]) -> Self {
self.bboxes_text_color = Rgba(rgba);
self
}
pub fn with_bboxes_thickness(mut self, thickness: usize) -> Self {
self.bboxes_thickness = thickness;
self
}
pub fn with_bboxes_thickness_threshold(mut self, threshold: f32) -> Self {
self.bboxes_thickness_threshold = threshold;
self
}
pub fn without_keypoints(mut self, x: bool) -> Self {
self.without_keypoints = x;
self
}
pub fn with_skeletons(mut self, x: &[(usize, usize)]) -> Self {
self.skeletons = Some(x.to_vec());
self
}
pub fn with_keypoints_palette(mut self, x: &[(u8, u8, u8, u8)]) -> Self {
self.keypoints_palette = Some(x.to_vec());
self
}
pub fn with_keypoints_radius(mut self, x: usize) -> Self {
self.keypoints_radius = x;
self
}
pub fn with_keypoints_conf(mut self, x: bool) -> Self {
self.with_keypoints_conf = x;
self
}
pub fn with_keypoints_name(mut self, x: bool) -> Self {
self.with_keypoints_name = x;
self
}
pub fn with_keypoints_text_color(mut self, rgba: [u8; 4]) -> Self {
self.keypoints_text_color = Rgba(rgba);
self
}
pub fn without_keypoints_text_bg(mut self, x: bool) -> Self {
self.without_keypoints_text_bg = x;
self
}
pub fn with_keypoints_text_bg_alpha(mut self, x: u8) -> Self {
self.keypoints_text_color.0[3] = x;
self
}
/// Plotting mbrs or not
pub fn without_mbrs(mut self, x: bool) -> Self {
self.without_mbrs = x;
self
}
pub fn without_mbrs_conf(mut self, x: bool) -> Self {
self.without_mbrs_conf = x;
self
}
pub fn without_mbrs_name(mut self, x: bool) -> Self {
self.without_mbrs_name = x;
self
}
pub fn without_mbrs_text_bg(mut self, x: bool) -> Self {
self.without_mbrs_text_bg = x;
self
}
pub fn with_mbrs_text_color(mut self, rgba: [u8; 4]) -> Self {
self.mbrs_text_color = Rgba(rgba);
self
}
pub fn with_mbrs_text_bg_alpha(mut self, x: u8) -> Self {
self.mbrs_text_color.0[3] = x;
self
}
/// Plotting polygons' areas or not
pub fn without_polygons(mut self, x: bool) -> Self {
self.without_polygons = x;
self
}
/// Plotting polygons' contours or not
pub fn without_contours(mut self, x: bool) -> Self {
self.without_contours = x;
self
}
pub fn with_polygons_conf(mut self, x: bool) -> Self {
self.with_polygons_conf = x;
self
}
pub fn with_polygons_name(mut self, x: bool) -> Self {
self.with_polygons_name = x;
self
}
pub fn with_polygons_text_bg(mut self, x: bool) -> Self {
self.with_polygons_text_bg = x;
self
}
/// Plotting masks or not
pub fn without_masks(mut self, x: bool) -> Self {
self.without_masks = x;
self
}
pub fn with_colormap(mut self, x: &str) -> Self {
let x = ColorMap256::from(x);
self.colormap = Some(x.data());
self
}
pub fn with_polygons_text_color(mut self, rgba: [u8; 4]) -> Self {
self.polygons_text_color = Rgba(rgba);
self
}
pub fn with_polygons_alpha(mut self, x: u8) -> Self {
self.polygons_alpha = x;
self
}
pub fn with_polygons_text_bg_alpha(mut self, x: u8) -> Self {
self.polygons_text_color.0[3] = x;
self
}
pub fn with_contours_color(mut self, rgba: [u8; 4]) -> Self {
self.contours_color = Rgba(rgba);
self
}
pub fn with_probs_topk(mut self, x: usize) -> Self {
self.probs_topk = x;
self
}
pub fn with_saveout_base(mut self, x: &str) -> Self {
self.saveout_base = x.to_string();
self
}
pub fn with_saveout(mut self, x: &str) -> Self {
self.saveout = Some(x.to_string());
self
}
pub fn with_saveout_subs(mut self, xs: &[&str]) -> Self {
self.saveout_subs = xs.iter().map(|x| x.to_string()).collect::<Vec<String>>();
self
}
pub fn with_font(mut self, path: &str) -> Result<Self> {
self.font = Self::load_font(Some(path))?;
Ok(self)
}
/// Create folders for saving annotated results. e.g., `./runs/xxx`
pub fn saveout(&self) -> Result<std::path::PathBuf> {
let mut subs = vec![self.saveout_base.as_str()];
if let Some(saveout) = &self.saveout {
// add subs
if !self.saveout_subs.is_empty() {
let xs = self
.saveout_subs
.iter()
.map(|x| x.as_str())
.collect::<Vec<&str>>();
subs.extend(xs);
}
// add filename
subs.push(saveout);
}
// mkdir even if no filename is specified
Dir::Current.raw_path_with_subs(&subs)
}
/// Annotate images, save, and no return
pub fn annotate(&self, imgs: &[DynamicImage], ys: &[Y]) {
let _ = self.plot(imgs, ys, true);
}
/// Plot images and return plotted images
pub fn plot(&self, imgs: &[DynamicImage], ys: &[Y], save: bool) -> Result<Vec<DynamicImage>> {
let mut vs: Vec<DynamicImage> = Vec::new();
// annotate
for (img, y) in imgs.iter().zip(ys.iter()) {
let mut img_rgba = img.to_rgba8();
// polygons
if !self.without_polygons {
if let Some(xs) = &y.polygons() {
self.plot_polygons(&mut img_rgba, xs);
}
}
// bboxes
if !self.without_bboxes {
if let Some(xs) = &y.bboxes() {
self.plot_bboxes(&mut img_rgba, xs);
}
}
// mbrs
if !self.without_mbrs {
if let Some(xs) = &y.mbrs() {
self.plot_mbrs(&mut img_rgba, xs);
}
}
// keypoints
if !self.without_keypoints {
if let Some(xs) = &y.keypoints() {
self.plot_keypoints(&mut img_rgba, xs);
}
}
// masks
if !self.without_masks {
if let Some(xs) = &y.masks() {
self.plot_masks(&mut img_rgba, xs);
}
}
// probs
if let Some(xs) = &y.probs() {
self.plot_probs(&mut img_rgba, xs);
}
// save or not
if save {
let saveout = self.saveout()?.join(format!("{}.png", string_now("-")));
match img_rgba.save(&saveout) {
Err(err) => anyhow::bail!("Failed to save annotated image: {:?}", err),
Ok(_) => {
println!("Annotated image saved to: {:?}", saveout);
}
}
}
// RgbaImage -> DynamicImage
vs.push(image::DynamicImage::from(img_rgba));
}
Ok(vs)
}
/// Plot bounding bboxes and labels
pub fn plot_bboxes(&self, img: &mut RgbaImage, bboxes: &[Bbox]) {
for bbox in bboxes.iter() {
// bbox
let short_side_threshold =
bbox.width().min(bbox.height()) * self.bboxes_thickness_threshold;
let thickness = self.bboxes_thickness.min(short_side_threshold as usize);
for i in 0..thickness {
imageproc::drawing::draw_hollow_rect_mut(
img,
imageproc::rect::Rect::at(
(bbox.xmin().round() as i32) - (i as i32),
(bbox.ymin().round() as i32) - (i as i32),
)
.of_size(
(bbox.width().round() as u32) + (2 * i as u32),
(bbox.height().round() as u32) + (2 * i as u32),
),
image::Rgba(self.get_color(bbox.id() as usize).into()),
);
}
// label
if !self.without_bboxes_name || !self.without_bboxes_conf {
let label = bbox.label(
!self.without_bboxes_name,
!self.without_bboxes_conf,
self.decimal_places,
);
self.put_text(
img,
&label,
(bbox.xmin().round() as i32 - (thickness - 1) as i32).max(0) as f32,
(bbox.ymin().round() as i32 - (thickness - 1) as i32).max(0) as f32,
image::Rgba(self.get_color(bbox.id() as usize).into()),
self.bboxes_text_color,
self.without_bboxes_text_bg,
);
}
}
}
/// Plot minimum bounding rectangle and labels
pub fn plot_mbrs(&self, img: &mut RgbaImage, mbrs: &[Mbr]) {
for mbr in mbrs.iter() {
// mbr
for i in 0..mbr.vertices().len() {
let p1 = mbr.vertices()[i];
let p2 = mbr.vertices()[(i + 1) % mbr.vertices().len()];
imageproc::drawing::draw_line_segment_mut(
img,
(p1.x.round() as f32, p1.y.round() as f32),
(p2.x.round() as f32, p2.y.round() as f32),
image::Rgba(self.get_color(mbr.id() as usize).into()),
);
}
// label
if !self.without_mbrs_name || !self.without_mbrs_conf {
let label = mbr.label(
!self.without_mbrs_name,
!self.without_mbrs_conf,
self.decimal_places,
);
self.put_text(
img,
&label,
mbr.top().x as f32,
mbr.top().y as f32,
image::Rgba(self.get_color(mbr.id() as usize).into()),
self.mbrs_text_color,
self.without_mbrs_text_bg,
);
}
}
}
/// Plot polygons(hollow & filled) and labels
pub fn plot_polygons(&self, img: &mut RgbaImage, polygons: &[Polygon]) {
let mut convas = img.clone();
for polygon in polygons.iter() {
// filled
let polygon_i32 = polygon
.polygon()
.exterior()
.points()
.take(if polygon.is_closed() {
polygon.count() - 1
} else {
polygon.count()
})
.map(|p| imageproc::point::Point::new(p.x() as i32, p.y() as i32))
.collect::<Vec<_>>();
let mut color_ = self.get_color(polygon.id() as usize);
color_.3 = self.polygons_alpha;
imageproc::drawing::draw_polygon_mut(&mut convas, &polygon_i32, Rgba(color_.into()));
// contour
if !self.without_contours {
let polygon_f32 = polygon
.polygon()
.exterior()
.points()
.take(if polygon.is_closed() {
polygon.count() - 1
} else {
polygon.count()
})
.map(|p| imageproc::point::Point::new(p.x() as f32, p.y() as f32))
.collect::<Vec<_>>();
imageproc::drawing::draw_hollow_polygon_mut(img, &polygon_f32, self.contours_color);
}
}
image::imageops::overlay(img, &convas, 0, 0);
// labels on top
if self.with_polygons_name || self.with_polygons_conf {
for polygon in polygons.iter() {
if let Some((x, y)) = polygon.centroid() {
let label = polygon.label(
self.with_polygons_name,
self.with_polygons_conf,
self.decimal_places,
);
self.put_text(
img,
&label,
x,
y,
image::Rgba(self.get_color(polygon.id() as usize).into()),
self.polygons_text_color,
!self.with_polygons_text_bg,
);
}
}
}
}
/// Plot keypoints and texts
pub fn plot_keypoints(&self, img: &mut RgbaImage, keypoints: &[Vec<Keypoint>]) {
for kpts in keypoints.iter() {
for (i, kpt) in kpts.iter().enumerate() {
if kpt.confidence() == 0.0 {
continue;
}
// keypoint
let color = match &self.keypoints_palette {
None => self.get_color(i),
Some(keypoints_palette) => keypoints_palette[i],
};
imageproc::drawing::draw_filled_circle_mut(
img,
(kpt.x() as i32, kpt.y() as i32),
self.keypoints_radius as i32,
image::Rgba(color.into()),
);
// label
if self.with_keypoints_name || self.with_keypoints_conf {
let label = kpt.label(
self.with_keypoints_name,
self.with_keypoints_conf,
self.decimal_places,
);
self.put_text(
img,
&label,
kpt.x(),
kpt.y(),
image::Rgba(self.get_color(kpt.id() as usize).into()),
self.keypoints_text_color,
self.without_keypoints_text_bg,
);
}
}
// skeletons
if let Some(skeletons) = &self.skeletons {
for &(i, ii) in skeletons.iter() {
let kpt1 = &kpts[i];
let kpt2 = &kpts[ii];
if kpt1.confidence() == 0.0 || kpt2.confidence() == 0.0 {
continue;
}
imageproc::drawing::draw_line_segment_mut(
img,
(kpt1.x(), kpt1.y()),
(kpt2.x(), kpt2.y()),
image::Rgba([255, 51, 255, 255]),
);
}
}
}
}
/// Plot masks
pub fn plot_masks(&self, img: &mut RgbaImage, masks: &[Mask]) {
let (w, h) = img.dimensions();
// let hstack = w < h;
let hstack = true;
let scale = 2;
let size = (masks.len() + 1) as u32;
// convas
let convas = img.clone();
let mut convas = image::DynamicImage::from(convas);
if hstack {
convas = convas.resize_exact(
w,
h / scale * (size / scale),
image::imageops::FilterType::CatmullRom,
);
} else {
convas = convas.resize_exact(
w / scale,
h * size / scale,
image::imageops::FilterType::CatmullRom,
);
}
for x in 0..convas.width() {
for y in 0..convas.height() {
convas.put_pixel(x, y, Rgba([255, 255, 255, 255]));
}
}
// place original
let im_ori = img.clone();
let im_ori = image::DynamicImage::from(im_ori);
let im_ori = im_ori.resize_exact(
w / scale,
h / scale,
image::imageops::FilterType::CatmullRom,
);
image::imageops::overlay(&mut convas, &im_ori, 0, 0);
// place masks
for (i, mask) in masks.iter().enumerate() {
let i = i + 1;
let luma = if let Some(colormap) = self.colormap {
let luma = map_colors(mask.mask(), |p| {
let x = p[0];
image::Rgb(colormap[x as usize].rgb().into())
});
image::DynamicImage::from(luma)
} else {
image::DynamicImage::from(mask.mask().to_owned())
};
let luma = luma.resize_exact(
w / scale,
h / scale,
image::imageops::FilterType::CatmullRom,
);
if hstack {
let pos_x = (i as u32 % scale) * luma.width();
let pos_y = (i as u32 / scale) * luma.height();
image::imageops::overlay(&mut convas, &luma, pos_x as i64, pos_y as i64);
} else {
let pos_x = 0;
let pos_y = i as u32 * luma.height();
image::imageops::overlay(&mut convas, &luma, pos_x as i64, pos_y as i64);
}
}
*img = convas.into_rgba8();
}
/// Plot probs
pub fn plot_probs(&self, img: &mut RgbaImage, probs: &Prob) {
let (x, mut y) = (img.width() as i32 / 20, img.height() as i32 / 20);
for k in probs.topk(self.probs_topk).iter() {
let legend = format!("{}: {:.4}", k.2.as_ref().unwrap_or(&k.0.to_string()), k.1);
let scale = PxScale::from(self.scale_dy);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, &legend);
let text_h = text_h + text_h / 3;
y += text_h as i32;
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(x, y).of_size(text_w, text_h),
image::Rgba(self.get_color(k.0).into()),
);
imageproc::drawing::draw_text_mut(
img,
image::Rgba([0, 0, 0, 255]),
x,
y - (self.scale_dy / self._scale).floor() as i32 + 2,
scale,
&self.font,
&legend,
);
}
}
/// Helper for putting texts
#[allow(clippy::too_many_arguments)]
fn put_text(
&self,
img: &mut RgbaImage,
legend: &str,
x: f32,
y: f32,
color: Rgba<u8>,
text_color: Rgba<u8>,
without_text_bg: bool,
) {
if !legend.is_empty() {
let scale = PxScale::from(self.scale_dy);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend);
let text_h = text_h + text_h / 3;
let top = if y > text_h as f32 {
(y.round() as u32 - text_h) as i32
} else {
0
};
let mut left = x as i32;
if left + text_w as i32 > img.width() as i32 {
left = img.width() as i32 - text_w as i32;
}
// text bbox
if !without_text_bg {
imageproc::drawing::draw_filled_rect_mut(
img,
imageproc::rect::Rect::at(left, top).of_size(text_w, text_h),
color,
);
}
// text
imageproc::drawing::draw_text_mut(
img,
text_color,
left,
top - (self.scale_dy / self._scale).floor() as i32 + 2,
scale,
&self.font,
legend,
);
}
}
/// Load custom font
fn load_font(path: Option<&str>) -> Result<FontArc> {
let path_font = match path {
None => Hub::default().try_fetch("fonts/Arial.ttf")?,
Some(p) => p.into(),
};
let buf = std::fs::read(path_font)?;
Ok(FontArc::try_from_vec(buf.to_owned())?)
}
/// Color palette
pub fn get_color(&self, n: usize) -> (u8, u8, u8, u8) {
self.palette[n % self.palette.len()].rgba()
}
}
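For context, a minimal sketch of how this (pre-refactor) builder API was typically driven; `images` and `ys` are assumed to come from a `DataLoader` and a model elsewhere, and `image::DynamicImage` is the image type usls already uses:

```rust
// Sketch only: exercises the builder methods shown above with assumed inputs.
use usls::{Annotator, Y};

fn annotate_batch(images: &[image::DynamicImage], ys: &[Y]) {
    let annotator = Annotator::default()
        .with_bboxes_thickness(3)
        .with_keypoints_radius(4)
        .with_saveout("demo"); // annotated PNGs land under ./runs/demo
    annotator.annotate(images, ys);
}
```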


@ -1,171 +0,0 @@
use anyhow::Result;
use rand::Rng;
/// Color: 0xRRGGBBAA
#[derive(Copy, Clone)]
pub struct Color(u32);
impl std::fmt::Debug for Color {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Color")
.field("RGBA", &self.rgba())
.field("HEX", &self.hex())
.finish()
}
}
impl std::fmt::Display for Color {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.hex())
}
}
impl From<u32> for Color {
fn from(x: u32) -> Self {
Self(x)
}
}
impl From<(u8, u8, u8)> for Color {
fn from((r, g, b): (u8, u8, u8)) -> Self {
Self::from_rgba(r, g, b, 0xff)
}
}
impl From<[u8; 3]> for Color {
fn from(c: [u8; 3]) -> Self {
Self::from((c[0], c[1], c[2]))
}
}
impl From<(u8, u8, u8, u8)> for Color {
fn from((r, g, b, a): (u8, u8, u8, u8)) -> Self {
Self::from_rgba(r, g, b, a)
}
}
impl From<[u8; 4]> for Color {
fn from(c: [u8; 4]) -> Self {
Self::from((c[0], c[1], c[2], c[3]))
}
}
impl TryFrom<&str> for Color {
type Error = &'static str;
fn try_from(x: &str) -> Result<Self, Self::Error> {
let hex = x.trim_start_matches('#');
let hex = match hex.len() {
6 => format!("{}ff", hex),
8 => hex.to_string(),
_ => return Err("Failed to convert `Color` from str: invalid length"),
};
u32::from_str_radix(&hex, 16)
.map(Self)
.map_err(|_| "Failed to convert `Color` from str: invalid hex")
}
}
impl Color {
const fn from_rgba(r: u8, g: u8, b: u8, a: u8) -> Self {
Self(((r as u32) << 24) | ((g as u32) << 16) | ((b as u32) << 8) | (a as u32))
}
pub fn rgba(&self) -> (u8, u8, u8, u8) {
let r = ((self.0 >> 24) & 0xff) as u8;
let g = ((self.0 >> 16) & 0xff) as u8;
let b = ((self.0 >> 8) & 0xff) as u8;
let a = (self.0 & 0xff) as u8;
(r, g, b, a)
}
pub fn rgb(&self) -> (u8, u8, u8) {
let (r, g, b, _) = self.rgba();
(r, g, b)
}
pub fn bgr(&self) -> (u8, u8, u8) {
let (r, g, b) = self.rgb();
(b, g, r)
}
pub fn hex(&self) -> String {
format!("#{:08x}", self.0)
}
pub fn create_palette<A: Into<Self> + Copy>(xs: &[A]) -> Vec<Self> {
xs.iter().copied().map(Into::into).collect()
}
pub fn try_create_palette<A: TryInto<Self> + Copy>(xs: &[A]) -> Result<Vec<Self>>
where
<A as TryInto<Self>>::Error: std::fmt::Debug,
{
xs.iter()
.copied()
.map(|x| {
x.try_into()
.map_err(|e| anyhow::anyhow!("Failed to convert: {:?}", e))
})
.collect()
}
pub fn palette_rand(n: usize) -> Vec<Self> {
let mut rng = rand::thread_rng();
let xs: Vec<(u8, u8, u8)> = (0..n)
.map(|_| {
(
rng.gen_range(0..=255),
rng.gen_range(0..=255),
rng.gen_range(0..=255),
)
})
.collect();
Self::create_palette(&xs)
}
pub fn palette_base_20() -> Vec<Self> {
Self::create_palette(&[
0x00ff7fff, // SpringGreen
0xff69b4ff, // HotPink
0xff6347ff, // Tomato
0xffd700ff, // Gold
0xbc8f8fff, // RosyBrown
0x00bfffff, // DeepSkyBlue
0x8fb88fff, // DarkSeaGreen
0xee82eeff, // Violet
0x9acd32ff, // YellowGreen
0xcd853fff, // Peru
0x1e90ffff, // DodgerBlue
0xd74a49ff, // ?
0x7fffd4ff, // AquaMarine
0x3399ffff, // Blue2
0x00ffffff, // Cyan
0x8a2befff, // BlueViolet
0xa52a2aff, // Brown
0xd8bfd8ff, // Thistle
0xf0ffffff, // Azure
0x609ea0ff, // CadetBlue
])
}
pub fn palette_cotton_candy_5() -> Vec<Self> {
Self::try_create_palette(&["#ff595e", "#ffca3a", "#8ac926", "#1982c4", "#6a4c93"])
.expect("Faild to create palette: Cotton Candy")
}
pub fn palette_tropical_sunrise_5() -> Vec<Self> {
// https://colorkit.co/palette/e12729-f37324-f8cc1b-72b043-007f4e/
Self::try_create_palette(&["#e12729", "#f37324", "#f8cc1b", "#72b043", "#007f4e"])
.expect("Faild to create palette: Tropical Sunrise")
}
pub fn palette_rainbow_10() -> Vec<Self> {
Self::create_palette(&[
0xff595eff, 0xff924cff, 0xffca3aff, 0xc5ca30ff, 0x8ac926ff, 0x52a675ff, 0x1982c4ff,
0x4267acff, 0x6a4c93ff, 0xb5a6c9ff,
])
}
}
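A short sketch of the conversions this type provides; the hex value is arbitrary:

```rust
// Sketch: hex parsing and RGBA unpacking as defined above.
use usls::Color;

fn main() {
    let c = Color::try_from("#ff595e").expect("valid hex color");
    assert_eq!(c.rgba(), (0xff, 0x59, 0x5e, 0xff)); // 6-digit hex gets an opaque alpha appended
    assert_eq!(c.hex(), "#ff595eff");
    println!("{:?}", Color::palette_rand(3)); // three random colors
}
```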


@ -1,476 +0,0 @@
use anyhow::{anyhow, Result};
use image::DynamicImage;
use indicatif::ProgressBar;
use log::{info, warn};
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use std::sync::mpsc;
#[cfg(feature = "ffmpeg")]
use video_rs::{
encode::{Encoder, Settings},
time::Time,
Decoder, Url,
};
use crate::{build_progress_bar, Hub, Location, MediaType};
type TempReturnType = (Vec<DynamicImage>, Vec<PathBuf>);
pub struct DataLoaderIterator {
receiver: mpsc::Receiver<TempReturnType>,
progress_bar: Option<ProgressBar>,
batch_size: u64,
}
impl Iterator for DataLoaderIterator {
type Item = TempReturnType;
fn next(&mut self) -> Option<Self::Item> {
match &self.progress_bar {
None => self.receiver.recv().ok(),
Some(progress_bar) => {
match self.receiver.recv().ok() {
Some(item) => {
progress_bar.inc(self.batch_size);
Some(item)
}
None => {
progress_bar.set_prefix("Iterated");
progress_bar.set_style(
match indicatif::ProgressStyle::with_template(crate::PROGRESS_BAR_STYLE_FINISH_2) {
Ok(x) => x,
Err(err) => panic!("Failed to set style for progressbar in `DataLoaderIterator`: {}", err),
},
);
progress_bar.finish();
None
}
}
}
}
}
}
impl IntoIterator for DataLoader {
type Item = TempReturnType;
type IntoIter = DataLoaderIterator;
fn into_iter(self) -> Self::IntoIter {
let progress_bar = if self.with_pb {
build_progress_bar(
self.nf,
"Iterating",
Some(&format!("{:?}", self.media_type)),
crate::PROGRESS_BAR_STYLE_CYAN_2,
)
.ok()
} else {
None
};
DataLoaderIterator {
receiver: self.receiver,
progress_bar,
batch_size: self.batch_size as _,
}
}
}
/// A structure designed to load and manage image, video, or stream data.
/// It handles local file paths, remote URLs, and live streams, supporting both batch processing
/// and optional progress bar display. The structure also supports video decoding through
/// `video_rs` for video and stream data.
pub struct DataLoader {
/// Queue of paths for images.
paths: Option<VecDeque<PathBuf>>,
/// Media type of the source (image, video, stream, etc.).
media_type: MediaType,
/// Batch size for iteration, determining how many files are processed at once.
batch_size: usize,
/// Buffer size for the channel, used to manage the buffer between producer and consumer.
bound: usize,
/// Receiver for processed data.
receiver: mpsc::Receiver<TempReturnType>,
/// Video decoder for handling video or stream data.
#[cfg(feature = "ffmpeg")]
decoder: Option<video_rs::decode::Decoder>,
/// Number of images or frames; `u64::MAX` is used for live streams (indicating no limit).
nf: u64,
/// Flag indicating whether to display a progress bar.
with_pb: bool,
}
impl TryFrom<&str> for DataLoader {
type Error = anyhow::Error;
fn try_from(str: &str) -> Result<Self, Self::Error> {
Self::new(str)
}
}
impl DataLoader {
pub fn new(source: &str) -> Result<Self> {
// TODO: multi-types
// Vec<&str>
// Number of frames or stream
let mut nf = 0;
// paths & media_type
let source_path = Path::new(source);
let (paths, media_type) = match source_path.exists() {
false => {
// remote
nf = 1;
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_url(source),
)
}
true => {
// local
if source_path.is_file() {
nf = 1;
(
Some(VecDeque::from([source_path.to_path_buf()])),
MediaType::from_path(source_path),
)
} else if source_path.is_dir() {
let paths_sorted = Self::load_from_folder(source_path)?;
nf = paths_sorted.len() as _;
(
Some(VecDeque::from(paths_sorted)),
MediaType::Image(Location::Local),
)
} else {
(None, MediaType::Unknown)
}
}
};
if let MediaType::Unknown = media_type {
anyhow::bail!("Could not locate the source path: {:?}", source_path);
}
// video decoder
#[cfg(not(feature = "ffmpeg"))]
{
match &media_type {
MediaType::Video(Location::Local)
| MediaType::Video(Location::Remote)
| MediaType::Stream => {
anyhow::bail!(
"Video processing requires the features: `ffmpeg`. \
\nConsider enabling them by passing, e.g., `--features ffmpeg`"
);
}
_ => {}
};
}
#[cfg(feature = "ffmpeg")]
let decoder = match &media_type {
MediaType::Video(Location::Local) => Some(Decoder::new(source_path)?),
MediaType::Video(Location::Remote) | MediaType::Stream => {
let location: video_rs::location::Location = source.parse::<Url>()?.into();
Some(Decoder::new(location)?)
}
_ => None,
};
// video & stream frames
#[cfg(feature = "ffmpeg")]
if let Some(decoder) = &decoder {
nf = match decoder.frames() {
Err(_) => u64::MAX,
Ok(0) => u64::MAX,
Ok(x) => x,
}
}
// summary
info!("Found {:?} x{}", media_type, nf);
Ok(DataLoader {
paths,
media_type,
bound: 50,
receiver: mpsc::sync_channel(1).1,
batch_size: 1,
#[cfg(feature = "ffmpeg")]
decoder,
nf,
with_pb: true,
})
}
pub fn with_bound(mut self, x: usize) -> Self {
self.bound = x;
self
}
pub fn with_batch(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
pub fn with_batch_size(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
pub fn with_progress_bar(mut self, x: bool) -> Self {
self.with_pb = x;
self
}
pub fn build(mut self) -> Result<Self> {
let (sender, receiver) = mpsc::sync_channel::<TempReturnType>(self.bound);
self.receiver = receiver;
let batch_size = self.batch_size;
let data = self.paths.take().unwrap_or_default();
let media_type = self.media_type.clone();
#[cfg(feature = "ffmpeg")]
let decoder = self.decoder.take();
// Spawn the producer thread
std::thread::spawn(move || {
DataLoader::producer_thread(
sender,
data,
batch_size,
media_type,
#[cfg(feature = "ffmpeg")]
decoder,
);
});
Ok(self)
}
fn producer_thread(
sender: mpsc::SyncSender<TempReturnType>,
mut data: VecDeque<PathBuf>,
batch_size: usize,
media_type: MediaType,
#[cfg(feature = "ffmpeg")] mut decoder: Option<video_rs::decode::Decoder>,
) {
let mut yis: Vec<DynamicImage> = Vec::with_capacity(batch_size);
let mut yps: Vec<PathBuf> = Vec::with_capacity(batch_size);
match media_type {
MediaType::Image(_) => {
while let Some(path) = data.pop_front() {
match Self::try_read(&path) {
Err(err) => {
warn!("{:?} | {:?}", path, err);
continue;
}
Ok(img) => {
yis.push(img);
yps.push(path);
}
}
if yis.len() == batch_size
&& sender
.send((std::mem::take(&mut yis), std::mem::take(&mut yps)))
.is_err()
{
break;
}
}
}
#[cfg(feature = "ffmpeg")]
MediaType::Video(_) | MediaType::Stream => {
if let Some(decoder) = decoder.as_mut() {
let (w, h) = decoder.size();
let frames = decoder.decode_iter();
for frame in frames {
match frame {
Ok((ts, frame)) => {
let rgb8: image::ImageBuffer<image::Rgb<u8>, Vec<u8>> =
match image::ImageBuffer::from_raw(
w as _,
h as _,
frame.into_raw_vec_and_offset().0,
) {
Some(x) => x,
None => continue,
};
let img = image::DynamicImage::from(rgb8);
yis.push(img);
yps.push(ts.to_string().into());
if yis.len() == batch_size
&& sender
.send((std::mem::take(&mut yis), std::mem::take(&mut yps)))
.is_err()
{
break;
}
}
Err(_) => break,
}
}
}
}
_ => unimplemented!(),
}
// Deal with remaining data
if !yis.is_empty() && sender.send((yis, yps)).is_err() {
info!("Receiver dropped, stopping production");
}
}
pub fn load_from_folder<P: AsRef<std::path::Path>>(path: P) -> Result<Vec<std::path::PathBuf>> {
let mut paths: Vec<PathBuf> = std::fs::read_dir(path)?
.filter_map(|entry| entry.ok())
.filter_map(|entry| {
let path = entry.path();
if path.is_file() {
Some(path)
} else {
None
}
})
.collect();
paths.sort_by(|a, b| {
let a_name = a.file_name().and_then(|s| s.to_str());
let b_name = b.file_name().and_then(|s| s.to_str());
match (a_name, b_name) {
(Some(a_str), Some(b_str)) => natord::compare(a_str, b_str),
_ => std::cmp::Ordering::Equal,
}
});
Ok(paths)
}
pub fn try_read<P: AsRef<Path>>(path: P) -> Result<DynamicImage> {
let mut path = path.as_ref().to_path_buf();
// try to fetch from hub or local cache
if !path.exists() {
let p = Hub::default().try_fetch(path.to_str().unwrap())?;
path = PathBuf::from(&p);
}
let img = Self::read_into_rgb8(path)?;
Ok(DynamicImage::from(img))
}
pub fn try_read_batch<P: AsRef<Path> + std::fmt::Debug>(
paths: &[P],
) -> Result<Vec<DynamicImage>> {
let images = paths
.iter()
.filter_map(|path| match Self::try_read(path) {
Ok(img) => Some(img),
Err(err) => {
warn!("Failed to read from: {:?}. Error: {:?}", path, err);
None
}
})
.collect();
Ok(images)
}
fn read_into_rgb8<P: AsRef<Path>>(path: P) -> Result<image::RgbImage> {
let path = path.as_ref();
let img = image::ImageReader::open(path)
.map_err(|err| {
anyhow!(
"Failed to open image at {:?}. Error: {:?}",
path.display(),
err
)
})?
.with_guessed_format()
.map_err(|err| {
anyhow!(
"Failed to make a format guess based on the content: {:?}. Error: {:?}",
path.display(),
err
)
})?
.decode()
.map_err(|err| {
anyhow!(
"Failed to decode image at {:?}. Error: {:?}",
path.display(),
err
)
})?
.into_rgb8();
Ok(img)
}
/// Convert images into a video
#[cfg(feature = "ffmpeg")]
pub fn is2v<P: AsRef<Path>>(source: P, subs: &[&str], fps: usize) -> Result<()> {
let paths = Self::load_from_folder(source.as_ref())?;
if paths.is_empty() {
anyhow::bail!("No images found.");
}
let mut encoder = None;
let mut position = Time::zero();
let saveout = crate::Dir::Current
.raw_path_with_subs(subs)?
.join(format!("{}.mp4", crate::string_now("-")));
let pb = build_progress_bar(
paths.len() as u64,
"Converting",
Some(&format!("{:?}", MediaType::Video(Location::Local))),
crate::PROGRESS_BAR_STYLE_CYAN_2,
)?;
// loop
for path in paths {
pb.inc(1);
let img = Self::read_into_rgb8(path)?;
let (w, h) = img.dimensions();
// build encoder at the 1st time
if encoder.is_none() {
let settings = Settings::preset_h264_yuv420p(w as _, h as _, false);
encoder = Some(Encoder::new(saveout.clone(), settings)?);
}
// write video
if let Some(encoder) = encoder.as_mut() {
let raw_data = img.into_raw();
let frame = ndarray::Array3::from_shape_vec((h as usize, w as usize, 3), raw_data)
.expect("Failed to create ndarray from raw image data");
// encode and update
encoder.encode(&frame, position)?;
position = position.aligned_with(Time::from_nth_of_a_second(fps)).add();
}
}
match &mut encoder {
Some(vencoder) => vencoder.finish()?,
None => anyhow::bail!("Found no video encoder."),
}
// update
pb.set_prefix("Converted");
pb.set_message(saveout.to_str().unwrap_or_default().to_string());
pb.set_style(indicatif::ProgressStyle::with_template(
crate::PROGRESS_BAR_STYLE_FINISH_4,
)?);
pb.finish();
Ok(())
}
}
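A minimal sketch of driving this loader end to end; `./assets` is a hypothetical image folder:

```rust
// Sketch: batched iteration over a local folder using the API shown above.
use usls::DataLoader;

fn main() -> anyhow::Result<()> {
    let dl = DataLoader::new("./assets")? // a folder, single file, URL, or stream
        .with_batch(4)
        .build()?; // spawns the producer thread and opens the channel
    for (images, paths) in dl {
        println!("got {} image(s): {:?}", images.len(), paths);
    }
    Ok(())
}
```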


@ -1,123 +0,0 @@
/// Represents various directories on the system, including Home, Cache, Config, and more.
#[derive(Debug)]
pub enum Dir {
Home,
Cache,
Config,
Current,
Document,
Data,
Download,
Desktop,
Audio,
Picture,
}
impl Dir {
pub fn saveout(subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
Self::Current.raw_path_with_subs(subs)
}
/// Retrieves the base path for the specified directory type, optionally appending the `usls` subdirectory.
///
/// # Arguments
/// * `raw` - If `true`, returns the base path without adding the `usls` subdirectory.
///
/// # Returns
/// * `Result<PathBuf>` - The base path for the directory.
fn get_path(&self, raw: bool) -> anyhow::Result<std::path::PathBuf> {
let base_path = match self {
Dir::Home => dirs::home_dir(),
Dir::Cache => dirs::cache_dir(),
Dir::Config => dirs::config_dir(),
Dir::Current => std::env::current_dir().ok(),
_ => None,
};
let mut path = base_path.ok_or_else(|| {
anyhow::anyhow!("Unsupported operating system. Now supports Linux, MacOS, Windows.")
})?;
if !raw {
if let Dir::Home = self {
path.push(".usls");
} else {
path.push("usls");
}
}
Ok(path)
}
/// Returns the default path for the `usls` directory, creating it automatically if it does not exist.
///
/// Examples:
/// `~/.cache/usls`, `~/.config/usls`, `~/.usls`.
///
/// # Returns
/// * `Result<PathBuf>` - The default `usls` directory path.
pub fn path(&self) -> anyhow::Result<std::path::PathBuf> {
let d = self.get_path(false)?;
self.create_directory(&d)?;
Ok(d)
}
/// Returns the raw path for the directory without adding the `usls` subdirectory.
///
/// Examples:
/// `~/.cache`, `~/.config`, `~`.
///
/// # Returns
/// * `Result<PathBuf>` - The raw directory path.
pub fn raw_path(&self) -> anyhow::Result<std::path::PathBuf> {
self.get_path(true)
}
/// Constructs a path to the `usls` directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/usls/sub1/sub2/sub3`, `~/.config/usls/sub1/sub2`, `~/.usls/sub1/sub2`.
///
/// # Arguments
/// * `subs` - A slice of strings representing subdirectories to append.
///
/// # Returns
/// * `Result<PathBuf>` - The resulting directory path.
pub fn path_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.get_path(false)?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
/// Constructs a path to a specified directory with the provided subdirectories, creating it automatically.
///
/// Examples:
/// `~/.cache/sub1/sub2/sub3`, `~/.config/sub1/sub2`, `~/sub1/sub2`.
///
/// # Arguments
/// * `subs` - A slice of strings representing subdirectories to append.
///
/// # Returns
/// * `Result<PathBuf>` - The resulting directory path.
pub fn raw_path_with_subs(&self, subs: &[&str]) -> anyhow::Result<std::path::PathBuf> {
let mut d = self.get_path(true)?;
self.append_subs(&mut d, subs)?;
Ok(d)
}
/// Appends subdirectories to the given base path and creates the directories if they don't exist.
fn append_subs(&self, path: &mut std::path::PathBuf, subs: &[&str]) -> anyhow::Result<()> {
for sub in subs {
path.push(sub);
}
self.create_directory(path)?;
Ok(())
}
/// Creates the specified directory if it does not exist.
fn create_directory(&self, path: &std::path::PathBuf) -> anyhow::Result<()> {
if !path.exists() {
std::fs::create_dir_all(path)?;
}
Ok(())
}
}
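A small sketch of the path helpers above; the subdirectory names are arbitrary:

```rust
// Sketch: resolving and creating the crate's cache and output directories.
use usls::Dir;

fn main() -> anyhow::Result<()> {
    let cache = Dir::Cache.path()?; // e.g. ~/.cache/usls, created if missing
    let runs = Dir::Current.raw_path_with_subs(&["runs", "demo"])?; // ./runs/demo
    println!("{:?}\n{:?}", cache, runs);
    Ok(())
}
```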


@ -1,64 +0,0 @@
use crate::{AUDIO_EXTENSIONS, IMAGE_EXTENSIONS, STREAM_PROTOCOLS, VIDEO_EXTENSIONS};
#[derive(Debug, Clone)]
pub enum Location {
Local,
Remote,
}
#[derive(Debug, Clone)]
pub enum StreamType {
Pre,
Live,
}
#[derive(Debug, Clone)]
pub enum MediaType {
Image(Location),
Video(Location),
Audio(Location),
Stream,
Unknown,
}
impl MediaType {
pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self {
let extension = path
.as_ref()
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("")
.to_lowercase();
if IMAGE_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Image(Location::Local)
} else if VIDEO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Video(Location::Local)
} else if AUDIO_EXTENSIONS.contains(&extension.as_str()) {
MediaType::Audio(Location::Local)
} else {
MediaType::Unknown
}
}
pub fn from_url(url: &str) -> Self {
if IMAGE_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Image(Location::Remote)
} else if VIDEO_EXTENSIONS
.iter()
.any(|&ext| url.ends_with(&format!(".{}", ext)))
{
MediaType::Video(Location::Remote)
} else if STREAM_PROTOCOLS
.iter()
.any(|&protocol| url.starts_with(protocol))
{
MediaType::Stream
} else {
MediaType::Unknown
}
}
}


@ -1,59 +0,0 @@
mod annotator;
mod color;
mod colormap256;
mod dataloader;
mod device;
mod dir;
mod dtype;
mod dynconf;
mod engine;
mod hub;
mod iiix;
mod kind;
mod labels;
mod logits_sampler;
mod media;
mod min_opt_max;
pub(crate) mod onnx;
mod ops;
mod options;
mod processor;
mod retry;
mod scale;
mod task;
mod ts;
mod utils;
mod version;
#[cfg(feature = "ffmpeg")]
mod viewer;
pub use annotator::Annotator;
pub use color::Color;
pub use colormap256::*;
pub use dataloader::DataLoader;
pub use device::Device;
pub use dir::Dir;
pub use dtype::DType;
pub use dynconf::DynConf;
pub use engine::*;
pub use hub::Hub;
pub use iiix::Iiix;
pub use kind::Kind;
pub use labels::*;
pub use logits_sampler::LogitsSampler;
pub use media::*;
pub use min_opt_max::MinOptMax;
pub use ops::*;
pub use options::*;
pub use processor::*;
pub use scale::Scale;
pub use task::Task;
pub use ts::Ts;
pub use utils::*;
pub use version::Version;
#[cfg(feature = "ffmpeg")]
pub use viewer::Viewer;
// re-export
#[cfg(feature = "ffmpeg")]
pub use minifb::Key;

File diff suppressed because it is too large


@ -1,487 +0,0 @@
use anyhow::Result;
use fast_image_resize::{
images::{CroppedImageMut, Image},
pixels::PixelType,
FilterType, ResizeAlg, ResizeOptions, Resizer,
};
use image::{DynamicImage, GenericImageView};
use ndarray::{s, Array, Axis};
use rayon::prelude::*;
use std::sync::Mutex;
use tokenizers::{Encoding, Tokenizer};
use crate::{LogitsSampler, X};
#[derive(Debug, Clone)]
pub enum ResizeMode {
FitExact, // StretchToFit
FitWidth,
FitHeight,
FitAdaptive,
Letterbox,
}
#[derive(aksr::Builder, Debug, Clone)]
pub struct Processor {
pub image_width: u32, // target image width
pub image_height: u32, // target image height
pub image0s_size: Vec<(u32, u32)>, // original image height and width
pub scale_factors_hw: Vec<Vec<f32>>,
pub resize_mode: ResizeMode,
pub resize_filter: &'static str,
pub padding_value: u8,
pub do_normalize: bool,
pub image_mean: Vec<f32>,
pub image_std: Vec<f32>,
pub nchw: bool,
pub tokenizer: Option<Tokenizer>,
pub vocab: Vec<String>,
pub unsigned: bool,
pub logits_sampler: Option<LogitsSampler>,
}
impl Default for Processor {
fn default() -> Self {
Self {
image0s_size: vec![],
image_width: 0,
image_height: 0,
scale_factors_hw: vec![],
resize_mode: ResizeMode::FitAdaptive,
resize_filter: "Bilinear",
padding_value: 114,
do_normalize: true,
image_mean: vec![],
image_std: vec![],
nchw: true,
tokenizer: Default::default(),
vocab: vec![],
unsigned: false,
logits_sampler: None,
}
}
}
impl Processor {
pub fn reset_image0_status(&mut self) {
self.scale_factors_hw.clear();
self.image0s_size.clear();
}
pub fn process_images(&mut self, xs: &[DynamicImage]) -> Result<X> {
// self.reset_image0_status();
let (mut x, image0s_size, scale_factors_hw) = self.par_resize(xs)?;
self.image0s_size = image0s_size;
self.scale_factors_hw = scale_factors_hw;
if self.do_normalize {
x = x.normalize(0., 255.)?;
}
if !self.image_std.is_empty() && !self.image_mean.is_empty() {
x = x.standardize(&self.image_mean, &self.image_std, 3)?;
}
if self.nchw {
x = x.nhwc2nchw()?;
}
// Cope with padding problem
if self.unsigned {
x = x.unsigned();
}
Ok(x)
}
pub fn encode_text(&self, x: &str, skip_special_tokens: bool) -> Result<Encoding> {
self.tokenizer
.as_ref()
.expect("No tokenizer specified in `Processor`")
.encode(x, skip_special_tokens)
.map_err(|err| anyhow::anyhow!("Tokenizer encode error: {}", err))
}
pub fn encode_texts(&self, xs: &[&str], skip_special_tokens: bool) -> Result<Vec<Encoding>> {
self.tokenizer
.as_ref()
.expect("No tokenizer specified in `Processor`")
.encode_batch(xs.to_vec(), skip_special_tokens)
.map_err(|err| anyhow::anyhow!("Tokenizer encode_batch error: {}", err))
}
pub fn encode_text_ids(&self, x: &str, skip_special_tokens: bool) -> Result<Vec<f32>> {
let ids: Vec<f32> = if x.is_empty() {
vec![0.0f32]
} else {
self.encode_text(x, skip_special_tokens)?
.get_ids()
.iter()
.map(|x| *x as f32)
.collect()
};
Ok(ids)
}
pub fn encode_texts_ids(
&self,
xs: &[&str],
skip_special_tokens: bool,
) -> Result<Vec<Vec<f32>>> {
let ids: Vec<Vec<f32>> = if xs.is_empty() {
vec![vec![0.0f32]]
} else {
self.encode_texts(xs, skip_special_tokens)?
.into_iter()
.map(|encoding| encoding.get_ids().iter().map(|x| *x as f32).collect())
.collect()
};
Ok(ids)
}
pub fn encode_text_tokens(&self, x: &str, skip_special_tokens: bool) -> Result<Vec<String>> {
Ok(self
.encode_text(x, skip_special_tokens)?
.get_tokens()
.to_vec())
}
pub fn encode_texts_tokens(
&self,
xs: &[&str],
skip_special_tokens: bool,
) -> Result<Vec<Vec<String>>> {
Ok(self
.encode_texts(xs, skip_special_tokens)?
.into_iter()
.map(|encoding| encoding.get_tokens().to_vec())
.collect())
}
pub fn decode_tokens(&self, ids: &[u32], skip_special_tokens: bool) -> Result<String> {
self.tokenizer
.as_ref()
.expect("No tokenizer specified in `Processor`")
.decode(ids, skip_special_tokens)
.map_err(|err| anyhow::anyhow!("Tokenizer decode error: {}", err))
}
pub fn decode_tokens_batch2(
&self,
ids: &[&[u32]],
skip_special_tokens: bool,
) -> Result<Vec<String>> {
self.tokenizer
.as_ref()
.expect("No tokenizer specified in `Processor`")
.decode_batch(ids, skip_special_tokens)
.map_err(|err| anyhow::anyhow!("Tokenizer decode_batch error: {}", err))
}
pub fn decode_tokens_batch(
&self,
ids: &[Vec<u32>],
skip_special_tokens: bool,
) -> Result<Vec<String>> {
self.tokenizer
.as_ref()
.expect("No tokenizer specified in `Processor`")
.decode_batch(
&ids.iter().map(|x| x.as_slice()).collect::<Vec<_>>(),
skip_special_tokens,
)
.map_err(|err| anyhow::anyhow!("Tokenizer decode_batch error: {}", err))
}
pub fn par_generate(
&self,
logits: &X,
token_ids: &mut [Vec<u32>],
eos_token_id: u32,
) -> Result<(bool, Vec<f32>)> {
// token ids
// let mut token_ids: Vec<Vec<u32>> = vec![vec![]; self.encoder.batch()];
// let mut finished = vec![false; self.encoder.batch()];
let batch = token_ids.len();
let mut finished = vec![false; batch];
let mut last_tokens: Vec<f32> = vec![0.; batch];
// let mut logits_sampler = LogitsSampler::new();
// decode each token for each batch
for (i, logit) in logits.axis_iter(Axis(0)).enumerate() {
if !finished[i] {
let token_id = self
.logits_sampler
.as_ref()
.expect("No `LogitsSampler` specified!")
.decode(
&logit
.slice(s![-1, ..])
.into_owned()
.into_raw_vec_and_offset()
.0,
)?;
if token_id == eos_token_id {
finished[i] = true;
} else {
token_ids[i].push(token_id);
}
// update
last_tokens[i] = token_id as f32;
}
}
// all finished?
Ok((finished.iter().all(|&x| x), last_tokens))
}
pub fn build_resizer_filter(ty: &str) -> Result<(Resizer, ResizeOptions)> {
let ty = match ty.to_lowercase().as_str() {
"box" => FilterType::Box,
"bilinear" => FilterType::Bilinear,
"hamming" => FilterType::Hamming,
"catmullrom" => FilterType::CatmullRom,
"mitchell" => FilterType::Mitchell,
"gaussian" => FilterType::Gaussian,
"lanczos3" => FilterType::Lanczos3,
x => anyhow::bail!("Unsupported resizer's filter type: {}", x),
};
Ok((
Resizer::new(),
ResizeOptions::new().resize_alg(ResizeAlg::Convolution(ty)),
))
}
pub fn resize(&mut self, x: &DynamicImage) -> Result<X> {
if self.image_width + self.image_height == 0 {
anyhow::bail!(
"Invalid target height: {} or width: {}.",
self.image_height,
self.image_width
);
}
let buffer = match x.dimensions() {
(w, h) if (w, h) == (self.image_height, self.image_width) => {
self.image0s_size.push((h, w));
self.scale_factors_hw.push(vec![1., 1.]);
x.to_rgb8().into_raw()
}
(w0, h0) => {
self.image0s_size.push((h0, w0));
let (mut resizer, options) = Self::build_resizer_filter(self.resize_filter)?;
if let ResizeMode::FitExact = self.resize_mode {
let mut dst = Image::new(self.image_width, self.image_height, PixelType::U8x3);
resizer.resize(x, &mut dst, &options)?;
self.scale_factors_hw.push(vec![
(self.image_height as f32 / h0 as f32),
(self.image_width as f32 / w0 as f32),
]);
dst.into_vec()
} else {
let (w, h) = match self.resize_mode {
ResizeMode::Letterbox | ResizeMode::FitAdaptive => {
let r = (self.image_width as f32 / w0 as f32)
.min(self.image_height as f32 / h0 as f32);
self.scale_factors_hw.push(vec![r, r]);
(
(w0 as f32 * r).round() as u32,
(h0 as f32 * r).round() as u32,
)
}
ResizeMode::FitHeight => {
let r = self.image_height as f32 / h0 as f32;
self.scale_factors_hw.push(vec![1.0, r]);
((r * w0 as f32).round() as u32, self.image_height)
}
ResizeMode::FitWidth => {
// scale factor
let r = self.image_width as f32 / w0 as f32;
self.scale_factors_hw.push(vec![r, 1.0]);
(self.image_width, (r * h0 as f32).round() as u32)
}
_ => unreachable!(),
};
let mut dst = Image::from_vec_u8(
self.image_width,
self.image_height,
vec![
self.padding_value;
3 * self.image_height as usize * self.image_width as usize
],
PixelType::U8x3,
)?;
let (l, t) = if let ResizeMode::Letterbox = self.resize_mode {
if w == self.image_width {
(0, (self.image_height - h) / 2)
} else {
((self.image_width - w) / 2, 0)
}
} else {
(0, 0)
};
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
resizer.resize(x, &mut dst_cropped, &options)?;
dst.into_vec()
}
}
};
let y = Array::from_shape_vec(
(self.image_height as usize, self.image_width as usize, 3),
buffer,
)?
.mapv(|x| x as f32)
.into_dyn();
Ok(y.into())
}
#[allow(clippy::type_complexity)]
pub fn resize2(&self, x: &DynamicImage) -> Result<(X, (u32, u32), Vec<f32>)> {
if self.image_width + self.image_height == 0 {
anyhow::bail!(
"Invalid target height: {} or width: {}.",
self.image_height,
self.image_width
);
}
let image0s_size: (u32, u32); // original image height and width
let scale_factors_hw: Vec<f32>;
let buffer = match x.dimensions() {
(w, h) if (w, h) == (self.image_height, self.image_width) => {
image0s_size = (h, w);
scale_factors_hw = vec![1., 1.];
x.to_rgb8().into_raw()
}
(w0, h0) => {
image0s_size = (h0, w0);
let (mut resizer, options) = Self::build_resizer_filter(self.resize_filter)?;
if let ResizeMode::FitExact = self.resize_mode {
let mut dst = Image::new(self.image_width, self.image_height, PixelType::U8x3);
resizer.resize(x, &mut dst, &options)?;
scale_factors_hw = vec![
(self.image_height as f32 / h0 as f32),
(self.image_width as f32 / w0 as f32),
];
dst.into_vec()
} else {
let (w, h) = match self.resize_mode {
ResizeMode::Letterbox | ResizeMode::FitAdaptive => {
let r = (self.image_width as f32 / w0 as f32)
.min(self.image_height as f32 / h0 as f32);
scale_factors_hw = vec![r, r];
(
(w0 as f32 * r).round() as u32,
(h0 as f32 * r).round() as u32,
)
}
ResizeMode::FitHeight => {
let r = self.image_height as f32 / h0 as f32;
scale_factors_hw = vec![1.0, r];
((r * w0 as f32).round() as u32, self.image_height)
}
ResizeMode::FitWidth => {
// scale factor
let r = self.image_width as f32 / w0 as f32;
scale_factors_hw = vec![r, 1.0];
(self.image_width, (r * h0 as f32).round() as u32)
}
_ => unreachable!(),
};
let mut dst = Image::from_vec_u8(
self.image_width,
self.image_height,
vec![
self.padding_value;
3 * self.image_height as usize * self.image_width as usize
],
PixelType::U8x3,
)?;
let (l, t) = if let ResizeMode::Letterbox = self.resize_mode {
if w == self.image_width {
(0, (self.image_height - h) / 2)
} else {
((self.image_width - w) / 2, 0)
}
} else {
(0, 0)
};
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
resizer.resize(x, &mut dst_cropped, &options)?;
dst.into_vec()
}
}
};
let y = Array::from_shape_vec(
(self.image_height as usize, self.image_width as usize, 3),
buffer,
)?
.mapv(|x| x as f32)
.into_dyn();
Ok((y.into(), image0s_size, scale_factors_hw))
}
#[allow(clippy::type_complexity)]
pub fn par_resize(&self, xs: &[DynamicImage]) -> Result<(X, Vec<(u32, u32)>, Vec<Vec<f32>>)> {
match xs.len() {
0 => anyhow::bail!("Found no input images."),
1 => {
let (y, image0_size, scale_factors) = self.resize2(&xs[0])?;
Ok((y.insert_axis(0)?, vec![image0_size], vec![scale_factors]))
}
_ => {
let ys = Mutex::new(
Array::zeros((
xs.len(),
self.image_height as usize,
self.image_width as usize,
3,
))
.into_dyn(),
);
let results: Result<Vec<((u32, u32), Vec<f32>)>> = xs
.par_iter()
.enumerate()
.map(|(idx, x)| {
let (y, image0_size, scale_factors) = self.resize2(x)?;
{
let mut ys_guard = ys
.lock()
.map_err(|e| anyhow::anyhow!("Mutex lock error: {e}"))?;
ys_guard.slice_mut(s![idx, .., .., ..]).assign(&y);
}
Ok((image0_size, scale_factors))
})
.collect();
let (image0s_size, scale_factors_hw): (Vec<_>, Vec<_>) =
results?.into_iter().unzip();
let ys_inner = ys
.into_inner()
.map_err(|e| anyhow::anyhow!("Mutex into_inner error: {e}"))?;
Ok((ys_inner.into(), image0s_size, scale_factors_hw))
}
}
}
}
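To make the `FitAdaptive`/`Letterbox` branch above concrete, a worked example of the scale-factor arithmetic; the image and target sizes are illustrative:

```rust
// Sketch: a 1920x1080 input resized into a 640x640 target, as in the code above.
fn main() {
    let (w0, h0) = (1920.0_f32, 1080.0_f32); // original size
    let (tw, th) = (640.0_f32, 640.0_f32);   // target size
    let r = (tw / w0).min(th / h0);          // single scale factor: 0.3333...
    let (w, h) = ((w0 * r).round(), (h0 * r).round()); // 640 x 360
    // FitAdaptive pads at the right/bottom; Letterbox centers: top offset = (640 - 360) / 2 = 140
    println!("scale = {r:.4}, resized = {w}x{h}, padded to {tw}x{th}");
}
```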

View File

@ -1,99 +0,0 @@
#![allow(dead_code)]
use indicatif::{ProgressBar, ProgressStyle};
use rand::{distributions::Alphanumeric, thread_rng, Rng};
pub(crate) const PREFIX_LENGTH: usize = 12;
pub(crate) const NETWORK_PREFIXES: &[&str] = &[
"http://", "https://", "ftp://", "ftps://", "sftp://", "rtsp://", "mms://", "mmsh://",
"rtmp://", "rtmps://", "file://",
];
pub(crate) const IMAGE_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"];
pub(crate) const VIDEO_EXTENSIONS: &[&str] = &[
"mp4", "avi", "mkv", "mov", "wmv", "flv", "webm", "mpeg", "mpg", "m4v", "m4p",
];
pub(crate) const AUDIO_EXTENSIONS: &[&str] = &["mp3", "wav", "flac", "aac", "ogg", "wma"];
pub(crate) const STREAM_PROTOCOLS: &[&str] = &[
"rtsp://", "rtsps://", "rtspu://", "rtmp://", "rtmps://", "hls://", "http://", "https://",
];
pub(crate) const PROGRESS_BAR_STYLE_CYAN: &str =
"{prefix:>12.cyan.bold} {msg} {human_pos}/{human_len} |{bar}| {elapsed_precise}";
pub(crate) const PROGRESS_BAR_STYLE_GREEN: &str =
"{prefix:>12.green.bold} {msg} {human_pos}/{human_len} |{bar}| {elapsed_precise}";
pub(crate) const PROGRESS_BAR_STYLE_CYAN_2: &str =
"{prefix:>12.cyan.bold} {human_pos}/{human_len} |{bar}| {msg}";
pub(crate) const PROGRESS_BAR_STYLE_CYAN_3: &str =
"{prefix:>12.cyan.bold} |{bar}| {human_pos}/{human_len} {msg}";
pub(crate) const PROGRESS_BAR_STYLE_GREEN_2: &str =
"{prefix:>12.green.bold} {human_pos}/{human_len} |{bar}| {elapsed_precise}";
pub(crate) const PROGRESS_BAR_STYLE_FINISH: &str =
"{prefix:>12.green.bold} {msg} for {human_len} iterations in {elapsed}";
pub(crate) const PROGRESS_BAR_STYLE_FINISH_2: &str =
"{prefix:>12.green.bold} {msg} x{human_len} in {elapsed}";
pub(crate) const PROGRESS_BAR_STYLE_FINISH_3: &str =
"{prefix:>12.green.bold} {msg} ({binary_total_bytes}) in {elapsed}";
pub(crate) const PROGRESS_BAR_STYLE_FINISH_4: &str = "{prefix:>12.green.bold} {msg} in {elapsed}";
pub(crate) fn try_fetch_stem<P: AsRef<std::path::Path>>(p: P) -> anyhow::Result<String> {
let p = p.as_ref();
let stem = p
.file_stem()
.ok_or(anyhow::anyhow!(
"Failed to get the `file_stem` of `model_file`: {:?}",
p
))?
.to_str()
.ok_or(anyhow::anyhow!("Failed to convert from `&OsStr` to `&str`"))?;
Ok(stem.to_string())
}
pub fn human_bytes(size: f64, use_binary: bool) -> String {
let units = if use_binary {
["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"]
} else {
["B", "KB", "MB", "GB", "TB", "PB", "EB"]
};
let mut size = size;
let mut unit_index = 0;
let k = if use_binary { 1024. } else { 1000. };
while size >= k && unit_index < units.len() - 1 {
size /= k;
unit_index += 1;
}
format!("{:.2} {}", size, units[unit_index])
}
pub(crate) fn string_random(n: usize) -> String {
thread_rng()
.sample_iter(&Alphanumeric)
.take(n)
.map(char::from)
.collect()
}
pub(crate) fn string_now(delimiter: &str) -> String {
let t_now = chrono::Local::now();
let fmt = format!(
"%Y{}%m{}%d{}%H{}%M{}%S{}%f",
delimiter, delimiter, delimiter, delimiter, delimiter, delimiter
);
t_now.format(&fmt).to_string()
}
pub(crate) fn build_progress_bar(
n: u64,
prefix: &str,
msg: Option<&str>,
style_temp: &str,
) -> anyhow::Result<ProgressBar> {
let pb = ProgressBar::new(n);
pb.set_style(ProgressStyle::with_template(style_temp)?.progress_chars("██ "));
pb.set_prefix(format!("{:>PREFIX_LENGTH$}", prefix));
pb.set_message(msg.unwrap_or_default().to_string());
Ok(pb)
}
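A short crate-internal sketch (most of these helpers are `pub(crate)`); the byte figures follow directly from the `human_bytes` loop above, and the progress-bar call simply wires one of the style templates into `build_progress_bar`.

fn demo_utils() -> anyhow::Result<()> {
    // 1536 / 1024 = 1.5 -> "1.50 KiB"; 1536 / 1000 = 1.536 -> "1.54 KB".
    assert_eq!(human_bytes(1536.0, true), "1.50 KiB");
    assert_eq!(human_bytes(1536.0, false), "1.54 KB");

    // Cyan bar with a right-aligned, 12-character prefix.
    let pb = build_progress_bar(50, "Fetch", Some("weights"), PROGRESS_BAR_STYLE_CYAN)?;
    for _ in 0..50 {
        pb.inc(1);
    }
    pb.finish();

    // Timestamped, collision-resistant names, e.g. "2025-04-27-13-01-51-<nanos>-a1B2c3D4".
    let name = format!("{}-{}", string_now("-"), string_random(8));
    println!("{name}");
    Ok(())
}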

View File

@ -1,43 +0,0 @@
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash, Default)]
pub struct Version(pub u8, pub u8);
impl std::fmt::Display for Version {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let x = if self.1 == 0 {
format!("v{}", self.0)
} else {
format!("v{}.{}", self.0, self.1)
};
write!(f, "{}", x)
}
}
impl From<(u8, u8)> for Version {
fn from((x, y): (u8, u8)) -> Self {
Self(x, y)
}
}
impl From<f32> for Version {
fn from(x: f32) -> Self {
let x = format!("{:?}", x);
let x: Vec<u8> = x
.as_str()
.split('.')
.map(|x| x.parse::<u8>().unwrap_or(0))
.collect();
Self(x[0], x[1])
}
}
impl From<u8> for Version {
fn from(x: u8) -> Self {
Self(x, 0)
}
}
impl Version {
pub fn new(x: u8, y: u8) -> Self {
Self(x, y)
}
}
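The conversions above can be read off directly; a small illustrative check (note that `From<f32>` goes through `{:?}` formatting, so a whole number like `1.0` parses to a zero minor version, which `Display` then omits):

fn demo_version() {
    assert_eq!(Version::new(4, 2).to_string(), "v4.2");
    assert_eq!(Version::from(2u8).to_string(), "v2"); // minor == 0 is omitted
    assert_eq!(Version::from((1u8, 5u8)).to_string(), "v1.5");
    assert_eq!(Version::from(1.0f32).to_string(), "v1"); // "1.0" -> Version(1, 0)
    assert_eq!(Version::from(1.1f32).to_string(), "v1.1"); // "1.1" -> Version(1, 1)
}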

View File

@ -1,195 +0,0 @@
use anyhow::Result;
use image::DynamicImage;
use log::info;
use minifb::{Window, WindowOptions};
use video_rs::{
encode::{Encoder, Settings},
time::Time,
};
pub struct Viewer<'a> {
name: &'a str,
window: Option<Window>,
window_scale: f32,
window_resizable: bool,
fps_poll: usize,
fps: usize,
writer: Option<Encoder>,
position: Time,
}
impl Default for Viewer<'_> {
fn default() -> Self {
Self {
name: "usls-viewer",
window: None,
window_scale: 0.5,
window_resizable: true,
fps_poll: 100,
fps: 25,
writer: None,
position: Time::zero(),
}
}
}
impl Viewer<'_> {
pub fn new() -> Self {
Default::default()
}
pub fn imshow(&mut self, xs: &[DynamicImage]) -> Result<()> {
for x in xs.iter() {
let rgb = x.to_rgb8();
let (w, h) = (rgb.width() as usize, rgb.height() as usize);
let (w_scale, h_scale) = (
(w as f32 * self.window_scale) as usize,
(h as f32 * self.window_scale) as usize,
);
// should reload?
let should_reload = match &self.window {
None => true,
Some(window) => {
if self.window_resizable {
false
} else {
window.get_size() != (w_scale, h_scale)
}
}
};
// create window
if should_reload {
self.window = Window::new(
self.name,
w_scale,
h_scale,
WindowOptions {
resize: true,
topmost: true,
borderless: false,
scale: minifb::Scale::X1,
..WindowOptions::default()
},
)
.ok()
.map(|mut x| {
x.set_target_fps(self.fps_poll);
x
});
}
// build buffer
let mut buffer: Vec<u32> = Vec::with_capacity(w * h);
for pixel in rgb.pixels() {
let r = pixel[0];
let g = pixel[1];
let b = pixel[2];
let p = Self::rgb8_to_u32(r, g, b);
buffer.push(p);
}
// update buffer
self.window
.as_mut()
.unwrap()
.update_with_buffer(&buffer, w, h)?;
}
Ok(())
}
pub fn write(&mut self, frame: &image::DynamicImage) -> Result<()> {
        // build the writer on the first call
let frame = frame.to_rgb8();
let (w, h) = frame.dimensions();
if self.writer.is_none() {
let settings = Settings::preset_h264_yuv420p(w as _, h as _, false);
let saveout =
crate::Dir::saveout(&["runs"])?.join(format!("{}.mp4", crate::string_now("-")));
info!("Video will be save to: {:?}", saveout);
self.writer = Some(Encoder::new(saveout, settings)?);
}
// write video
if let Some(writer) = self.writer.as_mut() {
let raw_data = frame.to_vec();
let frame = ndarray::Array3::from_shape_vec((h as usize, w as usize, 3), raw_data)?;
// encode and update
writer.encode(&frame, self.position)?;
self.position = self
.position
.aligned_with(Time::from_nth_of_a_second(self.fps))
.add();
}
Ok(())
}
pub fn write_batch(&mut self, frames: &[image::DynamicImage]) -> Result<()> {
for frame in frames.iter() {
self.write(frame)?
}
Ok(())
}
pub fn finish_write(&mut self) -> Result<()> {
match &mut self.writer {
Some(writer) => writer.finish()?,
None => {
info!("Found no video writer. No need to release.");
}
}
Ok(())
}
pub fn is_open(&self) -> bool {
if let Some(window) = &self.window {
window.is_open()
} else {
false
}
}
pub fn is_key_pressed(&self, key: crate::Key) -> bool {
if let Some(window) = &self.window {
window.is_key_down(key)
} else {
false
}
}
pub fn is_esc_pressed(&self) -> bool {
self.is_key_pressed(crate::Key::Escape)
}
pub fn resizable(mut self, x: bool) -> Self {
self.window_resizable = x;
self
}
pub fn with_scale(mut self, x: f32) -> Self {
self.window_scale = x;
self
}
pub fn with_fps(mut self, x: usize) -> Self {
self.fps = x;
self
}
pub fn with_delay(mut self, x: usize) -> Self {
self.fps_poll = 1000 / x;
self
}
pub fn wh(&self) -> Option<(usize, usize)> {
self.window.as_ref().map(|x| x.get_size())
}
fn rgb8_to_u32(r: u8, g: u8, b: u8) -> u32 {
let (r, g, b) = (r as u32, g as u32, b as u32);
(r << 16) | (g << 8) | b
}
}
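A hedged usage sketch of the viewer loop; the frame source is left to the caller, and only the builder methods and the imshow/write/finish_write calls come from the impl above (writing lazily starts an H.264 encoder on the first frame and saves under runs/).

fn demo_viewer(frames: &[image::DynamicImage]) -> anyhow::Result<()> {
    let mut viewer = Viewer::new()
        .resizable(false) // recreate the window when the frame size changes
        .with_scale(0.5)  // display at half resolution
        .with_fps(25)     // encoding frame rate
        .with_delay(10);  // poll the window at 1000 / 10 = 100 fps

    for frame in frames {
        viewer.imshow(std::slice::from_ref(frame))?;
        viewer.write(frame)?; // creates runs/<timestamp>.mp4 on the first call
        if !viewer.is_open() || viewer.is_esc_pressed() {
            break; // window closed or Esc pressed
        }
    }
    viewer.finish_write() // flush and finalize the encoder
}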

View File

@ -1,4 +1,4 @@
-use crate::IMAGENET_NAMES_1K;
+use crate::NAMES_IMAGENET_1K;
 /// Model configuration for `BEiT`
 impl crate::Options {
@ -13,7 +13,7 @@ impl crate::Options {
 .with_image_std(&[0.5, 0.5, 0.5])
 .with_normalize(true)
 .with_apply_softmax(true)
-.with_class_names(&IMAGENET_NAMES_1K)
+.with_class_names(&NAMES_IMAGENET_1K)
 }
 pub fn beit_base() -> Self {

View File

@ -10,9 +10,3 @@ The official repository can be found on: [GitHub](https://github.com/salesforce/
 - [ ] Visual Question Answering (VQA)
 - [ ] Image-Text Retrieval
 - [ ] TensorRT Support for Textual Model
-## Example
-Refer to the [example](../../../examples/blip)

View File

@ -22,13 +22,13 @@ impl crate::Options {
 pub fn blip_v1_base_caption_visual() -> Self {
 Self::blip_visual()
-.with_model_version(1.0.into())
+.with_model_version(1.into())
 .with_model_file("v1-base-caption-visual.onnx")
 }
 pub fn blip_v1_base_caption_textual() -> Self {
 Self::blip_textual()
-.with_model_version(1.0.into())
+.with_model_version(1.into())
 .with_model_file("v1-base-caption-textual.onnx")
 }
 }

View File

@ -1,12 +1,11 @@
 use aksr::Builder;
 use anyhow::Result;
-use image::DynamicImage;
 use ndarray::{s, Axis};
 use crate::{
 elapsed,
 models::{BaseModelTextual, BaseModelVisual},
-LogitsSampler, Options, Ts, Xs, Ys, X, Y,
+Image, LogitsSampler, Options, Ts, Xs, X, Y,
 };
 #[derive(Debug, Builder)]
@ -35,7 +34,7 @@ impl Blip {
 })
 }
-pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<X> {
+pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
 self.visual.encode(xs)
 }
@ -47,14 +46,14 @@ impl Blip {
 Ok(vec![input_ids.clone(); self.batch()])
 }
-pub fn forward(&mut self, images: &[DynamicImage], text: Option<&str>) -> Result<Ys> {
+pub fn forward(&mut self, images: &[Image], text: Option<&str>) -> Result<Vec<Y>> {
 let image_embeds = elapsed!("encode_images", self.ts, { self.encode_images(images)? });
 let ys = elapsed!("generate", self.ts, { self.generate(&image_embeds, text)? });
 Ok(ys)
 }
-pub fn generate(&mut self, image_embeds: &X, text: Option<&str>) -> Result<Ys> {
+pub fn generate(&mut self, image_embeds: &X, text: Option<&str>) -> Result<Vec<Y>> {
 // encode texts
 let mut token_ids = self.encode_texts(text)?;
@ -113,9 +112,9 @@ impl Blip {
 let ys = texts
 .into_iter()
-.map(|x| Y::default().with_texts(&[x.into()]))
-.collect::<Vec<_>>()
-.into();
+.map(|x| Y::default().with_texts(&[&x]))
+.collect::<Vec<_>>();
+// .into();
 Ok(ys)
 }
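A hedged sketch of the captioning call after this change; constructing `Blip` and its `Image` inputs is not shown in this hunk, so the sketch takes them as parameters and relies only on the `forward` signature in the diff above.

fn caption(blip: &mut Blip, images: &[Image]) -> anyhow::Result<Vec<Y>> {
    // `Some(..)` presumably seeds the decoder with a prompt prefix;
    // `None` falls back to the default token ids from `encode_texts`.
    blip.forward(images, Some("a photo of"))
}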

View File

@ -1,9 +1,8 @@
 use aksr::Builder;
 use anyhow::Result;
-use image::DynamicImage;
 use ndarray::Array2;
-use crate::{elapsed, Engine, Options, Processor, Ts, Xs, X};
+use crate::{elapsed, Engine, Image, Options, Processor, Ts, Xs, X};
 #[derive(Debug, Builder)]
 pub struct ClipVisual {
@ -39,7 +38,7 @@ impl ClipVisual {
 })
 }
-pub fn preprocess(&mut self, xs: &[DynamicImage]) -> Result<Xs> {
+pub fn preprocess(&mut self, xs: &[Image]) -> Result<Xs> {
 let x = self.processor.process_images(xs)?;
 Ok(x.into())
@ -49,7 +48,7 @@ impl ClipVisual {
 self.engine.run(xs)
 }
-pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<X> {
+pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
 let xs = elapsed!("visual-preprocess", self.ts, { self.preprocess(xs)? });
 let xs = elapsed!("visual-inference", self.ts, { self.inference(xs)? });
 let x = elapsed!("visual-postprocess", self.ts, { xs[0].to_owned() });
@ -129,7 +128,7 @@ impl Clip {
 })
 }
-pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<X> {
+pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
 let x = elapsed!("encode_images", self.ts, { self.visual.encode_images(xs)? });
 Ok(x)
 }

View File

@ -1,4 +1,4 @@
-use crate::IMAGENET_NAMES_1K;
+use crate::NAMES_IMAGENET_1K;
 /// Model configuration for `ConvNeXt`
 impl crate::Options {
@ -13,7 +13,7 @@ impl crate::Options {
 .with_image_std(&[0.229, 0.224, 0.225])
 .with_normalize(true)
 .with_apply_softmax(true)
-.with_class_names(&IMAGENET_NAMES_1K)
+.with_class_names(&NAMES_IMAGENET_1K)
 }
 pub fn convnext_v1_tiny() -> Self {

Some files were not shown because too many files have changed in this diff.