Add Depth-Anything model (#10)

* Add Depth-Anything model
This commit is contained in:
Jamjamjon
2024-04-25 00:41:29 +08:00
committed by GitHub
parent beda8ef803
commit e614ca4136
11 changed files with 476 additions and 24 deletions

View File

@ -1,9 +1,15 @@
# usls
A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) and others.
A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) and others.
## Recently Updated
| Depth-Anything |
| :----------------------------: |
|<img src='examples/depth-anything/demo.png' width="800px">|
| YOLOP-v2 | Face-Parsing | Text-Detection |
| :----------------------------: | :------------------------------: | :------------------------------: |
|<img src='examples/yolop/demo.png' height="240px">| <img src='examples/face-parsing/demo.png' height="240px"> | <img src='examples/db/demo.png' height="240px"> |
@ -41,6 +47,7 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp
| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
| [YOLOv5-classification](https://github.com/ultralytics/yolov5) | Image Classification | [demo](examples/yolov5) | ✅ | ✅ | ✅ | ✅ |
| [YOLOv5-segmentation](https://github.com/ultralytics/yolov5) | Instance Segmentation | [demo](examples/yolov5) | ✅ | ✅ | ✅ | ✅ |
| [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
## Solution Models

View File

@ -0,0 +1,16 @@
## Quick Start
```shell
cargo run -r --example depth-anything
```
## ONNX Model
- [depth-anything-s-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-s-dyn.onnx)
- [depth-anything-b-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-b-dyn.onnx)
- [depth-anything-l-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-l-dyn.onnx)
## Results
![](./demo.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

View File

@ -0,0 +1,23 @@
use usls::{models::DepthAnything, Annotator, DataLoader, Options};
/// Depth-Anything example: builds the model, runs it on one image and hands
/// the result to the annotator.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // build the model from its options (model path plus the i00 / i02 / i03 settings)
    let model = DepthAnything::new(
        &Options::default()
            .with_model("../models/depth-anything-s-dyn.onnx")
            .with_i00((1, 1, 8).into())
            .with_i02((384, 512, 1024).into())
            .with_i03((384, 512, 1024).into()),
    )?;

    // read the single input image
    let images = vec![DataLoader::try_read("./assets/2.jpg")?];

    // inference
    let depths = model.run(&images)?;

    // draw the results; "Depth-Anything" is the annotator's save-out name
    let annotator = Annotator::default().with_saveout("Depth-Anything");
    annotator.annotate(&images, &depths);

    Ok(())
}

View File

@ -6,31 +6,16 @@ This demo showcases how to use `DINOv2` to compute image similarity, applicable
cargo run -r --example dinov2
```
## Or you can do it manually
## Download DINOv2 ONNX Model
### 1. Download DINOv2 ONNX Model
- [dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
- [dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
- [dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
[dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
[dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
[dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
- [dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
- [dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
### 2. Specify the ONNX model path in `main.rs`
```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);
```
### 3. Then, run
```bash
cargo run -r --example dinov2
```
## Results

View File

@ -1,7 +1,10 @@
use crate::{auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, Y};
use crate::{
auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, TURBO, Y,
};
use ab_glyph::{FontVec, PxScale};
use anyhow::Result;
use image::{DynamicImage, Rgba, RgbaImage};
use image::{DynamicImage, ImageBuffer, Rgba, RgbaImage};
use imageproc::map::map_colors;
/// Annotator for struct `Y`
#[derive(Debug)]
@ -265,6 +268,13 @@ impl Annotator {
for (img, y) in imgs.iter().zip(ys.iter()) {
let mut img_rgb = img.to_rgba8();
// pixels
if !self.without_masks {
if let Some(xs) = &y.pixels() {
self.plot_pixels(&mut img_rgb, xs)
}
}
// masks
if !self.without_masks {
if let Some(xs) = &y.masks() {
@ -377,6 +387,60 @@ impl Annotator {
}
}
pub fn plot_pixels(&self, img: &mut RgbaImage, pixels: &[u8]) {
let (w, h) = img.dimensions();
let luma: ImageBuffer<image::Luma<_>, Vec<u8>> =
ImageBuffer::from_raw(w, h, pixels.to_vec())
.expect("Faild to create luma from ndarray");
let luma = map_colors(&luma, |p| {
let x = p[0];
image::Rgb(TURBO[x as usize])
});
let luma = image::DynamicImage::from(luma);
let luma = luma.resize_exact(w / 2, h / 2, image::imageops::FilterType::CatmullRom);
let im_ori = img.clone();
let im_ori = image::DynamicImage::from(im_ori);
let im_ori = im_ori.resize_exact(w / 2, h / 2, image::imageops::FilterType::CatmullRom);
// overwrite
for x in 0..w {
for y in 0..h {
img.put_pixel(x, y, Rgba([255, 255, 255, 255]));
}
}
// paste
let pos_x = 0;
let pos_y = (2 * (h - im_ori.height()) / 3) as i64;
image::imageops::overlay(img, &im_ori, pos_x, pos_y);
image::imageops::overlay(img, &luma, im_ori.width().into(), pos_y);
// text
let legend = "Raw";
let scale = PxScale::from(self.scale_dy * 2.5);
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend);
imageproc::drawing::draw_text_mut(
img,
Rgba([0, 0, 0, 255]),
((im_ori.width() - text_w) / 2) as i32,
((pos_y as u32 - text_h) / 2) as i32,
scale,
&self.font,
legend,
);
let legend = "Depth";
let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend);
imageproc::drawing::draw_text_mut(
img,
Rgba([0, 0, 0, 255]),
(im_ori.width() + (im_ori.width() - text_w) / 2) as i32,
((pos_y as u32 - text_h) / 2) as i32,
scale,
&self.font,
legend,
);
}
pub fn plot_masks_and_polygons(&self, img: &mut RgbaImage, masks: &[Mask]) {
let mut convas = img.clone();
for mask in masks.iter() {

View File

@ -0,0 +1,78 @@
use crate::{ops, MinOptMax, Options, OrtEngine, Y};
use anyhow::Result;
use image::{DynamicImage, ImageBuffer};
use ndarray::{Array, Axis, IxDyn};
/// Depth-Anything model wrapper: owns the inference engine together with the
/// batch / height / width settings copied from it at construction time.
#[derive(Debug)]
pub struct DepthAnything {
/// Engine used to run the model.
engine: OrtEngine,
/// Input height setting taken from the engine; its `opt` value is the height
/// inputs are resized to before inference.
height: MinOptMax,
/// Input width setting taken from the engine; its `opt` value is the width
/// inputs are resized to before inference.
width: MinOptMax,
/// Batch setting taken from the engine.
batch: MinOptMax,
}
impl DepthAnything {
pub fn new(options: &Options) -> Result<Self> {
let engine = OrtEngine::new(options)?;
let (batch, height, width) = (
engine.batch().to_owned(),
engine.height().to_owned(),
engine.width().to_owned(),
);
engine.dry_run()?;
Ok(Self {
engine,
height,
width,
batch,
})
}
pub fn run(&self, xs: &[DynamicImage]) -> Result<Vec<Y>> {
let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]);
let ys = self.engine.run(&[xs_])?;
self.postprocess(ys, xs)
}
pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Y>> {
let mut ys: Vec<Y> = Vec::new();
for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() {
let luma = luma
.into_shape((self.height() as usize, self.width() as usize, 1))?
.into_owned();
let v = luma.into_raw_vec();
let max_ = v.iter().max_by(|x, y| x.total_cmp(y)).unwrap();
let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap();
let v = v
.iter()
.map(|x| (((*x - min_) / (max_ - min_)) * 255.).min(255.).max(0.) as u8)
.collect::<Vec<_>>();
let luma: ImageBuffer<image::Luma<_>, Vec<u8>> =
ImageBuffer::from_raw(self.width() as u32, self.height() as u32, v)
.expect("Faild to create image from ndarray");
let luma = image::DynamicImage::from(luma);
let luma = luma.resize_exact(
xs0[idx].width(),
xs0[idx].height(),
image::imageops::FilterType::CatmullRom,
);
ys.push(Y::default().with_pixels(&luma.into_luma8().into_raw()));
}
Ok(ys)
}
pub fn batch(&self) -> isize {
self.batch.opt
}
pub fn width(&self) -> isize {
self.width.opt
}
pub fn height(&self) -> isize {
self.height.opt
}
}

View File

@ -1,6 +1,7 @@
mod blip;
mod clip;
mod db;
mod depth_anything;
mod dinov2;
mod rtdetr;
mod rtmo;
@ -11,6 +12,7 @@ mod yolop;
pub use blip::Blip;
pub use clip::Clip;
pub use db::DB;
pub use depth_anything::DepthAnything;
pub use dinov2::Dinov2;
pub use rtdetr::RTDETR;
pub use rtmo::RTMO;

View File

@ -4,6 +4,10 @@ use std::io::{Read, Write};
use std::path::{Path, PathBuf};
pub mod coco;
mod turbo;
pub use turbo::TURBO;
/// Base URL of the GitHub release (`v0.0.1` of `jamjamjon/assets`) under which
/// downloadable assets such as the ONNX models are published.
pub const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1";
pub const CHECK_MARK: &str = "";
pub const CROSS_MARK: &str = "";

258
src/utils/turbo.rs Normal file
View File

@ -0,0 +1,258 @@
/// Lookup table of 256 `[r, g, b]` triplets, one per possible `u8` value.
///
/// Indexed with an 8-bit intensity to turn a single-channel map into a color
/// image. NOTE(review): the values look like the "Turbo" colormap, as the name
/// suggests — confirm provenance before documenting it as such.
pub const TURBO: [[u8; 3]; 256] = [
[48, 18, 59],
[50, 21, 67],
[51, 24, 74],
[52, 27, 81],
[53, 30, 88],
[54, 33, 95],
[55, 36, 102],
[56, 39, 109],
[57, 42, 115],
[58, 45, 121],
[59, 47, 128],
[60, 50, 134],
[61, 53, 139],
[62, 56, 145],
[63, 59, 151],
[63, 62, 156],
[64, 64, 162],
[65, 67, 167],
[65, 70, 172],
[66, 73, 177],
[66, 75, 181],
[67, 78, 186],
[68, 81, 191],
[68, 84, 195],
[68, 86, 199],
[69, 89, 203],
[69, 92, 207],
[69, 94, 211],
[70, 97, 214],
[70, 100, 218],
[70, 102, 221],
[70, 105, 224],
[70, 107, 227],
[71, 110, 230],
[71, 113, 233],
[71, 115, 235],
[71, 118, 238],
[71, 120, 240],
[71, 123, 242],
[70, 125, 244],
[70, 128, 246],
[70, 130, 248],
[70, 133, 250],
[70, 135, 251],
[69, 138, 252],
[69, 140, 253],
[68, 143, 254],
[67, 145, 254],
[66, 148, 255],
[65, 150, 255],
[64, 153, 255],
[62, 155, 254],
[61, 158, 254],
[59, 160, 253],
[58, 163, 252],
[56, 165, 251],
[55, 168, 250],
[53, 171, 248],
[51, 173, 247],
[49, 175, 245],
[47, 178, 244],
[46, 180, 242],
[44, 183, 240],
[42, 185, 238],
[40, 188, 235],
[39, 190, 233],
[37, 192, 231],
[35, 195, 228],
[34, 197, 226],
[32, 199, 223],
[31, 201, 221],
[30, 203, 218],
[28, 205, 216],
[27, 208, 213],
[26, 210, 210],
[26, 212, 208],
[25, 213, 205],
[24, 215, 202],
[24, 217, 200],
[24, 219, 197],
[24, 221, 194],
[24, 222, 192],
[24, 224, 189],
[25, 226, 187],
[25, 227, 185],
[26, 228, 182],
[28, 230, 180],
[29, 231, 178],
[31, 233, 175],
[32, 234, 172],
[34, 235, 170],
[37, 236, 167],
[39, 238, 164],
[42, 239, 161],
[44, 240, 158],
[47, 241, 155],
[50, 242, 152],
[53, 243, 148],
[56, 244, 145],
[60, 245, 142],
[63, 246, 138],
[67, 247, 135],
[70, 248, 132],
[74, 248, 128],
[78, 249, 125],
[82, 250, 122],
[85, 250, 118],
[89, 251, 115],
[93, 252, 111],
[97, 252, 108],
[101, 253, 105],
[105, 253, 102],
[109, 254, 98],
[113, 254, 95],
[117, 254, 92],
[121, 254, 89],
[125, 255, 86],
[128, 255, 83],
[132, 255, 81],
[136, 255, 78],
[139, 255, 75],
[143, 255, 73],
[146, 255, 71],
[150, 254, 68],
[153, 254, 66],
[156, 254, 64],
[159, 253, 63],
[161, 253, 61],
[164, 252, 60],
[167, 252, 58],
[169, 251, 57],
[172, 251, 56],
[175, 250, 55],
[177, 249, 54],
[180, 248, 54],
[183, 247, 53],
[185, 246, 53],
[188, 245, 52],
[190, 244, 52],
[193, 243, 52],
[195, 241, 52],
[198, 240, 52],
[200, 239, 52],
[203, 237, 52],
[205, 236, 52],
[208, 234, 52],
[210, 233, 53],
[212, 231, 53],
[215, 229, 53],
[217, 228, 54],
[219, 226, 54],
[221, 224, 55],
[223, 223, 55],
[225, 221, 55],
[227, 219, 56],
[229, 217, 56],
[231, 215, 57],
[233, 213, 57],
[235, 211, 57],
[236, 209, 58],
[238, 207, 58],
[239, 205, 58],
[241, 203, 58],
[242, 201, 58],
[244, 199, 58],
[245, 197, 58],
[246, 195, 58],
[247, 193, 58],
[248, 190, 57],
[249, 188, 57],
[250, 186, 57],
[251, 184, 56],
[251, 182, 55],
[252, 179, 54],
[252, 177, 54],
[253, 174, 53],
[253, 172, 52],
[254, 169, 51],
[254, 167, 50],
[254, 164, 49],
[254, 161, 48],
[254, 158, 47],
[254, 155, 45],
[254, 153, 44],
[254, 150, 43],
[254, 147, 42],
[254, 144, 41],
[253, 141, 39],
[253, 138, 38],
[252, 135, 37],
[252, 132, 35],
[251, 129, 34],
[251, 126, 33],
[250, 123, 31],
[249, 120, 30],
[249, 117, 29],
[248, 114, 28],
[247, 111, 26],
[246, 108, 25],
[245, 105, 24],
[244, 102, 23],
[243, 99, 21],
[242, 96, 20],
[241, 93, 19],
[240, 91, 18],
[239, 88, 17],
[237, 85, 16],
[236, 83, 15],
[235, 80, 14],
[234, 78, 13],
[232, 75, 12],
[231, 73, 12],
[229, 71, 11],
[228, 69, 10],
[226, 67, 10],
[225, 65, 9],
[223, 63, 8],
[221, 61, 8],
[220, 59, 7],
[218, 57, 7],
[216, 55, 6],
[214, 53, 6],
[212, 51, 5],
[210, 49, 5],
[208, 47, 5],
[206, 45, 4],
[204, 43, 4],
[202, 42, 4],
[200, 40, 3],
[197, 38, 3],
[195, 37, 3],
[193, 35, 2],
[190, 33, 2],
[188, 32, 2],
[185, 30, 2],
[183, 29, 2],
[180, 27, 1],
[178, 26, 1],
[175, 24, 1],
[172, 23, 1],
[169, 22, 1],
[167, 20, 1],
[164, 19, 1],
[161, 18, 1],
[158, 16, 1],
[155, 15, 1],
[152, 14, 1],
[149, 13, 1],
[146, 11, 1],
[142, 10, 1],
[139, 9, 2],
[136, 8, 2],
[133, 7, 2],
[129, 6, 2],
[126, 5, 2],
[122, 4, 3],
];

View File

@ -8,6 +8,7 @@ pub struct Y {
mbrs: Option<Vec<Mbr>>,
masks: Option<Vec<Mask>>,
texts: Option<Vec<String>>,
pixels: Option<Vec<u8>>,
}
impl std::fmt::Debug for Y {
@ -41,11 +42,21 @@ impl std::fmt::Debug for Y {
f.field("Masks", &x);
}
}
if let Some(x) = &self.pixels {
if !x.is_empty() {
f.field("Pixels", &x);
}
}
f.finish()
}
}
impl Y {
/// Builder-style setter: stores an owned copy of `pixels` and returns `self`.
pub fn with_pixels(mut self, pixels: &[u8]) -> Self {
    let owned: Vec<u8> = pixels.to_owned();
    self.pixels = Some(owned);
    self
}
pub fn with_probs(mut self, probs: Prob) -> Self {
self.probs = Some(probs);
self
@ -75,6 +86,10 @@ impl Y {
self
}
pub fn pixels(&self) -> Option<&Vec<u8>> {
self.pixels.as_ref()
}
pub fn probs(&self) -> Option<&Prob> {
self.probs.as_ref()
}