diff --git a/README.md b/README.md index e26e5f1..8261002 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,15 @@ # usls -A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) and others. +A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) and others. 
## Recently Updated +| Depth-Anything | +| :----------------------------: | +|| + + + | YOLOP-v2 | Face-Parsing | Text-Detection | | :----------------------------: | :------------------------------: | :------------------------------: | || | | @@ -41,6 +47,7 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp | [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ | | [YOLOv5-classification](https://github.com/ultralytics/yolov5) | Object Detection | [demo](examples/yolov5) | ✅ | ✅ | ✅ | ✅ | | [YOLOv5-segmentation](https://github.com/ultralytics/yolov5) | Instance Segmentation | [demo](examples/yolov5) | ✅ | ✅ | ✅ | ✅ | +| [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ | ## Solution Models diff --git a/examples/depth-anything/README.md b/examples/depth-anything/README.md new file mode 100644 index 0000000..8d8e7e2 --- /dev/null +++ b/examples/depth-anything/README.md @@ -0,0 +1,16 @@ +## Quick Start + +```shell +cargo run -r --example depth-anything +``` + +## ONNX Model + +- [depth-anything-s-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-s-dyn.onnx) +- [depth-anything-b-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-b-dyn.onnx) +- [depth-anything-l-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-l-dyn.onnx) + + +## Results + +![](./demo.png) diff --git a/examples/depth-anything/demo.png b/examples/depth-anything/demo.png new file mode 100644 index 0000000..8325016 Binary files /dev/null and b/examples/depth-anything/demo.png differ diff --git a/examples/depth-anything/main.rs b/examples/depth-anything/main.rs new file mode 100644 index 0000000..53dcca9 --- /dev/null +++ b/examples/depth-anything/main.rs @@ -0,0 +1,23 @@ +use usls::{models::DepthAnything, Annotator, DataLoader, Options}; 
+ +fn main() -> Result<(), Box> { + // visual + let options = Options::default() + .with_model("../models/depth-anything-s-dyn.onnx") + .with_i00((1, 1, 8).into()) + .with_i02((384, 512, 1024).into()) + .with_i03((384, 512, 1024).into()); + let model = DepthAnything::new(&options)?; + + // load + let x = vec![DataLoader::try_read("./assets/2.jpg")?]; + + // run + let y = model.run(&x)?; + + // annotate + let annotator = Annotator::default().with_saveout("Depth-Anything"); + annotator.annotate(&x, &y); + + Ok(()) +} diff --git a/examples/dinov2/README.md b/examples/dinov2/README.md index 42646a7..d18c803 100644 --- a/examples/dinov2/README.md +++ b/examples/dinov2/README.md @@ -6,31 +6,16 @@ This demo showcases how to use `DINOv2` to compute image similarity, applicable cargo run -r --example dinov2 ``` -## Or you can manully +## Download DINOv2 ONNX Model -### 1.Donwload DINOv2 ONNX Model +- [dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx) +- [dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx) +- [dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx) -[dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx) -[dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx) -[dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx) - -[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx) -[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx) +- [dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx) +- [dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx) -### 2. 
Specify the ONNX model path in `main.rs` - -```Rust -let options = Options::default() - .with_model("ONNX_PATH") // <= modify this - .with_profile(false); -``` - -### 3. Then, run - -```bash -cargo run -r --example dinov2 -``` ## Results diff --git a/src/core/annotator.rs b/src/core/annotator.rs index ed93368..3d4fa3a 100644 --- a/src/core/annotator.rs +++ b/src/core/annotator.rs @@ -1,7 +1,10 @@ -use crate::{auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, Y}; +use crate::{ + auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, TURBO, Y, +}; use ab_glyph::{FontVec, PxScale}; use anyhow::Result; -use image::{DynamicImage, Rgba, RgbaImage}; +use image::{DynamicImage, ImageBuffer, Rgba, RgbaImage}; +use imageproc::map::map_colors; /// Annotator for struct `Y` #[derive(Debug)] @@ -265,6 +268,13 @@ impl Annotator { for (img, y) in imgs.iter().zip(ys.iter()) { let mut img_rgb = img.to_rgba8(); + // pixels + if !self.without_masks { + if let Some(xs) = &y.pixels() { + self.plot_pixels(&mut img_rgb, xs) + } + } + // masks if !self.without_masks { if let Some(xs) = &y.masks() { @@ -377,6 +387,60 @@ impl Annotator { } } + pub fn plot_pixels(&self, img: &mut RgbaImage, pixels: &[u8]) { + let (w, h) = img.dimensions(); + let luma: ImageBuffer, Vec> = + ImageBuffer::from_raw(w, h, pixels.to_vec()) + .expect("Faild to create luma from ndarray"); + let luma = map_colors(&luma, |p| { + let x = p[0]; + image::Rgb(TURBO[x as usize]) + }); + let luma = image::DynamicImage::from(luma); + let luma = luma.resize_exact(w / 2, h / 2, image::imageops::FilterType::CatmullRom); + let im_ori = img.clone(); + let im_ori = image::DynamicImage::from(im_ori); + let im_ori = im_ori.resize_exact(w / 2, h / 2, image::imageops::FilterType::CatmullRom); + + // overwrite + for x in 0..w { + for y in 0..h { + img.put_pixel(x, y, Rgba([255, 255, 255, 255])); + } + } + + // paste + let pos_x = 0; + let pos_y = (2 * (h - im_ori.height()) / 3) as 
i64; + image::imageops::overlay(img, &im_ori, pos_x, pos_y); + image::imageops::overlay(img, &luma, im_ori.width().into(), pos_y); + + // text + let legend = "Raw"; + let scale = PxScale::from(self.scale_dy * 2.5); + let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend); + imageproc::drawing::draw_text_mut( + img, + Rgba([0, 0, 0, 255]), + ((im_ori.width() - text_w) / 2) as i32, + ((pos_y as u32 - text_h) / 2) as i32, + scale, + &self.font, + legend, + ); + let legend = "Depth"; + let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend); + imageproc::drawing::draw_text_mut( + img, + Rgba([0, 0, 0, 255]), + (im_ori.width() + (im_ori.width() - text_w) / 2) as i32, + ((pos_y as u32 - text_h) / 2) as i32, + scale, + &self.font, + legend, + ); + } + pub fn plot_masks_and_polygons(&self, img: &mut RgbaImage, masks: &[Mask]) { let mut convas = img.clone(); for mask in masks.iter() { diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs new file mode 100644 index 0000000..ee3aa8f --- /dev/null +++ b/src/models/depth_anything.rs @@ -0,0 +1,78 @@ +use crate::{ops, MinOptMax, Options, OrtEngine, Y}; +use anyhow::Result; +use image::{DynamicImage, ImageBuffer}; +use ndarray::{Array, Axis, IxDyn}; + +#[derive(Debug)] +pub struct DepthAnything { + engine: OrtEngine, + height: MinOptMax, + width: MinOptMax, + batch: MinOptMax, +} + +impl DepthAnything { + pub fn new(options: &Options) -> Result { + let engine = OrtEngine::new(options)?; + let (batch, height, width) = ( + engine.batch().to_owned(), + engine.height().to_owned(), + engine.width().to_owned(), + ); + engine.dry_run()?; + + Ok(Self { + engine, + height, + width, + batch, + }) + } + + pub fn run(&self, xs: &[DynamicImage]) -> Result> { + let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?; + let xs_ = ops::normalize(xs_, 0.0, 255.0); + let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]); + let ys = 
self.engine.run(&[xs_])?; + self.postprocess(ys, xs) + } + + pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + let mut ys: Vec = Vec::new(); + for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() { + let luma = luma + .into_shape((self.height() as usize, self.width() as usize, 1))? + .into_owned(); + let v = luma.into_raw_vec(); + let max_ = v.iter().max_by(|x, y| x.total_cmp(y)).unwrap(); + let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap(); + let v = v + .iter() + .map(|x| (((*x - min_) / (max_ - min_)) * 255.).min(255.).max(0.) as u8) + .collect::>(); + let luma: ImageBuffer, Vec> = + ImageBuffer::from_raw(self.width() as u32, self.height() as u32, v) + .expect("Faild to create image from ndarray"); + let luma = image::DynamicImage::from(luma); + let luma = luma.resize_exact( + xs0[idx].width(), + xs0[idx].height(), + image::imageops::FilterType::CatmullRom, + ); + ys.push(Y::default().with_pixels(&luma.into_luma8().into_raw())); + } + Ok(ys) + } + + pub fn batch(&self) -> isize { + self.batch.opt + } + + pub fn width(&self) -> isize { + self.width.opt + } + + pub fn height(&self) -> isize { + self.height.opt + } +} diff --git a/src/models/mod.rs b/src/models/mod.rs index 0b4e86a..98e1e31 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -1,6 +1,7 @@ mod blip; mod clip; mod db; +mod depth_anything; mod dinov2; mod rtdetr; mod rtmo; @@ -11,6 +12,7 @@ mod yolop; pub use blip::Blip; pub use clip::Clip; pub use db::DB; +pub use depth_anything::DepthAnything; pub use dinov2::Dinov2; pub use rtdetr::RTDETR; pub use rtmo::RTMO; diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 02ce2a8..3d418d9 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -4,6 +4,10 @@ use std::io::{Read, Write}; use std::path::{Path, PathBuf}; pub mod coco; +mod turbo; + +pub use turbo::TURBO; + pub const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1"; pub const CHECK_MARK: &str = "✅"; pub const CROSS_MARK: 
&str = "❌"; diff --git a/src/utils/turbo.rs b/src/utils/turbo.rs new file mode 100644 index 0000000..e9e5518 --- /dev/null +++ b/src/utils/turbo.rs @@ -0,0 +1,258 @@ +pub const TURBO: [[u8; 3]; 256] = [ + [48, 18, 59], + [50, 21, 67], + [51, 24, 74], + [52, 27, 81], + [53, 30, 88], + [54, 33, 95], + [55, 36, 102], + [56, 39, 109], + [57, 42, 115], + [58, 45, 121], + [59, 47, 128], + [60, 50, 134], + [61, 53, 139], + [62, 56, 145], + [63, 59, 151], + [63, 62, 156], + [64, 64, 162], + [65, 67, 167], + [65, 70, 172], + [66, 73, 177], + [66, 75, 181], + [67, 78, 186], + [68, 81, 191], + [68, 84, 195], + [68, 86, 199], + [69, 89, 203], + [69, 92, 207], + [69, 94, 211], + [70, 97, 214], + [70, 100, 218], + [70, 102, 221], + [70, 105, 224], + [70, 107, 227], + [71, 110, 230], + [71, 113, 233], + [71, 115, 235], + [71, 118, 238], + [71, 120, 240], + [71, 123, 242], + [70, 125, 244], + [70, 128, 246], + [70, 130, 248], + [70, 133, 250], + [70, 135, 251], + [69, 138, 252], + [69, 140, 253], + [68, 143, 254], + [67, 145, 254], + [66, 148, 255], + [65, 150, 255], + [64, 153, 255], + [62, 155, 254], + [61, 158, 254], + [59, 160, 253], + [58, 163, 252], + [56, 165, 251], + [55, 168, 250], + [53, 171, 248], + [51, 173, 247], + [49, 175, 245], + [47, 178, 244], + [46, 180, 242], + [44, 183, 240], + [42, 185, 238], + [40, 188, 235], + [39, 190, 233], + [37, 192, 231], + [35, 195, 228], + [34, 197, 226], + [32, 199, 223], + [31, 201, 221], + [30, 203, 218], + [28, 205, 216], + [27, 208, 213], + [26, 210, 210], + [26, 212, 208], + [25, 213, 205], + [24, 215, 202], + [24, 217, 200], + [24, 219, 197], + [24, 221, 194], + [24, 222, 192], + [24, 224, 189], + [25, 226, 187], + [25, 227, 185], + [26, 228, 182], + [28, 230, 180], + [29, 231, 178], + [31, 233, 175], + [32, 234, 172], + [34, 235, 170], + [37, 236, 167], + [39, 238, 164], + [42, 239, 161], + [44, 240, 158], + [47, 241, 155], + [50, 242, 152], + [53, 243, 148], + [56, 244, 145], + [60, 245, 142], + [63, 246, 138], + [67, 247, 
135], + [70, 248, 132], + [74, 248, 128], + [78, 249, 125], + [82, 250, 122], + [85, 250, 118], + [89, 251, 115], + [93, 252, 111], + [97, 252, 108], + [101, 253, 105], + [105, 253, 102], + [109, 254, 98], + [113, 254, 95], + [117, 254, 92], + [121, 254, 89], + [125, 255, 86], + [128, 255, 83], + [132, 255, 81], + [136, 255, 78], + [139, 255, 75], + [143, 255, 73], + [146, 255, 71], + [150, 254, 68], + [153, 254, 66], + [156, 254, 64], + [159, 253, 63], + [161, 253, 61], + [164, 252, 60], + [167, 252, 58], + [169, 251, 57], + [172, 251, 56], + [175, 250, 55], + [177, 249, 54], + [180, 248, 54], + [183, 247, 53], + [185, 246, 53], + [188, 245, 52], + [190, 244, 52], + [193, 243, 52], + [195, 241, 52], + [198, 240, 52], + [200, 239, 52], + [203, 237, 52], + [205, 236, 52], + [208, 234, 52], + [210, 233, 53], + [212, 231, 53], + [215, 229, 53], + [217, 228, 54], + [219, 226, 54], + [221, 224, 55], + [223, 223, 55], + [225, 221, 55], + [227, 219, 56], + [229, 217, 56], + [231, 215, 57], + [233, 213, 57], + [235, 211, 57], + [236, 209, 58], + [238, 207, 58], + [239, 205, 58], + [241, 203, 58], + [242, 201, 58], + [244, 199, 58], + [245, 197, 58], + [246, 195, 58], + [247, 193, 58], + [248, 190, 57], + [249, 188, 57], + [250, 186, 57], + [251, 184, 56], + [251, 182, 55], + [252, 179, 54], + [252, 177, 54], + [253, 174, 53], + [253, 172, 52], + [254, 169, 51], + [254, 167, 50], + [254, 164, 49], + [254, 161, 48], + [254, 158, 47], + [254, 155, 45], + [254, 153, 44], + [254, 150, 43], + [254, 147, 42], + [254, 144, 41], + [253, 141, 39], + [253, 138, 38], + [252, 135, 37], + [252, 132, 35], + [251, 129, 34], + [251, 126, 33], + [250, 123, 31], + [249, 120, 30], + [249, 117, 29], + [248, 114, 28], + [247, 111, 26], + [246, 108, 25], + [245, 105, 24], + [244, 102, 23], + [243, 99, 21], + [242, 96, 20], + [241, 93, 19], + [240, 91, 18], + [239, 88, 17], + [237, 85, 16], + [236, 83, 15], + [235, 80, 14], + [234, 78, 13], + [232, 75, 12], + [231, 73, 12], + [229, 71, 11], + 
[228, 69, 10], + [226, 67, 10], + [225, 65, 9], + [223, 63, 8], + [221, 61, 8], + [220, 59, 7], + [218, 57, 7], + [216, 55, 6], + [214, 53, 6], + [212, 51, 5], + [210, 49, 5], + [208, 47, 5], + [206, 45, 4], + [204, 43, 4], + [202, 42, 4], + [200, 40, 3], + [197, 38, 3], + [195, 37, 3], + [193, 35, 2], + [190, 33, 2], + [188, 32, 2], + [185, 30, 2], + [183, 29, 2], + [180, 27, 1], + [178, 26, 1], + [175, 24, 1], + [172, 23, 1], + [169, 22, 1], + [167, 20, 1], + [164, 19, 1], + [161, 18, 1], + [158, 16, 1], + [155, 15, 1], + [152, 14, 1], + [149, 13, 1], + [146, 11, 1], + [142, 10, 1], + [139, 9, 2], + [136, 8, 2], + [133, 7, 2], + [129, 6, 2], + [126, 5, 2], + [122, 4, 3], +]; diff --git a/src/ys/y.rs b/src/ys/y.rs index 69b95aa..ed1083d 100644 --- a/src/ys/y.rs +++ b/src/ys/y.rs @@ -8,6 +8,7 @@ pub struct Y { mbrs: Option>, masks: Option>, texts: Option>, + pixels: Option>, } impl std::fmt::Debug for Y { @@ -41,11 +42,21 @@ impl std::fmt::Debug for Y { f.field("Masks", &x); } } + if let Some(x) = &self.pixels { + if !x.is_empty() { + f.field("Pixels", &x); + } + } f.finish() } } impl Y { + pub fn with_pixels(mut self, pixels: &[u8]) -> Self { + self.pixels = Some(pixels.to_vec()); + self + } + pub fn with_probs(mut self, probs: Prob) -> Self { self.probs = Some(probs); self @@ -75,6 +86,10 @@ impl Y { self } + pub fn pixels(&self) -> Option<&Vec> { + self.pixels.as_ref() + } + pub fn probs(&self) -> Option<&Prob> { self.probs.as_ref() }