diff --git a/Cargo.toml b/Cargo.toml index ccd0305..09f7ea2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "usls" -version = "0.0.3" +version = "0.0.4" edition = "2021" description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models." repository = "https://github.com/jamjamjon/usls" @@ -44,4 +44,4 @@ ab_glyph = "0.2.23" geo = "0.28.0" prost = "0.12.4" human_bytes = "0.4.3" -fast_image_resize = "3.0.4" \ No newline at end of file +fast_image_resize = { git = "https://github.com/jamjamjon/fast_image_resize", branch = "dev" , features = ["image"]} diff --git a/README.md b/README.md index 6dc2504..6d267bf 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,8 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp || | | +- 2024/06/30: **Accelerate model pre-processing and post-processing using SIMD**. YOLOv8-seg post-processing (~120ms => ~20ms), Depth-Anything post-processing (~23ms => ~2ms). + ## Supported Models @@ -100,7 +102,7 @@ check **[ort guide](https://ort.pyke.io/setup/linking)** #### 1. Add `usls` as a dependency to your project's `Cargo.toml` ```shell -cargo add --git https://github.com/jamjamjon/usls +usls = { git = "https://github.com/jamjamjon/usls", rev = "xxx"} ``` #### 2. Set `Options` and build model diff --git a/examples/face-parsing/main.rs b/examples/face-parsing/main.rs index a0b9a9f..ad55569 100644 --- a/examples/face-parsing/main.rs +++ b/examples/face-parsing/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs index a315a56..f7cf16b 100644 --- a/examples/fastsam/main.rs +++ b/examples/fastsam/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/rtmo/main.rs b/examples/rtmo/main.rs index a06bef3..bf6f696 100644 --- a/examples/rtmo/main.rs +++ b/examples/rtmo/main.rs @@ -16,7 +16,7 @@ fn main() -> Result<(), Box> { // run let y = model.run(&x)?; - // // annotate + // annotate let annotator = Annotator::default() .with_saveout("RTMO") .with_skeletons(&coco::SKELETONS_16); diff --git a/examples/yolo-world/main.rs b/examples/yolo-world/main.rs index 324e774..d616d32 100644 --- a/examples/yolo-world/main.rs +++ b/examples/yolo-world/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/yolov10/main.rs b/examples/yolov10/main.rs index 8ce8a5e..0bf65cd 100644 --- a/examples/yolov10/main.rs +++ b/examples/yolov10/main.rs @@ -1,6 +1,6 @@ use usls::{ models::{YOLOVersion, YOLO}, - Annotator, DataLoader, Options, + Annotator, DataLoader, Options, Vision, }; fn main() -> Result<(), Box> { diff --git a/examples/yolov5/main.rs b/examples/yolov5/main.rs index d1c3c8c..905c0c9 100644 --- a/examples/yolov5/main.rs +++ b/examples/yolov5/main.rs @@ -1,6 +1,6 @@ use usls::{ models::{YOLOTask, YOLOVersion, YOLO}, - Annotator, DataLoader, Options, + Annotator, DataLoader, Options, Vision, }; fn main() -> Result<(), Box> { diff --git a/examples/yolov8-face/main.rs b/examples/yolov8-face/main.rs index b095b53..f9b5419 100644 --- a/examples/yolov8-face/main.rs +++ b/examples/yolov8-face/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/yolov8-falldown/main.rs b/examples/yolov8-falldown/main.rs index 14b114e..75078cb 100644 --- a/examples/yolov8-falldown/main.rs +++ b/examples/yolov8-falldown/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/yolov8-head/main.rs b/examples/yolov8-head/main.rs index f8af6c8..d76a17e 100644 --- a/examples/yolov8-head/main.rs +++ b/examples/yolov8-head/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model diff --git a/examples/yolov8-trash/main.rs b/examples/yolov8-trash/main.rs index 6c906d2..e98b8ba 100644 --- a/examples/yolov8-trash/main.rs +++ b/examples/yolov8-trash/main.rs @@ -1,4 +1,4 @@ -use usls::{models::YOLO, Annotator, DataLoader, Options}; +use usls::{models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // 1.build model diff --git a/examples/yolov8/main.rs b/examples/yolov8/main.rs index 579a7d4..a6bc6bb 100644 --- a/examples/yolov8/main.rs +++ b/examples/yolov8/main.rs @@ -1,13 +1,13 @@ -use usls::{coco, models::YOLO, Annotator, DataLoader, Options}; +use usls::{coco, models::YOLO, Annotator, DataLoader, Options, Vision}; fn main() -> Result<(), Box> { // build model let options = Options::default() - .with_model("yolov8m-dyn.onnx")? + // .with_model("yolov8m-dyn.onnx")? // .with_model("yolov8m-dyn-f16.onnx")? // .with_model("yolov8m-pose-dyn.onnx")? // .with_model("yolov8m-cls-dyn.onnx")? - // .with_model("yolov8m-seg-dyn.onnx")? + .with_model("yolov8m-seg-dyn.onnx")? // .with_model("yolov8m-obb-dyn.onnx")? // .with_model("yolov8m-oiv7-dyn.onnx")? // .with_trt(0) @@ -37,6 +37,7 @@ fn main() -> Result<(), Box> { // run & annotate for (xs, _paths) in dl { let ys = model.run(&xs)?; + // let ys = model.forward(&xs, true)?; annotator.annotate(&xs, &ys); } diff --git a/examples/yolov9/main.rs b/examples/yolov9/main.rs index efb172c..8a181cd 100644 --- a/examples/yolov9/main.rs +++ b/examples/yolov9/main.rs @@ -1,6 +1,6 @@ use usls::{ models::{YOLOVersion, YOLO}, - Annotator, DataLoader, Options, + Annotator, DataLoader, Options, Vision, }; fn main() -> Result<(), Box> { diff --git a/src/core/dataloader.rs b/src/core/dataloader.rs index 6d47e28..c94fcc4 100644 --- a/src/core/dataloader.rs +++ b/src/core/dataloader.rs @@ -92,10 +92,12 @@ impl DataLoader { } pub fn try_read>(path: P) -> Result { - image::io::Reader::open(&path) + let img = image::io::Reader::open(&path) .map_err(|_| anyhow!("Failed to open image at {:?}", path.as_ref()))? .decode() - .map_err(|_| anyhow!("Failed to decode image at {:?}", path.as_ref())) + .map_err(|_| anyhow!("Failed to decode image at {:?}", path.as_ref()))? + .into_rgb8(); + Ok(DynamicImage::from(img)) } pub fn with_batch(mut self, x: usize) -> Self { diff --git a/src/core/engine.rs b/src/core/engine.rs index f11807c..796fabe 100644 --- a/src/core/engine.rs +++ b/src/core/engine.rs @@ -9,9 +9,7 @@ use ort::{ use prost::Message; use std::collections::HashSet; -use crate::{ - home_dir, onnx, ops::make_divisible, Device, MinOptMax, Options, Ts, CHECK_MARK, CROSS_MARK, -}; +use crate::{home_dir, onnx, Device, MinOptMax, Ops, Options, Ts, CHECK_MARK, CROSS_MARK, X}; /// Ort Tensor Attrs: name, data_type, dims #[derive(Debug)] @@ -57,7 +55,7 @@ impl OrtEngine { params += param; // mems - let param = make_divisible(param, byte_alignment); + let param = Ops::make_divisible(param, byte_alignment); let n = Self::nbytes_from_onnx_dtype_id(tensor_proto.data_type as usize); let wbmem = param * n; wbmems += wbmem; @@ -258,17 +256,18 @@ impl OrtEngine { pub fn dry_run(&mut self) -> Result<()> { if self.num_dry_run > 0 { - let mut xs: Vec> = Vec::new(); + let mut xs = Vec::new(); for i in self.inputs_minoptmax.iter() { let mut x: Vec = Vec::new(); for i_ in i.iter() { x.push(i_.opt as usize); } let x: Array = Array::ones(x).into_dyn(); - xs.push(x); + xs.push(X::from(x)); } for _ in 0..self.num_dry_run { - self.run(xs.as_ref())?; + // self.run(xs.as_ref())?; + self.run(xs.clone())?; } self.ts.clear(); println!("{CHECK_MARK} Dryrun x{}", self.num_dry_run); @@ -276,7 +275,7 @@ impl OrtEngine { Ok(()) } - pub fn run(&mut self, xs: &[Array]) -> Result>> { + pub fn run(&mut self, xs: Vec) -> Result> { // inputs dtype alignment let mut xs_ = Vec::new(); let t_pre = std::time::Instant::now(); @@ -330,7 +329,8 @@ impl OrtEngine { .into_owned(), _ => todo!(), }; - ys.push(y_); + // ys.push(y_); + ys.push(X::from(y_)); } let t_post = t_post.elapsed(); self.ts.add_or_push(2, t_post); diff --git a/src/core/mod.rs b/src/core/mod.rs index 170bef2..3bc2d7a 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -11,6 +11,8 @@ pub mod ops; mod options; mod tokenizer_stream; mod ts; +mod vision; +mod x; pub use annotator::Annotator; pub use dataloader::DataLoader; @@ -20,6 +22,9 @@ pub use engine::OrtEngine; pub use logits_sampler::LogitsSampler; pub use metric::Metric; pub use min_opt_max::MinOptMax; +pub use ops::Ops; pub use options::Options; pub use tokenizer_stream::TokenizerStream; pub use ts::Ts; +pub use vision::Vision; +pub use x::X; diff --git a/src/core/ops.rs b/src/core/ops.rs index ac2edf3..e294f65 100644 --- a/src/core/ops.rs +++ b/src/core/ops.rs @@ -1,230 +1,292 @@ use anyhow::Result; -use fast_image_resize as fr; -use image::{DynamicImage, GenericImageView, ImageBuffer}; +use fast_image_resize as fir; +use fast_image_resize::{ + images::{CroppedImageMut, Image}, + pixels::PixelType, + FilterType, ResizeAlg, ResizeOptions, Resizer, +}; +use image::{DynamicImage, GenericImageView}; use ndarray::{s, Array, Axis, IxDyn}; +use rayon::prelude::*; -pub fn standardize(xs: Array, mean: &[f32], std: &[f32]) -> Array { - let mean = Array::from_shape_vec((1, mean.len(), 1, 1), mean.to_vec()).unwrap(); - let std = Array::from_shape_vec((1, std.len(), 1, 1), std.to_vec()).unwrap(); - (xs - mean) / std +use crate::X; + +pub enum Ops<'a> { + Resize(&'a [DynamicImage], u32, u32, &'a str), + Letterbox(&'a [DynamicImage], u32, u32, &'a str, u8, &'a str, bool), + Normalize(f32, f32), + Standardize(&'a [f32], &'a [f32], usize), + Permute(&'a [usize]), + InsertAxis(usize), + Nhwc2nchw, + Nchw2nhwc, + Norm, } -pub fn normalize(xs: Array, min_: f32, max_: f32) -> Array { - (xs - min_) / (max_ - min_) -} +impl Ops<'_> { + pub fn apply(ops: &[Self]) -> Result { + let mut y = X::default(); -pub fn norm2(xs: &Array) -> Array { - let std_ = xs - .mapv(|x| x * x) - .sum_axis(Axis(1)) - .mapv(f32::sqrt) - .insert_axis(Axis(1)); - xs / std_ -} - -pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) { - let r = (w1 / w0).min(h1 / h0); - (r, (w0 * r).round(), (h0 * r).round()) -} - -pub fn build_resizer(ty: &str) -> fr::Resizer { - let ty = match ty { - "box" => fr::FilterType::Box, - "bilinear" => fr::FilterType::Bilinear, - "hamming" => fr::FilterType::Hamming, - "catmullRom" => fr::FilterType::CatmullRom, - "mitchell" => fr::FilterType::Mitchell, - "lanczos3" => fr::FilterType::Lanczos3, - _ => todo!(), - }; - fr::Resizer::new(fr::ResizeAlg::Convolution(ty)) -} - -pub fn resize( - xs: &[DynamicImage], - height: u32, - width: u32, - filter: &str, -) -> Result> { - let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); - let mut resizer = build_resizer(filter); - for (idx, x) in xs.iter().enumerate() { - // src - let src_image = fr::Image::from_vec_u8( - std::num::NonZeroU32::new(x.width()).unwrap(), - std::num::NonZeroU32::new(x.height()).unwrap(), - x.to_rgb8().into_raw(), - fr::PixelType::U8x3, - ) - .unwrap(); - - // dst - let mut dst_image = fr::Image::new( - std::num::NonZeroU32::new(width).unwrap(), - std::num::NonZeroU32::new(height).unwrap(), - src_image.pixel_type(), - ); - - // resize - resizer - .resize(&src_image.view(), &mut dst_image.view_mut()) - .unwrap(); - let buffer = dst_image.into_vec(); - - // to ndarray - let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) - .unwrap() - .mapv(|x| x as f32) - .permuted_axes([2, 0, 1]); - let mut data = ys.slice_mut(s![idx, .., .., ..]); - data.assign(&y_); + for op in ops { + y = match op { + Self::Resize(xs, h, w, filter) => X::resize(xs, *h, *w, filter)?, + Self::Letterbox(xs, h, w, filter, bg, resize_by, center) => { + X::letterbox(xs, *h, *w, filter, *bg, resize_by, *center)? + } + Self::Normalize(min_, max_) => y.normalize(*min_, *max_)?, + Self::Standardize(mean, std, d) => y.standardize(mean, std, *d)?, + Self::Permute(shape) => y.permute(shape)?, + Self::InsertAxis(d) => y.insert_axis(*d)?, + Self::Nhwc2nchw => y.nhwc2nchw()?, + Self::Nchw2nhwc => y.nchw2nhwc()?, + _ => todo!(), + } + } + Ok(y) } - Ok(ys) -} -pub fn letterbox( - xs: &[DynamicImage], - height: u32, - width: u32, - filter: &str, - bg: Option, -) -> Result> { - let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); - let mut resizer = build_resizer(filter); - for (idx, x) in xs.iter().enumerate() { - let (w0, h0) = x.dimensions(); - let (_, w_new, h_new) = scale_wh(w0 as f32, h0 as f32, width as f32, height as f32); + pub fn normalize(x: Array, min: f32, max: f32) -> Result> { + if min > max { + anyhow::bail!("Input `min` is greater than `max`"); + } + Ok((x - min) / (max - min)) + } - // src - let src_image = fr::Image::from_vec_u8( - std::num::NonZeroU32::new(w0).unwrap(), - std::num::NonZeroU32::new(h0).unwrap(), - x.to_rgb8().into_raw(), - fr::PixelType::U8x3, - ) - .unwrap(); + pub fn standardize( + x: Array, + mean: &[f32], + std: &[f32], + dim: usize, + ) -> Result> { + if mean.len() != std.len() { + anyhow::bail!("The lengths of mean and std are not equal."); + } + let shape = x.shape(); + if dim >= shape.len() || shape[dim] != mean.len() { + anyhow::bail!("The specified dimension or mean/std length is inconsistent with the input dimensions."); + } + let mut shape = vec![1; shape.len()]; + shape[dim] = mean.len(); + let mean = Array::from_shape_vec(shape.clone(), mean.to_vec())?; + let std = Array::from_shape_vec(shape, std.to_vec())?; + Ok((x - mean) / std) + } - // dst - let mut dst_image = match bg { - Some(bg) => fr::Image::from_vec_u8( - std::num::NonZeroU32::new(width).unwrap(), - std::num::NonZeroU32::new(height).unwrap(), - vec![bg; 3 * height as usize * width as usize], - src_image.pixel_type(), - ) - .unwrap(), - None => fr::Image::new( - std::num::NonZeroU32::new(width).unwrap(), - std::num::NonZeroU32::new(height).unwrap(), - src_image.pixel_type(), - ), + pub fn permute(x: Array, shape: &[usize]) -> Result> { + if shape.len() != x.shape().len() { + anyhow::bail!( + "Shape inconsistent. Target: {:?}, {}, got: {:?}, {}", + x.shape(), + x.shape().len(), + shape, + shape.len() + ); + } + Ok(x.permuted_axes(shape.to_vec()).into_dyn()) + } + + pub fn nhwc2nchw(x: Array) -> Result> { + Self::permute(x, &[0, 3, 1, 2]) + } + + pub fn nchw2nhwc(x: Array) -> Result> { + Self::permute(x, &[0, 2, 3, 1]) + } + + pub fn insert_axis(x: Array, d: usize) -> Result> { + if x.shape().len() < d { + anyhow::bail!( + "The specified axis insertion position {} exceeds the shape's maximum limit of {}.", + d, + x.shape().len() + ); + } + Ok(x.insert_axis(Axis(d))) + } + + pub fn norm(xs: Array, d: usize) -> Result> { + if xs.shape().len() < d { + anyhow::bail!( + "The specified axis {} exceeds the shape's maximum limit of {}.", + d, + xs.shape().len() + ); + } + let std_ = xs + .mapv(|x| x * x) + .sum_axis(Axis(d)) + .mapv(f32::sqrt) + .insert_axis(Axis(d)); + Ok(xs / std_) + } + + pub fn scale_wh(w0: f32, h0: f32, w1: f32, h1: f32) -> (f32, f32, f32) { + let r = (w1 / w0).min(h1 / h0); + (r, (w0 * r).round(), (h0 * r).round()) + } + + pub fn make_divisible(x: usize, divisor: usize) -> usize { + (x + divisor - 1) / divisor * divisor + } + + // deprecated + pub fn descale_mask(mask: DynamicImage, w0: f32, h0: f32, w1: f32, h1: f32) -> DynamicImage { + // 0 -> 1 + let (_, w, h) = Ops::scale_wh(w1, h1, w0, h0); + let mut mask = mask.to_owned(); + let mask = mask.crop(0, 0, w as u32, h as u32); + mask.resize_exact(w1 as u32, h1 as u32, image::imageops::FilterType::Triangle) + } + + pub fn resize_lumaf32_vec( + v: &[f32], + w0: f32, + h0: f32, + w1: f32, + h1: f32, + crop_src: bool, + filter: &str, + ) -> Result> { + let src_mask = fir::images::Image::from_vec_u8( + w0 as _, + h0 as _, + v.iter().flat_map(|x| x.to_le_bytes()).collect(), + fir::PixelType::F32, + )?; + let mut dst_mask = fir::images::Image::new(w1 as _, h1 as _, src_mask.pixel_type()); + let (mut resizer, mut options) = Self::build_resizer_filter(filter)?; + if crop_src { + let (_, w, h) = Self::scale_wh(w1 as _, h1 as _, w0 as _, h0 as _); + options = options.crop(0., 0., w.into(), h.into()); }; + resizer.resize(&src_mask, &mut dst_mask, &options)?; - // mutable view - let mut dst_view = dst_image - .view_mut() - .crop( - 0, - 0, - std::num::NonZeroU32::new(w_new as u32).unwrap(), - std::num::NonZeroU32::new(h_new as u32).unwrap(), - ) - .unwrap(); + // u8*2 -> f32 + let mask_f32: Vec = dst_mask + .into_vec() + .chunks_exact(4) + .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) + .collect(); - // resize - resizer.resize(&src_image.view(), &mut dst_view).unwrap(); - let buffer = dst_image.into_vec(); - - // to ndarray - let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) - .unwrap() - .mapv(|x| x as f32) - .permuted_axes([2, 0, 1]); - let mut data = ys.slice_mut(s![idx, .., .., ..]); - data.assign(&y_); + // f32 -> u8 + let v: Vec = mask_f32.par_iter().map(|&x| (x * 255.0) as u8).collect(); + Ok(v) } - Ok(ys) -} -pub fn resize_with_fixed_height( - xs: &[DynamicImage], - height: u32, - width: u32, - filter: &str, - bg: Option, -) -> Result> { - let mut ys = Array::ones((xs.len(), 3, height as usize, width as usize)).into_dyn(); - let mut resizer = build_resizer(filter); - for (idx, x) in xs.iter().enumerate() { - let (w0, h0) = x.dimensions(); - let h_new = height; - let w_new = height * w0 / h0; - - // src - let src_image = fr::Image::from_vec_u8( - std::num::NonZeroU32::new(w0).unwrap(), - std::num::NonZeroU32::new(h0).unwrap(), - x.to_rgb8().into_raw(), - fr::PixelType::U8x3, - ) - .unwrap(); - - // dst - let mut dst_image = match bg { - Some(bg) => fr::Image::from_vec_u8( - std::num::NonZeroU32::new(width).unwrap(), - std::num::NonZeroU32::new(height).unwrap(), - vec![bg; 3 * height as usize * width as usize], - src_image.pixel_type(), - ) - .unwrap(), - None => fr::Image::new( - std::num::NonZeroU32::new(width).unwrap(), - std::num::NonZeroU32::new(height).unwrap(), - src_image.pixel_type(), - ), + pub fn resize_luma8_vec( + v: &[u8], + w0: f32, + h0: f32, + w1: f32, + h1: f32, + crop_src: bool, + filter: &str, + ) -> Result> { + let src_mask = + fir::images::Image::from_vec_u8(w0 as _, h0 as _, v.to_vec(), fir::PixelType::U8)?; + let mut dst_mask = fir::images::Image::new(w1 as _, h1 as _, src_mask.pixel_type()); + let (mut resizer, mut options) = Self::build_resizer_filter(filter)?; + if crop_src { + let (_, w, h) = Self::scale_wh(w1 as _, h1 as _, w0 as _, h0 as _); + options = options.crop(0., 0., w.into(), h.into()); }; - - // mutable view - let mut dst_view = dst_image - .view_mut() - .crop( - 0, - 0, - std::num::NonZeroU32::new(w_new).unwrap(), - std::num::NonZeroU32::new(h_new).unwrap(), - ) - .unwrap(); - - // resize - resizer.resize(&src_image.view(), &mut dst_view).unwrap(); - let buffer = dst_image.into_vec(); - - // to ndarray - let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer) - .unwrap() - .mapv(|x| x as f32) - .permuted_axes([2, 0, 1]); - let mut data = ys.slice_mut(s![idx, .., .., ..]); - data.assign(&y_); + resizer.resize(&src_mask, &mut dst_mask, &options)?; + Ok(dst_mask.into_vec()) } - Ok(ys) -} -pub fn build_dyn_image_from_raw(v: Vec, height: u32, width: u32) -> DynamicImage { - let v: ImageBuffer, Vec> = - ImageBuffer::from_raw(width, height, v).expect("Faild to create image from ndarray"); - image::DynamicImage::from(v) -} + pub fn build_resizer_filter(ty: &str) -> Result<(Resizer, ResizeOptions)> { + let ty = match ty { + "Box" => FilterType::Box, + "Bilinear" => FilterType::Bilinear, + "Hamming" => FilterType::Hamming, + "CatmullRom" => FilterType::CatmullRom, + "Mitchell" => FilterType::Mitchell, + "Gaussian" => FilterType::Gaussian, + "Lanczos3" => FilterType::Lanczos3, + _ => anyhow::bail!("Unsupported resize filter type: {ty}"), + }; + Ok(( + Resizer::new(), + ResizeOptions::new().resize_alg(ResizeAlg::Convolution(ty)), + )) + } -pub fn descale_mask(mask: DynamicImage, w0: f32, h0: f32, w1: f32, h1: f32) -> DynamicImage { - // 0 -> 1 - let (_, w, h) = scale_wh(w1, h1, w0, h0); - let mut mask = mask.to_owned(); - let mask = mask.crop(0, 0, w as u32, h as u32); - mask.resize_exact(w1 as u32, h1 as u32, image::imageops::FilterType::Triangle) -} + pub fn resize( + xs: &[DynamicImage], + height: u32, + width: u32, + filter: &str, + ) -> Result> { + let mut ys = Array::ones((xs.len(), height as usize, width as usize, 3)).into_dyn(); + let (mut resizer, options) = Self::build_resizer_filter(filter)?; + for (idx, x) in xs.iter().enumerate() { + let buffer = if x.dimensions() == (width, height) { + x.to_rgba8().into_raw() + } else { + let mut dst_image = Image::new(width, height, PixelType::U8x3); + resizer.resize(x, &mut dst_image, &options)?; + dst_image.into_vec() + }; + let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer)? + .mapv(|x| x as f32); + ys.slice_mut(s![idx, .., .., ..]).assign(&y_); + } + Ok(ys) + } -pub fn make_divisible(x: usize, divisor: usize) -> usize { - (x + divisor - 1) / divisor * divisor + pub fn letterbox( + xs: &[DynamicImage], + height: u32, + width: u32, + filter: &str, + bg: u8, + resize_by: &str, + center: bool, + ) -> Result> { + let mut ys = Array::ones((xs.len(), height as usize, width as usize, 3)).into_dyn(); + let (mut resizer, options) = Self::build_resizer_filter(filter)?; + + for (idx, x) in xs.iter().enumerate() { + let (w0, h0) = x.dimensions(); + let buffer = if w0 == width && h0 == height { + x.to_rgba8().into_raw() + } else { + let (w, h) = match resize_by { + "auto" => { + let r = (width as f32 / w0 as f32).min(height as f32 / h0 as f32); + ( + (w0 as f32 * r).round() as u32, + (h0 as f32 * r).round() as u32, + ) + } + "height" => (height * w0 / h0, height), + "width" => (width, width * h0 / w0), + _ => anyhow::bail!("Option: width, height, auto"), + }; + + let mut dst_image = Image::from_vec_u8( + width, + height, + vec![bg; 3 * height as usize * width as usize], + PixelType::U8x3, + )?; + let (l, t) = if center { + if w == width { + (0, (height - h) / 2) + } else { + ((width - w) / 2, 0) + } + } else { + (0, 0) + }; + let mut cropped_dst_image = CroppedImageMut::new(&mut dst_image, l, t, w, h)?; + resizer.resize(x, &mut cropped_dst_image, &options)?; + dst_image.into_vec() + }; + let y_ = Array::from_shape_vec((height as usize, width as usize, 3), buffer)? + .mapv(|x| x as f32); + ys.slice_mut(s![idx, .., .., ..]).assign(&y_); + } + Ok(ys) + } } diff --git a/src/core/ts.rs b/src/core/ts.rs index 9866953..dc65ae1 100644 --- a/src/core/ts.rs +++ b/src/core/ts.rs @@ -30,7 +30,6 @@ impl Ts { &self.ts } - // TODO: overhead? pub fn add_or_push(&mut self, i: usize, x: Duration) { match self.ts.get_mut(i) { Some(elem) => *elem += x, diff --git a/src/core/vision.rs b/src/core/vision.rs new file mode 100644 index 0000000..b3dad90 --- /dev/null +++ b/src/core/vision.rs @@ -0,0 +1,46 @@ +use crate::{Options, X, Y}; + +pub trait Vision: Sized { + type Input; // DynamicImage + + /// Creates a new instance of the model with the given options. + fn new(options: Options) -> anyhow::Result; + + /// Preprocesses the input data. + fn preprocess(&self, xs: &[Self::Input]) -> anyhow::Result>; + + /// Executes the model on the preprocessed data. + fn inference(&mut self, xs: Vec) -> anyhow::Result>; + + /// Postprocesses the model's output. + fn postprocess(&self, xs: Vec, xs0: &[Self::Input]) -> anyhow::Result>; + + /// Executes the full pipeline. + fn run(&mut self, xs: &[Self::Input]) -> anyhow::Result> { + let ys = self.preprocess(xs)?; + let ys = self.inference(ys)?; + let ys = self.postprocess(ys, xs)?; + Ok(ys) + } + + /// Executes the full pipeline. + fn forward(&mut self, xs: &[Self::Input], profile: bool) -> anyhow::Result> { + let t_pre = std::time::Instant::now(); + let ys = self.preprocess(xs)?; + let t_pre = t_pre.elapsed(); + + let t_exe = std::time::Instant::now(); + let ys = self.inference(ys)?; + let t_exe = t_exe.elapsed(); + + let t_post = std::time::Instant::now(); + let ys = self.postprocess(ys, xs)?; + let t_post = t_post.elapsed(); + + if profile { + println!("> Preprocess: {t_pre:?} | Execution: {t_exe:?} | Postprocess: {t_post:?}"); + } + + Ok(ys) + } +} diff --git a/src/core/x.rs b/src/core/x.rs new file mode 100644 index 0000000..4678fd2 --- /dev/null +++ b/src/core/x.rs @@ -0,0 +1,89 @@ +use anyhow::Result; +use image::DynamicImage; +use ndarray::{Array, Dim, IxDyn, IxDynImpl}; + +use crate::Ops; + +#[derive(Debug, Clone, Default)] +pub struct X(pub Array); + +impl From> for X { + fn from(x: Array) -> Self { + Self(x) + } +} + +impl std::ops::Deref for X { + type Target = Array; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl X { + pub fn zeros(shape: &[usize]) -> Self { + Self(Array::zeros(Dim(IxDynImpl::from(shape.to_vec())))) + } + + pub fn apply(ops: &[Ops]) -> Result { + Ops::apply(ops) + } + + pub fn permute(mut self, shape: &[usize]) -> Result { + self.0 = Ops::permute(self.0, shape)?; + Ok(self) + } + + pub fn nhwc2nchw(mut self) -> Result { + self.0 = Ops::nhwc2nchw(self.0)?; + Ok(self) + } + + pub fn nchw2nhwc(mut self) -> Result { + self.0 = Ops::nchw2nhwc(self.0)?; + Ok(self) + } + + pub fn insert_axis(mut self, d: usize) -> Result { + self.0 = Ops::insert_axis(self.0, d)?; + Ok(self) + } + + pub fn dims(&self) -> &[usize] { + self.0.shape() + } + + pub fn normalize(mut self, min_: f32, max_: f32) -> Result { + self.0 = Ops::normalize(self.0, min_, max_)?; + Ok(self) + } + + pub fn standardize(mut self, mean: &[f32], std: &[f32], dim: usize) -> Result { + self.0 = Ops::standardize(self.0, mean, std, dim)?; + Ok(self) + } + + pub fn norm(mut self, d: usize) -> Result { + self.0 = Ops::norm(self.0, d)?; + Ok(self) + } + + pub fn resize(xs: &[DynamicImage], height: u32, width: u32, filter: &str) -> Result { + Ok(Self(Ops::resize(xs, height, width, filter)?)) + } + + pub fn letterbox( + xs: &[DynamicImage], + height: u32, + width: u32, + filter: &str, + bg: u8, + resize_by: &str, + center: bool, + ) -> Result { + Ok(Self(Ops::letterbox( + xs, height, width, filter, bg, resize_by, center, + )?)) + } +} diff --git a/src/models/blip.rs b/src/models/blip.rs index be4f0c1..128f5fb 100644 --- a/src/models/blip.rs +++ b/src/models/blip.rs @@ -4,7 +4,7 @@ use ndarray::{s, Array, Axis, IxDyn}; use std::io::Write; use tokenizers::Tokenizer; -use crate::{ops, Embedding, LogitsSampler, MinOptMax, Options, OrtEngine, TokenizerStream, Y}; +use crate::{Embedding, LogitsSampler, MinOptMax, Ops, Options, OrtEngine, TokenizerStream, X, Y}; #[derive(Debug)] pub struct Blip { @@ -43,20 +43,23 @@ impl Blip { } pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result { - let xs_ = ops::resize( - xs, - self.height.opt as u32, - self.width.opt as u32, - "bilinear", - )?; - let xs_ = ops::normalize(xs_, 0., 255.); - let xs_ = ops::standardize( - xs_, - &[0.48145466, 0.4578275, 0.40821073], - &[0.26862954, 0.2613026, 0.2757771], - ); - let ys: Vec> = self.visual.run(&[xs_])?; - Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned()))) + let xs_ = X::apply(&[ + Ops::Resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Bilinear", + ), + Ops::Normalize(0., 255.), + Ops::Standardize( + &[0.48145466, 0.4578275, 0.40821073], + &[0.26862954, 0.2613026, 0.2757771], + 3, + ), + Ops::Nhwc2nchw, + ])?; + let ys = self.visual.run(vec![xs_])?; + Ok(Y::default().with_embedding(Embedding::from(ys[0].to_owned()))) } pub fn caption( @@ -100,13 +103,16 @@ impl Blip { loop { let input_ids_nd: Array = Array::from_vec(input_ids.to_owned()).into_dyn(); let input_ids_nd = input_ids_nd.insert_axis(Axis(0)); + let input_ids_nd = X::from(input_ids_nd); let input_ids_attn_mask: Array = Array::ones(input_ids_nd.shape()).into_dyn(); - let y = self.textual.run(&[ + let input_ids_attn_mask = X::from(input_ids_attn_mask); + + let y = self.textual.run(vec![ input_ids_nd, input_ids_attn_mask, - image_embeds.data().to_owned(), - image_embeds_attn_mask.to_owned(), + X::from(image_embeds.data().to_owned()), + X::from(image_embeds_attn_mask.to_owned()), ])?; // N, length, vocab_size let y = y[0].slice(s!(0, -1.., ..)); let logits = y.slice(s!(0, ..)).to_vec(); diff --git a/src/models/clip.rs b/src/models/clip.rs index a10e10e..d2a80a1 100644 --- a/src/models/clip.rs +++ b/src/models/clip.rs @@ -1,9 +1,10 @@ -use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y}; use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, Array2, IxDyn}; +use ndarray::Array2; use tokenizers::{PaddingDirection, PaddingParams, PaddingStrategy, Tokenizer}; +use crate::{Embedding, MinOptMax, Ops, Options, OrtEngine, X, Y}; + #[derive(Debug)] pub struct Clip { pub textual: OrtEngine, @@ -53,20 +54,23 @@ impl Clip { } pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result { - let xs_ = ops::resize( - xs, - self.height.opt as u32, - self.width.opt as u32, - "bilinear", - )?; - let xs_ = ops::normalize(xs_, 0., 255.); - let xs_ = ops::standardize( - xs_, - &[0.48145466, 0.4578275, 0.40821073], - &[0.26862954, 0.2613026, 0.2757771], - ); - let ys: Vec> = self.visual.run(&[xs_])?; - Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned()))) + let xs_ = X::apply(&[ + Ops::Resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Bilinear", + ), + Ops::Normalize(0., 255.), + Ops::Standardize( + &[0.48145466, 0.4578275, 0.40821073], + &[0.26862954, 0.2613026, 0.2757771], + 3, + ), + Ops::Nhwc2nchw, + ])?; + let ys = self.visual.run(vec![xs_])?; + Ok(Y::default().with_embedding(Embedding::from(ys[0].to_owned()))) } pub fn encode_texts(&mut self, texts: &[String]) -> Result { @@ -79,8 +83,9 @@ impl Clip { .flat_map(|i| i.get_ids().iter().map(|&b| b as f32)) .collect(); let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn(); - let ys = self.textual.run(&[xs])?; - Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned()))) + let xs = X::from(xs); + let ys = self.textual.run(vec![xs])?; + Ok(Y::default().with_embedding(Embedding::from(ys[0].to_owned()))) } pub fn batch_visual(&self) -> usize { diff --git a/src/models/db.rs b/src/models/db.rs index 18c6d2c..2e14b2d 100644 --- a/src/models/db.rs +++ b/src/models/db.rs @@ -1,7 +1,8 @@ -use crate::{ops, DynConf, Mbr, MinOptMax, Options, OrtEngine, Polygon, Y}; use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, Axis, IxDyn}; +use ndarray::Axis; + +use crate::{DynConf, Mbr, MinOptMax, Ops, Options, OrtEngine, Polygon, X, Y}; #[derive(Debug)] pub struct DB { @@ -45,54 +46,66 @@ impl DB { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox( - xs, - self.height.opt as u32, - self.width.opt as u32, - "bilinear", - Some(114), - )?; - let xs_ = ops::normalize(xs_, 0., 255.); - let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]); - let ys = self.engine.run(&[xs_])?; + let xs_ = X::apply(&[ + Ops::Letterbox( + xs, + self.height() as u32, + self.width() as u32, + "Bilinear", + 114, + "auto", + false, + ), + Ops::Normalize(0., 255.), + Ops::Standardize(&[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225], 3), + Ops::Nhwc2nchw, + ])?; + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { let mut ys = Vec::new(); for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() { let mut y_bbox = Vec::new(); let mut y_polygons: Vec = Vec::new(); let mut y_mbrs: Vec = Vec::new(); - // reshape - let h = luma.dim()[1]; - let w = luma.dim()[2]; - let luma = luma.into_shape((h, w, 1))?.into_owned(); - - // build image from ndarray - let v = luma - .into_raw_vec() - .iter() - .map(|x| if x <= &self.binary_thresh { 0.0 } else { *x }) - .collect::>(); - let mut mask_im = - ops::build_dyn_image_from_raw(v, self.height() as u32, self.width() as u32); - // input image let image_width = xs0[idx].width() as f32; let image_height = xs0[idx].height() as f32; - // rescale mask image - let (ratio, w_mask, h_mask) = - ops::scale_wh(image_width, image_height, w as f32, h as f32); - let mask_im = mask_im.crop(0, 0, w_mask as u32, h_mask as u32); - let mask_im = mask_im.resize_exact( - image_width as u32, - image_height as u32, - image::imageops::FilterType::Triangle, - ); - let mask_im = mask_im.into_luma8(); + // reshape + let h = luma.dim()[1]; + let w = luma.dim()[2]; + let (ratio, _, _) = Ops::scale_wh(image_width, image_height, w as f32, h as f32); + let v = luma + .into_owned() + .into_raw_vec() + .iter() + .map(|x| { + if x <= &self.binary_thresh { + 0u8 + } else { + (*x * 255.0) as u8 + } + }) + .collect::>(); + + let luma = Ops::resize_luma8_vec( + &v, + self.width() as _, + self.height() as _, + image_width as _, + image_height as _, + true, + "Bilinear", + )?; + let mask_im: image::ImageBuffer, Vec<_>> = + match image::ImageBuffer::from_raw(image_width as _, image_height as _, luma) { + None => continue, + Some(x) => x, + }; // contours let contours: Vec> = @@ -105,14 +118,18 @@ impl DB { { continue; } + let mask = Polygon::default().with_points_imageproc(&contour.points); let delta = mask.area() * ratio.round() as f64 * self.unclip_ratio as f64 / mask.perimeter(); + + // TODO: optimize let mask = mask .unclip(delta, image_width as f64, image_height as f64) .resample(50) // .simplify(6e-4) .convex_hull(); + if let Some(bbox) = mask.bbox() { if bbox.height() < self.min_height || bbox.width() < self.min_width { continue; @@ -131,6 +148,7 @@ impl DB { continue; } } + ys.push( Y::default() .with_bboxes(&y_bbox) diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs index 878d48c..bdf2f20 100644 --- a/src/models/depth_anything.rs +++ b/src/models/depth_anything.rs @@ -1,7 +1,7 @@ -use crate::{ops, Mask, MinOptMax, Options, OrtEngine, Y}; +use crate::{Mask, MinOptMax, Ops, Options, OrtEngine, X, Y}; use anyhow::Result; -use image::{DynamicImage, ImageBuffer}; -use ndarray::{Array, Axis, IxDyn}; +use image::DynamicImage; +use ndarray::Axis; #[derive(Debug)] pub struct DepthAnything { @@ -30,41 +30,50 @@ impl DepthAnything { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize( - xs, - self.height.opt as u32, - self.width.opt as u32, - "lanczos3", - )?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); - let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]); - let ys = self.engine.run(&[xs_])?; + let xs_ = X::apply(&[ + Ops::Resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Lanczos3", + ), + Ops::Normalize(0., 255.), + Ops::Standardize(&[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225], 3), + Ops::Nhwc2nchw, + ])?; + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { let mut ys: Vec = Vec::new(); for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() { - let luma = luma - .into_shape((self.height() as usize, self.width() as usize, 1))? - .into_owned(); - let v = luma.into_raw_vec(); + let (w1, h1) = (xs0[idx].width(), xs0[idx].height()); + let v = luma.into_owned().into_raw_vec(); let max_ = v.iter().max_by(|x, y| x.total_cmp(y)).unwrap(); let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap(); let v = v .iter() .map(|x| (((*x - min_) / (max_ - min_)) * 255.).clamp(0., 255.) as u8) .collect::>(); - let luma: ImageBuffer, Vec> = - ImageBuffer::from_raw(self.width() as u32, self.height() as u32, v) - .expect("Faild to create image from ndarray"); - let luma = image::DynamicImage::from(luma); - let luma = luma.resize_exact( - xs0[idx].width(), - xs0[idx].height(), - image::imageops::FilterType::CatmullRom, + + let luma = Ops::resize_luma8_vec( + &v, + self.width() as _, + self.height() as _, + w1 as _, + h1 as _, + false, + "Bilinear", + )?; + let luma: image::ImageBuffer, Vec<_>> = + match image::ImageBuffer::from_raw(w1 as _, h1 as _, luma) { + None => continue, + Some(x) => x, + }; + ys.push( + Y::default().with_masks(&[Mask::default().with_mask(DynamicImage::from(luma))]), ); - ys.push(Y::default().with_masks(&[Mask::default().with_mask(luma)])); } Ok(ys) } diff --git a/src/models/dinov2.rs b/src/models/dinov2.rs index 0ea2a42..85485ca 100644 --- a/src/models/dinov2.rs +++ b/src/models/dinov2.rs @@ -1,7 +1,6 @@ -use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y}; +use crate::{Embedding, MinOptMax, Ops, Options, OrtEngine, X, Y}; use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, IxDyn}; // use std::path::PathBuf; // use usearch::ffi::{IndexOptions, MetricKind, ScalarKind}; @@ -49,20 +48,23 @@ impl Dinov2 { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result { - let xs_ = ops::resize( - xs, - self.height.opt as u32, - self.width.opt as u32, - "lanczos3", - )?; - let xs_ = ops::normalize(xs_, 0., 255.); - let xs_ = ops::standardize( - xs_, - &[0.48145466, 0.4578275, 0.40821073], - &[0.26862954, 0.2613026, 0.2757771], - ); - let ys: Vec> = self.engine.run(&[xs_])?; - Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned()))) + let xs_ = X::apply(&[ + Ops::Resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Lanczos3", + ), + Ops::Normalize(0., 255.), + Ops::Standardize( + &[0.48145466, 0.4578275, 0.40821073], + &[0.26862954, 0.2613026, 0.2757771], + 3, + ), + Ops::Nhwc2nchw, + ])?; + let ys = self.engine.run(vec![xs_])?; + Ok(Y::default().with_embedding(Embedding::from(ys[0].to_owned()))) } // pub fn build_index(&self, metric: Metric) -> Result { diff --git a/src/models/modnet.rs b/src/models/modnet.rs index 2d586b6..87004eb 100644 --- a/src/models/modnet.rs +++ b/src/models/modnet.rs @@ -1,8 +1,8 @@ use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, Axis, IxDyn}; +use ndarray::Axis; -use crate::{ops, Mask, MinOptMax, Options, OrtEngine, Y}; +use crate::{Mask, MinOptMax, Ops, Options, OrtEngine, X, Y}; #[derive(Debug)] pub struct MODNet { @@ -31,37 +31,42 @@ impl MODNet { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize( - xs, - self.height.opt as u32, - self.width.opt as u32, - "lanczos3", - )?; - let xs_ = ops::normalize(xs_, 127.5, 255.); - let ys = self.engine.run(&[xs_])?; + let xs_ = X::apply(&[ + Ops::Resize( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Lanczos3", + ), + Ops::Normalize(0., 255.), + Ops::Nhwc2nchw, + ])?; + + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { let mut ys: Vec = Vec::new(); for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() { - let luma = luma - .into_shape((self.height() as usize, self.width() as usize, 1))? - .into_owned(); - let v = luma - .into_raw_vec() - .iter() - .map(|x| (x * 255.0) as u8) - .collect::>(); - let luma: image::ImageBuffer, Vec> = - image::ImageBuffer::from_raw(self.width() as u32, self.height() as u32, v) - .expect("Faild to create image from ndarray"); - let luma = image::DynamicImage::from(luma); - let luma = luma.resize_exact( - xs0[idx].width(), - xs0[idx].height(), - image::imageops::FilterType::CatmullRom, - ); + let (w1, h1) = (xs0[idx].width(), xs0[idx].height()); + let luma = luma.mapv(|x| (x * 255.0) as u8); + let luma = Ops::resize_luma8_vec( + &luma.into_raw_vec(), + self.width() as _, + self.height() as _, + w1 as _, + h1 as _, + false, + "Bilinear", + )?; + let luma: image::ImageBuffer, Vec<_>> = + match image::ImageBuffer::from_raw(w1 as _, h1 as _, luma) { + None => continue, + Some(x) => x, + }; + let luma = DynamicImage::from(luma); + ys.push(Y::default().with_masks(&[Mask::default().with_mask(luma)])); } Ok(ys) diff --git a/src/models/rtdetr.rs b/src/models/rtdetr.rs index 855598f..5af4600 100644 --- a/src/models/rtdetr.rs +++ b/src/models/rtdetr.rs @@ -1,9 +1,9 @@ use anyhow::Result; use image::DynamicImage; -use ndarray::{s, Array, Axis, IxDyn}; +use ndarray::{s, Axis}; use regex::Regex; -use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Y}; +use crate::{Bbox, DynConf, MinOptMax, Ops, Options, OrtEngine, X, Y}; #[derive(Debug)] pub struct RTDETR { @@ -56,19 +56,24 @@ impl RTDETR { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox( - xs, - self.height() as u32, - self.width() as u32, - "catmullRom", - Some(114), - )?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); - let ys = self.engine.run(&[xs_])?; + let xs_ = X::apply(&[ + Ops::Letterbox( + xs, + self.height() as u32, + self.width() as u32, + "CatmullRom", + 114, + "auto", + false, + ), + Ops::Normalize(0., 255.), + Ops::Nhwc2nchw, + ])?; + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { const CXYWH_OFFSET: usize = 4; // cxcywh let preds = &xs[0]; diff --git a/src/models/rtmo.rs b/src/models/rtmo.rs index 6fad123..3c20090 100644 --- a/src/models/rtmo.rs +++ b/src/models/rtmo.rs @@ -1,8 +1,8 @@ use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, Axis, IxDyn}; +use ndarray::Axis; -use crate::{ops, Bbox, DynConf, Keypoint, MinOptMax, Options, OrtEngine, Y}; +use crate::{Bbox, DynConf, Keypoint, MinOptMax, Options, OrtEngine, X, Y}; #[derive(Debug)] pub struct RTMO { @@ -39,18 +39,21 @@ impl RTMO { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox( + let xs_ = X::letterbox( xs, self.height() as u32, self.width() as u32, - "catmullRom", - Some(114), - )?; - let ys = self.engine.run(&[xs_])?; + "CatmullRom", + 114, + "auto", + false, + )? + .nhwc2nchw()?; + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { let mut ys: Vec = Vec::new(); let (preds_bboxes, preds_kpts) = if xs[0].ndim() == 3 { (&xs[0], &xs[1]) @@ -80,6 +83,7 @@ impl RTMO { let x2 = xyxyc[2] / ratio; let y2 = xyxyc[3] / ratio; let confidence = xyxyc[4]; + if confidence < self.confs[0] { continue; } diff --git a/src/models/svtr.rs b/src/models/svtr.rs index cb3990c..cacc131 100644 --- a/src/models/svtr.rs +++ b/src/models/svtr.rs @@ -1,8 +1,8 @@ use anyhow::Result; use image::DynamicImage; -use ndarray::{Array, Axis, IxDyn}; +use ndarray::Axis; -use crate::{ops, DynConf, MinOptMax, Options, OrtEngine, Y}; +use crate::{DynConf, MinOptMax, Ops, Options, OrtEngine, X, Y}; #[derive(Debug)] pub struct SVTR { @@ -43,22 +43,27 @@ impl SVTR { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::resize_with_fixed_height( - xs, - self.height.opt as u32, - self.width.opt as u32, - "bilinear", - Some(0), - )?; - let xs_ = ops::normalize(xs_, 0.0, 255.0); - let ys: Vec> = self.engine.run(&[xs_])?; - let ys = ys[0].to_owned(); - self.postprocess(&ys) + let xs_ = X::apply(&[ + Ops::Letterbox( + xs, + self.height.opt as u32, + self.width.opt as u32, + "Bilinear", + 0, + "auto", + false, + ), + Ops::Normalize(0., 255.), + Ops::Nhwc2nchw, + ])?; + + let ys = self.engine.run(vec![xs_])?; + self.postprocess(ys) } - pub fn postprocess(&self, output: &Array) -> Result> { + pub fn postprocess(&self, xs: Vec) -> Result> { let mut ys: Vec = Vec::new(); - for batch in output.axis_iter(Axis(0)) { + for batch in xs[0].axis_iter(Axis(0)) { let preds = batch .axis_iter(Axis(0)) .filter_map(|x| { @@ -91,7 +96,6 @@ impl SVTR { ys.push(Y::default().with_texts(&[text])) } - Ok(ys) } } diff --git a/src/models/yolo.rs b/src/models/yolo.rs index 77211d4..9bf31e3 100644 --- a/src/models/yolo.rs +++ b/src/models/yolo.rs @@ -1,10 +1,12 @@ use anyhow::Result; use clap::ValueEnum; -use image::{DynamicImage, ImageBuffer}; -use ndarray::{s, Array, Axis, IxDyn}; +use image::DynamicImage; +use ndarray::{s, Array, Axis}; use regex::Regex; -use crate::{ops, Bbox, DynConf, Keypoint, Mbr, MinOptMax, Options, OrtEngine, Polygon, Prob, Y}; +use crate::{ + Bbox, DynConf, Keypoint, Mbr, MinOptMax, Ops, Options, OrtEngine, Polygon, Prob, Vision, X, Y, +}; const CXYWH_OFFSET: usize = 4; const KPT_STEP: usize = 3; @@ -49,8 +51,10 @@ pub struct YOLO { apply_probs_softmax: bool, } -impl YOLO { - pub fn new(options: Options) -> Result { +impl Vision for YOLO { + type Input = DynamicImage; + + fn new(options: Options) -> Result { let mut engine = OrtEngine::new(&options)?; let (batch, height, width) = ( engine.batch().to_owned(), @@ -161,32 +165,46 @@ impl YOLO { }) } - pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { + // pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { + fn preprocess(&self, xs: &[Self::Input]) -> Result> { let xs_ = match self.task { YOLOTask::Classify => { - ops::resize(xs, self.height() as u32, self.width() as u32, "bilinear")? + X::resize(xs, self.height() as u32, self.width() as u32, "Bilinear")? + .normalize(0., 255.)? + .nhwc2nchw()? } - _ => ops::letterbox( - xs, - self.height() as u32, - self.width() as u32, - "catmullRom", - Some(114), - )?, + _ => X::apply(&[ + Ops::Letterbox( + xs, + self.height() as u32, + self.width() as u32, + "CatmullRom", + 114, + "auto", + false, + ), + Ops::Normalize(0., 255.), + Ops::Nhwc2nchw, + ])?, }; - let xs_ = ops::normalize(xs_, 0., 255.); - let ys = self.engine.run(&[xs_])?; - self.postprocess(ys, xs) + + Ok(vec![xs_]) + // let ys = self.engine.run(vec![xs_])?; + // self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + fn inference(&mut self, xs: Vec) -> Result> { + self.engine.run(xs) + } + + // pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { + fn postprocess(&self, xs: Vec, xs0: &[Self::Input]) -> Result> { let mut ys = Vec::new(); let protos = if xs.len() == 2 { Some(&xs[1]) } else { None }; for (idx, preds) in xs[0].axis_iter(Axis(0)).enumerate() { let image_width = xs0[idx].width() as f32; let image_height = xs0[idx].height() as f32; - // decode match self.task { YOLOTask::Classify => { let y = if self.apply_probs_softmax { @@ -253,7 +271,7 @@ impl YOLO { let ratio = (self.width() as f32 / image_width) .min(self.height() as f32 / image_height); - // bboxes + // Detection for (i, pred) in preds .axis_iter(if self.anchors_first { Axis(0) } else { Axis(1) }) .enumerate() @@ -324,13 +342,13 @@ impl YOLO { } } - // nms + // NMS let mut y = Y::default().with_bboxes(&y_bboxes); if self.apply_nms { y = y.apply_bboxes_nms(self.iou); } - // keypoints + // Pose if let YOLOTask::Pose = self.task { if let Some(bboxes) = y.bboxes() { let mut y_kpts: Vec> = Vec::new(); @@ -377,7 +395,7 @@ impl YOLO { } } - // masks + // Segment if let YOLOTask::Segment = self.task { if let Some(bboxes) = y.bboxes() { let mut y_polygons: Vec = Vec::new(); @@ -391,60 +409,55 @@ impl YOLO { .slice(s![preds.shape()[0] - self.nm.., bbox.id_born()]) .to_vec() }; + let proto = protos.unwrap().slice(s![idx, .., .., ..]); + let (nm, mh, mw) = proto.dim(); - // coefs * proto -> mask - let (nm, nh, nw) = proto.dim(); + // coefs * proto => mask (311.427µs) let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm) - let proto = proto.to_owned().into_shape((nm, nh * nw))?; // (nm, nh*nw) - let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n) + let proto = proto.into_shape((nm, mh * mw))?; // (nm, mh * mw) + let mask = coefs.dot(&proto); // (mh, mw, n) - // build image from ndarray - let mask: ImageBuffer, Vec> = - match ImageBuffer::from_raw( - nw as u32, - nh as u32, - mask.clone().into_raw_vec(), + // de-scale + let mask = Ops::resize_lumaf32_vec( + &mask.into_raw_vec(), + mw as _, + mh as _, + image_width as _, + image_height as _, + true, + "Bilinear", + )?; + + let mut mask: image::ImageBuffer, Vec<_>> = + match image::ImageBuffer::from_raw( + image_width as _, + image_height as _, + mask, ) { - Some(buf) => buf, None => continue, + Some(x) => x, }; - let mask = image::DynamicImage::from(mask); - // rescale - let mask_original = ops::descale_mask( - mask, - nw as f32, - nh as f32, - image_width, - image_height, - ); - let mut mask_original = mask_original.into_luma8(); + let (xmin, ymin, xmax, ymax) = + (bbox.xmin(), bbox.ymin(), bbox.xmax(), bbox.ymax()); - // crop mask - for y in 0..image_height as usize { - for x in 0..image_width as usize { - if x < bbox.xmin() as usize - || x > bbox.xmax() as usize - || y < bbox.ymin() as usize - || y > bbox.ymax() as usize - // || mask_original.get_pixel(x as u32, y as u32).0 < [127] + // Using bbox to crop the mask (75.93µs) + for (y, row) in mask.enumerate_rows_mut() { + for (x, _, pixel) in row { + if x < xmin as _ + || x > xmax as _ + || y < ymin as _ + || y > ymax as _ { - mask_original.put_pixel( - x as u32, - y as u32, - image::Luma([0u8]), - ); + *pixel = image::Luma([0u8]); } } } - // get masks from image + // Find contours (1.413853ms) let contours: Vec> = - imageproc::contours::find_contours_with_threshold( - &mask_original, - 0, - ); + imageproc::contours::find_contours_with_threshold(&mask, 0); let polygon = match contours .iter() .map(|x| { @@ -458,6 +471,7 @@ impl YOLO { None => continue, Some(x) => x, }; + y_polygons.push(polygon); } y = y.with_polygons(&y_polygons); @@ -469,7 +483,9 @@ impl YOLO { } Ok(ys) } +} +impl YOLO { pub fn batch(&self) -> isize { self.batch.opt } diff --git a/src/models/yolop.rs b/src/models/yolop.rs index b082e87..6675b18 100644 --- a/src/models/yolop.rs +++ b/src/models/yolop.rs @@ -2,7 +2,7 @@ use anyhow::Result; use image::DynamicImage; use ndarray::{s, Array, Axis, IxDyn}; -use crate::{ops, Bbox, DynConf, MinOptMax, Options, OrtEngine, Polygon, Y}; +use crate::{Bbox, DynConf, MinOptMax, Ops, Options, OrtEngine, Polygon, X, Y}; #[derive(Debug)] pub struct YOLOPv2 { @@ -37,19 +37,24 @@ impl YOLOPv2 { } pub fn run(&mut self, xs: &[DynamicImage]) -> Result> { - let xs_ = ops::letterbox( - xs, - self.height() as u32, - self.width() as u32, - "bilinear", - Some(114), - )?; - let xs_ = ops::normalize(xs_, 0., 255.); - let ys = self.engine.run(&[xs_])?; + let xs_ = X::apply(&[ + Ops::Letterbox( + xs, + self.height() as u32, + self.width() as u32, + "Bilinear", + 114, + "auto", + false, + ), + Ops::Normalize(0., 255.), + Ops::Nhwc2nchw, + ])?; + let ys = self.engine.run(vec![xs_])?; self.postprocess(ys, xs) } - pub fn postprocess(&self, xs: Vec>, xs0: &[DynamicImage]) -> Result> { + pub fn postprocess(&self, xs: Vec, xs0: &[DynamicImage]) -> Result> { let mut ys: Vec = Vec::new(); let (xs_da, xs_ll, xs_det) = (&xs[0], &xs[1], &xs[2]); for (idx, ((x_det, x_ll), x_da)) in xs_det @@ -60,7 +65,7 @@ impl YOLOPv2 { { let image_width = xs0[idx].width() as f32; let image_height = xs0[idx].height() as f32; - let (ratio, _, _) = ops::scale_wh( + let (ratio, _, _) = Ops::scale_wh( image_width, image_height, self.width() as f32, @@ -97,32 +102,23 @@ impl YOLOPv2 { .with_id(id as isize), ); } + let mut y_polygons: Vec = Vec::new(); // Drivable area let x_da_0 = x_da.slice(s![0, .., ..]).to_owned(); let x_da_1 = x_da.slice(s![1, .., ..]).to_owned(); let x_da = x_da_1 - x_da_0; - let x_da = x_da - .into_shape((self.height() as usize, self.width() as usize, 1))? - .into_owned(); - let v = x_da - .into_raw_vec() - .iter() - .map(|x| if x < &0.0 { 0.0 } else { 1.0 }) - .collect::>(); - let mask_da = - ops::build_dyn_image_from_raw(v, self.height() as u32, self.width() as u32); - let mask_da = ops::descale_mask( - mask_da, - self.width() as f32, - self.height() as f32, + let contours = match self.get_contours_from_mask( + x_da.into_dyn(), + 0.0, + self.width() as _, + self.height() as _, image_width, image_height, - ); - let mask_da = mask_da.into_luma8(); - let mut y_polygons: Vec = Vec::new(); - let contours: Vec> = - imageproc::contours::find_contours_with_threshold(&mask_da, 0); + ) { + Err(_) => continue, + Ok(x) => x, + }; if let Some(polygon) = contours .iter() .map(|x| { @@ -137,26 +133,17 @@ impl YOLOPv2 { }; // Lane line - let x_ll = x_ll - .into_shape((self.height() as usize, self.width() as usize, 1))? - .into_owned(); - let v = x_ll - .into_raw_vec() - .iter() - .map(|x| if x < &0.5 { 0.0 } else { 1.0 }) - .collect::>(); - let mask_ll = - ops::build_dyn_image_from_raw(v, self.height() as u32, self.width() as u32); - let mask_ll = ops::descale_mask( - mask_ll, - self.width() as f32, - self.height() as f32, + let contours = match self.get_contours_from_mask( + x_ll.to_owned(), + 0.5, + self.width() as _, + self.height() as _, image_width, image_height, - ); - let mask_ll = mask_ll.into_luma8(); - let contours: Vec> = - imageproc::contours::find_contours_with_threshold(&mask_ll, 0); + ) { + Err(_) => continue, + Ok(x) => x, + }; if let Some(polygon) = contours .iter() .map(|x| { @@ -192,4 +179,23 @@ impl YOLOPv2 { pub fn height(&self) -> isize { self.height.opt } + + fn get_contours_from_mask( + &self, + mask: Array, + thresh: f32, + w0: f32, + h0: f32, + w1: f32, + h1: f32, + ) -> Result>> { + let mask = mask.mapv(|x| if x < thresh { 0u8 } else { 255u8 }); + let mask = Ops::resize_luma8_vec(&mask.into_raw_vec(), w0, h0, w1, h1, false, "Bilinear")?; + let mask: image::ImageBuffer, Vec<_>> = + image::ImageBuffer::from_raw(w1 as _, h1 as _, mask) + .ok_or(anyhow::anyhow!("Failed to build image"))?; + let contours: Vec> = + imageproc::contours::find_contours_with_threshold(&mask, 0); + Ok(contours) + } } diff --git a/src/ys/embedding.rs b/src/ys/embedding.rs index c615772..1145214 100644 --- a/src/ys/embedding.rs +++ b/src/ys/embedding.rs @@ -1,6 +1,8 @@ use anyhow::Result; use ndarray::{Array, Axis, Ix2, IxDyn}; +use crate::X; + /// Embedding #[derive(Clone, PartialEq, Default)] pub struct Embedding(Array); @@ -11,6 +13,12 @@ impl std::fmt::Debug for Embedding { } } +impl From for Embedding { + fn from(x: X) -> Self { + Self(x.0) + } +} + impl Embedding { pub fn new(x: Array) -> Self { Self(x)