diff --git a/README.md b/README.md
index 661deb7..18ead33 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@
 | [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
 | [YOLOv12](https://github.com/sunsmarterjie/yolov12) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
 | [RT-DETR](https://github.com/lyuwenyu/RT-DETR) | Object Detection | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | | |
+| [RF-DETR](https://github.com/roboflow/rf-detr) | Object Detection | [demo](examples/rfdetr) | ✅ | ✅ | ✅ | | |
 | [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.8/configs/picodet) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | ✅ | | |
 | [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | ✅ | | |
 | [D-FINE](https://github.com/manhbd-22022602/D-FINE) | Object Detection | [demo](examples/d-fine) | ✅ | ✅ | ✅ | | |
diff --git a/examples/rfdetr/README.md b/examples/rfdetr/README.md
new file mode 100644
index 0000000..a196caf
--- /dev/null
+++ b/examples/rfdetr/README.md
@@ -0,0 +1,17 @@
+## Quick Start
+
+```shell
+cargo run -r --example rfdetr
+```
+
+## Results
+
+```
+[Bboxes]: Found 6 objects
+0: Bbox { xyxy: [221.55753, 408.0652, 345.23325, 860.2527], class_id: 1, name: Some("person"), confidence: 0.93212366 }
+1: Bbox { xyxy: [44.967827, 397.84177, 246.13187, 905.7567], class_id: 1, name: Some("person"), confidence: 0.93540853 }
+2: Bbox { xyxy: [6.2678833, 233.208, 801.6806, 737.4714], class_id: 6, name: Some("bus"), confidence: 0.93637216 }
+3: Bbox { xyxy: [0.0, 555.167, 77.74801, 870.2772], class_id: 1, name: Some("person"), confidence: 0.85163206 }
+4: Bbox { xyxy: [133.94543, 473.6574, 149.62558, 507.99875], class_id: 32, name: Some("tie"), confidence: 0.2992424 }
+5: Bbox { xyxy: [669.81836, 395.28635, 813.44855, 879.9562], class_id: 1, name: Some("person"), confidence: 0.83661026 }
+```
diff --git a/examples/rfdetr/main.rs b/examples/rfdetr/main.rs
new file mode 100644
index 0000000..c86ec96
--- /dev/null
+++ b/examples/rfdetr/main.rs
@@ -0,0 +1,37 @@
+use anyhow::Result;
+use usls::{models::RFDETR, Annotator, DataLoader, Options};
+
+fn main() -> Result<()> { // RF-DETR demo: detect, print and annotate ./assets/bus.jpg
+    tracing_subscriber::fmt()
+        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+        .init();
+
+    // options: take the RF-DETR base preset, commit it, and build the model from it
+    let options = Options::rfdetr_base().commit()?;
+    let mut model = RFDETR::new(options)?;
+
+    // load: read the sample image into a one-element batch
+    let xs = [DataLoader::try_read("./assets/bus.jpg")?];
+
+    // run: full forward pass over the batch
+    let ys = model.forward(&xs)?;
+
+    // extract bboxes: for every result that has boxes, print the count and each box
+    for y in ys.iter() {
+        if let Some(bboxes) = y.bboxes() {
+            println!("[Bboxes]: Found {} objects", bboxes.len());
+            for (i, bbox) in bboxes.iter().enumerate() {
+                println!("{}: {:?}", i, bbox)
+            }
+        }
+    }
+
+    // annotate: box thickness 3; the save-out setting is taken from the model spec
+    let annotator = Annotator::default()
+        .with_bboxes_thickness(3)
+        .with_saveout(model.spec());
+    annotator.annotate(&xs, &ys);
+
+    Ok(())
+}
diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md
index 711c097..35f9c32 100644
--- a/examples/rtdetr/README.md
+++ b/examples/rtdetr/README.md
@@ -13,5 +13,4 @@ cargo run -r --example rtdetr
 2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
 3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
 4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
-
 ```
diff --git a/src/misc/labels.rs b/src/misc/labels.rs
index 0415615..237dae0 100644
--- a/src/misc/labels.rs
+++ b/src/misc/labels.rs
@@ -120,6 +120,100 @@ pub const COCO_CLASS_NAMES_80: [&str; 80] = [
     "toothbrush",
 ];
 
+pub const COCO_CLASS_NAMES_91: [&str; 91] = [ // indexed by class id: slot 0 is "background", gaps are "unused"
+    "background", // 0
+    "person", // 1
+    "bicycle", // 2
+    "car", // 3
+    "motorcycle", // 4
+    "airplane", // 5
+    "bus", // 6
+    "train", // 7
+    "truck", // 8
+    "boat", // 9
+    "traffic light", // 10
+    "fire hydrant", // 11
+    "unused", // 12
+    "stop sign", // 13
+    "parking meter", // 14
+    "bench", // 15
+    "bird", // 16
+    "cat", // 17
+    "dog", // 18
+    "horse", // 19
+    "sheep", // 20
+    "cow", // 21
+    "elephant", // 22
+    "bear", // 23
+    "zebra", // 24
+    "giraffe", // 25
+    "unused", // 26
+    "backpack", // 27
+    "umbrella", // 28
+    "unused", // 29
+    "unused", // 30
+    "handbag", // 31
+    "tie", // 32
+    "suitcase", // 33
+    "frisbee", // 34
+    "skis", // 35
+    "snowboard", // 36
+    "sports ball", // 37
+    "kite", // 38
+    "baseball bat", // 39
+    "baseball glove", // 40
+    "skateboard", // 41
+    "surfboard", // 42
+    "tennis racket", // 43
+    "bottle", // 44
+    "unused", // 45
+    "wine glass", // 46
+    "cup", // 47
+    "fork", // 48
+    "knife", // 49
+    "spoon", // 50
+    "bowl", // 51
+    "banana", // 52
+    "apple", // 53
+    "sandwich", // 54
+    "orange", // 55
+    "broccoli", // 56
+    "carrot", // 57
+    "hot dog", // 58
+    "pizza", // 59
+    "donut", // 60
+    "cake", // 61
+    "chair", // 62
+    "couch", // 63
+    "potted plant", // 64
+    "bed", // 65
+    "unused", // 66
+    "dining table", // 67
+    "unused", // 68
+    "unused", // 69
+    "toilet", // 70
+    "unused", // 71
+    "tv", // 72
+    "laptop", // 73
+    "mouse", // 74
+    "remote", // 75
+    "keyboard", // 76
+    "cell phone", // 77
+    "microwave", // 78
+    "oven", // 79
+    "toaster", // 80
+    "sink", // 81
+    "refrigerator", // 82
+    "book", // 83
+    "unused", // 84
+    "clock", // 85
+    "vase", // 86
+    "scissors", // 87
+    "teddy bear", // 88
+    "hair drier", // 89
+    "toothbrush", // 90
+];
+
 pub const BODY_PARTS_NAMES_28: [&str; 28] = [
     "Background",
     "Apparel",
diff --git a/src/misc/onnx.rs b/src/misc/onnx.rs
index 33bdfc0..d31a822 100644
--- a/src/misc/onnx.rs
+++ b/src/misc/onnx.rs
@@ -1,3 +1,4 @@
+#![allow(clippy::doc_overindented_list_items)] // NOTE(review): presumably silences this lint for the prost-generated doc comments in this file — confirm
 //! ONNX file generated by prost-build.
 
 // This file is @generated by prost-build.
diff --git a/src/models/mod.rs b/src/models/mod.rs
index 9f260e1..bbab7a8 100644
--- a/src/models/mod.rs
+++ b/src/models/mod.rs
@@ -20,6 +20,7 @@ mod moondream2;
 mod owl;
 mod picodet;
 mod pipeline;
+mod rfdetr;
 mod rtdetr;
 mod rtmo;
 mod sam;
@@ -44,6 +45,7 @@ pub use moondream2::*;
 pub use owl::*;
 pub use picodet::*;
 pub use pipeline::*;
+pub use rfdetr::*;
 pub use rtdetr::*;
 pub use rtmo::*;
 pub use sam::*;
diff --git a/src/models/rfdetr/README.md b/src/models/rfdetr/README.md
new file mode 100644
index 0000000..9c0ffcf
--- /dev/null
+++ b/src/models/rfdetr/README.md
@@ -0,0 +1,9 @@
+# RF-DETR: SOTA Real-Time Object Detection Model
+
+## Official Repository
+
+The official repository can be found on: [GitHub](https://github.com/roboflow/rf-detr)
+
+## Example
+
+Refer to the [example](../../../examples/rfdetr)
diff --git a/src/models/rfdetr/config.rs b/src/models/rfdetr/config.rs
new file mode 100644
index 0000000..1084c90
--- /dev/null
+++ b/src/models/rfdetr/config.rs
@@ -0,0 +1,27 @@
+use crate::COCO_CLASS_NAMES_91;
+
+/// Model configuration for `RF-DETR`
+impl crate::Options {
+    /// Shared RF-DETR defaults: batch size 1, 560 for input 0 on axes 2 and 3
+    /// (presumably height and width), `FitAdaptive` resizing, the mean/std
+    /// normalization constants below, a 0.25 confidence threshold, and the
+    /// 91-entry COCO label table.
+    pub fn rfdetr() -> Self {
+        Self::default()
+            .with_model_name("rfdetr")
+            .with_batch_size(1)
+            .with_model_ixx(0, 2, 560.into())
+            .with_model_ixx(0, 3, 560.into())
+            .with_resize_mode(crate::ResizeMode::FitAdaptive)
+            .with_normalize(true)
+            .with_image_mean(&[0.485, 0.456, 0.406])
+            .with_image_std(&[0.229, 0.224, 0.225])
+            .with_class_confs(&[0.25])
+            .with_class_names(&COCO_CLASS_NAMES_91)
+    }
+
+    /// [`Self::rfdetr`] with the model file set to `base.onnx`.
+    pub fn rfdetr_base() -> Self {
+        Self::rfdetr().with_model_file("base.onnx")
+    }
+}
diff --git a/src/models/rfdetr/impl.rs b/src/models/rfdetr/impl.rs
new file mode 100644
index 0000000..ea80bdb
--- /dev/null
+++ b/src/models/rfdetr/impl.rs
@@ -0,0 +1,157 @@
+use aksr::Builder;
+use anyhow::Result;
+use image::DynamicImage;
+use ndarray::{s, Axis};
+use rayon::prelude::*;
+
+use crate::{elapsed, Bbox, DynConf, Engine, Options, Processor, Ts, Xs, Ys, Y};
+
+/// RF-DETR object detector: an inference engine bundled with the image
+/// processor, class names and confidence thresholds used to decode its outputs.
+#[derive(Debug, Builder)]
+pub struct RFDETR {
+    engine: Engine,
+    height: usize,
+    width: usize,
+    batch: usize,
+    names: Vec<String>,
+    confs: DynConf,
+    ts: Ts,
+    processor: Processor,
+    spec: String,
+}
+
+impl RFDETR {
+    /// Builds the detector from `options`.
+    ///
+    /// Height and width default to 560 when `try_height` / `try_width` yield nothing.
+    ///
+    /// # Errors
+    /// Returns an error if the engine or the processor cannot be created.
+    ///
+    /// # Panics
+    /// Panics if `options` carries no class names.
+    pub fn new(options: Options) -> Result<Self> {
+        let engine = options.to_engine()?;
+        let (batch, height, width, ts) = (
+            engine.batch().opt(),
+            engine.try_height().unwrap_or(&560.into()).opt(),
+            engine.try_width().unwrap_or(&560.into()).opt(),
+            engine.ts.clone(),
+        );
+        let spec = engine.spec().to_owned();
+        let processor = options
+            .to_processor()?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
+        let names = options
+            .class_names()
+            .expect("No class names specified.")
+            .to_vec();
+        // Sized by the number of class names; `postprocess` indexes it by class id.
+        let confs = DynConf::new(options.class_confs(), names.len());
+
+        Ok(Self {
+            engine,
+            height,
+            width,
+            batch,
+            spec,
+            names,
+            confs,
+            ts,
+            processor,
+        })
+    }
+
+    /// Runs the image processor over `xs` and wraps the result as the single model input.
+    fn preprocess(&mut self, xs: &[DynamicImage]) -> Result<Xs> {
+        let x1 = self.processor.process_images(xs)?;
+        let xs = Xs::from(vec![x1]);
+
+        Ok(xs)
+    }
+
+    /// Feeds the prepared inputs to the engine.
+    fn inference(&mut self, xs: Xs) -> Result<Xs> {
+        self.engine.run(xs)
+    }
+
+    /// Runs preprocess, inference and postprocess in order, each wrapped in `elapsed!`.
+    pub fn forward(&mut self, xs: &[DynamicImage]) -> Result<Ys> {
+        let ys = elapsed!("preprocess", self.ts, { self.preprocess(xs)? });
+        let ys = elapsed!("inference", self.ts, { self.inference(ys)? });
+        let ys = elapsed!("postprocess", self.ts, { self.postprocess(ys)? });
+
+        Ok(ys)
+    }
+
+    /// Decodes the engine outputs into one `Y` per batch item.
+    ///
+    /// For each row of logits the best class is chosen after a sigmoid; rows
+    /// scoring below that class's confidence threshold are dropped.
+    fn postprocess(&mut self, xs: Xs) -> Result<Ys> {
+        // 0: bboxes
+        // 1: logits
+        let ys: Vec<Y> = xs[1]
+            .axis_iter(Axis(0))
+            .into_par_iter()
+            .enumerate()
+            .filter_map(|(idx, logits)| {
+                let (image_height, image_width) = self.processor.image0s_size[idx];
+                let ratio = self.processor.scale_factors_hw[idx][0];
+                let y_bboxes: Vec<Bbox> = logits
+                    .axis_iter(Axis(0))
+                    .into_par_iter()
+                    .enumerate()
+                    .filter_map(|(i, clss)| {
+                        let (class_id, &conf) = clss
+                            .mapv(|x| 1. / ((-x).exp() + 1.))
+                            .iter()
+                            .enumerate()
+                            .max_by(|a, b| a.1.total_cmp(b.1))?;
+
+                        // Thresholds are per class (`confs` is sized by `names.len()`), so
+                        // look up by `class_id`, not by the batch index `idx`.
+                        if conf < self.confs[class_id] {
+                            return None;
+                        }
+
+                        // Read as (cx, cy, w, h); divided by `ratio`, then scaled by the model input size.
+                        let bbox = xs[0].slice(s![idx, i, ..]).mapv(|x| x / ratio);
+                        let cx = bbox[0] * self.width as f32;
+                        let cy = bbox[1] * self.height as f32;
+                        let w = bbox[2] * self.width as f32;
+                        let h = bbox[3] * self.height as f32;
+                        let x = cx - w / 2.;
+                        let y = cy - h / 2.;
+                        let x = x.max(0.0).min(image_width as _);
+                        let y = y.max(0.0).min(image_height as _);
+
+                        Some(
+                            Bbox::default()
+                                .with_xywh(x, y, w, h)
+                                .with_confidence(conf)
+                                .with_id(class_id as _)
+                                .with_name(&self.names[class_id]),
+                        )
+                    })
+                    .collect();
+
+                let mut y = Y::default();
+                if !y_bboxes.is_empty() {
+                    y = y.with_bboxes(&y_bboxes);
+                }
+
+                Some(y)
+            })
+            .collect();
+
+        Ok(ys.into())
+    }
+
+    /// Calls `summary()` on `self.ts`.
+    pub fn summary(&mut self) {
+        self.ts.summary();
+    }
+}
diff --git a/src/models/rfdetr/mod.rs b/src/models/rfdetr/mod.rs
new file mode 100644
index 0000000..fbd2b75
--- /dev/null
+++ b/src/models/rfdetr/mod.rs
@@ -0,0 +1,4 @@
+mod config;
+mod r#impl;
+
+pub use r#impl::*;