From cf21eabcaa483d557b25ec16dc6b4bc9614ad6d2 Mon Sep 17 00:00:00 2001 From: Jamjamjon <51357717+jamjamjon@users.noreply.github.com> Date: Wed, 21 May 2025 13:29:17 +0800 Subject: [PATCH] Add PPOCRv5 DET and REC models (#98) * Add PPOCRv5 DET and REC models * Add Text Struct --- examples/db/main.rs | 20 ++++++++----- examples/smolvlm/main.rs | 2 +- examples/svtr/README.md | 23 +++++++------- examples/svtr/main.rs | 9 +++++- src/inference/mod.rs | 2 ++ src/inference/text.rs | 56 +++++++++++++++++++++++++++++++++++ src/inference/y.rs | 4 +-- src/models/blip/impl.rs | 2 +- src/models/db/config.rs | 14 +++++++++ src/models/florence2/impl.rs | 4 +-- src/models/moondream2/impl.rs | 4 +-- src/models/slanet/impl.rs | 8 +++-- src/models/smolvlm/impl.rs | 2 +- src/models/svtr/config.rs | 14 ++++++++- src/models/svtr/impl.rs | 9 +++--- src/models/trocr/impl.rs | 2 +- 16 files changed, 137 insertions(+), 38 deletions(-) create mode 100644 src/inference/text.rs diff --git a/examples/db/main.rs b/examples/db/main.rs index c4e6373..6bd7b16 100644 --- a/examples/db/main.rs +++ b/examples/db/main.rs @@ -28,9 +28,13 @@ struct Args { #[argh(option, default = "false")] show_hbbs_conf: bool, - /// show mbrs confidence + /// show obbs confidence #[argh(option, default = "false")] show_obbs_conf: bool, + + /// show polygons confidence + #[argh(option, default = "false")] + show_polygons_conf: bool, } fn main() -> Result<()> { @@ -43,7 +47,7 @@ fn main() -> Result<()> { // build model let config = match &args.model { Some(m) => Config::db().with_model_file(m), - None => Config::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?), + None => Config::ppocr_det_v5_mobile().with_model_dtype(args.dtype.as_str().try_into()?), } .with_device_all(args.device.as_str().try_into()?) .commit()?; @@ -66,16 +70,16 @@ fn main() -> Result<()> { .with_polygon_style( Style::polygon() .with_visible(true) - .with_text_visible(false) - .show_confidence(true) - .show_id(true) - .show_name(true) + .with_text_visible(true) + .show_confidence(args.show_polygons_conf) + .show_id(false) + .show_name(false) .with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())), ) .with_hbb_style( Style::hbb() .with_visible(args.show_hbbs) - .with_text_visible(false) + .with_text_visible(true) .with_thickness(1) .show_confidence(args.show_hbbs_conf) .show_id(false) @@ -84,7 +88,7 @@ fn main() -> Result<()> { .with_obb_style( Style::obb() .with_visible(args.show_obbs) - .with_text_visible(false) + .with_text_visible(true) .show_confidence(args.show_obbs_conf) .show_id(false) .show_name(false), diff --git a/examples/smolvlm/main.rs b/examples/smolvlm/main.rs index 1f585cf..87d58b0 100644 --- a/examples/smolvlm/main.rs +++ b/examples/smolvlm/main.rs @@ -47,7 +47,7 @@ fn main() -> Result<()> { for y in ys.iter() { if let Some(texts) = y.texts() { for text in texts { - println!("[User]: {}\n\n[Assistant]:{}", args.prompt, text); + println!("[User]: {}\n\n[Assistant]:{:?}", args.prompt, text); } } } diff --git a/examples/svtr/README.md b/examples/svtr/README.md index 82c10c5..5229716 100644 --- a/examples/svtr/README.md +++ b/examples/svtr/README.md @@ -7,15 +7,14 @@ cargo run -r -F cuda --example svtr -- --device cuda ## Results ```shell -["./examples/svtr/images/license-ch-2.png"]: Ys([Y { Texts: [Text("粤A·68688")] }]) -["./examples/svtr/images/license-ch.png"]: Ys([Y { Texts: [Text("冀B6G000")] }]) -["./examples/svtr/images/sign-ch-2.png"]: Ys([Y { Texts: [Text("我在南锣鼓捣猫呢")] }]) -["./examples/svtr/images/sign-ch.png"]: Ys([Y { Texts: [Text("小菊儿胡同71号")] }]) -["./examples/svtr/images/text-110022345.png"]: Ys([Y { Texts: [Text("110022345")] }]) -["./examples/svtr/images/text-ch.png"]: Ys([Y { Texts: [Text("你有这么高速运转的机械进入中国,记住我给出的原理")] }]) -["./examples/svtr/images/text-en-2.png"]: Ys([Y { Texts: [Text("from the background, but also separate text instances which")] }]) -["./examples/svtr/images/text-en-dark.png"]: Ys([Y { Texts: [Text("Please lower your volume")] }]) -["./examples/svtr/images/text-en.png"]: Ys([Y { Texts: [Text("are closely jointed. Some examples are illustrated in Fig.7.")] }]) -["./examples/svtr/images/text-hello-rust-handwritten.png"]: Ys([Y { Texts: [Text("HeloRuSt")] }]) - -``` \ No newline at end of file +ys: [Y { Texts: [Text { text: "粤A68688", confidence: 0.9940011 }] }] +ys: [Y { Texts: [Text { text: "冀B6G000", confidence: 0.86073524 }] }] +ys: [Y { Texts: [Text { text: "我在南锣鼓捣猫呢", confidence: 0.99346924 }] }] +ys: [Y { Texts: [Text { text: "小菊儿胡同71号", confidence: 0.99450684 }] }] +ys: [Y { Texts: [Text { text: "110022345", confidence: 0.99994576 }] }] +ys: [Y { Texts: [Text { text: "你有这么高速运转的机械进入中国,记住我给出的原理", confidence: 0.9996338 }] }] +ys: [Y { Texts: [Text { text: "from the background, but also separate text instances which", confidence: 0.9954648 }] }] +ys: [Y { Texts: [Text { text: "Please lower your yolume", confidence: 0.93910724 }] }] +ys: [Y { Texts: [Text { text: "are closely jointed. Some examples are illustrated in Fig.7.", confidence: 0.9959717 }] }] +ys: [Y { Texts: [Text { text: "HelloRust", confidence: 0.97661674 }] }] +``` diff --git a/examples/svtr/main.rs b/examples/svtr/main.rs index 779afb7..d0d9d53 100644 --- a/examples/svtr/main.rs +++ b/examples/svtr/main.rs @@ -11,6 +11,10 @@ struct Args { /// dtype #[argh(option, default = "String::from(\"auto\")")] dtype: String, + + /// max text length + #[argh(option, default = "960")] + max_text_length: usize, } fn main() -> Result<()> { @@ -22,9 +26,12 @@ fn main() -> Result<()> { let args: Args = argh::from_env(); // build model - let config = Config::ppocr_rec_v4_ch() + let config = Config::ppocr_rec_v5_mobile() + // ppocr_rec_v5_server() + // ppocr_rec_v4_ch() // ppocr_rec_v4_en() // repsvtr_ch() + .with_model_ixx(0, 3, args.max_text_length.into()) .with_model_device(args.device.as_str().try_into()?) .with_model_dtype(args.dtype.as_str().try_into()?) .commit()?; diff --git a/src/inference/mod.rs b/src/inference/mod.rs index 1cb35bd..7cd2231 100644 --- a/src/inference/mod.rs +++ b/src/inference/mod.rs @@ -9,6 +9,7 @@ mod obb; mod polygon; mod prob; mod skeleton; +mod text; mod x; mod xs; mod y; @@ -29,6 +30,7 @@ pub use obb::*; pub use polygon::*; pub use prob::*; pub use skeleton::*; +pub use text::*; pub use x::X; pub use xs::Xs; pub use y::*; diff --git a/src/inference/text.rs b/src/inference/text.rs new file mode 100644 index 0000000..8c3f910 --- /dev/null +++ b/src/inference/text.rs @@ -0,0 +1,56 @@ +use aksr::Builder; + +use crate::{impl_meta_methods, InstanceMeta, Style}; + +#[derive(Builder, Clone, Default)] +pub struct Text { + text: String, + meta: InstanceMeta, + style: Option