mirror of
https://github.com/mii443/usls.git
synced 2025-08-22 15:45:41 +00:00
Add PPOCRv5 DET and REC models (#98)
* Add PPOCRv5 DET and REC models * Add Text Struct
This commit is contained in:
@ -28,9 +28,13 @@ struct Args {
|
||||
#[argh(option, default = "false")]
|
||||
show_hbbs_conf: bool,
|
||||
|
||||
/// show mbrs confidence
|
||||
/// show obbs confidence
|
||||
#[argh(option, default = "false")]
|
||||
show_obbs_conf: bool,
|
||||
|
||||
/// show polygons confidence
|
||||
#[argh(option, default = "false")]
|
||||
show_polygons_conf: bool,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@ -43,7 +47,7 @@ fn main() -> Result<()> {
|
||||
// build model
|
||||
let config = match &args.model {
|
||||
Some(m) => Config::db().with_model_file(m),
|
||||
None => Config::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
|
||||
None => Config::ppocr_det_v5_mobile().with_model_dtype(args.dtype.as_str().try_into()?),
|
||||
}
|
||||
.with_device_all(args.device.as_str().try_into()?)
|
||||
.commit()?;
|
||||
@ -66,16 +70,16 @@ fn main() -> Result<()> {
|
||||
.with_polygon_style(
|
||||
Style::polygon()
|
||||
.with_visible(true)
|
||||
.with_text_visible(false)
|
||||
.show_confidence(true)
|
||||
.show_id(true)
|
||||
.show_name(true)
|
||||
.with_text_visible(true)
|
||||
.show_confidence(args.show_polygons_conf)
|
||||
.show_id(false)
|
||||
.show_name(false)
|
||||
.with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
|
||||
)
|
||||
.with_hbb_style(
|
||||
Style::hbb()
|
||||
.with_visible(args.show_hbbs)
|
||||
.with_text_visible(false)
|
||||
.with_text_visible(true)
|
||||
.with_thickness(1)
|
||||
.show_confidence(args.show_hbbs_conf)
|
||||
.show_id(false)
|
||||
@ -84,7 +88,7 @@ fn main() -> Result<()> {
|
||||
.with_obb_style(
|
||||
Style::obb()
|
||||
.with_visible(args.show_obbs)
|
||||
.with_text_visible(false)
|
||||
.with_text_visible(true)
|
||||
.show_confidence(args.show_obbs_conf)
|
||||
.show_id(false)
|
||||
.show_name(false),
|
||||
|
@ -47,7 +47,7 @@ fn main() -> Result<()> {
|
||||
for y in ys.iter() {
|
||||
if let Some(texts) = y.texts() {
|
||||
for text in texts {
|
||||
println!("[User]: {}\n\n[Assistant]:{}", args.prompt, text);
|
||||
println!("[User]: {}\n\n[Assistant]:{:?}", args.prompt, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,15 +7,14 @@ cargo run -r -F cuda --example svtr -- --device cuda
|
||||
## Results
|
||||
|
||||
```shell
|
||||
["./examples/svtr/images/license-ch-2.png"]: Ys([Y { Texts: [Text("粤A·68688")] }])
|
||||
["./examples/svtr/images/license-ch.png"]: Ys([Y { Texts: [Text("冀B6G000")] }])
|
||||
["./examples/svtr/images/sign-ch-2.png"]: Ys([Y { Texts: [Text("我在南锣鼓捣猫呢")] }])
|
||||
["./examples/svtr/images/sign-ch.png"]: Ys([Y { Texts: [Text("小菊儿胡同71号")] }])
|
||||
["./examples/svtr/images/text-110022345.png"]: Ys([Y { Texts: [Text("110022345")] }])
|
||||
["./examples/svtr/images/text-ch.png"]: Ys([Y { Texts: [Text("你有这么高速运转的机械进入中国,记住我给出的原理")] }])
|
||||
["./examples/svtr/images/text-en-2.png"]: Ys([Y { Texts: [Text("from the background, but also separate text instances which")] }])
|
||||
["./examples/svtr/images/text-en-dark.png"]: Ys([Y { Texts: [Text("Please lower your volume")] }])
|
||||
["./examples/svtr/images/text-en.png"]: Ys([Y { Texts: [Text("are closely jointed. Some examples are illustrated in Fig.7.")] }])
|
||||
["./examples/svtr/images/text-hello-rust-handwritten.png"]: Ys([Y { Texts: [Text("HeloRuSt")] }])
|
||||
|
||||
```
|
||||
ys: [Y { Texts: [Text { text: "粤A68688", confidence: 0.9940011 }] }]
|
||||
ys: [Y { Texts: [Text { text: "冀B6G000", confidence: 0.86073524 }] }]
|
||||
ys: [Y { Texts: [Text { text: "我在南锣鼓捣猫呢", confidence: 0.99346924 }] }]
|
||||
ys: [Y { Texts: [Text { text: "小菊儿胡同71号", confidence: 0.99450684 }] }]
|
||||
ys: [Y { Texts: [Text { text: "110022345", confidence: 0.99994576 }] }]
|
||||
ys: [Y { Texts: [Text { text: "你有这么高速运转的机械进入中国,记住我给出的原理", confidence: 0.9996338 }] }]
|
||||
ys: [Y { Texts: [Text { text: "from the background, but also separate text instances which", confidence: 0.9954648 }] }]
|
||||
ys: [Y { Texts: [Text { text: "Please lower your yolume", confidence: 0.93910724 }] }]
|
||||
ys: [Y { Texts: [Text { text: "are closely jointed. Some examples are illustrated in Fig.7.", confidence: 0.9959717 }] }]
|
||||
ys: [Y { Texts: [Text { text: "HelloRust", confidence: 0.97661674 }] }]
|
||||
```
|
||||
|
@ -11,6 +11,10 @@ struct Args {
|
||||
/// dtype
|
||||
#[argh(option, default = "String::from(\"auto\")")]
|
||||
dtype: String,
|
||||
|
||||
/// max text length
|
||||
#[argh(option, default = "960")]
|
||||
max_text_length: usize,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@ -22,9 +26,12 @@ fn main() -> Result<()> {
|
||||
let args: Args = argh::from_env();
|
||||
|
||||
// build model
|
||||
let config = Config::ppocr_rec_v4_ch()
|
||||
let config = Config::ppocr_rec_v5_mobile()
|
||||
// ppocr_rec_v5_server()
|
||||
// ppocr_rec_v4_ch()
|
||||
// ppocr_rec_v4_en()
|
||||
// repsvtr_ch()
|
||||
.with_model_ixx(0, 3, args.max_text_length.into())
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.commit()?;
|
||||
|
@ -9,6 +9,7 @@ mod obb;
|
||||
mod polygon;
|
||||
mod prob;
|
||||
mod skeleton;
|
||||
mod text;
|
||||
mod x;
|
||||
mod xs;
|
||||
mod y;
|
||||
@ -29,6 +30,7 @@ pub use obb::*;
|
||||
pub use polygon::*;
|
||||
pub use prob::*;
|
||||
pub use skeleton::*;
|
||||
pub use text::*;
|
||||
pub use x::X;
|
||||
pub use xs::Xs;
|
||||
pub use y::*;
|
||||
|
56
src/inference/text.rs
Normal file
56
src/inference/text.rs
Normal file
@ -0,0 +1,56 @@
|
||||
use aksr::Builder;
|
||||
|
||||
use crate::{impl_meta_methods, InstanceMeta, Style};
|
||||
|
||||
#[derive(Builder, Clone, Default)]
|
||||
pub struct Text {
|
||||
text: String,
|
||||
meta: InstanceMeta,
|
||||
style: Option<Style>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Text {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// f.debug_struct("Text")
|
||||
// .field("text", &self.text)
|
||||
// .field("id", &self.meta.id())
|
||||
// .field("name", &self.meta.name())
|
||||
// .field("confidence", &self.meta.confidence())
|
||||
// .finish()
|
||||
|
||||
let mut f = f.debug_struct("Text");
|
||||
f.field("text", &self.text);
|
||||
if let Some(id) = &self.meta.id() {
|
||||
f.field("id", id);
|
||||
}
|
||||
if let Some(name) = &self.meta.name() {
|
||||
f.field("name", name);
|
||||
}
|
||||
if let Some(confidence) = &self.meta.confidence() {
|
||||
f.field("confidence", confidence);
|
||||
}
|
||||
f.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Text {
|
||||
fn from(text: String) -> Self {
|
||||
Self {
|
||||
text,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Text {
|
||||
fn from(text: &str) -> Self {
|
||||
Self {
|
||||
text: text.to_string(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Text {
|
||||
impl_meta_methods!();
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
use aksr::Builder;
|
||||
|
||||
use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob};
|
||||
use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob, Text};
|
||||
|
||||
/// Container for inference results for each image.
|
||||
///
|
||||
@ -10,7 +10,7 @@ use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob};
|
||||
///
|
||||
#[derive(Builder, Clone, Default)]
|
||||
pub struct Y {
|
||||
texts: Option<Vec<String>>,
|
||||
texts: Option<Vec<Text>>,
|
||||
probs: Option<Vec<Prob>>,
|
||||
keypoints: Option<Vec<Keypoint>>,
|
||||
keypointss: Option<Vec<Vec<Keypoint>>>,
|
||||
|
@ -127,7 +127,7 @@ impl Blip {
|
||||
|
||||
let ys = texts
|
||||
.into_iter()
|
||||
.map(|x| Y::default().with_texts(&[&x]))
|
||||
.map(|x| Y::default().with_texts(&[x.into()]))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(ys)
|
||||
|
@ -29,6 +29,20 @@ impl crate::Config {
|
||||
Self::db().with_model_file("ppocr-v4-server-ch.onnx")
|
||||
}
|
||||
|
||||
fn ppocr_det_v5() -> Self {
|
||||
Self::db()
|
||||
.with_model_ixx(0, 2, (608, 640, 1600).into())
|
||||
.with_model_ixx(0, 3, (608, 640, 1600).into())
|
||||
}
|
||||
|
||||
pub fn ppocr_det_v5_mobile() -> Self {
|
||||
Self::ppocr_det_v5().with_model_file("ppocr-v5-mobile.onnx")
|
||||
}
|
||||
|
||||
pub fn ppocr_det_v5_server() -> Self {
|
||||
Self::ppocr_det_v5().with_model_file("ppocr-v5-server.onnx")
|
||||
}
|
||||
|
||||
pub fn db2() -> Self {
|
||||
Self::db()
|
||||
.with_image_mean(&[0.798, 0.785, 0.772])
|
||||
|
@ -277,13 +277,13 @@ impl Florence2 {
|
||||
// postprocess
|
||||
let mut y = Y::default();
|
||||
if let Task::Caption(_) | Task::Ocr = x_textual {
|
||||
y = y.with_texts(&[&text]);
|
||||
y = y.with_texts(&[text.into()]);
|
||||
} else {
|
||||
let elems = Self::loc_parse(&text)?;
|
||||
match x_textual {
|
||||
Task::RegionToCategory(..) | Task::RegionToDescription(..) => {
|
||||
let text = elems[0][0].clone();
|
||||
y = y.with_texts(&[&text]);
|
||||
y = y.with_texts(&[text.into()]);
|
||||
}
|
||||
Task::ObjectDetection
|
||||
| Task::OpenSetDetection(_)
|
||||
|
@ -107,7 +107,7 @@ impl Moondream2 {
|
||||
_ => vec![198., 198., 24334., 1159., 25.],
|
||||
};
|
||||
let text = self.generate_text(&input_ids, kv_cache)?;
|
||||
let y = Y::default().with_texts(&[&text]);
|
||||
let y = Y::default().with_texts(&[text.into()]);
|
||||
|
||||
Ok(y)
|
||||
}
|
||||
@ -120,7 +120,7 @@ impl Moondream2 {
|
||||
.collect();
|
||||
|
||||
let text = self.generate_text(&input_ids, kv_cache)?;
|
||||
let y = Y::default().with_texts(&[&text]);
|
||||
let y = Y::default().with_texts(&[text.into()]);
|
||||
|
||||
Ok(y)
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
||||
use anyhow::Result;
|
||||
use ndarray::{s, Axis};
|
||||
|
||||
use crate::{elapsed, models::BaseModelVisual, Config, Image, Keypoint, Ts, Xs, Y};
|
||||
use crate::{elapsed, models::BaseModelVisual, Config, Image, Keypoint, Text, Ts, Xs, Y};
|
||||
|
||||
#[derive(Builder, Debug)]
|
||||
pub struct SLANet {
|
||||
@ -107,7 +107,11 @@ impl SLANet {
|
||||
y_texts.extend_from_slice(&["</table>", "</body>", "</html>"]);
|
||||
}
|
||||
|
||||
ys.push(Y::default().with_keypointss(&y_kpts).with_texts(&y_texts));
|
||||
ys.push(
|
||||
Y::default()
|
||||
.with_keypointss(&y_kpts)
|
||||
.with_texts(&y_texts.into_iter().map(Text::from).collect::<Vec<_>>()),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(ys)
|
||||
|
@ -92,7 +92,7 @@ impl SmolVLM {
|
||||
let mut ys: Vec<Y> = Vec::new();
|
||||
for image in images.iter() {
|
||||
let y = self.generate_one(image, text)?;
|
||||
ys.push(Y::default().with_texts(&[&y]));
|
||||
ys.push(Y::default().with_texts(&[y.into()]));
|
||||
}
|
||||
|
||||
Ok(ys)
|
||||
|
@ -6,7 +6,7 @@ impl crate::Config {
|
||||
.with_model_ixx(0, 0, (1, 1, 8).into())
|
||||
.with_model_ixx(0, 1, 3.into())
|
||||
.with_model_ixx(0, 2, 48.into())
|
||||
.with_model_ixx(0, 3, (320, 960, 1600).into())
|
||||
.with_model_ixx(0, 3, (320, 960, 3200).into())
|
||||
.with_resize_mode(crate::ResizeMode::FitHeight)
|
||||
.with_padding_value(0)
|
||||
.with_normalize(true)
|
||||
@ -56,4 +56,16 @@ impl crate::Config {
|
||||
pub fn svtr_v2_student_ch() -> Self {
|
||||
Self::svtr_ch().with_model_file("v2-distill-student-ch.onnx")
|
||||
}
|
||||
|
||||
fn ppocr_rec_v5() -> Self {
|
||||
Self::svtr().with_vocab_txt("svtr/vocab_v5_ppocr_rec.txt")
|
||||
}
|
||||
|
||||
pub fn ppocr_rec_v5_mobile() -> Self {
|
||||
Self::ppocr_rec_v5().with_model_file("ppocr-v5-mobile.onnx")
|
||||
}
|
||||
|
||||
pub fn ppocr_rec_v5_server() -> Self {
|
||||
Self::ppocr_rec_v5().with_model_file("ppocr-v5-server.onnx")
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
||||
use ndarray::Axis;
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::{elapsed, Config, DynConf, Engine, Image, Processor, Ts, Xs, Y};
|
||||
use crate::{elapsed, Config, DynConf, Engine, Image, Processor, Text, Ts, Xs, Y};
|
||||
|
||||
#[derive(Builder, Debug)]
|
||||
pub struct SVTR {
|
||||
@ -80,13 +80,14 @@ impl SVTR {
|
||||
|
||||
preds.dedup_by(|a, b| a.0 == b.0);
|
||||
|
||||
let text: String = preds
|
||||
let (text, confs): (String, Vec<f32>) = preds
|
||||
.into_iter()
|
||||
.filter(|(id, &conf)| *id != 0 && conf >= self.confs[0])
|
||||
.map(|(id, _)| self.processor.vocab()[id].clone())
|
||||
.map(|(id, &conf)| (self.processor.vocab()[id].clone(), conf))
|
||||
.collect();
|
||||
|
||||
Y::default().with_texts(&[&text])
|
||||
Y::default().with_texts(&[Text::from(text)
|
||||
.with_confidence(confs.iter().sum::<f32>() / confs.len() as f32)])
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
@ -188,7 +188,7 @@ impl TrOCR {
|
||||
// to texts
|
||||
let texts = texts
|
||||
.into_par_iter()
|
||||
.map(|x| Y::default().with_texts(&[&x]))
|
||||
.map(|x| Y::default().with_texts(&[x.into()]))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(texts)
|
||||
|
Reference in New Issue
Block a user