mirror of
https://github.com/mii443/usls.git
synced 2025-08-22 15:45:41 +00:00
Add PPOCRv5 DET and REC models (#98)
* Add PPOCRv5 DET and REC models * Add Text Struct
This commit is contained in:
@ -28,9 +28,13 @@ struct Args {
|
|||||||
#[argh(option, default = "false")]
|
#[argh(option, default = "false")]
|
||||||
show_hbbs_conf: bool,
|
show_hbbs_conf: bool,
|
||||||
|
|
||||||
/// show mbrs confidence
|
/// show obbs confidence
|
||||||
#[argh(option, default = "false")]
|
#[argh(option, default = "false")]
|
||||||
show_obbs_conf: bool,
|
show_obbs_conf: bool,
|
||||||
|
|
||||||
|
/// show polygons confidence
|
||||||
|
#[argh(option, default = "false")]
|
||||||
|
show_polygons_conf: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
@ -43,7 +47,7 @@ fn main() -> Result<()> {
|
|||||||
// build model
|
// build model
|
||||||
let config = match &args.model {
|
let config = match &args.model {
|
||||||
Some(m) => Config::db().with_model_file(m),
|
Some(m) => Config::db().with_model_file(m),
|
||||||
None => Config::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
|
None => Config::ppocr_det_v5_mobile().with_model_dtype(args.dtype.as_str().try_into()?),
|
||||||
}
|
}
|
||||||
.with_device_all(args.device.as_str().try_into()?)
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
@ -66,16 +70,16 @@ fn main() -> Result<()> {
|
|||||||
.with_polygon_style(
|
.with_polygon_style(
|
||||||
Style::polygon()
|
Style::polygon()
|
||||||
.with_visible(true)
|
.with_visible(true)
|
||||||
.with_text_visible(false)
|
.with_text_visible(true)
|
||||||
.show_confidence(true)
|
.show_confidence(args.show_polygons_conf)
|
||||||
.show_id(true)
|
.show_id(false)
|
||||||
.show_name(true)
|
.show_name(false)
|
||||||
.with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
|
.with_color(usls::StyleColors::default().with_outline([255, 105, 180, 255].into())),
|
||||||
)
|
)
|
||||||
.with_hbb_style(
|
.with_hbb_style(
|
||||||
Style::hbb()
|
Style::hbb()
|
||||||
.with_visible(args.show_hbbs)
|
.with_visible(args.show_hbbs)
|
||||||
.with_text_visible(false)
|
.with_text_visible(true)
|
||||||
.with_thickness(1)
|
.with_thickness(1)
|
||||||
.show_confidence(args.show_hbbs_conf)
|
.show_confidence(args.show_hbbs_conf)
|
||||||
.show_id(false)
|
.show_id(false)
|
||||||
@ -84,7 +88,7 @@ fn main() -> Result<()> {
|
|||||||
.with_obb_style(
|
.with_obb_style(
|
||||||
Style::obb()
|
Style::obb()
|
||||||
.with_visible(args.show_obbs)
|
.with_visible(args.show_obbs)
|
||||||
.with_text_visible(false)
|
.with_text_visible(true)
|
||||||
.show_confidence(args.show_obbs_conf)
|
.show_confidence(args.show_obbs_conf)
|
||||||
.show_id(false)
|
.show_id(false)
|
||||||
.show_name(false),
|
.show_name(false),
|
||||||
|
@ -47,7 +47,7 @@ fn main() -> Result<()> {
|
|||||||
for y in ys.iter() {
|
for y in ys.iter() {
|
||||||
if let Some(texts) = y.texts() {
|
if let Some(texts) = y.texts() {
|
||||||
for text in texts {
|
for text in texts {
|
||||||
println!("[User]: {}\n\n[Assistant]:{}", args.prompt, text);
|
println!("[User]: {}\n\n[Assistant]:{:?}", args.prompt, text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,15 +7,14 @@ cargo run -r -F cuda --example svtr -- --device cuda
|
|||||||
## Results
|
## Results
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
["./examples/svtr/images/license-ch-2.png"]: Ys([Y { Texts: [Text("粤A·68688")] }])
|
ys: [Y { Texts: [Text { text: "粤A68688", confidence: 0.9940011 }] }]
|
||||||
["./examples/svtr/images/license-ch.png"]: Ys([Y { Texts: [Text("冀B6G000")] }])
|
ys: [Y { Texts: [Text { text: "冀B6G000", confidence: 0.86073524 }] }]
|
||||||
["./examples/svtr/images/sign-ch-2.png"]: Ys([Y { Texts: [Text("我在南锣鼓捣猫呢")] }])
|
ys: [Y { Texts: [Text { text: "我在南锣鼓捣猫呢", confidence: 0.99346924 }] }]
|
||||||
["./examples/svtr/images/sign-ch.png"]: Ys([Y { Texts: [Text("小菊儿胡同71号")] }])
|
ys: [Y { Texts: [Text { text: "小菊儿胡同71号", confidence: 0.99450684 }] }]
|
||||||
["./examples/svtr/images/text-110022345.png"]: Ys([Y { Texts: [Text("110022345")] }])
|
ys: [Y { Texts: [Text { text: "110022345", confidence: 0.99994576 }] }]
|
||||||
["./examples/svtr/images/text-ch.png"]: Ys([Y { Texts: [Text("你有这么高速运转的机械进入中国,记住我给出的原理")] }])
|
ys: [Y { Texts: [Text { text: "你有这么高速运转的机械进入中国,记住我给出的原理", confidence: 0.9996338 }] }]
|
||||||
["./examples/svtr/images/text-en-2.png"]: Ys([Y { Texts: [Text("from the background, but also separate text instances which")] }])
|
ys: [Y { Texts: [Text { text: "from the background, but also separate text instances which", confidence: 0.9954648 }] }]
|
||||||
["./examples/svtr/images/text-en-dark.png"]: Ys([Y { Texts: [Text("Please lower your volume")] }])
|
ys: [Y { Texts: [Text { text: "Please lower your yolume", confidence: 0.93910724 }] }]
|
||||||
["./examples/svtr/images/text-en.png"]: Ys([Y { Texts: [Text("are closely jointed. Some examples are illustrated in Fig.7.")] }])
|
ys: [Y { Texts: [Text { text: "are closely jointed. Some examples are illustrated in Fig.7.", confidence: 0.9959717 }] }]
|
||||||
["./examples/svtr/images/text-hello-rust-handwritten.png"]: Ys([Y { Texts: [Text("HeloRuSt")] }])
|
ys: [Y { Texts: [Text { text: "HelloRust", confidence: 0.97661674 }] }]
|
||||||
|
```
|
||||||
```
|
|
||||||
|
@ -11,6 +11,10 @@ struct Args {
|
|||||||
/// dtype
|
/// dtype
|
||||||
#[argh(option, default = "String::from(\"auto\")")]
|
#[argh(option, default = "String::from(\"auto\")")]
|
||||||
dtype: String,
|
dtype: String,
|
||||||
|
|
||||||
|
/// max text length
|
||||||
|
#[argh(option, default = "960")]
|
||||||
|
max_text_length: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
@ -22,9 +26,12 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let config = Config::ppocr_rec_v4_ch()
|
let config = Config::ppocr_rec_v5_mobile()
|
||||||
|
// ppocr_rec_v5_server()
|
||||||
|
// ppocr_rec_v4_ch()
|
||||||
// ppocr_rec_v4_en()
|
// ppocr_rec_v4_en()
|
||||||
// repsvtr_ch()
|
// repsvtr_ch()
|
||||||
|
.with_model_ixx(0, 3, args.max_text_length.into())
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
|
@ -9,6 +9,7 @@ mod obb;
|
|||||||
mod polygon;
|
mod polygon;
|
||||||
mod prob;
|
mod prob;
|
||||||
mod skeleton;
|
mod skeleton;
|
||||||
|
mod text;
|
||||||
mod x;
|
mod x;
|
||||||
mod xs;
|
mod xs;
|
||||||
mod y;
|
mod y;
|
||||||
@ -29,6 +30,7 @@ pub use obb::*;
|
|||||||
pub use polygon::*;
|
pub use polygon::*;
|
||||||
pub use prob::*;
|
pub use prob::*;
|
||||||
pub use skeleton::*;
|
pub use skeleton::*;
|
||||||
|
pub use text::*;
|
||||||
pub use x::X;
|
pub use x::X;
|
||||||
pub use xs::Xs;
|
pub use xs::Xs;
|
||||||
pub use y::*;
|
pub use y::*;
|
||||||
|
56
src/inference/text.rs
Normal file
56
src/inference/text.rs
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
use aksr::Builder;
|
||||||
|
|
||||||
|
use crate::{impl_meta_methods, InstanceMeta, Style};
|
||||||
|
|
||||||
|
#[derive(Builder, Clone, Default)]
|
||||||
|
pub struct Text {
|
||||||
|
text: String,
|
||||||
|
meta: InstanceMeta,
|
||||||
|
style: Option<Style>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for Text {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
// f.debug_struct("Text")
|
||||||
|
// .field("text", &self.text)
|
||||||
|
// .field("id", &self.meta.id())
|
||||||
|
// .field("name", &self.meta.name())
|
||||||
|
// .field("confidence", &self.meta.confidence())
|
||||||
|
// .finish()
|
||||||
|
|
||||||
|
let mut f = f.debug_struct("Text");
|
||||||
|
f.field("text", &self.text);
|
||||||
|
if let Some(id) = &self.meta.id() {
|
||||||
|
f.field("id", id);
|
||||||
|
}
|
||||||
|
if let Some(name) = &self.meta.name() {
|
||||||
|
f.field("name", name);
|
||||||
|
}
|
||||||
|
if let Some(confidence) = &self.meta.confidence() {
|
||||||
|
f.field("confidence", confidence);
|
||||||
|
}
|
||||||
|
f.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for Text {
|
||||||
|
fn from(text: String) -> Self {
|
||||||
|
Self {
|
||||||
|
text,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for Text {
|
||||||
|
fn from(text: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
text: text.to_string(),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Text {
|
||||||
|
impl_meta_methods!();
|
||||||
|
}
|
@ -1,6 +1,6 @@
|
|||||||
use aksr::Builder;
|
use aksr::Builder;
|
||||||
|
|
||||||
use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob};
|
use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob, Text};
|
||||||
|
|
||||||
/// Container for inference results for each image.
|
/// Container for inference results for each image.
|
||||||
///
|
///
|
||||||
@ -10,7 +10,7 @@ use crate::{Hbb, Keypoint, Mask, Obb, Polygon, Prob};
|
|||||||
///
|
///
|
||||||
#[derive(Builder, Clone, Default)]
|
#[derive(Builder, Clone, Default)]
|
||||||
pub struct Y {
|
pub struct Y {
|
||||||
texts: Option<Vec<String>>,
|
texts: Option<Vec<Text>>,
|
||||||
probs: Option<Vec<Prob>>,
|
probs: Option<Vec<Prob>>,
|
||||||
keypoints: Option<Vec<Keypoint>>,
|
keypoints: Option<Vec<Keypoint>>,
|
||||||
keypointss: Option<Vec<Vec<Keypoint>>>,
|
keypointss: Option<Vec<Vec<Keypoint>>>,
|
||||||
|
@ -127,7 +127,7 @@ impl Blip {
|
|||||||
|
|
||||||
let ys = texts
|
let ys = texts
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|x| Y::default().with_texts(&[&x]))
|
.map(|x| Y::default().with_texts(&[x.into()]))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
Ok(ys)
|
Ok(ys)
|
||||||
|
@ -29,6 +29,20 @@ impl crate::Config {
|
|||||||
Self::db().with_model_file("ppocr-v4-server-ch.onnx")
|
Self::db().with_model_file("ppocr-v4-server-ch.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn ppocr_det_v5() -> Self {
|
||||||
|
Self::db()
|
||||||
|
.with_model_ixx(0, 2, (608, 640, 1600).into())
|
||||||
|
.with_model_ixx(0, 3, (608, 640, 1600).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ppocr_det_v5_mobile() -> Self {
|
||||||
|
Self::ppocr_det_v5().with_model_file("ppocr-v5-mobile.onnx")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ppocr_det_v5_server() -> Self {
|
||||||
|
Self::ppocr_det_v5().with_model_file("ppocr-v5-server.onnx")
|
||||||
|
}
|
||||||
|
|
||||||
pub fn db2() -> Self {
|
pub fn db2() -> Self {
|
||||||
Self::db()
|
Self::db()
|
||||||
.with_image_mean(&[0.798, 0.785, 0.772])
|
.with_image_mean(&[0.798, 0.785, 0.772])
|
||||||
|
@ -277,13 +277,13 @@ impl Florence2 {
|
|||||||
// postprocess
|
// postprocess
|
||||||
let mut y = Y::default();
|
let mut y = Y::default();
|
||||||
if let Task::Caption(_) | Task::Ocr = x_textual {
|
if let Task::Caption(_) | Task::Ocr = x_textual {
|
||||||
y = y.with_texts(&[&text]);
|
y = y.with_texts(&[text.into()]);
|
||||||
} else {
|
} else {
|
||||||
let elems = Self::loc_parse(&text)?;
|
let elems = Self::loc_parse(&text)?;
|
||||||
match x_textual {
|
match x_textual {
|
||||||
Task::RegionToCategory(..) | Task::RegionToDescription(..) => {
|
Task::RegionToCategory(..) | Task::RegionToDescription(..) => {
|
||||||
let text = elems[0][0].clone();
|
let text = elems[0][0].clone();
|
||||||
y = y.with_texts(&[&text]);
|
y = y.with_texts(&[text.into()]);
|
||||||
}
|
}
|
||||||
Task::ObjectDetection
|
Task::ObjectDetection
|
||||||
| Task::OpenSetDetection(_)
|
| Task::OpenSetDetection(_)
|
||||||
|
@ -107,7 +107,7 @@ impl Moondream2 {
|
|||||||
_ => vec![198., 198., 24334., 1159., 25.],
|
_ => vec![198., 198., 24334., 1159., 25.],
|
||||||
};
|
};
|
||||||
let text = self.generate_text(&input_ids, kv_cache)?;
|
let text = self.generate_text(&input_ids, kv_cache)?;
|
||||||
let y = Y::default().with_texts(&[&text]);
|
let y = Y::default().with_texts(&[text.into()]);
|
||||||
|
|
||||||
Ok(y)
|
Ok(y)
|
||||||
}
|
}
|
||||||
@ -120,7 +120,7 @@ impl Moondream2 {
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let text = self.generate_text(&input_ids, kv_cache)?;
|
let text = self.generate_text(&input_ids, kv_cache)?;
|
||||||
let y = Y::default().with_texts(&[&text]);
|
let y = Y::default().with_texts(&[text.into()]);
|
||||||
|
|
||||||
Ok(y)
|
Ok(y)
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
|
|
||||||
use crate::{elapsed, models::BaseModelVisual, Config, Image, Keypoint, Ts, Xs, Y};
|
use crate::{elapsed, models::BaseModelVisual, Config, Image, Keypoint, Text, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct SLANet {
|
pub struct SLANet {
|
||||||
@ -107,7 +107,11 @@ impl SLANet {
|
|||||||
y_texts.extend_from_slice(&["</table>", "</body>", "</html>"]);
|
y_texts.extend_from_slice(&["</table>", "</body>", "</html>"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ys.push(Y::default().with_keypointss(&y_kpts).with_texts(&y_texts));
|
ys.push(
|
||||||
|
Y::default()
|
||||||
|
.with_keypointss(&y_kpts)
|
||||||
|
.with_texts(&y_texts.into_iter().map(Text::from).collect::<Vec<_>>()),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(ys)
|
Ok(ys)
|
||||||
|
@ -92,7 +92,7 @@ impl SmolVLM {
|
|||||||
let mut ys: Vec<Y> = Vec::new();
|
let mut ys: Vec<Y> = Vec::new();
|
||||||
for image in images.iter() {
|
for image in images.iter() {
|
||||||
let y = self.generate_one(image, text)?;
|
let y = self.generate_one(image, text)?;
|
||||||
ys.push(Y::default().with_texts(&[&y]));
|
ys.push(Y::default().with_texts(&[y.into()]));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(ys)
|
Ok(ys)
|
||||||
|
@ -6,7 +6,7 @@ impl crate::Config {
|
|||||||
.with_model_ixx(0, 0, (1, 1, 8).into())
|
.with_model_ixx(0, 0, (1, 1, 8).into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 48.into())
|
.with_model_ixx(0, 2, 48.into())
|
||||||
.with_model_ixx(0, 3, (320, 960, 1600).into())
|
.with_model_ixx(0, 3, (320, 960, 3200).into())
|
||||||
.with_resize_mode(crate::ResizeMode::FitHeight)
|
.with_resize_mode(crate::ResizeMode::FitHeight)
|
||||||
.with_padding_value(0)
|
.with_padding_value(0)
|
||||||
.with_normalize(true)
|
.with_normalize(true)
|
||||||
@ -56,4 +56,16 @@ impl crate::Config {
|
|||||||
pub fn svtr_v2_student_ch() -> Self {
|
pub fn svtr_v2_student_ch() -> Self {
|
||||||
Self::svtr_ch().with_model_file("v2-distill-student-ch.onnx")
|
Self::svtr_ch().with_model_file("v2-distill-student-ch.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn ppocr_rec_v5() -> Self {
|
||||||
|
Self::svtr().with_vocab_txt("svtr/vocab_v5_ppocr_rec.txt")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ppocr_rec_v5_mobile() -> Self {
|
||||||
|
Self::ppocr_rec_v5().with_model_file("ppocr-v5-mobile.onnx")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ppocr_rec_v5_server() -> Self {
|
||||||
|
Self::ppocr_rec_v5().with_model_file("ppocr-v5-server.onnx")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, Config, DynConf, Engine, Image, Processor, Ts, Xs, Y};
|
use crate::{elapsed, Config, DynConf, Engine, Image, Processor, Text, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct SVTR {
|
pub struct SVTR {
|
||||||
@ -80,13 +80,14 @@ impl SVTR {
|
|||||||
|
|
||||||
preds.dedup_by(|a, b| a.0 == b.0);
|
preds.dedup_by(|a, b| a.0 == b.0);
|
||||||
|
|
||||||
let text: String = preds
|
let (text, confs): (String, Vec<f32>) = preds
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|(id, &conf)| *id != 0 && conf >= self.confs[0])
|
.filter(|(id, &conf)| *id != 0 && conf >= self.confs[0])
|
||||||
.map(|(id, _)| self.processor.vocab()[id].clone())
|
.map(|(id, &conf)| (self.processor.vocab()[id].clone(), conf))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Y::default().with_texts(&[&text])
|
Y::default().with_texts(&[Text::from(text)
|
||||||
|
.with_confidence(confs.iter().sum::<f32>() / confs.len() as f32)])
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
@ -188,7 +188,7 @@ impl TrOCR {
|
|||||||
// to texts
|
// to texts
|
||||||
let texts = texts
|
let texts = texts
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(|x| Y::default().with_texts(&[&x]))
|
.map(|x| Y::default().with_texts(&[x.into()]))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
Ok(texts)
|
Ok(texts)
|
||||||
|
Reference in New Issue
Block a user