mirror of
https://github.com/mii443/usls.git
synced 2025-08-22 15:45:41 +00:00
Options -> ModelConfig
This commit is contained in:
@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "usls"
|
name = "usls"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
version = "0.1.0-beta.1"
|
version = "0.1.0-beta.2"
|
||||||
rust-version = "1.82"
|
rust-version = "1.82"
|
||||||
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
|
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
|
||||||
repository = "https://github.com/jamjamjon/usls"
|
repository = "https://github.com/jamjamjon/usls"
|
||||||
@ -44,6 +44,7 @@ ort = { version = "2.0.0-rc.9", default-features = false, optional = true , feat
|
|||||||
"half"
|
"half"
|
||||||
]}
|
]}
|
||||||
tokenizers = { version = "0.21.1" }
|
tokenizers = { version = "0.21.1" }
|
||||||
|
paste = "1.0.15"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
prost-build = "0.13.5"
|
prost-build = "0.13.5"
|
||||||
|
@ -116,7 +116,8 @@
|
|||||||
| [Moondream2](https://github.com/vikhyat/moondream/tree/main) | Open-Set Object Detection<br />Open-Set Keypoints Detection<br />Image Caption<br />Visual Question Answering | [demo](examples/moondream2) | ✅ | ✅ | ✅ | | |
|
| [Moondream2](https://github.com/vikhyat/moondream/tree/main) | Open-Set Object Detection<br />Open-Set Keypoints Detection<br />Image Caption<br />Visual Question Answering | [demo](examples/moondream2) | ✅ | ✅ | ✅ | | |
|
||||||
| [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Set Object Detection | [demo](examples/owlv2) | ✅ | ✅ | ✅ | | |
|
| [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Set Object Detection | [demo](examples/owlv2) | ✅ | ✅ | ✅ | | |
|
||||||
| [SmolVLM(256M, 500M)](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct) | Visual Question Answering | [demo](examples/smolvlm) | ✅ | ✅ | ✅ | | |
|
| [SmolVLM(256M, 500M)](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct) | Visual Question Answering | [demo](examples/smolvlm) | ✅ | ✅ | ✅ | | |
|
||||||
| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0) | Image Segmentation Answering | [demo](examples/rmbg) | ✅ | ✅ | ✅ | | |
|
| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0) | Image Segmentation<br />Background Erase | [demo](examples/rmbg) | ✅ | ✅ | ✅ | | |
|
||||||
|
| [BEN2](https://huggingface.co/PramaLLC/BEN2) | Image Segmentation<br />Background Erase | [demo](examples/rmbg) | ✅ | ✅ | ✅ | | |
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use usls::{models::RMBG, Annotator, DataLoader, Options};
|
use usls::{models::RMBG, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -20,11 +20,11 @@ fn main() -> anyhow::Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::ben2_base()
|
let config = ModelConfig::ben2_base()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = RMBG::new(options)?;
|
let mut model = RMBG::new(config)?;
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
|
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use usls::{models::Blip, DataLoader, Options};
|
use usls::{models::Blip, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// BLIP Example
|
/// BLIP Example
|
||||||
@ -20,13 +20,10 @@ fn main() -> anyhow::Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options_visual = Options::blip_v1_base_caption_visual()
|
let config = ModelConfig::blip_v1_base_caption()
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let options_textual = Options::blip_v1_base_caption_textual()
|
let mut model = Blip::new(config)?;
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?;
|
|
||||||
let mut model = Blip::new(options_visual, options_textual)?;
|
|
||||||
|
|
||||||
// image caption
|
// image caption
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
|
use usls::{models::ImageClassifier, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -36,20 +36,20 @@ fn main() -> anyhow::Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = match args.model.to_lowercase().as_str() {
|
let config = match args.model.to_lowercase().as_str() {
|
||||||
"beit" => Options::beit_base(),
|
"beit" => ModelConfig::beit_base(),
|
||||||
"convnext" => Options::convnext_v2_atto(),
|
"convnext" => ModelConfig::convnext_v2_atto(),
|
||||||
"deit" => Options::deit_tiny_distill(),
|
"deit" => ModelConfig::deit_tiny_distill(),
|
||||||
"fastvit" => Options::fastvit_t8_distill(),
|
"fastvit" => ModelConfig::fastvit_t8_distill(),
|
||||||
"mobileone" => Options::mobileone_s0(),
|
"mobileone" => ModelConfig::mobileone_s0(),
|
||||||
_ => anyhow::bail!("Unsupported model: {}", args.model),
|
_ => anyhow::bail!("Unsupported model: {}", args.model),
|
||||||
};
|
};
|
||||||
|
|
||||||
let options = options
|
let config = config
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = ImageClassifier::try_from(options)?;
|
let mut model = ImageClassifier::try_from(config)?;
|
||||||
|
|
||||||
// load images
|
// load images
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::Clip, DataLoader, Ops, Options};
|
use usls::{models::Clip, DataLoader, ModelConfig, Ops};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// CLIP Example
|
/// CLIP Example
|
||||||
@ -14,18 +14,13 @@ fn main() -> Result<()> {
|
|||||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options_visual = Options::jina_clip_v1_visual()
|
let config = ModelConfig::jina_clip_v1()
|
||||||
// clip_vit_b32_visual()
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let options_textual = Options::jina_clip_v1_textual()
|
let mut model = Clip::new(config)?;
|
||||||
// clip_vit_b32_textual()
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?;
|
|
||||||
let mut model = Clip::new(options_visual, options_textual)?;
|
|
||||||
|
|
||||||
// texts
|
// texts
|
||||||
let texts = vec![
|
let texts = vec![
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::RTDETR, Annotator, DataLoader, Options};
|
use usls::{models::RTDETR, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -7,9 +7,8 @@ fn main() -> Result<()> {
|
|||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::d_fine_n_coco().commit()?;
|
let mut model = RTDETR::new(ModelConfig::d_fine_n_coco().commit()?)?;
|
||||||
let mut model = RTDETR::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::DB, Annotator, DataLoader, Options, Style};
|
use usls::{models::DB, Annotator, DataLoader, ModelConfig, Style};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -41,15 +41,13 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = match &args.model {
|
let config = match &args.model {
|
||||||
Some(m) => Options::db().with_model_file(m),
|
Some(m) => ModelConfig::db().with_model_file(m),
|
||||||
None => Options::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
|
None => ModelConfig::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
|
||||||
};
|
}
|
||||||
let mut model = DB::new(
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
options
|
.commit()?;
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
let mut model = DB::new(config)?;
|
||||||
.commit()?,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&[
|
let xs = DataLoader::try_read_n(&[
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::RTDETR, Annotator, DataLoader, Options};
|
use usls::{models::RTDETR, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -7,9 +7,8 @@ fn main() -> Result<()> {
|
|||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::deim_dfine_s_coco().commit()?;
|
let mut model = RTDETR::new(ModelConfig::deim_dfine_s_coco().commit()?)?;
|
||||||
let mut model = RTDETR::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::DepthAnything, Annotator, DataLoader, Options, Style};
|
use usls::{models::DepthAnything, Annotator, DataLoader, ModelConfig, Style};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -8,8 +8,7 @@ fn main() -> Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::depth_anything_v2_small().commit()?;
|
let mut model = DepthAnything::new(ModelConfig::depth_anything_v2_small().commit()?)?;
|
||||||
let mut model = DepthAnything::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::DataLoader;
|
use usls::DataLoader;
|
||||||
use usls::{models::DepthPro, Annotator, Options, Style};
|
use usls::{models::DepthPro, Annotator, ModelConfig, Style};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -23,11 +23,11 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// model
|
// model
|
||||||
let options = Options::depth_pro()
|
let config = ModelConfig::depth_pro()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = DepthPro::new(options)?;
|
let mut model = DepthPro::new(config)?;
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::DINOv2, DataLoader, Options};
|
use usls::{models::DINOv2, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -11,8 +11,10 @@ fn main() -> Result<()> {
|
|||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/bus.jpg"])?;
|
||||||
|
|
||||||
// model
|
// model
|
||||||
let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;
|
let config = ModelConfig::dinov2_small()
|
||||||
let mut model = DINOv2::new(options)?;
|
.with_batch_size_all(xs.len())
|
||||||
|
.commit()?;
|
||||||
|
let mut model = DINOv2::new(config)?;
|
||||||
|
|
||||||
// encode images
|
// encode images
|
||||||
let y = model.encode_images(&xs)?;
|
let y = model.encode_images(&xs)?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
use usls::{models::YOLO, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -18,7 +18,7 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let config = Options::doclayout_yolo_docstructbench()
|
let config = ModelConfig::doclayout_yolo_docstructbench()
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = YOLO::new(config)?;
|
let mut model = YOLO::new(config)?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::DB, Annotator, DataLoader, Options, Scale, Style};
|
use usls::{models::DB, Annotator, DataLoader, ModelConfig, Scale, Style};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -26,16 +26,16 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = match args.scale.as_str().try_into()? {
|
let config = match args.scale.as_str().try_into()? {
|
||||||
Scale::T => Options::fast_tiny(),
|
Scale::T => ModelConfig::fast_tiny(),
|
||||||
Scale::S => Options::fast_small(),
|
Scale::S => ModelConfig::fast_small(),
|
||||||
Scale::B => Options::fast_base(),
|
Scale::B => ModelConfig::fast_base(),
|
||||||
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
||||||
};
|
};
|
||||||
let mut model = DB::new(
|
let mut model = DB::new(
|
||||||
options
|
config
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_dtype_all(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
.commit()?,
|
.commit()?,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
use usls::{models::YOLO, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -22,7 +22,7 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let config = Options::fastsam_s()
|
let config = ModelConfig::fastsam_s()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
cargo run -r -F cuda --example florence2 -- --device cuda --scale base --dtype fp16
|
cargo run -r -F cuda --example florence2 -- --device cuda --dtype fp16
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,20 +1,16 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Style, Task};
|
use usls::{models::Florence2, Annotator, DataLoader, ModelConfig, Style, Task};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
struct Args {
|
struct Args {
|
||||||
/// dtype
|
/// dtype
|
||||||
#[argh(option, default = "String::from(\"auto\")")]
|
#[argh(option, default = "String::from(\"fp16\")")]
|
||||||
dtype: String,
|
dtype: String,
|
||||||
|
|
||||||
/// device
|
/// device
|
||||||
#[argh(option, default = "String::from(\"cpu:0\")")]
|
#[argh(option, default = "String::from(\"cpu:0\")")]
|
||||||
device: String,
|
device: String,
|
||||||
|
|
||||||
/// scale
|
|
||||||
#[argh(option, default = "String::from(\"base\")")]
|
|
||||||
scale: String,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
@ -29,51 +25,12 @@ fn main() -> Result<()> {
|
|||||||
let xs = DataLoader::try_read_n(&["images/green-car.jpg", "assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/green-car.jpg", "assets/bus.jpg"])?;
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let (
|
let config = ModelConfig::florence2_base()
|
||||||
options_vision_encoder,
|
.with_dtype_all(args.dtype.as_str().try_into()?)
|
||||||
options_text_embed,
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
options_encoder,
|
.with_batch_size_all(xs.len())
|
||||||
options_decoder,
|
.commit()?;
|
||||||
options_decoder_merged,
|
let mut model = Florence2::new(config)?;
|
||||||
) = match args.scale.as_str().try_into()? {
|
|
||||||
Scale::B => (
|
|
||||||
Options::florence2_visual_encoder_base(),
|
|
||||||
Options::florence2_textual_embed_base(),
|
|
||||||
Options::florence2_texual_encoder_base(),
|
|
||||||
Options::florence2_texual_decoder_base(),
|
|
||||||
Options::florence2_texual_decoder_merged_base(),
|
|
||||||
),
|
|
||||||
Scale::L => todo!(),
|
|
||||||
_ => anyhow::bail!("Unsupported Florence2 scale."),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut model = Florence2::new(
|
|
||||||
options_vision_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_text_embed
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_decoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_decoder_merged
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// tasks
|
// tasks
|
||||||
let tasks = [
|
let tasks = [
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::GroundingDINO, Annotator, DataLoader, Options};
|
use usls::{models::GroundingDINO, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
struct Args {
|
struct Args {
|
||||||
/// dtype
|
/// dtype
|
||||||
#[argh(option, default = "String::from(\"auto\")")]
|
#[argh(option, default = "String::from(\"fp16\")")]
|
||||||
dtype: String,
|
dtype: String,
|
||||||
|
|
||||||
/// device
|
/// device
|
||||||
@ -45,7 +45,7 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
let options = Options::grounding_dino_tiny()
|
let config = ModelConfig::grounding_dino_tiny()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
|
.with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
|
||||||
@ -53,7 +53,7 @@ fn main() -> Result<()> {
|
|||||||
.with_text_confs(&[0.25])
|
.with_text_confs(&[0.25])
|
||||||
.commit()?;
|
.commit()?;
|
||||||
|
|
||||||
let mut model = GroundingDINO::new(options)?;
|
let mut model = GroundingDINO::new(config)?;
|
||||||
|
|
||||||
// load images
|
// load images
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::DataLoader;
|
use usls::DataLoader;
|
||||||
use usls::{models::DB, Annotator, Options, Scale, Style};
|
use usls::{models::DB, Annotator, ModelConfig, Scale, Style};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -27,14 +27,14 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = match args.scale.as_str().try_into()? {
|
let config = match args.scale.as_str().try_into()? {
|
||||||
Scale::T => Options::linknet_r18(),
|
Scale::T => ModelConfig::linknet_r18(),
|
||||||
Scale::S => Options::linknet_r34(),
|
Scale::S => ModelConfig::linknet_r34(),
|
||||||
Scale::B => Options::linknet_r50(),
|
Scale::B => ModelConfig::linknet_r50(),
|
||||||
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
||||||
};
|
};
|
||||||
let mut model = DB::new(
|
let mut model = DB::new(
|
||||||
options
|
config
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?,
|
.commit()?,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use usls::{models::MODNet, Annotator, DataLoader, Options};
|
use usls::{models::MODNet, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
fn main() -> anyhow::Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -7,8 +7,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::modnet_photographic().commit()?;
|
let mut model = MODNet::new(ModelConfig::modnet_photographic().commit()?)?;
|
||||||
let mut model = MODNet::new(options)?;
|
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&["images/liuyifei.png"])?;
|
let xs = DataLoader::try_read_n(&["images/liuyifei.png"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::Moondream2, Annotator, DataLoader, Options, Scale, Task};
|
use usls::{models::Moondream2, Annotator, DataLoader, ModelConfig, Scale, Task};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -39,81 +39,16 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let (
|
let config = match args.scale.as_str().try_into()? {
|
||||||
options_vision_encoder,
|
Scale::Billion(0.5) => ModelConfig::moondream2_0_5b(),
|
||||||
options_vision_projection,
|
Scale::Billion(2.) => ModelConfig::moondream2_2b(),
|
||||||
options_text_decoder,
|
|
||||||
options_text_encoder,
|
|
||||||
options_coord_decoder,
|
|
||||||
options_coord_encoder,
|
|
||||||
options_size_decoder,
|
|
||||||
options_size_encoder,
|
|
||||||
) = match args.scale.as_str().try_into()? {
|
|
||||||
Scale::Billion(2.) => (
|
|
||||||
Options::moondream2_2b_vision_encoder(),
|
|
||||||
Options::moondream2_2b_vision_projection(),
|
|
||||||
Options::moondream2_2b_text_decoder(),
|
|
||||||
Options::moondream2_2b_text_encoder(),
|
|
||||||
Options::moondream2_2b_coord_decoder(),
|
|
||||||
Options::moondream2_2b_coord_encoder(),
|
|
||||||
Options::moondream2_2b_size_decoder(),
|
|
||||||
Options::moondream2_2b_size_encoder(),
|
|
||||||
),
|
|
||||||
Scale::Billion(0.5) => (
|
|
||||||
Options::moondream2_0_5b_vision_encoder(),
|
|
||||||
Options::moondream2_0_5b_vision_projection(),
|
|
||||||
Options::moondream2_0_5b_text_decoder(),
|
|
||||||
Options::moondream2_0_5b_text_encoder(),
|
|
||||||
Options::moondream2_0_5b_coord_decoder(),
|
|
||||||
Options::moondream2_0_5b_coord_encoder(),
|
|
||||||
Options::moondream2_0_5b_size_decoder(),
|
|
||||||
Options::moondream2_0_5b_size_encoder(),
|
|
||||||
),
|
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
};
|
}
|
||||||
|
.with_dtype_all(args.dtype.as_str().try_into()?)
|
||||||
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
|
.commit()?;
|
||||||
|
|
||||||
let mut model = Moondream2::new(
|
let mut model = Moondream2::new(config)?;
|
||||||
options_vision_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
options_vision_projection
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
options_text_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
options_text_decoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
Some(
|
|
||||||
options_coord_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
),
|
|
||||||
Some(
|
|
||||||
options_coord_decoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
),
|
|
||||||
Some(
|
|
||||||
options_size_encoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
),
|
|
||||||
Some(
|
|
||||||
options_size_decoder
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// load images
|
// load images
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
@ -142,13 +77,6 @@ fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
|
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
|
||||||
println!("{:?}", ys);
|
println!("{:?}", ys);
|
||||||
// let annotator = Annotator::default()
|
|
||||||
// .with_bboxes_thickness(4)
|
|
||||||
// .without_bboxes_conf(true)
|
|
||||||
// .with_keypoints_radius(6)
|
|
||||||
// .with_keypoints_name(true)
|
|
||||||
// .with_saveout("moondream2");
|
|
||||||
// annotator.annotate(&xs, &ys);
|
|
||||||
|
|
||||||
// annotate
|
// annotate
|
||||||
let annotator = Annotator::default()
|
let annotator = Annotator::default()
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::DataLoader;
|
use usls::DataLoader;
|
||||||
use usls::{models::OWLv2, Annotator, Options};
|
use usls::{models::OWLv2, Annotator, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -46,14 +46,14 @@ fn main() -> Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::owlv2_base_ensemble()
|
let config = ModelConfig::owlv2_base_ensemble()
|
||||||
// owlv2_base()
|
// owlv2_base()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_class_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
|
.with_class_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = OWLv2::new(options)?;
|
let mut model = OWLv2::new(config)?;
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::DataLoader;
|
use usls::DataLoader;
|
||||||
use usls::{models::PicoDet, Annotator, Options};
|
use usls::{models::PicoDet, Annotator, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -8,12 +8,11 @@ fn main() -> Result<()> {
|
|||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::picodet_layout_1x()
|
let config = ModelConfig::picodet_layout_1x().commit()?;
|
||||||
// picodet_l_layout_3cls()
|
// picodet_l_layout_3cls()
|
||||||
// picodet_l_layout_17cls()
|
// picodet_l_layout_17cls()
|
||||||
.commit()?;
|
let mut model = PicoDet::new(config)?;
|
||||||
let mut model = PicoDet::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::RFDETR, Annotator, DataLoader, Options};
|
use usls::{models::RFDETR, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -7,9 +7,8 @@ fn main() -> Result<()> {
|
|||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::rfdetr_base().commit()?;
|
let mut model = RFDETR::new(ModelConfig::rfdetr_base().commit()?)?;
|
||||||
let mut model = RFDETR::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
use usls::{models::RMBG, Annotator, DataLoader, Options};
|
use usls::{models::RMBG, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
struct Args {
|
struct Args {
|
||||||
/// dtype
|
/// dtype
|
||||||
#[argh(option, default = "String::from(\"auto\")")]
|
#[argh(option, default = "String::from(\"fp16\")")]
|
||||||
dtype: String,
|
dtype: String,
|
||||||
|
|
||||||
/// device
|
/// device
|
||||||
@ -23,18 +23,18 @@ fn main() -> anyhow::Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
let options = match args.ver {
|
let config = match args.ver {
|
||||||
1.4 => Options::rmbg1_4(),
|
1.4 => ModelConfig::rmbg1_4(),
|
||||||
2.0 => Options::rmbg2_0(),
|
2.0 => ModelConfig::rmbg2_0(),
|
||||||
_ => unreachable!("Unsupported version"),
|
_ => unreachable!("Unsupported version"),
|
||||||
};
|
};
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = options
|
let config = config
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = RMBG::new(options)?;
|
let mut model = RMBG::new(config)?;
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
|
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::RTDETR, Annotator, DataLoader, Options};
|
use usls::{models::RTDETR, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -7,15 +7,14 @@ fn main() -> Result<()> {
|
|||||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::rtdetr_v2_s_coco()
|
let config = ModelConfig::rtdetr_v2_s_coco().commit()?;
|
||||||
// rtdetr_v1_r18vd_coco()
|
// rtdetr_v1_r18vd_coco()
|
||||||
// rtdetr_v2_ms_coco()
|
// rtdetr_v2_ms_coco()
|
||||||
// rtdetr_v2_m_coco()
|
// rtdetr_v2_m_coco()
|
||||||
// rtdetr_v2_l_coco()
|
// rtdetr_v2_l_coco()
|
||||||
// rtdetr_v2_x_coco()
|
// rtdetr_v2_x_coco()
|
||||||
.commit()?;
|
let mut model = RTDETR::new(config)?;
|
||||||
let mut model = RTDETR::new(options)?;
|
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::RTMO, Annotator, DataLoader, Options, Style, SKELETON_COCO_19};
|
use usls::{models::RTMO, Annotator, DataLoader, ModelConfig, Style, SKELETON_COCO_19};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -8,7 +8,7 @@ fn main() -> Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
|
let mut model = RTMO::new(ModelConfig::rtmo_s().commit()?)?;
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{
|
use usls::{
|
||||||
models::{SamKind, SamPrompt, SAM},
|
models::{SamKind, SamPrompt, SAM},
|
||||||
Annotator, DataLoader, Options, Scale,
|
Annotator, DataLoader, ModelConfig, Scale,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
@ -28,40 +28,22 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
// Build model
|
// Build model
|
||||||
let (options_encoder, options_decoder) = match args.kind.as_str().try_into()? {
|
let config = match args.kind.as_str().try_into()? {
|
||||||
SamKind::Sam => (
|
SamKind::Sam => ModelConfig::sam_v1_base(),
|
||||||
Options::sam_v1_base_encoder(),
|
|
||||||
Options::sam_v1_base_decoder(),
|
|
||||||
),
|
|
||||||
SamKind::Sam2 => match args.scale.as_str().try_into()? {
|
SamKind::Sam2 => match args.scale.as_str().try_into()? {
|
||||||
Scale::T => (Options::sam2_tiny_encoder(), Options::sam2_tiny_decoder()),
|
Scale::T => ModelConfig::sam2_tiny(),
|
||||||
Scale::S => (Options::sam2_small_encoder(), Options::sam2_small_decoder()),
|
Scale::S => ModelConfig::sam2_small(),
|
||||||
Scale::B => (
|
Scale::B => ModelConfig::sam2_base_plus(),
|
||||||
Options::sam2_base_plus_encoder(),
|
|
||||||
Options::sam2_base_plus_decoder(),
|
|
||||||
),
|
|
||||||
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
|
||||||
},
|
},
|
||||||
|
SamKind::MobileSam => ModelConfig::mobile_sam_tiny(),
|
||||||
|
SamKind::SamHq => ModelConfig::sam_hq_tiny(),
|
||||||
|
SamKind::EdgeSam => ModelConfig::edge_sam_3x(),
|
||||||
|
}
|
||||||
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
|
.commit()?;
|
||||||
|
|
||||||
SamKind::MobileSam => (
|
let mut model = SAM::new(config)?;
|
||||||
Options::mobile_sam_tiny_encoder(),
|
|
||||||
Options::mobile_sam_tiny_decoder(),
|
|
||||||
),
|
|
||||||
SamKind::SamHq => (
|
|
||||||
Options::sam_hq_tiny_encoder(),
|
|
||||||
Options::sam_hq_tiny_decoder(),
|
|
||||||
),
|
|
||||||
SamKind::EdgeSam => (
|
|
||||||
Options::edge_sam_3x_encoder(),
|
|
||||||
Options::edge_sam_3x_decoder(),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
let options_encoder = options_encoder
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?;
|
|
||||||
let options_decoder = options_decoder.commit()?;
|
|
||||||
let mut model = SAM::new(options_encoder, options_decoder)?;
|
|
||||||
|
|
||||||
// Load image
|
// Load image
|
||||||
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{
|
use usls::{
|
||||||
models::{SamPrompt, SAM2},
|
models::{SamPrompt, SAM2},
|
||||||
Annotator, DataLoader, Options, Scale,
|
Annotator, DataLoader, ModelConfig, Scale,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
@ -25,33 +25,16 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// Build model
|
// Build model
|
||||||
let (options_encoder, options_decoder) = match args.scale.as_str().try_into()? {
|
let config = match args.scale.as_str().try_into()? {
|
||||||
Scale::T => (
|
Scale::T => ModelConfig::sam2_1_tiny(),
|
||||||
Options::sam2_1_tiny_encoder(),
|
Scale::S => ModelConfig::sam2_1_small(),
|
||||||
Options::sam2_1_tiny_decoder(),
|
Scale::B => ModelConfig::sam2_1_base_plus(),
|
||||||
),
|
Scale::L => ModelConfig::sam2_1_large(),
|
||||||
Scale::S => (
|
|
||||||
Options::sam2_1_small_encoder(),
|
|
||||||
Options::sam2_1_small_decoder(),
|
|
||||||
),
|
|
||||||
Scale::B => (
|
|
||||||
Options::sam2_1_base_plus_encoder(),
|
|
||||||
Options::sam2_1_base_plus_decoder(),
|
|
||||||
),
|
|
||||||
Scale::L => (
|
|
||||||
Options::sam2_1_large_encoder(),
|
|
||||||
Options::sam2_1_large_decoder(),
|
|
||||||
),
|
|
||||||
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t, l.", args.scale),
|
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t, l.", args.scale),
|
||||||
};
|
}
|
||||||
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
let options_encoder = options_encoder
|
.commit()?;
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
let mut model = SAM2::new(config)?;
|
||||||
.commit()?;
|
|
||||||
let options_decoder = options_decoder
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?;
|
|
||||||
let mut model = SAM2::new(options_encoder, options_decoder)?;
|
|
||||||
|
|
||||||
// Load image
|
// Load image
|
||||||
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::Sapiens, Annotator, DataLoader, Options};
|
use usls::{models::Sapiens, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -17,10 +17,10 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
// build
|
// build
|
||||||
let options = Options::sapiens_seg_0_3b()
|
let config = ModelConfig::sapiens_seg_0_3b()
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = Sapiens::new(options)?;
|
let mut model = Sapiens::new(config)?;
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["images/paul-george.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/paul-george.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::SLANet, Annotator, Color, DataLoader, Options};
|
use usls::{models::SLANet, Annotator, Color, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -26,11 +26,11 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::slanet_lcnet_v2_mobile_ch()
|
let config = ModelConfig::slanet_lcnet_v2_mobile_ch()
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = SLANet::new(options)?;
|
let mut model = SLANet::new(config)?;
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&[args.source])?;
|
let xs = DataLoader::try_read_n(&[args.source])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::SmolVLM, DataLoader, Options, Scale};
|
use usls::{models::SmolVLM, DataLoader, ModelConfig, Scale};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -29,32 +29,15 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let (options_vision_encoder, options_text_embed, options_decode) =
|
let config = match args.scale.as_str().try_into()? {
|
||||||
match args.scale.as_str().try_into()? {
|
Scale::Million(256.) => ModelConfig::smolvlm_256m(),
|
||||||
Scale::Million(256.) => (
|
Scale::Million(500.) => ModelConfig::smolvlm_500m(),
|
||||||
Options::smolvlm_vision_256m(),
|
_ => unimplemented!(),
|
||||||
Options::smolvlm_text_embed_256m(),
|
}
|
||||||
Options::smolvlm_decoder_256m(),
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
),
|
.commit()?;
|
||||||
Scale::Million(500.) => (
|
|
||||||
Options::smolvlm_vision_500m(),
|
|
||||||
Options::smolvlm_text_embed_500m(),
|
|
||||||
Options::smolvlm_decoder_500m(),
|
|
||||||
),
|
|
||||||
_ => unimplemented!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut model = SmolVLM::new(
|
let mut model = SmolVLM::new(config)?;
|
||||||
options_vision_encoder
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
options_text_embed
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
options_decode
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.commit()?,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// load images
|
// load images
|
||||||
let xs = DataLoader::try_read_n(&args.source)?;
|
let xs = DataLoader::try_read_n(&args.source)?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::SVTR, DataLoader, Options};
|
use usls::{models::SVTR, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -22,13 +22,13 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::ppocr_rec_v4_ch()
|
let config = ModelConfig::ppocr_rec_v4_ch()
|
||||||
// ppocr_rec_v4_en()
|
// ppocr_rec_v4_en()
|
||||||
// repsvtr_ch()
|
// repsvtr_ch()
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = SVTR::new(options)?;
|
let mut model = SVTR::new(config)?;
|
||||||
|
|
||||||
// load images
|
// load images
|
||||||
let dl = DataLoader::new("./examples/svtr/images")?
|
let dl = DataLoader::new("./examples/svtr/images")?
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use usls::{
|
use usls::{
|
||||||
models::{TrOCR, TrOCRKind},
|
models::{TrOCR, TrOCRKind},
|
||||||
DataLoader, Options, Scale,
|
DataLoader, ModelConfig, Scale,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
@ -38,52 +38,22 @@ fn main() -> anyhow::Result<()> {
|
|||||||
])?;
|
])?;
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let (options_encoder, options_decoder, options_decoder_merged) =
|
let config = match args.scale.as_str().try_into()? {
|
||||||
match args.scale.as_str().try_into()? {
|
Scale::S => match args.kind.as_str().try_into()? {
|
||||||
Scale::S => match args.kind.as_str().try_into()? {
|
TrOCRKind::Printed => ModelConfig::trocr_small_printed(),
|
||||||
TrOCRKind::Printed => (
|
TrOCRKind::HandWritten => ModelConfig::trocr_small_handwritten(),
|
||||||
Options::trocr_encoder_small_printed(),
|
},
|
||||||
Options::trocr_decoder_small_printed(),
|
Scale::B => match args.kind.as_str().try_into()? {
|
||||||
Options::trocr_decoder_merged_small_printed(),
|
TrOCRKind::Printed => ModelConfig::trocr_base_printed(),
|
||||||
),
|
TrOCRKind::HandWritten => ModelConfig::trocr_base_handwritten(),
|
||||||
TrOCRKind::HandWritten => (
|
},
|
||||||
Options::trocr_encoder_small_handwritten(),
|
x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
|
||||||
Options::trocr_decoder_small_handwritten(),
|
}
|
||||||
Options::trocr_decoder_merged_small_handwritten(),
|
.with_device_all(args.device.as_str().try_into()?)
|
||||||
),
|
.with_dtype_all(args.dtype.as_str().try_into()?)
|
||||||
},
|
.commit()?;
|
||||||
Scale::B => match args.kind.as_str().try_into()? {
|
|
||||||
TrOCRKind::Printed => (
|
|
||||||
Options::trocr_encoder_base_printed(),
|
|
||||||
Options::trocr_decoder_base_printed(),
|
|
||||||
Options::trocr_decoder_merged_base_printed(),
|
|
||||||
),
|
|
||||||
TrOCRKind::HandWritten => (
|
|
||||||
Options::trocr_encoder_base_handwritten(),
|
|
||||||
Options::trocr_decoder_base_handwritten(),
|
|
||||||
Options::trocr_decoder_merged_base_handwritten(),
|
|
||||||
),
|
|
||||||
},
|
|
||||||
x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut model = TrOCR::new(
|
let mut model = TrOCR::new(config)?;
|
||||||
options_encoder
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_decoder
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
options_decoder_merged
|
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
||||||
.with_batch_size(xs.len())
|
|
||||||
.commit()?,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// inference
|
// inference
|
||||||
let ys = model.forward(&xs)?;
|
let ys = model.forward(&xs)?;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{
|
use usls::{
|
||||||
models::{SamPrompt, SAM2, YOLO},
|
models::{SamPrompt, SAM2, YOLO},
|
||||||
Annotator, DataLoader, Options, Scale, Style,
|
Annotator, DataLoader, ModelConfig, Scale, Style,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
@ -21,17 +21,14 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build SAM
|
// build SAM
|
||||||
let (options_encoder, options_decoder) = (
|
let mut sam = SAM2::new(ModelConfig::sam2_1_tiny().commit()?)?;
|
||||||
Options::sam2_1_tiny_encoder().commit()?,
|
|
||||||
Options::sam2_1_tiny_decoder().commit()?,
|
|
||||||
);
|
|
||||||
let mut sam = SAM2::new(options_encoder, options_decoder)?;
|
|
||||||
|
|
||||||
// build YOLOv8
|
// build YOLOv8
|
||||||
let options_yolo = Options::yolo_detect()
|
let options_yolo = ModelConfig::yolo_detect()
|
||||||
.with_model_scale(Scale::N)
|
.with_scale(Scale::N)
|
||||||
.with_model_version(8.into())
|
.with_version(8.into())
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
|
.auto_yolo_model_file()
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut yolo = YOLO::new(options_yolo)?;
|
let mut yolo = YOLO::new(options_yolo)?;
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ cargo run -r --example yolo -- --ver 8 --task obb --scale n --image-width 1024 -
|
|||||||
cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv11-Obb
|
cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv11-Obb
|
||||||
```
|
```
|
||||||
|
|
||||||
**`cargo run -r --example yolo -- --help` for more options**
|
**`cargo run -r --example yolo -- --help` for more configuration options**
|
||||||
|
|
||||||
## Other YOLOv8 Solution Models
|
## Other YOLOv8 Solution Models
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{
|
use usls::{
|
||||||
models::YOLO, Annotator, DataLoader, Options, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
|
models::YOLO, Annotator, DataLoader, ModelConfig, Style, NAMES_COCO_80,
|
||||||
NAMES_IMAGENET_1K, SKELETON_COCO_19, SKELETON_COLOR_COCO_19,
|
NAMES_COCO_KEYPOINTS_17, NAMES_IMAGENET_1K, SKELETON_COCO_19, SKELETON_COLOR_COCO_19,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(argh::FromArgs, Debug)]
|
#[derive(argh::FromArgs, Debug)]
|
||||||
@ -132,14 +132,15 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
let mut options = Options::yolo()
|
let mut config = ModelConfig::yolo()
|
||||||
.with_model_file(&args.model.unwrap_or_default())
|
.with_model_file(&args.model.unwrap_or_default())
|
||||||
.with_model_task(args.task.as_str().try_into()?)
|
.with_task(args.task.as_str().try_into()?)
|
||||||
.with_model_version(args.ver.try_into()?)
|
.with_version(args.ver.try_into()?)
|
||||||
.with_model_scale(args.scale.as_str().try_into()?)
|
.with_scale(args.scale.as_str().try_into()?)
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.with_trt_fp16(args.trt_fp16)
|
// .with_trt_fp16(args.trt_fp16)
|
||||||
|
.with_model_trt_fp16(args.trt_fp16)
|
||||||
.with_model_ixx(
|
.with_model_ixx(
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
@ -175,27 +176,27 @@ fn main() -> Result<()> {
|
|||||||
.exclude_classes(&args.exclude_classes);
|
.exclude_classes(&args.exclude_classes);
|
||||||
|
|
||||||
if args.use_coco_80_classes {
|
if args.use_coco_80_classes {
|
||||||
options = options.with_class_names(&NAMES_COCO_80);
|
config = config.with_class_names(&NAMES_COCO_80);
|
||||||
}
|
}
|
||||||
|
|
||||||
if args.use_coco_17_keypoints_classes {
|
if args.use_coco_17_keypoints_classes {
|
||||||
options = options.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
|
config = config.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
|
||||||
}
|
}
|
||||||
|
|
||||||
if args.use_imagenet_1k_classes {
|
if args.use_imagenet_1k_classes {
|
||||||
options = options.with_class_names(&NAMES_IMAGENET_1K);
|
config = config.with_class_names(&NAMES_IMAGENET_1K);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(nc) = args.num_classes {
|
if let Some(nc) = args.num_classes {
|
||||||
options = options.with_nc(nc);
|
config = config.with_nc(nc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(nk) = args.num_keypoints {
|
if let Some(nk) = args.num_keypoints {
|
||||||
options = options.with_nk(nk);
|
config = config.with_nk(nk);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !args.class_names.is_empty() {
|
if !args.class_names.is_empty() {
|
||||||
options = options.with_class_names(
|
config = config.with_class_names(
|
||||||
&args
|
&args
|
||||||
.class_names
|
.class_names
|
||||||
.iter()
|
.iter()
|
||||||
@ -205,7 +206,7 @@ fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !args.keypoint_names.is_empty() {
|
if !args.keypoint_names.is_empty() {
|
||||||
options = options.with_keypoint_names(
|
config = config.with_keypoint_names(
|
||||||
&args
|
&args
|
||||||
.keypoint_names
|
.keypoint_names
|
||||||
.iter()
|
.iter()
|
||||||
@ -215,7 +216,7 @@ fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let mut model = YOLO::try_from(options.commit()?)?;
|
let mut model = YOLO::try_from(config.auto_yolo_model_file().commit()?)?;
|
||||||
|
|
||||||
// build dataloader
|
// build dataloader
|
||||||
let dl = DataLoader::new(&args.source)?
|
let dl = DataLoader::new(&args.source)?
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
cargo run -r --example yoloe
|
cargo run -r -F cuda --example yoloe -- --device cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::YOLO, Annotator, DataLoader, Options, Style};
|
use usls::{models::YOLO, Annotator, DataLoader, ModelConfig, Style};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -21,8 +21,8 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// options
|
// config
|
||||||
let options = Options::yoloe_v8s_seg_pf()
|
let config = ModelConfig::yoloe_v8s_seg_pf()
|
||||||
// yoloe_v8m_seg_pf()
|
// yoloe_v8m_seg_pf()
|
||||||
// yoloe_v8l_seg_pf()
|
// yoloe_v8l_seg_pf()
|
||||||
// yoloe_11s_seg_pf()
|
// yoloe_11s_seg_pf()
|
||||||
@ -31,7 +31,7 @@ fn main() -> Result<()> {
|
|||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
let mut model = YOLO::new(options)?;
|
let mut model = YOLO::new(config)?;
|
||||||
|
|
||||||
// load
|
// load
|
||||||
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
|
use usls::{models::YOLOPv2, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
@ -8,8 +8,7 @@ fn main() -> Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let options = Options::yolop_v2_480x800().commit()?;
|
let mut model = YOLOPv2::new(ModelConfig::yolop_v2_480x800().commit()?)?;
|
||||||
let mut model = YOLOPv2::new(options)?;
|
|
||||||
|
|
||||||
// load image
|
// load image
|
||||||
let xs = DataLoader::try_read_n(&["images/car-view.jpg"])?;
|
let xs = DataLoader::try_read_n(&["images/car-view.jpg"])?;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use usls::{models::YOLO, Annotator, DataLoader, Options};
|
use usls::{models::YOLO, Annotator, DataLoader, ModelConfig};
|
||||||
|
|
||||||
#[derive(argh::FromArgs)]
|
#[derive(argh::FromArgs)]
|
||||||
/// Example
|
/// Example
|
||||||
@ -22,7 +22,7 @@ fn main() -> Result<()> {
|
|||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
// build model
|
// build model
|
||||||
let config = Options::yolo_v8_rtdetr_l()
|
let config = ModelConfig::yolo_v8_rtdetr_l()
|
||||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||||
.with_model_device(args.device.as_str().try_into()?)
|
.with_model_device(args.device.as_str().try_into()?)
|
||||||
.commit()?;
|
.commit()?;
|
||||||
|
@ -13,8 +13,8 @@ use prost::Message;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax, Ops, Ts,
|
build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, EngineConfig, Iiix,
|
||||||
Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
|
MinOptMax, Ops, Ts, Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
|
||||||
};
|
};
|
||||||
|
|
||||||
impl From<TensorElementType> for DType {
|
impl From<TensorElementType> for DType {
|
||||||
@ -93,6 +93,20 @@ impl Default for Engine {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Engine {
|
impl Engine {
|
||||||
|
pub fn try_from_config(config: &EngineConfig) -> Result<Self> {
|
||||||
|
Self {
|
||||||
|
file: config.file.clone(),
|
||||||
|
spec: config.spec.clone(),
|
||||||
|
iiixs: config.iiixs.clone(),
|
||||||
|
device: config.device,
|
||||||
|
trt_fp16: config.trt_fp16,
|
||||||
|
num_dry_run: config.num_dry_run,
|
||||||
|
graph_opt_level: config.ort_graph_opt_level,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn build(mut self) -> Result<Self> {
|
pub fn build(mut self) -> Result<Self> {
|
||||||
let name = format!("[{}] ort_initialization", self.spec);
|
let name = format!("[{}] ort_initialization", self.spec);
|
||||||
elapsed!(&name, self.ts, {
|
elapsed!(&name, self.ts, {
|
||||||
|
112
src/inference/engine_config.rs
Normal file
112
src/inference/engine_config.rs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
use aksr::Builder;
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
use crate::{try_fetch_file_stem, DType, Device, Hub, Iiix, MinOptMax};
|
||||||
|
|
||||||
|
/// Settings for a single inference engine (one model file).
///
/// `try_commit` resolves `file` and fills in `spec`; `Engine::try_from_config` then copies
/// the fields into an `Engine`.
#[derive(Builder, Debug, Clone, Default)]
|
||||||
|
pub struct EngineConfig {
|
||||||
|
/// Model file: an existing local path, a GitHub release URL, or a file name that
/// `try_commit` looks up under the model name on the Hub.
pub file: String,
|
||||||
|
/// Device handed to the engine.
pub device: Device,
|
||||||
|
/// Entries appended by `with_ixx` and `with_batch_size`.
pub iiixs: Vec<Iiix>,
|
||||||
|
/// Copied to the engine's `num_dry_run`.
/// NOTE(review): presumably the number of warm-up runs — confirm against `Engine`.
pub num_dry_run: usize,
|
||||||
|
/// Copied to the engine's `trt_fp16`.
/// NOTE(review): presumably enables FP16 for the TensorRT provider — confirm.
pub trt_fp16: bool,
|
||||||
|
/// Copied to the engine's `graph_opt_level`; `None` by default.
pub ort_graph_opt_level: Option<u8>,
|
||||||
|
/// Label of the form `<name>/<file stem>` written by `try_commit`.
pub spec: String, // TODO: move out
|
||||||
|
/// Data type used by `try_commit` to pick the remote file name.
pub dtype: DType, // For dynamically loading the model
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineConfig {
|
||||||
|
pub fn try_commit(mut self, name: &str) -> Result<Self> {
|
||||||
|
// Identify the local model or fetch the remote model
|
||||||
|
if std::path::PathBuf::from(&self.file).exists() {
|
||||||
|
// Local
|
||||||
|
self.spec = format!("{}/{}", name, try_fetch_file_stem(&self.file)?);
|
||||||
|
} else {
|
||||||
|
if self.file.is_empty() && name.is_empty() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Failed to commit model. Invalid model config: neither `name` nor `file` were specified. Failed to fetch model from Hub."
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remote
|
||||||
|
match Hub::is_valid_github_release_url(&self.file) {
|
||||||
|
Some((owner, repo, tag, _file_name)) => {
|
||||||
|
let stem = try_fetch_file_stem(&self.file)?;
|
||||||
|
self.spec = format!("{}/{}-{}-{}-{}", name, owner, repo, tag, stem);
|
||||||
|
self.file = Hub::default().try_fetch(&self.file)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
// append dtype to model file
|
||||||
|
match self.dtype {
|
||||||
|
d @ (DType::Auto | DType::Fp32) => {
|
||||||
|
if self.file.is_empty() {
|
||||||
|
self.file = format!("{}.onnx", d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dtype => {
|
||||||
|
if self.file.is_empty() {
|
||||||
|
self.file = format!("{}.onnx", dtype);
|
||||||
|
} else {
|
||||||
|
let pos = self.file.len() - 5; // .onnx
|
||||||
|
let suffix = self.file.split_off(pos);
|
||||||
|
self.file = format!("{}-{}{}", self.file, dtype, suffix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let stem = try_fetch_file_stem(&self.file)?;
|
||||||
|
self.spec = format!("{}/{}", name, stem);
|
||||||
|
self.file = Hub::default().try_fetch(&format!("{}/{}", name, self.file))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EngineConfig {
|
||||||
|
pub fn with_ixx(mut self, i: usize, ii: usize, x: MinOptMax) -> Self {
|
||||||
|
self.iiixs.push(Iiix::from((i, ii, x)));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_batch_size(mut self, x: MinOptMax) -> Self {
|
||||||
|
self.iiixs.push(Iiix::from((0, 0, x)));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generates `with_<field>_*` builder methods on `$ty` that forward to the
/// engine config stored in `$field`.
///
/// For a field `visual` this produces `with_visual_file`, `with_visual_dtype`,
/// `with_visual_device`, `with_visual_trt_fp16`, `with_visual_num_dry_run` and
/// `with_visual_ixx`. Each one consumes `self`, replaces `$field` with the
/// result of the matching `with_*` call on it, and returns the updated value.
#[macro_export]
macro_rules! impl_model_config_methods {
    ($ty:ty, $field:ident) => {
        paste::paste! {
            impl $ty {
                /// Sets the model file of this engine.
                pub fn [<with_ $field _file>](self, file: &str) -> Self {
                    Self { $field: self.$field.with_file(file), ..self }
                }

                /// Sets the data type of this engine.
                pub fn [<with_ $field _dtype>](self, dtype: $crate::DType) -> Self {
                    Self { $field: self.$field.with_dtype(dtype), ..self }
                }

                /// Sets the device of this engine.
                pub fn [<with_ $field _device>](self, device: $crate::Device) -> Self {
                    Self { $field: self.$field.with_device(device), ..self }
                }

                /// Sets the `trt_fp16` flag of this engine.
                pub fn [<with_ $field _trt_fp16>](self, enabled: bool) -> Self {
                    Self { $field: self.$field.with_trt_fp16(enabled), ..self }
                }

                /// Sets the number of dry runs of this engine.
                pub fn [<with_ $field _num_dry_run>](self, n: usize) -> Self {
                    Self { $field: self.$field.with_num_dry_run(n), ..self }
                }

                /// Appends an `(i, ii, x)` entry to this engine.
                pub fn [<with_ $field _ixx>](self, i: usize, ii: usize, x: $crate::MinOptMax) -> Self {
                    Self { $field: self.$field.with_ixx(i, ii, x), ..self }
                }
            }
        }
    };
}
|
@ -308,12 +308,12 @@ impl Image {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let (mut resizer, options) = build_resizer_filter(filter)?;
|
let (mut resizer, config) = build_resizer_filter(filter)?;
|
||||||
let x: DynamicImage = self.to_dyn();
|
let x: DynamicImage = self.to_dyn();
|
||||||
|
|
||||||
if let ResizeMode::FitExact = mode {
|
if let ResizeMode::FitExact = mode {
|
||||||
let mut dst = FImage::new(tw, th, PixelType::U8x3);
|
let mut dst = FImage::new(tw, th, PixelType::U8x3);
|
||||||
resizer.resize(&x, &mut dst, &options)?;
|
resizer.resize(&x, &mut dst, &config)?;
|
||||||
trans_info = trans_info
|
trans_info = trans_info
|
||||||
.with_height_scale(th as f32 / h0 as f32)
|
.with_height_scale(th as f32 / h0 as f32)
|
||||||
.with_width_scale(tw as f32 / w0 as f32);
|
.with_width_scale(tw as f32 / w0 as f32);
|
||||||
@ -362,7 +362,7 @@ impl Image {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
|
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
|
||||||
resizer.resize(&x, &mut dst_cropped, &options)?;
|
resizer.resize(&x, &mut dst_cropped, &config)?;
|
||||||
|
|
||||||
Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
|
Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
|
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
|
||||||
mod engine;
|
mod engine;
|
||||||
|
mod engine_config;
|
||||||
mod hbb;
|
mod hbb;
|
||||||
mod image;
|
mod image;
|
||||||
mod instance_meta;
|
mod instance_meta;
|
||||||
mod keypoint;
|
mod keypoint;
|
||||||
mod mask;
|
mod mask;
|
||||||
|
mod model_config;
|
||||||
mod obb;
|
mod obb;
|
||||||
mod polygon;
|
mod polygon;
|
||||||
mod prob;
|
mod prob;
|
||||||
@ -20,11 +22,13 @@ pub(crate) mod onnx {
|
|||||||
|
|
||||||
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
|
#[cfg(any(feature = "ort-download-binaries", feature = "ort-load-dynamic"))]
|
||||||
pub use engine::*;
|
pub use engine::*;
|
||||||
|
pub use engine_config::EngineConfig;
|
||||||
pub use hbb::*;
|
pub use hbb::*;
|
||||||
pub use image::*;
|
pub use image::*;
|
||||||
pub use instance_meta::*;
|
pub use instance_meta::*;
|
||||||
pub use keypoint::*;
|
pub use keypoint::*;
|
||||||
pub use mask::*;
|
pub use mask::*;
|
||||||
|
pub use model_config::*;
|
||||||
pub use obb::*;
|
pub use obb::*;
|
||||||
pub use polygon::*;
|
pub use polygon::*;
|
||||||
pub use prob::*;
|
pub use prob::*;
|
||||||
|
243
src/inference/model_config.rs
Normal file
243
src/inference/model_config.rs
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
use aksr::Builder;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
impl_model_config_methods, impl_process_config_methods,
|
||||||
|
models::{SamKind, YOLOPredsFormat},
|
||||||
|
EngineConfig, ProcessorConfig, Scale, Task, Version,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// ModelConfig for building models and inference
|
||||||
|
#[derive(Builder, Debug, Clone)]
|
||||||
|
pub struct ModelConfig {
|
||||||
|
// Basics
|
||||||
|
pub name: &'static str,
|
||||||
|
pub version: Option<Version>,
|
||||||
|
pub task: Option<Task>,
|
||||||
|
pub scale: Option<Scale>,
|
||||||
|
|
||||||
|
// Engines
|
||||||
|
pub model: EngineConfig,
|
||||||
|
pub visual: EngineConfig,
|
||||||
|
pub textual: EngineConfig,
|
||||||
|
pub encoder: EngineConfig,
|
||||||
|
pub decoder: EngineConfig,
|
||||||
|
pub visual_encoder: EngineConfig,
|
||||||
|
pub textual_encoder: EngineConfig,
|
||||||
|
pub visual_decoder: EngineConfig,
|
||||||
|
pub textual_decoder: EngineConfig,
|
||||||
|
pub textual_decoder_merged: EngineConfig,
|
||||||
|
pub size_encoder: EngineConfig,
|
||||||
|
pub size_decoder: EngineConfig,
|
||||||
|
pub coord_encoder: EngineConfig,
|
||||||
|
pub coord_decoder: EngineConfig,
|
||||||
|
pub visual_projection: EngineConfig,
|
||||||
|
pub textual_projection: EngineConfig,
|
||||||
|
|
||||||
|
// Processor
|
||||||
|
pub processor: ProcessorConfig,
|
||||||
|
|
||||||
|
// Others
|
||||||
|
pub class_names: Option<Vec<String>>, // TODO: remove Option
|
||||||
|
pub keypoint_names: Option<Vec<String>>, // TODO: remove Option
|
||||||
|
pub text_names: Option<Vec<String>>, // TODO: remove Option
|
||||||
|
pub class_confs: Vec<f32>,
|
||||||
|
pub keypoint_confs: Vec<f32>,
|
||||||
|
pub text_confs: Vec<f32>,
|
||||||
|
pub apply_softmax: Option<bool>,
|
||||||
|
pub topk: Option<usize>,
|
||||||
|
#[args(aka = "nc")]
|
||||||
|
pub num_classes: Option<usize>,
|
||||||
|
#[args(aka = "nk")]
|
||||||
|
pub num_keypoints: Option<usize>,
|
||||||
|
#[args(aka = "nm")]
|
||||||
|
pub num_masks: Option<usize>,
|
||||||
|
pub iou: Option<f32>,
|
||||||
|
pub apply_nms: Option<bool>,
|
||||||
|
pub find_contours: bool,
|
||||||
|
pub yolo_preds_format: Option<YOLOPredsFormat>,
|
||||||
|
pub classes_excluded: Vec<usize>,
|
||||||
|
pub classes_retained: Vec<usize>,
|
||||||
|
pub min_width: Option<f32>,
|
||||||
|
pub min_height: Option<f32>,
|
||||||
|
pub db_unclip_ratio: Option<f32>,
|
||||||
|
pub db_binary_thresh: Option<f32>,
|
||||||
|
pub sam_kind: Option<SamKind>,
|
||||||
|
pub sam_low_res_mask: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ModelConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
class_names: None,
|
||||||
|
keypoint_names: None,
|
||||||
|
text_names: None,
|
||||||
|
class_confs: vec![0.25f32],
|
||||||
|
keypoint_confs: vec![0.3f32],
|
||||||
|
text_confs: vec![0.25f32],
|
||||||
|
apply_softmax: Some(false),
|
||||||
|
num_classes: None,
|
||||||
|
num_keypoints: None,
|
||||||
|
num_masks: None,
|
||||||
|
iou: None,
|
||||||
|
find_contours: false,
|
||||||
|
yolo_preds_format: None,
|
||||||
|
classes_excluded: vec![],
|
||||||
|
classes_retained: vec![],
|
||||||
|
apply_nms: None,
|
||||||
|
min_width: None,
|
||||||
|
min_height: None,
|
||||||
|
db_unclip_ratio: Some(1.5),
|
||||||
|
db_binary_thresh: Some(0.2),
|
||||||
|
sam_kind: None,
|
||||||
|
sam_low_res_mask: None,
|
||||||
|
topk: None,
|
||||||
|
model: Default::default(),
|
||||||
|
encoder: Default::default(),
|
||||||
|
decoder: Default::default(),
|
||||||
|
visual: Default::default(),
|
||||||
|
textual: Default::default(),
|
||||||
|
visual_encoder: Default::default(),
|
||||||
|
textual_encoder: Default::default(),
|
||||||
|
visual_decoder: Default::default(),
|
||||||
|
textual_decoder: Default::default(),
|
||||||
|
textual_decoder_merged: Default::default(),
|
||||||
|
processor: ProcessorConfig::default(),
|
||||||
|
size_encoder: Default::default(),
|
||||||
|
size_decoder: Default::default(),
|
||||||
|
coord_encoder: Default::default(),
|
||||||
|
coord_decoder: Default::default(),
|
||||||
|
visual_projection: Default::default(),
|
||||||
|
textual_projection: Default::default(),
|
||||||
|
version: None,
|
||||||
|
task: None,
|
||||||
|
scale: None,
|
||||||
|
name: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ModelConfig {
|
||||||
|
pub fn exclude_classes(mut self, xs: &[usize]) -> Self {
|
||||||
|
self.classes_retained.clear();
|
||||||
|
self.classes_excluded.extend_from_slice(xs);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn retain_classes(mut self, xs: &[usize]) -> Self {
|
||||||
|
self.classes_excluded.clear();
|
||||||
|
self.classes_retained.extend_from_slice(xs);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn commit(mut self) -> anyhow::Result<Self> {
|
||||||
|
fn try_commit(name: &str, mut m: EngineConfig) -> anyhow::Result<EngineConfig> {
|
||||||
|
if !m.file.is_empty() {
|
||||||
|
m = m.try_commit(name)?;
|
||||||
|
return Ok(m);
|
||||||
|
}
|
||||||
|
Ok(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
self.model = try_commit(self.name, self.model)?;
|
||||||
|
self.visual = try_commit(self.name, self.visual)?;
|
||||||
|
self.textual = try_commit(self.name, self.textual)?;
|
||||||
|
self.encoder = try_commit(self.name, self.encoder)?;
|
||||||
|
self.decoder = try_commit(self.name, self.decoder)?;
|
||||||
|
self.visual_encoder = try_commit(self.name, self.visual_encoder)?;
|
||||||
|
self.textual_encoder = try_commit(self.name, self.textual_encoder)?;
|
||||||
|
self.visual_decoder = try_commit(self.name, self.visual_decoder)?;
|
||||||
|
self.textual_decoder = try_commit(self.name, self.textual_decoder)?;
|
||||||
|
self.textual_decoder_merged = try_commit(self.name, self.textual_decoder_merged)?;
|
||||||
|
self.size_encoder = try_commit(self.name, self.size_encoder)?;
|
||||||
|
self.size_decoder = try_commit(self.name, self.size_decoder)?;
|
||||||
|
self.coord_encoder = try_commit(self.name, self.coord_encoder)?;
|
||||||
|
self.coord_decoder = try_commit(self.name, self.coord_decoder)?;
|
||||||
|
self.visual_projection = try_commit(self.name, self.visual_projection)?;
|
||||||
|
self.textual_projection = try_commit(self.name, self.textual_projection)?;
|
||||||
|
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_batch_size_all(mut self, batch_size: usize) -> Self {
|
||||||
|
self.visual = self.visual.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.textual = self.textual.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.model = self.model.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.encoder = self.encoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.decoder = self.decoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.visual_encoder = self.visual_encoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.textual_encoder = self.textual_encoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.visual_decoder = self.visual_decoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.textual_decoder = self.textual_decoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.textual_decoder_merged = self
|
||||||
|
.textual_decoder_merged
|
||||||
|
.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.size_encoder = self.size_encoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.size_decoder = self.size_decoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.coord_encoder = self.coord_encoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.coord_decoder = self.coord_decoder.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.visual_projection = self.visual_projection.with_ixx(0, 0, batch_size.into());
|
||||||
|
self.textual_projection = self.textual_projection.with_ixx(0, 0, batch_size.into());
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_device_all(mut self, device: crate::Device) -> Self {
|
||||||
|
self.visual = self.visual.with_device(device);
|
||||||
|
self.textual = self.textual.with_device(device);
|
||||||
|
self.model = self.model.with_device(device);
|
||||||
|
self.encoder = self.encoder.with_device(device);
|
||||||
|
self.decoder = self.decoder.with_device(device);
|
||||||
|
self.visual_encoder = self.visual_encoder.with_device(device);
|
||||||
|
self.textual_encoder = self.textual_encoder.with_device(device);
|
||||||
|
self.visual_decoder = self.visual_decoder.with_device(device);
|
||||||
|
self.textual_decoder = self.textual_decoder.with_device(device);
|
||||||
|
self.textual_decoder_merged = self.textual_decoder_merged.with_device(device);
|
||||||
|
self.size_encoder = self.size_encoder.with_device(device);
|
||||||
|
self.size_decoder = self.size_decoder.with_device(device);
|
||||||
|
self.coord_encoder = self.coord_encoder.with_device(device);
|
||||||
|
self.coord_decoder = self.coord_decoder.with_device(device);
|
||||||
|
self.visual_projection = self.visual_projection.with_device(device);
|
||||||
|
self.textual_projection = self.textual_projection.with_device(device);
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_dtype_all(mut self, dtype: crate::DType) -> Self {
|
||||||
|
self.visual = self.visual.with_dtype(dtype);
|
||||||
|
self.textual = self.textual.with_dtype(dtype);
|
||||||
|
self.model = self.model.with_dtype(dtype);
|
||||||
|
self.encoder = self.encoder.with_dtype(dtype);
|
||||||
|
self.decoder = self.decoder.with_dtype(dtype);
|
||||||
|
self.visual_encoder = self.visual_encoder.with_dtype(dtype);
|
||||||
|
self.textual_encoder = self.textual_encoder.with_dtype(dtype);
|
||||||
|
self.visual_decoder = self.visual_decoder.with_dtype(dtype);
|
||||||
|
self.textual_decoder = self.textual_decoder.with_dtype(dtype);
|
||||||
|
self.textual_decoder_merged = self.textual_decoder_merged.with_dtype(dtype);
|
||||||
|
self.size_encoder = self.size_encoder.with_dtype(dtype);
|
||||||
|
self.size_decoder = self.size_decoder.with_dtype(dtype);
|
||||||
|
self.coord_encoder = self.coord_encoder.with_dtype(dtype);
|
||||||
|
self.coord_decoder = self.coord_decoder.with_dtype(dtype);
|
||||||
|
self.visual_projection = self.visual_projection.with_dtype(dtype);
|
||||||
|
self.textual_projection = self.textual_projection.with_dtype(dtype);
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_model_config_methods!(ModelConfig, model);
|
||||||
|
impl_model_config_methods!(ModelConfig, visual);
|
||||||
|
impl_model_config_methods!(ModelConfig, textual);
|
||||||
|
impl_model_config_methods!(ModelConfig, encoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, decoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, visual_encoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, textual_encoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, visual_decoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, textual_decoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, textual_decoder_merged);
|
||||||
|
impl_model_config_methods!(ModelConfig, size_encoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, size_decoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, coord_encoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, coord_decoder);
|
||||||
|
impl_model_config_methods!(ModelConfig, visual_projection);
|
||||||
|
impl_model_config_methods!(ModelConfig, textual_projection);
|
||||||
|
impl_process_config_methods!(ModelConfig, processor);
|
@ -367,14 +367,14 @@ impl DataLoader {
|
|||||||
fn load_image_paths_from_folder(source: &str, exts: &[&str]) -> Result<Vec<PathBuf>> {
|
fn load_image_paths_from_folder(source: &str, exts: &[&str]) -> Result<Vec<PathBuf>> {
|
||||||
let source_path = Path::new(source);
|
let source_path = Path::new(source);
|
||||||
let mut paths: Vec<PathBuf> = Vec::new();
|
let mut paths: Vec<PathBuf> = Vec::new();
|
||||||
let options = MatchOptions {
|
let config = MatchOptions {
|
||||||
case_sensitive: false,
|
case_sensitive: false,
|
||||||
require_literal_separator: false,
|
require_literal_separator: false,
|
||||||
require_literal_leading_dot: false,
|
require_literal_leading_dot: false,
|
||||||
};
|
};
|
||||||
for ext in exts.iter() {
|
for ext in exts.iter() {
|
||||||
let pattern = source_path.join(format!("*.{}", ext));
|
let pattern = source_path.join(format!("*.{}", ext));
|
||||||
let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), options)?
|
let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), config)?
|
||||||
.filter_map(|entry| entry.ok())
|
.filter_map(|entry| entry.ok())
|
||||||
.collect();
|
.collect();
|
||||||
paths.extend(paths_);
|
paths.extend(paths_);
|
||||||
@ -393,12 +393,12 @@ impl DataLoader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn glob(pattern: &str, sort: bool, case_sensitive: bool) -> anyhow::Result<Vec<PathBuf>> {
|
fn glob(pattern: &str, sort: bool, case_sensitive: bool) -> anyhow::Result<Vec<PathBuf>> {
|
||||||
let options = MatchOptions {
|
let config = MatchOptions {
|
||||||
case_sensitive,
|
case_sensitive,
|
||||||
require_literal_separator: false,
|
require_literal_separator: false,
|
||||||
require_literal_leading_dot: false,
|
require_literal_leading_dot: false,
|
||||||
};
|
};
|
||||||
let mut paths: Vec<PathBuf> = glob_with(pattern, options)?
|
let mut paths: Vec<PathBuf> = glob_with(pattern, config)?
|
||||||
.filter_map(|entry| entry.ok())
|
.filter_map(|entry| entry.ok())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@ -479,7 +479,7 @@ impl DataLoader {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn with_batch_size(mut self, x: usize) -> Self {
|
pub fn with_batch_size_all(mut self, x: usize) -> Self {
|
||||||
self.batch_size = x;
|
self.batch_size = x;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
use crate::NAMES_IMAGENET_1K;
|
|
||||||
|
|
||||||
/// Model configuration for `BEiT`
|
/// Model configuration for `BEiT`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn beit() -> Self {
|
pub fn beit() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("beit")
|
.with_name("beit")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
@ -13,7 +11,7 @@ impl crate::Options {
|
|||||||
.with_image_std(&[0.5, 0.5, 0.5])
|
.with_image_std(&[0.5, 0.5, 0.5])
|
||||||
.with_normalize(true)
|
.with_normalize(true)
|
||||||
.with_apply_softmax(true)
|
.with_apply_softmax(true)
|
||||||
.with_class_names(&NAMES_IMAGENET_1K)
|
.with_class_names(&crate::NAMES_IMAGENET_1K)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn beit_base() -> Self {
|
pub fn beit_base() -> Self {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/// Model configuration for `BEN2`
|
/// Model configuration for `BEN2`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn ben2_base() -> Self {
|
pub fn ben2_base() -> Self {
|
||||||
Self::rmbg().with_model_file("ben2-base.onnx")
|
Self::rmbg().with_model_file("ben2-base.onnx")
|
||||||
}
|
}
|
||||||
|
@ -1,34 +1,24 @@
|
|||||||
/// Model configuration for `BLIP`
|
/// Model configuration for `BLIP`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn blip() -> Self {
|
|
||||||
Self::default().with_model_name("blip").with_batch_size(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(clippy::excessive_precision)]
|
#[allow(clippy::excessive_precision)]
|
||||||
pub fn blip_visual() -> Self {
|
pub fn blip() -> Self {
|
||||||
Self::blip()
|
Self::default()
|
||||||
.with_model_kind(crate::Kind::Vision)
|
.with_name("blip")
|
||||||
.with_model_ixx(0, 2, 384.into())
|
.with_batch_size_all(1)
|
||||||
.with_model_ixx(0, 3, 384.into())
|
.with_visual_ixx(0, 1, 3.into())
|
||||||
|
.with_visual_ixx(0, 2, 384.into())
|
||||||
|
.with_visual_ixx(0, 3, 384.into())
|
||||||
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
||||||
.with_image_std(&[0.26862954, 0.26130258, 0.27577711])
|
.with_image_std(&[0.26862954, 0.26130258, 0.27577711])
|
||||||
.with_resize_filter("Bilinear")
|
|
||||||
.with_normalize(true)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn blip_textual() -> Self {
|
pub fn blip_v1_base_caption() -> Self {
|
||||||
Self::blip().with_model_kind(crate::Kind::Language)
|
Self::blip()
|
||||||
}
|
.with_version(1.into())
|
||||||
|
.with_visual_file("v1-base-caption-visual.onnx")
|
||||||
pub fn blip_v1_base_caption_visual() -> Self {
|
.with_textual_file("v1-base-caption-textual.onnx")
|
||||||
Self::blip_visual()
|
.with_tokenizer_file("blip/tokenizer.json")
|
||||||
.with_model_version(1.into())
|
.with_tokenizer_config_file("blip/tokenizer_config.json")
|
||||||
.with_model_file("v1-base-caption-visual.onnx")
|
.with_special_tokens_map_file("blip/special_tokens_map.json")
|
||||||
}
|
|
||||||
|
|
||||||
pub fn blip_v1_base_caption_textual() -> Self {
|
|
||||||
Self::blip_textual()
|
|
||||||
.with_model_version(1.into())
|
|
||||||
.with_model_file("v1-base-caption-textual.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,26 +2,34 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
|
|
||||||
use crate::{
|
use crate::{elapsed, Engine, Image, LogitsSampler, ModelConfig, Processor, Ts, Xs, X, Y};
|
||||||
elapsed,
|
|
||||||
models::{BaseModelTextual, BaseModelVisual},
|
|
||||||
Image, LogitsSampler, Options, Ts, Xs, X, Y,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct Blip {
|
pub struct Blip {
|
||||||
visual: BaseModelVisual,
|
visual: Engine,
|
||||||
textual: BaseModelTextual,
|
textual: Engine,
|
||||||
ts: Ts,
|
batch: usize,
|
||||||
|
height: usize,
|
||||||
|
width: usize,
|
||||||
|
processor: Processor,
|
||||||
max_length: usize,
|
max_length: usize,
|
||||||
eos_token_id: u32,
|
eos_token_id: u32,
|
||||||
|
ts: Ts,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Blip {
|
impl Blip {
|
||||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let visual = BaseModelVisual::new(options_visual)?;
|
let visual = Engine::try_from_config(&config.visual)?;
|
||||||
let textual = BaseModelTextual::new(options_textual)?;
|
let textual = Engine::try_from_config(&config.textual)?;
|
||||||
let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
|
let (batch, height, width) = (
|
||||||
|
visual.batch().opt(),
|
||||||
|
visual.try_height().unwrap_or(&384.into()).opt(),
|
||||||
|
visual.try_width().unwrap_or(&384.into()).opt(),
|
||||||
|
);
|
||||||
|
let ts = Ts::merge(&[visual.ts(), textual.ts()]);
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
let max_length = 512;
|
let max_length = 512;
|
||||||
let eos_token_id = 102;
|
let eos_token_id = 102;
|
||||||
|
|
||||||
@ -31,17 +39,24 @@ impl Blip {
|
|||||||
ts,
|
ts,
|
||||||
max_length,
|
max_length,
|
||||||
eos_token_id,
|
eos_token_id,
|
||||||
|
batch,
|
||||||
|
height,
|
||||||
|
width,
|
||||||
|
processor,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
|
pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
|
||||||
self.visual.encode(xs)
|
let ys = self.processor.process_images(xs)?;
|
||||||
|
self.batch = xs.len(); // update
|
||||||
|
let ys = self.visual.run(ys.into())?;
|
||||||
|
|
||||||
|
Ok(ys[0].to_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_texts(&mut self, text: Option<&str>) -> Result<Vec<Vec<f32>>> {
|
pub fn encode_texts(&mut self, text: Option<&str>) -> Result<Vec<Vec<f32>>> {
|
||||||
let input_ids = self
|
let input_ids = self
|
||||||
.textual
|
.processor
|
||||||
.processor()
|
|
||||||
.encode_text_ids(text.unwrap_or_default(), false)?;
|
.encode_text_ids(text.unwrap_or_default(), false)?;
|
||||||
Ok(vec![input_ids.clone(); self.batch()])
|
Ok(vec![input_ids.clone(); self.batch()])
|
||||||
}
|
}
|
||||||
@ -70,11 +85,11 @@ impl Blip {
|
|||||||
let input_ids_attn_mask = X::ones(input_ids_nd.dims());
|
let input_ids_attn_mask = X::ones(input_ids_nd.dims());
|
||||||
|
|
||||||
// decode
|
// decode
|
||||||
let outputs = self.textual.inference(Xs::from(vec![
|
let outputs = self.textual.run(Xs::from(vec![
|
||||||
input_ids_nd,
|
input_ids_nd,
|
||||||
input_ids_attn_mask,
|
input_ids_attn_mask,
|
||||||
image_embeds.clone(),
|
image_embeds.clone(),
|
||||||
X::ones(&[self.visual().batch(), image_embeds.dims()[1]]), // image_embeds_attn_mask
|
X::ones(&[self.batch(), image_embeds.dims()[1]]),
|
||||||
]))?;
|
]))?;
|
||||||
|
|
||||||
// decode each token for each batch
|
// decode each token for each batch
|
||||||
@ -102,7 +117,7 @@ impl Blip {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// batch decode
|
// batch decode
|
||||||
let texts = self.textual.processor().decode_tokens_batch(
|
let texts = self.processor.decode_tokens_batch(
|
||||||
&token_ids
|
&token_ids
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|v| v.into_iter().map(|x| x as u32).collect::<Vec<_>>())
|
.map(|v| v.into_iter().map(|x| x as u32).collect::<Vec<_>>())
|
||||||
@ -114,7 +129,6 @@ impl Blip {
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|x| Y::default().with_texts(&[&x]))
|
.map(|x| Y::default().with_texts(&[&x]))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
// .into();
|
|
||||||
|
|
||||||
Ok(ys)
|
Ok(ys)
|
||||||
}
|
}
|
||||||
@ -122,8 +136,4 @@ impl Blip {
|
|||||||
pub fn summary(&mut self) {
|
pub fn summary(&mut self) {
|
||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn batch(&self) -> usize {
|
|
||||||
self.visual.batch() as _
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,71 +1,57 @@
|
|||||||
use crate::Kind;
|
|
||||||
|
|
||||||
/// Model configuration for `CLIP`
|
/// Model configuration for `CLIP`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn clip() -> Self {
|
pub fn clip() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("clip")
|
.with_name("clip")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_batch_size_all(1)
|
||||||
}
|
.with_visual_ixx(0, 1, 3.into())
|
||||||
|
.with_visual_ixx(0, 2, 224.into())
|
||||||
pub fn clip_visual() -> Self {
|
.with_visual_ixx(0, 3, 224.into())
|
||||||
Self::clip()
|
|
||||||
.with_model_kind(Kind::Vision)
|
|
||||||
.with_model_ixx(0, 2, 224.into())
|
|
||||||
.with_model_ixx(0, 3, 224.into())
|
|
||||||
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
||||||
.with_image_std(&[0.26862954, 0.2613026, 0.2757771])
|
.with_image_std(&[0.26862954, 0.2613026, 0.2757771])
|
||||||
}
|
|
||||||
|
|
||||||
pub fn clip_textual() -> Self {
|
|
||||||
Self::clip()
|
|
||||||
.with_model_kind(Kind::Language)
|
|
||||||
.with_model_max_length(77)
|
.with_model_max_length(77)
|
||||||
|
.with_tokenizer_file("clip/tokenizer.json")
|
||||||
|
.with_tokenizer_config_file("clip/tokenizer_config.json")
|
||||||
|
.with_special_tokens_map_file("clip/special_tokens_map.json")
|
||||||
|
.with_config_file("clip/config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clip_vit_b16_visual() -> Self {
|
pub fn clip_vit_b16() -> Self {
|
||||||
Self::clip_visual().with_model_file("vit-b16-visual.onnx")
|
Self::clip()
|
||||||
|
.with_visual_file("vit-b16-visual.onnx")
|
||||||
|
.with_textual_file("vit-b16-textual.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clip_vit_b16_textual() -> Self {
|
pub fn clip_vit_b32() -> Self {
|
||||||
Self::clip_textual().with_model_file("vit-b16-textual.onnx")
|
Self::clip()
|
||||||
|
.with_visual_file("vit-b32-visual.onnx")
|
||||||
|
.with_textual_file("vit-b32-textual.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clip_vit_b32_visual() -> Self {
|
pub fn clip_vit_l14() -> Self {
|
||||||
Self::clip_visual().with_model_file("vit-b32-visual.onnx")
|
Self::clip()
|
||||||
|
.with_visual_file("vit-l14-visual.onnx")
|
||||||
|
.with_textual_file("vit-l14-textual.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clip_vit_b32_textual() -> Self {
|
pub fn jina_clip() -> Self {
|
||||||
Self::clip_textual().with_model_file("vit-b32-textual.onnx")
|
Self::default()
|
||||||
}
|
.with_name("jina-clip-v1")
|
||||||
|
.with_batch_size_all(1)
|
||||||
pub fn clip_vit_l14_visual() -> Self {
|
.with_visual_ixx(0, 1, 3.into())
|
||||||
Self::clip_visual().with_model_file("vit-l14-visual.onnx")
|
.with_visual_ixx(0, 2, 224.into())
|
||||||
}
|
.with_visual_ixx(0, 3, 224.into())
|
||||||
|
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
||||||
pub fn clip_vit_l14_textual() -> Self {
|
.with_image_std(&[0.26862954, 0.2613026, 0.2757771])
|
||||||
Self::clip_textual().with_model_file("vit-l14-textual.onnx")
|
.with_tokenizer_file("jina-clip-v1/tokenizer.json")
|
||||||
|
.with_tokenizer_config_file("jina-clip-v1/tokenizer_config.json")
|
||||||
|
.with_special_tokens_map_file("jina-clip-v1/special_tokens_map.json")
|
||||||
|
.with_config_file("jina-clip-v1/config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn jina_clip_v1() -> Self {
|
pub fn jina_clip_v1() -> Self {
|
||||||
Self::default()
|
Self::jina_clip()
|
||||||
.with_model_name("jina-clip-v1")
|
.with_visual_file("visual.onnx")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_textual_file("textual.onnx")
|
||||||
}
|
|
||||||
|
|
||||||
pub fn jina_clip_v1_visual() -> Self {
|
|
||||||
Self::jina_clip_v1()
|
|
||||||
.with_model_kind(Kind::Vision)
|
|
||||||
.with_model_ixx(0, 2, 224.into())
|
|
||||||
.with_model_ixx(0, 3, 224.into())
|
|
||||||
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
|
|
||||||
.with_image_std(&[0.26862954, 0.2613026, 0.2757771])
|
|
||||||
.with_model_file("visual.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn jina_clip_v1_textual() -> Self {
|
|
||||||
Self::jina_clip_v1()
|
|
||||||
.with_model_kind(Kind::Language)
|
|
||||||
.with_model_file("textual.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,11 +2,12 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::Array2;
|
use ndarray::Array2;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Options, Processor, Ts, Xs, X};
|
use crate::{elapsed, Engine, Image, ModelConfig, Processor, Ts, X};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct ClipVisual {
|
pub struct Clip {
|
||||||
engine: Engine,
|
visual: Engine,
|
||||||
|
textual: Engine,
|
||||||
height: usize,
|
height: usize,
|
||||||
width: usize,
|
width: usize,
|
||||||
batch: usize,
|
batch: usize,
|
||||||
@ -14,22 +15,23 @@ pub struct ClipVisual {
|
|||||||
ts: Ts,
|
ts: Ts,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ClipVisual {
|
impl Clip {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let visual = Engine::try_from_config(&config.visual)?;
|
||||||
let (batch, height, width, ts) = (
|
let textual = Engine::try_from_config(&config.textual)?;
|
||||||
engine.batch().opt(),
|
let (batch, height, width) = (
|
||||||
engine.try_height().unwrap_or(&224.into()).opt(),
|
visual.batch().opt(),
|
||||||
engine.try_width().unwrap_or(&224.into()).opt(),
|
visual.try_height().unwrap_or(&224.into()).opt(),
|
||||||
engine.ts.clone(),
|
visual.try_width().unwrap_or(&224.into()).opt(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let ts = Ts::merge(&[visual.ts(), textual.ts()]);
|
||||||
.to_processor()?
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
textual,
|
||||||
|
visual,
|
||||||
height,
|
height,
|
||||||
width,
|
width,
|
||||||
batch,
|
batch,
|
||||||
@ -38,111 +40,39 @@ impl ClipVisual {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn preprocess(&mut self, xs: &[Image]) -> Result<Xs> {
|
|
||||||
let x = self.processor.process_images(xs)?;
|
|
||||||
|
|
||||||
Ok(x.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
|
|
||||||
self.engine.run(xs)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
|
pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
|
||||||
let xs = elapsed!("visual-preprocess", self.ts, { self.preprocess(xs)? });
|
let xs = elapsed!("visual-preprocess", self.ts, {
|
||||||
let xs = elapsed!("visual-inference", self.ts, { self.inference(xs)? });
|
self.processor.process_images(xs)?
|
||||||
|
});
|
||||||
|
let xs = elapsed!("visual-inference", self.ts, { self.visual.run(xs.into())? });
|
||||||
let x = elapsed!("visual-postprocess", self.ts, { xs[0].to_owned() });
|
let x = elapsed!("visual-postprocess", self.ts, { xs[0].to_owned() });
|
||||||
|
|
||||||
Ok(x)
|
Ok(x)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
|
||||||
pub struct ClipTextual {
|
|
||||||
engine: Engine,
|
|
||||||
batch: usize,
|
|
||||||
processor: Processor,
|
|
||||||
ts: Ts,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ClipTextual {
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
|
||||||
let engine = options.to_engine()?;
|
|
||||||
let (batch, ts) = (engine.batch().opt(), engine.ts.clone());
|
|
||||||
let processor = options.to_processor()?;
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
engine,
|
|
||||||
batch,
|
|
||||||
processor,
|
|
||||||
ts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn preprocess(&self, xs: &[&str]) -> Result<Xs> {
|
|
||||||
let encodings: Vec<f32> = self
|
|
||||||
.processor
|
|
||||||
.encode_texts_ids(xs, false)? // skip_special_tokens
|
|
||||||
.into_iter()
|
|
||||||
.flatten()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
|
|
||||||
.into_dyn()
|
|
||||||
.into();
|
|
||||||
|
|
||||||
Ok(x.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
|
|
||||||
self.engine.run(xs)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_texts(&mut self, xs: &[&str]) -> Result<X> {
|
pub fn encode_texts(&mut self, xs: &[&str]) -> Result<X> {
|
||||||
let xs = elapsed!("textual-preprocess", self.ts, { self.preprocess(xs)? });
|
let xs = elapsed!("textual-preprocess", self.ts, {
|
||||||
let xs = elapsed!("textual-inference", self.ts, { self.inference(xs)? });
|
let encodings: Vec<f32> = self
|
||||||
|
.processor
|
||||||
|
.encode_texts_ids(xs, false)?
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
|
||||||
|
.into_dyn()
|
||||||
|
.into();
|
||||||
|
x
|
||||||
|
});
|
||||||
|
let xs = elapsed!("textual-inference", self.ts, {
|
||||||
|
self.textual.run(xs.into())?
|
||||||
|
});
|
||||||
let x = elapsed!("textual-postprocess", self.ts, { xs[0].to_owned() });
|
let x = elapsed!("textual-postprocess", self.ts, { xs[0].to_owned() });
|
||||||
|
|
||||||
Ok(x)
|
Ok(x)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
|
||||||
pub struct Clip {
|
|
||||||
textual: ClipTextual,
|
|
||||||
visual: ClipVisual,
|
|
||||||
ts: Ts,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Clip {
|
|
||||||
pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
|
|
||||||
let visual = ClipVisual::new(options_visual)?;
|
|
||||||
let textual = ClipTextual::new(options_textual)?;
|
|
||||||
// let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
|
|
||||||
let ts = Ts::default();
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
textual,
|
|
||||||
visual,
|
|
||||||
ts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
|
|
||||||
let x = elapsed!("encode_images", self.ts, { self.visual.encode_images(xs)? });
|
|
||||||
Ok(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_texts(&mut self, xs: &[&str]) -> Result<X> {
|
|
||||||
let x = elapsed!("encode_texts", self.ts, { self.textual.encode_texts(xs)? });
|
|
||||||
Ok(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn summary(&mut self) {
|
pub fn summary(&mut self) {
|
||||||
// self.ts.clear();
|
|
||||||
// self.ts = Ts::merge(&[&self.ts, self.visual.ts(), self.textual.ts()]);
|
|
||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
self.visual.ts().summary();
|
|
||||||
self.textual.ts().summary();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
use crate::NAMES_IMAGENET_1K;
|
use crate::NAMES_IMAGENET_1K;
|
||||||
|
|
||||||
/// Model configuration for `ConvNeXt`
|
/// Model configuration for `ConvNeXt`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn convnext() -> Self {
|
pub fn convnext() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("convnext")
|
.with_name("convnext")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/// Model configuration for `d_fine`
|
/// Model configuration for `d_fine`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn d_fine() -> Self {
|
pub fn d_fine() -> Self {
|
||||||
Self::rtdetr().with_model_name("d-fine")
|
Self::rtdetr().with_name("d-fine")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn d_fine_n_coco() -> Self {
|
pub fn d_fine_n_coco() -> Self {
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for [DB](https://github.com/MhLiao/DB) and [PaddleOCR-Det](https://github.com/PaddlePaddle/PaddleOCR)
|
/// Model configuration for [DB](https://github.com/MhLiao/DB) and [PaddleOCR-Det](https://github.com/PaddlePaddle/PaddleOCR)
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn db() -> Self {
|
pub fn db() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("db")
|
.with_name("db")
|
||||||
.with_model_ixx(0, 0, (1, 1, 8).into())
|
.with_model_ixx(0, 0, (1, 1, 8).into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, (608, 960, 1600).into())
|
.with_model_ixx(0, 2, (608, 960, 1600).into())
|
||||||
@ -11,7 +11,7 @@ impl crate::Options {
|
|||||||
.with_normalize(true)
|
.with_normalize(true)
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
.with_image_mean(&[0.485, 0.456, 0.406])
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
.with_image_std(&[0.229, 0.224, 0.225])
|
||||||
.with_binary_thresh(0.2)
|
.with_db_binary_thresh(0.2)
|
||||||
.with_class_confs(&[0.35])
|
.with_class_confs(&[0.35])
|
||||||
.with_min_width(5.0)
|
.with_min_width(5.0)
|
||||||
.with_min_height(12.0)
|
.with_min_height(12.0)
|
||||||
|
@ -4,7 +4,8 @@ use ndarray::Axis;
|
|||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
elapsed, DynConf, Engine, Hbb, Image, Mask, Obb, Ops, Options, Polygon, Processor, Ts, Xs, Y,
|
elapsed, DynConf, Engine, Hbb, Image, Mask, ModelConfig, Obb, Ops, Polygon, Processor, Ts, Xs,
|
||||||
|
Y,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
@ -24,8 +25,8 @@ pub struct DB {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl DB {
|
impl DB {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts, spec) = (
|
let (batch, height, width, ts, spec) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&960.into()).opt(),
|
engine.try_height().unwrap_or(&960.into()).opt(),
|
||||||
@ -33,15 +34,14 @@ impl DB {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
engine.spec().to_owned(),
|
engine.spec().to_owned(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
let confs = DynConf::new(options.class_confs(), 1);
|
let confs = DynConf::new(config.class_confs(), 1);
|
||||||
let binary_thresh = options.binary_thresh().unwrap_or(0.2);
|
let binary_thresh = config.db_binary_thresh().unwrap_or(0.2);
|
||||||
let unclip_ratio = options.unclip_ratio().unwrap_or(1.5);
|
let unclip_ratio = config.db_unclip_ratio().unwrap_or(1.5);
|
||||||
let min_width = options.min_width().unwrap_or(12.0);
|
let min_width = config.min_width().unwrap_or(12.0);
|
||||||
let min_height = options.min_height().unwrap_or(5.0);
|
let min_height = config.min_height().unwrap_or(5.0);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/// Model configuration for `DEIM`
|
/// Model configuration for `DEIM`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn deim() -> Self {
|
pub fn deim() -> Self {
|
||||||
Self::d_fine().with_model_name("deim")
|
Self::d_fine().with_name("deim")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deim_dfine_s_coco() -> Self {
|
pub fn deim_dfine_s_coco() -> Self {
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
use crate::NAMES_IMAGENET_1K;
|
use crate::NAMES_IMAGENET_1K;
|
||||||
|
|
||||||
/// Model configuration for `DeiT`
|
/// Model configuration for `DeiT`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn deit() -> Self {
|
pub fn deit() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("deit")
|
.with_name("deit")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for `DepthAnything`
|
/// Model configuration for `DepthAnything`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn depth_anything() -> Self {
|
pub fn depth_anything() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("depth-anything")
|
.with_name("depth-anything")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, (384, 518, 1024).into())
|
.with_model_ixx(0, 2, (384, 518, 1024).into())
|
||||||
@ -14,26 +14,26 @@ impl crate::Options {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn depth_anything_s() -> Self {
|
pub fn depth_anything_s() -> Self {
|
||||||
Self::depth_anything().with_model_scale(crate::Scale::S)
|
Self::depth_anything().with_scale(crate::Scale::S)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn depth_anything_v1() -> Self {
|
pub fn depth_anything_v1() -> Self {
|
||||||
Self::depth_anything().with_model_version(1.into())
|
Self::depth_anything().with_version(1.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn depth_anything_v2() -> Self {
|
pub fn depth_anything_v2() -> Self {
|
||||||
Self::depth_anything().with_model_version(2.into())
|
Self::depth_anything().with_version(2.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn depth_anything_v1_small() -> Self {
|
pub fn depth_anything_v1_small() -> Self {
|
||||||
Self::depth_anything_v1()
|
Self::depth_anything_v1()
|
||||||
.with_model_scale(crate::Scale::S)
|
.with_scale(crate::Scale::S)
|
||||||
.with_model_file("v1-s.onnx")
|
.with_model_file("v1-s.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn depth_anything_v2_small() -> Self {
|
pub fn depth_anything_v2_small() -> Self {
|
||||||
Self::depth_anything_v2()
|
Self::depth_anything_v2()
|
||||||
.with_model_scale(crate::Scale::S)
|
.with_scale(crate::Scale::S)
|
||||||
.with_model_file("v2-s.onnx")
|
.with_model_file("v2-s.onnx")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use aksr::Builder;
|
use aksr::Builder;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, Engine, Image, Mask, ModelConfig, Ops, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct DepthAnything {
|
pub struct DepthAnything {
|
||||||
@ -15,8 +15,8 @@ pub struct DepthAnything {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl DepthAnything {
|
impl DepthAnything {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
|
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
@ -25,9 +25,7 @@ impl DepthAnything {
|
|||||||
engine.try_width().unwrap_or(&518.into()).opt(),
|
engine.try_width().unwrap_or(&518.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for `DepthPro`
|
/// Model configuration for `DepthPro`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn depth_pro() -> Self {
|
pub fn depth_pro() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("depth-pro")
|
.with_name("depth-pro")
|
||||||
.with_model_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
|
.with_model_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 1536.into())
|
.with_model_ixx(0, 2, 1536.into())
|
||||||
@ -12,16 +12,4 @@ impl crate::Options {
|
|||||||
.with_resize_mode(crate::ResizeMode::FitExact)
|
.with_resize_mode(crate::ResizeMode::FitExact)
|
||||||
.with_normalize(true)
|
.with_normalize(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn depth_pro_q4f16() -> Self {
|
|
||||||
// Self::depth_pro().with_model_file("q4f16.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn depth_pro_fp16() -> Self {
|
|
||||||
// Self::depth_pro().with_model_file("fp16.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn depth_pro_bnb4() -> Self {
|
|
||||||
// Self::depth_pro().with_model_file("bnb4.onnx")
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, Engine, Image, Mask, ModelConfig, Ops, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct DepthPro {
|
pub struct DepthPro {
|
||||||
@ -16,8 +16,8 @@ pub struct DepthPro {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl DepthPro {
|
impl DepthPro {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -25,8 +25,7 @@ impl DepthPro {
|
|||||||
engine.try_width().unwrap_or(&512.into()).opt(),
|
engine.try_width().unwrap_or(&512.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for `DINOv2`
|
/// Model configuration for `DINOv2`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn dinov2() -> Self {
|
pub fn dinov2() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("dinov2")
|
.with_name("dinov2")
|
||||||
.with_model_ixx(0, 0, (1, 1, 8).into())
|
.with_model_ixx(0, 0, (1, 1, 8).into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
@ -16,13 +16,13 @@ impl crate::Options {
|
|||||||
|
|
||||||
pub fn dinov2_small() -> Self {
|
pub fn dinov2_small() -> Self {
|
||||||
Self::dinov2()
|
Self::dinov2()
|
||||||
.with_model_scale(crate::Scale::S)
|
.with_scale(crate::Scale::S)
|
||||||
.with_model_file("s.onnx")
|
.with_model_file("s.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dinov2_base() -> Self {
|
pub fn dinov2_base() -> Self {
|
||||||
Self::dinov2()
|
Self::dinov2()
|
||||||
.with_model_scale(crate::Scale::B)
|
.with_scale(crate::Scale::B)
|
||||||
.with_model_file("b.onnx")
|
.with_model_file("b.onnx")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use aksr::Builder;
|
use aksr::Builder;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Options, Processor, Scale, Ts, Xs, X};
|
use crate::{elapsed, Engine, Image, ModelConfig, Processor, Scale, Ts, Xs, X};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct DINOv2 {
|
pub struct DINOv2 {
|
||||||
@ -15,15 +15,15 @@ pub struct DINOv2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl DINOv2 {
|
impl DINOv2 {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&384.into()).opt(),
|
engine.try_height().unwrap_or(&384.into()).opt(),
|
||||||
engine.try_width().unwrap_or(&384.into()).opt(),
|
engine.try_width().unwrap_or(&384.into()).opt(),
|
||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
);
|
);
|
||||||
let dim = match options.model_scale() {
|
let dim = match &config.scale {
|
||||||
Some(Scale::S) => 384,
|
Some(Scale::S) => 384,
|
||||||
Some(Scale::B) => 768,
|
Some(Scale::B) => 768,
|
||||||
Some(Scale::L) => 1024,
|
Some(Scale::L) => 1024,
|
||||||
@ -31,8 +31,7 @@ impl DINOv2 {
|
|||||||
Some(x) => anyhow::bail!("Unsupported scale: {:?}", x),
|
Some(x) => anyhow::bail!("Unsupported scale: {:?}", x),
|
||||||
None => anyhow::bail!("No model scale specified"),
|
None => anyhow::bail!("No model scale specified"),
|
||||||
};
|
};
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://github.com/czczup/FAST)
|
/// Model configuration for [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://github.com/czczup/FAST)
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn fast() -> Self {
|
pub fn fast() -> Self {
|
||||||
Self::db()
|
Self::db()
|
||||||
.with_model_name("fast")
|
.with_name("fast")
|
||||||
.with_image_mean(&[0.798, 0.785, 0.772])
|
.with_image_mean(&[0.798, 0.785, 0.772])
|
||||||
.with_image_std(&[0.264, 0.2749, 0.287])
|
.with_image_std(&[0.264, 0.2749, 0.287])
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
use crate::NAMES_IMAGENET_1K;
|
use crate::NAMES_IMAGENET_1K;
|
||||||
|
|
||||||
/// Model configuration for `FastViT`
|
/// Model configuration for `FastViT`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn fastvit() -> Self {
|
pub fn fastvit() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("fastvit")
|
.with_name("fastvit")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
|
@ -1,59 +1,31 @@
|
|||||||
/// Model configuration for `Florence2`
|
/// Model configuration for `Florence2`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn florence2() -> Self {
|
pub fn florence2() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("florence2")
|
.with_name("florence2")
|
||||||
.with_batch_size(1)
|
.with_batch_size_all(1)
|
||||||
}
|
.with_visual_ixx(0, 1, 3.into())
|
||||||
|
.with_visual_ixx(0, 2, 768.into())
|
||||||
pub fn florence2_visual() -> Self {
|
.with_visual_ixx(0, 3, 768.into())
|
||||||
Self::florence2()
|
|
||||||
.with_model_kind(crate::Kind::Vision)
|
|
||||||
.with_model_ixx(0, 2, 768.into())
|
|
||||||
.with_model_ixx(0, 3, 768.into())
|
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
.with_image_mean(&[0.485, 0.456, 0.406])
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
.with_image_std(&[0.229, 0.224, 0.225])
|
||||||
.with_resize_filter("Bilinear")
|
|
||||||
.with_normalize(true)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn florence2_textual() -> Self {
|
pub fn florence2_base() -> Self {
|
||||||
Self::florence2().with_model_kind(crate::Kind::Language)
|
Self::florence2()
|
||||||
|
.with_scale(crate::Scale::B)
|
||||||
|
.with_visual_file("base-vision-encoder.onnx")
|
||||||
|
.with_textual_file("base-embed-tokens.onnx")
|
||||||
|
.with_textual_encoder_file("base-encoder.onnx")
|
||||||
|
.with_textual_decoder_file("base-decoder.onnx")
|
||||||
|
.with_textual_decoder_merged_file("base-decoder-merged.onnx")
|
||||||
|
.with_tokenizer_file("florence2/tokenizer.json")
|
||||||
|
.with_config_file("florence2/config.json")
|
||||||
|
.with_special_tokens_map_file("florence2/special_tokens_map.json")
|
||||||
|
.with_tokenizer_config_file("florence2/tokenizer_config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn florence2_visual_base() -> Self {
|
pub fn florence2_large() -> Self {
|
||||||
Self::florence2_visual().with_model_scale(crate::Scale::B)
|
todo!()
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_textual_base() -> Self {
|
|
||||||
Self::florence2_textual().with_model_scale(crate::Scale::B)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_visual_large() -> Self {
|
|
||||||
Self::florence2_visual().with_model_scale(crate::Scale::L)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_textual_large() -> Self {
|
|
||||||
Self::florence2_textual().with_model_scale(crate::Scale::L)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_visual_encoder_base() -> Self {
|
|
||||||
Self::florence2_visual_base().with_model_file("base-vision-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_textual_embed_base() -> Self {
|
|
||||||
Self::florence2_textual_base().with_model_file("base-embed-tokens.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_texual_encoder_base() -> Self {
|
|
||||||
Self::florence2_textual_base().with_model_file("base-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_texual_decoder_base() -> Self {
|
|
||||||
Self::florence2_textual_base().with_model_file("base-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn florence2_texual_decoder_merged_base() -> Self {
|
|
||||||
Self::florence2_textual_base().with_model_file("base-decoder-merged.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,51 +4,59 @@ use ndarray::{s, Axis};
|
|||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
elapsed,
|
elapsed, models::Quantizer, Engine, Hbb, Image, LogitsSampler, ModelConfig, Polygon, Processor,
|
||||||
models::{BaseModelTextual, BaseModelVisual, Quantizer},
|
Scale, Task, Ts, Xs, X, Y,
|
||||||
Hbb, Image, LogitsSampler, Options, Polygon, Scale, Task, Ts, Xs, X, Y,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct Florence2 {
|
pub struct Florence2 {
|
||||||
pub vision_encoder: BaseModelVisual,
|
pub vision_encoder: Engine,
|
||||||
pub text_embed: BaseModelTextual,
|
pub text_embed: Engine,
|
||||||
pub encoder: BaseModelTextual,
|
pub encoder: Engine,
|
||||||
pub decoder: BaseModelTextual,
|
pub decoder: Engine,
|
||||||
pub decoder_merged: BaseModelTextual,
|
pub decoder_merged: Engine,
|
||||||
ts: Ts,
|
ts: Ts,
|
||||||
quantizer: Quantizer,
|
quantizer: Quantizer,
|
||||||
max_length: usize,
|
max_length: usize,
|
||||||
eos_token_id: u32,
|
eos_token_id: u32,
|
||||||
decoder_start_token_id: u32,
|
decoder_start_token_id: u32,
|
||||||
n_kvs: usize,
|
n_kvs: usize,
|
||||||
|
height: usize,
|
||||||
|
width: usize,
|
||||||
|
batch: usize,
|
||||||
|
processor: Processor,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Florence2 {
|
impl Florence2 {
|
||||||
pub fn new(
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
options_vision_encoder: Options,
|
let vision_encoder = Engine::try_from_config(&config.visual)?;
|
||||||
options_text_embed: Options,
|
let text_embed = Engine::try_from_config(&config.textual)?;
|
||||||
options_encoder: Options,
|
let encoder = Engine::try_from_config(&config.textual_encoder)?;
|
||||||
options_decoder: Options,
|
let decoder = Engine::try_from_config(&config.textual_decoder)?;
|
||||||
options_decoder_merged: Options,
|
let decoder_merged = Engine::try_from_config(&config.textual_decoder_merged)?;
|
||||||
) -> Result<Self> {
|
|
||||||
let vision_encoder = BaseModelVisual::new(options_vision_encoder)?;
|
let (batch, height, width) = (
|
||||||
let text_embed = BaseModelTextual::new(options_text_embed)?;
|
vision_encoder.batch().opt(),
|
||||||
let encoder = BaseModelTextual::new(options_encoder)?;
|
vision_encoder.try_height().unwrap_or(&1024.into()).opt(),
|
||||||
let decoder = BaseModelTextual::new(options_decoder)?;
|
vision_encoder.try_width().unwrap_or(&1024.into()).opt(),
|
||||||
let decoder_merged = BaseModelTextual::new(options_decoder_merged)?;
|
);
|
||||||
|
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
|
|
||||||
let quantizer = Quantizer::default();
|
let quantizer = Quantizer::default();
|
||||||
let ts = Ts::merge(&[
|
let ts = Ts::merge(&[
|
||||||
vision_encoder.engine().ts(),
|
vision_encoder.ts(),
|
||||||
text_embed.engine().ts(),
|
text_embed.ts(),
|
||||||
encoder.engine().ts(),
|
encoder.ts(),
|
||||||
decoder.engine().ts(),
|
decoder.ts(),
|
||||||
decoder_merged.engine().ts(),
|
decoder_merged.ts(),
|
||||||
]);
|
]);
|
||||||
let max_length = 1024;
|
let max_length = 1024;
|
||||||
let eos_token_id = 2;
|
let eos_token_id = 2;
|
||||||
let decoder_start_token_id = 2;
|
let decoder_start_token_id = 2;
|
||||||
let n_kvs = match decoder.scale() {
|
let n_kvs = match config.scale {
|
||||||
Some(Scale::B) => 6,
|
Some(Scale::B) => 6,
|
||||||
Some(Scale::L) => 12,
|
Some(Scale::L) => 12,
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
@ -66,6 +74,10 @@ impl Florence2 {
|
|||||||
eos_token_id,
|
eos_token_id,
|
||||||
decoder_start_token_id,
|
decoder_start_token_id,
|
||||||
n_kvs,
|
n_kvs,
|
||||||
|
batch,
|
||||||
|
height,
|
||||||
|
width,
|
||||||
|
processor,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,12 +109,12 @@ impl Florence2 {
|
|||||||
.map(|im| {
|
.map(|im| {
|
||||||
let text = Self::process_task(task, im.height() as _, im.width() as _)
|
let text = Self::process_task(task, im.height() as _, im.width() as _)
|
||||||
.prompt_for_florence2()?;
|
.prompt_for_florence2()?;
|
||||||
let ids = self.text_embed.processor().encode_text_ids(&text, true)?;
|
let ids = self.processor.encode_text_ids(&text, true)?;
|
||||||
X::from(ids).insert_axis(0)
|
X::from(ids).insert_axis(0)
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
let x = X::concat(&xs, 0)?;
|
let x = X::concat(&xs, 0)?;
|
||||||
let xs = self.text_embed.inference(x.into())?;
|
let xs = self.text_embed.run(x.into())?;
|
||||||
let x = xs[0].to_owned();
|
let x = xs[0].to_owned();
|
||||||
|
|
||||||
Ok(x)
|
Ok(x)
|
||||||
@ -110,7 +122,10 @@ impl Florence2 {
|
|||||||
|
|
||||||
pub fn forward(&mut self, xs_visual: &[Image], x_textual: &Task) -> Result<Vec<Y>> {
|
pub fn forward(&mut self, xs_visual: &[Image], x_textual: &Task) -> Result<Vec<Y>> {
|
||||||
let visual_embeddings = elapsed!("visual-encode", self.ts, {
|
let visual_embeddings = elapsed!("visual-encode", self.ts, {
|
||||||
self.vision_encoder.encode(xs_visual)?
|
let xs = self.processor.process_images(xs_visual)?;
|
||||||
|
self.batch = xs_visual.len(); // update
|
||||||
|
let xs = self.vision_encoder.run(xs.into())?;
|
||||||
|
xs[0].to_owned()
|
||||||
});
|
});
|
||||||
|
|
||||||
let textual_embedding = elapsed!("textual-encode", self.ts, {
|
let textual_embedding = elapsed!("textual-encode", self.ts, {
|
||||||
@ -141,7 +156,7 @@ impl Florence2 {
|
|||||||
let attention_mask = X::ones(&[self.batch(), inputs_embeds.dims()[1]]);
|
let attention_mask = X::ones(&[self.batch(), inputs_embeds.dims()[1]]);
|
||||||
|
|
||||||
// encoder
|
// encoder
|
||||||
let last_hidden_state = self.encoder.inference(Xs::from(vec![
|
let last_hidden_state = self.encoder.run(Xs::from(vec![
|
||||||
attention_mask.clone(),
|
attention_mask.clone(),
|
||||||
inputs_embeds.clone(),
|
inputs_embeds.clone(),
|
||||||
]))?[0]
|
]))?[0]
|
||||||
@ -150,7 +165,7 @@ impl Florence2 {
|
|||||||
// decoder
|
// decoder
|
||||||
let inputs_embeds = inputs_embeds.slice(s![.., -1.., ..]);
|
let inputs_embeds = inputs_embeds.slice(s![.., -1.., ..]);
|
||||||
let inputs_embeds = X::from(inputs_embeds.to_owned().into_dyn());
|
let inputs_embeds = X::from(inputs_embeds.to_owned().into_dyn());
|
||||||
let mut decoder_outputs = self.decoder.inference(Xs::from(vec![
|
let mut decoder_outputs = self.decoder.run(Xs::from(vec![
|
||||||
attention_mask.clone(),
|
attention_mask.clone(),
|
||||||
last_hidden_state.clone(),
|
last_hidden_state.clone(),
|
||||||
inputs_embeds,
|
inputs_embeds,
|
||||||
@ -215,7 +230,7 @@ impl Florence2 {
|
|||||||
|
|
||||||
// decode
|
// decode
|
||||||
let next_tokens = X::from(last_tokens.clone()).insert_axis(1)?;
|
let next_tokens = X::from(last_tokens.clone()).insert_axis(1)?;
|
||||||
let inputs_embeds = &self.text_embed.inference(Xs::from(next_tokens))?[0].clone();
|
let inputs_embeds = &self.text_embed.run(Xs::from(next_tokens))?[0].clone();
|
||||||
let use_cache = X::ones(&[1]);
|
let use_cache = X::ones(&[1]);
|
||||||
let mut xs = vec![
|
let mut xs = vec![
|
||||||
attention_mask.clone(),
|
attention_mask.clone(),
|
||||||
@ -229,13 +244,13 @@ impl Florence2 {
|
|||||||
xs.push(encoder_kvs[i * 2 + 1].clone());
|
xs.push(encoder_kvs[i * 2 + 1].clone());
|
||||||
}
|
}
|
||||||
xs.push(use_cache);
|
xs.push(use_cache);
|
||||||
decoder_outputs = self.decoder_merged.inference(xs.into())?;
|
decoder_outputs = self.decoder_merged.run(xs.into())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// batch decode
|
// batch decode
|
||||||
let texts = self
|
let texts = self
|
||||||
.text_embed
|
// .text_embed
|
||||||
.processor()
|
.processor
|
||||||
.decode_tokens_batch(&token_ids, false)?;
|
.decode_tokens_batch(&token_ids, false)?;
|
||||||
|
|
||||||
Ok(texts)
|
Ok(texts)
|
||||||
@ -416,10 +431,6 @@ impl Florence2 {
|
|||||||
Ok(ys)
|
Ok(ys)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn batch(&self) -> usize {
|
|
||||||
self.vision_encoder.batch() as _
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn summary(&mut self) {
|
pub fn summary(&mut self) {
|
||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
/// Model configuration for `GroundingDino`
|
/// Model configuration for `GroundingDino`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn grounding_dino() -> Self {
|
pub fn grounding_dino() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("grounding-dino")
|
.with_name("grounding-dino")
|
||||||
.with_model_kind(crate::Kind::VisionLanguage)
|
|
||||||
.with_model_ixx(0, 0, 1.into()) // TODO: current onnx model does not support bs > 1
|
.with_model_ixx(0, 0, 1.into()) // TODO: current onnx model does not support bs > 1
|
||||||
.with_model_ixx(0, 2, 800.into()) // TODO: matters
|
.with_model_ixx(0, 2, 800.into()) // TODO: matters
|
||||||
.with_model_ixx(0, 3, 1200.into()) // TODO: matters
|
.with_model_ixx(0, 3, 1200.into()) // TODO: matters
|
||||||
@ -11,9 +10,10 @@ impl crate::Options {
|
|||||||
.with_resize_filter("CatmullRom")
|
.with_resize_filter("CatmullRom")
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
.with_image_mean(&[0.485, 0.456, 0.406])
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
.with_image_std(&[0.229, 0.224, 0.225])
|
||||||
.with_normalize(true)
|
.with_tokenizer_file("grounding-dino/tokenizer.json")
|
||||||
.with_class_confs(&[0.25])
|
.with_config_file("grounding-dino/config.json")
|
||||||
.with_text_confs(&[0.25])
|
.with_special_tokens_map_file("grounding-dino/special_tokens_map.json")
|
||||||
|
.with_tokenizer_config_file("grounding-dino/tokenizer_config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn grounding_dino_tiny() -> Self {
|
pub fn grounding_dino_tiny() -> Self {
|
||||||
|
@ -4,7 +4,7 @@ use ndarray::{s, Array2, Axis};
|
|||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, ModelConfig, Processor, Ts, Xs, X, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct GroundingDINO {
|
pub struct GroundingDINO {
|
||||||
@ -24,8 +24,8 @@ pub struct GroundingDINO {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl GroundingDINO {
|
impl GroundingDINO {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -33,11 +33,8 @@ impl GroundingDINO {
|
|||||||
engine.try_width().unwrap_or(&1200.into()).opt(),
|
engine.try_width().unwrap_or(&1200.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
let class_names = config
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let class_names = options
|
|
||||||
.text_names
|
.text_names
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.and_then(|v| {
|
.and_then(|v| {
|
||||||
@ -48,16 +45,20 @@ impl GroundingDINO {
|
|||||||
.collect();
|
.collect();
|
||||||
(!v.is_empty()).then_some(v)
|
(!v.is_empty()).then_some(v)
|
||||||
})
|
})
|
||||||
.ok_or_else(|| anyhow::anyhow!("No valid class names were provided in the options. Ensure the 'text_names' field is non-empty and contains valid class names."))?;
|
.ok_or_else(|| anyhow::anyhow!("No valid class names were provided in the config. Ensure the 'text_names' field is non-empty and contains valid class names."))?;
|
||||||
let text_prompt = class_names.iter().fold(String::new(), |mut acc, text| {
|
let text_prompt = class_names.iter().fold(String::new(), |mut acc, text| {
|
||||||
write!(&mut acc, "{}.", text).unwrap();
|
write!(&mut acc, "{}.", text).unwrap();
|
||||||
acc
|
acc
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let confs_visual = DynConf::new(config.class_confs(), class_names.len());
|
||||||
|
let confs_textual = DynConf::new(config.text_confs(), class_names.len());
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
let token_ids = processor.encode_text_ids(&text_prompt, true)?;
|
let token_ids = processor.encode_text_ids(&text_prompt, true)?;
|
||||||
let tokens = processor.encode_text_tokens(&text_prompt, true)?;
|
let tokens = processor.encode_text_tokens(&text_prompt, true)?;
|
||||||
let class_ids_map = Self::process_class_ids(&tokens);
|
let class_ids_map = Self::process_class_ids(&tokens);
|
||||||
let confs_visual = DynConf::new(options.class_confs(), class_names.len());
|
|
||||||
let confs_textual = DynConf::new(options.text_confs(), class_names.len());
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/abs/1707.03718)
|
/// Model configuration for [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/abs/1707.03718)
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn linknet() -> Self {
|
pub fn linknet() -> Self {
|
||||||
Self::fast()
|
Self::fast()
|
||||||
.with_model_name("linknet")
|
.with_name("linknet")
|
||||||
.with_image_mean(&[0.798, 0.785, 0.772])
|
.with_image_mean(&[0.798, 0.785, 0.772])
|
||||||
.with_image_std(&[0.264, 0.2749, 0.287])
|
.with_image_std(&[0.264, 0.2749, 0.287])
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
use crate::NAMES_IMAGENET_1K;
|
use crate::NAMES_IMAGENET_1K;
|
||||||
|
|
||||||
/// Model configuration for `MobileOne`
|
/// Model configuration for `MobileOne`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn mobileone() -> Self {
|
pub fn mobileone() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("mobileone")
|
.with_name("mobileone")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 1, 3.into())
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 224.into())
|
.with_model_ixx(0, 2, 224.into())
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/// Model configuration for `MODNet`
|
/// Model configuration for `MODNet`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn modnet() -> Self {
|
pub fn modnet() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("modnet")
|
.with_name("modnet")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 2, (416, 512, 800).into())
|
.with_model_ixx(0, 2, (416, 512, 800).into())
|
||||||
.with_model_ixx(0, 3, (416, 512, 800).into())
|
.with_model_ixx(0, 3, (416, 512, 800).into())
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, Engine, Image, Mask, ModelConfig, Ops, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct MODNet {
|
pub struct MODNet {
|
||||||
@ -16,8 +16,8 @@ pub struct MODNet {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MODNet {
|
impl MODNet {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -25,8 +25,7 @@ impl MODNet {
|
|||||||
engine.try_width().unwrap_or(&512.into()).opt(),
|
engine.try_width().unwrap_or(&512.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
|
@ -1,117 +1,47 @@
|
|||||||
/// Model configuration for `moondream2`
|
/// Model configuration for `moondream2`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn moondream2() -> Self {
|
pub fn moondream2() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("moondream2")
|
.with_name("moondream2")
|
||||||
.with_model_num_dry_run(0)
|
.with_visual_encoder_ixx(0, 0, (1, 3, 4).into()) // patch count
|
||||||
|
.with_image_mean(&[0.5, 0.5, 0.5])
|
||||||
|
.with_image_std(&[0.5, 0.5, 0.5])
|
||||||
|
.with_resize_mode(crate::ResizeMode::FitExact)
|
||||||
|
.with_resize_filter("catmullrom")
|
||||||
|
.with_visual_projection_ixx(0, 0, 1.into())
|
||||||
|
.with_textual_encoder_ixx(0, 0, 1.into())
|
||||||
|
.with_textual_decoder_ixx(0, 0, 1.into())
|
||||||
|
.with_size_encoder_ixx(0, 0, 1.into())
|
||||||
|
.with_size_decoder_ixx(0, 0, 1.into())
|
||||||
|
.with_coord_encoder_ixx(0, 0, 1.into())
|
||||||
|
.with_coord_decoder_ixx(0, 0, 1.into())
|
||||||
|
.with_tokenizer_file("moondream2/tokenizer.json")
|
||||||
|
.with_tokenizer_config_file("moondream2/tokenizer_config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn moondream2_0_5b() -> Self {
|
pub fn moondream2_0_5b() -> Self {
|
||||||
Self::moondream2().with_model_scale(crate::Scale::Billion(0.5))
|
Self::moondream2()
|
||||||
|
.with_scale(crate::Scale::Billion(0.5))
|
||||||
|
.with_visual_encoder_file("0.5b-vision-encoder.onnx")
|
||||||
|
.with_visual_projection_file("0.5b-vision-projection.onnx")
|
||||||
|
.with_textual_decoder_file("0.5b-text-decoder.onnx")
|
||||||
|
.with_textual_encoder_file("0.5b-text-encoder.onnx")
|
||||||
|
.with_coord_encoder_file("0.5b-coord-encoder.onnx")
|
||||||
|
.with_coord_decoder_file("0.5b-coord-decoder.onnx")
|
||||||
|
.with_size_encoder_file("0.5b-size-encoder.onnx")
|
||||||
|
.with_size_decoder_file("0.5b-size-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn moondream2_0_5b_vision_encoder() -> Self {
|
pub fn moondream2_2b() -> Self {
|
||||||
Self::moondream2_0_5b()
|
Self::moondream2()
|
||||||
.with_model_ixx(0, 0, (1, 3, 4).into()) // patch count
|
.with_scale(crate::Scale::Billion(2.))
|
||||||
.with_model_kind(crate::Kind::Vision)
|
.with_visual_encoder_file("2b-vision-encoder.onnx")
|
||||||
.with_image_mean(&[0.5, 0.5, 0.5])
|
.with_visual_projection_file("2b-vision-projection.onnx")
|
||||||
.with_image_std(&[0.5, 0.5, 0.5])
|
.with_textual_decoder_file("2b-text-decoder.onnx")
|
||||||
.with_normalize(true)
|
.with_textual_encoder_file("2b-text-encoder.onnx")
|
||||||
.with_resize_mode(crate::ResizeMode::FitExact)
|
.with_coord_encoder_file("2b-coord-encoder.onnx")
|
||||||
.with_resize_filter("catmullrom")
|
.with_coord_decoder_file("2b-coord-decoder.onnx")
|
||||||
.with_model_file("0.5b-vision-encoder.onnx")
|
.with_size_encoder_file("2b-size-encoder.onnx")
|
||||||
}
|
.with_size_decoder_file("2b-size-decoder.onnx")
|
||||||
|
|
||||||
pub fn moondream2_0_5b_vision_projection() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_kind(crate::Kind::Vision)
|
|
||||||
.with_model_file("0.5b-vision-projection.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_text_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_kind(crate::Kind::Language)
|
|
||||||
.with_model_file("0.5b-text-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_text_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_kind(crate::Kind::Language)
|
|
||||||
.with_model_file("0.5b-text-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_coord_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_file("0.5b-coord-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_coord_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_file("0.5b-coord-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_size_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_file("0.5b-size-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_0_5b_size_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b()
|
|
||||||
.with_batch_size(1)
|
|
||||||
.with_model_file("0.5b-size-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_vision_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_vision_encoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-vision-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_vision_projection() -> Self {
|
|
||||||
Self::moondream2_0_5b_vision_projection()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-vision-projection.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_text_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_text_decoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-text-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_text_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_text_encoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-text-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_coord_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_coord_encoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-coord-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_coord_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_coord_decoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-coord-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_size_encoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_size_encoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-size-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn moondream2_2b_size_decoder() -> Self {
|
|
||||||
Self::moondream2_0_5b_size_decoder()
|
|
||||||
.with_model_scale(crate::Scale::Billion(2.))
|
|
||||||
.with_model_file("2b-size-decoder.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,66 +5,57 @@ use ndarray::{s, Array, Array2, Array3, Axis, IxDyn};
|
|||||||
use ndarray_npy::ReadNpyExt;
|
use ndarray_npy::ReadNpyExt;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
BaseModelTextual, DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, Options, Processor,
|
DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, ModelConfig, Processor, Scale, Task,
|
||||||
Scale, Task, Ts, Xs, X, Y,
|
Xs, X, Y,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct Moondream2 {
|
pub struct Moondream2 {
|
||||||
vision_encoder: VisionEncoder,
|
vision_encoder: Engine,
|
||||||
vision_projection: VisionProjection,
|
vision_projection: Engine,
|
||||||
pub text_decoder: BaseModelTextual,
|
text_decoder: Engine,
|
||||||
text_encoder: BaseModelTextual,
|
text_encoder: Engine,
|
||||||
coord_decoder: Option<BaseModelTextual>,
|
coord_decoder: Option<Engine>,
|
||||||
coord_encoder: Option<BaseModelTextual>,
|
coord_encoder: Option<Engine>,
|
||||||
size_decoder: Option<BaseModelTextual>,
|
size_decoder: Option<Engine>,
|
||||||
size_encoder: Option<BaseModelTextual>,
|
size_encoder: Option<Engine>,
|
||||||
initial_kv_cache: X, // TODO: use f16
|
initial_kv_cache: X, // TODO: use f16
|
||||||
scale: Scale,
|
scale: Scale,
|
||||||
dtype: DType,
|
dtype: DType,
|
||||||
max_length: usize,
|
max_length: usize,
|
||||||
eos_token_id: u32,
|
eos_token_id: u32,
|
||||||
max_objects: usize,
|
max_objects: usize,
|
||||||
|
num_patch: usize,
|
||||||
|
patch_size: usize,
|
||||||
|
processor: Processor,
|
||||||
|
seq_len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Moondream2 {
|
impl Moondream2 {
|
||||||
// TODO
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
#[allow(clippy::too_many_arguments)]
|
|
||||||
pub fn new(
|
|
||||||
options_vision_encoder: Options,
|
|
||||||
options_vision_projection: Options,
|
|
||||||
options_text_encoder: Options,
|
|
||||||
options_text_decoder: Options,
|
|
||||||
options_coord_encoder: Option<Options>,
|
|
||||||
options_coord_decoder: Option<Options>,
|
|
||||||
options_size_encoder: Option<Options>,
|
|
||||||
options_size_decoder: Option<Options>,
|
|
||||||
) -> Result<Self> {
|
|
||||||
let max_length = 2048;
|
let max_length = 2048;
|
||||||
let max_objects = 50;
|
let max_objects = 50;
|
||||||
let eos_token_id = 50256;
|
let eos_token_id = 50256;
|
||||||
let dtype = options_vision_encoder.model_dtype;
|
let dtype = config.visual_encoder.dtype;
|
||||||
let scale = options_vision_encoder
|
let scale = config.scale.clone().unwrap_or(Scale::Billion(0.5));
|
||||||
.model_scale
|
|
||||||
.clone()
|
|
||||||
.unwrap_or(Scale::Billion(0.5));
|
|
||||||
let initial_kv_cache: X = KVCache::new(&scale, &dtype)?.0.into();
|
let initial_kv_cache: X = KVCache::new(&scale, &dtype)?.0.into();
|
||||||
let vision_encoder = VisionEncoder::new(options_vision_encoder)?;
|
let vision_encoder = Engine::try_from_config(&config.visual_encoder)?;
|
||||||
let vision_projection = VisionProjection::new(options_vision_projection)?;
|
let vision_projection = Engine::try_from_config(&config.visual_projection)?;
|
||||||
let text_decoder = BaseModelTextual::new(options_text_decoder)?;
|
let text_decoder = Engine::try_from_config(&config.textual_decoder)?;
|
||||||
let text_encoder = BaseModelTextual::new(options_text_encoder)?;
|
let text_encoder = Engine::try_from_config(&config.textual_encoder)?;
|
||||||
let coord_decoder = options_coord_decoder
|
let coord_decoder = Engine::try_from_config(&config.coord_decoder).ok();
|
||||||
.map(BaseModelTextual::new)
|
let coord_encoder = Engine::try_from_config(&config.coord_encoder).ok();
|
||||||
.transpose()?;
|
let size_decoder = Engine::try_from_config(&config.size_decoder).ok();
|
||||||
let coord_encoder = options_coord_encoder
|
let size_encoder = Engine::try_from_config(&config.size_encoder).ok();
|
||||||
.map(BaseModelTextual::new)
|
let (num_patch, patch_size, _ts) = (
|
||||||
.transpose()?;
|
vision_encoder.batch().opt(),
|
||||||
let size_decoder = options_size_decoder
|
vision_encoder.try_height().unwrap_or(&378.into()).opt(),
|
||||||
.map(BaseModelTextual::new)
|
vision_encoder.ts.clone(),
|
||||||
.transpose()?;
|
);
|
||||||
let size_encoder = options_size_encoder
|
let seq_len = vision_projection.inputs_minoptmax[0][1].opt();
|
||||||
.map(BaseModelTextual::new)
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.transpose()?;
|
.with_image_width(patch_size as _)
|
||||||
|
.with_image_height(patch_size as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
vision_encoder,
|
vision_encoder,
|
||||||
@ -81,12 +72,16 @@ impl Moondream2 {
|
|||||||
eos_token_id,
|
eos_token_id,
|
||||||
scale,
|
scale,
|
||||||
dtype,
|
dtype,
|
||||||
|
num_patch,
|
||||||
|
patch_size,
|
||||||
|
processor,
|
||||||
|
seq_len,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_image(&mut self, x: &Image) -> Result<X> {
|
pub fn encode_image(&mut self, x: &Image) -> Result<X> {
|
||||||
let patches_emb = self.vision_encoder.encode(x)?.clone().insert_axis(0)?;
|
let patches_emb = self.encode(x)?.clone().insert_axis(0)?;
|
||||||
let image_embedding = self.vision_projection.inference(patches_emb.into())?[0].to_owned();
|
let image_embedding = self.vision_projection.run(patches_emb.into())?[0].to_owned();
|
||||||
|
|
||||||
Ok(image_embedding)
|
Ok(image_embedding)
|
||||||
}
|
}
|
||||||
@ -119,12 +114,7 @@ impl Moondream2 {
|
|||||||
Task::Vqa(query) => {
|
Task::Vqa(query) => {
|
||||||
let input_ids: Vec<_> = [198., 198., 24361., 25.]
|
let input_ids: Vec<_> = [198., 198., 24361., 25.]
|
||||||
.iter()
|
.iter()
|
||||||
.chain(
|
.chain(&self.processor.encode_text_ids(query, false)?)
|
||||||
&self
|
|
||||||
.text_encoder
|
|
||||||
.processor()
|
|
||||||
.encode_text_ids(query, false)?,
|
|
||||||
)
|
|
||||||
.chain(&[198., 198., 33706., 25.])
|
.chain(&[198., 198., 33706., 25.])
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect();
|
.collect();
|
||||||
@ -139,8 +129,7 @@ impl Moondream2 {
|
|||||||
.iter()
|
.iter()
|
||||||
.chain(
|
.chain(
|
||||||
&self
|
&self
|
||||||
.text_encoder
|
.processor
|
||||||
.processor()
|
|
||||||
.encode_text_ids(&format!(" {}", object), false)?,
|
.encode_text_ids(&format!(" {}", object), false)?,
|
||||||
)
|
)
|
||||||
.chain(&[628.])
|
.chain(&[628.])
|
||||||
@ -156,8 +145,7 @@ impl Moondream2 {
|
|||||||
.iter()
|
.iter()
|
||||||
.chain(
|
.chain(
|
||||||
&self
|
&self
|
||||||
.text_encoder
|
.processor
|
||||||
.processor()
|
|
||||||
.encode_text_ids(&format!(" {}", object), false)?,
|
.encode_text_ids(&format!(" {}", object), false)?,
|
||||||
)
|
)
|
||||||
.chain(&[628.])
|
.chain(&[628.])
|
||||||
@ -174,10 +162,10 @@ impl Moondream2 {
|
|||||||
|
|
||||||
fn generate_text(&mut self, input_ids: &[f32], kv_cache: Array<f32, IxDyn>) -> Result<String> {
|
fn generate_text(&mut self, input_ids: &[f32], kv_cache: Array<f32, IxDyn>) -> Result<String> {
|
||||||
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
|
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
|
||||||
let mut input_embeds = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
|
let mut input_embeds = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
|
||||||
let logits_sampler = LogitsSampler::new();
|
let logits_sampler = LogitsSampler::new();
|
||||||
let mut token_ids: Vec<u32> = Vec::new();
|
let mut token_ids: Vec<u32> = Vec::new();
|
||||||
let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
|
let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
|
||||||
let mut inc = input_embeds.shape()[1];
|
let mut inc = input_embeds.shape()[1];
|
||||||
let mut kv_cache = kv_cache.clone();
|
let mut kv_cache = kv_cache.clone();
|
||||||
|
|
||||||
@ -192,7 +180,7 @@ impl Moondream2 {
|
|||||||
.into_dyn()
|
.into_dyn()
|
||||||
.into(),
|
.into(),
|
||||||
]);
|
]);
|
||||||
let decoder_outputs = self.text_decoder.inference(input)?;
|
let decoder_outputs = self.text_decoder.run(input)?;
|
||||||
|
|
||||||
// update
|
// update
|
||||||
let logits = &decoder_outputs["logits"];
|
let logits = &decoder_outputs["logits"];
|
||||||
@ -221,13 +209,10 @@ impl Moondream2 {
|
|||||||
|
|
||||||
// encode
|
// encode
|
||||||
let next_tokens = X::from(vec![token_id as f32]).insert_axis(1)?;
|
let next_tokens = X::from(vec![token_id as f32]).insert_axis(1)?;
|
||||||
input_embeds = self.text_encoder.inference(Xs::from(next_tokens))?[0].to_owned();
|
input_embeds = self.text_encoder.run(Xs::from(next_tokens))?[0].to_owned();
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = self
|
let text = self.processor.decode_tokens(&token_ids, true)?;
|
||||||
.text_encoder
|
|
||||||
.processor()
|
|
||||||
.decode_tokens(&token_ids, true)?;
|
|
||||||
|
|
||||||
Ok(text)
|
Ok(text)
|
||||||
}
|
}
|
||||||
@ -242,16 +227,16 @@ impl Moondream2 {
|
|||||||
let mut y_bboxes: Vec<Hbb> = Vec::new();
|
let mut y_bboxes: Vec<Hbb> = Vec::new();
|
||||||
let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
|
let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
|
||||||
let (image_height, image_width) = (
|
let (image_height, image_width) = (
|
||||||
self.vision_encoder.processor.images_transform_info[0].height_src,
|
self.processor.images_transform_info[0].height_src,
|
||||||
self.vision_encoder.processor.images_transform_info[0].width_src,
|
self.processor.images_transform_info[0].width_src,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
|
let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
|
||||||
let logits_sampler = LogitsSampler::new();
|
let logits_sampler = LogitsSampler::new();
|
||||||
|
|
||||||
// initial input_embeds
|
// initial input_embeds
|
||||||
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
|
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
|
||||||
let mut hidden = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
|
let mut hidden = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
|
||||||
let mut kv_cache = kv_cache;
|
let mut kv_cache = kv_cache;
|
||||||
|
|
||||||
// generate
|
// generate
|
||||||
@ -273,12 +258,7 @@ impl Moondream2 {
|
|||||||
|
|
||||||
// cx
|
// cx
|
||||||
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
||||||
let cx = self
|
let cx = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [1024]
|
||||||
.coord_decoder
|
|
||||||
.as_mut()
|
|
||||||
.unwrap()
|
|
||||||
.inference(Xs::from(input))?[0]
|
|
||||||
.clone(); // [1024]
|
|
||||||
let ratio = cx.shape()[0] as f32;
|
let ratio = cx.shape()[0] as f32;
|
||||||
let cx = logits_sampler
|
let cx = logits_sampler
|
||||||
.decode(cx.as_slice().context("Failed to get slice for `cx`")?)?
|
.decode(cx.as_slice().context("Failed to get slice for `cx`")?)?
|
||||||
@ -288,7 +268,7 @@ impl Moondream2 {
|
|||||||
.coord_encoder
|
.coord_encoder
|
||||||
.as_mut()
|
.as_mut()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inference(Xs::from(X::from(vec![cx])))?[0]
|
.run(Xs::from(X::from(vec![cx])))?[0]
|
||||||
.clone()
|
.clone()
|
||||||
.insert_axis(0)?
|
.insert_axis(0)?
|
||||||
.insert_axis(0)?;
|
.insert_axis(0)?;
|
||||||
@ -296,12 +276,7 @@ impl Moondream2 {
|
|||||||
// cy
|
// cy
|
||||||
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
|
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
|
||||||
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
||||||
let cy = self
|
let cy = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone();
|
||||||
.coord_decoder
|
|
||||||
.as_mut()
|
|
||||||
.unwrap()
|
|
||||||
.inference(Xs::from(input))?[0]
|
|
||||||
.clone();
|
|
||||||
let ratio = cy.shape()[0] as f32;
|
let ratio = cy.shape()[0] as f32;
|
||||||
|
|
||||||
let cy = logits_sampler
|
let cy = logits_sampler
|
||||||
@ -313,7 +288,7 @@ impl Moondream2 {
|
|||||||
.coord_encoder
|
.coord_encoder
|
||||||
.as_mut()
|
.as_mut()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inference(Xs::from(X::from(vec![cy])))?[0]
|
.run(Xs::from(X::from(vec![cy])))?[0]
|
||||||
.clone()
|
.clone()
|
||||||
.insert_axis(0)?
|
.insert_axis(0)?
|
||||||
.insert_axis(0)?;
|
.insert_axis(0)?;
|
||||||
@ -334,12 +309,7 @@ impl Moondream2 {
|
|||||||
// wh
|
// wh
|
||||||
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
|
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
|
||||||
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
|
||||||
let size = self
|
let size = self.size_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [2, 1024]
|
||||||
.size_decoder
|
|
||||||
.as_mut()
|
|
||||||
.unwrap()
|
|
||||||
.inference(Xs::from(input))?[0]
|
|
||||||
.clone(); // [2, 1024]
|
|
||||||
|
|
||||||
let ratio = size.shape()[1] as f32;
|
let ratio = size.shape()[1] as f32;
|
||||||
let w = logits_sampler.decode(
|
let w = logits_sampler.decode(
|
||||||
@ -361,7 +331,7 @@ impl Moondream2 {
|
|||||||
.size_encoder
|
.size_encoder
|
||||||
.as_mut()
|
.as_mut()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.inference(Xs::from(X::from(vec![w, h])))?[0]
|
.run(Xs::from(X::from(vec![w, h])))?[0]
|
||||||
.clone()
|
.clone()
|
||||||
.insert_axis(0)?
|
.insert_axis(0)?
|
||||||
.insert_axis(0)?; // [1024]
|
.insert_axis(0)?; // [1024]
|
||||||
@ -392,7 +362,7 @@ impl Moondream2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_kv_cache(&mut self, image_embedding: &X) -> Result<Array<f32, IxDyn>> {
|
fn prepare_kv_cache(&mut self, image_embedding: &X) -> Result<Array<f32, IxDyn>> {
|
||||||
let kv_cache_new = self.text_decoder.inference(Xs::from(vec![
|
let kv_cache_new = self.text_decoder.run(Xs::from(vec![
|
||||||
image_embedding.clone(),
|
image_embedding.clone(),
|
||||||
self.initial_kv_cache.clone(),
|
self.initial_kv_cache.clone(),
|
||||||
]))?["new_kv_cache"]
|
]))?["new_kv_cache"]
|
||||||
@ -421,7 +391,7 @@ impl Moondream2 {
|
|||||||
kv_cache: &mut Array<f32, IxDyn>,
|
kv_cache: &mut Array<f32, IxDyn>,
|
||||||
pos: &mut usize,
|
pos: &mut usize,
|
||||||
) -> Result<X> {
|
) -> Result<X> {
|
||||||
let decoder_outputs = self.text_decoder.inference(Xs::from(vec![
|
let decoder_outputs = self.text_decoder.run(Xs::from(vec![
|
||||||
input_embeds.clone(),
|
input_embeds.clone(),
|
||||||
kv_cache
|
kv_cache
|
||||||
.slice(s![.., .., .., .., ..*pos, ..])
|
.slice(s![.., .., .., .., ..*pos, ..])
|
||||||
@ -442,38 +412,6 @@ impl Moondream2 {
|
|||||||
|
|
||||||
Ok(decoder_outputs["logits"].to_owned())
|
Ok(decoder_outputs["logits"].to_owned())
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
|
||||||
pub struct VisionEncoder {
|
|
||||||
engine: Engine,
|
|
||||||
num_patch: usize,
|
|
||||||
patch_size: usize,
|
|
||||||
processor: Processor,
|
|
||||||
ts: Ts,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl VisionEncoder {
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
|
||||||
let engine = options.to_engine()?;
|
|
||||||
let (num_patch, patch_size, ts) = (
|
|
||||||
engine.batch().opt(),
|
|
||||||
engine.try_height().unwrap_or(&378.into()).opt(),
|
|
||||||
engine.ts.clone(),
|
|
||||||
);
|
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(patch_size as _)
|
|
||||||
.with_image_height(patch_size as _);
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
engine,
|
|
||||||
patch_size,
|
|
||||||
num_patch,
|
|
||||||
processor,
|
|
||||||
ts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_patches(image: &Image, image_patch_size: usize) -> (Vec<Image>, (u32, u32)) {
|
fn create_patches(image: &Image, image_patch_size: usize) -> (Vec<Image>, (u32, u32)) {
|
||||||
let mut patches = vec![image.clone()];
|
let mut patches = vec![image.clone()];
|
||||||
@ -515,10 +453,6 @@ impl VisionEncoder {
|
|||||||
(patches, selected_template)
|
(patches, selected_template)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
|
|
||||||
self.engine.run(xs)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode(&mut self, x: &Image) -> Result<X> {
|
pub fn encode(&mut self, x: &Image) -> Result<X> {
|
||||||
let (patches, selected_template) = Self::create_patches(x, self.patch_size);
|
let (patches, selected_template) = Self::create_patches(x, self.patch_size);
|
||||||
let patches = self.processor.process_images(&patches)?;
|
let patches = self.processor.process_images(&patches)?;
|
||||||
@ -526,7 +460,7 @@ impl VisionEncoder {
|
|||||||
(selected_template.0 as usize),
|
(selected_template.0 as usize),
|
||||||
(selected_template.1 as usize),
|
(selected_template.1 as usize),
|
||||||
);
|
);
|
||||||
let patch_emb = self.inference(patches.clone().into())?[0].clone();
|
let patch_emb = self.vision_encoder.run(patches.clone().into())?[0].clone();
|
||||||
let patch_emb = patch_emb.clone().0.into_dimensionality::<ndarray::Ix3>()?;
|
let patch_emb = patch_emb.clone().0.into_dimensionality::<ndarray::Ix3>()?;
|
||||||
let patch_emb = Self::process_patch_emb(patch_emb, template)?;
|
let patch_emb = Self::process_patch_emb(patch_emb, template)?;
|
||||||
let patch_emb = X::from(patch_emb.into_dyn()); // TODO .insert_axis(x),
|
let patch_emb = X::from(patch_emb.into_dyn()); // TODO .insert_axis(x),
|
||||||
@ -608,30 +542,6 @@ impl VisionEncoder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
|
||||||
pub struct VisionProjection {
|
|
||||||
engine: Engine,
|
|
||||||
seq_len: usize,
|
|
||||||
ts: Ts,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl VisionProjection {
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
|
||||||
let engine = options.to_engine()?;
|
|
||||||
let (seq_len, ts) = (engine.inputs_minoptmax[0][1].opt(), engine.ts.clone());
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
engine,
|
|
||||||
seq_len,
|
|
||||||
ts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
|
|
||||||
self.engine.run(xs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
struct KVCache(pub Array<f32, IxDyn>);
|
struct KVCache(pub Array<f32, IxDyn>);
|
||||||
|
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
/// Model configuration for `OWLv2`
|
/// Model configuration for `OWLv2`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn owlv2() -> Self {
|
pub fn owlv2() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("owlv2")
|
.with_name("owlv2")
|
||||||
.with_model_kind(crate::Kind::VisionLanguage)
|
|
||||||
// 1st & 3rd: text
|
// 1st & 3rd: text
|
||||||
.with_model_ixx(0, 0, (1, 1, 1).into()) // TODO
|
.with_model_ixx(0, 0, (1, 1, 1).into())
|
||||||
.with_model_ixx(0, 1, 1.into())
|
.with_model_ixx(0, 1, 1.into())
|
||||||
.with_model_ixx(2, 0, (1, 1, 1).into())
|
.with_model_ixx(2, 0, (1, 1, 1).into())
|
||||||
.with_model_ixx(2, 1, 1.into())
|
.with_model_ixx(2, 1, 1.into())
|
||||||
@ -21,6 +20,7 @@ impl crate::Options {
|
|||||||
.with_normalize(true)
|
.with_normalize(true)
|
||||||
.with_class_confs(&[0.1])
|
.with_class_confs(&[0.1])
|
||||||
.with_model_num_dry_run(0)
|
.with_model_num_dry_run(0)
|
||||||
|
.with_tokenizer_file("owlv2/tokenizer.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn owlv2_base() -> Self {
|
pub fn owlv2_base() -> Self {
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, ModelConfig, Processor, Ts, Xs, X, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct OWLv2 {
|
pub struct OWLv2 {
|
||||||
@ -22,8 +22,8 @@ pub struct OWLv2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl OWLv2 {
|
impl OWLv2 {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&960.into()).opt(),
|
engine.try_height().unwrap_or(&960.into()).opt(),
|
||||||
@ -31,11 +31,7 @@ impl OWLv2 {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
);
|
);
|
||||||
let spec = engine.spec().to_owned();
|
let spec = engine.spec().to_owned();
|
||||||
let processor = options
|
let names: Vec<String> = config
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let names: Vec<String> = options
|
|
||||||
.class_names()
|
.class_names()
|
||||||
.expect("No class names specified.")
|
.expect("No class names specified.")
|
||||||
.iter()
|
.iter()
|
||||||
@ -44,7 +40,10 @@ impl OWLv2 {
|
|||||||
let names_with_prompt: Vec<String> =
|
let names_with_prompt: Vec<String> =
|
||||||
names.iter().map(|x| format!("a photo of {}", x)).collect();
|
names.iter().map(|x| format!("a photo of {}", x)).collect();
|
||||||
let n = names.len();
|
let n = names.len();
|
||||||
let confs = DynConf::new(options.class_confs(), n);
|
let confs = DynConf::new(config.class_confs(), n);
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
let input_ids: Vec<f32> = processor
|
let input_ids: Vec<f32> = processor
|
||||||
.encode_texts_ids(
|
.encode_texts_ids(
|
||||||
&names_with_prompt
|
&names_with_prompt
|
||||||
|
@ -4,11 +4,11 @@ use crate::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// Model configuration for `PicoDet`
|
/// Model configuration for `PicoDet`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn picodet() -> Self {
|
pub fn picodet() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("picodet")
|
.with_name("picodet")
|
||||||
.with_batch_size(1) // TODO: ONNX model's batch size seems always = 1
|
.with_batch_size_all(1) // TODO: ONNX model's batch size seems always = 1
|
||||||
.with_model_ixx(0, 2, 640.into())
|
.with_model_ixx(0, 2, 640.into())
|
||||||
.with_model_ixx(0, 3, 640.into())
|
.with_model_ixx(0, 3, 640.into())
|
||||||
.with_model_ixx(1, 0, (1, 1, 8).into())
|
.with_model_ixx(1, 0, (1, 1, 8).into())
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, ModelConfig, Processor, Ts, Xs, X, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct PicoDet {
|
pub struct PicoDet {
|
||||||
@ -19,8 +19,8 @@ pub struct PicoDet {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl PicoDet {
|
impl PicoDet {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&640.into()).opt(),
|
engine.try_height().unwrap_or(&640.into()).opt(),
|
||||||
@ -28,15 +28,14 @@ impl PicoDet {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
);
|
);
|
||||||
let spec = engine.spec().to_owned();
|
let spec = engine.spec().to_owned();
|
||||||
let processor = options
|
let names = config
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let names = options
|
|
||||||
.class_names()
|
.class_names()
|
||||||
.expect("No class names are specified.")
|
.expect("No class names are specified.")
|
||||||
.to_vec();
|
.to_vec();
|
||||||
let confs = DynConf::new(options.class_confs(), names.len());
|
let confs = DynConf::new(config.class_confs(), names.len());
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
|
@ -2,8 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
elapsed, DType, Device, Engine, Image, Kind, Options, Processor, Scale, Task, Ts, Version, Xs,
|
elapsed, DType, Device, Engine, Image, ModelConfig, Processor, Scale, Task, Ts, Version, Xs, X,
|
||||||
X,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
@ -20,7 +19,6 @@ pub struct BaseModelVisual {
|
|||||||
dtype: DType,
|
dtype: DType,
|
||||||
task: Option<Task>,
|
task: Option<Task>,
|
||||||
scale: Option<Scale>,
|
scale: Option<Scale>,
|
||||||
kind: Option<Kind>,
|
|
||||||
version: Option<Version>,
|
version: Option<Version>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -29,8 +27,8 @@ impl BaseModelVisual {
|
|||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let err_msg = "You need to specify the image height and image width for visual model.";
|
let err_msg = "You need to specify the image height and image width for visual model.";
|
||||||
let (batch, height, width, ts, spec) = (
|
let (batch, height, width, ts, spec) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -39,18 +37,16 @@ impl BaseModelVisual {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
engine.spec().to_owned(),
|
engine.spec().to_owned(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
let device = options.model_device;
|
let device = config.model.device;
|
||||||
let task = options.model_task;
|
let task = config.task;
|
||||||
let scale = options.model_scale;
|
let scale = config.scale;
|
||||||
let dtype = options.model_dtype;
|
let dtype = config.model.dtype;
|
||||||
let kind = options.model_kind;
|
let name = config.name;
|
||||||
let name = options.model_name;
|
let version = config.version;
|
||||||
let version = options.model_version;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
@ -63,7 +59,6 @@ impl BaseModelVisual {
|
|||||||
dtype,
|
dtype,
|
||||||
task,
|
task,
|
||||||
scale,
|
scale,
|
||||||
kind,
|
|
||||||
device,
|
device,
|
||||||
version,
|
version,
|
||||||
name,
|
name,
|
||||||
@ -101,7 +96,6 @@ pub struct BaseModelTextual {
|
|||||||
dtype: DType,
|
dtype: DType,
|
||||||
task: Option<Task>,
|
task: Option<Task>,
|
||||||
scale: Option<Scale>,
|
scale: Option<Scale>,
|
||||||
kind: Option<Kind>,
|
|
||||||
version: Option<Version>,
|
version: Option<Version>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,21 +104,20 @@ impl BaseModelTextual {
|
|||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, ts, spec) = (
|
let (batch, ts, spec) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
engine.spec().to_owned(),
|
engine.spec().to_owned(),
|
||||||
);
|
);
|
||||||
let processor = options.to_processor()?;
|
let processor = Processor::try_from_config(&config.processor)?;
|
||||||
let device = options.model_device;
|
let device = config.model.device;
|
||||||
let task = options.model_task;
|
let dtype = config.model.dtype;
|
||||||
let scale = options.model_scale;
|
let task = config.task;
|
||||||
let dtype = options.model_dtype;
|
let scale = config.scale;
|
||||||
let kind = options.model_kind;
|
let name = config.name;
|
||||||
let name = options.model_name;
|
let version = config.version;
|
||||||
let version = options.model_version;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
@ -135,7 +128,6 @@ impl BaseModelTextual {
|
|||||||
dtype,
|
dtype,
|
||||||
task,
|
task,
|
||||||
scale,
|
scale,
|
||||||
kind,
|
|
||||||
device,
|
device,
|
||||||
version,
|
version,
|
||||||
name,
|
name,
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Image, Options, Prob, Processor, Ts, Xs, Y};
|
use crate::{elapsed, DynConf, Engine, Image, ModelConfig, Prob, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct ImageClassifier {
|
pub struct ImageClassifier {
|
||||||
@ -20,11 +20,12 @@ pub struct ImageClassifier {
|
|||||||
spec: String,
|
spec: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TryFrom<Options> for ImageClassifier {
|
impl TryFrom<ModelConfig> for ImageClassifier {
|
||||||
type Error = anyhow::Error;
|
type Error = anyhow::Error;
|
||||||
|
|
||||||
fn try_from(options: Options) -> Result<Self, Self::Error> {
|
fn try_from(config: ModelConfig) -> Result<Self, Self::Error> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
|
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -32,11 +33,8 @@ impl TryFrom<Options> for ImageClassifier {
|
|||||||
engine.try_width().unwrap_or(&224.into()).opt(),
|
engine.try_width().unwrap_or(&224.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
let (nc, names) = match (config.nc(), config.class_names()) {
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let (nc, names) = match (options.nc(), options.class_names()) {
|
|
||||||
(Some(nc), Some(names)) => {
|
(Some(nc), Some(names)) => {
|
||||||
if nc != names.len() {
|
if nc != names.len() {
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
@ -56,8 +54,11 @@ impl TryFrom<Options> for ImageClassifier {
|
|||||||
anyhow::bail!("Neither class names nor class numbers were specified.");
|
anyhow::bail!("Neither class names nor class numbers were specified.");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let confs = DynConf::new(options.class_confs(), nc);
|
let confs = DynConf::new(config.class_confs(), nc);
|
||||||
let apply_softmax = options.apply_softmax.unwrap_or_default();
|
let apply_softmax = config.apply_softmax.unwrap_or_default();
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
|
@ -1,18 +1,17 @@
|
|||||||
use crate::NAMES_COCO_91;
|
use crate::NAMES_COCO_91;
|
||||||
|
|
||||||
/// Model configuration for `RT-DETR`
|
/// Model configuration for `RT-DETR`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn rfdetr() -> Self {
|
pub fn rfdetr() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("rfdetr")
|
.with_name("rfdetr")
|
||||||
.with_batch_size(1)
|
.with_model_ixx(0, 0, 1.into())
|
||||||
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 560.into())
|
.with_model_ixx(0, 2, 560.into())
|
||||||
.with_model_ixx(0, 3, 560.into())
|
.with_model_ixx(0, 3, 560.into())
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
||||||
.with_normalize(true)
|
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
.with_image_mean(&[0.485, 0.456, 0.406])
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
.with_image_std(&[0.229, 0.224, 0.225])
|
||||||
.with_class_confs(&[0.25])
|
|
||||||
.with_class_names(&NAMES_COCO_91)
|
.with_class_names(&NAMES_COCO_91)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, ModelConfig, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct RFDETR {
|
pub struct RFDETR {
|
||||||
@ -19,8 +19,8 @@ pub struct RFDETR {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RFDETR {
|
impl RFDETR {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&560.into()).opt(),
|
engine.try_height().unwrap_or(&560.into()).opt(),
|
||||||
@ -28,16 +28,16 @@ impl RFDETR {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
);
|
);
|
||||||
let spec = engine.spec().to_owned();
|
let spec = engine.spec().to_owned();
|
||||||
let processor = options
|
let names: Vec<String> = config
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let names = options
|
|
||||||
.class_names()
|
.class_names()
|
||||||
.expect("No class names specified.")
|
.expect("No class names specified.")
|
||||||
.to_vec();
|
.iter()
|
||||||
let confs = DynConf::new(options.class_confs(), names.len());
|
.map(|x| x.to_string())
|
||||||
|
.collect();
|
||||||
|
let confs = DynConf::new(config.class_confs(), names.len());
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
height,
|
height,
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
/// Model configuration for `RMBG`
|
/// Model configuration for `RMBG`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn rmbg() -> Self {
|
pub fn rmbg() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("rmbg")
|
.with_name("rmbg")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 1024.into())
|
.with_model_ixx(0, 2, 1024.into())
|
||||||
.with_model_ixx(0, 3, 1024.into())
|
.with_model_ixx(0, 3, 1024.into())
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use aksr::Builder;
|
use aksr::Builder;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, Engine, Image, Mask, ModelConfig, Ops, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct RMBG {
|
pub struct RMBG {
|
||||||
@ -15,8 +15,8 @@ pub struct RMBG {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RMBG {
|
impl RMBG {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -24,8 +24,7 @@ impl RMBG {
|
|||||||
engine.try_width().unwrap_or(&1024.into()).opt(),
|
engine.try_width().unwrap_or(&1024.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
@ -63,7 +62,6 @@ impl RMBG {
|
|||||||
fn postprocess(&mut self, xs: Xs) -> Result<Vec<Y>> {
|
fn postprocess(&mut self, xs: Xs) -> Result<Vec<Y>> {
|
||||||
let mut ys: Vec<Y> = Vec::new();
|
let mut ys: Vec<Y> = Vec::new();
|
||||||
for (idx, luma) in xs[0].axis_iter(ndarray::Axis(0)).enumerate() {
|
for (idx, luma) in xs[0].axis_iter(ndarray::Axis(0)).enumerate() {
|
||||||
// image size
|
|
||||||
let (h1, w1) = (
|
let (h1, w1) = (
|
||||||
self.processor.images_transform_info[idx].height_src,
|
self.processor.images_transform_info[idx].height_src,
|
||||||
self.processor.images_transform_info[idx].width_src,
|
self.processor.images_transform_info[idx].width_src,
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
use crate::NAMES_COCO_80;
|
use crate::NAMES_COCO_80;
|
||||||
|
|
||||||
/// Model configuration for `RT-DETR`
|
/// Model configuration for `RT-DETR`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn rtdetr() -> Self {
|
pub fn rtdetr() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("rtdetr")
|
.with_name("rtdetr")
|
||||||
.with_batch_size(1)
|
.with_model_ixx(0, 0, 1.into())
|
||||||
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 640.into())
|
.with_model_ixx(0, 2, 640.into())
|
||||||
.with_model_ixx(0, 3, 640.into())
|
.with_model_ixx(0, 3, 640.into())
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
||||||
.with_normalize(true)
|
|
||||||
.with_class_confs(&[0.5])
|
.with_class_confs(&[0.5])
|
||||||
.with_class_names(&NAMES_COCO_80)
|
.with_class_names(&NAMES_COCO_80)
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, ModelConfig, Processor, Ts, Xs, X, Y};
|
||||||
|
|
||||||
#[derive(Debug, Builder)]
|
#[derive(Debug, Builder)]
|
||||||
pub struct RTDETR {
|
pub struct RTDETR {
|
||||||
@ -19,8 +19,8 @@ pub struct RTDETR {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RTDETR {
|
impl RTDETR {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
engine.try_height().unwrap_or(&640.into()).opt(),
|
engine.try_height().unwrap_or(&640.into()).opt(),
|
||||||
@ -28,15 +28,16 @@ impl RTDETR {
|
|||||||
engine.ts.clone(),
|
engine.ts.clone(),
|
||||||
);
|
);
|
||||||
let spec = engine.spec().to_owned();
|
let spec = engine.spec().to_owned();
|
||||||
let processor = options
|
let names: Vec<String> = config
|
||||||
.to_processor()?
|
|
||||||
.with_image_width(width as _)
|
|
||||||
.with_image_height(height as _);
|
|
||||||
let names = options
|
|
||||||
.class_names()
|
.class_names()
|
||||||
.expect("No class names specified.")
|
.expect("No class names specified.")
|
||||||
.to_vec();
|
.iter()
|
||||||
let confs = DynConf::new(options.class_confs(), names.len());
|
.map(|x| x.to_string())
|
||||||
|
.collect();
|
||||||
|
let confs = DynConf::new(config.class_confs(), names.len());
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
@ -87,7 +88,6 @@ impl RTDETR {
|
|||||||
.enumerate()
|
.enumerate()
|
||||||
.filter_map(|(idx, ((labels, boxes), scores))| {
|
.filter_map(|(idx, ((labels, boxes), scores))| {
|
||||||
let ratio = self.processor.images_transform_info[idx].height_scale;
|
let ratio = self.processor.images_transform_info[idx].height_scale;
|
||||||
|
|
||||||
let mut y_bboxes = Vec::new();
|
let mut y_bboxes = Vec::new();
|
||||||
for (i, &score) in scores.iter().enumerate() {
|
for (i, &score) in scores.iter().enumerate() {
|
||||||
let class_id = labels[i] as usize;
|
let class_id = labels[i] as usize;
|
||||||
@ -102,7 +102,6 @@ impl RTDETR {
|
|||||||
xyxy[2] / ratio,
|
xyxy[2] / ratio,
|
||||||
xyxy[3] / ratio,
|
xyxy[3] / ratio,
|
||||||
);
|
);
|
||||||
|
|
||||||
y_bboxes.push(
|
y_bboxes.push(
|
||||||
Hbb::default()
|
Hbb::default()
|
||||||
.with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
|
.with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
/// Model configuration for `RTMO`
|
/// Model configuration for `RTMO`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn rtmo() -> Self {
|
pub fn rtmo() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("rtmo")
|
.with_name("rtmo")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, 640.into())
|
.with_model_ixx(0, 2, 640.into())
|
||||||
.with_model_ixx(0, 3, 640.into())
|
.with_model_ixx(0, 3, 640.into())
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::Axis;
|
use ndarray::Axis;
|
||||||
|
|
||||||
use crate::{elapsed, DynConf, Engine, Hbb, Image, Keypoint, Options, Processor, Ts, Xs, Y};
|
use crate::{elapsed, DynConf, Engine, Hbb, Image, Keypoint, ModelConfig, Processor, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct RTMO {
|
pub struct RTMO {
|
||||||
@ -18,8 +18,8 @@ pub struct RTMO {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RTMO {
|
impl RTMO {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -27,15 +27,14 @@ impl RTMO {
|
|||||||
engine.try_width().unwrap_or(&512.into()).opt(),
|
engine.try_width().unwrap_or(&512.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
let nk = config.nk().unwrap_or(17);
|
||||||
|
let confs = DynConf::new(config.class_confs(), 1);
|
||||||
|
let kconfs = DynConf::new(config.keypoint_confs(), nk);
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
|
|
||||||
let nk = options.nk().unwrap_or(17);
|
|
||||||
let confs = DynConf::new(options.class_confs(), 1);
|
|
||||||
let kconfs = DynConf::new(options.keypoint_confs(), nk);
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
height,
|
height,
|
||||||
|
@ -1,100 +1,73 @@
|
|||||||
use crate::{models::SamKind, Options};
|
use crate::{models::SamKind, ModelConfig};
|
||||||
|
|
||||||
/// Model configuration for `Segment Anything Model`
|
/// Model configuration for `Segment Anything Model`
|
||||||
impl Options {
|
impl ModelConfig {
|
||||||
pub fn sam() -> Self {
|
pub fn sam() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("sam")
|
.with_name("sam")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_encoder_ixx(0, 0, 1.into())
|
||||||
}
|
.with_encoder_ixx(0, 1, 3.into())
|
||||||
|
.with_encoder_ixx(0, 2, 1024.into())
|
||||||
pub fn sam_encoder() -> Self {
|
.with_encoder_ixx(0, 3, 1024.into())
|
||||||
Self::sam()
|
|
||||||
.with_model_ixx(0, 2, 1024.into())
|
|
||||||
.with_model_ixx(0, 3, 1024.into())
|
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
||||||
.with_resize_filter("Bilinear")
|
.with_resize_filter("Bilinear")
|
||||||
.with_image_mean(&[123.5, 116.5, 103.5])
|
.with_image_mean(&[123.5, 116.5, 103.5])
|
||||||
.with_image_std(&[58.5, 57.0, 57.5])
|
.with_image_std(&[58.5, 57.0, 57.5])
|
||||||
.with_normalize(false)
|
.with_normalize(false)
|
||||||
.with_sam_kind(SamKind::Sam)
|
.with_sam_kind(SamKind::Sam)
|
||||||
.with_low_res_mask(false)
|
.with_sam_low_res_mask(false)
|
||||||
.with_find_contours(true)
|
.with_find_contours(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam_decoder() -> Self {
|
pub fn sam_v1_base() -> Self {
|
||||||
Self::sam()
|
Self::sam()
|
||||||
|
.with_encoder_file("sam-vit-b-encoder.onnx")
|
||||||
|
.with_decoder_file("sam-vit-b-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam_v1_base_encoder() -> Self {
|
// pub fn sam_v1_base_singlemask_decoder() -> Self {
|
||||||
Self::sam_encoder().with_model_file("sam-vit-b-encoder.onnx")
|
// Self::sam().with_decoder_file("sam-vit-b-decoder-singlemask.onnx")
|
||||||
|
// }
|
||||||
|
|
||||||
|
pub fn sam2_tiny() -> Self {
|
||||||
|
Self::sam()
|
||||||
|
.with_encoder_file("sam2-hiera-tiny-encoder.onnx")
|
||||||
|
.with_sam_kind(SamKind::Sam2)
|
||||||
|
.with_decoder_file("sam2-hiera-tiny-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam_v1_base_decoder() -> Self {
|
pub fn sam2_small() -> Self {
|
||||||
Self::sam_decoder().with_model_file("sam-vit-b-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("sam2-hiera-small-encoder.onnx")
|
||||||
|
.with_decoder_file("sam2-hiera-small-decoder.onnx")
|
||||||
pub fn sam_v1_base_singlemask_decoder() -> Self {
|
|
||||||
Self::sam_decoder().with_model_file("sam-vit-b-decoder-singlemask.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_tiny_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("sam2-hiera-tiny-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::Sam2)
|
.with_sam_kind(SamKind::Sam2)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam2_tiny_decoder() -> Self {
|
pub fn sam2_base_plus() -> Self {
|
||||||
Self::sam_decoder().with_model_file("sam2-hiera-tiny-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("sam2-hiera-base-plus-encoder.onnx")
|
||||||
|
.with_decoder_file("sam2-hiera-base-plus-decoder.onnx")
|
||||||
pub fn sam2_small_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("sam2-hiera-small-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::Sam2)
|
.with_sam_kind(SamKind::Sam2)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam2_small_decoder() -> Self {
|
pub fn mobile_sam_tiny() -> Self {
|
||||||
Self::sam_decoder().with_model_file("sam2-hiera-small-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("mobile-sam-vit-t-encoder.onnx")
|
||||||
|
|
||||||
pub fn sam2_base_plus_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("sam2-hiera-base-plus-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::Sam2)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_base_plus_decoder() -> Self {
|
|
||||||
Self::sam_decoder().with_model_file("sam2-hiera-base-plus-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn mobile_sam_tiny_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("mobile-sam-vit-t-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::MobileSam)
|
.with_sam_kind(SamKind::MobileSam)
|
||||||
|
.with_decoder_file("mobile-sam-vit-t-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn mobile_sam_tiny_decoder() -> Self {
|
pub fn sam_hq_tiny() -> Self {
|
||||||
Self::sam_decoder().with_model_file("mobile-sam-vit-t-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("sam-hq-vit-t-encoder.onnx")
|
||||||
|
|
||||||
pub fn sam_hq_tiny_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("sam-hq-vit-t-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::SamHq)
|
.with_sam_kind(SamKind::SamHq)
|
||||||
|
.with_decoder_file("sam-hq-vit-t-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam_hq_tiny_decoder() -> Self {
|
pub fn edge_sam_3x() -> Self {
|
||||||
Self::sam_decoder().with_model_file("sam-hq-vit-t-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("edge-sam-3x-encoder.onnx")
|
||||||
|
.with_decoder_file("edge-sam-3x-decoder.onnx")
|
||||||
pub fn edge_sam_3x_encoder() -> Self {
|
|
||||||
Self::sam_encoder()
|
|
||||||
.with_model_file("edge-sam-3x-encoder.onnx")
|
|
||||||
.with_sam_kind(SamKind::EdgeSam)
|
.with_sam_kind(SamKind::EdgeSam)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn edge_sam_3x_decoder() -> Self {
|
|
||||||
Self::sam_decoder().with_model_file("edge-sam-3x-decoder.onnx")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,8 @@ use ndarray::{s, Axis};
|
|||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
elapsed, DynConf, Engine, Image, Mask, Ops, Options, Polygon, Processor, SamPrompt, Ts, Xs, X,
|
elapsed, DynConf, Engine, Image, Mask, ModelConfig, Ops, Polygon, Processor, SamPrompt, Ts, Xs,
|
||||||
Y,
|
X, Y,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@ -49,9 +49,10 @@ pub struct SAM {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl SAM {
|
impl SAM {
|
||||||
pub fn new(options_encoder: Options, options_decoder: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let encoder = options_encoder.to_engine()?;
|
let encoder = Engine::try_from_config(&config.encoder)?;
|
||||||
let decoder = options_decoder.to_engine()?;
|
let decoder = Engine::try_from_config(&config.decoder)?;
|
||||||
|
|
||||||
let (batch, height, width) = (
|
let (batch, height, width) = (
|
||||||
encoder.batch().opt(),
|
encoder.batch().opt(),
|
||||||
encoder.try_height().unwrap_or(&1024.into()).opt(),
|
encoder.try_height().unwrap_or(&1024.into()).opt(),
|
||||||
@ -60,24 +61,23 @@ impl SAM {
|
|||||||
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
|
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
|
||||||
let spec = encoder.spec().to_owned();
|
let spec = encoder.spec().to_owned();
|
||||||
|
|
||||||
let processor = options_encoder
|
let conf = DynConf::new(config.class_confs(), 1);
|
||||||
.to_processor()?
|
let find_contours = config.find_contours;
|
||||||
.with_image_width(width as _)
|
let kind = match config.sam_kind {
|
||||||
.with_image_height(height as _);
|
|
||||||
|
|
||||||
let conf = DynConf::new(options_encoder.class_confs(), 1);
|
|
||||||
let find_contours = options_encoder.find_contours;
|
|
||||||
let kind = match options_encoder.sam_kind {
|
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
None => anyhow::bail!("Error: no clear `SamKind` specified."),
|
None => anyhow::bail!("Error: no clear `SamKind` specified."),
|
||||||
};
|
};
|
||||||
let use_low_res_mask = match kind {
|
let use_low_res_mask = match kind {
|
||||||
SamKind::Sam | SamKind::MobileSam | SamKind::SamHq => {
|
SamKind::Sam | SamKind::MobileSam | SamKind::SamHq => {
|
||||||
options_encoder.low_res_mask.unwrap_or(false)
|
config.sam_low_res_mask.unwrap_or(false)
|
||||||
}
|
}
|
||||||
SamKind::EdgeSam | SamKind::Sam2 => true,
|
SamKind::EdgeSam | SamKind::Sam2 => true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
|
.with_image_width(width as _)
|
||||||
|
.with_image_height(height as _);
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
encoder,
|
encoder,
|
||||||
decoder,
|
decoder,
|
||||||
|
@ -1,50 +1,28 @@
|
|||||||
use crate::Options;
|
use crate::ModelConfig;
|
||||||
|
|
||||||
/// Model configuration for `SAM2.1`
|
/// Model configuration for `SAM2.1`
|
||||||
impl Options {
|
impl ModelConfig {
|
||||||
pub fn sam2_encoder() -> Self {
|
pub fn sam2_1_tiny() -> Self {
|
||||||
Self::sam()
|
Self::sam()
|
||||||
.with_model_ixx(0, 2, 1024.into())
|
.with_encoder_file("sam2.1-hiera-tiny-encoder.onnx")
|
||||||
.with_model_ixx(0, 3, 1024.into())
|
.with_decoder_file("sam2.1-hiera-tiny-decoder.onnx")
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
|
||||||
.with_resize_filter("Bilinear")
|
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam2_decoder() -> Self {
|
pub fn sam2_1_small() -> Self {
|
||||||
Self::sam()
|
Self::sam()
|
||||||
|
.with_encoder_file("sam2.1-hiera-small-encoder.onnx")
|
||||||
|
.with_decoder_file("sam2.1-hiera-small-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam2_1_tiny_encoder() -> Self {
|
pub fn sam2_1_base_plus() -> Self {
|
||||||
Self::sam2_encoder().with_model_file("sam2.1-hiera-tiny-encoder.onnx")
|
Self::sam()
|
||||||
|
.with_encoder_file("sam2.1-hiera-base-plus-encoder.onnx")
|
||||||
|
.with_decoder_file("sam2.1-hiera-base-plus-decoder.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sam2_1_tiny_decoder() -> Self {
|
pub fn sam2_1_large() -> Self {
|
||||||
Self::sam2_decoder().with_model_file("sam2.1-hiera-tiny-decoder.onnx")
|
Self::sam()
|
||||||
}
|
.with_encoder_file("sam2.1-hiera-large-encoder.onnx")
|
||||||
|
.with_decoder_file("sam2.1-hiera-large-decoder.onnx")
|
||||||
pub fn sam2_1_small_encoder() -> Self {
|
|
||||||
Self::sam2_encoder().with_model_file("sam2.1-hiera-small-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_1_small_decoder() -> Self {
|
|
||||||
Self::sam2_decoder().with_model_file("sam2.1-hiera-small-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_1_base_plus_encoder() -> Self {
|
|
||||||
Self::sam2_encoder().with_model_file("sam2.1-hiera-base-plus-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_1_base_plus_decoder() -> Self {
|
|
||||||
Self::sam2_decoder().with_model_file("sam2.1-hiera-base-plus-decoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_1_large_encoder() -> Self {
|
|
||||||
Self::sam2_encoder().with_model_file("sam2.1-hiera-large-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sam2_1_large_decoder() -> Self {
|
|
||||||
Self::sam2_decoder().with_model_file("sam2.1-hiera-large-decoder.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use anyhow::Result;
|
|||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
elapsed, DynConf, Engine, Image, Mask, Ops, Options, Processor, SamPrompt, Ts, Xs, X, Y,
|
elapsed, DynConf, Engine, Image, Mask, ModelConfig, Ops, Processor, SamPrompt, Ts, Xs, X, Y,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
@ -20,9 +20,9 @@ pub struct SAM2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl SAM2 {
|
impl SAM2 {
|
||||||
pub fn new(options_encoder: Options, options_decoder: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let encoder = options_encoder.to_engine()?;
|
let encoder = Engine::try_from_config(&config.encoder)?;
|
||||||
let decoder = options_decoder.to_engine()?;
|
let decoder = Engine::try_from_config(&config.decoder)?;
|
||||||
let (batch, height, width) = (
|
let (batch, height, width) = (
|
||||||
encoder.batch().opt(),
|
encoder.batch().opt(),
|
||||||
encoder.try_height().unwrap_or(&1024.into()).opt(),
|
encoder.try_height().unwrap_or(&1024.into()).opt(),
|
||||||
@ -30,11 +30,11 @@ impl SAM2 {
|
|||||||
);
|
);
|
||||||
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
|
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
|
||||||
let spec = encoder.spec().to_owned();
|
let spec = encoder.spec().to_owned();
|
||||||
let processor = options_encoder
|
|
||||||
.to_processor()?
|
let conf = DynConf::new(config.class_confs(), 1);
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
let conf = DynConf::new(options_encoder.class_confs(), 1);
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
encoder,
|
encoder,
|
||||||
|
@ -1,15 +1,14 @@
|
|||||||
use crate::NAMES_BODY_PARTS_28;
|
use crate::NAMES_BODY_PARTS_28;
|
||||||
|
|
||||||
/// Model configuration for `Sapiens`
|
/// Model configuration for `Sapiens`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn sapiens() -> Self {
|
pub fn sapiens() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("sapiens")
|
.with_name("sapiens")
|
||||||
.with_model_ixx(0, 0, 1.into())
|
.with_model_ixx(0, 0, 1.into())
|
||||||
.with_model_ixx(0, 2, 1024.into())
|
.with_model_ixx(0, 2, 1024.into())
|
||||||
.with_model_ixx(0, 3, 768.into())
|
.with_model_ixx(0, 3, 768.into())
|
||||||
.with_resize_mode(crate::ResizeMode::FitExact)
|
.with_resize_mode(crate::ResizeMode::FitExact)
|
||||||
.with_resize_filter("Bilinear")
|
|
||||||
.with_image_mean(&[123.5, 116.5, 103.5])
|
.with_image_mean(&[123.5, 116.5, 103.5])
|
||||||
.with_image_std(&[58.5, 57.0, 57.5])
|
.with_image_std(&[58.5, 57.0, 57.5])
|
||||||
.with_normalize(false)
|
.with_normalize(false)
|
||||||
@ -17,31 +16,11 @@ impl crate::Options {
|
|||||||
|
|
||||||
pub fn sapiens_body_part_segmentation() -> Self {
|
pub fn sapiens_body_part_segmentation() -> Self {
|
||||||
Self::sapiens()
|
Self::sapiens()
|
||||||
.with_model_task(crate::Task::InstanceSegmentation)
|
.with_task(crate::Task::InstanceSegmentation)
|
||||||
.with_class_names(&NAMES_BODY_PARTS_28)
|
.with_class_names(&NAMES_BODY_PARTS_28)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn sapiens_seg_0_3b() -> Self {
|
pub fn sapiens_seg_0_3b() -> Self {
|
||||||
Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b.onnx")
|
Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn sapiens_seg_0_3b_uint8() -> Self {
|
|
||||||
// Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-uint8.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn sapiens_seg_0_3b_fp16() -> Self {
|
|
||||||
// Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-fp16.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn sapiens_seg_0_3b_bnb4() -> Self {
|
|
||||||
// Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-bnb4.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn sapiens_seg_0_3b_q4f16() -> Self {
|
|
||||||
// Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-q4f16.onnx")
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn sapiens_seg_0_6b_fp16() -> Self {
|
|
||||||
// Self::sapiens_body_part_segmentation().with_model_file("seg-0.6b-fp16.onnx")
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::{s, Array2, Axis};
|
use ndarray::{s, Array2, Axis};
|
||||||
|
|
||||||
use crate::{elapsed, Engine, Image, Mask, Ops, Options, Polygon, Processor, Task, Ts, Xs, Y};
|
use crate::{elapsed, Engine, Image, Mask, ModelConfig, Ops, Polygon, Processor, Task, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct Sapiens {
|
pub struct Sapiens {
|
||||||
@ -18,8 +18,8 @@ pub struct Sapiens {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Sapiens {
|
impl Sapiens {
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let engine = options.to_engine()?;
|
let engine = Engine::try_from_config(&config.model)?;
|
||||||
let spec = engine.spec().to_string();
|
let spec = engine.spec().to_string();
|
||||||
let (batch, height, width, ts) = (
|
let (batch, height, width, ts) = (
|
||||||
engine.batch().opt(),
|
engine.batch().opt(),
|
||||||
@ -27,12 +27,12 @@ impl Sapiens {
|
|||||||
engine.try_width().unwrap_or(&768.into()).opt(),
|
engine.try_width().unwrap_or(&768.into()).opt(),
|
||||||
engine.ts().clone(),
|
engine.ts().clone(),
|
||||||
);
|
);
|
||||||
let processor = options
|
|
||||||
.to_processor()?
|
let task = config.task.expect("No sapiens task specified.");
|
||||||
|
let names_body = config.class_names;
|
||||||
|
let processor = Processor::try_from_config(&config.processor)?
|
||||||
.with_image_width(width as _)
|
.with_image_width(width as _)
|
||||||
.with_image_height(height as _);
|
.with_image_height(height as _);
|
||||||
let task = options.model_task.expect("No sapiens task specified.");
|
|
||||||
let names_body = options.class_names;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
engine,
|
engine,
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
/// Model configuration for `SLANet`
|
/// Model configuration for `SLANet`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn slanet() -> Self {
|
pub fn slanet() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_model_name("slanet")
|
.with_name("slanet")
|
||||||
.with_model_ixx(0, 0, (1, 1, 8).into())
|
.with_model_ixx(0, 0, (1, 1, 8).into())
|
||||||
|
.with_model_ixx(0, 1, 3.into())
|
||||||
.with_model_ixx(0, 2, (320, 488, 488).into())
|
.with_model_ixx(0, 2, (320, 488, 488).into())
|
||||||
.with_model_ixx(0, 3, (320, 488, 488).into())
|
.with_model_ixx(0, 3, (320, 488, 488).into())
|
||||||
.with_image_mean(&[0.485, 0.456, 0.406])
|
.with_image_mean(&[0.485, 0.456, 0.406])
|
||||||
.with_image_std(&[0.229, 0.224, 0.225])
|
.with_image_std(&[0.229, 0.224, 0.225])
|
||||||
.with_normalize(true)
|
|
||||||
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
.with_resize_mode(crate::ResizeMode::FitAdaptive)
|
||||||
.with_padding_value(0)
|
.with_padding_value(0)
|
||||||
.with_unsigned(true)
|
.with_unsigned(true)
|
||||||
@ -17,6 +17,6 @@ impl crate::Options {
|
|||||||
pub fn slanet_lcnet_v2_mobile_ch() -> Self {
|
pub fn slanet_lcnet_v2_mobile_ch() -> Self {
|
||||||
Self::slanet()
|
Self::slanet()
|
||||||
.with_model_file("v2-mobile-ch.onnx")
|
.with_model_file("v2-mobile-ch.onnx")
|
||||||
.with_vocab_txt("vocab-sla-v2.txt")
|
.with_vocab_txt("slanet/vocab-sla-v2.txt")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use aksr::Builder;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use ndarray::{s, Axis};
|
use ndarray::{s, Axis};
|
||||||
|
|
||||||
use crate::{elapsed, models::BaseModelVisual, Image, Keypoint, Options, Ts, Xs, Y};
|
use crate::{elapsed, models::BaseModelVisual, Image, Keypoint, ModelConfig, Ts, Xs, Y};
|
||||||
|
|
||||||
#[derive(Builder, Debug)]
|
#[derive(Builder, Debug)]
|
||||||
pub struct SLANet {
|
pub struct SLANet {
|
||||||
@ -19,8 +19,8 @@ impl SLANet {
|
|||||||
self.ts.summary();
|
self.ts.summary();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(options: Options) -> Result<Self> {
|
pub fn new(config: ModelConfig) -> Result<Self> {
|
||||||
let base = BaseModelVisual::new(options)?;
|
let base = BaseModelVisual::new(config)?;
|
||||||
let spec = base.engine().spec().to_owned();
|
let spec = base.engine().spec().to_owned();
|
||||||
let sos = 0;
|
let sos = 0;
|
||||||
let eos = base.processor().vocab().len() - 1;
|
let eos = base.processor().vocab().len() - 1;
|
||||||
|
@ -1,58 +1,28 @@
|
|||||||
/// Model configuration for `SmolVLM`
|
/// Model configuration for `SmolVLM`
|
||||||
impl crate::Options {
|
impl crate::ModelConfig {
|
||||||
pub fn smolvlm() -> Self {
|
pub fn smolvlm() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
.with_batch_size(1)
|
.with_name("smolvlm")
|
||||||
.with_model_name("smolvlm")
|
.with_batch_size_all(1)
|
||||||
.with_model_num_dry_run(3)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smolvlm_vision() -> Self {
|
|
||||||
Self::smolvlm()
|
|
||||||
.with_model_kind(crate::Kind::Vision)
|
|
||||||
.with_image_mean(&[0.5, 0.5, 0.5])
|
.with_image_mean(&[0.5, 0.5, 0.5])
|
||||||
.with_image_std(&[0.5, 0.5, 0.5])
|
.with_image_std(&[0.5, 0.5, 0.5])
|
||||||
.with_resize_filter("lanczos3")
|
.with_resize_filter("lanczos3")
|
||||||
.with_normalize(true)
|
.with_tokenizer_file("smolvlm/tokenizer.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn smolvlm_text() -> Self {
|
pub fn smolvlm_256m() -> Self {
|
||||||
Self::smolvlm().with_model_kind(crate::Kind::Language)
|
Self::smolvlm()
|
||||||
|
.with_scale(crate::Scale::Million(256.))
|
||||||
|
.with_visual_file("256m-vision-encoder.onnx")
|
||||||
|
.with_textual_file("256m-embed-tokens.onnx")
|
||||||
|
.with_textual_decoder_file("256m-decoder-model-merged.onnx")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn smolvlm_vision_256m() -> Self {
|
pub fn smolvlm_500m() -> Self {
|
||||||
Self::smolvlm_vision()
|
Self::smolvlm()
|
||||||
.with_model_scale(crate::Scale::Million(256.))
|
.with_scale(crate::Scale::Million(500.))
|
||||||
.with_model_file("256m-vision-encoder.onnx")
|
.with_visual_file("500m-vision-encoder.onnx")
|
||||||
}
|
.with_textual_file("500m-embed-tokens.onnx")
|
||||||
|
.with_textual_decoder_file("500m-decoder-model-merged.onnx")
|
||||||
pub fn smolvlm_text_embed_256m() -> Self {
|
|
||||||
Self::smolvlm_text()
|
|
||||||
.with_model_scale(crate::Scale::Million(256.))
|
|
||||||
.with_model_file("256m-embed-tokens.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smolvlm_decoder_256m() -> Self {
|
|
||||||
Self::smolvlm_text()
|
|
||||||
.with_model_scale(crate::Scale::Million(256.))
|
|
||||||
.with_model_file("256m-decoder-model-merged.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smolvlm_vision_500m() -> Self {
|
|
||||||
Self::smolvlm_vision()
|
|
||||||
.with_model_scale(crate::Scale::Million(500.))
|
|
||||||
.with_model_file("500m-vision-encoder.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smolvlm_text_embed_500m() -> Self {
|
|
||||||
Self::smolvlm_text()
|
|
||||||
.with_model_scale(crate::Scale::Million(500.))
|
|
||||||
.with_model_file("500m-embed-tokens.onnx")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smolvlm_decoder_500m() -> Self {
|
|
||||||
Self::smolvlm_text()
|
|
||||||
.with_model_scale(crate::Scale::Million(500.))
|
|
||||||
.with_model_file("500m-decoder-model-merged.onnx")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user