diff --git a/Cargo.toml b/Cargo.toml
index 1cd2b59..b4146ae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "usls"
edition = "2021"
-version = "0.1.0-beta.1"
+version = "0.1.0-beta.2"
rust-version = "1.82"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
@@ -44,6 +44,7 @@ ort = { version = "2.0.0-rc.9", default-features = false, optional = true , feat
"half"
]}
tokenizers = { version = "0.21.1" }
+paste = "1.0.15"
[build-dependencies]
prost-build = "0.13.5"
diff --git a/README.md b/README.md
index f03afd1..5f7f086 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,8 @@
| [Moondream2](https://github.com/vikhyat/moondream/tree/main) | Open-Set Object Detection<br />Open-Set Keypoints Detection<br />Image Caption<br />Visual Question Answering | [demo](examples/moondream2) | ✅ | ✅ | ✅ | | |
| [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Set Object Detection | [demo](examples/owlv2) | ✅ | ✅ | ✅ | | |
| [SmolVLM(256M, 500M)](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct) | Visual Question Answering | [demo](examples/smolvlm) | ✅ | ✅ | ✅ | | |
-| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0) | Image Segmentation Answering | [demo](examples/rmbg) | ✅ | ✅ | ✅ | | |
+| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0) | Image Segmentation<br />Background Removal | [demo](examples/rmbg) | ✅ | ✅ | ✅ | | |
+| [BEN2](https://huggingface.co/PramaLLC/BEN2) | Image Segmentation<br />Background Removal | [demo](examples/ben2) | ✅ | ✅ | ✅ | | |
diff --git a/examples/ben2/main.rs b/examples/ben2/main.rs
index 9096318..8d44522 100644
--- a/examples/ben2/main.rs
+++ b/examples/ben2/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::RMBG, Annotator, DataLoader, Options};
+use usls::{models::RMBG, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -20,11 +20,11 @@ fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env();
// build model
- let options = Options::ben2_base()
+ let config = Config::ben2_base()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = RMBG::new(options)?;
+ let mut model = RMBG::new(config)?;
// load image
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
diff --git a/examples/blip/main.rs b/examples/blip/main.rs
index a04bfb8..6fd9456 100644
--- a/examples/blip/main.rs
+++ b/examples/blip/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::Blip, DataLoader, Options};
+use usls::{models::Blip, Config, DataLoader};
#[derive(argh::FromArgs)]
/// BLIP Example
@@ -20,13 +20,10 @@ fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env();
// build model
- let options_visual = Options::blip_v1_base_caption_visual()
- .with_model_device(args.device.as_str().try_into()?)
+ let config = Config::blip_v1_base_caption()
+ .with_device_all(args.device.as_str().try_into()?)
.commit()?;
- let options_textual = Options::blip_v1_base_caption_textual()
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?;
- let mut model = Blip::new(options_visual, options_textual)?;
+ let mut model = Blip::new(config)?;
// image caption
let xs = DataLoader::try_read_n(&args.source)?;
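
For reference, the consolidated API drives both BLIP towers from a single `Config`; a minimal sketch of the pattern above, with the device string and image path as placeholder assumptions:

```rust
use usls::{models::Blip, Config, DataLoader};

fn build_blip_caption() -> anyhow::Result<()> {
    // one Config now covers the visual and textual parts that previously
    // required two separate Options
    let config = Config::blip_v1_base_caption()
        .with_device_all("cpu:0".try_into()?)
        .commit()?;
    let _model = Blip::new(config)?;
    // images are loaded exactly as in the example above
    let _images = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
    Ok(())
}
```
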
diff --git a/examples/classifier/main.rs b/examples/classifier/main.rs
index fe9536e..d7bc9c1 100644
--- a/examples/classifier/main.rs
+++ b/examples/classifier/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+use usls::{models::ImageClassifier, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -12,7 +12,7 @@ struct Args {
device: String,
/// model name
- #[argh(option, default = "String::from(\"beit\")")]
+ #[argh(option, default = "String::from(\"mobileone\")")]
model: String,
/// source image
@@ -36,20 +36,20 @@ fn main() -> anyhow::Result<()> {
let args: Args = argh::from_env();
// build model
- let options = match args.model.to_lowercase().as_str() {
- "beit" => Options::beit_base(),
- "convnext" => Options::convnext_v2_atto(),
- "deit" => Options::deit_tiny_distill(),
- "fastvit" => Options::fastvit_t8_distill(),
- "mobileone" => Options::mobileone_s0(),
+ let config = match args.model.to_lowercase().as_str() {
+ "beit" => Config::beit_base(),
+ "convnext" => Config::convnext_v2_atto(),
+ "deit" => Config::deit_tiny_distill(),
+ "fastvit" => Config::fastvit_t8_distill(),
+ "mobileone" => Config::mobileone_s0(),
_ => anyhow::bail!("Unsupported model: {}", args.model),
};
- let options = options
+ let config = config
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = ImageClassifier::try_from(options)?;
+ let mut model = ImageClassifier::try_from(config)?;
// load images
let xs = DataLoader::try_read_n(&args.source)?;
diff --git a/examples/clip/main.rs b/examples/clip/main.rs
index a600913..90d1055 100644
--- a/examples/clip/main.rs
+++ b/examples/clip/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::Clip, DataLoader, Ops, Options};
+use usls::{models::Clip, Config, DataLoader, Ops};
#[derive(argh::FromArgs)]
/// CLIP Example
@@ -14,18 +14,13 @@ fn main() -> Result<()> {
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
-
let args: Args = argh::from_env();
+
// build model
- let options_visual = Options::jina_clip_v1_visual()
- // clip_vit_b32_visual()
- .with_model_device(args.device.as_str().try_into()?)
+ let config = Config::jina_clip_v1()
+ .with_device_all(args.device.as_str().try_into()?)
.commit()?;
- let options_textual = Options::jina_clip_v1_textual()
- // clip_vit_b32_textual()
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?;
- let mut model = Clip::new(options_visual, options_textual)?;
+ let mut model = Clip::new(config)?;
// texts
let texts = vec![
diff --git a/examples/d-fine/main.rs b/examples/d-fine/main.rs
index b74e3dc..b7c1548 100644
--- a/examples/d-fine/main.rs
+++ b/examples/d-fine/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- // options
- let options = Options::d_fine_n_coco().commit()?;
- let mut model = RTDETR::new(options)?;
+ // config
+ let mut model = RTDETR::new(Config::d_fine_n_coco().commit()?)?;
// load
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/db/main.rs b/examples/db/main.rs
index e205c35..c4e6373 100644
--- a/examples/db/main.rs
+++ b/examples/db/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::DB, Annotator, DataLoader, Options, Style};
+use usls::{models::DB, Annotator, Config, DataLoader, Style};
#[derive(argh::FromArgs)]
/// Example
@@ -41,15 +41,13 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let options = match &args.model {
- Some(m) => Options::db().with_model_file(m),
- None => Options::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
- };
- let mut model = DB::new(
- options
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- )?;
+ let config = match &args.model {
+ Some(m) => Config::db().with_model_file(m),
+ None => Config::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
+ }
+ .with_device_all(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = DB::new(config)?;
// load image
let xs = DataLoader::try_read_n(&[
diff --git a/examples/deim/main.rs b/examples/deim/main.rs
index b253394..980f518 100644
--- a/examples/deim/main.rs
+++ b/examples/deim/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- // options
- let options = Options::deim_dfine_s_coco().commit()?;
- let mut model = RTDETR::new(options)?;
+ // config
+ let mut model = RTDETR::new(Config::deim_dfine_s_coco().commit()?)?;
// load
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/depth-anything/main.rs b/examples/depth-anything/main.rs
index f5b1d3e..2523063 100644
--- a/examples/depth-anything/main.rs
+++ b/examples/depth-anything/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::DepthAnything, Annotator, DataLoader, Options, Style};
+use usls::{models::DepthAnything, Annotator, Config, DataLoader, Style};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -8,8 +8,7 @@ fn main() -> Result<()> {
.init();
// build model
- let options = Options::depth_anything_v2_small().commit()?;
- let mut model = DepthAnything::new(options)?;
+ let mut model = DepthAnything::new(Config::depth_anything_v2_small().commit()?)?;
// load
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
diff --git a/examples/depth-pro/main.rs b/examples/depth-pro/main.rs
index 929fa53..84002e6 100644
--- a/examples/depth-pro/main.rs
+++ b/examples/depth-pro/main.rs
@@ -1,6 +1,6 @@
use anyhow::Result;
use usls::DataLoader;
-use usls::{models::DepthPro, Annotator, Options, Style};
+use usls::{models::DepthPro, Annotator, Config, Style};
#[derive(argh::FromArgs)]
/// Example
@@ -23,11 +23,12 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// model
- let options = Options::depth_pro()
+ let config = Config::depth_pro()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = DepthPro::new(options)?;
+
+ let mut model = DepthPro::new(config)?;
// load
let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
diff --git a/examples/dinov2/main.rs b/examples/dinov2/main.rs
index e39427b..3c23a91 100644
--- a/examples/dinov2/main.rs
+++ b/examples/dinov2/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::DINOv2, DataLoader, Options};
+use usls::{models::DINOv2, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -11,8 +11,10 @@ fn main() -> Result<()> {
let xs = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/bus.jpg"])?;
// model
- let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;
- let mut model = DINOv2::new(options)?;
+ let config = Config::dinov2_small()
+ .with_batch_size_all(xs.len())
+ .commit()?;
+ let mut model = DINOv2::new(config)?;
// encode images
let y = model.encode_images(&xs)?;
diff --git a/examples/doclayout-yolo/main.rs b/examples/doclayout-yolo/main.rs
index b6ab0d8..52eb13d 100644
--- a/examples/doclayout-yolo/main.rs
+++ b/examples/doclayout-yolo/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -18,7 +18,7 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let config = Options::doclayout_yolo_docstructbench()
+ let config = Config::doclayout_yolo_docstructbench()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut model = YOLO::new(config)?;
diff --git a/examples/fast/main.rs b/examples/fast/main.rs
index 9304c9d..875b466 100644
--- a/examples/fast/main.rs
+++ b/examples/fast/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::DB, Annotator, DataLoader, Options, Scale, Style};
+use usls::{models::DB, Annotator, Config, DataLoader, Scale, Style};
#[derive(argh::FromArgs)]
/// Example
@@ -26,16 +26,16 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let options = match args.scale.as_str().try_into()? {
- Scale::T => Options::fast_tiny(),
- Scale::S => Options::fast_small(),
- Scale::B => Options::fast_base(),
+ let config = match args.scale.as_str().try_into()? {
+ Scale::T => Config::fast_tiny(),
+ Scale::S => Config::fast_small(),
+ Scale::B => Config::fast_base(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
};
let mut model = DB::new(
- options
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
+ config
+ .with_dtype_all(args.dtype.as_str().try_into()?)
+ .with_device_all(args.device.as_str().try_into()?)
.commit()?,
)?;
diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs
index 29e2c1e..f5aa616 100644
--- a/examples/fastsam/main.rs
+++ b/examples/fastsam/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -22,7 +22,7 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let config = Options::fastsam_s()
+ let config = Config::fastsam_s()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
@@ -45,7 +45,7 @@ fn main() -> Result<()> {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
- .base_dir_with_subs(&["runs", "FastSAM"])?
+ .base_dir_with_subs(&["runs", model.spec()])?
.join(usls::timestamp(None))
.display(),
))?;
diff --git a/examples/florence2/README.md b/examples/florence2/README.md
index 6078515..377cb5d 100644
--- a/examples/florence2/README.md
+++ b/examples/florence2/README.md
@@ -1,7 +1,7 @@
## Quick Start
```shell
-cargo run -r -F cuda --example florence2 -- --device cuda --scale base --dtype fp16
+cargo run -r -F cuda --example florence2 -- --device cuda --dtype fp16
```
diff --git a/examples/florence2/main.rs b/examples/florence2/main.rs
index 4d6bde0..3ab421f 100644
--- a/examples/florence2/main.rs
+++ b/examples/florence2/main.rs
@@ -1,20 +1,16 @@
use anyhow::Result;
-use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Style, Task};
+use usls::{models::Florence2, Annotator, Config, DataLoader, Style, Task};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
- #[argh(option, default = "String::from(\"auto\")")]
+ #[argh(option, default = "String::from(\"fp16\")")]
dtype: String,
/// device
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
-
- /// scale
- #[argh(option, default = "String::from(\"base\")")]
- scale: String,
}
fn main() -> Result<()> {
@@ -29,51 +25,12 @@ fn main() -> Result<()> {
let xs = DataLoader::try_read_n(&["images/green-car.jpg", "assets/bus.jpg"])?;
// build model
- let (
- options_vision_encoder,
- options_text_embed,
- options_encoder,
- options_decoder,
- options_decoder_merged,
- ) = match args.scale.as_str().try_into()? {
- Scale::B => (
- Options::florence2_visual_encoder_base(),
- Options::florence2_textual_embed_base(),
- Options::florence2_texual_encoder_base(),
- Options::florence2_texual_decoder_base(),
- Options::florence2_texual_decoder_merged_base(),
- ),
- Scale::L => todo!(),
- _ => anyhow::bail!("Unsupported Florence2 scale."),
- };
-
- let mut model = Florence2::new(
- options_vision_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_text_embed
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_decoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_decoder_merged
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- )?;
+ let config = Config::florence2_base()
+ .with_dtype_all(args.dtype.as_str().try_into()?)
+ .with_device_all(args.device.as_str().try_into()?)
+ .with_batch_size_all(xs.len())
+ .commit()?;
+ let mut model = Florence2::new(config)?;
// tasks
let tasks = [
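
The `*_all` builders above broadcast one setting across every Florence2 sub-model instead of repeating it per component; a hedged sketch of that pattern (dtype, device and image path are placeholders):

```rust
use usls::{models::Florence2, Config, DataLoader};

fn build_florence2() -> anyhow::Result<Florence2> {
    let images = DataLoader::try_read_n(&["images/green-car.jpg"])?;
    // dtype, device and batch size are broadcast to the vision encoder,
    // text embed, encoder, decoder and merged decoder in one call each
    let config = Config::florence2_base()
        .with_dtype_all("fp16".try_into()?)
        .with_device_all("cpu:0".try_into()?)
        .with_batch_size_all(images.len())
        .commit()?;
    Ok(Florence2::new(config)?)
}
```
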
diff --git a/examples/grounding-dino/main.rs b/examples/grounding-dino/main.rs
index e80083f..1099963 100644
--- a/examples/grounding-dino/main.rs
+++ b/examples/grounding-dino/main.rs
@@ -1,11 +1,11 @@
use anyhow::Result;
-use usls::{models::GroundingDINO, Annotator, DataLoader, Options};
+use usls::{models::GroundingDINO, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
- #[argh(option, default = "String::from(\"auto\")")]
+ #[argh(option, default = "String::from(\"fp16\")")]
dtype: String,
/// device
@@ -45,7 +45,7 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
- let options = Options::grounding_dino_tiny()
+ let config = Config::grounding_dino_tiny()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
@@ -53,7 +53,7 @@ fn main() -> Result<()> {
.with_text_confs(&[0.25])
.commit()?;
- let mut model = GroundingDINO::new(options)?;
+ let mut model = GroundingDINO::new(config)?;
// load images
let xs = DataLoader::try_read_n(&args.source)?;
diff --git a/examples/linknet/main.rs b/examples/linknet/main.rs
index 3dfd21a..b8a4523 100644
--- a/examples/linknet/main.rs
+++ b/examples/linknet/main.rs
@@ -1,6 +1,6 @@
use anyhow::Result;
use usls::DataLoader;
-use usls::{models::DB, Annotator, Options, Scale, Style};
+use usls::{models::DB, Annotator, Config, Scale, Style};
#[derive(argh::FromArgs)]
/// Example
@@ -27,14 +27,14 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let options = match args.scale.as_str().try_into()? {
- Scale::T => Options::linknet_r18(),
- Scale::S => Options::linknet_r34(),
- Scale::B => Options::linknet_r50(),
+ let config = match args.scale.as_str().try_into()? {
+ Scale::T => Config::linknet_r18(),
+ Scale::S => Config::linknet_r34(),
+ Scale::B => Config::linknet_r50(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
};
let mut model = DB::new(
- options
+ config
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?,
diff --git a/examples/modnet/main.rs b/examples/modnet/main.rs
index b0ee231..0c09036 100644
--- a/examples/modnet/main.rs
+++ b/examples/modnet/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::MODNet, Annotator, DataLoader, Options};
+use usls::{models::MODNet, Annotator, Config, DataLoader};
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
@@ -7,8 +7,7 @@ fn main() -> anyhow::Result<()> {
.init();
// build model
- let options = Options::modnet_photographic().commit()?;
- let mut model = MODNet::new(options)?;
+ let mut model = MODNet::new(Config::modnet_photographic().commit()?)?;
// load image
let xs = DataLoader::try_read_n(&["images/liuyifei.png"])?;
diff --git a/examples/moondream2/main.rs b/examples/moondream2/main.rs
index 963197a..f23e9cf 100644
--- a/examples/moondream2/main.rs
+++ b/examples/moondream2/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::Moondream2, Annotator, DataLoader, Options, Scale, Task};
+use usls::{models::Moondream2, Annotator, Config, DataLoader, Scale, Task};
#[derive(argh::FromArgs)]
/// Example
@@ -39,81 +39,16 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let (
- options_vision_encoder,
- options_vision_projection,
- options_text_decoder,
- options_text_encoder,
- options_coord_decoder,
- options_coord_encoder,
- options_size_decoder,
- options_size_encoder,
- ) = match args.scale.as_str().try_into()? {
- Scale::Billion(2.) => (
- Options::moondream2_2b_vision_encoder(),
- Options::moondream2_2b_vision_projection(),
- Options::moondream2_2b_text_decoder(),
- Options::moondream2_2b_text_encoder(),
- Options::moondream2_2b_coord_decoder(),
- Options::moondream2_2b_coord_encoder(),
- Options::moondream2_2b_size_decoder(),
- Options::moondream2_2b_size_encoder(),
- ),
- Scale::Billion(0.5) => (
- Options::moondream2_0_5b_vision_encoder(),
- Options::moondream2_0_5b_vision_projection(),
- Options::moondream2_0_5b_text_decoder(),
- Options::moondream2_0_5b_text_encoder(),
- Options::moondream2_0_5b_coord_decoder(),
- Options::moondream2_0_5b_coord_encoder(),
- Options::moondream2_0_5b_size_decoder(),
- Options::moondream2_0_5b_size_encoder(),
- ),
+ let config = match args.scale.as_str().try_into()? {
+ Scale::Billion(0.5) => Config::moondream2_0_5b(),
+ Scale::Billion(2.) => Config::moondream2_2b(),
_ => unimplemented!(),
- };
+ }
+ .with_dtype_all(args.dtype.as_str().try_into()?)
+ .with_device_all(args.device.as_str().try_into()?)
+ .commit()?;
- let mut model = Moondream2::new(
- options_vision_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- options_vision_projection
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- options_text_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- options_text_decoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- Some(
- options_coord_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- ),
- Some(
- options_coord_decoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- ),
- Some(
- options_size_encoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- ),
- Some(
- options_size_decoder
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- ),
- )?;
+ let mut model = Moondream2::new(config)?;
// load images
let xs = DataLoader::try_read_n(&args.source)?;
@@ -142,13 +77,6 @@ fn main() -> Result<()> {
}
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
println!("{:?}", ys);
- // let annotator = Annotator::default()
- // .with_bboxes_thickness(4)
- // .without_bboxes_conf(true)
- // .with_keypoints_radius(6)
- // .with_keypoints_name(true)
- // .with_saveout("moondream2");
- // annotator.annotate(&xs, &ys);
// annotate
let annotator = Annotator::default()
diff --git a/examples/owlv2/main.rs b/examples/owlv2/main.rs
index f717e00..0037d50 100644
--- a/examples/owlv2/main.rs
+++ b/examples/owlv2/main.rs
@@ -1,6 +1,6 @@
use anyhow::Result;
use usls::DataLoader;
-use usls::{models::OWLv2, Annotator, Options};
+use usls::{models::OWLv2, Annotator, Config};
#[derive(argh::FromArgs)]
/// Example
@@ -46,14 +46,14 @@ fn main() -> Result<()> {
.init();
let args: Args = argh::from_env();
- // options
- let options = Options::owlv2_base_ensemble()
+ // config
+ let config = Config::owlv2_base_ensemble()
// owlv2_base()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
- .with_class_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
+ .with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
.commit()?;
- let mut model = OWLv2::new(options)?;
+ let mut model = OWLv2::new(config)?;
// load
let xs = DataLoader::try_read_n(&args.source)?;
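
Since OWLv2 now takes its prompts through `with_text_names` (matching GroundingDINO) rather than `with_class_names`, here is a small sketch of a fixed-prompt setup; the labels, dtype and device are illustrative only:

```rust
use usls::{models::OWLv2, Config};

fn build_owlv2() -> anyhow::Result<OWLv2> {
    let config = Config::owlv2_base_ensemble()
        .with_model_dtype("fp16".try_into()?)
        .with_model_device("cpu:0".try_into()?)
        // open-set detection: the text prompts define what to look for
        .with_text_names(&["person", "bus", "dog"])
        .commit()?;
    Ok(OWLv2::new(config)?)
}
```
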
diff --git a/examples/picodet-layout/main.rs b/examples/picodet-layout/main.rs
index baa9bf4..cbae280 100644
--- a/examples/picodet-layout/main.rs
+++ b/examples/picodet-layout/main.rs
@@ -1,6 +1,6 @@
use anyhow::Result;
use usls::DataLoader;
-use usls::{models::PicoDet, Annotator, Options};
+use usls::{models::PicoDet, Annotator, Config};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -8,12 +8,11 @@ fn main() -> Result<()> {
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- // options
- let options = Options::picodet_layout_1x()
- // picodet_l_layout_3cls()
- // picodet_l_layout_17cls()
- .commit()?;
- let mut model = PicoDet::new(options)?;
+ // config
+ let config = Config::picodet_layout_1x().commit()?;
+ // picodet_l_layout_3cls()
+ // picodet_l_layout_17cls()
+ let mut model = PicoDet::new(config)?;
// load
let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;
diff --git a/examples/rfdetr/main.rs b/examples/rfdetr/main.rs
index e034e45..79b544e 100644
--- a/examples/rfdetr/main.rs
+++ b/examples/rfdetr/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::RFDETR, Annotator, DataLoader, Options};
+use usls::{models::RFDETR, Annotator, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- // options
- let options = Options::rfdetr_base().commit()?;
- let mut model = RFDETR::new(options)?;
+ // config
+ let mut model = RFDETR::new(Config::rfdetr_base().commit()?)?;
// load
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/rmbg/main.rs b/examples/rmbg/main.rs
index cb34be0..b1456d9 100644
--- a/examples/rmbg/main.rs
+++ b/examples/rmbg/main.rs
@@ -1,10 +1,10 @@
-use usls::{models::RMBG, Annotator, DataLoader, Options};
+use usls::{models::RMBG, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
struct Args {
/// dtype
- #[argh(option, default = "String::from(\"auto\")")]
+ #[argh(option, default = "String::from(\"fp16\")")]
dtype: String,
/// device
@@ -23,18 +23,18 @@ fn main() -> anyhow::Result<()> {
.init();
let args: Args = argh::from_env();
- let options = match args.ver {
- 1.4 => Options::rmbg1_4(),
- 2.0 => Options::rmbg2_0(),
+ let config = match args.ver {
+ 1.4 => Config::rmbg1_4(),
+ 2.0 => Config::rmbg2_0(),
_ => unreachable!("Unsupported version"),
};
// build model
- let options = options
+ let config = config
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = RMBG::new(options)?;
+ let mut model = RMBG::new(config)?;
// load image
let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs
index 1d3aa5e..1f73704 100644
--- a/examples/rtdetr/main.rs
+++ b/examples/rtdetr/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -7,15 +7,14 @@ fn main() -> Result<()> {
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- // options
- let options = Options::rtdetr_v2_s_coco()
- // rtdetr_v1_r18vd_coco()
- // rtdetr_v2_ms_coco()
- // rtdetr_v2_m_coco()
- // rtdetr_v2_l_coco()
- // rtdetr_v2_x_coco()
- .commit()?;
- let mut model = RTDETR::new(options)?;
+ // config
+ let config = Config::rtdetr_v2_s_coco().commit()?;
+ // rtdetr_v1_r18vd_coco()
+ // rtdetr_v2_ms_coco()
+ // rtdetr_v2_m_coco()
+ // rtdetr_v2_l_coco()
+ // rtdetr_v2_x_coco()
+ let mut model = RTDETR::new(config)?;
// load
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/rtmo/main.rs b/examples/rtmo/main.rs
index 314e6fc..b68aef1 100644
--- a/examples/rtmo/main.rs
+++ b/examples/rtmo/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::RTMO, Annotator, DataLoader, Options, Style, SKELETON_COCO_19};
+use usls::{models::RTMO, Annotator, Config, DataLoader, Style, SKELETON_COCO_19};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -8,7 +8,7 @@ fn main() -> Result<()> {
.init();
// build model
- let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
+ let mut model = RTMO::new(Config::rtmo_s().commit()?)?;
// load image
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/sam/main.rs b/examples/sam/main.rs
index 21e4fcc..9a698d8 100644
--- a/examples/sam/main.rs
+++ b/examples/sam/main.rs
@@ -1,7 +1,7 @@
use anyhow::Result;
use usls::{
models::{SamKind, SamPrompt, SAM},
- Annotator, DataLoader, Options, Scale,
+ Annotator, Config, DataLoader, Scale,
};
#[derive(argh::FromArgs)]
@@ -28,40 +28,22 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// Build model
- let (options_encoder, options_decoder) = match args.kind.as_str().try_into()? {
- SamKind::Sam => (
- Options::sam_v1_base_encoder(),
- Options::sam_v1_base_decoder(),
- ),
+ let config = match args.kind.as_str().try_into()? {
+ SamKind::Sam => Config::sam_v1_base(),
SamKind::Sam2 => match args.scale.as_str().try_into()? {
- Scale::T => (Options::sam2_tiny_encoder(), Options::sam2_tiny_decoder()),
- Scale::S => (Options::sam2_small_encoder(), Options::sam2_small_decoder()),
- Scale::B => (
- Options::sam2_base_plus_encoder(),
- Options::sam2_base_plus_decoder(),
- ),
+ Scale::T => Config::sam2_tiny(),
+ Scale::S => Config::sam2_small(),
+ Scale::B => Config::sam2_base_plus(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
},
+ SamKind::MobileSam => Config::mobile_sam_tiny(),
+ SamKind::SamHq => Config::sam_hq_tiny(),
+ SamKind::EdgeSam => Config::edge_sam_3x(),
+ }
+ .with_device_all(args.device.as_str().try_into()?)
+ .commit()?;
- SamKind::MobileSam => (
- Options::mobile_sam_tiny_encoder(),
- Options::mobile_sam_tiny_decoder(),
- ),
- SamKind::SamHq => (
- Options::sam_hq_tiny_encoder(),
- Options::sam_hq_tiny_decoder(),
- ),
- SamKind::EdgeSam => (
- Options::edge_sam_3x_encoder(),
- Options::edge_sam_3x_decoder(),
- ),
- };
-
- let options_encoder = options_encoder
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?;
- let options_decoder = options_decoder.commit()?;
- let mut model = SAM::new(options_encoder, options_decoder)?;
+ let mut model = SAM::new(config)?;
// Load image
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
diff --git a/examples/sam2/README.md b/examples/sam2/README.md
index 5a97486..a5f181e 100644
--- a/examples/sam2/README.md
+++ b/examples/sam2/README.md
@@ -1,6 +1,5 @@
## Quick Start
```Shell
-
-cargo run -r -F cuda --example sam -- --device cuda --scale t
+cargo run -r -F cuda --example sam2 -- --device cuda --scale t
```
\ No newline at end of file
diff --git a/examples/sam2/main.rs b/examples/sam2/main.rs
index e8722a4..48eef6e 100644
--- a/examples/sam2/main.rs
+++ b/examples/sam2/main.rs
@@ -1,7 +1,7 @@
use anyhow::Result;
use usls::{
models::{SamPrompt, SAM2},
- Annotator, DataLoader, Options, Scale,
+ Annotator, Config, DataLoader, Scale,
};
#[derive(argh::FromArgs)]
@@ -25,33 +25,16 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// Build model
- let (options_encoder, options_decoder) = match args.scale.as_str().try_into()? {
- Scale::T => (
- Options::sam2_1_tiny_encoder(),
- Options::sam2_1_tiny_decoder(),
- ),
- Scale::S => (
- Options::sam2_1_small_encoder(),
- Options::sam2_1_small_decoder(),
- ),
- Scale::B => (
- Options::sam2_1_base_plus_encoder(),
- Options::sam2_1_base_plus_decoder(),
- ),
- Scale::L => (
- Options::sam2_1_large_encoder(),
- Options::sam2_1_large_decoder(),
- ),
+ let config = match args.scale.as_str().try_into()? {
+ Scale::T => Config::sam2_1_tiny(),
+ Scale::S => Config::sam2_1_small(),
+ Scale::B => Config::sam2_1_base_plus(),
+ Scale::L => Config::sam2_1_large(),
_ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t, l.", args.scale),
- };
-
- let options_encoder = options_encoder
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?;
- let options_decoder = options_decoder
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?;
- let mut model = SAM2::new(options_encoder, options_decoder)?;
+ }
+ .with_device_all(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = SAM2::new(config)?;
// Load image
let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
diff --git a/examples/sapiens/README.md b/examples/sapiens/README.md
index 7699915..82903f5 100644
--- a/examples/sapiens/README.md
+++ b/examples/sapiens/README.md
@@ -1,7 +1,7 @@
## Quick Start
```shell
-cargo run -r -F cuda --example sapiens -- --device cuda
+cargo run -r -F cuda --example sapiens -- --device cuda
```
## Results
diff --git a/examples/sapiens/main.rs b/examples/sapiens/main.rs
index a209e27..caf2d17 100644
--- a/examples/sapiens/main.rs
+++ b/examples/sapiens/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::Sapiens, Annotator, DataLoader, Options};
+use usls::{models::Sapiens, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -17,10 +17,10 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build
- let options = Options::sapiens_seg_0_3b()
+ let config = Config::sapiens_seg_0_3b()
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = Sapiens::new(options)?;
+ let mut model = Sapiens::new(config)?;
// load
let xs = DataLoader::try_read_n(&["images/paul-george.jpg"])?;
diff --git a/examples/slanet/main.rs b/examples/slanet/main.rs
index 362671c..71a0fc7 100644
--- a/examples/slanet/main.rs
+++ b/examples/slanet/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::SLANet, Annotator, Color, DataLoader, Options};
+use usls::{models::SLANet, Annotator, Color, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -26,11 +26,11 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let options = Options::slanet_lcnet_v2_mobile_ch()
+ let config = Config::slanet_lcnet_v2_mobile_ch()
.with_model_device(args.device.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.commit()?;
- let mut model = SLANet::new(options)?;
+ let mut model = SLANet::new(config)?;
// load
let xs = DataLoader::try_read_n(&[args.source])?;
diff --git a/examples/smolvlm/main.rs b/examples/smolvlm/main.rs
index 3bc87fb..1f585cf 100644
--- a/examples/smolvlm/main.rs
+++ b/examples/smolvlm/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::SmolVLM, DataLoader, Options, Scale};
+use usls::{models::SmolVLM, Config, DataLoader, Scale};
#[derive(argh::FromArgs)]
/// Example
@@ -29,32 +29,14 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let (options_vision_encoder, options_text_embed, options_decode) =
- match args.scale.as_str().try_into()? {
- Scale::Million(256.) => (
- Options::smolvlm_vision_256m(),
- Options::smolvlm_text_embed_256m(),
- Options::smolvlm_decoder_256m(),
- ),
- Scale::Million(500.) => (
- Options::smolvlm_vision_500m(),
- Options::smolvlm_text_embed_500m(),
- Options::smolvlm_decoder_500m(),
- ),
- _ => unimplemented!(),
- };
-
- let mut model = SmolVLM::new(
- options_vision_encoder
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- options_text_embed
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- options_decode
- .with_model_device(args.device.as_str().try_into()?)
- .commit()?,
- )?;
+ let config = match args.scale.as_str().try_into()? {
+ Scale::Million(256.) => Config::smolvlm_256m(),
+ Scale::Million(500.) => Config::smolvlm_500m(),
+ _ => unimplemented!(),
+ }
+ .with_device_all(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = SmolVLM::new(config)?;
// load images
let xs = DataLoader::try_read_n(&args.source)?;
diff --git a/examples/svtr/main.rs b/examples/svtr/main.rs
index bd92036..779afb7 100644
--- a/examples/svtr/main.rs
+++ b/examples/svtr/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::SVTR, DataLoader, Options};
+use usls::{models::SVTR, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -22,13 +22,13 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let options = Options::ppocr_rec_v4_ch()
+ let config = Config::ppocr_rec_v4_ch()
// ppocr_rec_v4_en()
// repsvtr_ch()
.with_model_device(args.device.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.commit()?;
- let mut model = SVTR::new(options)?;
+ let mut model = SVTR::new(config)?;
// load images
let dl = DataLoader::new("./examples/svtr/images")?
diff --git a/examples/trocr/main.rs b/examples/trocr/main.rs
index f64b72e..8e5fcc8 100644
--- a/examples/trocr/main.rs
+++ b/examples/trocr/main.rs
@@ -1,6 +1,6 @@
use usls::{
models::{TrOCR, TrOCRKind},
- DataLoader, Options, Scale,
+ Config, DataLoader, Scale,
};
#[derive(argh::FromArgs)]
@@ -38,52 +38,22 @@ fn main() -> anyhow::Result<()> {
])?;
// build model
- let (options_encoder, options_decoder, options_decoder_merged) =
- match args.scale.as_str().try_into()? {
- Scale::S => match args.kind.as_str().try_into()? {
- TrOCRKind::Printed => (
- Options::trocr_encoder_small_printed(),
- Options::trocr_decoder_small_printed(),
- Options::trocr_decoder_merged_small_printed(),
- ),
- TrOCRKind::HandWritten => (
- Options::trocr_encoder_small_handwritten(),
- Options::trocr_decoder_small_handwritten(),
- Options::trocr_decoder_merged_small_handwritten(),
- ),
- },
- Scale::B => match args.kind.as_str().try_into()? {
- TrOCRKind::Printed => (
- Options::trocr_encoder_base_printed(),
- Options::trocr_decoder_base_printed(),
- Options::trocr_decoder_merged_base_printed(),
- ),
- TrOCRKind::HandWritten => (
- Options::trocr_encoder_base_handwritten(),
- Options::trocr_decoder_base_handwritten(),
- Options::trocr_decoder_merged_base_handwritten(),
- ),
- },
- x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
- };
+ let config = match args.scale.as_str().try_into()? {
+ Scale::S => match args.kind.as_str().try_into()? {
+ TrOCRKind::Printed => Config::trocr_small_printed(),
+ TrOCRKind::HandWritten => Config::trocr_small_handwritten(),
+ },
+ Scale::B => match args.kind.as_str().try_into()? {
+ TrOCRKind::Printed => Config::trocr_base_printed(),
+ TrOCRKind::HandWritten => Config::trocr_base_handwritten(),
+ },
+ x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
+ }
+ .with_device_all(args.device.as_str().try_into()?)
+ .with_dtype_all(args.dtype.as_str().try_into()?)
+ .commit()?;
- let mut model = TrOCR::new(
- options_encoder
- .with_model_device(args.device.as_str().try_into()?)
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_decoder
- .with_model_device(args.device.as_str().try_into()?)
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- options_decoder_merged
- .with_model_device(args.device.as_str().try_into()?)
- .with_model_dtype(args.dtype.as_str().try_into()?)
- .with_batch_size(xs.len())
- .commit()?,
- )?;
+ let mut model = TrOCR::new(config)?;
// inference
let ys = model.forward(&xs)?;
diff --git a/examples/yolov8-rtdetr/README.md b/examples/ultralytics-rtdetr/README.md
similarity index 100%
rename from examples/yolov8-rtdetr/README.md
rename to examples/ultralytics-rtdetr/README.md
diff --git a/examples/yolov8-rtdetr/main.rs b/examples/ultralytics-rtdetr/main.rs
similarity index 87%
rename from examples/yolov8-rtdetr/main.rs
rename to examples/ultralytics-rtdetr/main.rs
index 3b96e8e..a67ebde 100644
--- a/examples/yolov8-rtdetr/main.rs
+++ b/examples/ultralytics-rtdetr/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};
#[derive(argh::FromArgs)]
/// Example
@@ -22,7 +22,7 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build model
- let config = Options::yolo_v8_rtdetr_l()
+ let config = Config::ultralytics_rtdetr_l()
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
@@ -41,7 +41,7 @@ fn main() -> Result<()> {
annotator.annotate(x, y)?.save(format!(
"{}.jpg",
usls::Dir::Current
- .base_dir_with_subs(&["runs", "YOLOv8-RT-DETR"])?
+ .base_dir_with_subs(&["runs", "ultralytics-RTDETR"])?
.join(usls::timestamp(None))
.display(),
))?;
diff --git a/examples/yolo-sam2/main.rs b/examples/yolo-sam2/main.rs
index 8b60d87..5e9fb53 100644
--- a/examples/yolo-sam2/main.rs
+++ b/examples/yolo-sam2/main.rs
@@ -1,7 +1,7 @@
use anyhow::Result;
use usls::{
models::{SamPrompt, SAM2, YOLO},
- Annotator, DataLoader, Options, Scale, Style,
+ Annotator, Config, DataLoader, Scale, Style,
};
#[derive(argh::FromArgs)]
@@ -21,16 +21,12 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
// build SAM
- let (options_encoder, options_decoder) = (
- Options::sam2_1_tiny_encoder().commit()?,
- Options::sam2_1_tiny_decoder().commit()?,
- );
- let mut sam = SAM2::new(options_encoder, options_decoder)?;
+ let mut sam = SAM2::new(Config::sam2_1_tiny().commit()?)?;
// build YOLOv8
- let options_yolo = Options::yolo_detect()
- .with_model_scale(Scale::N)
- .with_model_version(8.into())
+ let options_yolo = Config::yolo_detect()
+ .with_scale(Scale::N)
+ .with_version(8.into())
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
let mut yolo = YOLO::new(options_yolo)?;
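
Both stages of this pipeline now come from single `Config` presets, so a detect-then-segment setup can be sketched as follows (device handling omitted for brevity):

```rust
use usls::{
    models::{SAM2, YOLO},
    Config, Scale,
};

fn build_pipeline() -> anyhow::Result<(YOLO, SAM2)> {
    // detector: generic YOLO preset specialised by scale and version
    let yolo = YOLO::new(
        Config::yolo_detect()
            .with_scale(Scale::N)
            .with_version(8.into())
            .commit()?,
    )?;
    // segmenter: encoder and decoder bundled in one preset
    let sam2 = SAM2::new(Config::sam2_1_tiny().commit()?)?;
    Ok((yolo, sam2))
}
```
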
diff --git a/examples/yolo/README.md b/examples/yolo/README.md
index d53104e..4d2f71a 100644
--- a/examples/yolo/README.md
+++ b/examples/yolo/README.md
@@ -27,34 +27,36 @@ cargo run -r --example yolo -- --task detect --ver v8 --num-classes 6 --model xx
# Classify
cargo run -r --example yolo -- --task classify --ver 5 --scale s --image-width 224 --image-height 224 --num-classes 1000 --use-imagenet-1k-classes # YOLOv5
-cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 # YOLOv8
-cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224 # YOLOv11
+cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 --use-imagenet-1k-classes # YOLOv8
+cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224 # YOLO11
# Detect
-cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes # YOLOv5
-cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes # YOLOv6
-cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes # YOLOv7
-cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes # YOLOv8
-cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes # YOLOv9
-cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes # YOLOv10
-cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes # YOLOv11
-cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx # YOLOv8-world
+cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes --dtype fp16 # YOLOv5
+cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes --dtype fp16 # YOLOv6
+cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes --dtype fp16 # YOLOv7
+cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes --dtype fp16 # YOLOv8
+cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes --dtype fp16 # YOLOv9
+cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes --dtype fp16 # YOLOv10
+cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes --dtype fp16 # YOLO11
+cargo run -r --example yolo -- --task detect --ver 12 --scale n --use-coco-80-classes --dtype fp16 # YOLOv12
+cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx # YOLOv8-world
# Pose
cargo run -r --example yolo -- --task pose --ver 8 --scale n # YOLOv8-Pose
cargo run -r --example yolo -- --task pose --ver 11 --scale n # YOLOv11-Pose
# Segment
-cargo run -r --example yolo -- --task segment --ver 5 --scale n # YOLOv5-Segment
-cargo run -r --example yolo -- --task segment --ver 8 --scale n # YOLOv8-Segment
-cargo run -r --example yolo -- --task segment --ver 11 --scale n # YOLOv8-Segment
+cargo run -r --example yolo -- --task segment --ver 5 --scale n --use-coco-80-classes --dtype fp16 # YOLOv5-Segment
+cargo run -r --example yolo -- --task segment --ver 8 --scale n --use-coco-80-classes --dtype fp16 # YOLOv8-Segment
+cargo run -r --example yolo -- --task segment --ver 9 --scale c --use-coco-80-classes --dtype fp16 # YOLOv9-Segment
+cargo run -r --example yolo -- --task segment --ver 11 --scale n --use-coco-80-classes --dtype fp16 # YOLO11-Segment
# Obb
cargo run -r --example yolo -- --ver 8 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv8-Obb
cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv11-Obb
```
-**`cargo run -r --example yolo -- --help` for more options**
+**`cargo run -r --example yolo -- --help` for more configuration options**
## Other YOLOv8 Solution Models
diff --git a/examples/yolo/main.rs b/examples/yolo/main.rs
index 86111bd..e2f9007 100644
--- a/examples/yolo/main.rs
+++ b/examples/yolo/main.rs
@@ -1,25 +1,25 @@
use anyhow::Result;
use usls::{
- models::YOLO, Annotator, DataLoader, Options, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
+ models::YOLO, Annotator, Config, DataLoader, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
NAMES_IMAGENET_1K, SKELETON_COCO_19, SKELETON_COLOR_COCO_19,
};
#[derive(argh::FromArgs, Debug)]
-/// Example
+/// YOLO Example
struct Args {
- /// model file
+ /// model file(.onnx)
#[argh(option)]
model: Option<String>,
- /// source
+ /// source: image, image folder, video stream
#[argh(option, default = "String::from(\"./assets/bus.jpg\")")]
source: String,
- /// dtype
+ /// model dtype
#[argh(option, default = "String::from(\"auto\")")]
dtype: String,
- /// task
+ /// task: det, seg, pose, classify, obb
#[argh(option, default = "String::from(\"det\")")]
task: String,
@@ -27,101 +27,101 @@ struct Args {
#[argh(option, default = "8.0")]
ver: f32,
- /// device
+ /// device: cuda, cpu, mps
#[argh(option, default = "String::from(\"cpu:0\")")]
device: String,
- /// scale
+ /// scale: n, s, m, l, x
#[argh(option, default = "String::from(\"n\")")]
scale: String,
- /// trt_fp16
+ /// enable TensorRT FP16
#[argh(option, default = "true")]
trt_fp16: bool,
- /// batch_size
+ /// batch size
#[argh(option, default = "1")]
batch_size: usize,
- /// min_batch_size
+ /// min batch size: For TensorRT
#[argh(option, default = "1")]
min_batch_size: usize,
- /// max_batch_size
+ /// max batch size: For TensorRT
#[argh(option, default = "4")]
max_batch_size: usize,
- /// min_image_width
+ /// min image width: For TensorRT
#[argh(option, default = "224")]
min_image_width: isize,
- /// image_width
+ /// image width: For TensorRT
#[argh(option, default = "640")]
image_width: isize,
- /// max_image_width
+ /// max image width: For TensorRT
#[argh(option, default = "1280")]
max_image_width: isize,
- /// min_image_height
+ /// min image height: For TensorRT
#[argh(option, default = "224")]
min_image_height: isize,
- /// image_height
+ /// image height: For TensorRT
#[argh(option, default = "640")]
image_height: isize,
- /// max_image_height
+ /// max image height: For TensorRT
#[argh(option, default = "1280")]
max_image_height: isize,
- /// num_classes
+ /// num classes
#[argh(option)]
num_classes: Option<usize>,
- /// num_keypoints
+ /// num keypoints
#[argh(option)]
num_keypoints: Option<usize>,
- /// use_coco_80_classes
- #[argh(switch)]
- use_coco_80_classes: bool,
-
- /// use_coco_17_keypoints_classes
- #[argh(switch)]
- use_coco_17_keypoints_classes: bool,
-
- /// use_imagenet_1k_classes
- #[argh(switch)]
- use_imagenet_1k_classes: bool,
-
- /// confs
- #[argh(option)]
- confs: Vec<f32>,
-
- /// keypoint_confs
- #[argh(option)]
- keypoint_confs: Vec<f32>,
-
- /// exclude_classes
- #[argh(option)]
- exclude_classes: Vec<usize>,
-
- /// retain_classes
- #[argh(option)]
- retain_classes: Vec<usize>,
-
- /// class_names
+ /// class names
#[argh(option)]
class_names: Vec<String>,
- /// keypoint_names
+ /// keypoint names
#[argh(option)]
keypoint_names: Vec,
- /// topk
+ /// top-k
#[argh(option, default = "5")]
topk: usize,
+
+ /// use COCO 80 classes
+ #[argh(switch)]
+ use_coco_80_classes: bool,
+
+ /// use COCO 17 keypoints classes
+ #[argh(switch)]
+ use_coco_17_keypoints_classes: bool,
+
+ /// use ImageNet 1K classes
+ #[argh(switch)]
+ use_imagenet_1k_classes: bool,
+
+ /// confidences
+ #[argh(option)]
+ confs: Vec<f32>,
+
+ /// keypoint confidences
+ #[argh(option)]
+ keypoint_confs: Vec<f32>,
+
+ /// exclude classes
+ #[argh(option)]
+ exclude_classes: Vec<usize>,
+
+ /// retain classes
+ #[argh(option)]
+ retain_classes: Vec<usize>,
}
fn main() -> Result<()> {
@@ -129,17 +129,15 @@ fn main() -> Result<()> {
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
-
let args: Args = argh::from_env();
-
- let mut options = Options::yolo()
+ let mut config = Config::yolo()
.with_model_file(&args.model.unwrap_or_default())
- .with_model_task(args.task.as_str().try_into()?)
- .with_model_version(args.ver.try_into()?)
- .with_model_scale(args.scale.as_str().try_into()?)
+ .with_task(args.task.as_str().try_into()?)
+ .with_version(args.ver.try_into()?)
+ .with_scale(args.scale.as_str().try_into()?)
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
- .with_trt_fp16(args.trt_fp16)
+ .with_model_trt_fp16(args.trt_fp16)
.with_model_ixx(
0,
0,
@@ -172,30 +170,25 @@ fn main() -> Result<()> {
})
.with_topk(args.topk)
.retain_classes(&args.retain_classes)
- .exclude_classes(&args.exclude_classes);
-
+ .exclude_classes(&args.exclude_classes)
+ .with_model_num_dry_run(2);
if args.use_coco_80_classes {
- options = options.with_class_names(&NAMES_COCO_80);
+ config = config.with_class_names(&NAMES_COCO_80);
}
-
if args.use_coco_17_keypoints_classes {
- options = options.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
+ config = config.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
}
-
if args.use_imagenet_1k_classes {
- options = options.with_class_names(&NAMES_IMAGENET_1K);
+ config = config.with_class_names(&NAMES_IMAGENET_1K);
}
-
if let Some(nc) = args.num_classes {
- options = options.with_nc(nc);
+ config = config.with_nc(nc);
}
-
if let Some(nk) = args.num_keypoints {
- options = options.with_nk(nk);
+ config = config.with_nk(nk);
}
-
if !args.class_names.is_empty() {
- options = options.with_class_names(
+ config = config.with_class_names(
&args
.class_names
.iter()
@@ -203,9 +196,8 @@ fn main() -> Result<()> {
.collect::<Vec<_>>(),
);
}
-
if !args.keypoint_names.is_empty() {
- options = options.with_keypoint_names(
+ config = config.with_keypoint_names(
&args
.keypoint_names
.iter()
@@ -215,7 +207,7 @@ fn main() -> Result<()> {
}
// build model
- let mut model = YOLO::try_from(options.commit()?)?;
+ let mut model = YOLO::new(config.commit()?)?;
// build dataloader
let dl = DataLoader::new(&args.source)?
@@ -255,6 +247,7 @@ fn main() -> Result<()> {
}
}
+ // summary
model.summary();
Ok(())
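
A trimmed-down version of the builder above for a stock COCO detector; the task, version, scale and device values are illustrative defaults, not the only valid ones:

```rust
use usls::{models::YOLO, Config, NAMES_COCO_80};

fn build_yolo_coco() -> anyhow::Result<YOLO> {
    let config = Config::yolo()
        .with_task("det".try_into()?)
        .with_version(8.into())
        .with_scale("n".try_into()?)
        .with_model_device("cpu:0".try_into()?)
        .with_class_names(&NAMES_COCO_80)
        // warm-up passes before real inference, as in the example above
        .with_model_num_dry_run(2)
        .commit()?;
    Ok(YOLO::new(config)?)
}
```
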
diff --git a/examples/yoloe/README.md b/examples/yoloe/README.md
index 110c5a4..005ff33 100644
--- a/examples/yoloe/README.md
+++ b/examples/yoloe/README.md
@@ -1,6 +1,6 @@
## Quick Start
```shell
-cargo run -r --example yoloe
+cargo run -r -F cuda --example yoloe -- --device cuda
```
diff --git a/examples/yoloe/main.rs b/examples/yoloe/main.rs
index 841945c..3fa3b2f 100644
--- a/examples/yoloe/main.rs
+++ b/examples/yoloe/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options, Style};
+use usls::{models::YOLO, Annotator, Config, DataLoader, Style};
#[derive(argh::FromArgs)]
/// Example
@@ -21,8 +21,8 @@ fn main() -> Result<()> {
let args: Args = argh::from_env();
- // options
- let options = Options::yoloe_v8s_seg_pf()
+ // config
+ let config = Config::yoloe_v8s_seg_pf()
// yoloe_v8m_seg_pf()
// yoloe_v8l_seg_pf()
// yoloe_11s_seg_pf()
@@ -31,7 +31,7 @@ fn main() -> Result<()> {
.with_model_dtype(args.dtype.as_str().try_into()?)
.with_model_device(args.device.as_str().try_into()?)
.commit()?;
- let mut model = YOLO::new(options)?;
+ let mut model = YOLO::new(config)?;
// load
let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
diff --git a/examples/yolop/main.rs b/examples/yolop/main.rs
index 7f062db..78427fb 100644
--- a/examples/yolop/main.rs
+++ b/examples/yolop/main.rs
@@ -1,5 +1,5 @@
use anyhow::Result;
-use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
+use usls::{models::YOLOPv2, Annotator, Config, DataLoader};
fn main() -> Result<()> {
tracing_subscriber::fmt()
@@ -8,8 +8,7 @@ fn main() -> Result<()> {
.init();
// build model
- let options = Options::yolop_v2_480x800().commit()?;
- let mut model = YOLOPv2::new(options)?;
+ let mut model = YOLOPv2::new(Config::yolop_v2_480x800().commit()?)?;
// load image
let xs = DataLoader::try_read_n(&["images/car-view.jpg"])?;
diff --git a/src/inference/engine.rs b/src/inference/engine.rs
index ad6cdf0..2884a7c 100644
--- a/src/inference/engine.rs
+++ b/src/inference/engine.rs
@@ -13,8 +13,8 @@ use prost::Message;
use std::collections::HashSet;
use crate::{
- build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax, Ops, Ts,
- Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
+ build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax,
+ ORTConfig, Ops, Ts, Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
};
impl From for DType {
@@ -93,6 +93,20 @@ impl Default for Engine {
}
impl Engine {
+ pub fn try_from_config(config: &ORTConfig) -> Result<Self> {
+ Self {
+ file: config.file.clone(),
+ spec: config.spec.clone(),
+ iiixs: config.iiixs.clone(),
+ device: config.device,
+ trt_fp16: config.trt_fp16,
+ num_dry_run: config.num_dry_run,
+ graph_opt_level: config.graph_opt_level,
+ ..Default::default()
+ }
+ .build()
+ }
+
pub fn build(mut self) -> Result<Self> {
let name = format!("[{}] ort_initialization", self.spec);
elapsed!(&name, self.ts, {
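
`try_from_config` gives model wrappers a one-call path from an `ORTConfig` to a built engine; a rough caller-side sketch, assuming both types are re-exported at the crate root:

```rust
use usls::{Engine, ORTConfig};

fn build_engine(ort: &ORTConfig) -> anyhow::Result<Engine> {
    // copies file/spec/iiixs/device/... from the config, fills the rest with
    // defaults, then runs the usual build() (session init + dry runs)
    Ok(Engine::try_from_config(ort)?)
}
```
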
diff --git a/src/inference/hbb.rs b/src/inference/hbb.rs
index 12bcac3..04e5115 100644
--- a/src/inference/hbb.rs
+++ b/src/inference/hbb.rs
@@ -17,7 +17,9 @@ impl std::fmt::Debug for Hbb {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Hbb")
.field("xyxy", &[self.x, self.y, self.xmax(), self.ymax()])
- .field("meta", &self.meta)
+ .field("id", &self.meta.id())
+ .field("name", &self.meta.name())
+ .field("confidence", &self.meta.confidence())
.finish()
}
}
diff --git a/src/inference/image.rs b/src/inference/image.rs
index 50c4778..23b113f 100644
--- a/src/inference/image.rs
+++ b/src/inference/image.rs
@@ -308,12 +308,12 @@ impl Image {
));
}
- let (mut resizer, options) = build_resizer_filter(filter)?;
+ let (mut resizer, config) = build_resizer_filter(filter)?;
let x: DynamicImage = self.to_dyn();
if let ResizeMode::FitExact = mode {
let mut dst = FImage::new(tw, th, PixelType::U8x3);
- resizer.resize(&x, &mut dst, &options)?;
+ resizer.resize(&x, &mut dst, &config)?;
trans_info = trans_info
.with_height_scale(th as f32 / h0 as f32)
.with_width_scale(tw as f32 / w0 as f32);
@@ -362,7 +362,7 @@ impl Image {
};
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
- resizer.resize(&x, &mut dst_cropped, &options)?;
+ resizer.resize(&x, &mut dst_cropped, &config)?;
Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
}
diff --git a/src/inference/keypoint.rs b/src/inference/keypoint.rs
index 3bf25e4..8ba23c3 100644
--- a/src/inference/keypoint.rs
+++ b/src/inference/keypoint.rs
@@ -22,7 +22,6 @@ impl std::fmt::Debug for Keypoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Keypoint")
.field("xy", &[self.x, self.y])
- .field("uid", &self.meta.uid())
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
diff --git a/src/inference/mask.rs b/src/inference/mask.rs
index 09ce1bf..47bb1c0 100644
--- a/src/inference/mask.rs
+++ b/src/inference/mask.rs
@@ -20,7 +20,6 @@ impl std::fmt::Debug for Mask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Mask")
.field("dimensions", &self.dimensions())
- .field("uid", &self.meta.uid())
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
diff --git a/src/inference/obb.rs b/src/inference/obb.rs
index 0f2f781..1c034f4 100644
--- a/src/inference/obb.rs
+++ b/src/inference/obb.rs
@@ -13,7 +13,7 @@ pub struct Obb {
impl std::fmt::Debug for Obb {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Obb")
- .field("uid", &self.meta.uid())
+ .field("vertices", &self.vertices)
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
diff --git a/src/inference/polygon.rs b/src/inference/polygon.rs
index 1eecb96..3d97d7a 100644
--- a/src/inference/polygon.rs
+++ b/src/inference/polygon.rs
@@ -27,8 +27,7 @@ impl Default for Polygon {
impl std::fmt::Debug for Polygon {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Polygon")
- .field("count", &self.count())
- .field("uid", &self.meta.uid())
+ .field("n_points", &self.count())
.field("id", &self.meta.id())
.field("name", &self.meta.name())
.field("confidence", &self.meta.confidence())
diff --git a/src/inference/prob.rs b/src/inference/prob.rs
index 107859c..919247f 100644
--- a/src/inference/prob.rs
+++ b/src/inference/prob.rs
@@ -31,7 +31,7 @@ impl Prob {
.with_confidence(confidence);
if let Some(names) = names {
- if id < names.len() {
+ if !names.is_empty() {
meta = meta.with_name(names[id]);
}
}
diff --git a/src/io/dataloader.rs b/src/io/dataloader.rs
index 420fdff..19aed4e 100644
--- a/src/io/dataloader.rs
+++ b/src/io/dataloader.rs
@@ -367,14 +367,14 @@ impl DataLoader {
fn load_image_paths_from_folder(source: &str, exts: &[&str]) -> Result<Vec<PathBuf>> {
let source_path = Path::new(source);
let mut paths: Vec<PathBuf> = Vec::new();
- let options = MatchOptions {
+ let config = MatchOptions {
case_sensitive: false,
require_literal_separator: false,
require_literal_leading_dot: false,
};
for ext in exts.iter() {
let pattern = source_path.join(format!("*.{}", ext));
- let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), options)?
+ let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), config)?
.filter_map(|entry| entry.ok())
.collect();
paths.extend(paths_);
@@ -393,12 +393,12 @@ impl DataLoader {
}
fn glob(pattern: &str, sort: bool, case_sensitive: bool) -> anyhow::Result<Vec<PathBuf>> {
- let options = MatchOptions {
+ let config = MatchOptions {
case_sensitive,
require_literal_separator: false,
require_literal_leading_dot: false,
};
- let mut paths: Vec<PathBuf> = glob_with(pattern, options)?
+ let mut paths: Vec<PathBuf> = glob_with(pattern, config)?
.filter_map(|entry| entry.ok())
.collect();
@@ -479,7 +479,7 @@ impl DataLoader {
self
}
- pub fn with_batch_size(mut self, x: usize) -> Self {
+ pub fn with_batch_size_all(mut self, x: usize) -> Self {
self.batch_size = x;
self
}
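For callers of the streaming loader, the only visible change here is the renamed batch-size setter. A hedged sketch; `new`, `build`, and the iterator item shape are not shown in this patch and are assumptions:

```rust
use usls::DataLoader;

fn main() -> anyhow::Result<()> {
    // `with_batch_size` is now `with_batch_size_all`.
    let dl = DataLoader::new("./assets")? // assumption: folder/glob source constructor
        .with_batch_size_all(4)
        .build()?;                        // assumption: `build()` finalizer
    let mut n_batches = 0;
    for _batch in dl {
        n_batches += 1;
    }
    println!("read {} batches", n_batches);
    Ok(())
}
```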
diff --git a/src/models/beit/config.rs b/src/models/beit/config.rs
index 46636b6..b2668da 100644
--- a/src/models/beit/config.rs
+++ b/src/models/beit/config.rs
@@ -1,10 +1,8 @@
-use crate::NAMES_IMAGENET_1K;
-
/// Model configuration for `BEiT`
-impl crate::Options {
+impl crate::Config {
pub fn beit() -> Self {
Self::default()
- .with_model_name("beit")
+ .with_name("beit")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
@@ -13,7 +11,7 @@ impl crate::Options {
.with_image_std(&[0.5, 0.5, 0.5])
.with_normalize(true)
.with_apply_softmax(true)
- .with_class_names(&NAMES_IMAGENET_1K)
+ .with_class_names(&crate::NAMES_IMAGENET_1K)
}
pub fn beit_base() -> Self {
diff --git a/src/models/ben2/config.rs b/src/models/ben2/config.rs
index b6b5632..9a40732 100644
--- a/src/models/ben2/config.rs
+++ b/src/models/ben2/config.rs
@@ -1,5 +1,5 @@
/// Model configuration for `BEN2`
-impl crate::Options {
+impl crate::Config {
pub fn ben2_base() -> Self {
Self::rmbg().with_model_file("ben2-base.onnx")
}
diff --git a/src/models/blip/config.rs b/src/models/blip/config.rs
index 038e046..eb70c61 100644
--- a/src/models/blip/config.rs
+++ b/src/models/blip/config.rs
@@ -1,34 +1,24 @@
/// Model configuration for `BLIP`
-impl crate::Options {
- pub fn blip() -> Self {
- Self::default().with_model_name("blip").with_batch_size(1)
- }
-
+impl crate::Config {
#[allow(clippy::excessive_precision)]
- pub fn blip_visual() -> Self {
- Self::blip()
- .with_model_kind(crate::Kind::Vision)
- .with_model_ixx(0, 2, 384.into())
- .with_model_ixx(0, 3, 384.into())
+ pub fn blip() -> Self {
+ Self::default()
+ .with_name("blip")
+ .with_batch_size_all(1)
+ .with_visual_ixx(0, 1, 3.into())
+ .with_visual_ixx(0, 2, 384.into())
+ .with_visual_ixx(0, 3, 384.into())
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
.with_image_std(&[0.26862954, 0.26130258, 0.27577711])
- .with_resize_filter("Bilinear")
- .with_normalize(true)
}
- pub fn blip_textual() -> Self {
- Self::blip().with_model_kind(crate::Kind::Language)
- }
-
- pub fn blip_v1_base_caption_visual() -> Self {
- Self::blip_visual()
- .with_model_version(1.into())
- .with_model_file("v1-base-caption-visual.onnx")
- }
-
- pub fn blip_v1_base_caption_textual() -> Self {
- Self::blip_textual()
- .with_model_version(1.into())
- .with_model_file("v1-base-caption-textual.onnx")
+ pub fn blip_v1_base_caption() -> Self {
+ Self::blip()
+ .with_version(1.into())
+ .with_visual_file("v1-base-caption-visual.onnx")
+ .with_textual_file("v1-base-caption-textual.onnx")
+ .with_tokenizer_file("blip/tokenizer.json")
+ .with_tokenizer_config_file("blip/tokenizer_config.json")
+ .with_special_tokens_map_file("blip/special_tokens_map.json")
}
}
diff --git a/src/models/blip/impl.rs b/src/models/blip/impl.rs
index 3719f8a..933dc41 100644
--- a/src/models/blip/impl.rs
+++ b/src/models/blip/impl.rs
@@ -2,26 +2,34 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::{s, Axis};
-use crate::{
- elapsed,
- models::{BaseModelTextual, BaseModelVisual},
- Image, LogitsSampler, Options, Ts, Xs, X, Y,
-};
+use crate::{elapsed, Config, Engine, Image, LogitsSampler, Processor, Ts, Xs, X, Y};
#[derive(Debug, Builder)]
pub struct Blip {
- visual: BaseModelVisual,
- textual: BaseModelTextual,
- ts: Ts,
+ visual: Engine,
+ textual: Engine,
+ batch: usize,
+ height: usize,
+ width: usize,
+ processor: Processor,
max_length: usize,
eos_token_id: u32,
+ ts: Ts,
}
impl Blip {
- pub fn new(options_visual: Options, options_textual: Options) -> Result {
- let visual = BaseModelVisual::new(options_visual)?;
- let textual = BaseModelTextual::new(options_textual)?;
- let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
+ pub fn new(config: Config) -> Result {
+ let visual = Engine::try_from_config(&config.visual)?;
+ let textual = Engine::try_from_config(&config.textual)?;
+ let (batch, height, width) = (
+ visual.batch().opt(),
+ visual.try_height().unwrap_or(&384.into()).opt(),
+ visual.try_width().unwrap_or(&384.into()).opt(),
+ );
+ let ts = Ts::merge(&[visual.ts(), textual.ts()]);
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
let max_length = 512;
let eos_token_id = 102;
@@ -31,17 +39,24 @@ impl Blip {
ts,
max_length,
eos_token_id,
+ batch,
+ height,
+ width,
+ processor,
})
}
pub fn encode_images(&mut self, xs: &[Image]) -> Result {
- self.visual.encode(xs)
+ let ys = self.processor.process_images(xs)?;
+ self.batch = xs.len(); // update
+ let ys = self.visual.run(ys.into())?;
+
+ Ok(ys[0].to_owned())
}
pub fn encode_texts(&mut self, text: Option<&str>) -> Result>> {
let input_ids = self
- .textual
- .processor()
+ .processor
.encode_text_ids(text.unwrap_or_default(), false)?;
Ok(vec![input_ids.clone(); self.batch()])
}
@@ -70,11 +85,11 @@ impl Blip {
let input_ids_attn_mask = X::ones(input_ids_nd.dims());
// decode
- let outputs = self.textual.inference(Xs::from(vec![
+ let outputs = self.textual.run(Xs::from(vec![
input_ids_nd,
input_ids_attn_mask,
image_embeds.clone(),
- X::ones(&[self.visual().batch(), image_embeds.dims()[1]]), // image_embeds_attn_mask
+ X::ones(&[self.batch(), image_embeds.dims()[1]]),
]))?;
// decode each token for each batch
@@ -102,7 +117,7 @@ impl Blip {
}
// batch decode
- let texts = self.textual.processor().decode_tokens_batch(
+ let texts = self.processor.decode_tokens_batch(
&token_ids
.into_iter()
.map(|v| v.into_iter().map(|x| x as u32).collect::>())
@@ -114,7 +129,6 @@ impl Blip {
.into_iter()
.map(|x| Y::default().with_texts(&[&x]))
.collect::>();
- // .into();
Ok(ys)
}
@@ -122,8 +136,4 @@ impl Blip {
pub fn summary(&mut self) {
self.ts.summary();
}
-
- pub fn batch(&self) -> usize {
- self.visual.batch() as _
- }
}
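With a single `Config`, BLIP captioning needs only one constructor call, and `encode_images`/`encode_texts` become the lower-level entry points shown above. A small sketch; the image path and prompt are illustrative, and the final token-decoding step is not part of this hunk:

```rust
use usls::{models::Blip, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let mut model = Blip::new(Config::blip_v1_base_caption().commit()?)?;
    let images = DataLoader::try_read_n(&["./assets/bus.jpg"])?;     // assumption: sample image
    let image_embeds = model.encode_images(&images)?;                // runs the visual engine
    let prompt_ids = model.encode_texts(Some("a photography of"))?;  // one id vector per batch item
    println!("embeds: {:?}, prompts: {}", image_embeds.dims(), prompt_ids.len());
    Ok(())
}
```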
diff --git a/src/models/clip/config.rs b/src/models/clip/config.rs
index 0454261..8b1d69f 100644
--- a/src/models/clip/config.rs
+++ b/src/models/clip/config.rs
@@ -1,71 +1,57 @@
-use crate::Kind;
-
/// Model configuration for `CLIP`
-impl crate::Options {
+impl crate::Config {
pub fn clip() -> Self {
Self::default()
- .with_model_name("clip")
- .with_model_ixx(0, 0, 1.into())
- }
-
- pub fn clip_visual() -> Self {
- Self::clip()
- .with_model_kind(Kind::Vision)
- .with_model_ixx(0, 2, 224.into())
- .with_model_ixx(0, 3, 224.into())
+ .with_name("clip")
+ .with_batch_size_all(1)
+ .with_visual_ixx(0, 1, 3.into())
+ .with_visual_ixx(0, 2, 224.into())
+ .with_visual_ixx(0, 3, 224.into())
.with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
.with_image_std(&[0.26862954, 0.2613026, 0.2757771])
- }
-
- pub fn clip_textual() -> Self {
- Self::clip()
- .with_model_kind(Kind::Language)
.with_model_max_length(77)
+ .with_tokenizer_file("clip/tokenizer.json")
+ .with_tokenizer_config_file("clip/tokenizer_config.json")
+ .with_special_tokens_map_file("clip/special_tokens_map.json")
+ .with_config_file("clip/config.json")
}
- pub fn clip_vit_b16_visual() -> Self {
- Self::clip_visual().with_model_file("vit-b16-visual.onnx")
+ pub fn clip_vit_b16() -> Self {
+ Self::clip()
+ .with_visual_file("vit-b16-visual.onnx")
+ .with_textual_file("vit-b16-textual.onnx")
}
- pub fn clip_vit_b16_textual() -> Self {
- Self::clip_textual().with_model_file("vit-b16-textual.onnx")
+ pub fn clip_vit_b32() -> Self {
+ Self::clip()
+ .with_visual_file("vit-b32-visual.onnx")
+ .with_textual_file("vit-b32-textual.onnx")
}
- pub fn clip_vit_b32_visual() -> Self {
- Self::clip_visual().with_model_file("vit-b32-visual.onnx")
+ pub fn clip_vit_l14() -> Self {
+ Self::clip()
+ .with_visual_file("vit-l14-visual.onnx")
+ .with_textual_file("vit-l14-textual.onnx")
}
- pub fn clip_vit_b32_textual() -> Self {
- Self::clip_textual().with_model_file("vit-b32-textual.onnx")
- }
-
- pub fn clip_vit_l14_visual() -> Self {
- Self::clip_visual().with_model_file("vit-l14-visual.onnx")
- }
-
- pub fn clip_vit_l14_textual() -> Self {
- Self::clip_textual().with_model_file("vit-l14-textual.onnx")
+ pub fn jina_clip() -> Self {
+ Self::default()
+ .with_name("jina-clip-v1")
+ .with_batch_size_all(1)
+ .with_visual_ixx(0, 1, 3.into())
+ .with_visual_ixx(0, 2, 224.into())
+ .with_visual_ixx(0, 3, 224.into())
+ .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
+ .with_image_std(&[0.26862954, 0.2613026, 0.2757771])
+ .with_tokenizer_file("jina-clip-v1/tokenizer.json")
+ .with_tokenizer_config_file("jina-clip-v1/tokenizer_config.json")
+ .with_special_tokens_map_file("jina-clip-v1/special_tokens_map.json")
+ .with_config_file("jina-clip-v1/config.json")
}
pub fn jina_clip_v1() -> Self {
- Self::default()
- .with_model_name("jina-clip-v1")
- .with_model_ixx(0, 0, 1.into())
- }
-
- pub fn jina_clip_v1_visual() -> Self {
- Self::jina_clip_v1()
- .with_model_kind(Kind::Vision)
- .with_model_ixx(0, 2, 224.into())
- .with_model_ixx(0, 3, 224.into())
- .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
- .with_image_std(&[0.26862954, 0.2613026, 0.2757771])
- .with_model_file("visual.onnx")
- }
-
- pub fn jina_clip_v1_textual() -> Self {
- Self::jina_clip_v1()
- .with_model_kind(Kind::Language)
- .with_model_file("textual.onnx")
+ Self::jina_clip()
+ .with_visual_file("visual.onnx")
+ .with_textual_file("textual.onnx")
}
}
diff --git a/src/models/clip/impl.rs b/src/models/clip/impl.rs
index 34f41ec..7d464c0 100644
--- a/src/models/clip/impl.rs
+++ b/src/models/clip/impl.rs
@@ -2,11 +2,12 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::Array2;
-use crate::{elapsed, Engine, Image, Options, Processor, Ts, Xs, X};
+use crate::{elapsed, Config, Engine, Image, Processor, Ts, X};
#[derive(Debug, Builder)]
-pub struct ClipVisual {
- engine: Engine,
+pub struct Clip {
+ visual: Engine,
+ textual: Engine,
height: usize,
width: usize,
batch: usize,
@@ -14,22 +15,23 @@ pub struct ClipVisual {
ts: Ts,
}
-impl ClipVisual {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
- let (batch, height, width, ts) = (
- engine.batch().opt(),
- engine.try_height().unwrap_or(&224.into()).opt(),
- engine.try_width().unwrap_or(&224.into()).opt(),
- engine.ts.clone(),
+impl Clip {
+ pub fn new(config: Config) -> Result {
+ let visual = Engine::try_from_config(&config.visual)?;
+ let textual = Engine::try_from_config(&config.textual)?;
+ let (batch, height, width) = (
+ visual.batch().opt(),
+ visual.try_height().unwrap_or(&224.into()).opt(),
+ visual.try_width().unwrap_or(&224.into()).opt(),
);
- let processor = options
- .to_processor()?
+ let ts = Ts::merge(&[visual.ts(), textual.ts()]);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
Ok(Self {
- engine,
+ textual,
+ visual,
height,
width,
batch,
@@ -38,111 +40,39 @@ impl ClipVisual {
})
}
- pub fn preprocess(&mut self, xs: &[Image]) -> Result {
- let x = self.processor.process_images(xs)?;
-
- Ok(x.into())
- }
-
- pub fn inference(&mut self, xs: Xs) -> Result {
- self.engine.run(xs)
- }
-
pub fn encode_images(&mut self, xs: &[Image]) -> Result {
- let xs = elapsed!("visual-preprocess", self.ts, { self.preprocess(xs)? });
- let xs = elapsed!("visual-inference", self.ts, { self.inference(xs)? });
+ let xs = elapsed!("visual-preprocess", self.ts, {
+ self.processor.process_images(xs)?
+ });
+ let xs = elapsed!("visual-inference", self.ts, { self.visual.run(xs.into())? });
let x = elapsed!("visual-postprocess", self.ts, { xs[0].to_owned() });
Ok(x)
}
-}
-
-#[derive(Debug, Builder)]
-pub struct ClipTextual {
- engine: Engine,
- batch: usize,
- processor: Processor,
- ts: Ts,
-}
-
-impl ClipTextual {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
- let (batch, ts) = (engine.batch().opt(), engine.ts.clone());
- let processor = options.to_processor()?;
-
- Ok(Self {
- engine,
- batch,
- processor,
- ts,
- })
- }
-
- pub fn preprocess(&self, xs: &[&str]) -> Result {
- let encodings: Vec = self
- .processor
- .encode_texts_ids(xs, false)? // skip_special_tokens
- .into_iter()
- .flatten()
- .collect();
-
- let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
- .into_dyn()
- .into();
-
- Ok(x.into())
- }
-
- pub fn inference(&mut self, xs: Xs) -> Result {
- self.engine.run(xs)
- }
pub fn encode_texts(&mut self, xs: &[&str]) -> Result {
- let xs = elapsed!("textual-preprocess", self.ts, { self.preprocess(xs)? });
- let xs = elapsed!("textual-inference", self.ts, { self.inference(xs)? });
+ let xs = elapsed!("textual-preprocess", self.ts, {
+ let encodings: Vec = self
+ .processor
+ .encode_texts_ids(xs, false)?
+ .into_iter()
+ .flatten()
+ .collect();
+
+ let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
+ .into_dyn()
+ .into();
+ x
+ });
+ let xs = elapsed!("textual-inference", self.ts, {
+ self.textual.run(xs.into())?
+ });
let x = elapsed!("textual-postprocess", self.ts, { xs[0].to_owned() });
Ok(x)
}
-}
-
-#[derive(Debug, Builder)]
-pub struct Clip {
- textual: ClipTextual,
- visual: ClipVisual,
- ts: Ts,
-}
-
-impl Clip {
- pub fn new(options_visual: Options, options_textual: Options) -> Result {
- let visual = ClipVisual::new(options_visual)?;
- let textual = ClipTextual::new(options_textual)?;
- // let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
- let ts = Ts::default();
-
- Ok(Self {
- textual,
- visual,
- ts,
- })
- }
-
- pub fn encode_images(&mut self, xs: &[Image]) -> Result {
- let x = elapsed!("encode_images", self.ts, { self.visual.encode_images(xs)? });
- Ok(x)
- }
-
- pub fn encode_texts(&mut self, xs: &[&str]) -> Result {
- let x = elapsed!("encode_texts", self.ts, { self.textual.encode_texts(xs)? });
- Ok(x)
- }
pub fn summary(&mut self) {
- // self.ts.clear();
- // self.ts = Ts::merge(&[&self.ts, self.visual.ts(), self.textual.ts()]);
self.ts.summary();
- self.visual.ts().summary();
- self.textual.ts().summary();
}
}
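The old `ClipVisual`/`ClipTextual` split collapses into one `Clip` that owns both engines, so image and text features come from the same instance. A sketch using the merged variant constructors above (asset path is illustrative):

```rust
use usls::{models::Clip, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let config = Config::clip_vit_b32().commit()?;
    let mut model = Clip::new(config)?;
    let images = DataLoader::try_read_n(&["./assets/cat.png"])?;
    let image_feats = model.encode_images(&images)?;
    let text_feats = model.encode_texts(&["a photo of a cat", "a photo of a dog"])?;
    // Similarity would be computed from these two feature matrices downstream.
    println!("image: {:?}, text: {:?}", image_feats.dims(), text_feats.dims());
    Ok(())
}
```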
diff --git a/src/models/convnext/config.rs b/src/models/convnext/config.rs
index 0ca435d..162d409 100644
--- a/src/models/convnext/config.rs
+++ b/src/models/convnext/config.rs
@@ -1,10 +1,10 @@
use crate::NAMES_IMAGENET_1K;
/// Model configuration for `ConvNeXt`
-impl crate::Options {
+impl crate::Config {
pub fn convnext() -> Self {
Self::default()
- .with_model_name("convnext")
+ .with_name("convnext")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
@@ -13,6 +13,7 @@ impl crate::Options {
.with_image_std(&[0.229, 0.224, 0.225])
.with_normalize(true)
.with_apply_softmax(true)
+ .with_topk(5)
.with_class_names(&NAMES_IMAGENET_1K)
}
diff --git a/src/models/d_fine/config.rs b/src/models/d_fine/config.rs
index ea2cea7..32e6eae 100644
--- a/src/models/d_fine/config.rs
+++ b/src/models/d_fine/config.rs
@@ -1,7 +1,7 @@
/// Model configuration for `d_fine`
-impl crate::Options {
+impl crate::Config {
pub fn d_fine() -> Self {
- Self::rtdetr().with_model_name("d-fine")
+ Self::rtdetr().with_name("d-fine")
}
pub fn d_fine_n_coco() -> Self {
diff --git a/src/models/db/config.rs b/src/models/db/config.rs
index c594526..d728dde 100644
--- a/src/models/db/config.rs
+++ b/src/models/db/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for [DB](https://github.com/MhLiao/DB) and [PaddleOCR-Det](https://github.com/PaddlePaddle/PaddleOCR)
-impl crate::Options {
+impl crate::Config {
pub fn db() -> Self {
Self::default()
- .with_model_name("db")
+ .with_name("db")
.with_model_ixx(0, 0, (1, 1, 8).into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, (608, 960, 1600).into())
@@ -11,7 +11,7 @@ impl crate::Options {
.with_normalize(true)
.with_image_mean(&[0.485, 0.456, 0.406])
.with_image_std(&[0.229, 0.224, 0.225])
- .with_binary_thresh(0.2)
+ .with_db_binary_thresh(0.2)
.with_class_confs(&[0.35])
.with_min_width(5.0)
.with_min_height(12.0)
diff --git a/src/models/db/impl.rs b/src/models/db/impl.rs
index 651fa87..781a2aa 100644
--- a/src/models/db/impl.rs
+++ b/src/models/db/impl.rs
@@ -4,7 +4,7 @@ use ndarray::Axis;
use rayon::prelude::*;
use crate::{
- elapsed, DynConf, Engine, Hbb, Image, Mask, Obb, Ops, Options, Polygon, Processor, Ts, Xs, Y,
+ elapsed, Config, DynConf, Engine, Hbb, Image, Mask, Obb, Ops, Polygon, Processor, Ts, Xs, Y,
};
#[derive(Debug, Builder)]
@@ -24,8 +24,8 @@ pub struct DB {
}
impl DB {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts, spec) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&960.into()).opt(),
@@ -33,15 +33,14 @@ impl DB {
engine.ts.clone(),
engine.spec().to_owned(),
);
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let confs = DynConf::new(options.class_confs(), 1);
- let binary_thresh = options.binary_thresh().unwrap_or(0.2);
- let unclip_ratio = options.unclip_ratio().unwrap_or(1.5);
- let min_width = options.min_width().unwrap_or(12.0);
- let min_height = options.min_height().unwrap_or(5.0);
+ let confs = DynConf::new(config.class_confs(), 1);
+ let binary_thresh = config.db_binary_thresh().unwrap_or(0.2);
+ let unclip_ratio = config.db_unclip_ratio().unwrap_or(1.5);
+ let min_width = config.min_width().unwrap_or(12.0);
+ let min_height = config.min_height().unwrap_or(5.0);
Ok(Self {
engine,
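For `DB`, the detector-specific knobs are now read from `Config`, and the binary-threshold setter gains a `db_` prefix. A hedged sketch; the weight file name, sample path, and `forward` entry point are assumptions:

```rust
use usls::{models::DB, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let config = Config::db()
        .with_model_file("ppocr-det-v4.onnx") // assumption: illustrative weight file
        .with_db_binary_thresh(0.3)           // renamed from `with_binary_thresh`
        .with_class_confs(&[0.4])
        .commit()?;
    let mut model = DB::new(config)?;
    let xs = DataLoader::try_read_n(&["./assets/text.png"])?; // assumption: sample image
    let ys = model.forward(&xs)?;                             // assumption: inference entry point
    println!("{:?}", ys);
    Ok(())
}
```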
diff --git a/src/models/deim/config.rs b/src/models/deim/config.rs
index 10c4a0a..8dd6a38 100644
--- a/src/models/deim/config.rs
+++ b/src/models/deim/config.rs
@@ -1,7 +1,7 @@
/// Model configuration for `DEIM`
-impl crate::Options {
+impl crate::Config {
pub fn deim() -> Self {
- Self::d_fine().with_model_name("deim")
+ Self::d_fine().with_name("deim")
}
pub fn deim_dfine_s_coco() -> Self {
diff --git a/src/models/deit/config.rs b/src/models/deit/config.rs
index 8be8000..798c205 100644
--- a/src/models/deit/config.rs
+++ b/src/models/deit/config.rs
@@ -1,10 +1,10 @@
use crate::NAMES_IMAGENET_1K;
/// Model configuration for `DeiT`
-impl crate::Options {
+impl crate::Config {
pub fn deit() -> Self {
Self::default()
- .with_model_name("deit")
+ .with_name("deit")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
diff --git a/src/models/depth_anything/config.rs b/src/models/depth_anything/config.rs
index d19344d..30668fd 100644
--- a/src/models/depth_anything/config.rs
+++ b/src/models/depth_anything/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for `DepthAnything`
-impl crate::Options {
+impl crate::Config {
pub fn depth_anything() -> Self {
Self::default()
- .with_model_name("depth-anything")
+ .with_name("depth-anything")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, (384, 518, 1024).into())
@@ -14,26 +14,26 @@ impl crate::Options {
}
pub fn depth_anything_s() -> Self {
- Self::depth_anything().with_model_scale(crate::Scale::S)
+ Self::depth_anything().with_scale(crate::Scale::S)
}
pub fn depth_anything_v1() -> Self {
- Self::depth_anything().with_model_version(1.into())
+ Self::depth_anything().with_version(1.into())
}
pub fn depth_anything_v2() -> Self {
- Self::depth_anything().with_model_version(2.into())
+ Self::depth_anything().with_version(2.into())
}
pub fn depth_anything_v1_small() -> Self {
Self::depth_anything_v1()
- .with_model_scale(crate::Scale::S)
+ .with_scale(crate::Scale::S)
.with_model_file("v1-s.onnx")
}
pub fn depth_anything_v2_small() -> Self {
Self::depth_anything_v2()
- .with_model_scale(crate::Scale::S)
+ .with_scale(crate::Scale::S)
.with_model_file("v2-s.onnx")
}
}
diff --git a/src/models/depth_anything/impl.rs b/src/models/depth_anything/impl.rs
index f094cc6..962f52f 100644
--- a/src/models/depth_anything/impl.rs
+++ b/src/models/depth_anything/impl.rs
@@ -1,7 +1,7 @@
use aksr::Builder;
use anyhow::Result;
-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};
#[derive(Debug, Builder)]
pub struct DepthAnything {
@@ -15,8 +15,8 @@ pub struct DepthAnything {
}
impl DepthAnything {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
@@ -25,9 +25,7 @@ impl DepthAnything {
engine.try_width().unwrap_or(&518.into()).opt(),
engine.ts().clone(),
);
-
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
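Since scale and version now live on `Config` itself, the bundled constructors are the simplest way to pick a DepthAnything variant. A sketch; the sample path and the `forward` call are assumptions:

```rust
use usls::{models::DepthAnything, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let config = Config::depth_anything_v2_small().commit()?;
    let mut model = DepthAnything::new(config)?;
    let xs = DataLoader::try_read_n(&["./assets/street.jpg"])?; // assumption: sample image
    let ys = model.forward(&xs)?;                               // assumption: inference entry point
    println!("{} depth result(s)", ys.len());
    Ok(())
}
```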
diff --git a/src/models/depth_pro/config.rs b/src/models/depth_pro/config.rs
index 451682e..2d569c7 100644
--- a/src/models/depth_pro/config.rs
+++ b/src/models/depth_pro/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for `DepthPro`
-impl crate::Options {
+impl crate::Config {
pub fn depth_pro() -> Self {
Self::default()
- .with_model_name("depth-pro")
+ .with_name("depth-pro")
.with_model_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 1536.into())
@@ -11,17 +11,6 @@ impl crate::Options {
.with_image_std(&[0.5, 0.5, 0.5])
.with_resize_mode(crate::ResizeMode::FitExact)
.with_normalize(true)
+ .with_model_file("model.onnx")
}
-
- // pub fn depth_pro_q4f16() -> Self {
- // Self::depth_pro().with_model_file("q4f16.onnx")
- // }
-
- // pub fn depth_pro_fp16() -> Self {
- // Self::depth_pro().with_model_file("fp16.onnx")
- // }
-
- // pub fn depth_pro_bnb4() -> Self {
- // Self::depth_pro().with_model_file("bnb4.onnx")
- // }
}
diff --git a/src/models/depth_pro/impl.rs b/src/models/depth_pro/impl.rs
index 6e1d254..cbb419e 100644
--- a/src/models/depth_pro/impl.rs
+++ b/src/models/depth_pro/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::Axis;
-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct DepthPro {
@@ -16,8 +16,8 @@ pub struct DepthPro {
}
impl DepthPro {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -25,8 +25,7 @@ impl DepthPro {
engine.try_width().unwrap_or(&512.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
diff --git a/src/models/dinov2/config.rs b/src/models/dinov2/config.rs
index abf7696..22eb541 100644
--- a/src/models/dinov2/config.rs
+++ b/src/models/dinov2/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for `DINOv2`
-impl crate::Options {
+impl crate::Config {
pub fn dinov2() -> Self {
Self::default()
- .with_model_name("dinov2")
+ .with_name("dinov2")
.with_model_ixx(0, 0, (1, 1, 8).into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
@@ -16,13 +16,13 @@ impl crate::Options {
pub fn dinov2_small() -> Self {
Self::dinov2()
- .with_model_scale(crate::Scale::S)
+ .with_scale(crate::Scale::S)
.with_model_file("s.onnx")
}
pub fn dinov2_base() -> Self {
Self::dinov2()
- .with_model_scale(crate::Scale::B)
+ .with_scale(crate::Scale::B)
.with_model_file("b.onnx")
}
}
diff --git a/src/models/dinov2/impl.rs b/src/models/dinov2/impl.rs
index 813851d..53e3b74 100644
--- a/src/models/dinov2/impl.rs
+++ b/src/models/dinov2/impl.rs
@@ -1,7 +1,7 @@
use aksr::Builder;
use anyhow::Result;
-use crate::{elapsed, Engine, Image, Options, Processor, Scale, Ts, Xs, X};
+use crate::{elapsed, Config, Engine, Image, Processor, Scale, Ts, Xs, X};
#[derive(Builder, Debug)]
pub struct DINOv2 {
@@ -15,15 +15,15 @@ pub struct DINOv2 {
}
impl DINOv2 {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&384.into()).opt(),
engine.try_width().unwrap_or(&384.into()).opt(),
engine.ts.clone(),
);
- let dim = match options.model_scale() {
+ let dim = match &config.scale {
Some(Scale::S) => 384,
Some(Scale::B) => 768,
Some(Scale::L) => 1024,
@@ -31,8 +31,7 @@ impl DINOv2 {
Some(x) => anyhow::bail!("Unsupported scale: {:?}", x),
None => anyhow::bail!("No model scale specified"),
};
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
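Because the embedding width is now derived from `config.scale`, building `DINOv2` from a `Config` without a scale fails early; the `dinov2_small`/`dinov2_base` constructors set it. A sketch; the feature-extraction method name is an assumption:

```rust
use usls::{models::DINOv2, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let config = Config::dinov2_small().commit()?; // sets Scale::S, so dim = 384
    let mut model = DINOv2::new(config)?;
    let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
    let feats = model.encode_images(&xs)?; // assumption: encoder entry point
    println!("feature dims: {:?}", feats.dims());
    Ok(())
}
```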
diff --git a/src/models/fast/config.rs b/src/models/fast/config.rs
index 6ec39c4..2277ff5 100644
--- a/src/models/fast/config.rs
+++ b/src/models/fast/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://github.com/czczup/FAST)
-impl crate::Options {
+impl crate::Config {
pub fn fast() -> Self {
Self::db()
- .with_model_name("fast")
+ .with_name("fast")
.with_image_mean(&[0.798, 0.785, 0.772])
.with_image_std(&[0.264, 0.2749, 0.287])
}
diff --git a/src/models/fastvit/config.rs b/src/models/fastvit/config.rs
index 71cdd31..351e790 100644
--- a/src/models/fastvit/config.rs
+++ b/src/models/fastvit/config.rs
@@ -1,10 +1,10 @@
use crate::NAMES_IMAGENET_1K;
/// Model configuration for `FastViT`
-impl crate::Options {
+impl crate::Config {
pub fn fastvit() -> Self {
Self::default()
- .with_model_name("fastvit")
+ .with_name("fastvit")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
diff --git a/src/models/florence2/config.rs b/src/models/florence2/config.rs
index 8ef74ac..5da2c99 100644
--- a/src/models/florence2/config.rs
+++ b/src/models/florence2/config.rs
@@ -1,59 +1,31 @@
/// Model configuration for `Florence2`
-impl crate::Options {
+impl crate::Config {
pub fn florence2() -> Self {
Self::default()
- .with_model_name("florence2")
- .with_batch_size(1)
- }
-
- pub fn florence2_visual() -> Self {
- Self::florence2()
- .with_model_kind(crate::Kind::Vision)
- .with_model_ixx(0, 2, 768.into())
- .with_model_ixx(0, 3, 768.into())
+ .with_name("florence2")
+ .with_batch_size_all(1)
+ .with_visual_ixx(0, 1, 3.into())
+ .with_visual_ixx(0, 2, 768.into())
+ .with_visual_ixx(0, 3, 768.into())
.with_image_mean(&[0.485, 0.456, 0.406])
.with_image_std(&[0.229, 0.224, 0.225])
- .with_resize_filter("Bilinear")
- .with_normalize(true)
}
- pub fn florence2_textual() -> Self {
- Self::florence2().with_model_kind(crate::Kind::Language)
+ pub fn florence2_base() -> Self {
+ Self::florence2()
+ .with_scale(crate::Scale::B)
+ .with_visual_file("base-vision-encoder.onnx")
+ .with_textual_file("base-embed-tokens.onnx")
+ .with_textual_encoder_file("base-encoder.onnx")
+ .with_textual_decoder_file("base-decoder.onnx")
+ .with_textual_decoder_merged_file("base-decoder-merged.onnx")
+ .with_tokenizer_file("florence2/tokenizer.json")
+ .with_config_file("florence2/config.json")
+ .with_special_tokens_map_file("florence2/special_tokens_map.json")
+ .with_tokenizer_config_file("florence2/tokenizer_config.json")
}
- pub fn florence2_visual_base() -> Self {
- Self::florence2_visual().with_model_scale(crate::Scale::B)
- }
-
- pub fn florence2_textual_base() -> Self {
- Self::florence2_textual().with_model_scale(crate::Scale::B)
- }
-
- pub fn florence2_visual_large() -> Self {
- Self::florence2_visual().with_model_scale(crate::Scale::L)
- }
-
- pub fn florence2_textual_large() -> Self {
- Self::florence2_textual().with_model_scale(crate::Scale::L)
- }
-
- pub fn florence2_visual_encoder_base() -> Self {
- Self::florence2_visual_base().with_model_file("base-vision-encoder.onnx")
- }
-
- pub fn florence2_textual_embed_base() -> Self {
- Self::florence2_textual_base().with_model_file("base-embed-tokens.onnx")
- }
-
- pub fn florence2_texual_encoder_base() -> Self {
- Self::florence2_textual_base().with_model_file("base-encoder.onnx")
- }
-
- pub fn florence2_texual_decoder_base() -> Self {
- Self::florence2_textual_base().with_model_file("base-decoder.onnx")
- }
-
- pub fn florence2_texual_decoder_merged_base() -> Self {
- Self::florence2_textual_base().with_model_file("base-decoder-merged.onnx")
+ pub fn florence2_large() -> Self {
+ todo!()
}
}
diff --git a/src/models/florence2/impl.rs b/src/models/florence2/impl.rs
index 6a56775..4404327 100644
--- a/src/models/florence2/impl.rs
+++ b/src/models/florence2/impl.rs
@@ -4,51 +4,56 @@ use ndarray::{s, Axis};
use rayon::prelude::*;
use crate::{
- elapsed,
- models::{BaseModelTextual, BaseModelVisual, Quantizer},
- Hbb, Image, LogitsSampler, Options, Polygon, Scale, Task, Ts, Xs, X, Y,
+ elapsed, models::Quantizer, Config, Engine, Hbb, Image, LogitsSampler, Polygon, Processor,
+ Scale, Task, Ts, Xs, X, Y,
};
#[derive(Debug, Builder)]
pub struct Florence2 {
- pub vision_encoder: BaseModelVisual,
- pub text_embed: BaseModelTextual,
- pub encoder: BaseModelTextual,
- pub decoder: BaseModelTextual,
- pub decoder_merged: BaseModelTextual,
+ pub vision_encoder: Engine,
+ pub text_embed: Engine,
+ pub encoder: Engine,
+ pub decoder: Engine,
+ pub decoder_merged: Engine,
ts: Ts,
quantizer: Quantizer,
max_length: usize,
eos_token_id: u32,
decoder_start_token_id: u32,
n_kvs: usize,
+ height: usize,
+ width: usize,
+ batch: usize,
+ processor: Processor,
}
impl Florence2 {
- pub fn new(
- options_vision_encoder: Options,
- options_text_embed: Options,
- options_encoder: Options,
- options_decoder: Options,
- options_decoder_merged: Options,
- ) -> Result {
- let vision_encoder = BaseModelVisual::new(options_vision_encoder)?;
- let text_embed = BaseModelTextual::new(options_text_embed)?;
- let encoder = BaseModelTextual::new(options_encoder)?;
- let decoder = BaseModelTextual::new(options_decoder)?;
- let decoder_merged = BaseModelTextual::new(options_decoder_merged)?;
+ pub fn new(config: Config) -> Result {
+ let vision_encoder = Engine::try_from_config(&config.visual)?;
+ let text_embed = Engine::try_from_config(&config.textual)?;
+ let encoder = Engine::try_from_config(&config.textual_encoder)?;
+ let decoder = Engine::try_from_config(&config.textual_decoder)?;
+ let decoder_merged = Engine::try_from_config(&config.textual_decoder_merged)?;
+ let (batch, height, width) = (
+ vision_encoder.batch().opt(),
+ vision_encoder.try_height().unwrap_or(&1024.into()).opt(),
+ vision_encoder.try_width().unwrap_or(&1024.into()).opt(),
+ );
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
let quantizer = Quantizer::default();
let ts = Ts::merge(&[
- vision_encoder.engine().ts(),
- text_embed.engine().ts(),
- encoder.engine().ts(),
- decoder.engine().ts(),
- decoder_merged.engine().ts(),
+ vision_encoder.ts(),
+ text_embed.ts(),
+ encoder.ts(),
+ decoder.ts(),
+ decoder_merged.ts(),
]);
let max_length = 1024;
let eos_token_id = 2;
let decoder_start_token_id = 2;
- let n_kvs = match decoder.scale() {
+ let n_kvs = match config.scale {
Some(Scale::B) => 6,
Some(Scale::L) => 12,
_ => unimplemented!(),
@@ -66,6 +71,10 @@ impl Florence2 {
eos_token_id,
decoder_start_token_id,
n_kvs,
+ batch,
+ height,
+ width,
+ processor,
})
}
@@ -97,12 +106,12 @@ impl Florence2 {
.map(|im| {
let text = Self::process_task(task, im.height() as _, im.width() as _)
.prompt_for_florence2()?;
- let ids = self.text_embed.processor().encode_text_ids(&text, true)?;
+ let ids = self.processor.encode_text_ids(&text, true)?;
X::from(ids).insert_axis(0)
})
.collect::, _>>()?;
let x = X::concat(&xs, 0)?;
- let xs = self.text_embed.inference(x.into())?;
+ let xs = self.text_embed.run(x.into())?;
let x = xs[0].to_owned();
Ok(x)
@@ -110,7 +119,10 @@ impl Florence2 {
pub fn forward(&mut self, xs_visual: &[Image], x_textual: &Task) -> Result> {
let visual_embeddings = elapsed!("visual-encode", self.ts, {
- self.vision_encoder.encode(xs_visual)?
+ let xs = self.processor.process_images(xs_visual)?;
+ self.batch = xs_visual.len(); // update
+ let xs = self.vision_encoder.run(xs.into())?;
+ xs[0].to_owned()
});
let textual_embedding = elapsed!("textual-encode", self.ts, {
@@ -141,7 +153,7 @@ impl Florence2 {
let attention_mask = X::ones(&[self.batch(), inputs_embeds.dims()[1]]);
// encoder
- let last_hidden_state = self.encoder.inference(Xs::from(vec![
+ let last_hidden_state = self.encoder.run(Xs::from(vec![
attention_mask.clone(),
inputs_embeds.clone(),
]))?[0]
@@ -150,7 +162,7 @@ impl Florence2 {
// decoder
let inputs_embeds = inputs_embeds.slice(s![.., -1.., ..]);
let inputs_embeds = X::from(inputs_embeds.to_owned().into_dyn());
- let mut decoder_outputs = self.decoder.inference(Xs::from(vec![
+ let mut decoder_outputs = self.decoder.run(Xs::from(vec![
attention_mask.clone(),
last_hidden_state.clone(),
inputs_embeds,
@@ -215,7 +227,7 @@ impl Florence2 {
// decode
let next_tokens = X::from(last_tokens.clone()).insert_axis(1)?;
- let inputs_embeds = &self.text_embed.inference(Xs::from(next_tokens))?[0].clone();
+ let inputs_embeds = &self.text_embed.run(Xs::from(next_tokens))?[0].clone();
let use_cache = X::ones(&[1]);
let mut xs = vec![
attention_mask.clone(),
@@ -229,13 +241,13 @@ impl Florence2 {
xs.push(encoder_kvs[i * 2 + 1].clone());
}
xs.push(use_cache);
- decoder_outputs = self.decoder_merged.inference(xs.into())?;
+ decoder_outputs = self.decoder_merged.run(xs.into())?;
}
// batch decode
let texts = self
- .text_embed
- .processor()
+ // .text_embed
+ .processor
.decode_tokens_batch(&token_ids, false)?;
Ok(texts)
@@ -416,10 +428,6 @@ impl Florence2 {
Ok(ys)
}
- pub fn batch(&self) -> usize {
- self.vision_encoder.batch() as _
- }
-
pub fn summary(&mut self) {
self.ts.summary();
}
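All five Florence2 sub-models now come from a single `Config`, while `forward` keeps its `(&[Image], &Task)` signature. A sketch; the sample path and the exact `Task` variant are assumptions:

```rust
use usls::{models::Florence2, Config, DataLoader, Task};

fn main() -> anyhow::Result<()> {
    let config = Config::florence2_base().commit()?;
    let mut model = Florence2::new(config)?;
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?; // assumption: sample image
    let ys = model.forward(&xs, &Task::Caption(1))?;         // assumption: caption task variant
    println!("{:?}", ys);
    Ok(())
}
```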
diff --git a/src/models/grounding_dino/config.rs b/src/models/grounding_dino/config.rs
index ce7096d..375732c 100644
--- a/src/models/grounding_dino/config.rs
+++ b/src/models/grounding_dino/config.rs
@@ -1,9 +1,8 @@
/// Model configuration for `GroundingDino`
-impl crate::Options {
+impl crate::Config {
pub fn grounding_dino() -> Self {
Self::default()
- .with_model_name("grounding-dino")
- .with_model_kind(crate::Kind::VisionLanguage)
+ .with_name("grounding-dino")
.with_model_ixx(0, 0, 1.into()) // TODO: current onnx model does not support bs > 1
.with_model_ixx(0, 2, 800.into()) // TODO: matters
.with_model_ixx(0, 3, 1200.into()) // TODO: matters
@@ -11,9 +10,10 @@ impl crate::Options {
.with_resize_filter("CatmullRom")
.with_image_mean(&[0.485, 0.456, 0.406])
.with_image_std(&[0.229, 0.224, 0.225])
- .with_normalize(true)
- .with_class_confs(&[0.25])
- .with_text_confs(&[0.25])
+ .with_tokenizer_file("grounding-dino/tokenizer.json")
+ .with_config_file("grounding-dino/config.json")
+ .with_special_tokens_map_file("grounding-dino/special_tokens_map.json")
+ .with_tokenizer_config_file("grounding-dino/tokenizer_config.json")
}
pub fn grounding_dino_tiny() -> Self {
diff --git a/src/models/grounding_dino/impl.rs b/src/models/grounding_dino/impl.rs
index d03e948..efebd19 100644
--- a/src/models/grounding_dino/impl.rs
+++ b/src/models/grounding_dino/impl.rs
@@ -4,7 +4,7 @@ use ndarray::{s, Array2, Axis};
use rayon::prelude::*;
use std::fmt::Write;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};
#[derive(Builder, Debug)]
pub struct GroundingDINO {
@@ -24,8 +24,8 @@ pub struct GroundingDINO {
}
impl GroundingDINO {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -33,31 +33,29 @@ impl GroundingDINO {
engine.try_width().unwrap_or(&1200.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
- .with_image_width(width as _)
- .with_image_height(height as _);
- let class_names = options
+ let class_names: Vec<_> = config
.text_names
- .as_ref()
- .and_then(|v| {
- let v: Vec<_> = v
- .iter()
- .map(|s| s.trim().to_ascii_lowercase())
- .filter(|s| !s.is_empty())
- .collect();
- (!v.is_empty()).then_some(v)
- })
- .ok_or_else(|| anyhow::anyhow!("No valid class names were provided in the options. Ensure the 'text_names' field is non-empty and contains valid class names."))?;
+ .iter()
+ .map(|s| s.trim().to_ascii_lowercase())
+ .filter(|s| !s.is_empty())
+ .collect();
+ if class_names.is_empty() {
+ anyhow::bail!(
+ "No valid class names were provided in the config. Ensure the 'text_names' field is non-empty and contains valid class names."
+ );
+ }
let text_prompt = class_names.iter().fold(String::new(), |mut acc, text| {
write!(&mut acc, "{}.", text).unwrap();
acc
});
+ let confs_visual = DynConf::new(config.class_confs(), class_names.len());
+ let confs_textual = DynConf::new(config.text_confs(), class_names.len());
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
let token_ids = processor.encode_text_ids(&text_prompt, true)?;
let tokens = processor.encode_text_tokens(&text_prompt, true)?;
let class_ids_map = Self::process_class_ids(&tokens);
- let confs_visual = DynConf::new(options.class_confs(), class_names.len());
- let confs_textual = DynConf::new(options.text_confs(), class_names.len());
Ok(Self {
engine,
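Since the constructor now bails when no class names are configured, the prompt classes have to be set on the `Config` before `commit`. A hedged sketch; `with_text_names`, the sample path, and the `forward` entry point are assumptions not shown in this patch:

```rust
use usls::{models::GroundingDINO, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    let config = Config::grounding_dino_tiny()
        .with_text_names(&["person", "bus", "dog"]) // assumption: setter for `text_names`
        .with_class_confs(&[0.3])
        .commit()?;
    let mut model = GroundingDINO::new(config)?;
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?; // assumption: sample image
    let ys = model.forward(&xs)?;                            // assumption: inference entry point
    println!("{:?}", ys);
    Ok(())
}
```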
diff --git a/src/models/linknet/config.rs b/src/models/linknet/config.rs
index c1c666a..ecdc71e 100644
--- a/src/models/linknet/config.rs
+++ b/src/models/linknet/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/abs/1707.03718)
-impl crate::Options {
+impl crate::Config {
pub fn linknet() -> Self {
Self::fast()
- .with_model_name("linknet")
+ .with_name("linknet")
.with_image_mean(&[0.798, 0.785, 0.772])
.with_image_std(&[0.264, 0.2749, 0.287])
}
diff --git a/src/models/mobileone/config.rs b/src/models/mobileone/config.rs
index a76f271..a1efa79 100644
--- a/src/models/mobileone/config.rs
+++ b/src/models/mobileone/config.rs
@@ -1,10 +1,10 @@
use crate::NAMES_IMAGENET_1K;
/// Model configuration for `MobileOne`
-impl crate::Options {
+impl crate::Config {
pub fn mobileone() -> Self {
Self::default()
- .with_model_name("mobileone")
+ .with_name("mobileone")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 224.into())
diff --git a/src/models/modnet/config.rs b/src/models/modnet/config.rs
index 05174d2..7d2c978 100644
--- a/src/models/modnet/config.rs
+++ b/src/models/modnet/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for `MODNet`
-impl crate::Options {
+impl crate::Config {
pub fn modnet() -> Self {
Self::default()
- .with_model_name("modnet")
+ .with_name("modnet")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 2, (416, 512, 800).into())
.with_model_ixx(0, 3, (416, 512, 800).into())
diff --git a/src/models/modnet/impl.rs b/src/models/modnet/impl.rs
index 5322ab1..0d123f7 100644
--- a/src/models/modnet/impl.rs
+++ b/src/models/modnet/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::Axis;
-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct MODNet {
@@ -16,8 +16,8 @@ pub struct MODNet {
}
impl MODNet {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -25,8 +25,7 @@ impl MODNet {
engine.try_width().unwrap_or(&512.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
diff --git a/src/models/moondream2/config.rs b/src/models/moondream2/config.rs
index 96d0bf9..a68bfdc 100644
--- a/src/models/moondream2/config.rs
+++ b/src/models/moondream2/config.rs
@@ -1,117 +1,47 @@
/// Model configuration for `moondream2`
-impl crate::Options {
+impl crate::Config {
pub fn moondream2() -> Self {
Self::default()
- .with_model_name("moondream2")
- .with_model_num_dry_run(0)
+ .with_name("moondream2")
+ .with_visual_encoder_ixx(0, 0, (1, 3, 4).into()) // patch count
+ .with_image_mean(&[0.5, 0.5, 0.5])
+ .with_image_std(&[0.5, 0.5, 0.5])
+ .with_resize_mode(crate::ResizeMode::FitExact)
+ .with_resize_filter("catmullrom")
+ .with_visual_projection_ixx(0, 0, 1.into())
+ .with_textual_encoder_ixx(0, 0, 1.into())
+ .with_textual_decoder_ixx(0, 0, 1.into())
+ .with_size_encoder_ixx(0, 0, 1.into())
+ .with_size_decoder_ixx(0, 0, 1.into())
+ .with_coord_encoder_ixx(0, 0, 1.into())
+ .with_coord_decoder_ixx(0, 0, 1.into())
+ .with_tokenizer_file("moondream2/tokenizer.json")
+ .with_tokenizer_config_file("moondream2/tokenizer_config.json")
}
pub fn moondream2_0_5b() -> Self {
- Self::moondream2().with_model_scale(crate::Scale::Billion(0.5))
+ Self::moondream2()
+ .with_scale(crate::Scale::Billion(0.5))
+ .with_visual_encoder_file("0.5b-vision-encoder.onnx")
+ .with_visual_projection_file("0.5b-vision-projection.onnx")
+ .with_textual_decoder_file("0.5b-text-decoder.onnx")
+ .with_textual_encoder_file("0.5b-text-encoder.onnx")
+ .with_coord_encoder_file("0.5b-coord-encoder.onnx")
+ .with_coord_decoder_file("0.5b-coord-decoder.onnx")
+ .with_size_encoder_file("0.5b-size-encoder.onnx")
+ .with_size_decoder_file("0.5b-size-decoder.onnx")
}
- pub fn moondream2_0_5b_vision_encoder() -> Self {
- Self::moondream2_0_5b()
- .with_model_ixx(0, 0, (1, 3, 4).into()) // patch count
- .with_model_kind(crate::Kind::Vision)
- .with_image_mean(&[0.5, 0.5, 0.5])
- .with_image_std(&[0.5, 0.5, 0.5])
- .with_normalize(true)
- .with_resize_mode(crate::ResizeMode::FitExact)
- .with_resize_filter("catmullrom")
- .with_model_file("0.5b-vision-encoder.onnx")
- }
-
- pub fn moondream2_0_5b_vision_projection() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_kind(crate::Kind::Vision)
- .with_model_file("0.5b-vision-projection.onnx")
- }
-
- pub fn moondream2_0_5b_text_decoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_kind(crate::Kind::Language)
- .with_model_file("0.5b-text-decoder.onnx")
- }
-
- pub fn moondream2_0_5b_text_encoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_kind(crate::Kind::Language)
- .with_model_file("0.5b-text-encoder.onnx")
- }
-
- pub fn moondream2_0_5b_coord_encoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_file("0.5b-coord-encoder.onnx")
- }
-
- pub fn moondream2_0_5b_coord_decoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_file("0.5b-coord-decoder.onnx")
- }
-
- pub fn moondream2_0_5b_size_encoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_file("0.5b-size-encoder.onnx")
- }
-
- pub fn moondream2_0_5b_size_decoder() -> Self {
- Self::moondream2_0_5b()
- .with_batch_size(1)
- .with_model_file("0.5b-size-decoder.onnx")
- }
-
- pub fn moondream2_2b_vision_encoder() -> Self {
- Self::moondream2_0_5b_vision_encoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-vision-encoder.onnx")
- }
-
- pub fn moondream2_2b_vision_projection() -> Self {
- Self::moondream2_0_5b_vision_projection()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-vision-projection.onnx")
- }
-
- pub fn moondream2_2b_text_decoder() -> Self {
- Self::moondream2_0_5b_text_decoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-text-decoder.onnx")
- }
-
- pub fn moondream2_2b_text_encoder() -> Self {
- Self::moondream2_0_5b_text_encoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-text-encoder.onnx")
- }
-
- pub fn moondream2_2b_coord_encoder() -> Self {
- Self::moondream2_0_5b_coord_encoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-coord-encoder.onnx")
- }
-
- pub fn moondream2_2b_coord_decoder() -> Self {
- Self::moondream2_0_5b_coord_decoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-coord-decoder.onnx")
- }
-
- pub fn moondream2_2b_size_encoder() -> Self {
- Self::moondream2_0_5b_size_encoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-size-encoder.onnx")
- }
-
- pub fn moondream2_2b_size_decoder() -> Self {
- Self::moondream2_0_5b_size_decoder()
- .with_model_scale(crate::Scale::Billion(2.))
- .with_model_file("2b-size-decoder.onnx")
+ pub fn moondream2_2b() -> Self {
+ Self::moondream2()
+ .with_scale(crate::Scale::Billion(2.))
+ .with_visual_encoder_file("2b-vision-encoder.onnx")
+ .with_visual_projection_file("2b-vision-projection.onnx")
+ .with_textual_decoder_file("2b-text-decoder.onnx")
+ .with_textual_encoder_file("2b-text-encoder.onnx")
+ .with_coord_encoder_file("2b-coord-encoder.onnx")
+ .with_coord_decoder_file("2b-coord-decoder.onnx")
+ .with_size_encoder_file("2b-size-encoder.onnx")
+ .with_size_decoder_file("2b-size-decoder.onnx")
}
}
diff --git a/src/models/moondream2/impl.rs b/src/models/moondream2/impl.rs
index 55a0942..0c2aa7e 100644
--- a/src/models/moondream2/impl.rs
+++ b/src/models/moondream2/impl.rs
@@ -5,66 +5,57 @@ use ndarray::{s, Array, Array2, Array3, Axis, IxDyn};
use ndarray_npy::ReadNpyExt;
use crate::{
- BaseModelTextual, DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, Options, Processor,
- Scale, Task, Ts, Xs, X, Y,
+ Config, DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, Processor, Scale, Task, Xs, X,
+ Y,
};
#[derive(Builder, Debug)]
pub struct Moondream2 {
- vision_encoder: VisionEncoder,
- vision_projection: VisionProjection,
- pub text_decoder: BaseModelTextual,
- text_encoder: BaseModelTextual,
- coord_decoder: Option,
- coord_encoder: Option,
- size_decoder: Option,
- size_encoder: Option,
+ vision_encoder: Engine,
+ vision_projection: Engine,
+ text_decoder: Engine,
+ text_encoder: Engine,
+ coord_decoder: Option,
+ coord_encoder: Option,
+ size_decoder: Option,
+ size_encoder: Option,
initial_kv_cache: X, // TODO: use f16
scale: Scale,
dtype: DType,
max_length: usize,
eos_token_id: u32,
max_objects: usize,
+ num_patch: usize,
+ patch_size: usize,
+ processor: Processor,
+ seq_len: usize,
}
impl Moondream2 {
- // TODO
- #[allow(clippy::too_many_arguments)]
- pub fn new(
- options_vision_encoder: Options,
- options_vision_projection: Options,
- options_text_encoder: Options,
- options_text_decoder: Options,
- options_coord_encoder: Option,
- options_coord_decoder: Option,
- options_size_encoder: Option,
- options_size_decoder: Option,
- ) -> Result {
+ pub fn new(config: Config) -> Result {
let max_length = 2048;
let max_objects = 50;
let eos_token_id = 50256;
- let dtype = options_vision_encoder.model_dtype;
- let scale = options_vision_encoder
- .model_scale
- .clone()
- .unwrap_or(Scale::Billion(0.5));
+ let dtype = config.visual_encoder.dtype;
+ let scale = config.scale.clone().unwrap_or(Scale::Billion(0.5));
let initial_kv_cache: X = KVCache::new(&scale, &dtype)?.0.into();
- let vision_encoder = VisionEncoder::new(options_vision_encoder)?;
- let vision_projection = VisionProjection::new(options_vision_projection)?;
- let text_decoder = BaseModelTextual::new(options_text_decoder)?;
- let text_encoder = BaseModelTextual::new(options_text_encoder)?;
- let coord_decoder = options_coord_decoder
- .map(BaseModelTextual::new)
- .transpose()?;
- let coord_encoder = options_coord_encoder
- .map(BaseModelTextual::new)
- .transpose()?;
- let size_decoder = options_size_decoder
- .map(BaseModelTextual::new)
- .transpose()?;
- let size_encoder = options_size_encoder
- .map(BaseModelTextual::new)
- .transpose()?;
+ let vision_encoder = Engine::try_from_config(&config.visual_encoder)?;
+ let vision_projection = Engine::try_from_config(&config.visual_projection)?;
+ let text_decoder = Engine::try_from_config(&config.textual_decoder)?;
+ let text_encoder = Engine::try_from_config(&config.textual_encoder)?;
+ let coord_decoder = Engine::try_from_config(&config.coord_decoder).ok();
+ let coord_encoder = Engine::try_from_config(&config.coord_encoder).ok();
+ let size_decoder = Engine::try_from_config(&config.size_decoder).ok();
+ let size_encoder = Engine::try_from_config(&config.size_encoder).ok();
+ let (num_patch, patch_size, _ts) = (
+ vision_encoder.batch().opt(),
+ vision_encoder.try_height().unwrap_or(&378.into()).opt(),
+ vision_encoder.ts.clone(),
+ );
+ let seq_len = vision_projection.inputs_minoptmax[0][1].opt();
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(patch_size as _)
+ .with_image_height(patch_size as _);
Ok(Self {
vision_encoder,
@@ -81,12 +72,16 @@ impl Moondream2 {
eos_token_id,
scale,
dtype,
+ num_patch,
+ patch_size,
+ processor,
+ seq_len,
})
}
pub fn encode_image(&mut self, x: &Image) -> Result {
- let patches_emb = self.vision_encoder.encode(x)?.clone().insert_axis(0)?;
- let image_embedding = self.vision_projection.inference(patches_emb.into())?[0].to_owned();
+ let patches_emb = self.encode(x)?.clone().insert_axis(0)?;
+ let image_embedding = self.vision_projection.run(patches_emb.into())?[0].to_owned();
Ok(image_embedding)
}
@@ -119,12 +114,7 @@ impl Moondream2 {
Task::Vqa(query) => {
let input_ids: Vec<_> = [198., 198., 24361., 25.]
.iter()
- .chain(
- &self
- .text_encoder
- .processor()
- .encode_text_ids(query, false)?,
- )
+ .chain(&self.processor.encode_text_ids(query, false)?)
.chain(&[198., 198., 33706., 25.])
.cloned()
.collect();
@@ -139,8 +129,7 @@ impl Moondream2 {
.iter()
.chain(
&self
- .text_encoder
- .processor()
+ .processor
.encode_text_ids(&format!(" {}", object), false)?,
)
.chain(&[628.])
@@ -156,8 +145,7 @@ impl Moondream2 {
.iter()
.chain(
&self
- .text_encoder
- .processor()
+ .processor
.encode_text_ids(&format!(" {}", object), false)?,
)
.chain(&[628.])
@@ -174,10 +162,10 @@ impl Moondream2 {
fn generate_text(&mut self, input_ids: &[f32], kv_cache: Array) -> Result {
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
- let mut input_embeds = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
+ let mut input_embeds = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
let logits_sampler = LogitsSampler::new();
let mut token_ids: Vec = Vec::new();
- let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
+ let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
let mut inc = input_embeds.shape()[1];
let mut kv_cache = kv_cache.clone();
@@ -192,7 +180,7 @@ impl Moondream2 {
.into_dyn()
.into(),
]);
- let decoder_outputs = self.text_decoder.inference(input)?;
+ let decoder_outputs = self.text_decoder.run(input)?;
// update
let logits = &decoder_outputs["logits"];
@@ -221,13 +209,10 @@ impl Moondream2 {
// encode
let next_tokens = X::from(vec![token_id as f32]).insert_axis(1)?;
- input_embeds = self.text_encoder.inference(Xs::from(next_tokens))?[0].to_owned();
+ input_embeds = self.text_encoder.run(Xs::from(next_tokens))?[0].to_owned();
}
- let text = self
- .text_encoder
- .processor()
- .decode_tokens(&token_ids, true)?;
+ let text = self.processor.decode_tokens(&token_ids, true)?;
Ok(text)
}
@@ -242,16 +227,16 @@ impl Moondream2 {
let mut y_bboxes: Vec = Vec::new();
let mut y_kpts: Vec> = Vec::new();
let (image_height, image_width) = (
- self.vision_encoder.processor.images_transform_info[0].height_src,
- self.vision_encoder.processor.images_transform_info[0].width_src,
+ self.processor.images_transform_info[0].height_src,
+ self.processor.images_transform_info[0].width_src,
);
- let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
+ let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
let logits_sampler = LogitsSampler::new();
// initial input_embeds
let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
- let mut hidden = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
+ let mut hidden = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
let mut kv_cache = kv_cache;
// generate
@@ -273,12 +258,7 @@ impl Moondream2 {
// cx
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
- let cx = self
- .coord_decoder
- .as_mut()
- .unwrap()
- .inference(Xs::from(input))?[0]
- .clone(); // [1024]
+ let cx = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [1024]
let ratio = cx.shape()[0] as f32;
let cx = logits_sampler
.decode(cx.as_slice().context("Failed to get slice for `cx`")?)?
@@ -288,7 +268,7 @@ impl Moondream2 {
.coord_encoder
.as_mut()
.unwrap()
- .inference(Xs::from(X::from(vec![cx])))?[0]
+ .run(Xs::from(X::from(vec![cx])))?[0]
.clone()
.insert_axis(0)?
.insert_axis(0)?;
@@ -296,12 +276,7 @@ impl Moondream2 {
// cy
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
- let cy = self
- .coord_decoder
- .as_mut()
- .unwrap()
- .inference(Xs::from(input))?[0]
- .clone();
+ let cy = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone();
let ratio = cy.shape()[0] as f32;
let cy = logits_sampler
@@ -313,7 +288,7 @@ impl Moondream2 {
.coord_encoder
.as_mut()
.unwrap()
- .inference(Xs::from(X::from(vec![cy])))?[0]
+ .run(Xs::from(X::from(vec![cy])))?[0]
.clone()
.insert_axis(0)?
.insert_axis(0)?;
@@ -324,6 +299,7 @@ impl Moondream2 {
cy * image_height as f32,
))
.with_id(0)
+ .with_confidence(1.)
.with_name(object)]);
// keep?
@@ -334,12 +310,7 @@ impl Moondream2 {
// wh
let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
- let size = self
- .size_decoder
- .as_mut()
- .unwrap()
- .inference(Xs::from(input))?[0]
- .clone(); // [2, 1024]
+ let size = self.size_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [2, 1024]
let ratio = size.shape()[1] as f32;
let w = logits_sampler.decode(
@@ -361,7 +332,7 @@ impl Moondream2 {
.size_encoder
.as_mut()
.unwrap()
- .inference(Xs::from(X::from(vec![w, h])))?[0]
+ .run(Xs::from(X::from(vec![w, h])))?[0]
.clone()
.insert_axis(0)?
.insert_axis(0)?; // [1024]
@@ -392,7 +363,7 @@ impl Moondream2 {
}
fn prepare_kv_cache(&mut self, image_embedding: &X) -> Result<Array<f32, IxDyn>> {
- let kv_cache_new = self.text_decoder.inference(Xs::from(vec![
+ let kv_cache_new = self.text_decoder.run(Xs::from(vec![
image_embedding.clone(),
self.initial_kv_cache.clone(),
]))?["new_kv_cache"]
@@ -421,7 +392,7 @@ impl Moondream2 {
kv_cache: &mut Array<f32, IxDyn>,
pos: &mut usize,
) -> Result<X> {
- let decoder_outputs = self.text_decoder.inference(Xs::from(vec![
+ let decoder_outputs = self.text_decoder.run(Xs::from(vec![
input_embeds.clone(),
kv_cache
.slice(s![.., .., .., .., ..*pos, ..])
@@ -442,38 +413,6 @@ impl Moondream2 {
Ok(decoder_outputs["logits"].to_owned())
}
-}
-
-#[derive(Debug, Builder)]
-pub struct VisionEncoder {
- engine: Engine,
- num_patch: usize,
- patch_size: usize,
- processor: Processor,
- ts: Ts,
-}
-
-impl VisionEncoder {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
- let (num_patch, patch_size, ts) = (
- engine.batch().opt(),
- engine.try_height().unwrap_or(&378.into()).opt(),
- engine.ts.clone(),
- );
- let processor = options
- .to_processor()?
- .with_image_width(patch_size as _)
- .with_image_height(patch_size as _);
-
- Ok(Self {
- engine,
- patch_size,
- num_patch,
- processor,
- ts,
- })
- }
fn create_patches(image: &Image, image_patch_size: usize) -> (Vec<Image>, (u32, u32)) {
let mut patches = vec![image.clone()];
@@ -515,10 +454,6 @@ impl VisionEncoder {
(patches, selected_template)
}
- pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
- self.engine.run(xs)
- }
-
pub fn encode(&mut self, x: &Image) -> Result<X> {
let (patches, selected_template) = Self::create_patches(x, self.patch_size);
let patches = self.processor.process_images(&patches)?;
@@ -526,7 +461,7 @@ impl VisionEncoder {
(selected_template.0 as usize),
(selected_template.1 as usize),
);
- let patch_emb = self.inference(patches.clone().into())?[0].clone();
+ let patch_emb = self.vision_encoder.run(patches.clone().into())?[0].clone();
let patch_emb = patch_emb.clone().0.into_dimensionality::<ndarray::Ix3>()?;
let patch_emb = Self::process_patch_emb(patch_emb, template)?;
let patch_emb = X::from(patch_emb.into_dyn()); // TODO .insert_axis(x),
@@ -608,30 +543,6 @@ impl VisionEncoder {
}
}
-#[derive(Debug, Builder)]
-pub struct VisionProjection {
- engine: Engine,
- seq_len: usize,
- ts: Ts,
-}
-
-impl VisionProjection {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
- let (seq_len, ts) = (engine.inputs_minoptmax[0][1].opt(), engine.ts.clone());
-
- Ok(Self {
- engine,
- seq_len,
- ts,
- })
- }
-
- pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
- self.engine.run(xs)
- }
-}
-
#[derive(Builder, Debug)]
struct KVCache(pub Array<f32, IxDyn>);
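// --- Editor's sketch (not part of this diff) ----------------------------------
// The hunks above route every Moondream2 sub-model call through `Engine::run`
// and keep a single position counter for the autoregressive loop: the KV cache
// is sliced up to `pos`, the text decoder runs once, and `pos` advances by the
// number of newly embedded tokens. A minimal standalone version of that step,
// with the cache type assumed to be `Array<f32, IxDyn>`:
use anyhow::Result;
use ndarray::{s, Array, IxDyn};
use crate::{Engine, Xs, X};

fn decode_step(
    decoder: &mut Engine,
    input_embeds: &X,
    kv_cache: &Array<f32, IxDyn>,
    pos: &mut usize,
) -> Result<X> {
    // feed back only the first `pos` cached positions
    let cache = kv_cache.slice(s![.., .., .., .., ..*pos, ..]).into_owned();
    let outs = decoder.run(Xs::from(vec![input_embeds.clone(), X::from(cache.into_dyn())]))?;
    *pos += input_embeds.shape()[1]; // advance by the number of new tokens
    Ok(outs["logits"].to_owned())
}
// -------------------------------------------------------------------------------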
diff --git a/src/models/owl/config.rs b/src/models/owl/config.rs
index cc17da9..9176a80 100644
--- a/src/models/owl/config.rs
+++ b/src/models/owl/config.rs
@@ -1,11 +1,10 @@
/// Model configuration for `OWLv2`
-impl crate::Options {
+impl crate::Config {
pub fn owlv2() -> Self {
Self::default()
- .with_model_name("owlv2")
- .with_model_kind(crate::Kind::VisionLanguage)
+ .with_name("owlv2")
// 1st & 3rd: text
- .with_model_ixx(0, 0, (1, 1, 1).into()) // TODO
+ .with_model_ixx(0, 0, (1, 1, 1).into())
.with_model_ixx(0, 1, 1.into())
.with_model_ixx(2, 0, (1, 1, 1).into())
.with_model_ixx(2, 1, 1.into())
@@ -21,6 +20,7 @@ impl crate::Options {
.with_normalize(true)
.with_class_confs(&[0.1])
.with_model_num_dry_run(0)
+ .with_tokenizer_file("owlv2/tokenizer.json")
}
pub fn owlv2_base() -> Self {
diff --git a/src/models/owl/impl.rs b/src/models/owl/impl.rs
index ca0ef67..ad7a4e1 100644
--- a/src/models/owl/impl.rs
+++ b/src/models/owl/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::{s, Axis};
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};
#[derive(Debug, Builder)]
pub struct OWLv2 {
@@ -22,8 +22,8 @@ pub struct OWLv2 {
}
impl OWLv2 {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&960.into()).opt(),
@@ -31,20 +31,19 @@ impl OWLv2 {
engine.ts.clone(),
);
let spec = engine.spec().to_owned();
- let processor = options
- .to_processor()?
- .with_image_width(width as _)
- .with_image_height(height as _);
- let names: Vec<String> = options
- .class_names()
- .expect("No class names specified.")
- .iter()
- .map(|x| x.to_string())
- .collect();
+ let names: Vec<String> = config.text_names().to_vec();
+ if names.is_empty() {
+ anyhow::bail!(
+ "No valid class names were provided in the config. Ensure the 'text_names' field is non-empty and contains valid class names."
+ );
+ }
let names_with_prompt: Vec<String> =
names.iter().map(|x| format!("a photo of {}", x)).collect();
let n = names.len();
- let confs = DynConf::new(options.class_confs(), n);
+ let confs = DynConf::new(config.class_confs(), n);
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
let input_ids: Vec = processor
.encode_texts_ids(
&names_with_prompt
diff --git a/src/models/picodet/config.rs b/src/models/picodet/config.rs
index b3988a5..073f755 100644
--- a/src/models/picodet/config.rs
+++ b/src/models/picodet/config.rs
@@ -4,11 +4,11 @@ use crate::{
};
/// Model configuration for `PicoDet`
-impl crate::Options {
+impl crate::Config {
pub fn picodet() -> Self {
Self::default()
- .with_model_name("picodet")
- .with_batch_size(1) // TODO: ONNX model's batch size seems always = 1
+ .with_name("picodet")
+ .with_batch_size_all(1) // TODO: ONNX model's batch size seems always = 1
.with_model_ixx(0, 2, 640.into())
.with_model_ixx(0, 3, 640.into())
.with_model_ixx(1, 0, (1, 1, 8).into())
diff --git a/src/models/picodet/impl.rs b/src/models/picodet/impl.rs
index 6bda13a..ce82629 100644
--- a/src/models/picodet/impl.rs
+++ b/src/models/picodet/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::Axis;
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};
#[derive(Debug, Builder)]
pub struct PicoDet {
@@ -19,8 +19,8 @@ pub struct PicoDet {
}
impl PicoDet {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&640.into()).opt(),
@@ -28,15 +28,11 @@ impl PicoDet {
engine.ts.clone(),
);
let spec = engine.spec().to_owned();
- let processor = options
- .to_processor()?
+ let names: Vec<String> = config.class_names().to_vec();
+ let confs = DynConf::new(config.class_confs(), names.len());
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let names = options
- .class_names()
- .expect("No class names are specified.")
- .to_vec();
- let confs = DynConf::new(options.class_confs(), names.len());
Ok(Self {
engine,
@@ -95,14 +91,15 @@ impl PicoDet {
return None;
}
let (x1, y1, x2, y2) = (pred[2], pred[3], pred[4], pred[5]);
+ let mut hbb = Hbb::default()
+ .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
+ .with_confidence(confidence)
+ .with_id(class_id);
+ if !self.names.is_empty() {
+ hbb = hbb.with_name(&self.names[class_id]);
+ }
- Some(
- Hbb::default()
- .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
- .with_confidence(confidence)
- .with_id(class_id)
- .with_name(&self.names[class_id]),
- )
+ Some(hbb)
})
.collect();
diff --git a/src/models/pipeline/basemodel.rs b/src/models/pipeline/basemodel.rs
index 062c84f..724f481 100644
--- a/src/models/pipeline/basemodel.rs
+++ b/src/models/pipeline/basemodel.rs
@@ -2,8 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use crate::{
- elapsed, DType, Device, Engine, Image, Kind, Options, Processor, Scale, Task, Ts, Version, Xs,
- X,
+ elapsed, Config, DType, Device, Engine, Image, Processor, Scale, Task, Ts, Version, Xs, X,
};
#[derive(Debug, Builder)]
@@ -20,7 +19,6 @@ pub struct BaseModelVisual {
dtype: DType,
task: Option<Task>,
scale: Option<Scale>,
- kind: Option<Kind>,
version: Option<Version>,
}
@@ -29,8 +27,8 @@ impl BaseModelVisual {
self.ts.summary();
}
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let err_msg = "You need to specify the image height and image width for visual model.";
let (batch, height, width, ts, spec) = (
engine.batch().opt(),
@@ -39,18 +37,15 @@ impl BaseModelVisual {
engine.ts.clone(),
engine.spec().to_owned(),
);
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
-
- let device = options.model_device;
- let task = options.model_task;
- let scale = options.model_scale;
- let dtype = options.model_dtype;
- let kind = options.model_kind;
- let name = options.model_name;
- let version = options.model_version;
+ let device = config.model.device;
+ let task = config.task;
+ let scale = config.scale;
+ let dtype = config.model.dtype;
+ let name = config.name;
+ let version = config.version;
Ok(Self {
engine,
@@ -63,7 +58,6 @@ impl BaseModelVisual {
dtype,
task,
scale,
- kind,
device,
version,
name,
@@ -101,7 +95,6 @@ pub struct BaseModelTextual {
dtype: DType,
task: Option<Task>,
scale: Option<Scale>,
- kind: Option<Kind>,
version: Option<Version>,
}
@@ -110,21 +103,20 @@ impl BaseModelTextual {
self.ts.summary();
}
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, ts, spec) = (
engine.batch().opt(),
engine.ts.clone(),
engine.spec().to_owned(),
);
- let processor = options.to_processor()?;
- let device = options.model_device;
- let task = options.model_task;
- let scale = options.model_scale;
- let dtype = options.model_dtype;
- let kind = options.model_kind;
- let name = options.model_name;
- let version = options.model_version;
+ let processor = Processor::try_from_config(&config.processor)?;
+ let device = config.model.device;
+ let dtype = config.model.dtype;
+ let task = config.task;
+ let scale = config.scale;
+ let name = config.name;
+ let version = config.version;
Ok(Self {
engine,
@@ -135,7 +127,6 @@ impl BaseModelTextual {
dtype,
task,
scale,
- kind,
device,
version,
name,
diff --git a/src/models/pipeline/image_classifier.rs b/src/models/pipeline/image_classifier.rs
index 0dafd43..e851740 100644
--- a/src/models/pipeline/image_classifier.rs
+++ b/src/models/pipeline/image_classifier.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::Axis;
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Image, Options, Prob, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Prob, Processor, Ts, Xs, Y};
#[derive(Debug, Builder)]
pub struct ImageClassifier {
@@ -12,19 +12,24 @@ pub struct ImageClassifier {
width: usize,
batch: usize,
apply_softmax: bool,
- ts: Ts,
processor: Processor,
- confs: DynConf,
- nc: usize,
names: Vec<String>,
spec: String,
+ topk: usize,
+ ts: Ts,
}
-impl TryFrom<Options> for ImageClassifier {
+impl TryFrom<Config> for ImageClassifier {
type Error = anyhow::Error;
- fn try_from(options: Options) -> Result<Self, Self::Error> {
- let engine = options.to_engine()?;
+ fn try_from(config: Config) -> Result<Self, Self::Error> {
+ Self::new(config)
+ }
+}
+
+impl ImageClassifier {
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -32,50 +37,27 @@ impl TryFrom for ImageClassifier {
engine.try_width().unwrap_or(&224.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let names = config.class_names.to_vec();
+ let apply_softmax = config.apply_softmax.unwrap_or_default();
+ let topk = config.topk.unwrap_or(5);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let (nc, names) = match (options.nc(), options.class_names()) {
- (Some(nc), Some(names)) => {
- if nc != names.len() {
- anyhow::bail!(
- "The length of the input class names: {} is inconsistent with the number of classes: {}.",
- names.len(),
- nc
- );
- }
- (nc, names.to_vec())
- }
- (Some(nc), None) => (
- nc,
- (0..nc).map(|x| format!("# {}", x)).collect::<Vec<_>>(),
- ),
- (None, Some(names)) => (names.len(), names.to_vec()),
- (None, None) => {
- anyhow::bail!("Neither class names nor class numbers were specified.");
- }
- };
- let confs = DynConf::new(options.class_confs(), nc);
- let apply_softmax = options.apply_softmax.unwrap_or_default();
Ok(Self {
engine,
height,
width,
batch,
- nc,
ts,
spec,
processor,
- confs,
names,
apply_softmax,
+ topk,
})
}
-}
-impl ImageClassifier {
pub fn summary(&mut self) {
self.ts.summary();
}
@@ -113,7 +95,7 @@ impl ImageClassifier {
let probs = Prob::new_probs(
&logits.into_raw_vec_and_offset().0,
Some(&self.names.iter().map(|x| x.as_str()).collect::<Vec<_>>()),
- 3,
+ self.topk,
);
Some(Y::default().with_probs(&probs))
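// --- Editor's sketch (not part of this diff) ----------------------------------
// With the hunk above, the classifier keeps the configured class names and
// returns `config.topk` probabilities (default 5) instead of the hard-coded 3.
// A rough usage sketch; the `with_topk`, `with_apply_softmax` and model-file
// names here are assumptions for illustration, not something this diff shows:
use usls::{models::ImageClassifier, Config, Image, Y};

fn classify(images: &[Image]) -> anyhow::Result<Vec<Y>> {
    let config = Config::default()
        .with_model_file("classifier.onnx") // hypothetical model file
        .with_apply_softmax(true)           // hypothetical builder for `apply_softmax`
        .with_topk(10)                      // hypothetical builder for the new `topk` field
        .commit()?;
    let mut model = ImageClassifier::new(config)?;
    model.forward(images) // assumed entry point, by analogy with the other models
}
// -------------------------------------------------------------------------------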
diff --git a/src/models/rfdetr/config.rs b/src/models/rfdetr/config.rs
index 53a861a..780faab 100644
--- a/src/models/rfdetr/config.rs
+++ b/src/models/rfdetr/config.rs
@@ -1,18 +1,17 @@
use crate::NAMES_COCO_91;
/// Model configuration for `RT-DETR`
-impl crate::Options {
+impl crate::Config {
pub fn rfdetr() -> Self {
Self::default()
- .with_model_name("rfdetr")
- .with_batch_size(1)
+ .with_name("rfdetr")
+ .with_model_ixx(0, 0, 1.into())
+ .with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 560.into())
.with_model_ixx(0, 3, 560.into())
.with_resize_mode(crate::ResizeMode::FitAdaptive)
- .with_normalize(true)
.with_image_mean(&[0.485, 0.456, 0.406])
.with_image_std(&[0.229, 0.224, 0.225])
- .with_class_confs(&[0.25])
.with_class_names(&NAMES_COCO_91)
}
diff --git a/src/models/rfdetr/impl.rs b/src/models/rfdetr/impl.rs
index dcac505..e5c2b5d 100644
--- a/src/models/rfdetr/impl.rs
+++ b/src/models/rfdetr/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::{s, Axis};
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, Y};
#[derive(Debug, Builder)]
pub struct RFDETR {
@@ -19,8 +19,8 @@ pub struct RFDETR {
}
impl RFDETR {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&560.into()).opt(),
@@ -28,16 +28,11 @@ impl RFDETR {
engine.ts.clone(),
);
let spec = engine.spec().to_owned();
- let processor = options
- .to_processor()?
+ let names: Vec<String> = config.class_names().to_vec();
+ let confs = DynConf::new(config.class_confs(), names.len());
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let names = options
- .class_names()
- .expect("No class names specified.")
- .to_vec();
- let confs = DynConf::new(options.class_confs(), names.len());
-
Ok(Self {
engine,
height,
@@ -107,14 +102,15 @@ impl RFDETR {
let y = cy - h / 2.;
let x = x.max(0.0).min(image_width as _);
let y = y.max(0.0).min(image_height as _);
+ let mut hbb = Hbb::default()
+ .with_xywh(x, y, w, h)
+ .with_confidence(conf)
+ .with_id(class_id as _);
+ if !self.names.is_empty() {
+ hbb = hbb.with_name(&self.names[class_id]);
+ }
- Some(
- Hbb::default()
- .with_xywh(x, y, w, h)
- .with_confidence(conf)
- .with_id(class_id as _)
- .with_name(&self.names[class_id]),
- )
+ Some(hbb)
})
.collect();
diff --git a/src/models/rmbg/config.rs b/src/models/rmbg/config.rs
index 65ae2bb..c0f9caa 100644
--- a/src/models/rmbg/config.rs
+++ b/src/models/rmbg/config.rs
@@ -1,9 +1,10 @@
/// Model configuration for `RMBG`
-impl crate::Options {
+impl crate::Config {
pub fn rmbg() -> Self {
Self::default()
- .with_model_name("rmbg")
+ .with_name("rmbg")
.with_model_ixx(0, 0, 1.into())
+ .with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 1024.into())
.with_model_ixx(0, 3, 1024.into())
}
diff --git a/src/models/rmbg/impl.rs b/src/models/rmbg/impl.rs
index f886fa6..34032e2 100644
--- a/src/models/rmbg/impl.rs
+++ b/src/models/rmbg/impl.rs
@@ -1,7 +1,7 @@
use aksr::Builder;
use anyhow::Result;
-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct RMBG {
@@ -15,8 +15,8 @@ pub struct RMBG {
}
impl RMBG {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -24,8 +24,7 @@ impl RMBG {
engine.try_width().unwrap_or(&1024.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
@@ -63,7 +62,6 @@ impl RMBG {
fn postprocess(&mut self, xs: Xs) -> Result> {
let mut ys: Vec = Vec::new();
for (idx, luma) in xs[0].axis_iter(ndarray::Axis(0)).enumerate() {
- // image size
let (h1, w1) = (
self.processor.images_transform_info[idx].height_src,
self.processor.images_transform_info[idx].width_src,
diff --git a/src/models/rtdetr/config.rs b/src/models/rtdetr/config.rs
index 2b2ebe9..ab87f23 100644
--- a/src/models/rtdetr/config.rs
+++ b/src/models/rtdetr/config.rs
@@ -1,15 +1,15 @@
use crate::NAMES_COCO_80;
/// Model configuration for `RT-DETR`
-impl crate::Options {
+impl crate::Config {
pub fn rtdetr() -> Self {
Self::default()
- .with_model_name("rtdetr")
- .with_batch_size(1)
+ .with_name("rtdetr")
+ .with_model_ixx(0, 0, 1.into())
+ .with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 640.into())
.with_model_ixx(0, 3, 640.into())
.with_resize_mode(crate::ResizeMode::FitAdaptive)
- .with_normalize(true)
.with_class_confs(&[0.5])
.with_class_names(&NAMES_COCO_80)
}
diff --git a/src/models/rtdetr/impl.rs b/src/models/rtdetr/impl.rs
index ebef66b..a75b09b 100644
--- a/src/models/rtdetr/impl.rs
+++ b/src/models/rtdetr/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::{s, Axis};
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};
#[derive(Debug, Builder)]
pub struct RTDETR {
@@ -19,8 +19,8 @@ pub struct RTDETR {
}
impl RTDETR {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&640.into()).opt(),
@@ -28,15 +28,11 @@ impl RTDETR {
engine.ts.clone(),
);
let spec = engine.spec().to_owned();
- let processor = options
- .to_processor()?
+ let names: Vec<String> = config.class_names().to_vec();
+ let confs = DynConf::new(config.class_confs(), names.len());
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let names = options
- .class_names()
- .expect("No class names specified.")
- .to_vec();
- let confs = DynConf::new(options.class_confs(), names.len());
Ok(Self {
engine,
@@ -87,14 +83,12 @@ impl RTDETR {
.enumerate()
.filter_map(|(idx, ((labels, boxes), scores))| {
let ratio = self.processor.images_transform_info[idx].height_scale;
-
let mut y_bboxes = Vec::new();
for (i, &score) in scores.iter().enumerate() {
let class_id = labels[i] as usize;
if score < self.confs[class_id] {
continue;
}
-
let xyxy = boxes.slice(s![i, ..]);
let (x1, y1, x2, y2) = (
xyxy[0] / ratio,
@@ -102,14 +96,14 @@ impl RTDETR {
xyxy[2] / ratio,
xyxy[3] / ratio,
);
-
- y_bboxes.push(
- Hbb::default()
- .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
- .with_confidence(score)
- .with_id(class_id)
- .with_name(&self.names[class_id]),
- );
+ let mut hbb = Hbb::default()
+ .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
+ .with_confidence(score)
+ .with_id(class_id);
+ if !self.names.is_empty() {
+ hbb = hbb.with_name(&self.names[class_id]);
+ }
+ y_bboxes.push(hbb);
}
let mut y = Y::default();
diff --git a/src/models/rtmo/config.rs b/src/models/rtmo/config.rs
index d223269..03f1d44 100644
--- a/src/models/rtmo/config.rs
+++ b/src/models/rtmo/config.rs
@@ -1,9 +1,10 @@
/// Model configuration for `RTMO`
-impl crate::Options {
+impl crate::Config {
pub fn rtmo() -> Self {
Self::default()
- .with_model_name("rtmo")
+ .with_name("rtmo")
.with_model_ixx(0, 0, 1.into())
+ .with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 640.into())
.with_model_ixx(0, 3, 640.into())
.with_resize_mode(crate::ResizeMode::FitAdaptive)
diff --git a/src/models/rtmo/impl.rs b/src/models/rtmo/impl.rs
index 1edb944..14cf442 100644
--- a/src/models/rtmo/impl.rs
+++ b/src/models/rtmo/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::Axis;
-use crate::{elapsed, DynConf, Engine, Hbb, Image, Keypoint, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Keypoint, Processor, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct RTMO {
@@ -18,8 +18,8 @@ pub struct RTMO {
}
impl RTMO {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -27,15 +27,13 @@ impl RTMO {
engine.try_width().unwrap_or(&512.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let nk = config.nk().unwrap_or(17);
+ let confs = DynConf::new(config.class_confs(), 1);
+ let kconfs = DynConf::new(config.keypoint_confs(), nk);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let nk = options.nk().unwrap_or(17);
- let confs = DynConf::new(options.class_confs(), 1);
- let kconfs = DynConf::new(options.keypoint_confs(), nk);
-
Ok(Self {
engine,
height,
diff --git a/src/models/sam/config.rs b/src/models/sam/config.rs
index 0e9ce58..bd03825 100644
--- a/src/models/sam/config.rs
+++ b/src/models/sam/config.rs
@@ -1,100 +1,73 @@
-use crate::{models::SamKind, Options};
+use crate::{models::SamKind, Config};
/// Model configuration for `Segment Anything Model`
-impl Options {
+impl Config {
pub fn sam() -> Self {
Self::default()
- .with_model_name("sam")
- .with_model_ixx(0, 0, 1.into())
- }
-
- pub fn sam_encoder() -> Self {
- Self::sam()
- .with_model_ixx(0, 2, 1024.into())
- .with_model_ixx(0, 3, 1024.into())
+ .with_name("sam")
+ .with_encoder_ixx(0, 0, 1.into())
+ .with_encoder_ixx(0, 1, 3.into())
+ .with_encoder_ixx(0, 2, 1024.into())
+ .with_encoder_ixx(0, 3, 1024.into())
.with_resize_mode(crate::ResizeMode::FitAdaptive)
.with_resize_filter("Bilinear")
.with_image_mean(&[123.5, 116.5, 103.5])
.with_image_std(&[58.5, 57.0, 57.5])
.with_normalize(false)
.with_sam_kind(SamKind::Sam)
- .with_low_res_mask(false)
+ .with_sam_low_res_mask(false)
.with_find_contours(true)
}
- pub fn sam_decoder() -> Self {
+ pub fn sam_v1_base() -> Self {
Self::sam()
+ .with_encoder_file("sam-vit-b-encoder.onnx")
+ .with_decoder_file("sam-vit-b-decoder.onnx")
}
- pub fn sam_v1_base_encoder() -> Self {
- Self::sam_encoder().with_model_file("sam-vit-b-encoder.onnx")
+ // pub fn sam_v1_base_singlemask_decoder() -> Self {
+ // Self::sam().with_decoder_file("sam-vit-b-decoder-singlemask.onnx")
+ // }
+
+ pub fn sam2_tiny() -> Self {
+ Self::sam()
+ .with_encoder_file("sam2-hiera-tiny-encoder.onnx")
+ .with_sam_kind(SamKind::Sam2)
+ .with_decoder_file("sam2-hiera-tiny-decoder.onnx")
}
- pub fn sam_v1_base_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam-vit-b-decoder.onnx")
- }
-
- pub fn sam_v1_base_singlemask_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam-vit-b-decoder-singlemask.onnx")
- }
-
- pub fn sam2_tiny_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("sam2-hiera-tiny-encoder.onnx")
+ pub fn sam2_small() -> Self {
+ Self::sam()
+ .with_encoder_file("sam2-hiera-small-encoder.onnx")
+ .with_decoder_file("sam2-hiera-small-decoder.onnx")
.with_sam_kind(SamKind::Sam2)
}
- pub fn sam2_tiny_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam2-hiera-tiny-decoder.onnx")
- }
-
- pub fn sam2_small_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("sam2-hiera-small-encoder.onnx")
+ pub fn sam2_base_plus() -> Self {
+ Self::sam()
+ .with_encoder_file("sam2-hiera-base-plus-encoder.onnx")
+ .with_decoder_file("sam2-hiera-base-plus-decoder.onnx")
.with_sam_kind(SamKind::Sam2)
}
- pub fn sam2_small_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam2-hiera-small-decoder.onnx")
- }
-
- pub fn sam2_base_plus_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("sam2-hiera-base-plus-encoder.onnx")
- .with_sam_kind(SamKind::Sam2)
- }
-
- pub fn sam2_base_plus_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam2-hiera-base-plus-decoder.onnx")
- }
-
- pub fn mobile_sam_tiny_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("mobile-sam-vit-t-encoder.onnx")
+ pub fn mobile_sam_tiny() -> Self {
+ Self::sam()
+ .with_encoder_file("mobile-sam-vit-t-encoder.onnx")
.with_sam_kind(SamKind::MobileSam)
+ .with_decoder_file("mobile-sam-vit-t-decoder.onnx")
}
- pub fn mobile_sam_tiny_decoder() -> Self {
- Self::sam_decoder().with_model_file("mobile-sam-vit-t-decoder.onnx")
- }
-
- pub fn sam_hq_tiny_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("sam-hq-vit-t-encoder.onnx")
+ pub fn sam_hq_tiny() -> Self {
+ Self::sam()
+ .with_encoder_file("sam-hq-vit-t-encoder.onnx")
.with_sam_kind(SamKind::SamHq)
+ .with_decoder_file("sam-hq-vit-t-decoder.onnx")
}
- pub fn sam_hq_tiny_decoder() -> Self {
- Self::sam_decoder().with_model_file("sam-hq-vit-t-decoder.onnx")
- }
-
- pub fn edge_sam_3x_encoder() -> Self {
- Self::sam_encoder()
- .with_model_file("edge-sam-3x-encoder.onnx")
+ pub fn edge_sam_3x() -> Self {
+ Self::sam()
+ .with_encoder_file("edge-sam-3x-encoder.onnx")
+ .with_decoder_file("edge-sam-3x-decoder.onnx")
.with_sam_kind(SamKind::EdgeSam)
}
-
- pub fn edge_sam_3x_decoder() -> Self {
- Self::sam_decoder().with_model_file("edge-sam-3x-decoder.onnx")
- }
}
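// --- Editor's sketch (not part of this diff) ----------------------------------
// Each SAM preset above now sets both `encoder_file` and `decoder_file`, so a
// model is built from a single `Config` instead of an encoder/decoder pair.
// A rough construction sketch under that assumption:
use usls::{models::SAM, Config};

fn build_sam() -> anyhow::Result<SAM> {
    let config = Config::mobile_sam_tiny().commit()?; // one preset, both engines
    SAM::new(config) // previously SAM::new(options_encoder, options_decoder)
}
// -------------------------------------------------------------------------------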
diff --git a/src/models/sam/impl.rs b/src/models/sam/impl.rs
index 0e02ca5..d022295 100644
--- a/src/models/sam/impl.rs
+++ b/src/models/sam/impl.rs
@@ -4,8 +4,7 @@ use ndarray::{s, Axis};
use rand::prelude::*;
use crate::{
- elapsed, DynConf, Engine, Image, Mask, Ops, Options, Polygon, Processor, SamPrompt, Ts, Xs, X,
- Y,
+ elapsed, Config, DynConf, Engine, Image, Mask, Ops, Polygon, Processor, SamPrompt, Ts, Xs, X, Y,
};
#[derive(Debug, Clone)]
@@ -49,9 +48,10 @@ pub struct SAM {
}
impl SAM {
- pub fn new(options_encoder: Options, options_decoder: Options) -> Result<Self> {
- let encoder = options_encoder.to_engine()?;
- let decoder = options_decoder.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let encoder = Engine::try_from_config(&config.encoder)?;
+ let decoder = Engine::try_from_config(&config.decoder)?;
+
let (batch, height, width) = (
encoder.batch().opt(),
encoder.try_height().unwrap_or(&1024.into()).opt(),
@@ -60,24 +60,23 @@ impl SAM {
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
let spec = encoder.spec().to_owned();
- let processor = options_encoder
- .to_processor()?
- .with_image_width(width as _)
- .with_image_height(height as _);
-
- let conf = DynConf::new(options_encoder.class_confs(), 1);
- let find_contours = options_encoder.find_contours;
- let kind = match options_encoder.sam_kind {
+ let conf = DynConf::new(config.class_confs(), 1);
+ let find_contours = config.find_contours;
+ let kind = match config.sam_kind {
Some(x) => x,
None => anyhow::bail!("Error: no clear `SamKind` specified."),
};
let use_low_res_mask = match kind {
SamKind::Sam | SamKind::MobileSam | SamKind::SamHq => {
- options_encoder.low_res_mask.unwrap_or(false)
+ config.sam_low_res_mask.unwrap_or(false)
}
SamKind::EdgeSam | SamKind::Sam2 => true,
};
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
+
Ok(Self {
encoder,
decoder,
diff --git a/src/models/sam2/config.rs b/src/models/sam2/config.rs
index db9df28..1ca09c8 100644
--- a/src/models/sam2/config.rs
+++ b/src/models/sam2/config.rs
@@ -1,50 +1,28 @@
-use crate::Options;
+use crate::Config;
/// Model configuration for `SAM2.1`
-impl Options {
- pub fn sam2_encoder() -> Self {
+impl Config {
+ pub fn sam2_1_tiny() -> Self {
Self::sam()
- .with_model_ixx(0, 2, 1024.into())
- .with_model_ixx(0, 3, 1024.into())
- .with_resize_mode(crate::ResizeMode::FitAdaptive)
- .with_resize_filter("Bilinear")
- .with_image_mean(&[0.485, 0.456, 0.406])
- .with_image_std(&[0.229, 0.224, 0.225])
+ .with_encoder_file("sam2.1-hiera-tiny-encoder.onnx")
+ .with_decoder_file("sam2.1-hiera-tiny-decoder.onnx")
}
- pub fn sam2_decoder() -> Self {
+ pub fn sam2_1_small() -> Self {
Self::sam()
+ .with_encoder_file("sam2.1-hiera-small-encoder.onnx")
+ .with_decoder_file("sam2.1-hiera-small-decoder.onnx")
}
- pub fn sam2_1_tiny_encoder() -> Self {
- Self::sam2_encoder().with_model_file("sam2.1-hiera-tiny-encoder.onnx")
+ pub fn sam2_1_base_plus() -> Self {
+ Self::sam()
+ .with_encoder_file("sam2.1-hiera-base-plus-encoder.onnx")
+ .with_decoder_file("sam2.1-hiera-base-plus-decoder.onnx")
}
- pub fn sam2_1_tiny_decoder() -> Self {
- Self::sam2_decoder().with_model_file("sam2.1-hiera-tiny-decoder.onnx")
- }
-
- pub fn sam2_1_small_encoder() -> Self {
- Self::sam2_encoder().with_model_file("sam2.1-hiera-small-encoder.onnx")
- }
-
- pub fn sam2_1_small_decoder() -> Self {
- Self::sam2_decoder().with_model_file("sam2.1-hiera-small-decoder.onnx")
- }
-
- pub fn sam2_1_base_plus_encoder() -> Self {
- Self::sam2_encoder().with_model_file("sam2.1-hiera-base-plus-encoder.onnx")
- }
-
- pub fn sam2_1_base_plus_decoder() -> Self {
- Self::sam2_decoder().with_model_file("sam2.1-hiera-base-plus-decoder.onnx")
- }
-
- pub fn sam2_1_large_encoder() -> Self {
- Self::sam2_encoder().with_model_file("sam2.1-hiera-large-encoder.onnx")
- }
-
- pub fn sam2_1_large_decoder() -> Self {
- Self::sam2_decoder().with_model_file("sam2.1-hiera-large-decoder.onnx")
+ pub fn sam2_1_large() -> Self {
+ Self::sam()
+ .with_encoder_file("sam2.1-hiera-large-encoder.onnx")
+ .with_decoder_file("sam2.1-hiera-large-decoder.onnx")
}
}
diff --git a/src/models/sam2/impl.rs b/src/models/sam2/impl.rs
index cb32dce..822ad06 100644
--- a/src/models/sam2/impl.rs
+++ b/src/models/sam2/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::{s, Axis};
use crate::{
- elapsed, DynConf, Engine, Image, Mask, Ops, Options, Processor, SamPrompt, Ts, Xs, X, Y,
+ elapsed, Config, DynConf, Engine, Image, Mask, Ops, Processor, SamPrompt, Ts, Xs, X, Y,
};
#[derive(Builder, Debug)]
@@ -20,9 +20,9 @@ pub struct SAM2 {
}
impl SAM2 {
- pub fn new(options_encoder: Options, options_decoder: Options) -> Result<Self> {
- let encoder = options_encoder.to_engine()?;
- let decoder = options_decoder.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let encoder = Engine::try_from_config(&config.encoder)?;
+ let decoder = Engine::try_from_config(&config.decoder)?;
let (batch, height, width) = (
encoder.batch().opt(),
encoder.try_height().unwrap_or(&1024.into()).opt(),
@@ -30,11 +30,10 @@ impl SAM2 {
);
let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
let spec = encoder.spec().to_owned();
- let processor = options_encoder
- .to_processor()?
+ let conf = DynConf::new(config.class_confs(), 1);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let conf = DynConf::new(options_encoder.class_confs(), 1);
Ok(Self {
encoder,
diff --git a/src/models/sapiens/config.rs b/src/models/sapiens/config.rs
index 053cf07..55547ff 100644
--- a/src/models/sapiens/config.rs
+++ b/src/models/sapiens/config.rs
@@ -1,15 +1,14 @@
use crate::NAMES_BODY_PARTS_28;
/// Model configuration for `Sapiens`
-impl crate::Options {
+impl crate::Config {
pub fn sapiens() -> Self {
Self::default()
- .with_model_name("sapiens")
+ .with_name("sapiens")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 2, 1024.into())
.with_model_ixx(0, 3, 768.into())
.with_resize_mode(crate::ResizeMode::FitExact)
- .with_resize_filter("Bilinear")
.with_image_mean(&[123.5, 116.5, 103.5])
.with_image_std(&[58.5, 57.0, 57.5])
.with_normalize(false)
@@ -17,31 +16,11 @@ impl crate::Options {
pub fn sapiens_body_part_segmentation() -> Self {
Self::sapiens()
- .with_model_task(crate::Task::InstanceSegmentation)
+ .with_task(crate::Task::InstanceSegmentation)
.with_class_names(&NAMES_BODY_PARTS_28)
}
pub fn sapiens_seg_0_3b() -> Self {
Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b.onnx")
}
-
- // pub fn sapiens_seg_0_3b_uint8() -> Self {
- // Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-uint8.onnx")
- // }
-
- // pub fn sapiens_seg_0_3b_fp16() -> Self {
- // Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-fp16.onnx")
- // }
-
- // pub fn sapiens_seg_0_3b_bnb4() -> Self {
- // Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-bnb4.onnx")
- // }
-
- // pub fn sapiens_seg_0_3b_q4f16() -> Self {
- // Self::sapiens_body_part_segmentation().with_model_file("seg-0.3b-q4f16.onnx")
- // }
-
- // pub fn sapiens_seg_0_6b_fp16() -> Self {
- // Self::sapiens_body_part_segmentation().with_model_file("seg-0.6b-fp16.onnx")
- // }
}
diff --git a/src/models/sapiens/impl.rs b/src/models/sapiens/impl.rs
index 65ea6fd..577e756 100644
--- a/src/models/sapiens/impl.rs
+++ b/src/models/sapiens/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::{s, Array2, Axis};
-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Polygon, Processor, Task, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Polygon, Processor, Task, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct Sapiens {
@@ -11,15 +11,15 @@ pub struct Sapiens {
width: usize,
batch: usize,
task: Task,
- names_body: Option<Vec<String>>,
+ names_body: Vec<String>,
ts: Ts,
processor: Processor,
spec: String,
}
impl Sapiens {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -27,12 +27,11 @@ impl Sapiens {
engine.try_width().unwrap_or(&768.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let task = config.task.expect("No sapiens task specified.");
+ let names_body = config.class_names;
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let task = options.model_task.expect("No sapiens task specified.");
- let names_body = options.class_names;
Ok(Self {
engine,
@@ -124,8 +123,8 @@ impl Sapiens {
if let Some(polygon) = mask.polygon() {
y_polygons.push(polygon);
}
- if let Some(names_body) = &self.names_body {
- mask = mask.with_name(&names_body[*i]);
+ if !self.names_body.is_empty() {
+ mask = mask.with_name(&self.names_body[*i]);
}
y_masks.push(mask);
}
diff --git a/src/models/slanet/config.rs b/src/models/slanet/config.rs
index f29b311..ded9884 100644
--- a/src/models/slanet/config.rs
+++ b/src/models/slanet/config.rs
@@ -1,14 +1,14 @@
/// Model configuration for `SLANet`
-impl crate::Options {
+impl crate::Config {
pub fn slanet() -> Self {
Self::default()
- .with_model_name("slanet")
+ .with_name("slanet")
.with_model_ixx(0, 0, (1, 1, 8).into())
+ .with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, (320, 488, 488).into())
.with_model_ixx(0, 3, (320, 488, 488).into())
.with_image_mean(&[0.485, 0.456, 0.406])
.with_image_std(&[0.229, 0.224, 0.225])
- .with_normalize(true)
.with_resize_mode(crate::ResizeMode::FitAdaptive)
.with_padding_value(0)
.with_unsigned(true)
@@ -17,6 +17,6 @@ impl crate::Options {
pub fn slanet_lcnet_v2_mobile_ch() -> Self {
Self::slanet()
.with_model_file("v2-mobile-ch.onnx")
- .with_vocab_txt("vocab-sla-v2.txt")
+ .with_vocab_txt("slanet/vocab-sla-v2.txt")
}
}
diff --git a/src/models/slanet/impl.rs b/src/models/slanet/impl.rs
index fc2524e..5df6b01 100644
--- a/src/models/slanet/impl.rs
+++ b/src/models/slanet/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
use anyhow::Result;
use ndarray::{s, Axis};
-use crate::{elapsed, models::BaseModelVisual, Image, Keypoint, Options, Ts, Xs, Y};
+use crate::{elapsed, models::BaseModelVisual, Config, Image, Keypoint, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct SLANet {
@@ -19,8 +19,8 @@ impl SLANet {
self.ts.summary();
}
- pub fn new(options: Options) -> Result<Self> {
- let base = BaseModelVisual::new(options)?;
+ pub fn new(config: Config) -> Result<Self> {
+ let base = BaseModelVisual::new(config)?;
let spec = base.engine().spec().to_owned();
let sos = 0;
let eos = base.processor().vocab().len() - 1;
diff --git a/src/models/smolvlm/config.rs b/src/models/smolvlm/config.rs
index 1aa4c9c..dcbdc9e 100644
--- a/src/models/smolvlm/config.rs
+++ b/src/models/smolvlm/config.rs
@@ -1,58 +1,28 @@
/// Model configuration for `SmolVLM`
-impl crate::Options {
+impl crate::Config {
pub fn smolvlm() -> Self {
Self::default()
- .with_batch_size(1)
- .with_model_name("smolvlm")
- .with_model_num_dry_run(3)
- }
-
- pub fn smolvlm_vision() -> Self {
- Self::smolvlm()
- .with_model_kind(crate::Kind::Vision)
+ .with_name("smolvlm")
+ .with_batch_size_all(1)
.with_image_mean(&[0.5, 0.5, 0.5])
.with_image_std(&[0.5, 0.5, 0.5])
.with_resize_filter("lanczos3")
- .with_normalize(true)
+ .with_tokenizer_file("smolvlm/tokenizer.json")
}
- pub fn smolvlm_text() -> Self {
- Self::smolvlm().with_model_kind(crate::Kind::Language)
+ pub fn smolvlm_256m() -> Self {
+ Self::smolvlm()
+ .with_scale(crate::Scale::Million(256.))
+ .with_visual_file("256m-vision-encoder.onnx")
+ .with_textual_file("256m-embed-tokens.onnx")
+ .with_textual_decoder_merged_file("256m-decoder-model-merged.onnx")
}
- pub fn smolvlm_vision_256m() -> Self {
- Self::smolvlm_vision()
- .with_model_scale(crate::Scale::Million(256.))
- .with_model_file("256m-vision-encoder.onnx")
- }
-
- pub fn smolvlm_text_embed_256m() -> Self {
- Self::smolvlm_text()
- .with_model_scale(crate::Scale::Million(256.))
- .with_model_file("256m-embed-tokens.onnx")
- }
-
- pub fn smolvlm_decoder_256m() -> Self {
- Self::smolvlm_text()
- .with_model_scale(crate::Scale::Million(256.))
- .with_model_file("256m-decoder-model-merged.onnx")
- }
-
- pub fn smolvlm_vision_500m() -> Self {
- Self::smolvlm_vision()
- .with_model_scale(crate::Scale::Million(500.))
- .with_model_file("500m-vision-encoder.onnx")
- }
-
- pub fn smolvlm_text_embed_500m() -> Self {
- Self::smolvlm_text()
- .with_model_scale(crate::Scale::Million(500.))
- .with_model_file("500m-embed-tokens.onnx")
- }
-
- pub fn smolvlm_decoder_500m() -> Self {
- Self::smolvlm_text()
- .with_model_scale(crate::Scale::Million(500.))
- .with_model_file("500m-decoder-model-merged.onnx")
+ pub fn smolvlm_500m() -> Self {
+ Self::smolvlm()
+ .with_scale(crate::Scale::Million(500.))
+ .with_visual_file("500m-vision-encoder.onnx")
+ .with_textual_file("500m-embed-tokens.onnx")
+ .with_textual_decoder_merged_file("500m-decoder-model-merged.onnx")
}
}
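// --- Editor's sketch (not part of this diff) ----------------------------------
// The SmolVLM presets above now bundle the vision encoder, the token-embedding
// model and the merged decoder into one `Config`, with `Scale` driving the
// KV-cache layout. A rough construction sketch under that assumption:
use usls::{models::SmolVLM, Config};

fn build_smolvlm() -> anyhow::Result<SmolVLM> {
    let config = Config::smolvlm_256m().commit()?; // 256M preset: all three ONNX files
    SmolVLM::new(config) // previously three separate Options values
}
// -------------------------------------------------------------------------------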
diff --git a/src/models/smolvlm/impl.rs b/src/models/smolvlm/impl.rs
index 9cb09e8..c5df584 100644
--- a/src/models/smolvlm/impl.rs
+++ b/src/models/smolvlm/impl.rs
@@ -3,15 +3,13 @@ use anyhow::Result;
use image::GenericImageView;
use ndarray::s;
-use crate::{
- models::BaseModelTextual, Engine, Image, LogitsSampler, Options, Processor, Scale, Ts, Xs, X, Y,
-};
+use crate::{Config, Engine, Image, LogitsSampler, Processor, Scale, Ts, Xs, X, Y};
#[derive(Debug, Builder)]
pub struct SmolVLM {
- vision: VisionEncoder,
- text_embed: BaseModelTextual,
- decoder: BaseModelTextual,
+ vision: Engine,
+ text_embed: Engine,
+ decoder: Engine,
scale: Scale,
image_token: String,
global_img_token: String,
@@ -25,17 +23,19 @@ pub struct SmolVLM {
num_hidden_layers: usize,
head_dim: usize,
num_key_value_heads: usize,
+ num_patch: usize,
+ batch: usize,
+ width: usize,
+ height: usize,
+ processor: Processor,
+ ts: Ts,
}
impl SmolVLM {
- pub fn new(
- options_vision_encoder: Options,
- options_text_embed: Options,
- options_decode: Options,
- ) -> Result<Self> {
- let vision = VisionEncoder::new(options_vision_encoder)?;
- let text_embed = BaseModelTextual::new(options_text_embed)?;
- let decoder = BaseModelTextual::new(options_decode)?;
+ pub fn new(config: Config) -> Result<Self> {
+ let vision = Engine::try_from_config(&config.visual)?;
+ let text_embed = Engine::try_from_config(&config.textual)?;
+ let decoder = Engine::try_from_config(&config.textual_decoder_merged)?;
let fake_image_token = "<fake_token_around_image>".to_string();
let image_token = "<image>".to_string();
let global_img_token = "<global-img>".to_string();
@@ -45,12 +45,22 @@ impl SmolVLM {
let image_token_id = 49190;
let image_seq_len = 64;
let max_length = 1024;
- let (num_hidden_layers, head_dim, num_key_value_heads) = match decoder.scale() {
+ let (num_hidden_layers, head_dim, num_key_value_heads) = match &config.scale {
Some(Scale::Million(256.)) => (30, 64, 3),
Some(Scale::Million(500.)) => (32, 64, 5),
_ => unimplemented!(),
};
- let scale = (*decoder.scale().unwrap()).clone();
+ let scale = config.scale.clone().unwrap();
+ let (batch, num_patch, height, width, ts) = (
+ vision.batch().opt(),
+ vision.inputs_minoptmax()[0][1].opt(),
+ vision.inputs_minoptmax()[0][3].opt(),
+ vision.inputs_minoptmax()[0][4].opt(),
+ vision.ts.clone(),
+ );
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
Ok(Self {
vision,
@@ -69,6 +79,12 @@ impl SmolVLM {
bos_token,
eos_token,
image_seq_len,
+ batch,
+ num_patch,
+ height,
+ width,
+ ts,
+ processor,
})
}
@@ -86,13 +102,13 @@ impl SmolVLM {
let bs = 1; // TODO
// patches and pixel_attention_mask
- let (patches, nw_nh) = self.vision.process_one(image)?;
+ let (patches, nw_nh) = self.process_one(image)?;
let dims = patches.dims();
let pixel_attention_mask = X::ones(&[dims[0], dims[1], dims[3], dims[4]]);
// input ids
let prompt = self.image_prompt_string(nw_nh, text);
- let mut input_ids: Vec<f32> = self.text_embed.processor().encode_text_ids(&prompt, true)?;
+ let mut input_ids: Vec<f32> = self.processor.encode_text_ids(&prompt, true)?;
// position ids
let mut position_ids = X::from(
@@ -114,12 +130,11 @@ impl SmolVLM {
for ii in 0..self.max_length {
// inputs embeds
let input_ids_x = X::from(input_ids.clone()).insert_axis(0)?;
- let mut inputs_embeds =
- self.text_embed.inference(input_ids_x.clone().into())?[0].clone();
+ let mut inputs_embeds = self.text_embed.run(input_ids_x.clone().into())?[0].clone();
// encode image and merge
if ii == 0 {
- let image_features = self.vision.inference(Xs::from(vec![
+ let image_features = self.vision.run(Xs::from(vec![
patches.clone(),
pixel_attention_mask.clone(),
]))?[0]
@@ -152,7 +167,7 @@ impl SmolVLM {
}
// decode
- let decoder_outputs = self.decoder.inference(xs.into())?;
+ let decoder_outputs = self.decoder.run(xs.into())?;
let logits = &decoder_outputs[0];
past_key_values = (1..decoder_outputs.len())
.step_by(2)
@@ -186,10 +201,7 @@ impl SmolVLM {
}
// decode tokens
- let text = self
- .text_embed
- .processor()
- .decode_tokens(&token_ids, true)?;
+ let text = self.processor.decode_tokens(&token_ids, true)?;
Ok(text)
}
@@ -233,44 +245,6 @@ impl SmolVLM {
}
}
}
-}
-
-#[derive(Debug, Builder)]
-pub struct VisionEncoder {
- engine: Engine,
- num_patch: usize,
- batch: usize,
- width: usize,
- height: usize,
- processor: Processor,
- ts: Ts,
-}
-
-impl VisionEncoder {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
- let (batch, num_patch, height, width, ts) = (
- engine.batch().opt(),
- engine.inputs_minoptmax()[0][1].opt(),
- engine.inputs_minoptmax()[0][3].opt(),
- engine.inputs_minoptmax()[0][4].opt(),
- engine.ts.clone(),
- );
- let processor = options
- .to_processor()?
- .with_image_width(width as _)
- .with_image_height(height as _);
-
- Ok(Self {
- engine,
- num_patch,
- batch,
- width,
- height,
- processor,
- ts,
- })
- }
fn create_patches(image: &Image, patch_size: (u32, u32)) -> (Vec<Image>, (u32, u32)) {
let mut patches = vec![];
@@ -307,10 +281,6 @@ impl VisionEncoder {
(patches, (nw, nh))
}
- pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
- self.engine.run(xs)
- }
-
pub fn process_one(&mut self, x: &Image) -> Result<(X, (u32, u32))> {
let (patches, nw_nh) = Self::create_patches(x, (self.width as _, self.height as _));
let patches = self.processor.process_images(&patches)?.insert_axis(0)?;
diff --git a/src/models/svtr/config.rs b/src/models/svtr/config.rs
index 8f0c06e..454c8f1 100644
--- a/src/models/svtr/config.rs
+++ b/src/models/svtr/config.rs
@@ -1,8 +1,8 @@
/// Model configuration for `SVTR`
-impl crate::Options {
+impl crate::Config {
pub fn svtr() -> Self {
Self::default()
- .with_model_name("svtr")
+ .with_name("svtr")
.with_model_ixx(0, 0, (1, 1, 8).into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 48.into())
@@ -14,11 +14,11 @@ impl crate::Options {
}
pub fn svtr_ch() -> Self {
- Self::svtr().with_vocab_txt("vocab-v1-ppocr-rec-ch.txt")
+ Self::svtr().with_vocab_txt("svtr/vocab-v1-ppocr-rec-ch.txt")
}
pub fn svtr_en() -> Self {
- Self::svtr().with_vocab_txt("vocab-v1-ppocr-rec-en.txt")
+ Self::svtr().with_vocab_txt("svtr/vocab-v1-ppocr-rec-en.txt")
}
pub fn ppocr_rec_v3_ch() -> Self {
diff --git a/src/models/svtr/impl.rs b/src/models/svtr/impl.rs
index eff6bda..3ae1ccf 100644
--- a/src/models/svtr/impl.rs
+++ b/src/models/svtr/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::Axis;
use rayon::prelude::*;
-use crate::{elapsed, DynConf, Engine, Image, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, DynConf, Engine, Image, Processor, Ts, Xs, Y};
#[derive(Builder, Debug)]
pub struct SVTR {
@@ -18,18 +18,17 @@ pub struct SVTR {
}
impl SVTR {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&960.into()).opt(),
engine.try_width().unwrap_or(&960.into()).opt(),
engine.ts.clone(),
);
- let spec = options.model_spec().to_string();
- let confs = DynConf::new(options.class_confs(), 1);
- let processor = options
- .to_processor()?
+ let spec = config.model.spec.to_string();
+ let confs = DynConf::new(config.class_confs(), 1);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
if processor.vocab().is_empty() {
diff --git a/src/models/trocr/config.rs b/src/models/trocr/config.rs
index 8343434..3b6f6c9 100644
--- a/src/models/trocr/config.rs
+++ b/src/models/trocr/config.rs
@@ -1,92 +1,52 @@
use crate::Scale;
/// Model configuration for `TrOCR`
-impl crate::Options {
+impl crate::Config {
pub fn trocr() -> Self {
- Self::default().with_model_name("trocr").with_batch_size(1)
- }
-
- pub fn trocr_visual() -> Self {
- Self::trocr()
- .with_model_kind(crate::Kind::Vision)
- .with_model_ixx(0, 1, 3.into())
- .with_model_ixx(0, 2, 384.into())
- .with_model_ixx(0, 3, 384.into())
+ Self::default()
+ .with_name("trocr")
+ .with_batch_size_all(1)
+ .with_visual_ixx(0, 1, 3.into())
+ .with_visual_ixx(0, 2, 384.into())
+ .with_visual_ixx(0, 3, 384.into())
.with_image_mean(&[0.5, 0.5, 0.5])
.with_image_std(&[0.5, 0.5, 0.5])
- .with_resize_filter("Bilinear")
- .with_normalize(true)
+ .with_resize_filter("lanczos3")
+ .with_tokenizer_file("trocr/tokenizer.json")
+ .with_config_file("trocr/config.json")
+ .with_special_tokens_map_file("trocr/special_tokens_map.json")
+ .with_tokenizer_config_file("trocr/tokenizer_config.json")
}
- pub fn trocr_textual() -> Self {
- Self::trocr().with_model_kind(crate::Kind::Language)
- }
-
- pub fn trocr_visual_small() -> Self {
- Self::trocr_visual().with_model_scale(Scale::S)
- }
-
- pub fn trocr_textual_small() -> Self {
- Self::trocr_textual()
- .with_model_scale(Scale::S)
+ pub fn trocr_small_printed() -> Self {
+ Self::trocr()
+ .with_scale(Scale::S)
+ .with_visual_file("s-encoder-printed.onnx")
+ .with_textual_decoder_file("s-decoder-printed.onnx")
+ .with_textual_decoder_merged_file("s-decoder-merged-printed.onnx")
.with_tokenizer_file("trocr/tokenizer-small.json")
}
- pub fn trocr_visual_base() -> Self {
- Self::trocr_visual().with_model_scale(Scale::B)
- }
-
- pub fn trocr_textual_base() -> Self {
- Self::trocr_textual()
- .with_model_scale(Scale::B)
+ pub fn trocr_base_handwritten() -> Self {
+ Self::trocr()
+ .with_scale(Scale::B)
+ .with_visual_file("b-encoder-handwritten.onnx")
+ .with_textual_decoder_file("b-decoder-handwritten.onnx")
+ .with_textual_decoder_merged_file("b-decoder-merged-handwritten.onnx")
.with_tokenizer_file("trocr/tokenizer-base.json")
}
- pub fn trocr_encoder_small_printed() -> Self {
- Self::trocr_visual_small().with_model_file("s-encoder-printed.onnx")
+ pub fn trocr_small_handwritten() -> Self {
+ Self::trocr_small_printed()
+ .with_visual_file("s-encoder-handwritten.onnx")
+ .with_textual_decoder_file("s-decoder-handwritten.onnx")
+ .with_textual_decoder_merged_file("s-decoder-merged-handwritten.onnx")
}
- pub fn trocr_decoder_small_printed() -> Self {
- Self::trocr_textual_small().with_model_file("s-decoder-printed.onnx")
- }
-
- pub fn trocr_decoder_merged_small_printed() -> Self {
- Self::trocr_textual_small().with_model_file("s-decoder-merged-printed.onnx")
- }
-
- pub fn trocr_encoder_small_handwritten() -> Self {
- Self::trocr_visual_small().with_model_file("s-encoder-handwritten.onnx")
- }
-
- pub fn trocr_decoder_small_handwritten() -> Self {
- Self::trocr_textual_small().with_model_file("s-decoder-handwritten.onnx")
- }
-
- pub fn trocr_decoder_merged_small_handwritten() -> Self {
- Self::trocr_textual_small().with_model_file("s-decoder-merged-handwritten.onnx")
- }
-
- pub fn trocr_encoder_base_printed() -> Self {
- Self::trocr_visual_base().with_model_file("b-encoder-printed.onnx")
- }
-
- pub fn trocr_decoder_base_printed() -> Self {
- Self::trocr_textual_base().with_model_file("b-decoder-printed.onnx")
- }
-
- pub fn trocr_decoder_merged_base_printed() -> Self {
- Self::trocr_textual_base().with_model_file("b-decoder-merged-printed.onnx")
- }
-
- pub fn trocr_encoder_base_handwritten() -> Self {
- Self::trocr_visual_base().with_model_file("b-encoder-handwritten.onnx")
- }
-
- pub fn trocr_decoder_base_handwritten() -> Self {
- Self::trocr_textual_base().with_model_file("b-decoder-handwritten.onnx")
- }
-
- pub fn trocr_decoder_merged_base_handwritten() -> Self {
- Self::trocr_textual_base().with_model_file("b-decoder-merged-handwritten.onnx")
+ pub fn trocr_base_printed() -> Self {
+ Self::trocr_base_handwritten()
+ .with_visual_file("b-encoder-printed.onnx")
+ .with_textual_decoder_file("b-decoder-printed.onnx")
+ .with_textual_decoder_merged_file("b-decoder-merged-printed.onnx")
}
}
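// --- Editor's sketch (not part of this diff) ----------------------------------
// The TrOCR presets above now name the encoder, decoder and merged-decoder ONNX
// files plus the shared tokenizer in one `Config`; `Scale` selects the decoder's
// KV layer count (6 for S, 12 for B). A rough construction sketch:
use usls::{models::TrOCR, Config};

fn build_trocr() -> anyhow::Result<TrOCR> {
    let config = Config::trocr_small_printed().commit()?;
    TrOCR::new(config) // previously TrOCR::new(encoder, decoder, decoder_merged)
}
// -------------------------------------------------------------------------------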
diff --git a/src/models/trocr/impl.rs b/src/models/trocr/impl.rs
index a0759a7..8811a07 100644
--- a/src/models/trocr/impl.rs
+++ b/src/models/trocr/impl.rs
@@ -3,11 +3,7 @@ use anyhow::Result;
use ndarray::{s, Axis};
use rayon::prelude::*;
-use crate::{
- elapsed,
- models::{BaseModelTextual, BaseModelVisual},
- Image, LogitsSampler, Options, Scale, Ts, Xs, X, Y,
-};
+use crate::{elapsed, Config, Engine, Image, LogitsSampler, Processor, Scale, Ts, Xs, X, Y};
#[derive(Debug, Copy, Clone)]
pub enum TrOCRKind {
@@ -29,35 +25,31 @@ impl TryFrom<&str> for TrOCRKind {
#[derive(Debug, Builder)]
pub struct TrOCR {
- encoder: BaseModelVisual,
- decoder: BaseModelTextual,
- decoder_merged: BaseModelTextual,
+ encoder: Engine,
+ decoder: Engine,
+ decoder_merged: Engine,
max_length: u32,
eos_token_id: u32,
decoder_start_token_id: u32,
ts: Ts,
n_kvs: usize,
+ processor: Processor,
+ batch: usize,
+ height: usize,
+ width: usize,
}
impl TrOCR {
- pub fn summary(&self) {
- self.ts.summary();
- }
-
- pub fn new(
- options_encoder: Options,
- options_decoder: Options,
- options_decoder_merged: Options,
- ) -> Result<Self> {
- let encoder = BaseModelVisual::new(options_encoder)?;
- let decoder = BaseModelTextual::new(options_decoder)?;
- let decoder_merged = BaseModelTextual::new(options_decoder_merged)?;
- let ts = Ts::merge(&[
- encoder.engine().ts(),
- decoder.engine().ts(),
- decoder_merged.engine().ts(),
- ]);
-
+ pub fn new(config: Config) -> Result<Self> {
+ let encoder = Engine::try_from_config(&config.visual)?;
+ let decoder = Engine::try_from_config(&config.textual_decoder)?;
+ let decoder_merged = Engine::try_from_config(&config.textual_decoder_merged)?;
+ let (batch, height, width) = (
+ encoder.batch().opt(),
+ encoder.try_height().unwrap_or(&384.into()).opt(),
+ encoder.try_width().unwrap_or(&384.into()).opt(),
+ );
+ let ts = Ts::merge(&[encoder.ts(), decoder.ts(), decoder_merged.ts()]);
// "bos_token": "", "eos_token": "", "sep_token": "",
// "model_max_length": 1000000000000000019884624838656,
// let bos_token = "";
@@ -68,11 +60,14 @@ impl TrOCR {
let max_length = 1024; // TODO
let eos_token_id = 2;
let decoder_start_token_id = 2;
- let n_kvs = match decoder.scale() {
+ let n_kvs = match &config.scale {
Some(Scale::S) => 6,
Some(Scale::B) => 12,
_ => unimplemented!(),
};
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
Ok(Self {
encoder,
@@ -83,11 +78,22 @@ impl TrOCR {
eos_token_id,
decoder_start_token_id,
n_kvs,
+ batch,
+ width,
+ height,
+ processor,
})
}
+ pub fn encode(&mut self, xs: &[Image]) -> Result<X> {
+ let ys = self.processor.process_images(xs)?;
+ self.batch = xs.len(); // update
+ let ys = self.encoder.run(ys.into())?;
+ Ok(ys[0].to_owned())
+ }
+
pub fn forward(&mut self, xs: &[Image]) -> Result<Vec<Y>> {
- let encoder_hidden_states = elapsed!("encode", self.ts, { self.encoder.encode(xs)? });
+ let encoder_hidden_states = elapsed!("encode", self.ts, { self.encode(xs)? });
let generated = elapsed!("generate", self.ts, {
self.generate(&encoder_hidden_states)?
});
@@ -100,10 +106,10 @@ impl TrOCR {
// input_ids
let input_ids = X::from(vec![self.decoder_start_token_id as f32])
.insert_axis(0)?
- .repeat(0, self.encoder.batch())?;
+ .repeat(0, self.batch)?;
// decoder
- let mut decoder_outputs = self.decoder.inference(Xs::from(vec![
+ let mut decoder_outputs = self.decoder.run(Xs::from(vec![
input_ids.clone(),
encoder_hidden_states.clone(),
]))?;
@@ -116,9 +122,9 @@ impl TrOCR {
.collect();
// token ids
- let mut token_ids: Vec<Vec<u32>> = vec![vec![]; self.encoder.batch()];
- let mut finished = vec![false; self.encoder.batch()];
- let mut last_tokens: Vec<f32> = vec![0.; self.encoder.batch()];
+ let mut token_ids: Vec<Vec<u32>> = vec![vec![]; self.batch];
+ let mut finished = vec![false; self.batch];
+ let mut last_tokens: Vec<f32> = vec![0.; self.batch];
let logits_sampler = LogitsSampler::new();
// generate
@@ -169,78 +175,15 @@ impl TrOCR {
xs.push(X::ones(&[1])); // use_cache
// generate
- decoder_outputs = self.decoder_merged.inference(xs.into())?;
+ decoder_outputs = self.decoder_merged.run(xs.into())?;
}
Ok(token_ids)
}
- // fn generate(&mut self, encoder_hidden_states: &X) -> Result>> {
- // // input_ids
- // let input_ids = X::from(vec![self.decoder_start_token_id as f32])
- // .insert_axis(0)?
- // .repeat(0, self.encoder.batch())?;
-
- // // decoder
- // let mut decoder_outputs = self.decoder.inference(Xs::from(vec![
- // input_ids.clone(),
- // encoder_hidden_states.clone(),
- // ]))?;
-
- // // encoder kvs
- // let encoder_kvs: Vec<_> = (3..4 * self.n_kvs)
- // .step_by(4)
- // .flat_map(|i| [i, i + 1])
- // .map(|i| decoder_outputs[i].clone())
- // .collect();
-
- // // token ids
- // let mut token_ids: Vec<Vec<u32>> = vec![vec![]; self.encoder.batch()];
-
- // // generate
- // for _ in 0..self.max_length {
- // let logits = &decoder_outputs[0];
- // let decoder_kvs: Vec<_> = (1..(4 * self.n_kvs) - 2)
- // .step_by(4)
- // .flat_map(|i| [i, i + 1])
- // .map(|i| decoder_outputs[i].clone())
- // .collect();
-
- // // decode each token for each batch
- // let (finished, last_tokens) = self.decoder_merged.processor().par_generate(
- // logits,
- // &mut token_ids,
- // self.eos_token_id,
- // )?;
-
- // if finished {
- // break;
- // }
-
- // // build inputs
- // let input_ids = X::from(last_tokens).insert_axis(1)?;
- // let mut xs = vec![input_ids, encoder_hidden_states.clone()];
- // for i in 0..self.n_kvs {
- // xs.push(decoder_kvs[i * 2].clone());
- // xs.push(decoder_kvs[i * 2 + 1].clone());
- // xs.push(encoder_kvs[i * 2].clone());
- // xs.push(encoder_kvs[i * 2 + 1].clone());
- // }
- // xs.push(X::ones(&[1])); // use_cache
-
- // // generate
- // decoder_outputs = self.decoder_merged.inference(xs.into())?;
- // }
-
- // Ok(token_ids)
- // }
-
pub fn decode(&self, token_ids: Vec<Vec<u32>>) -> Result<Vec<Y>> {
// decode
- let texts = self
- .decoder_merged
- .processor()
- .decode_tokens_batch(&token_ids, false)?;
+ let texts = self.processor.decode_tokens_batch(&token_ids, false)?;
// to texts
let texts = texts
@@ -250,101 +193,8 @@ impl TrOCR {
Ok(texts)
}
+
+ pub fn summary(&self) {
+ self.ts.summary();
+ }
}
-
-// #[derive(Debug, Builder)]
-// pub struct TrOCREncoder {
-// // TODO: `BaseVisualEncoder`, `BaseVisualModel` struct?
-// engine: Engine,
-// height: usize,
-// width: usize,
-// batch: usize,
-// processor: Processor,
-// }
-
-// impl TrOCREncoder {
-// pub fn new(options: Options) -> Result<Self> {
-// let engine = options.to_engine()?;
-// let (batch, height, width) = (
-// engine.batch().opt(),
-// engine.try_height().unwrap_or(&384.into()).opt(),
-// engine.try_width().unwrap_or(&384.into()).opt(),
-// );
-// let processor = options
-// .to_processor()?
-// .with_image_width(width as _)
-// .with_image_height(height as _);
-
-// Ok(Self {
-// engine,
-// height,
-// width,
-// batch,
-// processor,
-// })
-// }
-
-// pub fn preprocess(&mut self, xs: &[DynamicImage]) -> Result<Xs> {
-// self.batch = xs.len(); // TODO
-// let x = self.processor.process_images(xs)?;
-
-// Ok(x.into())
-// }
-
-// pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-// self.engine.run(xs)
-// }
-
-// fn encode(&mut self, xs: &[DynamicImage]) -> Result<X> {
-// // encode a batch of images into one embedding, that's `X`
-// let xs = self.preprocess(xs)?;
-// let xs = self.inference(xs)?;
-// let x = xs[0].to_owned();
-
-// Ok(x)
-// }
-// }
-
-// #[derive(Debug, Builder)]
-// pub struct TrOCRDecoder {
-// engine: Engine,
-// batch: usize,
-// }
-
-// impl TrOCRDecoder {
-// pub fn new(options: Options) -> Result<Self> {
-// let engine = options.to_engine()?;
-// let batch = engine.batch().opt();
-
-// Ok(Self { engine, batch })
-// }
-
-// pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-// self.engine.run(xs)
-// }
-// }
-
-// #[derive(Debug, Builder)]
-// pub struct TrOCRDecoderMerged {
-// engine: Engine,
-// batch: usize,
-// processor: Processor,
-// }
-
-// impl TrOCRDecoderMerged {
-// pub fn new(options: Options) -> Result<Self> {
-// let engine = options.to_engine()?;
-// let batch = engine.batch().opt();
-// let processor = options.to_processor()?;
-
-// Ok(Self {
-// engine,
-// batch,
-// processor,
-// })
-// }
-
-// pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-// self.engine.run(xs)
-// }
-// }
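A minimal usage sketch (outside the diff) for the single-`Config` TrOCR constructor introduced above. `Config::trocr_small_printed()` and the image path are hypothetical placeholders; substitute whichever TrOCR preset and test image the crate actually ships.

use usls::{models::TrOCR, Config, DataLoader};

fn main() -> anyhow::Result<()> {
    // Hypothetical preset; any TrOCR preset on `Config` follows the same pattern.
    let config = Config::trocr_small_printed()
        .with_device_all("cpu:0".try_into()?)
        .commit()?;
    let mut model = TrOCR::new(config)?;

    // Hypothetical image path.
    let xs = DataLoader::try_read_n(&["./assets/text.png"])?;
    let ys = model.forward(&xs)?;
    println!("{:?}", ys);

    model.summary();
    Ok(())
}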
diff --git a/src/models/yolo/config.rs b/src/models/yolo/config.rs
index 8034827..1f240e9 100644
--- a/src/models/yolo/config.rs
+++ b/src/models/yolo/config.rs
@@ -1,234 +1,142 @@
use crate::{
- models::YOLOPredsFormat, Options, ResizeMode, Scale, Task, NAMES_COCO_KEYPOINTS_17,
+ models::YOLOPredsFormat, Config, ResizeMode, Scale, Task, NAMES_COCO_80,
+ NAMES_COCO_KEYPOINTS_17, NAMES_DOTA_V1_15, NAMES_IMAGENET_1K, NAMES_YOLOE_4585,
NAMES_YOLO_DOCLAYOUT_10,
};
-impl Options {
+impl Config {
pub fn yolo() -> Self {
Self::default()
- .with_model_name("yolo")
+ .with_name("yolo")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 1, 3.into())
.with_model_ixx(0, 2, 640.into())
.with_model_ixx(0, 3, 640.into())
.with_resize_mode(ResizeMode::FitAdaptive)
.with_resize_filter("CatmullRom")
- .with_find_contours(true)
- }
-
- pub fn doclayout_yolo_docstructbench() -> Self {
- Self::yolo_v10()
- .with_model_file("doclayout-docstructbench.onnx") // TODO: batch_size > 1
- .with_model_ixx(0, 2, (640, 1024, 1024).into())
- .with_model_ixx(0, 3, (640, 1024, 1024).into())
- .with_class_confs(&[0.4])
- .with_class_names(&NAMES_YOLO_DOCLAYOUT_10)
}
pub fn yolo_classify() -> Self {
Self::yolo()
- .with_model_task(Task::ImageClassification)
+ .with_task(Task::ImageClassification)
.with_model_ixx(0, 2, 224.into())
.with_model_ixx(0, 3, 224.into())
.with_resize_mode(ResizeMode::FitExact)
.with_resize_filter("Bilinear")
+ .with_class_names(&NAMES_IMAGENET_1K)
}
pub fn yolo_detect() -> Self {
- Self::yolo().with_model_task(Task::ObjectDetection)
+ Self::yolo()
+ .with_task(Task::ObjectDetection)
+ .with_class_names(&NAMES_COCO_80)
}
pub fn yolo_pose() -> Self {
Self::yolo()
- .with_model_task(Task::KeypointsDetection)
+ .with_task(Task::KeypointsDetection)
.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17)
}
pub fn yolo_segment() -> Self {
- Self::yolo().with_model_task(Task::InstanceSegmentation)
+ Self::yolo()
+ .with_task(Task::InstanceSegmentation)
+ .with_class_names(&NAMES_COCO_80)
}
pub fn yolo_obb() -> Self {
- Self::yolo().with_model_task(Task::OrientedObjectDetection)
- }
-
- pub fn fastsam_s() -> Self {
- Self::yolo_segment()
- .with_model_scale(Scale::S)
- .with_model_version(8.into())
- .with_model_file("FastSAM-s.onnx")
- }
-
- pub fn yolo_v8_rtdetr() -> Self {
Self::yolo()
- .with_model_version(7.into())
- .with_yolo_preds_format(YOLOPredsFormat::n_a_cxcywh_clss_n())
+ .with_model_ixx(0, 2, 1024.into())
+ .with_model_ixx(0, 3, 1024.into())
+ .with_task(Task::OrientedObjectDetection)
+ .with_class_names(&NAMES_DOTA_V1_15)
}
- pub fn yolo_v8_rtdetr_l() -> Self {
- Self::yolo_v8_rtdetr()
- .with_yolo_preds_format(YOLOPredsFormat::n_a_cxcywh_clss_n())
- .with_model_scale(Scale::L)
- .with_model_file("rtdetr-l-det.onnx")
+ pub fn doclayout_yolo_docstructbench() -> Self {
+ Self::yolo_detect()
+ .with_version(10.into())
+ .with_model_ixx(0, 2, (640, 1024, 1024).into())
+ .with_model_ixx(0, 3, (640, 1024, 1024).into())
+ .with_class_confs(&[0.4])
+ .with_class_names(&NAMES_YOLO_DOCLAYOUT_10)
+ .with_model_file("doclayout-docstructbench.onnx") // TODO: batch_size > 1
}
- pub fn yolo_v8_rtdetr_x() -> Self {
- Self::yolo_v8_rtdetr()
- .with_yolo_preds_format(YOLOPredsFormat::n_a_cxcywh_clss_n())
- .with_model_scale(Scale::X)
- }
-
- pub fn yolo_n() -> Self {
- Self::yolo().with_model_scale(Scale::N)
- }
-
- pub fn yolo_s() -> Self {
- Self::yolo().with_model_scale(Scale::S)
- }
-
- pub fn yolo_m() -> Self {
- Self::yolo().with_model_scale(Scale::M)
- }
-
- pub fn yolo_l() -> Self {
- Self::yolo().with_model_scale(Scale::L)
- }
-
- pub fn yolo_x() -> Self {
- Self::yolo().with_model_scale(Scale::X)
- }
-
- pub fn yolo_v5() -> Self {
- Self::yolo().with_model_version(5.into())
- }
-
- pub fn yolo_v6() -> Self {
- Self::yolo().with_model_version(6.into())
- }
-
- pub fn yolo_v7() -> Self {
- Self::yolo().with_model_version(7.into())
- }
-
- pub fn yolo_v8() -> Self {
- Self::yolo().with_model_version(8.into())
- }
-
- pub fn yolo_v9() -> Self {
- Self::yolo().with_model_version(9.into())
- }
-
- pub fn yolo_v10() -> Self {
- Self::yolo().with_model_version(10.into())
- }
-
- pub fn yolo_v11() -> Self {
- Self::yolo().with_model_version(11.into())
- }
-
- pub fn yolo_v12() -> Self {
- Self::yolo().with_model_version(12.into())
- }
-
- pub fn yolo_v8_n() -> Self {
+ // YOLOE models
+ pub fn yoloe() -> Self {
Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::N)
+ .with_task(Task::InstanceSegmentation)
+ .with_class_names(&NAMES_YOLOE_4585)
}
-
- pub fn yolo_v8_s() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::S)
- }
-
- pub fn yolo_v8_m() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::M)
- }
-
- pub fn yolo_v8_l() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::L)
- }
-
- pub fn yolo_v8_x() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::X)
- }
-
- pub fn yolo_v11_n() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::N)
- }
-
- pub fn yolo_v11_s() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::S)
- }
-
- pub fn yolo_v11_m() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::M)
- }
-
- pub fn yolo_v11_l() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::L)
- }
-
- pub fn yolo_v11_x() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::X)
- }
-
pub fn yoloe_v8s_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::S)
+ Self::yoloe()
+ .with_version(8.into())
+ .with_scale(Scale::S)
.with_model_file("yoloe-v8s-seg-pf.onnx")
}
pub fn yoloe_v8m_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::M)
+ Self::yoloe()
+ .with_version(8.into())
+ .with_scale(Scale::M)
.with_model_file("yoloe-v8m-seg-pf.onnx")
}
pub fn yoloe_v8l_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(8.into())
- .with_model_scale(Scale::L)
+ Self::yoloe()
+ .with_version(8.into())
+ .with_scale(Scale::L)
.with_model_file("yoloe-v8l-seg-pf.onnx")
}
pub fn yoloe_11s_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::S)
+ Self::yoloe()
+ .with_version(11.into())
+ .with_scale(Scale::S)
.with_model_file("yoloe-11s-seg-pf.onnx")
}
pub fn yoloe_11m_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::M)
+ Self::yoloe()
+ .with_version(11.into())
+ .with_scale(Scale::M)
.with_model_file("yoloe-v8m-seg-pf.onnx")
}
pub fn yoloe_11l_seg_pf() -> Self {
- Self::yolo()
- .with_model_version(11.into())
- .with_model_scale(Scale::L)
+ Self::yoloe()
+ .with_version(11.into())
+ .with_scale(Scale::L)
.with_model_file("yoloe-11l-seg-pf.onnx")
}
+
+ pub fn fastsam_s() -> Self {
+ Self::yolo_segment()
+ .with_class_names(&["object"])
+ .with_scale(Scale::S)
+ .with_version(8.into())
+ .with_model_file("FastSAM-s.onnx")
+ }
+
+ pub fn fastsam_x() -> Self {
+ Self::yolo_segment()
+ .with_class_names(&["object"])
+ .with_scale(Scale::X)
+ .with_version(8.into())
+ .with_model_file("FastSAM-x.onnx")
+ }
+
+ pub fn ultralytics_rtdetr_l() -> Self {
+ Self::yolo_detect()
+ .with_yolo_preds_format(YOLOPredsFormat::n_a_cxcywh_clss_n())
+ .with_scale(Scale::L)
+ .with_model_file("rtdetr-l.onnx")
+ }
+
+ pub fn ultralytics_rtdetr_x() -> Self {
+ Self::yolo_detect()
+ .with_yolo_preds_format(YOLOPredsFormat::n_a_cxcywh_clss_n())
+ .with_scale(Scale::X)
+ .with_model_file("rtdetr-x.onnx")
+ }
}
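A quick sketch (outside the diff) of the renamed builders above: version, scale and task now sit directly on `Config`, and `commit()` (see `src/utils/config.rs` later in this patch) assembles a default model file name from them when none is set explicitly. The `forward` call is assumed to be the usual YOLO entry point; the image path is a placeholder.

use usls::{models::YOLO, Config, DataLoader, Scale};

fn main() -> anyhow::Result<()> {
    // No `.with_model_file(...)`: `commit()` derives a "version-scale-task" file name.
    let config = Config::yolo_detect()
        .with_version(11.into())
        .with_scale(Scale::S)
        .commit()?;
    let mut model = YOLO::new(config)?;

    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?; // placeholder path
    let ys = model.forward(&xs)?;
    println!("{:?}", ys);
    Ok(())
}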
diff --git a/src/models/yolo/impl.rs b/src/models/yolo/impl.rs
index a15d902..5f56d11 100644
--- a/src/models/yolo/impl.rs
+++ b/src/models/yolo/impl.rs
@@ -8,7 +8,7 @@ use regex::Regex;
use crate::{
elapsed,
models::{BoxType, YOLOPredsFormat},
- DynConf, Engine, Hbb, Image, Keypoint, Mask, NmsOps, Obb, Ops, Options, Prob, Processor, Task,
+ Config, DynConf, Engine, Hbb, Image, Keypoint, Mask, NmsOps, Obb, Ops, Prob, Processor, Task,
Ts, Version, Xs, Y,
};
@@ -28,25 +28,25 @@ pub struct YOLO {
confs: DynConf,
kconfs: DynConf,
iou: f32,
+ topk: usize,
processor: Processor,
ts: Ts,
spec: String,
classes_excluded: Vec<usize>,
classes_retained: Vec<usize>,
- topk: usize,
}
-impl TryFrom<Options> for YOLO {
+impl TryFrom<Config> for YOLO {
type Error = anyhow::Error;
- fn try_from(options: Options) -> Result<Self, Self::Error> {
- Self::new(options)
+ fn try_from(config: Config) -> Result<Self, Self::Error> {
+ Self::new(config)
}
}
impl YOLO {
- pub fn new(options: Options) -> Result<Self> {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result<Self> {
+ let engine = Engine::try_from_config(&config.model)?;
let (batch, height, width, ts, spec) = (
engine.batch().opt(),
engine.try_height().unwrap_or(&640.into()).opt(),
@@ -54,11 +54,7 @@ impl YOLO {
engine.ts.clone(),
engine.spec().to_owned(),
);
- let processor = options
- .to_processor()?
- .with_image_width(width as _)
- .with_image_height(height as _);
- let task: Option<Task> = match &options.model_task {
+ let task: Option<Task> = match &config.task {
Some(task) => Some(task.clone()),
None => match engine.try_fetch("task") {
Some(x) => match x.as_str() {
@@ -77,8 +73,8 @@ impl YOLO {
};
// Task & layout
- let version = options.model_version;
- let (layout, task) = match &options.yolo_preds_format {
+ let version = config.version;
+ let (layout, task) = match &config.yolo_preds_format {
// customized
Some(layout) => {
// check task
@@ -130,9 +126,10 @@ impl YOLO {
(Task::InstanceSegmentation, Version(5, 0, _)) => {
YOLOPredsFormat::n_a_cxcywh_confclss_coefs()
}
- (Task::InstanceSegmentation, Version(8, 0, _) | Version(11, 0, _)) => {
- YOLOPredsFormat::n_cxcywh_clss_coefs_a()
- }
+ (
+ Task::InstanceSegmentation,
+ Version(8, 0, _) | Version(9, 0, _) | Version(11, 0, _),
+ ) => YOLOPredsFormat::n_cxcywh_clss_coefs_a(),
(Task::OrientedObjectDetection, Version(8, 0, _) | Version(11, 0, _)) => {
YOLOPredsFormat::n_cxcywh_clss_r_a()
}
@@ -171,70 +168,78 @@ impl YOLO {
};
// Class names
- let names: Option<Vec<String>> = match Self::fetch_names_from_onnx(&engine) {
- Some(names_parsed) => match &options.class_names {
- Some(names) => {
- if names.len() == names_parsed.len() {
- // prioritize user-defined
- Some(names.clone())
- } else {
- // Fail to override
- anyhow::bail!(
- "The lengths of parsed class names: {} and user-defined class names: {} do not match.",
- names_parsed.len(),
- names.len(),
- )
- }
+ let names_parsed = Self::fetch_names_from_onnx(&engine);
+ let names_customized = config.class_names.to_vec();
+ let names: Vec<_> = match (names_parsed, names_customized.is_empty()) {
+ (None, true) => vec![],
+ (None, false) => names_customized,
+ (Some(names_parsed), true) => names_parsed,
+ (Some(names_parsed), false) => {
+ if names_parsed.len() == names_customized.len() {
+ names_customized // prioritize user-defined
+ } else {
+ anyhow::bail!(
+ "The lengths of parsed class names: {} and user-defined class names: {} do not match.",
+ names_parsed.len(),
+ names_customized.len(),
+ );
}
- None => Some(names_parsed),
- },
- None => options.class_names.clone(),
+ }
};
// Class names & Number of class
- let (nc, names) = match (options.nc(), names) {
- (_, Some(names)) => (names.len(), names.to_vec()),
- (Some(nc), None) => (nc, Self::n2s(nc)),
- (None, None) => {
- anyhow::bail!(
- "Neither class names nor the number of classes were specified. \
- \nConsider specify them with `Options::default().with_nc()` or `Options::default().with_class_names()`"
- );
+ let nc = match config.nc() {
+ None => names.len(),
+ Some(n) => {
+ if names.len() != n {
+ anyhow::bail!(
+ "The lengths of class names: {} and user-defined num_classes: {} do not match.",
+ names.len(),
+ n,
+ )
+ }
+ n
}
};
+ if nc == 0 && names.is_empty() {
+ anyhow::bail!(
+ "Neither class names nor the number of classes were specified. \
+ \nConsider specifying them with `Config::default().with_nc()` or `Config::default().with_class_names()`"
+ );
+ }
// Keypoint names & Number of keypoints
- let (nk, names_kpt) = if let Task::KeypointsDetection = task {
- let nk = Self::fetch_nk_from_onnx(&engine).or(options.nk());
- match (&options.keypoint_names, nk) {
- (Some(names), Some(nk)) => {
- if names.len() != nk {
+ let names_kpt = config.keypoint_names.to_vec();
+ let nk = if let Task::KeypointsDetection = task {
+ match (names_kpt.is_empty(), Self::fetch_nk_from_onnx(&engine).or(config.nk())) {
+ (false, Some(nk)) => {
+ if names_kpt.len() != nk {
anyhow::bail!(
- "The lengths of user-defined keypoint names: {} and nk parsed: {} do not match.",
- names.len(),
+ "The lengths of user-defined keypoint class names: {} and num_keypoints: {} do not match.",
+ names_kpt.len(),
nk,
);
}
- (nk, names.clone())
- }
- (Some(names), None) => (names.len(), names.clone()),
- (None, Some(nk)) => (nk, Self::n2s(nk)),
- (None, None) => anyhow::bail!(
+ nk
+ },
+ (false, None) => names_kpt.len(),
+ (true, Some(nk)) => nk,
+ (true, None) => anyhow::bail!(
"Neither keypoint names nor the number of keypoints were specified when doing `KeypointsDetection` task. \
- \nConsider specify them with `Options::default().with_nk()` or `Options::default().with_keypoint_names()`"
+ \nConsider specifying them with `Config::default().with_nk()` or `Config::default().with_keypoint_names()`"
),
}
} else {
- (0, vec![])
+ 0
};
// Attributes
- let topk = options.topk().unwrap_or(5);
- let confs = DynConf::new(options.class_confs(), nc);
- let kconfs = DynConf::new(options.keypoint_confs(), nk);
- let iou = options.iou().unwrap_or(0.45);
- let classes_excluded = options.classes_excluded().to_vec();
- let classes_retained = options.classes_retained().to_vec();
+ let topk = config.topk().unwrap_or(5);
+ let confs = DynConf::new(config.class_confs(), nc);
+ let kconfs = DynConf::new(config.keypoint_confs(), nk);
+ let iou = config.iou().unwrap_or(0.45);
+ let classes_excluded = config.classes_excluded().to_vec();
+ let classes_retained = config.classes_retained().to_vec();
let mut info = format!(
"YOLO Version: {}, Task: {:?}, Category Count: {}, Keypoint Count: {}, TopK: {}",
version.map_or("Unknown".into(), |x| x.to_string()),
@@ -249,6 +254,10 @@ impl YOLO {
if !classes_retained.is_empty() {
info = format!("{}, classes_retained: {:?}", info, classes_retained);
}
+ let processor = Processor::try_from_config(&config.processor)?
+ .with_image_width(width as _)
+ .with_image_height(height as _);
+
info!("{}", info);
Ok(Self {
@@ -293,12 +302,8 @@ impl YOLO {
Ok(ys)
}
- pub fn summary(&mut self) {
- self.ts.summary();
- }
-
fn postprocess(&self, xs: Xs) -> Result<Vec<Y>> {
- let protos = if xs.len() == 2 { Some(&xs[1]) } else { None };
+ // let protos = if xs.len() == 2 { Some(&xs[1]) } else { None };
let ys: Vec<Y> = xs[0]
.axis_iter(Axis(0))
.into_par_iter()
@@ -343,11 +348,11 @@ impl YOLO {
let ratio = self.processor.images_transform_info[idx].height_scale;
// Other tasks
- let (y_bboxes, y_mbrs) = slice_bboxes?
+ let (y_hbbs, y_obbs) = slice_bboxes?
.axis_iter(Axis(0))
.into_par_iter()
.enumerate()
- .filter_map(|(i, bbox)| {
+ .filter_map(|(i, hbb)| {
// confidence & class_id
let (class_id, confidence) = match &slice_id {
Some(ids) => (ids[[i, 0]] as _, slice_clss[[i, 0]] as _),
@@ -387,50 +392,50 @@ impl YOLO {
}
// Bboxes
- let bbox = bbox.mapv(|x| x / ratio);
- let bbox = if self.layout.is_bbox_normalized {
+ let hbb = hbb.mapv(|x| x / ratio);
+ let hbb = if self.layout.is_bbox_normalized {
(
- bbox[0] * self.width() as f32,
- bbox[1] * self.height() as f32,
- bbox[2] * self.width() as f32,
- bbox[3] * self.height() as f32,
+ hbb[0] * self.width() as f32,
+ hbb[1] * self.height() as f32,
+ hbb[2] * self.width() as f32,
+ hbb[3] * self.height() as f32,
)
} else {
- (bbox[0], bbox[1], bbox[2], bbox[3])
+ (hbb[0], hbb[1], hbb[2], hbb[3])
};
let (cx, cy, x, y, w, h) = match self.layout.box_type()? {
BoxType::Cxcywh => {
- let (cx, cy, w, h) = bbox;
+ let (cx, cy, w, h) = hbb;
let x = (cx - w / 2.).max(0.);
let y = (cy - h / 2.).max(0.);
(cx, cy, x, y, w, h)
}
BoxType::Xyxy => {
- let (x, y, x2, y2) = bbox;
+ let (x, y, x2, y2) = hbb;
let (w, h) = (x2 - x, y2 - y);
let (cx, cy) = ((x + x2) / 2., (y + y2) / 2.);
(cx, cy, x, y, w, h)
}
BoxType::Xywh => {
- let (x, y, w, h) = bbox;
+ let (x, y, w, h) = hbb;
let (cx, cy) = (x + w / 2., y + h / 2.);
(cx, cy, x, y, w, h)
}
BoxType::Cxcyxy => {
- let (cx, cy, x2, y2) = bbox;
+ let (cx, cy, x2, y2) = hbb;
let (w, h) = ((x2 - cx) * 2., (y2 - cy) * 2.);
let x = (x2 - w).max(0.);
let y = (y2 - h).max(0.);
(cx, cy, x, y, w, h)
}
BoxType::XyCxcy => {
- let (x, y, cx, cy) = bbox;
+ let (x, y, cx, cy) = hbb;
let (w, h) = ((cx - x) * 2., (cy - y) * 2.);
(cx, cy, x, y, w, h)
}
};
- let (y_bbox, y_mbr) = match &slice_radians {
+ let (y_hbb, y_obb) = match &slice_radians {
Some(slice_radians) => {
let radians = slice_radians[[i, 0]];
let (w, h, radians) = if w > h {
@@ -439,47 +444,51 @@ impl YOLO {
(h, w, radians + std::f32::consts::PI / 2.)
};
let radians = radians % std::f32::consts::PI;
- let mbr = Obb::from_cxcywhr(cx, cy, w, h, radians)
+ let mut obb = Obb::from_cxcywhr(cx, cy, w, h, radians)
.with_confidence(confidence)
- .with_id(class_id)
- .with_name(&self.names[class_id]);
+ .with_id(class_id);
+ if !self.names.is_empty() {
+ obb = obb.with_name(&self.names[class_id]);
+ }
- (None, Some(mbr))
+ (None, Some(obb))
}
None => {
- let bbox = Hbb::default()
+ let mut hbb = Hbb::default()
.with_xywh(x, y, w, h)
.with_confidence(confidence)
.with_id(class_id)
- .with_uid(i)
- .with_name(&self.names[class_id]);
+ .with_uid(i);
+ if !self.names.is_empty() {
+ hbb = hbb.with_name(&self.names[class_id]);
+ }
- (Some(bbox), None)
+ (Some(hbb), None)
}
};
- Some((y_bbox, y_mbr))
+ Some((y_hbb, y_obb))
})
.collect::<(Vec<_>, Vec<_>)>();
- let mut y_bboxes: Vec<Hbb> = y_bboxes.into_iter().flatten().collect();
- let mut y_mbrs: Vec<Obb> = y_mbrs.into_iter().flatten().collect();
+ let mut y_hbbs: Vec<Hbb> = y_hbbs.into_iter().flatten().collect();
+ let mut y_obbs: Vec<Obb> = y_obbs.into_iter().flatten().collect();
// Mbrs
- if !y_mbrs.is_empty() {
+ if !y_obbs.is_empty() {
if self.layout.apply_nms {
- y_mbrs.apply_nms_inplace(self.iou);
+ y_obbs.apply_nms_inplace(self.iou);
}
- y = y.with_obbs(&y_mbrs);
+ y = y.with_obbs(&y_obbs);
return Some(y);
}
// Bboxes
- if !y_bboxes.is_empty() {
+ if !y_hbbs.is_empty() {
if self.layout.apply_nms {
- y_bboxes.apply_nms_inplace(self.iou);
+ y_hbbs.apply_nms_inplace(self.iou);
}
- y = y.with_hbbs(&y_bboxes);
+ y = y.with_hbbs(&y_hbbs);
}
// KeypointsDetection
@@ -488,8 +497,8 @@ impl YOLO {
if let Some(hbbs) = y.hbbs() {
let y_kpts = hbbs
.into_par_iter()
- .filter_map(|bbox| {
- let pred = pred_kpts.slice(s![bbox.uid(), ..]);
+ .filter_map(|hbb| {
+ let pred = pred_kpts.slice(s![hbb.uid(), ..]);
let kpts = (0..self.nk)
.into_par_iter()
.map(|i| {
@@ -499,14 +508,17 @@ impl YOLO {
if kconf < self.kconfs[i] {
Keypoint::default()
} else {
- Keypoint::default()
+ let mut kpt = Keypoint::default()
.with_id(i)
.with_confidence(kconf)
.with_xy(
kx.max(0.0f32).min(image_width as f32),
ky.max(0.0f32).min(image_height as f32),
- )
- .with_name(&self.names_kpt[i])
+ );
+ if !self.names_kpt.is_empty() {
+ kpt = kpt.with_name(&self.names_kpt[i]);
+ }
+ kpt
}
})
.collect::<Vec<_>>();
@@ -520,11 +532,12 @@ impl YOLO {
// InstanceSegmentation
if let Some(coefs) = slice_coefs {
if let Some(hbbs) = y.hbbs() {
+ let protos = &xs[1];
let y_masks = hbbs
.into_par_iter()
- .filter_map(|bbox| {
- let coefs = coefs.slice(s![bbox.uid(), ..]).to_vec();
- let proto = protos.as_ref()?.slice(s![idx, .., .., ..]);
+ .filter_map(|hbb| {
+ let coefs = coefs.slice(s![hbb.uid(), ..]).to_vec();
+ let proto = protos.slice(s![idx, .., .., ..]);
let (nm, mh, mw) = proto.dim();
// coefs * proto => mask
@@ -551,9 +564,9 @@ impl YOLO {
mask,
)?;
let (xmin, ymin, xmax, ymax) =
- (bbox.xmin(), bbox.ymin(), bbox.xmax(), bbox.ymax());
+ (hbb.xmin(), hbb.ymin(), hbb.xmax(), hbb.ymax());
- // Using bbox to crop the mask
+ // Using hbb to crop the mask
for (y, row) in mask.enumerate_rows_mut() {
for (x, _, pixel) in row {
if x < xmin as _
@@ -567,10 +580,10 @@ impl YOLO {
}
let mut mask = Mask::default().with_mask(mask);
- if let Some(id) = bbox.id() {
+ if let Some(id) = hbb.id() {
mask = mask.with_id(id);
}
- if let Some(name) = bbox.name() {
+ if let Some(name) = hbb.name() {
mask = mask.with_name(name);
}
@@ -611,7 +624,7 @@ impl YOLO {
.and_then(|m| m.as_str().parse::<usize>().ok())
}
- fn n2s(n: usize) -> Vec<String> {
- (0..n).map(|x| format!("# {}", x)).collect::<Vec<String>>()
+ pub fn summary(&mut self) {
+ self.ts.summary();
}
}
diff --git a/src/models/yolop/config.rs b/src/models/yolop/config.rs
index 6e1564e..208d9dc 100644
--- a/src/models/yolop/config.rs
+++ b/src/models/yolop/config.rs
@@ -1,14 +1,12 @@
/// Model configuration for `YOLOP`
-impl crate::Options {
+impl crate::Config {
pub fn yolop() -> Self {
Self::default()
- .with_model_name("yolop")
+ .with_name("yolop")
.with_model_ixx(0, 0, 1.into())
.with_model_ixx(0, 2, 640.into())
.with_model_ixx(0, 3, 640.into())
.with_resize_mode(crate::ResizeMode::FitAdaptive)
- .with_resize_filter("Bilinear")
- .with_normalize(true)
.with_class_confs(&[0.3])
}
diff --git a/src/models/yolop/impl.rs b/src/models/yolop/impl.rs
index 1cbde83..6324516 100644
--- a/src/models/yolop/impl.rs
+++ b/src/models/yolop/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
use ndarray::{s, Array, Axis, IxDyn};
use crate::{
- elapsed, DynConf, Engine, Hbb, Image, NmsOps, Ops, Options, Polygon, Processor, Ts, Xs, Y,
+ elapsed, Config, DynConf, Engine, Hbb, Image, NmsOps, Ops, Polygon, Processor, Ts, Xs, Y,
};
#[derive(Builder, Debug)]
@@ -20,8 +20,8 @@ pub struct YOLOPv2 {
}
impl YOLOPv2 {
- pub fn new(options: Options) -> Result {
- let engine = options.to_engine()?;
+ pub fn new(config: Config) -> Result {
+ let engine = Engine::try_from_config(&config.model)?;
let spec = engine.spec().to_string();
let (batch, height, width, ts) = (
engine.batch().opt(),
@@ -29,14 +29,12 @@ impl YOLOPv2 {
engine.try_width().unwrap_or(&512.into()).opt(),
engine.ts().clone(),
);
- let processor = options
- .to_processor()?
+ let confs = DynConf::new(config.class_confs(), 80);
+ let iou = config.iou.unwrap_or(0.45f32);
+ let processor = Processor::try_from_config(&config.processor)?
.with_image_width(width as _)
.with_image_height(height as _);
- let confs = DynConf::new(options.class_confs(), 80);
- let iou = options.iou.unwrap_or(0.45f32);
-
Ok(Self {
engine,
height,
diff --git a/src/utils/config.rs b/src/utils/config.rs
new file mode 100644
index 0000000..3f9895f
--- /dev/null
+++ b/src/utils/config.rs
@@ -0,0 +1,282 @@
+use aksr::Builder;
+
+use crate::{
+ impl_ort_config_methods, impl_processor_config_methods,
+ models::{SamKind, YOLOPredsFormat},
+ ORTConfig, ProcessorConfig, Scale, Task, Version,
+};
+
+/// Config for building models and inference
+#[derive(Builder, Debug, Clone)]
+pub struct Config {
+ // Basics
+ pub name: &'static str,
+ pub version: Option<Version>,
+ pub task: Option<Task>,
+ pub scale: Option<Scale>,
+
+ // Engines
+ pub model: ORTConfig,
+ pub visual: ORTConfig,
+ pub textual: ORTConfig,
+ pub encoder: ORTConfig,
+ pub decoder: ORTConfig,
+ pub visual_encoder: ORTConfig,
+ pub textual_encoder: ORTConfig,
+ pub visual_decoder: ORTConfig,
+ pub textual_decoder: ORTConfig,
+ pub textual_decoder_merged: ORTConfig,
+ pub size_encoder: ORTConfig,
+ pub size_decoder: ORTConfig,
+ pub coord_encoder: ORTConfig,
+ pub coord_decoder: ORTConfig,
+ pub visual_projection: ORTConfig,
+ pub textual_projection: ORTConfig,
+
+ // Processor
+ pub processor: ProcessorConfig,
+
+ // Others
+ pub class_names: Vec<String>,
+ pub keypoint_names: Vec<String>,
+ pub text_names: Vec<String>,
+ pub class_confs: Vec<f32>,
+ pub keypoint_confs: Vec<f32>,
+ pub text_confs: Vec<f32>,
+ pub apply_softmax: Option<bool>,
+ pub topk: Option<usize>,
+ #[args(aka = "nc")]
+ pub num_classes: Option<usize>,
+ #[args(aka = "nk")]
+ pub num_keypoints: Option<usize>,
+ #[args(aka = "nm")]
+ pub num_masks: Option<usize>,
+ pub iou: Option<f32>,
+ pub apply_nms: Option<bool>,
+ pub find_contours: bool,
+ pub yolo_preds_format: Option<YOLOPredsFormat>,
+ pub classes_excluded: Vec<usize>,
+ pub classes_retained: Vec<usize>,
+ pub min_width: Option<f32>,
+ pub min_height: Option<f32>,
+ pub db_unclip_ratio: Option<f32>,
+ pub db_binary_thresh: Option<f32>,
+ pub sam_kind: Option<SamKind>,
+ pub sam_low_res_mask: Option<bool>,
+}
+
+impl Default for Config {
+ fn default() -> Self {
+ Self {
+ class_names: vec![],
+ keypoint_names: vec![],
+ text_names: vec![],
+ class_confs: vec![0.25f32],
+ keypoint_confs: vec![0.3f32],
+ text_confs: vec![0.25f32],
+ apply_softmax: Some(false),
+ num_classes: None,
+ num_keypoints: None,
+ num_masks: None,
+ iou: None,
+ find_contours: false,
+ yolo_preds_format: None,
+ classes_excluded: vec![],
+ classes_retained: vec![],
+ apply_nms: None,
+ min_width: None,
+ min_height: None,
+ db_unclip_ratio: Some(1.5),
+ db_binary_thresh: Some(0.2),
+ sam_kind: None,
+ sam_low_res_mask: None,
+ topk: None,
+ model: Default::default(),
+ encoder: Default::default(),
+ decoder: Default::default(),
+ visual: Default::default(),
+ textual: Default::default(),
+ visual_encoder: Default::default(),
+ textual_encoder: Default::default(),
+ visual_decoder: Default::default(),
+ textual_decoder: Default::default(),
+ textual_decoder_merged: Default::default(),
+ processor: ProcessorConfig::default(),
+ size_encoder: Default::default(),
+ size_decoder: Default::default(),
+ coord_encoder: Default::default(),
+ coord_decoder: Default::default(),
+ visual_projection: Default::default(),
+ textual_projection: Default::default(),
+ version: None,
+ task: None,
+ scale: None,
+ name: Default::default(),
+ }
+ }
+}
+
+impl Config {
+ pub fn exclude_classes(mut self, xs: &[usize]) -> Self {
+ self.classes_retained.clear();
+ self.classes_excluded.extend_from_slice(xs);
+ self
+ }
+
+ pub fn retain_classes(mut self, xs: &[usize]) -> Self {
+ self.classes_excluded.clear();
+ self.classes_retained.extend_from_slice(xs);
+ self
+ }
+
+ pub fn commit(mut self) -> anyhow::Result<Self> {
+ // special case for yolo
+ if self.name == "yolo" && self.model.file.is_empty() {
+ // version-scale-task
+ let mut y = String::new();
+ if let Some(x) = self.version() {
+ y.push_str(&x.to_string());
+ }
+ if let Some(x) = self.scale() {
+ y.push_str(&format!("-{}", x));
+ }
+ if let Some(x) = self.task() {
+ y.push_str(&format!("-{}", x.yolo_str()));
+ }
+ y.push_str(".onnx");
+ self.model.file = y;
+ }
+
+ fn try_commit(name: &str, mut m: ORTConfig) -> anyhow::Result<ORTConfig> {
+ if !m.file.is_empty() {
+ m = m.try_commit(name)?;
+ return Ok(m);
+ }
+
+ Ok(m)
+ }
+
+ self.model = try_commit(self.name, self.model)?;
+ self.visual = try_commit(self.name, self.visual)?;
+ self.textual = try_commit(self.name, self.textual)?;
+ self.encoder = try_commit(self.name, self.encoder)?;
+ self.decoder = try_commit(self.name, self.decoder)?;
+ self.visual_encoder = try_commit(self.name, self.visual_encoder)?;
+ self.textual_encoder = try_commit(self.name, self.textual_encoder)?;
+ self.visual_decoder = try_commit(self.name, self.visual_decoder)?;
+ self.textual_decoder = try_commit(self.name, self.textual_decoder)?;
+ self.textual_decoder_merged = try_commit(self.name, self.textual_decoder_merged)?;
+ self.size_encoder = try_commit(self.name, self.size_encoder)?;
+ self.size_decoder = try_commit(self.name, self.size_decoder)?;
+ self.coord_encoder = try_commit(self.name, self.coord_encoder)?;
+ self.coord_decoder = try_commit(self.name, self.coord_decoder)?;
+ self.visual_projection = try_commit(self.name, self.visual_projection)?;
+ self.textual_projection = try_commit(self.name, self.textual_projection)?;
+
+ Ok(self)
+ }
+
+ pub fn with_num_dry_run_all(mut self, x: usize) -> Self {
+ self.visual = self.visual.with_num_dry_run(x);
+ self.textual = self.textual.with_num_dry_run(x);
+ self.model = self.model.with_num_dry_run(x);
+ self.encoder = self.encoder.with_num_dry_run(x);
+ self.decoder = self.decoder.with_num_dry_run(x);
+ self.visual_encoder = self.visual_encoder.with_num_dry_run(x);
+ self.textual_encoder = self.textual_encoder.with_num_dry_run(x);
+ self.visual_decoder = self.visual_decoder.with_num_dry_run(x);
+ self.textual_decoder = self.textual_decoder.with_num_dry_run(x);
+ self.textual_decoder_merged = self.textual_decoder_merged.with_num_dry_run(x);
+ self.size_encoder = self.size_encoder.with_num_dry_run(x);
+ self.size_decoder = self.size_decoder.with_num_dry_run(x);
+ self.coord_encoder = self.coord_encoder.with_num_dry_run(x);
+ self.coord_decoder = self.coord_decoder.with_num_dry_run(x);
+ self.visual_projection = self.visual_projection.with_num_dry_run(x);
+ self.textual_projection = self.textual_projection.with_num_dry_run(x);
+
+ self
+ }
+
+ pub fn with_batch_size_all(mut self, batch_size: usize) -> Self {
+ self.visual = self.visual.with_ixx(0, 0, batch_size.into());
+ self.textual = self.textual.with_ixx(0, 0, batch_size.into());
+ self.model = self.model.with_ixx(0, 0, batch_size.into());
+ self.encoder = self.encoder.with_ixx(0, 0, batch_size.into());
+ self.decoder = self.decoder.with_ixx(0, 0, batch_size.into());
+ self.visual_encoder = self.visual_encoder.with_ixx(0, 0, batch_size.into());
+ self.textual_encoder = self.textual_encoder.with_ixx(0, 0, batch_size.into());
+ self.visual_decoder = self.visual_decoder.with_ixx(0, 0, batch_size.into());
+ self.textual_decoder = self.textual_decoder.with_ixx(0, 0, batch_size.into());
+ self.textual_decoder_merged = self
+ .textual_decoder_merged
+ .with_ixx(0, 0, batch_size.into());
+ self.size_encoder = self.size_encoder.with_ixx(0, 0, batch_size.into());
+ self.size_decoder = self.size_decoder.with_ixx(0, 0, batch_size.into());
+ self.coord_encoder = self.coord_encoder.with_ixx(0, 0, batch_size.into());
+ self.coord_decoder = self.coord_decoder.with_ixx(0, 0, batch_size.into());
+ self.visual_projection = self.visual_projection.with_ixx(0, 0, batch_size.into());
+ self.textual_projection = self.textual_projection.with_ixx(0, 0, batch_size.into());
+
+ self
+ }
+
+ pub fn with_device_all(mut self, device: crate::Device) -> Self {
+ self.visual = self.visual.with_device(device);
+ self.textual = self.textual.with_device(device);
+ self.model = self.model.with_device(device);
+ self.encoder = self.encoder.with_device(device);
+ self.decoder = self.decoder.with_device(device);
+ self.visual_encoder = self.visual_encoder.with_device(device);
+ self.textual_encoder = self.textual_encoder.with_device(device);
+ self.visual_decoder = self.visual_decoder.with_device(device);
+ self.textual_decoder = self.textual_decoder.with_device(device);
+ self.textual_decoder_merged = self.textual_decoder_merged.with_device(device);
+ self.size_encoder = self.size_encoder.with_device(device);
+ self.size_decoder = self.size_decoder.with_device(device);
+ self.coord_encoder = self.coord_encoder.with_device(device);
+ self.coord_decoder = self.coord_decoder.with_device(device);
+ self.visual_projection = self.visual_projection.with_device(device);
+ self.textual_projection = self.textual_projection.with_device(device);
+
+ self
+ }
+
+ pub fn with_dtype_all(mut self, dtype: crate::DType) -> Self {
+ self.visual = self.visual.with_dtype(dtype);
+ self.textual = self.textual.with_dtype(dtype);
+ self.model = self.model.with_dtype(dtype);
+ self.encoder = self.encoder.with_dtype(dtype);
+ self.decoder = self.decoder.with_dtype(dtype);
+ self.visual_encoder = self.visual_encoder.with_dtype(dtype);
+ self.textual_encoder = self.textual_encoder.with_dtype(dtype);
+ self.visual_decoder = self.visual_decoder.with_dtype(dtype);
+ self.textual_decoder = self.textual_decoder.with_dtype(dtype);
+ self.textual_decoder_merged = self.textual_decoder_merged.with_dtype(dtype);
+ self.size_encoder = self.size_encoder.with_dtype(dtype);
+ self.size_decoder = self.size_decoder.with_dtype(dtype);
+ self.coord_encoder = self.coord_encoder.with_dtype(dtype);
+ self.coord_decoder = self.coord_decoder.with_dtype(dtype);
+ self.visual_projection = self.visual_projection.with_dtype(dtype);
+ self.textual_projection = self.textual_projection.with_dtype(dtype);
+
+ self
+ }
+}
+
+impl_ort_config_methods!(Config, model);
+impl_ort_config_methods!(Config, visual);
+impl_ort_config_methods!(Config, textual);
+impl_ort_config_methods!(Config, encoder);
+impl_ort_config_methods!(Config, decoder);
+impl_ort_config_methods!(Config, visual_encoder);
+impl_ort_config_methods!(Config, textual_encoder);
+impl_ort_config_methods!(Config, visual_decoder);
+impl_ort_config_methods!(Config, textual_decoder);
+impl_ort_config_methods!(Config, textual_decoder_merged);
+impl_ort_config_methods!(Config, size_encoder);
+impl_ort_config_methods!(Config, size_decoder);
+impl_ort_config_methods!(Config, coord_encoder);
+impl_ort_config_methods!(Config, coord_decoder);
+impl_ort_config_methods!(Config, visual_projection);
+impl_ort_config_methods!(Config, textual_projection);
+impl_processor_config_methods!(Config, processor);
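A small sketch (outside the diff) of the two class filters defined above: `exclude_classes` and `retain_classes` are mutually exclusive, each clearing the other list before extending its own.

fn main() {
    let config = usls::Config::yolo_detect()
        .retain_classes(&[0, 2])   // keep only classes 0 and 2
        .exclude_classes(&[0]);    // clears `classes_retained`, then excludes class 0

    assert!(config.classes_retained.is_empty());
    assert_eq!(config.classes_excluded, vec![0usize]);
}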
diff --git a/src/utils/device.rs b/src/utils/device.rs
index 8c612cd..a8099cf 100644
--- a/src/utils/device.rs
+++ b/src/utils/device.rs
@@ -1,16 +1,20 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Device {
- Auto(usize),
Cpu(usize),
Cuda(usize),
TensorRT(usize),
CoreML(usize),
}
+impl Default for Device {
+ fn default() -> Self {
+ Self::Cpu(0)
+ }
+}
+
impl std::fmt::Display for Device {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let x = match self {
- Self::Auto(i) => format!("auto:{}", i),
Self::Cpu(i) => format!("cpu:{}", i),
Self::Cuda(i) => format!("cuda:{}", i),
Self::CoreML(i) => format!("mps:{}", i),
@@ -47,7 +51,6 @@ impl TryFrom<&str> for Device {
impl Device {
pub fn id(&self) -> usize {
match self {
- Device::Auto(i) => *i,
Device::Cpu(i) => *i,
Device::Cuda(i) => *i,
Device::TensorRT(i) => *i,
diff --git a/src/utils/dtype.rs b/src/utils/dtype.rs
index 538ae83..5874007 100644
--- a/src/utils/dtype.rs
+++ b/src/utils/dtype.rs
@@ -1,5 +1,6 @@
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DType {
+ #[default]
Auto,
Int4,
Int8,
diff --git a/src/utils/dynconf.rs b/src/utils/dynconf.rs
index fa715d2..1ec0cff 100644
--- a/src/utils/dynconf.rs
+++ b/src/utils/dynconf.rs
@@ -6,7 +6,13 @@ pub struct DynConf(Vec<f32>);
impl Default for DynConf {
fn default() -> Self {
- Self(vec![0.4f32])
+ Self(vec![0.3f32])
+ }
+}
+
+impl From<f32> for DynConf {
+ fn from(conf: f32) -> Self {
+ Self(vec![conf])
}
}
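With the new `From<f32>` impl above, a single threshold converts straight into a `DynConf` (sketch, outside the diff):

fn demo() {
    // `0.4f32.into()` yields `DynConf(vec![0.4])`, per the impl above.
    let _conf: usls::DynConf = 0.4f32.into();
}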
diff --git a/src/utils/kind.rs b/src/utils/kind.rs
deleted file mode 100644
index 4519427..0000000
--- a/src/utils/kind.rs
+++ /dev/null
@@ -1,18 +0,0 @@
-#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
-pub enum Kind {
- // Do we really need this?
- Vision,
- Language,
- VisionLanguage,
-}
-
-impl std::fmt::Display for Kind {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- let x = match self {
- Self::Vision => "visual",
- Self::Language => "textual",
- Self::VisionLanguage => "vl",
- };
- write!(f, "{}", x)
- }
-}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 50b8068..e145054 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,14 +1,15 @@
+mod config;
mod device;
mod dtype;
mod dynconf;
mod iiix;
-mod kind;
mod logits_sampler;
mod min_opt_max;
mod names;
mod ops;
-mod options;
+mod ort_config;
mod processor;
+mod processor_config;
mod retry;
mod scale;
mod task;
@@ -16,17 +17,18 @@ mod traits;
mod ts;
mod version;
+pub use config::*;
pub use device::Device;
pub use dtype::DType;
pub use dynconf::DynConf;
pub(crate) use iiix::Iiix;
-pub use kind::Kind;
pub use logits_sampler::LogitsSampler;
pub use min_opt_max::MinOptMax;
pub use names::*;
pub use ops::*;
-pub use options::*;
+pub use ort_config::ORTConfig;
pub use processor::*;
+pub use processor_config::ProcessorConfig;
pub use scale::Scale;
pub use task::Task;
pub use traits::*;
diff --git a/src/utils/names.rs b/src/utils/names.rs
index 644973e..e4affef 100644
--- a/src/utils/names.rs
+++ b/src/utils/names.rs
@@ -1,3 +1,4627 @@
+pub static NAMES_YOLOE_4585: [&str; 4585] = [
+ "3D CG rendering",
+ "3D glasses",
+ "abacus",
+ "abalone",
+ "monastery",
+ "belly",
+ "academy",
+ "accessory",
+ "accident",
+ "accordion",
+ "acorn",
+ "acrylic paint",
+ "act",
+ "action",
+ "action film",
+ "activity",
+ "actor",
+ "adaptation",
+ "add",
+ "adhesive tape",
+ "adjust",
+ "adult",
+ "adventure",
+ "advertisement",
+ "antenna",
+ "aerobics",
+ "spray can",
+ "afro",
+ "agriculture",
+ "aid",
+ "air conditioner",
+ "air conditioning",
+ "air sock",
+ "aircraft cabin",
+ "aircraft model",
+ "air field",
+ "air line",
+ "airliner",
+ "airman",
+ "plane",
+ "airplane window",
+ "airport",
+ "airport runway",
+ "airport terminal",
+ "airship",
+ "airshow",
+ "aisle",
+ "alarm",
+ "alarm clock",
+ "mollymawk",
+ "album",
+ "album cover",
+ "alcohol",
+ "alcove",
+ "algae",
+ "alley",
+ "almond",
+ "aloe vera",
+ "alp",
+ "alpaca",
+ "alphabet",
+ "german shepherd",
+ "altar",
+ "amber",
+ "ambulance",
+ "bald eagle",
+ "American shorthair",
+ "amethyst",
+ "amphitheater",
+ "amplifier",
+ "amusement park",
+ "amusement ride",
+ "anchor",
+ "ancient",
+ "anemone",
+ "angel",
+ "angle",
+ "animal",
+ "animal sculpture",
+ "animal shelter",
+ "animation",
+ "animation film",
+ "animator",
+ "anime",
+ "ankle",
+ "anklet",
+ "anniversary",
+ "trench coat",
+ "ant",
+ "antelope",
+ "antique",
+ "antler",
+ "anvil",
+ "apartment",
+ "ape",
+ "app",
+ "app icon",
+ "appear",
+ "appearance",
+ "appetizer",
+ "applause",
+ "apple",
+ "apple juice",
+ "apple pie",
+ "apple tree",
+ "applesauce",
+ "appliance",
+ "appointment",
+ "approach",
+ "apricot",
+ "apron",
+ "aqua",
+ "aquarium",
+ "aquarium fish",
+ "aqueduct",
+ "arcade",
+ "arcade machine",
+ "arch",
+ "arch bridge",
+ "archaelogical excavation",
+ "archery",
+ "archipelago",
+ "architect",
+ "architecture",
+ "archive",
+ "archway",
+ "area",
+ "arena",
+ "argument",
+ "arm",
+ "armadillo",
+ "armband",
+ "armchair",
+ "armoire",
+ "armor",
+ "army",
+ "army base",
+ "army tank",
+ "array",
+ "arrest",
+ "arrow",
+ "art",
+ "art exhibition",
+ "art gallery",
+ "art print",
+ "art school",
+ "art studio",
+ "art vector illustration",
+ "artichoke",
+ "article",
+ "artifact",
+ "artist",
+ "artists loft",
+ "ash",
+ "ashtray",
+ "asia temple",
+ "asparagus",
+ "asphalt road",
+ "assemble",
+ "assembly",
+ "assembly line",
+ "association",
+ "astronaut",
+ "astronomer",
+ "athlete",
+ "athletic",
+ "atlas",
+ "atm",
+ "atmosphere",
+ "atrium",
+ "attach",
+ "fighter jet",
+ "attend",
+ "attraction",
+ "atv",
+ "eggplant",
+ "auction",
+ "audi",
+ "audio",
+ "auditorium",
+ "aurora",
+ "author",
+ "auto factory",
+ "auto mechanic",
+ "auto part",
+ "auto show",
+ "auto showroom",
+ "car battery",
+ "automobile make",
+ "automobile model",
+ "motor vehicle",
+ "autumn",
+ "autumn forest",
+ "autumn leave",
+ "autumn park",
+ "autumn tree",
+ "avatar",
+ "avenue",
+ "aviator sunglasses",
+ "avocado",
+ "award",
+ "award ceremony",
+ "award winner",
+ "shed",
+ "ax",
+ "azalea",
+ "baboon",
+ "baby",
+ "baby bottle",
+ "baby carriage",
+ "baby clothe",
+ "baby elephant",
+ "baby food",
+ "baby seat",
+ "baby shower",
+ "back",
+ "backdrop",
+ "backlight",
+ "backpack",
+ "backyard",
+ "bacon",
+ "badge",
+ "badger",
+ "badlands",
+ "badminton",
+ "badminton racket",
+ "bag",
+ "bagel",
+ "bagpipe",
+ "baguette",
+ "bait",
+ "baked goods",
+ "baker",
+ "bakery",
+ "baking",
+ "baking sheet",
+ "balance",
+ "balance car",
+ "balcony",
+ "ball",
+ "ball pit",
+ "ballerina",
+ "ballet",
+ "ballet dancer",
+ "ballet skirt",
+ "balloon",
+ "balloon arch",
+ "baseball player",
+ "ballroom",
+ "bamboo",
+ "bamboo forest",
+ "banana",
+ "banana bread",
+ "banana leaf",
+ "banana tree",
+ "band",
+ "band aid",
+ "bandage",
+ "headscarf",
+ "bandeau",
+ "bangs",
+ "bracelet",
+ "balustrade",
+ "banjo",
+ "bank",
+ "bank card",
+ "bank vault",
+ "banknote",
+ "banner",
+ "banquet",
+ "banquet hall",
+ "banyan tree",
+ "baozi",
+ "baptism",
+ "bar",
+ "bar code",
+ "bar stool",
+ "barbecue",
+ "barbecue grill",
+ "barbell",
+ "barber",
+ "barber shop",
+ "barbie",
+ "barge",
+ "barista",
+ "bark",
+ "barley",
+ "barn",
+ "barn owl",
+ "barn door",
+ "barrel",
+ "barricade",
+ "barrier",
+ "handcart",
+ "bartender",
+ "baseball",
+ "baseball base",
+ "baseball bat",
+ "baseball hat",
+ "baseball stadium",
+ "baseball game",
+ "baseball glove",
+ "baseball pitcher",
+ "baseball team",
+ "baseball uniform",
+ "basement",
+ "basil",
+ "basin",
+ "basket",
+ "basket container",
+ "basketball",
+ "basketball backboard",
+ "basketball coach",
+ "basketball court",
+ "basketball game",
+ "basketball hoop",
+ "basketball player",
+ "basketball stadium",
+ "basketball team",
+ "bass",
+ "bass guitar",
+ "bass horn",
+ "bassist",
+ "bat",
+ "bath",
+ "bath heater",
+ "bath mat",
+ "bath towel",
+ "swimwear",
+ "bathrobe",
+ "bathroom",
+ "bathroom accessory",
+ "bathroom cabinet",
+ "bathroom door",
+ "bathroom mirror",
+ "bathroom sink",
+ "toilet paper",
+ "bathroom window",
+ "batman",
+ "wand",
+ "batter",
+ "battery",
+ "battle",
+ "battle rope",
+ "battleship",
+ "bay",
+ "bay bridge",
+ "bay window",
+ "bayberry",
+ "bazaar",
+ "beach",
+ "beach ball",
+ "beach chair",
+ "beach house",
+ "beach hut",
+ "beach towel",
+ "beach volleyball",
+ "lighthouse",
+ "bead",
+ "beagle",
+ "beak",
+ "beaker",
+ "beam",
+ "bean",
+ "bean bag chair",
+ "beanbag",
+ "bear",
+ "bear cub",
+ "beard",
+ "beast",
+ "beat",
+ "beautiful",
+ "beauty",
+ "beauty salon",
+ "beaver",
+ "bed",
+ "bedcover",
+ "bed frame",
+ "bedroom",
+ "bedding",
+ "bedpan",
+ "bedroom window",
+ "bedside lamp",
+ "bee",
+ "beech tree",
+ "beef",
+ "beekeeper",
+ "beeper",
+ "beer",
+ "beer bottle",
+ "beer can",
+ "beer garden",
+ "beer glass",
+ "beer hall",
+ "beet",
+ "beetle",
+ "beige",
+ "clock",
+ "bell pepper",
+ "bell tower",
+ "belt",
+ "belt buckle",
+ "bench",
+ "bend",
+ "bengal tiger",
+ "bento",
+ "beret",
+ "berry",
+ "berth",
+ "beverage",
+ "bib",
+ "bibimbap",
+ "bible",
+ "bichon",
+ "bicycle",
+ "bicycle helmet",
+ "bicycle wheel",
+ "biker",
+ "bidet",
+ "big ben",
+ "bike lane",
+ "bike path",
+ "bike racing",
+ "bike ride",
+ "bikini",
+ "bikini top",
+ "bill",
+ "billard",
+ "billboard",
+ "billiard table",
+ "bin",
+ "binder",
+ "binocular",
+ "biology laboratory",
+ "biplane",
+ "birch",
+ "birch tree",
+ "bird",
+ "bird bath",
+ "bird feeder",
+ "bird house",
+ "bird nest",
+ "birdbath",
+ "bird cage",
+ "birth",
+ "birthday",
+ "birthday cake",
+ "birthday candle",
+ "birthday card",
+ "birthday party",
+ "biscuit",
+ "bishop",
+ "bison",
+ "bit",
+ "bite",
+ "black",
+ "black sheep",
+ "blackberry",
+ "blackbird",
+ "blackboard",
+ "blacksmith",
+ "blade",
+ "blanket",
+ "sports coat",
+ "bleacher",
+ "blender",
+ "blessing",
+ "blind",
+ "eye mask",
+ "flasher",
+ "snowstorm",
+ "block",
+ "blog",
+ "blood",
+ "bloom",
+ "blossom",
+ "blouse",
+ "blow",
+ "hair drier",
+ "blowfish",
+ "blue",
+ "blue artist",
+ "blue jay",
+ "blue sky",
+ "blueberry",
+ "bluebird",
+ "pig",
+ "board",
+ "board eraser",
+ "board game",
+ "boardwalk",
+ "boat",
+ "boat deck",
+ "boat house",
+ "paddle",
+ "boat ride",
+ "bobfloat",
+ "bobcat",
+ "body",
+ "bodyboard",
+ "bodybuilder",
+ "boiled egg",
+ "boiler",
+ "bolo tie",
+ "bolt",
+ "bomb",
+ "bomber",
+ "bonasa umbellu",
+ "bone",
+ "bonfire",
+ "bonnet",
+ "bonsai",
+ "book",
+ "book cover",
+ "bookcase",
+ "folder",
+ "bookmark",
+ "bookshelf",
+ "bookstore",
+ "boom microphone",
+ "boost",
+ "boot",
+ "border",
+ "Border collie",
+ "botanical garden",
+ "bottle",
+ "bottle cap",
+ "bottle opener",
+ "bottle screw",
+ "bougainvillea",
+ "boulder",
+ "bouquet",
+ "boutique",
+ "boutique hotel",
+ "bow",
+ "bow tie",
+ "bow window",
+ "bowl",
+ "bowling",
+ "bowling alley",
+ "bowling ball",
+ "bowling equipment",
+ "box",
+ "box girder bridge",
+ "box turtle",
+ "boxer",
+ "underdrawers",
+ "boxing",
+ "boxing glove",
+ "boxing ring",
+ "boy",
+ "brace",
+ "bracket",
+ "braid",
+ "brain",
+ "brake",
+ "brake light",
+ "branch",
+ "brand",
+ "brandy",
+ "brass",
+ "brass plaque",
+ "bread",
+ "breadbox",
+ "break",
+ "breakfast",
+ "seawall",
+ "chest",
+ "brewery",
+ "brick",
+ "brick building",
+ "wall",
+ "brickwork",
+ "wedding dress",
+ "bride",
+ "groom",
+ "bridesmaid",
+ "bridge",
+ "bridle",
+ "briefcase",
+ "bright",
+ "brim",
+ "broach",
+ "broadcasting",
+ "broccoli",
+ "bronze",
+ "bronze medal",
+ "bronze sculpture",
+ "bronze statue",
+ "brooch",
+ "creek",
+ "broom",
+ "broth",
+ "brown",
+ "brown bear",
+ "brownie",
+ "brunch",
+ "brunette",
+ "brush",
+ "coyote",
+ "brussels sprout",
+ "bubble",
+ "bubble gum",
+ "bubble tea",
+ "bucket cabinet",
+ "shield",
+ "bud",
+ "buddha",
+ "buffalo",
+ "buffet",
+ "bug",
+ "build",
+ "builder",
+ "building",
+ "building block",
+ "building facade",
+ "building material",
+ "lamp",
+ "bull",
+ "bulldog",
+ "bullet",
+ "bullet train",
+ "bulletin board",
+ "bulletproof vest",
+ "bullfighting",
+ "megaphone",
+ "bullring",
+ "bumblebee",
+ "bumper",
+ "roll",
+ "bundle",
+ "bungee",
+ "bunk bed",
+ "bunker",
+ "bunny",
+ "buoy",
+ "bureau",
+ "burial chamber",
+ "burn",
+ "burrito",
+ "bus",
+ "bus driver",
+ "bus interior",
+ "bus station",
+ "bus stop",
+ "bus window",
+ "bush",
+ "business",
+ "business card",
+ "business executive",
+ "business suit",
+ "business team",
+ "business woman",
+ "businessman",
+ "bust",
+ "butcher",
+ "butchers shop",
+ "butte",
+ "butter",
+ "cream",
+ "butterfly",
+ "butterfly house",
+ "button",
+ "buttonwood",
+ "buy",
+ "taxi",
+ "cabana",
+ "cabbage",
+ "cabin",
+ "cabin car",
+ "cabinet",
+ "cabinetry",
+ "cable",
+ "cable car",
+ "cactus",
+ "cafe",
+ "canteen",
+ "cage",
+ "cake",
+ "cake stand",
+ "calculator",
+ "caldron",
+ "calendar",
+ "calf",
+ "call",
+ "phone box",
+ "calligraphy",
+ "calm",
+ "camcorder",
+ "camel",
+ "camera",
+ "camera lens",
+ "camouflage",
+ "camp",
+ "camper",
+ "campfire",
+ "camping",
+ "campsite",
+ "campus",
+ "can",
+ "can opener",
+ "canal",
+ "canary",
+ "cancer",
+ "candle",
+ "candle holder",
+ "candy",
+ "candy bar",
+ "candy cane",
+ "candy store",
+ "cane",
+ "jar",
+ "cannon",
+ "canopy",
+ "canopy bed",
+ "cantaloupe",
+ "cantilever bridge",
+ "canvas",
+ "canyon",
+ "cap",
+ "cape",
+ "cape cod",
+ "cappuccino",
+ "capsule",
+ "captain",
+ "capture",
+ "car",
+ "car dealership",
+ "car door",
+ "car interior",
+ "car logo",
+ "car mirror",
+ "parking lot",
+ "car seat",
+ "car show",
+ "car wash",
+ "car window",
+ "caramel",
+ "card",
+ "card game",
+ "cardboard",
+ "cardboard box",
+ "cardigan",
+ "cardinal",
+ "cargo",
+ "cargo aircraft",
+ "cargo ship",
+ "caribbean",
+ "carnation",
+ "carnival",
+ "carnivore",
+ "carousel",
+ "carp",
+ "carpenter",
+ "carpet",
+ "slipper",
+ "house finch",
+ "coach",
+ "dalmatian",
+ "aircraft carrier",
+ "carrot",
+ "carrot cake",
+ "carry",
+ "cart",
+ "carton",
+ "cartoon",
+ "cartoon character",
+ "cartoon illustration",
+ "cartoon style",
+ "carve",
+ "case",
+ "cash",
+ "cashew",
+ "casino",
+ "casserole",
+ "cassette",
+ "cassette deck",
+ "plaster bandage",
+ "casting",
+ "castle",
+ "cat",
+ "cat bed",
+ "cat food",
+ "cat furniture",
+ "cat tree",
+ "catacomb",
+ "catamaran",
+ "catamount",
+ "catch",
+ "catcher",
+ "caterpillar",
+ "catfish",
+ "cathedral",
+ "cattle",
+ "catwalk",
+ "catwalk show",
+ "cauliflower",
+ "cave",
+ "caviar",
+ "CD",
+ "CD player",
+ "cedar",
+ "ceiling",
+ "ceiling fan",
+ "celebrate",
+ "celebration",
+ "celebrity",
+ "celery",
+ "cello",
+ "smartphone",
+ "cement",
+ "graveyard",
+ "centerpiece",
+ "centipede",
+ "ceramic",
+ "ceramic tile",
+ "cereal",
+ "ceremony",
+ "certificate",
+ "chain",
+ "chain saw",
+ "chair",
+ "chairlift",
+ "daybed",
+ "chalet",
+ "chalice",
+ "chalk",
+ "chamber",
+ "chameleon",
+ "champagne",
+ "champagne flute",
+ "champion",
+ "championship",
+ "chandelier",
+ "changing table",
+ "channel",
+ "chap",
+ "chapel",
+ "character sculpture",
+ "charcoal",
+ "charge",
+ "charger",
+ "chariot",
+ "charity",
+ "charity event",
+ "charm",
+ "graph",
+ "chase",
+ "chassis",
+ "check",
+ "checkbook",
+ "chessboard",
+ "checklist",
+ "cheer",
+ "cheerlead",
+ "cheese",
+ "cheeseburger",
+ "cheesecake",
+ "cheetah",
+ "chef",
+ "chemical compound",
+ "chemist",
+ "chemistry",
+ "chemistry lab",
+ "cheongsam",
+ "cherry",
+ "cherry blossom",
+ "cherry tomato",
+ "cherry tree",
+ "chess",
+ "chestnut",
+ "chicken",
+ "chicken breast",
+ "chicken coop",
+ "chicken salad",
+ "chicken wing",
+ "garbanzo",
+ "chiffonier",
+ "chihuahua",
+ "child",
+ "child actor",
+ "childs room",
+ "chile",
+ "chili dog",
+ "chimney",
+ "chimpanzee",
+ "chinaware",
+ "chinese cabbage",
+ "chinese garden",
+ "chinese knot",
+ "chinese rose",
+ "chinese tower",
+ "chip",
+ "chipmunk",
+ "chisel",
+ "chocolate",
+ "chocolate bar",
+ "chocolate cake",
+ "chocolate chip",
+ "chocolate chip cookie",
+ "chocolate milk",
+ "chocolate mousse",
+ "truffle",
+ "choir",
+ "kitchen knife",
+ "cutting board",
+ "chopstick",
+ "christmas",
+ "christmas ball",
+ "christmas card",
+ "christmas decoration",
+ "christmas dinner",
+ "christmas eve",
+ "christmas hat",
+ "christmas light",
+ "christmas market",
+ "christmas ornament",
+ "christmas tree",
+ "chrysanthemum",
+ "church",
+ "church tower",
+ "cider",
+ "cigar",
+ "cigar box",
+ "cigarette",
+ "cigarette case",
+ "waistband",
+ "cinema",
+ "photographer",
+ "cinnamon",
+ "circle",
+ "circuit",
+ "circuit board",
+ "circus",
+ "water tank",
+ "citrus fruit",
+ "city",
+ "city bus",
+ "city hall",
+ "city nightview",
+ "city park",
+ "city skyline",
+ "city square",
+ "city street",
+ "city wall",
+ "city view",
+ "clam",
+ "clarinet",
+ "clasp",
+ "class",
+ "classic",
+ "classroom",
+ "clavicle",
+ "claw",
+ "clay",
+ "pottery",
+ "clean",
+ "clean room",
+ "cleaner",
+ "cleaning product",
+ "clear",
+ "cleat",
+ "clementine",
+ "client",
+ "cliff",
+ "climb",
+ "climb mountain",
+ "climber",
+ "clinic",
+ "clip",
+ "clip art",
+ "clipboard",
+ "clipper",
+ "clivia",
+ "cloak",
+ "clogs",
+ "close-up",
+ "closet",
+ "cloth",
+ "clothe",
+ "clothing",
+ "clothespin",
+ "clothesline",
+ "clothing store",
+ "cloud",
+ "cloud forest",
+ "cloudy",
+ "clover",
+ "joker",
+ "clown fish",
+ "club",
+ "clutch",
+ "clutch bag",
+ "coal",
+ "coast",
+ "coat",
+ "coatrack",
+ "cob",
+ "cock",
+ "cockatoo",
+ "cocker",
+ "cockpit",
+ "roach",
+ "cocktail",
+ "cocktail dress",
+ "cocktail shaker",
+ "cocktail table",
+ "cocoa",
+ "coconut",
+ "coconut tree",
+ "coffee",
+ "coffee bean",
+ "coffee cup",
+ "coffee machine",
+ "coffee shop",
+ "coffeepot",
+ "coffin",
+ "cognac",
+ "spiral",
+ "coin",
+ "coke",
+ "colander",
+ "cold",
+ "slaw",
+ "collaboration",
+ "collage",
+ "collection",
+ "college student",
+ "sheepdog",
+ "crash",
+ "color",
+ "coloring book",
+ "coloring material",
+ "pony",
+ "pillar",
+ "comb",
+ "combination lock",
+ "comic",
+ "comedy",
+ "comedy film",
+ "comet",
+ "comfort",
+ "comfort food",
+ "comic book",
+ "comic book character",
+ "comic strip",
+ "commander",
+ "commentator",
+ "community",
+ "commuter",
+ "company",
+ "compass",
+ "compete",
+ "contest",
+ "competitor",
+ "composer",
+ "composition",
+ "compost",
+ "computer",
+ "computer box",
+ "computer chair",
+ "computer desk",
+ "keyboard",
+ "computer monitor",
+ "computer room",
+ "computer screen",
+ "computer tower",
+ "concept car",
+ "concert",
+ "concert hall",
+ "conch",
+ "concrete",
+ "condiment",
+ "condom",
+ "condominium",
+ "conductor",
+ "cone",
+ "meeting",
+ "conference center",
+ "conference hall",
+ "meeting room",
+ "confetti",
+ "conflict",
+ "confluence",
+ "connect",
+ "connector",
+ "conservatory",
+ "constellation",
+ "construction site",
+ "construction worker",
+ "contain",
+ "container",
+ "container ship",
+ "continent",
+ "profile",
+ "contract",
+ "control",
+ "control tower",
+ "convenience store",
+ "convention",
+ "conversation",
+ "converter",
+ "convertible",
+ "transporter",
+ "cook",
+ "cooking",
+ "cooking spray",
+ "cooker",
+ "cool",
+ "cooler",
+ "copper",
+ "copy",
+ "coral",
+ "coral reef",
+ "rope",
+ "corded phone",
+ "liquor",
+ "corgi",
+ "cork",
+ "corkboard",
+ "cormorant",
+ "corn",
+ "corn field",
+ "cornbread",
+ "corner",
+ "trumpet",
+ "cornice",
+ "cornmeal",
+ "corral",
+ "corridor",
+ "corset",
+ "cosmetic",
+ "cosmetics brush",
+ "cosmetics mirror",
+ "cosplay",
+ "costume",
+ "costumer film designer",
+ "infant bed",
+ "cottage",
+ "cotton",
+ "cotton candy",
+ "couch",
+ "countdown",
+ "counter",
+ "counter top",
+ "country artist",
+ "country house",
+ "country lane",
+ "country pop artist",
+ "countryside",
+ "coupe",
+ "couple",
+ "couple photo",
+ "courgette",
+ "course",
+ "court",
+ "courthouse",
+ "courtyard",
+ "cousin",
+ "coverall",
+ "cow",
+ "cowbell",
+ "cowboy",
+ "cowboy boot",
+ "cowboy hat",
+ "crab",
+ "crabmeat",
+ "crack",
+ "cradle",
+ "craft",
+ "craftsman",
+ "cranberry",
+ "crane",
+ "crape",
+ "crapper",
+ "crate",
+ "crater lake",
+ "lobster",
+ "crayon",
+ "cream cheese",
+ "cream pitcher",
+ "create",
+ "creature",
+ "credit card",
+ "crescent",
+ "croissant",
+ "crest",
+ "crew",
+ "cricket",
+ "cricket ball",
+ "cricket team",
+ "cricketer",
+ "crochet",
+ "crock pot",
+ "crocodile",
+ "crop",
+ "crop top",
+ "cross",
+ "crossbar",
+ "crossroad",
+ "crosstalk",
+ "crosswalk",
+ "crouton",
+ "crow",
+ "crowbar",
+ "crowd",
+ "crowded",
+ "crown",
+ "crt screen",
+ "crucifix",
+ "cruise",
+ "cruise ship",
+ "cruiser",
+ "crumb",
+ "crush",
+ "crutch",
+ "crystal",
+ "cub",
+ "cube",
+ "cucumber",
+ "cue",
+ "cuff",
+ "cufflink",
+ "cuisine",
+ "farmland",
+ "cup",
+ "cupcake",
+ "cupid",
+ "curb",
+ "curl",
+ "hair roller",
+ "currant",
+ "currency",
+ "curry",
+ "curtain",
+ "curve",
+ "pad",
+ "customer",
+ "cut",
+ "cutlery",
+ "cycle",
+ "cycling",
+ "cyclone",
+ "cylinder",
+ "cymbal",
+ "cypress",
+ "cypress tree",
+ "dachshund",
+ "daffodil",
+ "dagger",
+ "dahlia",
+ "daikon",
+ "dairy",
+ "daisy",
+ "dam",
+ "damage",
+ "damp",
+ "dance",
+ "dance floor",
+ "dance room",
+ "dancer",
+ "dandelion",
+ "dark",
+ "darkness",
+ "dart",
+ "dartboard",
+ "dashboard",
+ "date",
+ "daughter",
+ "dawn",
+ "day bed",
+ "daylight",
+ "deadbolt",
+ "death",
+ "debate",
+ "debris",
+ "decanter",
+ "deck",
+ "decker bus",
+ "decor",
+ "decorate",
+ "decorative picture",
+ "deer",
+ "defender",
+ "deity",
+ "delicatessen",
+ "deliver",
+ "demolition",
+ "monster",
+ "demonstration",
+ "den",
+ "denim jacket",
+ "dentist",
+ "department store",
+ "depression",
+ "derby",
+ "dermopathy",
+ "desert",
+ "desert road",
+ "design",
+ "designer",
+ "table",
+ "table lamp",
+ "desktop",
+ "desktop computer",
+ "dessert",
+ "destruction",
+ "detective",
+ "detergent",
+ "dew",
+ "dial",
+ "diamond",
+ "diaper",
+ "diaper bag",
+ "journal",
+ "die",
+ "diet",
+ "excavator",
+ "number",
+ "digital clock",
+ "dill",
+ "dinner",
+ "rowboat",
+ "dining room",
+ "dinner party",
+ "dinning table",
+ "dinosaur",
+ "dip",
+ "diploma",
+ "direct",
+ "director",
+ "dirt",
+ "dirt bike",
+ "dirt field",
+ "dirt road",
+ "dirt track",
+ "disaster",
+ "disciple",
+ "disco",
+ "disco ball",
+ "discotheque",
+ "disease",
+ "plate",
+ "dish antenna",
+ "dish washer",
+ "dishrag",
+ "dishes",
+ "dishsoap",
+ "Disneyland",
+ "dispenser",
+ "display",
+ "display window",
+ "trench",
+ "dive",
+ "diver",
+ "diving board",
+ "paper cup",
+ "dj",
+ "doberman",
+ "dock",
+ "doctor",
+ "document",
+ "documentary",
+ "dog",
+ "dog bed",
+ "dog breed",
+ "dog collar",
+ "dog food",
+ "dog house",
+ "doll",
+ "dollar",
+ "dollhouse",
+ "dolly",
+ "dolphin",
+ "dome",
+ "domicile",
+ "domino",
+ "donkey",
+ "donut",
+ "doodle",
+ "door",
+ "door handle",
+ "doormat",
+ "doorplate",
+ "doorway",
+ "dormitory",
+ "dough",
+ "downtown",
+ "dozer",
+ "drag",
+ "dragon",
+ "dragonfly",
+ "drain",
+ "drama",
+ "drama film",
+ "draw",
+ "drawer",
+ "drawing",
+ "drawing pin",
+ "pigtail",
+ "dress",
+ "dress hat",
+ "dress shirt",
+ "dress shoe",
+ "dress suit",
+ "dresser",
+ "dressing room",
+ "dribble",
+ "drift",
+ "driftwood",
+ "drill",
+ "drink",
+ "drinking water",
+ "drive",
+ "driver",
+ "driveway",
+ "drone",
+ "drop",
+ "droplight",
+ "dropper",
+ "drought",
+ "medicine",
+ "pharmacy",
+ "drum",
+ "drummer",
+ "drumstick",
+ "dry",
+ "duchess",
+ "duck",
+ "duckbill",
+ "duckling",
+ "duct tape",
+ "dude",
+ "duet",
+ "duffel",
+ "canoe",
+ "dumbbell",
+ "dumpling",
+ "dune",
+ "dunk",
+ "durian",
+ "dusk",
+ "dust",
+ "garbage truck",
+ "dustpan",
+ "duvet",
+ "DVD",
+ "dye",
+ "eagle",
+ "ear",
+ "earmuff",
+ "earphone",
+ "earplug",
+ "earring",
+ "earthquake",
+ "easel",
+ "easter",
+ "easter bunny",
+ "easter egg",
+ "eat",
+ "restaurant",
+ "eclair",
+ "eclipse",
+ "ecosystem",
+ "edit",
+ "education",
+ "educator",
+ "eel",
+ "egg",
+ "egg roll",
+ "egg tart",
+ "eggbeater",
+ "egret",
+ "Eiffel tower",
+ "elastic band",
+ "senior",
+ "electric chair",
+ "electric drill",
+ "electrician",
+ "electricity",
+ "electron",
+ "electronic",
+ "elephant",
+ "elevation map",
+ "elevator",
+ "elevator car",
+ "elevator door",
+ "elevator lobby",
+ "elevator shaft",
+ "embankment",
+ "embassy",
+ "embellishment",
+ "ember",
+ "emblem",
+ "embroidery",
+ "emerald",
+ "emergency",
+ "emergency service",
+ "emergency vehicle",
+ "emotion",
+ "Empire State Building",
+ "enamel",
+ "enclosure",
+ "side table",
+ "energy",
+ "engagement",
+ "engagement ring",
+ "engine",
+ "engine room",
+ "engineer",
+ "engineering",
+ "english shorthair",
+ "ensemble",
+ "enter",
+ "entertainer",
+ "entertainment",
+ "entertainment center",
+ "entrance",
+ "entrance hall",
+ "envelope",
+ "equestrian",
+ "equipment",
+ "eraser",
+ "erhu",
+ "erosion",
+ "escalator",
+ "escargot",
+ "espresso",
+ "estate",
+ "estuary",
+ "eucalyptus tree",
+ "evening",
+ "evening dress",
+ "evening light",
+ "evening sky",
+ "evening sun",
+ "event",
+ "evergreen",
+ "ewe",
+ "excavation",
+ "exercise",
+ "exhaust hood",
+ "exhibition",
+ "exit",
+ "explorer",
+ "explosion",
+ "extension cord",
+ "extinguisher",
+ "extractor",
+ "extrude",
+ "eye",
+ "eye shadow",
+ "eyebrow",
+ "eyeliner",
+ "fabric",
+ "fabric store",
+ "facade",
+ "face",
+ "face close-up",
+ "face powder",
+ "face towel",
+ "facial tissue holder",
+ "facility",
+ "factory",
+ "factory workshop",
+ "fair",
+ "fairground",
+ "fairy",
+ "falcon",
+ "fall",
+ "family",
+ "family car",
+ "family photo",
+ "family room",
+ "fan",
+ "fang",
+ "farm",
+ "farmer",
+ "farmer market",
+ "farmhouse",
+ "fashion",
+ "fashion accessory",
+ "fashion designer",
+ "fashion girl",
+ "fashion illustration",
+ "fashion look",
+ "fashion model",
+ "fashion show",
+ "fast food",
+ "fastfood restaurant",
+ "father",
+ "faucet",
+ "fault",
+ "fauna",
+ "fawn",
+ "fax",
+ "feast",
+ "feather",
+ "fedora",
+ "feed",
+ "feedbag",
+ "feeding",
+ "feeding chair",
+ "feline",
+ "mountain lion",
+ "fence",
+ "fender",
+ "fern",
+ "ferret",
+ "ferris wheel",
+ "ferry",
+ "fertilizer",
+ "festival",
+ "fiber",
+ "fiction",
+ "fiction book",
+ "field",
+ "field road",
+ "fig",
+ "fight",
+ "figure skater",
+ "figurine",
+ "file",
+ "file photo",
+ "file cabinet",
+ "fill",
+ "film camera",
+ "film director",
+ "film format",
+ "film premiere",
+ "film producer",
+ "filming",
+ "filter",
+ "fin",
+ "hand",
+ "finish line",
+ "fir",
+ "fir tree",
+ "fire",
+ "fire alarm",
+ "fire department",
+ "fire truck",
+ "fire escape",
+ "fire hose",
+ "fire pit",
+ "fire station",
+ "firecracker",
+ "fireman",
+ "fireplace",
+ "firework",
+ "firework display",
+ "first-aid kit",
+ "fish",
+ "fish boat",
+ "fish market",
+ "fish pond",
+ "fishbowl",
+ "fisherman",
+ "fishing",
+ "fishing boat",
+ "fishing net",
+ "fishing pole",
+ "fishing village",
+ "fitness",
+ "fitness course",
+ "five",
+ "fixture",
+ "fjord",
+ "flag",
+ "flag pole",
+ "flake",
+ "flame",
+ "flamingo",
+ "flannel",
+ "flap",
+ "flare",
+ "flash",
+ "flask",
+ "flat",
+ "flatfish",
+ "flavor",
+ "flea",
+ "flea market",
+ "fleet",
+ "flight",
+ "flight attendant",
+ "flip",
+ "flip-flop",
+ "flipchart",
+ "float",
+ "flock",
+ "flood",
+ "floor",
+ "floor fan",
+ "floor mat",
+ "floor plan",
+ "floor window",
+ "floral arrangement",
+ "florist",
+ "floss",
+ "flour",
+ "flow",
+ "flower",
+ "flower basket",
+ "flower bed",
+ "flower box",
+ "flower field",
+ "flower girl",
+ "flower market",
+ "fluid",
+ "flush",
+ "flute",
+ "fly",
+ "fly fishing",
+ "flyer",
+ "horse",
+ "foam",
+ "fog",
+ "foggy",
+ "foie gra",
+ "foil",
+ "folding chair",
+ "leaf",
+ "folk artist",
+ "folk dance",
+ "folk rock artist",
+ "fondant",
+ "hotpot",
+ "font",
+ "food",
+ "food coloring",
+ "food court",
+ "food processor",
+ "food stand",
+ "food truck",
+ "foosball",
+ "foot",
+ "foot bridge",
+ "football",
+ "football coach",
+ "football college game",
+ "football match",
+ "football field",
+ "football game",
+ "football helmet",
+ "football player",
+ "football stadium",
+ "football team",
+ "path",
+ "footprint",
+ "footrest",
+ "footstall",
+ "footwear",
+ "forbidden city",
+ "ford",
+ "forehead",
+ "forest",
+ "forest fire",
+ "forest floor",
+ "forest path",
+ "forest road",
+ "forge",
+ "fork",
+ "forklift",
+ "form",
+ "formal garden",
+ "formation",
+ "formula 1",
+ "fort",
+ "fortification",
+ "forward",
+ "fossil",
+ "foundation",
+ "fountain",
+ "fountain pen",
+ "fox",
+ "frame",
+ "freckle",
+ "highway",
+ "lorry",
+ "French",
+ "French bulldog",
+ "French fries",
+ "French toast",
+ "freshener",
+ "fridge",
+ "fried chicken",
+ "fried egg",
+ "fried rice",
+ "friendship",
+ "frisbee",
+ "frog",
+ "frost",
+ "frosting",
+ "frosty",
+ "frozen",
+ "fruit",
+ "fruit cake",
+ "fruit dish",
+ "fruit market",
+ "fruit salad",
+ "fruit stand",
+ "fruit tree",
+ "fruits shop",
+ "fry",
+ "frying pan",
+ "fudge",
+ "fuel",
+ "fume hood",
+ "fun",
+ "funeral",
+ "fungi",
+ "funnel",
+ "fur",
+ "fur coat",
+ "furniture",
+ "futon",
+ "gadget",
+ "muzzle",
+ "galaxy",
+ "gallery",
+ "game",
+ "game board",
+ "game controller",
+ "ham",
+ "gang",
+ "garage",
+ "garage door",
+ "garage kit",
+ "garbage",
+ "garden",
+ "garden asparagus",
+ "garden hose",
+ "garden spider",
+ "gardener",
+ "gardening",
+ "garfield",
+ "gargoyle",
+ "wreath",
+ "garlic",
+ "garment",
+ "gas",
+ "gas station",
+ "gas stove",
+ "gasmask",
+ "collect",
+ "gathering",
+ "gauge",
+ "gazebo",
+ "gear",
+ "gecko",
+ "geisha",
+ "gel",
+ "general store",
+ "generator",
+ "geranium",
+ "ghost",
+ "gift",
+ "gift bag",
+ "gift basket",
+ "gift box",
+ "gift card",
+ "gift shop",
+ "gift wrap",
+ "gig",
+ "gin",
+ "ginger",
+ "gingerbread",
+ "gingerbread house",
+ "ginkgo tree",
+ "giraffe",
+ "girl",
+ "give",
+ "glacier",
+ "gladiator",
+ "glass bead",
+ "glass bottle",
+ "glass bowl",
+ "glass box",
+ "glass building",
+ "glass door",
+ "glass floor",
+ "glass house",
+ "glass jar",
+ "glass plate",
+ "glass table",
+ "glass vase",
+ "glass wall",
+ "glass window",
+ "glasses",
+ "glaze",
+ "glider",
+ "earth",
+ "glove",
+ "glow",
+ "glue pudding",
+ "go",
+ "go for",
+ "goal",
+ "goalkeeper",
+ "goat",
+ "goat cheese",
+ "gobi",
+ "goggles",
+ "gold",
+ "gold medal",
+ "Golden Gate Bridge",
+ "golden retriever",
+ "goldfish",
+ "golf",
+ "golf cap",
+ "golf cart",
+ "golf club",
+ "golf course",
+ "golfer",
+ "goose",
+ "gorilla",
+ "gothic",
+ "gourd",
+ "government",
+ "government agency",
+ "gown",
+ "graduate",
+ "graduation",
+ "grain",
+ "grampus",
+ "grand prix",
+ "grandfather",
+ "grandmother",
+ "grandparent",
+ "granite",
+ "granola",
+ "grape",
+ "grapefruit",
+ "wine",
+ "grass",
+ "grasshopper",
+ "grassland",
+ "grassy",
+ "grater",
+ "grave",
+ "gravel",
+ "gravestone",
+ "gravy",
+ "gravy boat",
+ "gray",
+ "graze",
+ "grazing",
+ "green",
+ "greenery",
+ "greet",
+ "greeting",
+ "greeting card",
+ "greyhound",
+ "grid",
+ "griddle",
+ "grill",
+ "grille",
+ "grilled eel",
+ "grind",
+ "grinder",
+ "grits",
+ "grocery bag",
+ "grotto",
+ "ground squirrel",
+ "group",
+ "group photo",
+ "grove",
+ "grow",
+ "guacamole",
+ "guard",
+ "guard dog",
+ "guest house",
+ "guest room",
+ "guide",
+ "guinea pig",
+ "guitar",
+ "guitarist",
+ "gulf",
+ "gull",
+ "gun",
+ "gundam",
+ "gurdwara",
+ "guzheng",
+ "gym",
+ "gymnast",
+ "habitat",
+ "hacker",
+ "hail",
+ "hair",
+ "hair color",
+ "hair spray",
+ "hairbrush",
+ "haircut",
+ "hairgrip",
+ "hairnet",
+ "hairpin",
+ "hairstyle",
+ "half",
+ "hall",
+ "halloween",
+ "halloween costume",
+ "halloween pumpkin",
+ "halter top",
+ "hamburg",
+ "hamburger",
+ "hami melon",
+ "hammer",
+ "hammock",
+ "hamper",
+ "hamster",
+ "hand dryer",
+ "hand glass",
+ "hand towel",
+ "handbag",
+ "handball",
+ "handcuff",
+ "handgun",
+ "handkerchief",
+ "handle",
+ "handsaw",
+ "handshake",
+ "handstand",
+ "handwriting",
+ "hanfu",
+ "hang",
+ "hangar",
+ "hanger",
+ "happiness",
+ "harbor",
+ "harbor seal",
+ "hard rock artist",
+ "hardback book",
+ "safety helmet",
+ "hardware",
+ "hardware store",
+ "hardwood",
+ "hardwood floor",
+ "mouth organ",
+ "pipe organ",
+ "harpsichord",
+ "harvest",
+ "harvester",
+ "hassock",
+ "hat",
+ "hatbox",
+ "hautboy",
+ "hawthorn",
+ "hay",
+ "hayfield",
+ "hazelnut",
+ "head",
+ "head coach",
+ "headlight",
+ "headboard",
+ "headdress",
+ "headland",
+ "headquarter",
+ "hearing",
+ "heart",
+ "heart shape",
+ "heat",
+ "heater",
+ "heather",
+ "hedge",
+ "hedgehog",
+ "heel",
+ "helicopter",
+ "heliport",
+ "helmet",
+ "help",
+ "hen",
+ "henna",
+ "herb",
+ "herd",
+ "hermit crab",
+ "hero",
+ "heron",
+ "hibiscus",
+ "hibiscus flower",
+ "hide",
+ "high bar",
+ "high heel",
+ "highland",
+ "highlight",
+ "hike",
+ "hiker",
+ "hiking boot",
+ "hiking equipment",
+ "hill",
+ "hill country",
+ "hill station",
+ "hillside",
+ "hindu temple",
+ "hinge",
+ "hip",
+ "hip hop artist",
+ "hippo",
+ "historian",
+ "historic",
+ "history",
+ "hockey",
+ "hockey arena",
+ "hockey game",
+ "hockey player",
+ "hockey stick",
+ "hoe",
+ "hole",
+ "vacation",
+ "holly",
+ "holothurian",
+ "home",
+ "home appliance",
+ "home base",
+ "home decor",
+ "home interior",
+ "home office",
+ "home theater",
+ "homework",
+ "hummus",
+ "honey",
+ "beehive",
+ "honeymoon",
+ "hood",
+ "hoodie",
+ "hook",
+ "jump",
+ "horizon",
+ "hornbill",
+ "horned cow",
+ "hornet",
+ "horror",
+ "horror film",
+ "horse blanket",
+ "horse cart",
+ "horse farm",
+ "horse ride",
+ "horseback",
+ "horseshoe",
+ "hose",
+ "hospital",
+ "hospital bed",
+ "hospital room",
+ "host",
+ "inn",
+ "hot",
+ "hot air balloon",
+ "hot dog",
+ "hot sauce",
+ "hot spring",
+ "hotel",
+ "hotel lobby",
+ "hotel room",
+ "hotplate",
+ "hourglass",
+ "house",
+ "house exterior",
+ "houseplant",
+ "hoverboard",
+ "howler",
+ "huddle",
+ "hug",
+ "hula hoop",
+ "person",
+ "humidifier",
+ "hummingbird",
+ "humpback whale",
+ "hunt",
+ "hunting lodge",
+ "hurdle",
+ "hurricane",
+ "husky",
+ "hut",
+ "hyaena",
+ "hybrid",
+ "hydrangea",
+ "hydrant",
+ "seaplane",
+ "ice",
+ "ice bag",
+ "polar bear",
+ "ice cave",
+ "icecream",
+ "ice cream cone",
+ "ice cream parlor",
+ "ice cube",
+ "ice floe",
+ "ice hockey player",
+ "ice hockey team",
+ "lollipop",
+ "ice maker",
+ "rink",
+ "ice sculpture",
+ "ice shelf",
+ "skate",
+ "ice skating",
+ "iceberg",
+ "icicle",
+ "icing",
+ "icon",
+ "id photo",
+ "identity card",
+ "igloo",
+ "light",
+ "iguana",
+ "illuminate",
+ "illustration",
+ "image",
+ "impala",
+ "incense",
+ "independence day",
+ "individual",
+ "indoor",
+ "indoor rower",
+ "induction cooker",
+ "industrial area",
+ "industry",
+ "infantry",
+ "inflatable boat",
+ "information desk",
+ "infrastructure",
+ "ingredient",
+ "inhalator",
+ "injection",
+ "injury",
+ "ink",
+ "inking pad",
+ "inlet",
+ "inscription",
+ "insect",
+ "install",
+ "instrument",
+ "insulated cup",
+ "interaction",
+ "interior design",
+ "website",
+ "intersection",
+ "interview",
+ "invertebrate",
+ "invitation",
+ "ipad",
+ "iphone",
+ "ipod",
+ "iris",
+ "iron",
+ "ironing board",
+ "irrigation system",
+ "island",
+ "islet",
+ "isopod",
+ "ivory",
+ "ivy",
+ "izakaya",
+ "jack",
+ "jackcrab",
+ "jacket",
+ "jacuzzi",
+ "jade",
+ "jaguar",
+ "jail cell",
+ "jam",
+ "japanese garden",
+ "jasmine",
+ "jaw",
+ "jay",
+ "jazz",
+ "jazz artist",
+ "jazz fusion artist",
+ "jeans",
+ "jeep",
+ "jelly",
+ "jelly bean",
+ "jellyfish",
+ "jet",
+ "motorboat",
+ "jewel",
+ "jewellery",
+ "jewelry shop",
+ "jigsaw puzzle",
+ "rickshaw",
+ "jockey",
+ "jockey cap",
+ "jog",
+ "joint",
+ "journalist",
+ "joystick",
+ "judge",
+ "jug",
+ "juggle",
+ "juice",
+ "juicer",
+ "jujube",
+ "jump rope",
+ "jumpsuit",
+ "jungle",
+ "junkyard",
+ "kale",
+ "kaleidoscope",
+ "kangaroo",
+ "karaoke",
+ "karate",
+ "karting",
+ "kasbah",
+ "kayak",
+ "kebab",
+ "key",
+ "keycard",
+ "khaki",
+ "kick",
+ "kilt",
+ "kimono",
+ "kindergarden classroom",
+ "kindergarten",
+ "king",
+ "king crab",
+ "kiss",
+ "kit",
+ "kitchen",
+ "kitchen cabinet",
+ "kitchen counter",
+ "kitchen floor",
+ "kitchen hood",
+ "kitchen island",
+ "kitchen sink",
+ "kitchen table",
+ "kitchen utensil",
+ "kitchen window",
+ "kitchenware",
+ "kite",
+ "kiwi",
+ "knee pad",
+ "kneel",
+ "knife",
+ "rider",
+ "knit",
+ "knitting needle",
+ "knob",
+ "knocker",
+ "knot",
+ "koala",
+ "koi",
+ "ktv",
+ "laboratory",
+ "lab coat",
+ "label",
+ "labrador",
+ "maze",
+ "lace",
+ "lace dress",
+ "ladder",
+ "ladle",
+ "ladybird",
+ "lagoon",
+ "lake",
+ "lake district",
+ "lake house",
+ "lakeshore",
+ "lamb",
+ "lamb chop",
+ "lamp post",
+ "lamp shade",
+ "spear",
+ "land",
+ "land vehicle",
+ "landfill",
+ "landing",
+ "landing deck",
+ "landmark",
+ "landscape",
+ "landslide",
+ "lanyard",
+ "lantern",
+ "lap",
+ "laptop",
+ "laptop keyboard",
+ "larva",
+ "lasagne",
+ "laser",
+ "lash",
+ "lasso",
+ "latch",
+ "latex",
+ "latte",
+ "laugh",
+ "launch",
+ "launch event",
+ "launch party",
+ "laundromat",
+ "laundry",
+ "laundry basket",
+ "laundry room",
+ "lava",
+ "lavender",
+ "lawn",
+ "lawn wedding",
+ "lawyer",
+ "lay",
+ "lead",
+ "lead singer",
+ "lead to",
+ "leader",
+ "leak",
+ "lean",
+ "learn",
+ "leash",
+ "leather",
+ "leather jacket",
+ "leather shoe",
+ "speech",
+ "lecture hall",
+ "lecture room",
+ "ledge",
+ "leftover",
+ "leg",
+ "legend",
+ "legging",
+ "legislative chamber",
+ "lego",
+ "legume",
+ "lemon",
+ "lemon juice",
+ "lemonade",
+ "lemur",
+ "lens",
+ "lens flare",
+ "lentil",
+ "leopard",
+ "leotard",
+ "tights",
+ "leprechaun",
+ "lesson",
+ "letter",
+ "mailbox",
+ "letter logo",
+ "lettering",
+ "lettuce",
+ "level",
+ "library",
+ "license",
+ "license plate",
+ "lichen",
+ "lick",
+ "lid",
+ "lie",
+ "life belt",
+ "life jacket",
+ "lifeboat",
+ "lifeguard",
+ "lift",
+ "light fixture",
+ "light show",
+ "light switch",
+ "lighting",
+ "lightning",
+ "lightning rod",
+ "lilac",
+ "lily",
+ "limb",
+ "lime",
+ "limestone",
+ "limo",
+ "line",
+ "line art",
+ "line up",
+ "linen",
+ "liner",
+ "lion",
+ "lip balm",
+ "lipstick",
+ "liquid",
+ "liquor store",
+ "list",
+ "litchi",
+ "live",
+ "livestock",
+ "living room",
+ "living space",
+ "lizard",
+ "load",
+ "loading dock",
+ "loafer",
+ "hallway",
+ "locate",
+ "lock",
+ "lock chamber",
+ "locker",
+ "loft",
+ "log",
+ "log cabin",
+ "logo",
+ "loki",
+ "long hair",
+ "longboard",
+ "loom",
+ "loop",
+ "lose",
+ "lottery",
+ "lotus",
+ "love",
+ "loveseat",
+ "luggage",
+ "lumber",
+ "lumberjack",
+ "lunch",
+ "lunch box",
+ "lush",
+ "luxury",
+ "luxury yacht",
+ "mac",
+ "macadamia",
+ "macaque",
+ "macaroni",
+ "macaw",
+ "machete",
+ "machine",
+ "machine gun",
+ "magazine",
+ "magic",
+ "magician",
+ "magnet",
+ "magnifying glass",
+ "magnolia",
+ "magpie",
+ "mahjong",
+ "mahout",
+ "maid",
+ "chain mail",
+ "mail slot",
+ "make",
+ "makeover",
+ "makeup artist",
+ "makeup tool",
+ "mallard",
+ "mallard duck",
+ "mallet",
+ "mammal",
+ "mammoth",
+ "man",
+ "management",
+ "manager",
+ "manatee",
+ "mandala",
+ "mandarin orange",
+ "mandarine",
+ "mane",
+ "manga",
+ "manger",
+ "mango",
+ "mangosteen",
+ "mangrove",
+ "manhattan",
+ "manhole",
+ "manhole cover",
+ "manicure",
+ "mannequin",
+ "manor house",
+ "mansion",
+ "mantid",
+ "mantle",
+ "manufactured home",
+ "manufacturing",
+ "manuscript",
+ "map",
+ "maple",
+ "maple leaf",
+ "maple syrup",
+ "maraca",
+ "marathon",
+ "marble",
+ "march",
+ "marching band",
+ "mare",
+ "marigold",
+ "marine",
+ "marine invertebrate",
+ "marine mammal",
+ "puppet",
+ "mark",
+ "market",
+ "market square",
+ "market stall",
+ "marriage",
+ "martial",
+ "martial artist",
+ "martial arts gym",
+ "martini",
+ "martini glass",
+ "mascara",
+ "mascot",
+ "mashed potato",
+ "masher",
+ "mask",
+ "massage",
+ "mast",
+ "mat",
+ "matador",
+ "match",
+ "matchbox",
+ "material",
+ "mattress",
+ "mausoleum",
+ "maxi dress",
+ "meal",
+ "measuring cup",
+ "measuring tape",
+ "meat",
+ "meatball",
+ "mechanic",
+ "mechanical fan",
+ "medal",
+ "media",
+ "medical equipment",
+ "medical image",
+ "medical staff",
+ "medicine cabinet",
+ "medieval",
+ "medina",
+ "meditation",
+ "meerkat",
+ "meet",
+ "melon",
+ "monument",
+ "menu",
+ "mermaid",
+ "net",
+ "mess",
+ "messenger bag",
+ "metal",
+ "metal artist",
+ "metal detector",
+ "meter",
+ "mezzanine",
+ "microphone",
+ "microscope",
+ "microwave",
+ "midnight",
+ "milestone",
+ "military uniform",
+ "milk",
+ "milk can",
+ "milk tea",
+ "milkshake",
+ "mill",
+ "mine",
+ "miner",
+ "mineral",
+ "mineral water",
+ "miniskirt",
+ "miniature",
+ "minibus",
+ "minister",
+ "minivan",
+ "mint",
+ "mint candy",
+ "mirror",
+ "miss",
+ "missile",
+ "mission",
+ "mistletoe",
+ "mix",
+ "mixer",
+ "mixing bowl",
+ "mixture",
+ "moat",
+ "mobility scooter",
+ "model",
+ "model car",
+ "modern",
+ "modern tower",
+ "moisture",
+ "mold",
+ "molding",
+ "mole",
+ "monarch",
+ "money",
+ "monitor",
+ "monk",
+ "monkey",
+ "monkey wrench",
+ "monochrome",
+ "monocycle",
+ "monster truck",
+ "moon",
+ "moon cake",
+ "moonlight",
+ "moor",
+ "moose",
+ "swab",
+ "moped",
+ "morning",
+ "morning fog",
+ "morning light",
+ "morning sun",
+ "mortar",
+ "mosaic",
+ "mosque",
+ "mosquito",
+ "moss",
+ "motel",
+ "moth",
+ "mother",
+ "motherboard",
+ "motif",
+ "sport",
+ "motor",
+ "motorbike",
+ "motorcycle",
+ "motorcycle helmet",
+ "motorcycle racer",
+ "motorcyclist",
+ "motorsport",
+ "mound",
+ "mountain",
+ "mountain bike",
+ "mountain biker",
+ "mountain biking",
+ "mountain gorilla",
+ "mountain lake",
+ "mountain landscape",
+ "mountain pass",
+ "mountain path",
+ "mountain range",
+ "mountain river",
+ "mountain snowy",
+ "mountain stream",
+ "mountain view",
+ "mountain village",
+ "mountaineer",
+ "mountaineering bag",
+ "mouse",
+ "mousepad",
+ "mousetrap",
+ "mouth",
+ "mouthwash",
+ "move",
+ "movie poster",
+ "movie ticket",
+ "mower",
+ "mp3 player",
+ "mr",
+ "mud",
+ "muffin",
+ "mug",
+ "mulberry",
+ "mulch",
+ "mule",
+ "municipality",
+ "mural",
+ "muscle",
+ "muscle car",
+ "museum",
+ "mushroom",
+ "music",
+ "music festival",
+ "music stool",
+ "music studio",
+ "music video performer",
+ "musical keyboard",
+ "musician",
+ "mussel",
+ "mustard",
+ "mythology",
+ "nacho",
+ "nail polish",
+ "nailfile",
+ "nanny",
+ "napkin",
+ "narrow",
+ "national flag",
+ "nativity scene",
+ "natural history museum",
+ "nature",
+ "nature reserve",
+ "navigation",
+ "navratri",
+ "navy",
+ "nebula",
+ "neck",
+ "neckband",
+ "necklace",
+ "neckline",
+ "nectar",
+ "nectarine",
+ "needle",
+ "neighbor",
+ "neighbourhood",
+ "neon",
+ "neon light",
+ "nerve",
+ "nest",
+ "new year",
+ "newborn",
+ "newfoundland",
+ "newlywed",
+ "news",
+ "news conference",
+ "newsstand",
+ "night",
+ "night market",
+ "night sky",
+ "night view",
+ "nightclub",
+ "nightstand",
+ "noodle",
+ "nose",
+ "noseband",
+ "note",
+ "notebook",
+ "notepad",
+ "notepaper",
+ "notice",
+ "number icon",
+ "nun",
+ "nurse",
+ "nursery",
+ "nursing home",
+ "nut",
+ "nutcracker",
+ "oak",
+ "oak tree",
+ "oar",
+ "oasis",
+ "oast house",
+ "oatmeal",
+ "oats",
+ "obelisk",
+ "observation tower",
+ "observatory",
+ "obstacle course",
+ "sea",
+ "octopus",
+ "offer",
+ "office",
+ "office building",
+ "office chair",
+ "office cubicle",
+ "office desk",
+ "office supply",
+ "office window",
+ "officer",
+ "official",
+ "oil",
+ "oil lamp",
+ "oil painting",
+ "oilrig",
+ "okra",
+ "old photo",
+ "olive",
+ "olive oil",
+ "olive tree",
+ "omelet",
+ "onion",
+ "onion ring",
+ "opal",
+ "open",
+ "opening",
+ "opening ceremony",
+ "opera",
+ "opera house",
+ "operate",
+ "operating room",
+ "operation",
+ "optical shop",
+ "orangutan",
+ "orange",
+ "orange juice",
+ "orange tree",
+ "orangery",
+ "orbit",
+ "orchard",
+ "orchestra pit",
+ "orchid",
+ "order",
+ "organization",
+ "origami",
+ "ornament",
+ "osprey",
+ "ostrich",
+ "otter",
+ "out",
+ "outcrop",
+ "outdoor",
+ "outhouse",
+ "electric outlet",
+ "outline",
+ "oval",
+ "oven",
+ "overall",
+ "overcoat",
+ "overpass",
+ "owl",
+ "oyster",
+ "teething ring",
+ "pack",
+ "package",
+ "paddock",
+ "police van",
+ "padlock",
+ "paella",
+ "pagoda",
+ "pain",
+ "paint brush",
+ "painter",
+ "paisley bandanna",
+ "palace",
+ "palette",
+ "paling",
+ "pall",
+ "palm tree",
+ "pan",
+ "pancake",
+ "panda",
+ "panel",
+ "panorama",
+ "pansy",
+ "pant",
+ "pantry",
+ "pants",
+ "pantyhose",
+ "papaya",
+ "paper",
+ "paper bag",
+ "paper cutter",
+ "paper lantern",
+ "paper plate",
+ "paper towel",
+ "paperback book",
+ "paperweight",
+ "parachute",
+ "parade",
+ "paradise",
+ "parrot",
+ "paramedic",
+ "paraquet",
+ "parasail",
+ "paratrooper",
+ "parchment",
+ "parish",
+ "park",
+ "park bench",
+ "parking",
+ "parking garage",
+ "parking meter",
+ "parking sign",
+ "parliament",
+ "parsley",
+ "participant",
+ "partner",
+ "partridge",
+ "party",
+ "party hat",
+ "pass",
+ "passage",
+ "passbook",
+ "passenger",
+ "passenger ship",
+ "passenger train",
+ "passion fruit",
+ "passport",
+ "pasta",
+ "paste",
+ "pastry",
+ "pasture",
+ "patch",
+ "patient",
+ "pattern",
+ "pavement",
+ "pavilion",
+ "paw",
+ "pay",
+ "payphone",
+ "pea",
+ "peace",
+ "peach",
+ "peacock",
+ "peak",
+ "peanut",
+ "peanut butter",
+ "pear",
+ "pearl",
+ "pebble",
+ "pecan",
+ "pedestrian",
+ "pedestrian bridge",
+ "pedestrian street",
+ "peel",
+ "peeler",
+ "pegboard",
+ "pegleg",
+ "pelican",
+ "pen",
+ "penalty kick",
+ "pencil",
+ "pencil case",
+ "pencil sharpener",
+ "pencil skirt",
+ "pendant",
+ "pendulum",
+ "penguin",
+ "peninsula",
+ "pennant",
+ "penny",
+ "piggy bank",
+ "peony",
+ "pepper",
+ "pepper grinder",
+ "peppercorn",
+ "pepperoni",
+ "perch",
+ "perform",
+ "performance",
+ "performance arena",
+ "perfume",
+ "pergola",
+ "persian cat",
+ "persimmon",
+ "personal care",
+ "personal flotation device",
+ "pest",
+ "pet",
+ "pet shop",
+ "pet store",
+ "petal",
+ "petunia",
+ "church bench",
+ "pheasant",
+ "phenomenon",
+ "philosopher",
+ "phone",
+ "phonebook",
+ "record player",
+ "photo",
+ "photo booth",
+ "photo frame",
+ "photography",
+ "physicist",
+ "physics laboratory",
+ "pianist",
+ "piano",
+ "plectrum",
+ "pick up",
+ "pickle",
+ "picnic",
+ "picnic area",
+ "picnic basket",
+ "picnic table",
+ "picture",
+ "picture frame",
+ "pie",
+ "pigeon",
+ "pilgrim",
+ "tablet",
+ "pillow",
+ "pilot",
+ "pilot boat",
+ "pin",
+ "pine",
+ "pine cone",
+ "pine forest",
+ "pine nut",
+ "pineapple",
+ "table tennis table",
+ "table tennis",
+ "pink",
+ "pint",
+ "pipa",
+ "pipe",
+ "pipe bowl",
+ "pirate",
+ "pirate flag",
+ "pirate ship",
+ "pistachio",
+ "ski slope",
+ "pocket bread",
+ "pitaya",
+ "pitbull",
+ "pitch",
+ "pitcher",
+ "pitcher plant",
+ "pitchfork",
+ "pizza",
+ "pizza cutter",
+ "pizza pan",
+ "pizzeria",
+ "placard",
+ "place",
+ "place mat",
+ "plaid",
+ "plain",
+ "plan",
+ "planet",
+ "planet earth",
+ "plank",
+ "plant",
+ "plantation",
+ "planting",
+ "plaque",
+ "plaster",
+ "plastic",
+ "plasticine",
+ "plateau",
+ "platform",
+ "platinum",
+ "platter",
+ "play",
+ "play badminton",
+ "play baseball",
+ "play basketball",
+ "play billiard",
+ "play football",
+ "play pong",
+ "play tennis",
+ "play volleyball",
+ "player",
+ "playground",
+ "playhouse",
+ "playing card",
+ "playing chess",
+ "playing golf",
+ "playing mahjong",
+ "playingfield",
+ "playpen",
+ "playroom",
+ "plaza",
+ "plier",
+ "plot",
+ "plow",
+ "plug",
+ "plug hat",
+ "plum",
+ "plumber",
+ "plumbing fixture",
+ "plume",
+ "plywood",
+ "pocket",
+ "pocket watch",
+ "pocketknife",
+ "pod",
+ "podium",
+ "poetry",
+ "poinsettia",
+ "point",
+ "pointer",
+ "poker card",
+ "poker chip",
+ "poker table",
+ "pole",
+ "polecat",
+ "police",
+ "police car",
+ "police dog",
+ "police station",
+ "politician",
+ "polka dot",
+ "pollen",
+ "pollution",
+ "polo",
+ "polo neck",
+ "polo shirt",
+ "pomegranate",
+ "pomeranian",
+ "poncho",
+ "pond",
+ "ponytail",
+ "poodle",
+ "pool",
+ "pop",
+ "pop artist",
+ "popcorn",
+ "pope",
+ "poppy",
+ "porcelain",
+ "porch",
+ "pork",
+ "porridge",
+ "portable battery",
+ "portal",
+ "portfolio",
+ "porthole",
+ "portrait",
+ "portrait session",
+ "pose",
+ "possum",
+ "post",
+ "post office",
+ "stamp",
+ "postcard",
+ "poster",
+ "poster page",
+ "pot",
+ "potato",
+ "potato chip",
+ "potato salad",
+ "potholder",
+ "potty",
+ "pouch",
+ "poultry",
+ "pound",
+ "pour",
+ "powder",
+ "power line",
+ "power plugs and sockets",
+ "power see",
+ "power station",
+ "practice",
+ "Prague Castle",
+ "prayer",
+ "preacher",
+ "premiere",
+ "prescription",
+ "show",
+ "presentation",
+ "president",
+ "press room",
+ "pressure cooker",
+ "pretzel",
+ "prince",
+ "princess",
+ "print",
+ "printed page",
+ "printer",
+ "printing",
+ "prison",
+ "produce",
+ "product",
+ "profession",
+ "professional",
+ "professor",
+ "project picture",
+ "projection screen",
+ "projector",
+ "prom",
+ "promenade",
+ "propeller",
+ "prophet",
+ "proposal",
+ "protective suit",
+ "protest",
+ "protester",
+ "publication",
+ "publicity portrait",
+ "ice hockey",
+ "pudding",
+ "puddle",
+ "puff",
+ "puffin",
+ "pug",
+ "pull",
+ "pulpit",
+ "pulse",
+ "pump",
+ "pumpkin",
+ "pumpkin pie",
+ "pumpkin seed",
+ "punch bag",
+ "punch",
+ "student",
+ "purple",
+ "push",
+ "putt",
+ "puzzle",
+ "tower",
+ "pyramid",
+ "python",
+ "qr code",
+ "quail",
+ "quarry",
+ "quarter",
+ "quartz",
+ "queen",
+ "quesadilla",
+ "queue",
+ "quiche",
+ "quilt",
+ "quilting",
+ "quote",
+ "rabbit",
+ "raccoon",
+ "race",
+ "race track",
+ "raceway",
+ "race car",
+ "racket",
+ "radar",
+ "radiator",
+ "radio",
+ "raft",
+ "rag doll",
+ "rail",
+ "railcar",
+ "railroad",
+ "railroad bridge",
+ "railway line",
+ "railway station",
+ "rain",
+ "rain boot",
+ "rainbow",
+ "rainbow trout",
+ "raincoat",
+ "rainforest",
+ "rainy",
+ "raisin",
+ "rake",
+ "ram",
+ "ramp",
+ "rapeseed",
+ "rapid",
+ "rapper",
+ "raspberry",
+ "rat",
+ "ratchet",
+ "raven",
+ "ravine",
+ "ray",
+ "razor",
+ "razor blade",
+ "read",
+ "reading",
+ "reamer",
+ "rear",
+ "rear light",
+ "rear view",
+ "rearview mirror",
+ "receipt",
+ "receive",
+ "reception",
+ "recipe",
+ "record",
+ "record producer",
+ "recorder",
+ "recording studio",
+ "recreation room",
+ "recreational vehicle",
+ "rectangle",
+ "recycling",
+ "recycling bin",
+ "red",
+ "red carpet",
+ "red flag",
+ "red panda",
+ "red wine",
+ "redwood",
+ "reed",
+ "reef",
+ "reel",
+ "referee",
+ "reflect",
+ "reflection",
+ "reflector",
+ "register",
+ "rein",
+ "reindeer",
+ "relax",
+ "release",
+ "relief",
+ "religion",
+ "religious",
+ "relish",
+ "remain",
+ "remodel",
+ "remote",
+ "remove",
+ "repair",
+ "repair shop",
+ "reptile",
+ "rescue",
+ "rescuer",
+ "research",
+ "researcher",
+ "reservoir",
+ "residence",
+ "residential neighborhood",
+ "resin",
+ "resort",
+ "resort town",
+ "restaurant kitchen",
+ "restaurant patio",
+ "restroom",
+ "retail",
+ "retriever",
+ "retro",
+ "reveal",
+ "rhinoceros",
+ "rhododendron",
+ "rib",
+ "ribbon",
+ "rice",
+ "rice cooker",
+ "rice field",
+ "ride",
+ "ridge",
+ "riding",
+ "rifle",
+ "rim",
+ "ring",
+ "riot",
+ "ripple",
+ "rise",
+ "rise building",
+ "river",
+ "river bank",
+ "river boat",
+ "river valley",
+ "riverbed",
+ "road",
+ "road sign",
+ "road trip",
+ "roadside",
+ "roast chicken",
+ "robe",
+ "robin",
+ "robot",
+ "stone",
+ "rock arch",
+ "rock artist",
+ "rock band",
+ "rock climber",
+ "rock climbing",
+ "rock concert",
+ "rock face",
+ "rock formation",
+ "rocker",
+ "rocket",
+ "rocking chair",
+ "rocky",
+ "rodent",
+ "rodeo",
+ "rodeo arena",
+ "roe",
+ "roe deer",
+ "roller",
+ "coaster",
+ "roller skate",
+ "roller skates",
+ "rolling pin",
+ "romance",
+ "romantic",
+ "roof",
+ "roof garden",
+ "room",
+ "room divider",
+ "root",
+ "root beer",
+ "rope bridge",
+ "rosary",
+ "rose",
+ "rosemary",
+ "rosy cloud",
+ "rottweiler",
+ "round table",
+ "router",
+ "row",
+ "rowan",
+ "royal",
+ "rubber stamp",
+ "rubble",
+ "rubik's cube",
+ "ruby",
+ "ruffle",
+ "rugby",
+ "rugby ball",
+ "rugby player",
+ "ruins",
+ "ruler",
+ "rum",
+ "run",
+ "runner",
+ "running shoe",
+ "rural",
+ "rust",
+ "rustic",
+ "rye",
+ "sack",
+ "saddle",
+ "saddlebag",
+ "safari",
+ "safe",
+ "safety vest",
+ "sage",
+ "sail",
+ "sailboat",
+ "sailing",
+ "sailor",
+ "squirrel monkey",
+ "sake",
+ "salad",
+ "salad bowl",
+ "salamander",
+ "salami",
+ "sale",
+ "salmon",
+ "salon",
+ "salsa",
+ "salt",
+ "salt and pepper shakers",
+ "salt lake",
+ "salt marsh",
+ "salt shaker",
+ "salute",
+ "samoyed",
+ "samurai",
+ "sand",
+ "sand bar",
+ "sand box",
+ "sand castle",
+ "sand sculpture",
+ "sandal",
+ "sandwich",
+ "sanitary napkin",
+ "santa claus",
+ "sapphire",
+ "sardine",
+ "sari",
+ "sashimi",
+ "satay",
+ "satchel",
+ "satellite",
+ "satin",
+ "sauce",
+ "saucer",
+ "sauna",
+ "sausage",
+ "savanna",
+ "saw",
+ "sawbuck",
+ "sax",
+ "saxophonist",
+ "scaffold",
+ "scale",
+ "scale model",
+ "scallop",
+ "scar",
+ "strawman",
+ "scarf",
+ "scene",
+ "scenery",
+ "schnauzer",
+ "school",
+ "school bus",
+ "school uniform",
+ "schoolhouse",
+ "schooner",
+ "science",
+ "science fiction film",
+ "science museum",
+ "scientist",
+ "scissors",
+ "wall lamp",
+ "scone",
+ "scoop",
+ "scooter",
+ "score",
+ "scoreboard",
+ "scorpion",
+ "scout",
+ "scrambled egg",
+ "scrap",
+ "scraper",
+ "scratch",
+ "screen",
+ "screen door",
+ "screenshot",
+ "screw",
+ "screwdriver",
+ "scroll",
+ "scrub",
+ "scrubbing brush",
+ "sculptor",
+ "sculpture",
+ "sea cave",
+ "sea ice",
+ "sea lion",
+ "sea turtle",
+ "sea urchin",
+ "seabass",
+ "seabed",
+ "seabird",
+ "seafood",
+ "seahorse",
+ "seal",
+ "sea view",
+ "seashell",
+ "seaside resort",
+ "season",
+ "seat",
+ "seat belt",
+ "seaweed",
+ "secretary",
+ "security",
+ "sedan",
+ "see",
+ "seed",
+ "seesaw",
+ "segway",
+ "selfie",
+ "sell",
+ "seminar",
+ "sense",
+ "sensor",
+ "server",
+ "server room",
+ "service",
+ "set",
+ "sewing machine",
+ "shadow",
+ "shake",
+ "shaker",
+ "shampoo",
+ "shape",
+ "share",
+ "shark",
+ "sharpener",
+ "sharpie",
+ "shaver",
+ "shaving cream",
+ "shawl",
+ "shear",
+ "shears",
+ "sheep",
+ "sheet",
+ "sheet music",
+ "shelf",
+ "shell",
+ "shellfish",
+ "shelter",
+ "shelve",
+ "shepherd",
+ "sherbert",
+ "shiba inu",
+ "shine",
+ "shipping",
+ "shipping container",
+ "shipwreck",
+ "shipyard",
+ "shirt",
+ "shirtless",
+ "shoal",
+ "shoe",
+ "shoe box",
+ "shoe shop",
+ "shoe tree",
+ "shoot",
+ "shooting basketball guard",
+ "shop window",
+ "shopfront",
+ "shopper",
+ "shopping",
+ "shopping bag",
+ "shopping basket",
+ "shopping cart",
+ "mall",
+ "shopping street",
+ "shore",
+ "shoreline",
+ "short",
+ "short hair",
+ "shorts",
+ "shot glass",
+ "shotgun",
+ "shoulder",
+ "shoulder bag",
+ "shovel",
+ "showcase",
+ "shower",
+ "shower cap",
+ "shower curtain",
+ "shower door",
+ "shower head",
+ "shredder",
+ "shrew",
+ "shrimp",
+ "shrine",
+ "shrub",
+ "shutter",
+ "siamese",
+ "siberia",
+ "sibling",
+ "side",
+ "side cabinet",
+ "side dish",
+ "sidecar",
+ "sideline",
+ "siding",
+ "sign",
+ "signage",
+ "signal",
+ "signature",
+ "silk",
+ "silk stocking",
+ "silo",
+ "silver",
+ "silver medal",
+ "silverware",
+ "sing",
+ "singe",
+ "singer",
+ "sink",
+ "sip",
+ "sit",
+ "sitting",
+ "skate park",
+ "skateboard",
+ "skateboarder",
+ "skater",
+ "skating rink",
+ "skeleton",
+ "sketch",
+ "skewer",
+ "ski",
+ "ski boot",
+ "ski equipment",
+ "ski jacket",
+ "ski lift",
+ "ski pole",
+ "ski resort",
+ "snowboard",
+ "skier",
+ "skiing shoes",
+ "skin",
+ "skull",
+ "skullcap",
+ "sky",
+ "sky tower",
+ "skylight",
+ "skyline",
+ "skyscraper",
+ "slalom",
+ "slate",
+ "sleigh",
+ "sleep",
+ "sleeping bag",
+ "sleepwear",
+ "sleeve",
+ "slice",
+ "slide",
+ "slider",
+ "sling",
+ "slope",
+ "slot",
+ "slot machine",
+ "sloth",
+ "slow cooker",
+ "slug",
+ "slum",
+ "smell",
+ "smile",
+ "smoke",
+ "snack",
+ "snail",
+ "snake",
+ "snapper",
+ "snapshot",
+ "snorkel",
+ "snout",
+ "snow",
+ "snow leopard",
+ "snow mountain",
+ "snowball",
+ "snowboarder",
+ "snowfield",
+ "snowflake",
+ "snowman",
+ "snowmobile",
+ "snowplow",
+ "snowshoe",
+ "snowy",
+ "soap",
+ "soap bubble",
+ "soap dispenser",
+ "soccer goalkeeper",
+ "socialite",
+ "sock",
+ "socket",
+ "soda",
+ "softball",
+ "software",
+ "solar battery",
+ "soldier",
+ "solo",
+ "solution",
+ "sombrero",
+ "song",
+ "sound",
+ "soup",
+ "soup bowl",
+ "soupspoon",
+ "sour cream",
+ "souvenir",
+ "soybean milk",
+ "spa",
+ "space",
+ "space shuttle",
+ "space station",
+ "spacecraft",
+ "spaghetti",
+ "span",
+ "wrench",
+ "spark",
+ "sparkle",
+ "sparkler",
+ "sparkling wine",
+ "sparrow",
+ "spatula",
+ "speaker",
+ "spectator",
+ "speech bubble",
+ "speed limit",
+ "speed limit sign",
+ "speedboat",
+ "speedometer",
+ "sphere",
+ "spice",
+ "spice rack",
+ "spider",
+ "spider web",
+ "spike",
+ "spin",
+ "spinach",
+ "spire",
+ "splash",
+ "sponge",
+ "spoon",
+ "sport association",
+ "sport equipment",
+ "sport team",
+ "sports ball",
+ "sports equipment",
+ "sports meet",
+ "sportswear",
+ "dot",
+ "spray",
+ "spread",
+ "spring",
+ "spring roll",
+ "sprinkle",
+ "sprinkler",
+ "sprout",
+ "spruce",
+ "spruce forest",
+ "squad",
+ "square",
+ "squash",
+ "squat",
+ "squeeze",
+ "squid",
+ "squirrel",
+ "water gun",
+ "stab",
+ "stable",
+ "stack",
+ "stadium",
+ "staff",
+ "stage",
+ "stage light",
+ "stagecoach",
+ "stain",
+ "stainless steel",
+ "stair",
+ "stairs",
+ "stairwell",
+ "stall",
+ "stallion",
+ "stand",
+ "standing",
+ "staple",
+ "stapler",
+ "star",
+ "stare",
+ "starfish",
+ "starfruit",
+ "starling",
+ "state park",
+ "state school",
+ "station",
+ "stationary bicycle",
+ "stationery",
+ "statue",
+ "steak",
+ "steak knife",
+ "steam",
+ "steam engine",
+ "steam locomotive",
+ "steam train",
+ "steamed bread",
+ "steel",
+ "steering wheel",
+ "stem",
+ "stencil",
+ "step stool",
+ "stereo",
+ "stethoscope",
+ "stew",
+ "stick",
+ "stick insect",
+ "sticker",
+ "still life",
+ "stilt",
+ "stingray",
+ "stir",
+ "stirrer",
+ "stirrup",
+ "sew",
+ "stock",
+ "stocking",
+ "stomach",
+ "stone building",
+ "stone carving",
+ "stone house",
+ "stone mill",
+ "stool",
+ "stop",
+ "stop at",
+ "stop light",
+ "stop sign",
+ "stop watch",
+ "traffic light",
+ "storage box",
+ "storage room",
+ "tank",
+ "store",
+ "storefront",
+ "stork",
+ "storm",
+ "storm cloud",
+ "stormy",
+ "stove",
+ "poker",
+ "straddle",
+ "strainer",
+ "strait",
+ "strap",
+ "straw",
+ "straw hat",
+ "strawberry",
+ "stream",
+ "street art",
+ "street artist",
+ "street corner",
+ "street dog",
+ "street food",
+ "street light",
+ "street market",
+ "street photography",
+ "street scene",
+ "street sign",
+ "street vendor",
+ "stretch",
+ "stretcher",
+ "strike",
+ "striker",
+ "string",
+ "string cheese",
+ "strip",
+ "stripe",
+ "stroll",
+ "structure",
+ "studio",
+ "studio shot",
+ "stuff",
+ "stuffed animal",
+ "stuffed toy",
+ "stuffing",
+ "stump",
+ "stunning",
+ "stunt",
+ "stupa",
+ "style",
+ "stylus",
+ "submarine",
+ "submarine sandwich",
+ "submarine water",
+ "suburb",
+ "subway",
+ "subway station",
+ "subwoofer",
+ "succulent",
+ "suede",
+ "sugar",
+ "sugar bowl",
+ "sugar cane",
+ "sugar cube",
+ "suit",
+ "suite",
+ "summer",
+ "summer evening",
+ "summit",
+ "sun",
+ "sun hat",
+ "sunbathe",
+ "sunday",
+ "sundial",
+ "sunflower",
+ "sunflower field",
+ "sunflower seed",
+ "sunglasses",
+ "sunny",
+ "sunrise",
+ "sunset",
+ "sunshade",
+ "sunshine",
+ "super bowl",
+ "sports car",
+ "superhero",
+ "supermarket",
+ "supermarket shelf",
+ "supermodel",
+ "supporter",
+ "surf",
+ "surface",
+ "surfboard",
+ "surfer",
+ "surgeon",
+ "surgery",
+ "surround",
+ "sushi",
+ "sushi bar",
+ "suspenders",
+ "suspension",
+ "suspension bridge",
+ "suv",
+ "swallow",
+ "swallowtail butterfly",
+ "swamp",
+ "swan",
+ "swan boat",
+ "sweat pant",
+ "sweatband",
+ "sweater",
+ "sweatshirt",
+ "sweet",
+ "sweet potato",
+ "swim",
+ "swim cap",
+ "swimmer",
+ "swimming hole",
+ "swimming pool",
+ "swing",
+ "swing bridge",
+ "swinge",
+ "swirl",
+ "switch",
+ "swivel chair",
+ "sword",
+ "swordfish",
+ "symbol",
+ "symmetry",
+ "synagogue",
+ "syringe",
+ "syrup",
+ "system",
+ "t shirt",
+ "t-shirt",
+ "tabasco sauce",
+ "tabby",
+ "table tennis racket",
+ "table top",
+ "tablecloth",
+ "tablet computer",
+ "tableware",
+ "tachometer",
+ "tackle",
+ "taco",
+ "tae kwon do",
+ "tai chi",
+ "tail",
+ "tailor",
+ "take",
+ "takeoff",
+ "talk",
+ "tambourine",
+ "tan",
+ "tangerine",
+ "tape",
+ "tapestry",
+ "tarmac",
+ "taro",
+ "tarp",
+ "tart",
+ "tassel",
+ "taste",
+ "tatami",
+ "tattoo",
+ "tattoo artist",
+ "tavern",
+ "tea",
+ "tea bag",
+ "tea party",
+ "tea plantation",
+ "tea pot",
+ "tea set",
+ "teach",
+ "teacher",
+ "teacup",
+ "teal",
+ "team photo",
+ "team presentation",
+ "tear",
+ "technician",
+ "technology",
+ "teddy",
+ "tee",
+ "teenager",
+ "telegraph pole",
+ "zoom lens",
+ "telescope",
+ "television",
+ "television camera",
+ "television room",
+ "television studio",
+ "temperature",
+ "temple",
+ "tempura",
+ "tennis",
+ "tennis court",
+ "tennis match",
+ "tennis net",
+ "tennis player",
+ "tennis racket",
+ "tent",
+ "tequila",
+ "terminal",
+ "terrace",
+ "terrain",
+ "terrarium",
+ "territory",
+ "test",
+ "test match",
+ "test tube",
+ "text",
+ "text message",
+ "textile",
+ "texture",
+ "thanksgiving",
+ "thanksgiving dinner",
+ "theater",
+ "theatre actor",
+ "therapy",
+ "thermometer",
+ "thermos",
+ "thermos bottle",
+ "thermostat",
+ "thicket",
+ "thimble",
+ "thing",
+ "thinking",
+ "thistle",
+ "throne",
+ "throne room",
+ "throw",
+ "throw pillow",
+ "thunder",
+ "thunderstorm",
+ "thyme",
+ "tiara",
+ "tick",
+ "ticket",
+ "ticket booth",
+ "tide pool",
+ "tie",
+ "tiger",
+ "tight",
+ "tile",
+ "tile flooring",
+ "tile roof",
+ "tile wall",
+ "tin",
+ "tinfoil",
+ "tinsel",
+ "tiramisu",
+ "tire",
+ "tissue",
+ "toast",
+ "toaster",
+ "tobacco",
+ "tobacco pipe",
+ "toddler",
+ "toe",
+ "tofu",
+ "toilet bowl",
+ "toilet seat",
+ "toiletry",
+ "tokyo tower",
+ "tomato",
+ "tomato sauce",
+ "tomato soup",
+ "tomb",
+ "tong",
+ "tongs",
+ "tool",
+ "toolbox",
+ "toothbrush",
+ "toothpaste",
+ "toothpick",
+ "topiary garden",
+ "topping",
+ "torch",
+ "tornado",
+ "tortilla",
+ "tortoise",
+ "tote bag",
+ "totem pole",
+ "totoro",
+ "toucan",
+ "touch",
+ "touchdown",
+ "tour",
+ "tour bus",
+ "tour guide",
+ "tourist",
+ "tourist attraction",
+ "tournament",
+ "tow truck",
+ "towel",
+ "towel bar",
+ "tower block",
+ "tower bridge",
+ "town",
+ "town square",
+ "toy",
+ "toy car",
+ "toy gun",
+ "toyshop",
+ "track",
+ "tractor",
+ "trade",
+ "tradition",
+ "traditional",
+ "traffic",
+ "traffic cone",
+ "traffic congestion",
+ "traffic jam",
+ "traffic sign",
+ "trail",
+ "trailer",
+ "trailer truck",
+ "train",
+ "train bridge",
+ "train car",
+ "train interior",
+ "train track",
+ "train window",
+ "trainer",
+ "training",
+ "training bench",
+ "training ground",
+ "trolley",
+ "trampoline",
+ "transformer",
+ "transparency",
+ "travel",
+ "tray",
+ "treadmill",
+ "treat",
+ "tree",
+ "tree branch",
+ "tree farm",
+ "tree frog",
+ "tree house",
+ "tree root",
+ "tree trunk",
+ "trial",
+ "triangle",
+ "triathlon",
+ "tribe",
+ "tributary",
+ "trick",
+ "tricycle",
+ "trim",
+ "trio",
+ "tripod",
+ "trombone",
+ "troop",
+ "trophy",
+ "trophy cup",
+ "tropic",
+ "trout",
+ "truck",
+ "truck driver",
+ "tub",
+ "tube",
+ "tugboat",
+ "tulip",
+ "tuna",
+ "tundra",
+ "tunnel",
+ "turbine",
+ "turkey",
+ "turn",
+ "turnip",
+ "turquoise",
+ "turret",
+ "turtle",
+ "tusk",
+ "tv actor",
+ "tv cabinet",
+ "tv drama",
+ "tv genre",
+ "tv personality",
+ "tv show",
+ "tv sitcom",
+ "tv tower",
+ "twig",
+ "twilight",
+ "twin",
+ "twine",
+ "twist",
+ "type",
+ "type on",
+ "typewriter",
+ "ukulele",
+ "ultraman",
+ "umbrella",
+ "underclothes",
+ "underwater",
+ "unicorn",
+ "uniform",
+ "universe",
+ "university",
+ "up",
+ "urban",
+ "urinal",
+ "urn",
+ "use",
+ "utensil",
+ "utility room",
+ "vacuum",
+ "valley",
+ "valve",
+ "vampire",
+ "van",
+ "vanilla",
+ "vanity",
+ "variety",
+ "vase",
+ "vault",
+ "vector cartoon illustration",
+ "vector icon",
+ "vegetable",
+ "vegetable garden",
+ "vegetable market",
+ "vegetation",
+ "vehicle",
+ "veil",
+ "vein",
+ "velvet",
+ "vending machine",
+ "vendor",
+ "vent",
+ "vespa",
+ "vessel",
+ "vest",
+ "vet",
+ "veteran",
+ "veterinarians office",
+ "viaduct",
+ "video",
+ "video camera",
+ "video game",
+ "videotape",
+ "view mirror",
+ "vigil",
+ "villa",
+ "village",
+ "vine",
+ "vinegar",
+ "vineyard",
+ "violence",
+ "violet",
+ "violin",
+ "violinist",
+ "violist",
+ "vision",
+ "visor",
+ "vodka",
+ "volcano",
+ "volleyball",
+ "volleyball court",
+ "volleyball player",
+ "volunteer",
+ "voyage",
+ "vulture",
+ "waffle",
+ "waffle iron",
+ "wagon",
+ "wagon wheel",
+ "waist",
+ "waiter",
+ "waiting hall",
+ "waiting room",
+ "walk",
+ "walking",
+ "walking cane",
+ "wall clock",
+ "wallpaper",
+ "walnut",
+ "walrus",
+ "war",
+ "warehouse",
+ "warm",
+ "warning sign",
+ "warrior",
+ "warship",
+ "warthog",
+ "wash",
+ "washer",
+ "washing",
+ "washing machine",
+ "wasp",
+ "waste",
+ "waste container",
+ "watch",
+ "water",
+ "water bird",
+ "water buffalo",
+ "water cooler",
+ "water drop",
+ "water feature",
+ "water heater",
+ "water level",
+ "water lily",
+ "water park",
+ "water pipe",
+ "water purifier",
+ "water ski",
+ "water sport",
+ "water surface",
+ "water tower",
+ "watercolor",
+ "watercolor illustration",
+ "watercolor painting",
+ "waterfall",
+ "watering can",
+ "watermark overlay stamp",
+ "watermelon",
+ "waterproof jacket",
+ "waterway",
+ "wave",
+ "wax",
+ "weapon",
+ "wear",
+ "weather",
+ "vane",
+ "web",
+ "webcam",
+ "wedding",
+ "wedding ring",
+ "wedding bouquet",
+ "wedding cake",
+ "wedding couple",
+ "wedding invitation",
+ "wedding party",
+ "wedding photo",
+ "wedding photographer",
+ "wedding photography",
+ "wedding reception",
+ "wedge",
+ "weed",
+ "weight",
+ "weight scale",
+ "welder",
+ "well",
+ "western food",
+ "western restaurant",
+ "wet",
+ "wet bar",
+ "wet suit",
+ "wetland",
+ "wetsuit",
+ "whale",
+ "whale shark",
+ "wheat",
+ "wheat field",
+ "wheel",
+ "wheelchair",
+ "wheelie",
+ "whipped cream",
+ "whisk",
+ "whisker",
+ "whiskey",
+ "whistle",
+ "white",
+ "white house",
+ "white wine",
+ "whiteboard",
+ "wicket",
+ "wide",
+ "wield",
+ "wig",
+ "Wii",
+ "Wii controller",
+ "wild",
+ "wildebeest",
+ "wildfire",
+ "wildflower",
+ "wildlife",
+ "willow",
+ "wind",
+ "wind chime",
+ "wind farm",
+ "wind turbine",
+ "windmill",
+ "window",
+ "window box",
+ "window display",
+ "window frame",
+ "window screen",
+ "window seat",
+ "window sill",
+ "wiper",
+ "windshield",
+ "windy",
+ "wine bottle",
+ "wine cooler",
+ "wine cabinet",
+ "wine cellar",
+ "wine glass",
+ "wine rack",
+ "wine tasting",
+ "winery",
+ "wing",
+ "winter",
+ "winter melon",
+ "winter morning",
+ "winter scene",
+ "winter sport",
+ "winter storm",
+ "wire",
+ "wisteria",
+ "witch",
+ "witch hat",
+ "wok",
+ "wolf",
+ "woman",
+ "wood",
+ "wood duck",
+ "wood floor",
+ "wood wall",
+ "wood-burning stove",
+ "wooden spoon",
+ "woodland",
+ "woodpecker",
+ "woodworking plane",
+ "wool",
+ "job",
+ "work card",
+ "workbench",
+ "worker",
+ "workplace",
+ "workshop",
+ "world",
+ "worm",
+ "worship",
+ "wound",
+ "wrap",
+ "wrap dress",
+ "wrapping paper",
+ "wrestle",
+ "wrestler",
+ "wrinkle",
+ "wristband",
+ "write",
+ "writer",
+ "writing",
+ "writing brush",
+ "writing desk",
+ "yacht",
+ "yak",
+ "yard",
+ "yellow",
+ "yoga",
+ "yoga mat",
+ "yoghurt",
+ "yoke",
+ "yolk",
+ "youth",
+ "youth hostel",
+ "yurt",
+ "zebra",
+ "zebra crossing",
+ "zen garden",
+ "zip",
+ "zipper",
+ "zombie",
+ "zongzi",
+ "zoo",
+];
+
+pub const NAMES_DOTA_V1_5_16: [&str; 16] = [
+ "plane",
+ "ship",
+ "storage tank",
+ "baseball diamond",
+ "tennis court",
+ "basketball court",
+ "ground track field",
+ "harbor",
+ "bridge",
+ "large vehicle",
+ "small vehicle",
+ "helicopter",
+ "roundabout",
+ "soccer ball field",
+ "swimming pool",
+ "container crane",
+];
+pub const NAMES_DOTA_V1_15: [&str; 15] = [
+ "plane",
+ "ship",
+ "storage tank",
+ "baseball diamond",
+ "tennis court",
+ "basketball court",
+ "ground track field",
+ "harbor",
+ "bridge",
+ "large vehicle",
+ "small vehicle",
+ "helicopter",
+ "roundabout",
+ "soccer ball field",
+ "swimming pool",
+];
+
pub const NAMES_YOLO_DOCLAYOUT_10: [&str; 10] = [
"title",
"plain text",
diff --git a/src/utils/ops.rs b/src/utils/ops.rs
index 8d416ce..b29e35f 100644
--- a/src/utils/ops.rs
+++ b/src/utils/ops.rs
@@ -267,12 +267,12 @@ impl Ops<'_> {
PixelType::F32,
)?;
let mut dst = Image::new(w1 as _, h1 as _, src.pixel_type());
- let (mut resizer, mut options) = Self::build_resizer_filter(filter)?;
+ let (mut resizer, mut config) = Self::build_resizer_filter(filter)?;
if crop_src {
let (_, w, h) = Self::scale_wh(w1 as _, h1 as _, w0 as _, h0 as _);
- options = options.crop(0., 0., w.into(), h.into());
+ config = config.crop(0., 0., w.into(), h.into());
};
- resizer.resize(&src, &mut dst, &options)?;
+ resizer.resize(&src, &mut dst, &config)?;
// u8 -> f32
Self::u8_slice_to_f32(&dst.into_vec())
@@ -317,12 +317,12 @@ impl Ops<'_> {
) -> Result<Vec<u8>> {
let src = Image::from_vec_u8(w0 as _, h0 as _, v.to_vec(), PixelType::U8)?;
let mut dst = Image::new(w1 as _, h1 as _, src.pixel_type());
- let (mut resizer, mut options) = Self::build_resizer_filter(filter)?;
+ let (mut resizer, mut config) = Self::build_resizer_filter(filter)?;
if crop_src {
let (_, w, h) = Self::scale_wh(w1 as _, h1 as _, w0 as _, h0 as _);
- options = options.crop(0., 0., w.into(), h.into());
+ config = config.crop(0., 0., w.into(), h.into());
};
- resizer.resize(&src, &mut dst, &options)?;
+ resizer.resize(&src, &mut dst, &config)?;
Ok(dst.into_vec())
}
@@ -348,13 +348,13 @@ impl Ops<'_> {
th: u32,
tw: u32,
resizer: &mut Resizer,
- options: &ResizeOptions,
+ config: &ResizeOptions,
) -> Result<Array<f32, IxDyn>> {
let buffer = if x.dimensions() == (tw, th) {
x.to_rgb8().into_raw()
} else {
let mut dst = Image::new(tw, th, PixelType::U8x3);
- resizer.resize(x, &mut dst, options)?;
+ resizer.resize(x, &mut dst, config)?;
dst.into_vec()
};
let y = Array::from_shape_vec((th as usize, tw as usize, 3), buffer)?
@@ -370,9 +370,9 @@ impl Ops<'_> {
filter: &str,
) -> Result<Array<f32, IxDyn>> {
let mut ys = Array::ones((xs.len(), th as usize, tw as usize, 3)).into_dyn();
- let (mut resizer, options) = Self::build_resizer_filter(filter)?;
+ let (mut resizer, config) = Self::build_resizer_filter(filter)?;
for (idx, x) in xs.iter().enumerate() {
- let y = Self::resize_rgb(x, th, tw, &mut resizer, &options)?;
+ let y = Self::resize_rgb(x, th, tw, &mut resizer, &config)?;
ys.slice_mut(s![idx, .., .., ..]).assign(&y);
}
Ok(ys)
@@ -387,7 +387,7 @@ impl Ops<'_> {
resize_by: &str,
center: bool,
resizer: &mut Resizer,
- options: &ResizeOptions,
+ config: &ResizeOptions,
) -> Result<Array<f32, IxDyn>> {
let (w0, h0) = x.dimensions();
let buffer = if w0 == tw && h0 == th {
@@ -403,7 +403,7 @@ impl Ops<'_> {
}
"height" => (th * w0 / h0, th),
"width" => (tw, tw * h0 / w0),
- _ => anyhow::bail!("ModelConfig for `letterbox`: width, height, auto"),
+ _ => anyhow::bail!("ORTConfig for `letterbox`: width, height, auto"),
};
let mut dst = Image::from_vec_u8(
@@ -422,7 +422,7 @@ impl Ops<'_> {
(0, 0)
};
let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
- resizer.resize(x, &mut dst_cropped, options)?;
+ resizer.resize(x, &mut dst_cropped, config)?;
dst.into_vec()
};
let y = Array::from_shape_vec((th as usize, tw as usize, 3), buffer)?
@@ -441,9 +441,9 @@ impl Ops<'_> {
center: bool,
) -> Result<Array<f32, IxDyn>> {
let mut ys = Array::ones((xs.len(), th as usize, tw as usize, 3)).into_dyn();
- let (mut resizer, options) = Self::build_resizer_filter(filter)?;
+ let (mut resizer, config) = Self::build_resizer_filter(filter)?;
for (idx, x) in xs.iter().enumerate() {
- let y = Self::letterbox_rgb(x, th, tw, bg, resize_by, center, &mut resizer, &options)?;
+ let y = Self::letterbox_rgb(x, th, tw, bg, resize_by, center, &mut resizer, &config)?;
ys.slice_mut(s![idx, .., .., ..]).assign(&y);
}
Ok(ys)
diff --git a/src/utils/options.rs b/src/utils/options.rs
deleted file mode 100644
index 613a620..0000000
--- a/src/utils/options.rs
+++ /dev/null
@@ -1,488 +0,0 @@
-//! Options for everthing
-use aksr::Builder;
-use anyhow::Result;
-use tokenizers::{PaddingParams, PaddingStrategy, Tokenizer, TruncationParams};
-
-use crate::{
- models::{SamKind, YOLOPredsFormat},
- try_fetch_file_stem, DType, Device, Engine, Hub, Iiix, Kind, LogitsSampler, MinOptMax,
- Processor, ResizeMode, Scale, Task, Version,
-};
-
-/// Options for building models and inference
-#[derive(Builder, Debug, Clone)]
-pub struct Options {
- // Model configs
- pub model_file: String,
- pub model_name: &'static str,
- pub model_device: Device,
- pub model_dtype: DType,
- pub model_version: Option<Version>,
- pub model_task: Option<Task>,
- pub model_scale: Option<Scale>,
- pub model_kind: Option<Kind>,
- pub model_iiixs: Vec<Iiix>,
- pub model_spec: String,
- pub model_num_dry_run: usize,
- pub trt_fp16: bool,
- pub profile: bool,
-
- // models
- pub model_encoder_file: Option<String>,
- pub model_decoder_file: Option<String>,
- pub visual_encoder_file: Option<String>,
- pub visual_decoder_file: Option<String>,
- pub textual_encoder_file: Option<String>,
- pub textual_decoder_file: Option<String>,
-
- // Processor configs
- #[args(except(setter))]
- pub image_width: u32,
- #[args(except(setter))]
- pub image_height: u32,
- pub resize_mode: ResizeMode,
- pub resize_filter: &'static str,
- pub padding_value: u8,
- pub letterbox_center: bool,
- pub normalize: bool,
- pub image_std: Vec<f32>,
- pub image_mean: Vec<f32>,
- pub nchw: bool,
- pub unsigned: bool,
-
- // Names
- pub class_names: Option<Vec<String>>,
- pub class_names_2: Option<Vec<String>>,
- pub class_names_3: Option<Vec<String>>,
- pub keypoint_names: Option<Vec<String>>,
- pub keypoint_names_2: Option<Vec<String>>,
- pub keypoint_names_3: Option