Options -> Config

2025-12-03 02:58:22 +00:00 · 2025-05-20 17:14:12 +08:00
parent 54045543fc d3c738b5cf
commit 1215b9c8f4
128 changed files with 6624 additions and 2741 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "usls"
 edition = "2021"
-version = "0.1.0-beta.1"
+version = "0.1.0-beta.2"
 rust-version = "1.82"
 description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
 repository = "https://github.com/jamjamjon/usls"
@@ -44,6 +44,7 @@ ort = { version = "2.0.0-rc.9", default-features = false, optional = true , feat
   "half"
 ]}
 tokenizers = { version = "0.21.1" }
+paste = "1.0.15"

 [build-dependencies]
 prost-build = "0.13.5"
--- a/README.md
+++ b/README.md
@@ -116,7 +116,8 @@
 | [Moondream2](https://github.com/vikhyat/moondream/tree/main)                                                      | Open-Set Object Detection<br />Open-Set Keypoints Detection<br />Image Caption<br />Visual Question Answering               | [demo](examples/moondream2)     | ✅     | ✅             | ✅             |                    |                    |
 | [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble)                                                | Open-Set Object Detection                                                                                                    | [demo](examples/owlv2)          | ✅     | ✅             | ✅             |                    |                    |
 | [SmolVLM(256M, 500M)](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct)                                                | Visual Question Answering                                                                                                    | [demo](examples/smolvlm)          | ✅     | ✅             | ✅             |                    |                    |
-| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0)                                                | Image Segmentation Answering                                                                                                    | [demo](examples/rmbg)          | ✅     | ✅             | ✅             |                    |                    |
+| [RMBG(1.4, 2.0)](https://huggingface.co/briaai/RMBG-2.0)                                                | Image Segmentation<br />Background Removal                                                                                         | [demo](examples/rmbg)          | ✅     | ✅             | ✅             |                    |                    |
+| [BEN2](https://huggingface.co/PramaLLC/BEN2)                                                | Image Segmentation<br />Background Removal                                                                                            | [demo](examples/ben2)          | ✅     | ✅             | ✅             |                    |                    |

 </details>

--- a/examples/ben2/main.rs
+++ b/examples/ben2/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::RMBG, Annotator, DataLoader, Options};
+use usls::{models::RMBG, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -20,11 +20,11 @@ fn main() -> anyhow::Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = Options::ben2_base()
+    let config = Config::ben2_base()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = RMBG::new(options)?;
+    let mut model = RMBG::new(config)?;

    // load image
    let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
--- a/examples/blip/main.rs
+++ b/examples/blip/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::Blip, DataLoader, Options};
+use usls::{models::Blip, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// BLIP Example
@@ -20,13 +20,10 @@ fn main() -> anyhow::Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options_visual = Options::blip_v1_base_caption_visual()
-        .with_model_device(args.device.as_str().try_into()?)
+    let config = Config::blip_v1_base_caption()
+        .with_device_all(args.device.as_str().try_into()?)
        .commit()?;
-    let options_textual = Options::blip_v1_base_caption_textual()
-        .with_model_device(args.device.as_str().try_into()?)
-        .commit()?;
-    let mut model = Blip::new(options_visual, options_textual)?;
+    let mut model = Blip::new(config)?;

    // image caption
    let xs = DataLoader::try_read_n(&args.source)?;
--- a/examples/classifier/main.rs
+++ b/examples/classifier/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+use usls::{models::ImageClassifier, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -12,7 +12,7 @@ struct Args {
    device: String,

    /// model name
-    #[argh(option, default = "String::from(\"beit\")")]
+    #[argh(option, default = "String::from(\"mobileone\")")]
    model: String,

    /// source image
@@ -36,20 +36,20 @@ fn main() -> anyhow::Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = match args.model.to_lowercase().as_str() {
-        "beit" => Options::beit_base(),
-        "convnext" => Options::convnext_v2_atto(),
-        "deit" => Options::deit_tiny_distill(),
-        "fastvit" => Options::fastvit_t8_distill(),
-        "mobileone" => Options::mobileone_s0(),
+    let config = match args.model.to_lowercase().as_str() {
+        "beit" => Config::beit_base(),
+        "convnext" => Config::convnext_v2_atto(),
+        "deit" => Config::deit_tiny_distill(),
+        "fastvit" => Config::fastvit_t8_distill(),
+        "mobileone" => Config::mobileone_s0(),
        _ => anyhow::bail!("Unsupported model: {}", args.model),
    };

-    let options = options
+    let config = config
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = ImageClassifier::try_from(options)?;
+    let mut model = ImageClassifier::try_from(config)?;

    // load images
    let xs = DataLoader::try_read_n(&args.source)?;
--- a/examples/clip/main.rs
+++ b/examples/clip/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::Clip, DataLoader, Ops, Options};
+use usls::{models::Clip, Config, DataLoader, Ops};

 #[derive(argh::FromArgs)]
 /// CLIP Example
@@ -14,18 +14,13 @@ fn main() -> Result<()> {
        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();
-
    let args: Args = argh::from_env();
+
    // build model
-    let options_visual = Options::jina_clip_v1_visual()
-        // clip_vit_b32_visual()
-        .with_model_device(args.device.as_str().try_into()?)
+    let config = Config::jina_clip_v1()
+        .with_device_all(args.device.as_str().try_into()?)
        .commit()?;
-    let options_textual = Options::jina_clip_v1_textual()
-        // clip_vit_b32_textual()
-        .with_model_device(args.device.as_str().try_into()?)
-        .commit()?;
-    let mut model = Clip::new(options_visual, options_textual)?;
+    let mut model = Clip::new(config)?;

    // texts
    let texts = vec![
--- a/examples/d-fine/main.rs
+++ b/examples/d-fine/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();

-    // options
-    let options = Options::d_fine_n_coco().commit()?;
-    let mut model = RTDETR::new(options)?;
+    // config
+    let mut model = RTDETR::new(Config::d_fine_n_coco().commit()?)?;

    // load
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/db/main.rs
+++ b/examples/db/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::DB, Annotator, DataLoader, Options, Style};
+use usls::{models::DB, Annotator, Config, DataLoader, Style};

 #[derive(argh::FromArgs)]
 /// Example
@@ -41,15 +41,13 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = match &args.model {
-        Some(m) => Options::db().with_model_file(m),
-        None => Options::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
-    };
-    let mut model = DB::new(
-        options
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-    )?;
+    let config = match &args.model {
+        Some(m) => Config::db().with_model_file(m),
+        None => Config::ppocr_det_v4_ch().with_model_dtype(args.dtype.as_str().try_into()?),
+    }
+    .with_device_all(args.device.as_str().try_into()?)
+    .commit()?;
+    let mut model = DB::new(config)?;

    // load image
    let xs = DataLoader::try_read_n(&[
--- a/examples/deim/main.rs
+++ b/examples/deim/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();

-    // options
-    let options = Options::deim_dfine_s_coco().commit()?;
-    let mut model = RTDETR::new(options)?;
+    // config
+    let mut model = RTDETR::new(Config::deim_dfine_s_coco().commit()?)?;

    // load
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/depth-anything/main.rs
+++ b/examples/depth-anything/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::DepthAnything, Annotator, DataLoader, Options, Style};
+use usls::{models::DepthAnything, Annotator, Config, DataLoader, Style};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -8,8 +8,7 @@ fn main() -> Result<()> {
        .init();

    // build model
-    let options = Options::depth_anything_v2_small().commit()?;
-    let mut model = DepthAnything::new(options)?;
+    let mut model = DepthAnything::new(Config::depth_anything_v2_small().commit()?)?;

    // load
    let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
--- a/examples/depth-pro/main.rs
+++ b/examples/depth-pro/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use usls::DataLoader;
-use usls::{models::DepthPro, Annotator, Options, Style};
+use usls::{models::DepthPro, Annotator, Config, Style};

 #[derive(argh::FromArgs)]
 /// Example
@@ -23,11 +23,12 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // model
-    let options = Options::depth_pro()
+    let config = Config::depth_pro()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = DepthPro::new(options)?;
+
+    let mut model = DepthPro::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&["images/street.jpg"])?;
--- a/examples/dinov2/main.rs
+++ b/examples/dinov2/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::DINOv2, DataLoader, Options};
+use usls::{models::DINOv2, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -11,8 +11,10 @@ fn main() -> Result<()> {
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg", "./assets/bus.jpg"])?;

    // model
-    let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;
-    let mut model = DINOv2::new(options)?;
+    let config = Config::dinov2_small()
+        .with_batch_size_all(xs.len())
+        .commit()?;
+    let mut model = DINOv2::new(config)?;

    // encode images
    let y = model.encode_images(&xs)?;
--- a/examples/doclayout-yolo/main.rs
+++ b/examples/doclayout-yolo/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -18,7 +18,7 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let config = Options::doclayout_yolo_docstructbench()
+    let config = Config::doclayout_yolo_docstructbench()
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
    let mut model = YOLO::new(config)?;
--- a/examples/fast/main.rs
+++ b/examples/fast/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::DB, Annotator, DataLoader, Options, Scale, Style};
+use usls::{models::DB, Annotator, Config, DataLoader, Scale, Style};

 #[derive(argh::FromArgs)]
 /// Example
@@ -26,16 +26,16 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = match args.scale.as_str().try_into()? {
-        Scale::T => Options::fast_tiny(),
-        Scale::S => Options::fast_small(),
-        Scale::B => Options::fast_base(),
+    let config = match args.scale.as_str().try_into()? {
+        Scale::T => Config::fast_tiny(),
+        Scale::S => Config::fast_small(),
+        Scale::B => Config::fast_base(),
        _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
    };
    let mut model = DB::new(
-        options
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
+        config
+            .with_dtype_all(args.dtype.as_str().try_into()?)
+            .with_device_all(args.device.as_str().try_into()?)
            .commit()?,
    )?;

--- a/examples/fastsam/main.rs
+++ b/examples/fastsam/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -22,7 +22,7 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let config = Options::fastsam_s()
+    let config = Config::fastsam_s()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
@@ -45,7 +45,7 @@ fn main() -> Result<()> {
        annotator.annotate(x, y)?.save(format!(
            "{}.jpg",
            usls::Dir::Current
-                .base_dir_with_subs(&["runs", "FastSAM"])?
+                .base_dir_with_subs(&["runs", model.spec()])?
                .join(usls::timestamp(None))
                .display(),
        ))?;
--- a/examples/florence2/README.md
+++ b/examples/florence2/README.md
@@ -1,7 +1,7 @@
 ## Quick Start

 ```shell
-cargo run -r -F cuda --example florence2 -- --device cuda --scale base --dtype fp16
+cargo run -r -F cuda --example florence2 -- --device cuda --dtype fp16
 ```


--- a/examples/florence2/main.rs
+++ b/examples/florence2/main.rs
@@ -1,20 +1,16 @@
 use anyhow::Result;
-use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Style, Task};
+use usls::{models::Florence2, Annotator, Config, DataLoader, Style, Task};

 #[derive(argh::FromArgs)]
 /// Example
 struct Args {
    /// dtype
-    #[argh(option, default = "String::from(\"auto\")")]
+    #[argh(option, default = "String::from(\"fp16\")")]
    dtype: String,

    /// device
    #[argh(option, default = "String::from(\"cpu:0\")")]
    device: String,
-
-    /// scale
-    #[argh(option, default = "String::from(\"base\")")]
-    scale: String,
 }

 fn main() -> Result<()> {
@@ -29,51 +25,12 @@ fn main() -> Result<()> {
    let xs = DataLoader::try_read_n(&["images/green-car.jpg", "assets/bus.jpg"])?;

    // build model
-    let (
-        options_vision_encoder,
-        options_text_embed,
-        options_encoder,
-        options_decoder,
-        options_decoder_merged,
-    ) = match args.scale.as_str().try_into()? {
-        Scale::B => (
-            Options::florence2_visual_encoder_base(),
-            Options::florence2_textual_embed_base(),
-            Options::florence2_texual_encoder_base(),
-            Options::florence2_texual_decoder_base(),
-            Options::florence2_texual_decoder_merged_base(),
-        ),
-        Scale::L => todo!(),
-        _ => anyhow::bail!("Unsupported Florence2 scale."),
-    };
-
-    let mut model = Florence2::new(
-        options_vision_encoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_text_embed
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_encoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_decoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_decoder_merged
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-    )?;
+    let config = Config::florence2_base()
+        .with_dtype_all(args.dtype.as_str().try_into()?)
+        .with_device_all(args.device.as_str().try_into()?)
+        .with_batch_size_all(xs.len())
+        .commit()?;
+    let mut model = Florence2::new(config)?;

    // tasks
    let tasks = [
--- a/examples/grounding-dino/main.rs
+++ b/examples/grounding-dino/main.rs
@@ -1,11 +1,11 @@
 use anyhow::Result;
-use usls::{models::GroundingDINO, Annotator, DataLoader, Options};
+use usls::{models::GroundingDINO, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
 struct Args {
    /// dtype
-    #[argh(option, default = "String::from(\"auto\")")]
+    #[argh(option, default = "String::from(\"fp16\")")]
    dtype: String,

    /// device
@@ -45,7 +45,7 @@ fn main() -> Result<()> {

    let args: Args = argh::from_env();

-    let options = Options::grounding_dino_tiny()
+    let config = Config::grounding_dino_tiny()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
@@ -53,7 +53,7 @@ fn main() -> Result<()> {
        .with_text_confs(&[0.25])
        .commit()?;

-    let mut model = GroundingDINO::new(options)?;
+    let mut model = GroundingDINO::new(config)?;

    // load images
    let xs = DataLoader::try_read_n(&args.source)?;
--- a/examples/linknet/main.rs
+++ b/examples/linknet/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use usls::DataLoader;
-use usls::{models::DB, Annotator, Options, Scale, Style};
+use usls::{models::DB, Annotator, Config, Scale, Style};

 #[derive(argh::FromArgs)]
 /// Example
@@ -27,14 +27,14 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = match args.scale.as_str().try_into()? {
-        Scale::T => Options::linknet_r18(),
-        Scale::S => Options::linknet_r34(),
-        Scale::B => Options::linknet_r50(),
+    let config = match args.scale.as_str().try_into()? {
+        Scale::T => Config::linknet_r18(),
+        Scale::S => Config::linknet_r34(),
+        Scale::B => Config::linknet_r50(),
        _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
    };
    let mut model = DB::new(
-        options
+        config
            .with_model_dtype(args.dtype.as_str().try_into()?)
            .with_model_device(args.device.as_str().try_into()?)
            .commit()?,
--- a/examples/modnet/main.rs
+++ b/examples/modnet/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::MODNet, Annotator, DataLoader, Options};
+use usls::{models::MODNet, Annotator, Config, DataLoader};

 fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
@@ -7,8 +7,7 @@ fn main() -> anyhow::Result<()> {
        .init();

    // build model
-    let options = Options::modnet_photographic().commit()?;
-    let mut model = MODNet::new(options)?;
+    let mut model = MODNet::new(Config::modnet_photographic().commit()?)?;

    // load image
    let xs = DataLoader::try_read_n(&["images/liuyifei.png"])?;
--- a/examples/moondream2/main.rs
+++ b/examples/moondream2/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::Moondream2, Annotator, DataLoader, Options, Scale, Task};
+use usls::{models::Moondream2, Annotator, Config, DataLoader, Scale, Task};

 #[derive(argh::FromArgs)]
 /// Example
@@ -39,81 +39,16 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let (
-        options_vision_encoder,
-        options_vision_projection,
-        options_text_decoder,
-        options_text_encoder,
-        options_coord_decoder,
-        options_coord_encoder,
-        options_size_decoder,
-        options_size_encoder,
-    ) = match args.scale.as_str().try_into()? {
-        Scale::Billion(2.) => (
-            Options::moondream2_2b_vision_encoder(),
-            Options::moondream2_2b_vision_projection(),
-            Options::moondream2_2b_text_decoder(),
-            Options::moondream2_2b_text_encoder(),
-            Options::moondream2_2b_coord_decoder(),
-            Options::moondream2_2b_coord_encoder(),
-            Options::moondream2_2b_size_decoder(),
-            Options::moondream2_2b_size_encoder(),
-        ),
-        Scale::Billion(0.5) => (
-            Options::moondream2_0_5b_vision_encoder(),
-            Options::moondream2_0_5b_vision_projection(),
-            Options::moondream2_0_5b_text_decoder(),
-            Options::moondream2_0_5b_text_encoder(),
-            Options::moondream2_0_5b_coord_decoder(),
-            Options::moondream2_0_5b_coord_encoder(),
-            Options::moondream2_0_5b_size_decoder(),
-            Options::moondream2_0_5b_size_encoder(),
-        ),
+    let config = match args.scale.as_str().try_into()? {
+        Scale::Billion(0.5) => Config::moondream2_0_5b(),
+        Scale::Billion(2.) => Config::moondream2_2b(),
        _ => unimplemented!(),
-    };
+    }
+    .with_dtype_all(args.dtype.as_str().try_into()?)
+    .with_device_all(args.device.as_str().try_into()?)
+    .commit()?;

-    let mut model = Moondream2::new(
-        options_vision_encoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        options_vision_projection
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        options_text_encoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        options_text_decoder
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        Some(
-            options_coord_encoder
-                .with_model_dtype(args.dtype.as_str().try_into()?)
-                .with_model_device(args.device.as_str().try_into()?)
-                .commit()?,
-        ),
-        Some(
-            options_coord_decoder
-                .with_model_dtype(args.dtype.as_str().try_into()?)
-                .with_model_device(args.device.as_str().try_into()?)
-                .commit()?,
-        ),
-        Some(
-            options_size_encoder
-                .with_model_dtype(args.dtype.as_str().try_into()?)
-                .with_model_device(args.device.as_str().try_into()?)
-                .commit()?,
-        ),
-        Some(
-            options_size_decoder
-                .with_model_dtype(args.dtype.as_str().try_into()?)
-                .with_model_device(args.device.as_str().try_into()?)
-                .commit()?,
-        ),
-    )?;
+    let mut model = Moondream2::new(config)?;

    // load images
    let xs = DataLoader::try_read_n(&args.source)?;
@@ -142,13 +77,6 @@ fn main() -> Result<()> {
        }
        Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
            println!("{:?}", ys);
-            // let annotator = Annotator::default()
-            //     .with_bboxes_thickness(4)
-            //     .without_bboxes_conf(true)
-            //     .with_keypoints_radius(6)
-            //     .with_keypoints_name(true)
-            //     .with_saveout("moondream2");
-            // annotator.annotate(&xs, &ys);

            // annotate
            let annotator = Annotator::default()
--- a/examples/owlv2/main.rs
+++ b/examples/owlv2/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use usls::DataLoader;
-use usls::{models::OWLv2, Annotator, Options};
+use usls::{models::OWLv2, Annotator, Config};

 #[derive(argh::FromArgs)]
 /// Example
@@ -46,14 +46,14 @@ fn main() -> Result<()> {
        .init();
    let args: Args = argh::from_env();

-    // options
-    let options = Options::owlv2_base_ensemble()
+    // config
+    let config = Config::owlv2_base_ensemble()
        // owlv2_base()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
-        .with_class_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
+        .with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
        .commit()?;
-    let mut model = OWLv2::new(options)?;
+    let mut model = OWLv2::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&args.source)?;
--- a/examples/picodet-layout/main.rs
+++ b/examples/picodet-layout/main.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use usls::DataLoader;
-use usls::{models::PicoDet, Annotator, Options};
+use usls::{models::PicoDet, Annotator, Config};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -8,12 +8,11 @@ fn main() -> Result<()> {
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();

-    // options
-    let options = Options::picodet_layout_1x()
-        // picodet_l_layout_3cls()
-        // picodet_l_layout_17cls()
-        .commit()?;
-    let mut model = PicoDet::new(options)?;
+    // config
+    let config = Config::picodet_layout_1x().commit()?;
+    // picodet_l_layout_3cls()
+    // picodet_l_layout_17cls()
+    let mut model = PicoDet::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&["images/academic.jpg"])?;
--- a/examples/rfdetr/main.rs
+++ b/examples/rfdetr/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::RFDETR, Annotator, DataLoader, Options};
+use usls::{models::RFDETR, Annotator, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -7,9 +7,8 @@ fn main() -> Result<()> {
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();

-    // options
-    let options = Options::rfdetr_base().commit()?;
-    let mut model = RFDETR::new(options)?;
+    // config
+    let mut model = RFDETR::new(Config::rfdetr_base().commit()?)?;

    // load
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/rmbg/main.rs
+++ b/examples/rmbg/main.rs
@@ -1,10 +1,10 @@
-use usls::{models::RMBG, Annotator, DataLoader, Options};
+use usls::{models::RMBG, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
 struct Args {
    /// dtype
-    #[argh(option, default = "String::from(\"auto\")")]
+    #[argh(option, default = "String::from(\"fp16\")")]
    dtype: String,

    /// device
@@ -23,18 +23,18 @@ fn main() -> anyhow::Result<()> {
        .init();
    let args: Args = argh::from_env();

-    let options = match args.ver {
-        1.4 => Options::rmbg1_4(),
-        2.0 => Options::rmbg2_0(),
+    let config = match args.ver {
+        1.4 => Config::rmbg1_4(),
+        2.0 => Config::rmbg2_0(),
        _ => unreachable!("Unsupported version"),
    };

    // build model
-    let options = options
+    let config = config
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = RMBG::new(options)?;
+    let mut model = RMBG::new(config)?;

    // load image
    let xs = DataLoader::try_read_n(&["./assets/cat.png"])?;
--- a/examples/rtdetr/main.rs
+++ b/examples/rtdetr/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::RTDETR, Annotator, DataLoader, Options};
+use usls::{models::RTDETR, Annotator, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -7,15 +7,14 @@ fn main() -> Result<()> {
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();

-    // options
-    let options = Options::rtdetr_v2_s_coco()
-        // rtdetr_v1_r18vd_coco()
-        // rtdetr_v2_ms_coco()
-        // rtdetr_v2_m_coco()
-        // rtdetr_v2_l_coco()
-        // rtdetr_v2_x_coco()
-        .commit()?;
-    let mut model = RTDETR::new(options)?;
+    // config
+    let config = Config::rtdetr_v2_s_coco().commit()?;
+    // rtdetr_v1_r18vd_coco()
+    // rtdetr_v2_ms_coco()
+    // rtdetr_v2_m_coco()
+    // rtdetr_v2_l_coco()
+    // rtdetr_v2_x_coco()
+    let mut model = RTDETR::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/rtmo/main.rs
+++ b/examples/rtmo/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::RTMO, Annotator, DataLoader, Options, Style, SKELETON_COCO_19};
+use usls::{models::RTMO, Annotator, Config, DataLoader, Style, SKELETON_COCO_19};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -8,7 +8,7 @@ fn main() -> Result<()> {
        .init();

    // build model
-    let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
+    let mut model = RTMO::new(Config::rtmo_s().commit()?)?;

    // load image
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/sam/main.rs
+++ b/examples/sam/main.rs
@@ -1,7 +1,7 @@
 use anyhow::Result;
 use usls::{
    models::{SamKind, SamPrompt, SAM},
-    Annotator, DataLoader, Options, Scale,
+    Annotator, Config, DataLoader, Scale,
 };

 #[derive(argh::FromArgs)]
@@ -28,40 +28,22 @@ fn main() -> Result<()> {

    let args: Args = argh::from_env();
    // Build model
-    let (options_encoder, options_decoder) = match args.kind.as_str().try_into()? {
-        SamKind::Sam => (
-            Options::sam_v1_base_encoder(),
-            Options::sam_v1_base_decoder(),
-        ),
+    let config = match args.kind.as_str().try_into()? {
+        SamKind::Sam => Config::sam_v1_base(),
        SamKind::Sam2 => match args.scale.as_str().try_into()? {
-            Scale::T => (Options::sam2_tiny_encoder(), Options::sam2_tiny_decoder()),
-            Scale::S => (Options::sam2_small_encoder(), Options::sam2_small_decoder()),
-            Scale::B => (
-                Options::sam2_base_plus_encoder(),
-                Options::sam2_base_plus_decoder(),
-            ),
+            Scale::T => Config::sam2_tiny(),
+            Scale::S => Config::sam2_small(),
+            Scale::B => Config::sam2_base_plus(),
            _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
        },
+        SamKind::MobileSam => Config::mobile_sam_tiny(),
+        SamKind::SamHq => Config::sam_hq_tiny(),
+        SamKind::EdgeSam => Config::edge_sam_3x(),
+    }
+    .with_device_all(args.device.as_str().try_into()?)
+    .commit()?;

-        SamKind::MobileSam => (
-            Options::mobile_sam_tiny_encoder(),
-            Options::mobile_sam_tiny_decoder(),
-        ),
-        SamKind::SamHq => (
-            Options::sam_hq_tiny_encoder(),
-            Options::sam_hq_tiny_decoder(),
-        ),
-        SamKind::EdgeSam => (
-            Options::edge_sam_3x_encoder(),
-            Options::edge_sam_3x_decoder(),
-        ),
-    };
-
-    let options_encoder = options_encoder
-        .with_model_device(args.device.as_str().try_into()?)
-        .commit()?;
-    let options_decoder = options_decoder.commit()?;
-    let mut model = SAM::new(options_encoder, options_decoder)?;
+    let mut model = SAM::new(config)?;

    // Load image
    let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
--- a/examples/sam2/README.md
+++ b/examples/sam2/README.md
@@ -1,6 +1,5 @@
 ## Quick Start

 ```Shell
-
-cargo run -r -F cuda --example sam -- --device cuda --scale t
+cargo run -r -F cuda --example sam2 -- --device cuda --scale t
 ```
--- a/examples/sam2/main.rs
+++ b/examples/sam2/main.rs
@@ -1,7 +1,7 @@
 use anyhow::Result;
 use usls::{
    models::{SamPrompt, SAM2},
-    Annotator, DataLoader, Options, Scale,
+    Annotator, Config, DataLoader, Scale,
 };

 #[derive(argh::FromArgs)]
@@ -25,33 +25,16 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // Build model
-    let (options_encoder, options_decoder) = match args.scale.as_str().try_into()? {
-        Scale::T => (
-            Options::sam2_1_tiny_encoder(),
-            Options::sam2_1_tiny_decoder(),
-        ),
-        Scale::S => (
-            Options::sam2_1_small_encoder(),
-            Options::sam2_1_small_decoder(),
-        ),
-        Scale::B => (
-            Options::sam2_1_base_plus_encoder(),
-            Options::sam2_1_base_plus_decoder(),
-        ),
-        Scale::L => (
-            Options::sam2_1_large_encoder(),
-            Options::sam2_1_large_decoder(),
-        ),
+    let config = match args.scale.as_str().try_into()? {
+        Scale::T => Config::sam2_1_tiny(),
+        Scale::S => Config::sam2_1_small(),
+        Scale::B => Config::sam2_1_base_plus(),
+        Scale::L => Config::sam2_1_large(),
        _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t, l.", args.scale),
-    };
-
-    let options_encoder = options_encoder
-        .with_model_device(args.device.as_str().try_into()?)
-        .commit()?;
-    let options_decoder = options_decoder
-        .with_model_device(args.device.as_str().try_into()?)
-        .commit()?;
-    let mut model = SAM2::new(options_encoder, options_decoder)?;
+    }
+    .with_device_all(args.device.as_str().try_into()?)
+    .commit()?;
+    let mut model = SAM2::new(config)?;

    // Load image
    let xs = DataLoader::try_read_n(&["images/truck.jpg"])?;
--- a/examples/sapiens/README.md
+++ b/examples/sapiens/README.md
@@ -1,7 +1,7 @@
 ## Quick Start

 ```shell
-cargo run -r -F cuda --example sapiens -- --device cuda 
+cargo run -r -F cuda --example sapiens -- --device cuda
 ```

 ## Results
--- a/examples/sapiens/main.rs
+++ b/examples/sapiens/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::Sapiens, Annotator, DataLoader, Options};
+use usls::{models::Sapiens, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -17,10 +17,10 @@ fn main() -> Result<()> {

    let args: Args = argh::from_env();
    // build
-    let options = Options::sapiens_seg_0_3b()
+    let config = Config::sapiens_seg_0_3b()
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = Sapiens::new(options)?;
+    let mut model = Sapiens::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&["images/paul-george.jpg"])?;
--- a/examples/slanet/main.rs
+++ b/examples/slanet/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::SLANet, Annotator, Color, DataLoader, Options};
+use usls::{models::SLANet, Annotator, Color, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -26,11 +26,11 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = Options::slanet_lcnet_v2_mobile_ch()
+    let config = Config::slanet_lcnet_v2_mobile_ch()
        .with_model_device(args.device.as_str().try_into()?)
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .commit()?;
-    let mut model = SLANet::new(options)?;
+    let mut model = SLANet::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&[args.source])?;
--- a/examples/smolvlm/main.rs
+++ b/examples/smolvlm/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::SmolVLM, DataLoader, Options, Scale};
+use usls::{models::SmolVLM, Config, DataLoader, Scale};

 #[derive(argh::FromArgs)]
 /// Example
@@ -29,32 +29,14 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let (options_vision_encoder, options_text_embed, options_decode) =
-        match args.scale.as_str().try_into()? {
-            Scale::Million(256.) => (
-                Options::smolvlm_vision_256m(),
-                Options::smolvlm_text_embed_256m(),
-                Options::smolvlm_decoder_256m(),
-            ),
-            Scale::Million(500.) => (
-                Options::smolvlm_vision_500m(),
-                Options::smolvlm_text_embed_500m(),
-                Options::smolvlm_decoder_500m(),
-            ),
-            _ => unimplemented!(),
-        };
-
-    let mut model = SmolVLM::new(
-        options_vision_encoder
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        options_text_embed
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-        options_decode
-            .with_model_device(args.device.as_str().try_into()?)
-            .commit()?,
-    )?;
+    let config = match args.scale.as_str().try_into()? {
+        Scale::Million(256.) => Config::smolvlm_256m(),
+        Scale::Million(500.) => Config::smolvlm_500m(),
+        _ => unimplemented!(),
+    }
+    .with_device_all(args.device.as_str().try_into()?)
+    .commit()?;
+    let mut model = SmolVLM::new(config)?;

    // load images
    let xs = DataLoader::try_read_n(&args.source)?;
--- a/examples/svtr/main.rs
+++ b/examples/svtr/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::SVTR, DataLoader, Options};
+use usls::{models::SVTR, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -22,13 +22,13 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let options = Options::ppocr_rec_v4_ch()
+    let config = Config::ppocr_rec_v4_ch()
        // ppocr_rec_v4_en()
        // repsvtr_ch()
        .with_model_device(args.device.as_str().try_into()?)
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .commit()?;
-    let mut model = SVTR::new(options)?;
+    let mut model = SVTR::new(config)?;

    // load images
    let dl = DataLoader::new("./examples/svtr/images")?
--- a/examples/trocr/main.rs
+++ b/examples/trocr/main.rs
@@ -1,6 +1,6 @@
 use usls::{
    models::{TrOCR, TrOCRKind},
-    DataLoader, Options, Scale,
+    Config, DataLoader, Scale,
 };

 #[derive(argh::FromArgs)]
@@ -38,52 +38,22 @@ fn main() -> anyhow::Result<()> {
    ])?;

    // build model
-    let (options_encoder, options_decoder, options_decoder_merged) =
-        match args.scale.as_str().try_into()? {
-            Scale::S => match args.kind.as_str().try_into()? {
-                TrOCRKind::Printed => (
-                    Options::trocr_encoder_small_printed(),
-                    Options::trocr_decoder_small_printed(),
-                    Options::trocr_decoder_merged_small_printed(),
-                ),
-                TrOCRKind::HandWritten => (
-                    Options::trocr_encoder_small_handwritten(),
-                    Options::trocr_decoder_small_handwritten(),
-                    Options::trocr_decoder_merged_small_handwritten(),
-                ),
-            },
-            Scale::B => match args.kind.as_str().try_into()? {
-                TrOCRKind::Printed => (
-                    Options::trocr_encoder_base_printed(),
-                    Options::trocr_decoder_base_printed(),
-                    Options::trocr_decoder_merged_base_printed(),
-                ),
-                TrOCRKind::HandWritten => (
-                    Options::trocr_encoder_base_handwritten(),
-                    Options::trocr_decoder_base_handwritten(),
-                    Options::trocr_decoder_merged_base_handwritten(),
-                ),
-            },
-            x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
-        };
+    let config = match args.scale.as_str().try_into()? {
+        Scale::S => match args.kind.as_str().try_into()? {
+            TrOCRKind::Printed => Config::trocr_small_printed(),
+            TrOCRKind::HandWritten => Config::trocr_small_handwritten(),
+        },
+        Scale::B => match args.kind.as_str().try_into()? {
+            TrOCRKind::Printed => Config::trocr_base_printed(),
+            TrOCRKind::HandWritten => Config::trocr_base_handwritten(),
+        },
+        x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
+    }
+    .with_device_all(args.device.as_str().try_into()?)
+    .with_dtype_all(args.dtype.as_str().try_into()?)
+    .commit()?;

-    let mut model = TrOCR::new(
-        options_encoder
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_decoder
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-        options_decoder_merged
-            .with_model_device(args.device.as_str().try_into()?)
-            .with_model_dtype(args.dtype.as_str().try_into()?)
-            .with_batch_size(xs.len())
-            .commit()?,
-    )?;
+    let mut model = TrOCR::new(config)?;

    // inference
    let ys = model.forward(&xs)?;
--- a/examples/ultralytics-rtdetr/README.md
+++ b/examples/ultralytics-rtdetr/README.md
--- a/examples/ultralytics-rtdetr/main.rs
+++ b/examples/ultralytics-rtdetr/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options};
+use usls::{models::YOLO, Annotator, Config, DataLoader};

 #[derive(argh::FromArgs)]
 /// Example
@@ -22,7 +22,7 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build model
-    let config = Options::yolo_v8_rtdetr_l()
+    let config = Config::ultralytics_rtdetr_l()
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
@@ -41,7 +41,7 @@ fn main() -> Result<()> {
        annotator.annotate(x, y)?.save(format!(
            "{}.jpg",
            usls::Dir::Current
-                .base_dir_with_subs(&["runs", "YOLOv8-RT-DETR"])?
+                .base_dir_with_subs(&["runs", "ultralytics-RTDETR"])?
                .join(usls::timestamp(None))
                .display(),
        ))?;
--- a/examples/yolo-sam2/main.rs
+++ b/examples/yolo-sam2/main.rs
@@ -1,7 +1,7 @@
 use anyhow::Result;
 use usls::{
    models::{SamPrompt, SAM2, YOLO},
-    Annotator, DataLoader, Options, Scale, Style,
+    Annotator, Config, DataLoader, Scale, Style,
 };

 #[derive(argh::FromArgs)]
@@ -21,16 +21,12 @@ fn main() -> Result<()> {
    let args: Args = argh::from_env();

    // build SAM
-    let (options_encoder, options_decoder) = (
-        Options::sam2_1_tiny_encoder().commit()?,
-        Options::sam2_1_tiny_decoder().commit()?,
-    );
-    let mut sam = SAM2::new(options_encoder, options_decoder)?;
+    let mut sam = SAM2::new(Config::sam2_1_tiny().commit()?)?;

    // build YOLOv8
-    let options_yolo = Options::yolo_detect()
-        .with_model_scale(Scale::N)
-        .with_model_version(8.into())
+    let options_yolo = Config::yolo_detect()
+        .with_scale(Scale::N)
+        .with_version(8.into())
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
    let mut yolo = YOLO::new(options_yolo)?;
--- a/examples/yolo/README.md
+++ b/examples/yolo/README.md
@@ -27,34 +27,36 @@ cargo run -r --example yolo -- --task detect --ver v8 --num-classes 6 --model xx

 # Classify
 cargo run -r --example yolo -- --task classify --ver 5 --scale s --image-width 224 --image-height 224 --num-classes 1000 --use-imagenet-1k-classes # YOLOv5
-cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 # YOLOv8 
-cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224  # YOLOv11 
+cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 --use-imagenet-1k-classes # YOLOv8 
+cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224  # YOLO11 

 # Detect
-cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes  # YOLOv5 
-cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes  # YOLOv6
-cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes  # YOLOv7
-cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes  # YOLOv8
-cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes  # YOLOv9
-cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes  # YOLOv10
-cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes  # YOLOv11
-cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx  # YOLOv8-world
+cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes --dtype fp16  	# YOLOv5 
+cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes --dtype fp16  	# YOLOv6
+cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes --dtype fp16  	# YOLOv7
+cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes --dtype fp16  	# YOLOv8
+cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes --dtype fp16  	# YOLOv9
+cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes --dtype fp16 	# YOLOv10
+cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes --dtype fp16 	# YOLO11
+cargo run -r --example yolo -- --task detect --ver 12 --scale n --use-coco-80-classes --dtype fp16 	# YOLOv12
+cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx  				# YOLOv8-world

 # Pose
 cargo run -r --example yolo -- --task pose --ver 8 --scale n   # YOLOv8-Pose
 cargo run -r --example yolo -- --task pose --ver 11 --scale n  # YOLOv11-Pose

 # Segment
-cargo run -r --example yolo -- --task segment --ver 5 --scale n  # YOLOv5-Segment
-cargo run -r --example yolo -- --task segment --ver 8 --scale n  # YOLOv8-Segment
-cargo run -r --example yolo -- --task segment --ver 11 --scale n  # YOLOv8-Segment
+cargo run -r --example yolo -- --task segment --ver 5 --scale n --use-coco-80-classes --dtype fp16 		# YOLOv5-Segment
+cargo run -r --example yolo -- --task segment --ver 8 --scale n  --use-coco-80-classes --dtype fp16 	# YOLOv8-Segment
+cargo run -r --example yolo -- --task segment --ver 9 --scale c  --use-coco-80-classes --dtype fp16 	# YOLOv9-Segment
+cargo run -r --example yolo -- --task segment --ver 11 --scale n --use-coco-80-classes --dtype fp16 	# YOLO11-Segment

 # Obb
 cargo run -r --example yolo -- --ver 8 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png  # YOLOv8-Obb
 cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png  # YOLOv11-Obb
 ```

-**`cargo run -r --example yolo -- --help` for more options**
+**`cargo run -r --example yolo -- --help` for more configuration options**

 ## Other YOLOv8 Solution Models

--- a/examples/yolo/main.rs
+++ b/examples/yolo/main.rs
@@ -1,25 +1,25 @@
 use anyhow::Result;
 use usls::{
-    models::YOLO, Annotator, DataLoader, Options, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
+    models::YOLO, Annotator, Config, DataLoader, Style, NAMES_COCO_80, NAMES_COCO_KEYPOINTS_17,
    NAMES_IMAGENET_1K, SKELETON_COCO_19, SKELETON_COLOR_COCO_19,
 };

 #[derive(argh::FromArgs, Debug)]
-/// Example
+/// YOLO Example
 struct Args {
-    /// model file
+    /// model file(.onnx)
    #[argh(option)]
    model: Option<String>,

-    /// source
+    /// source: image, image folder, video stream
    #[argh(option, default = "String::from(\"./assets/bus.jpg\")")]
    source: String,

-    /// dtype
+    /// model dtype
    #[argh(option, default = "String::from(\"auto\")")]
    dtype: String,

-    /// task
+    /// task: det, seg, pose, classify, obb
    #[argh(option, default = "String::from(\"det\")")]
    task: String,

@@ -27,101 +27,101 @@ struct Args {
    #[argh(option, default = "8.0")]
    ver: f32,

-    /// device
+    /// device: cuda, cpu, mps
    #[argh(option, default = "String::from(\"cpu:0\")")]
    device: String,

-    /// scale
+    /// scale: n, s, m, l, x
    #[argh(option, default = "String::from(\"n\")")]
    scale: String,

-    /// trt_fp16
+    /// enable TensorRT FP16
    #[argh(option, default = "true")]
    trt_fp16: bool,

-    /// batch_size
+    /// batch size
    #[argh(option, default = "1")]
    batch_size: usize,

-    /// min_batch_size
+    /// min batch size: For TensorRT
    #[argh(option, default = "1")]
    min_batch_size: usize,

-    /// max_batch_size
+    /// max batch size: For TensorRT
    #[argh(option, default = "4")]
    max_batch_size: usize,

-    /// min_image_width
+    /// min image width: For TensorRT
    #[argh(option, default = "224")]
    min_image_width: isize,

-    /// image_width
+    /// image width: For TensorRT
    #[argh(option, default = "640")]
    image_width: isize,

-    /// max_image_width
+    /// max image width: For TensorRT
    #[argh(option, default = "1280")]
    max_image_width: isize,

-    /// min_image_height
+    /// min image height: For TensorRT
    #[argh(option, default = "224")]
    min_image_height: isize,

-    /// image_height
+    /// image height: For TensorRT
    #[argh(option, default = "640")]
    image_height: isize,

-    /// max_image_height
+    /// max image height: For TensorRT
    #[argh(option, default = "1280")]
    max_image_height: isize,

-    /// num_classes
+    /// num classes
    #[argh(option)]
    num_classes: Option<usize>,

-    /// num_keypoints
+    /// num keypoints
    #[argh(option)]
    num_keypoints: Option<usize>,

-    /// use_coco_80_classes
-    #[argh(switch)]
-    use_coco_80_classes: bool,
-
-    /// use_coco_17_keypoints_classes
-    #[argh(switch)]
-    use_coco_17_keypoints_classes: bool,
-
-    /// use_imagenet_1k_classes
-    #[argh(switch)]
-    use_imagenet_1k_classes: bool,
-
-    /// confs
-    #[argh(option)]
-    confs: Vec<f32>,
-
-    /// keypoint_confs
-    #[argh(option)]
-    keypoint_confs: Vec<f32>,
-
-    /// exclude_classes
-    #[argh(option)]
-    exclude_classes: Vec<usize>,
-
-    /// retain_classes
-    #[argh(option)]
-    retain_classes: Vec<usize>,
-
-    /// class_names
+    /// class names
    #[argh(option)]
    class_names: Vec<String>,

-    /// keypoint_names
+    /// keypoint names
    #[argh(option)]
    keypoint_names: Vec<String>,

-    /// topk
+    /// top-k
    #[argh(option, default = "5")]
    topk: usize,
+
+    /// use COCO 80 classes
+    #[argh(switch)]
+    use_coco_80_classes: bool,
+
+    /// use COCO 17 keypoints classes
+    #[argh(switch)]
+    use_coco_17_keypoints_classes: bool,
+
+    /// use ImageNet 1K classes
+    #[argh(switch)]
+    use_imagenet_1k_classes: bool,
+
+    /// confidences
+    #[argh(option)]
+    confs: Vec<f32>,
+
+    /// keypoint confidences
+    #[argh(option)]
+    keypoint_confs: Vec<f32>,
+
+    /// exclude classes
+    #[argh(option)]
+    exclude_classes: Vec<usize>,
+
+    /// retain classes
+    #[argh(option)]
+    retain_classes: Vec<usize>,
 }

 fn main() -> Result<()> {
@@ -129,17 +129,15 @@ fn main() -> Result<()> {
        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
        .init();
-
    let args: Args = argh::from_env();
-
-    let mut options = Options::yolo()
+    let mut config = Config::yolo()
        .with_model_file(&args.model.unwrap_or_default())
-        .with_model_task(args.task.as_str().try_into()?)
-        .with_model_version(args.ver.try_into()?)
-        .with_model_scale(args.scale.as_str().try_into()?)
+        .with_task(args.task.as_str().try_into()?)
+        .with_version(args.ver.try_into()?)
+        .with_scale(args.scale.as_str().try_into()?)
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
-        .with_trt_fp16(args.trt_fp16)
+        .with_model_trt_fp16(args.trt_fp16)
        .with_model_ixx(
            0,
            0,
@@ -172,30 +170,25 @@ fn main() -> Result<()> {
        })
        .with_topk(args.topk)
        .retain_classes(&args.retain_classes)
-        .exclude_classes(&args.exclude_classes);
-
+        .exclude_classes(&args.exclude_classes)
+        .with_model_num_dry_run(2);
    if args.use_coco_80_classes {
-        options = options.with_class_names(&NAMES_COCO_80);
+        config = config.with_class_names(&NAMES_COCO_80);
    }
-
    if args.use_coco_17_keypoints_classes {
-        options = options.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
+        config = config.with_keypoint_names(&NAMES_COCO_KEYPOINTS_17);
    }
-
    if args.use_imagenet_1k_classes {
-        options = options.with_class_names(&NAMES_IMAGENET_1K);
+        config = config.with_class_names(&NAMES_IMAGENET_1K);
    }
-
    if let Some(nc) = args.num_classes {
-        options = options.with_nc(nc);
+        config = config.with_nc(nc);
    }
-
    if let Some(nk) = args.num_keypoints {
-        options = options.with_nk(nk);
+        config = config.with_nk(nk);
    }
-
    if !args.class_names.is_empty() {
-        options = options.with_class_names(
+        config = config.with_class_names(
            &args
                .class_names
                .iter()
@@ -203,9 +196,8 @@ fn main() -> Result<()> {
                .collect::<Vec<_>>(),
        );
    }
-
    if !args.keypoint_names.is_empty() {
-        options = options.with_keypoint_names(
+        config = config.with_keypoint_names(
            &args
                .keypoint_names
                .iter()
@@ -215,7 +207,7 @@ fn main() -> Result<()> {
    }

    // build model
-    let mut model = YOLO::try_from(options.commit()?)?;
+    let mut model = YOLO::new(config.commit()?)?;

    // build dataloader
    let dl = DataLoader::new(&args.source)?
@@ -255,6 +247,7 @@ fn main() -> Result<()> {
        }
    }

+    // summary
    model.summary();

    Ok(())
--- a/examples/yoloe/README.md
+++ b/examples/yoloe/README.md
@@ -1,6 +1,6 @@
 ## Quick Start

 ```shell
-cargo run -r --example yoloe 
+cargo run -r -F cuda --example yoloe -- --device cuda
 ```

--- a/examples/yoloe/main.rs
+++ b/examples/yoloe/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::YOLO, Annotator, DataLoader, Options, Style};
+use usls::{models::YOLO, Annotator, Config, DataLoader, Style};

 #[derive(argh::FromArgs)]
 /// Example
@@ -21,8 +21,8 @@ fn main() -> Result<()> {

    let args: Args = argh::from_env();

-    // options
-    let options = Options::yoloe_v8s_seg_pf()
+    // config
+    let config = Config::yoloe_v8s_seg_pf()
        // yoloe_v8m_seg_pf()
        // yoloe_v8l_seg_pf()
        // yoloe_11s_seg_pf()
@@ -31,7 +31,7 @@ fn main() -> Result<()> {
        .with_model_dtype(args.dtype.as_str().try_into()?)
        .with_model_device(args.device.as_str().try_into()?)
        .commit()?;
-    let mut model = YOLO::new(options)?;
+    let mut model = YOLO::new(config)?;

    // load
    let xs = DataLoader::try_read_n(&["./assets/bus.jpg"])?;
--- a/examples/yolop/main.rs
+++ b/examples/yolop/main.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
+use usls::{models::YOLOPv2, Annotator, Config, DataLoader};

 fn main() -> Result<()> {
    tracing_subscriber::fmt()
@@ -8,8 +8,7 @@ fn main() -> Result<()> {
        .init();

    // build model
-    let options = Options::yolop_v2_480x800().commit()?;
-    let mut model = YOLOPv2::new(options)?;
+    let mut model = YOLOPv2::new(Config::yolop_v2_480x800().commit()?)?;

    // load image
    let xs = DataLoader::try_read_n(&["images/car-view.jpg"])?;
--- a/src/inference/engine.rs
+++ b/src/inference/engine.rs
@@ -13,8 +13,8 @@ use prost::Message;
 use std::collections::HashSet;

 use crate::{
-    build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax, Ops, Ts,
-    Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
+    build_progress_bar, elapsed, human_bytes_binary, onnx, DType, Device, Iiix, MinOptMax,
+    ORTConfig, Ops, Ts, Xs, PROGRESS_BAR_STYLE_CYAN_2, PROGRESS_BAR_STYLE_FINISH, X,
 };

 impl From<TensorElementType> for DType {
@@ -93,6 +93,20 @@ impl Default for Engine {
 }

 impl Engine {
+    pub fn try_from_config(config: &ORTConfig) -> Result<Self> {
+        Self {
+            file: config.file.clone(),
+            spec: config.spec.clone(),
+            iiixs: config.iiixs.clone(),
+            device: config.device,
+            trt_fp16: config.trt_fp16,
+            num_dry_run: config.num_dry_run,
+            graph_opt_level: config.graph_opt_level,
+            ..Default::default()
+        }
+        .build()
+    }
+
    pub fn build(mut self) -> Result<Self> {
        let name = format!("[{}] ort_initialization", self.spec);
        elapsed!(&name, self.ts, {
--- a/src/inference/hbb.rs
+++ b/src/inference/hbb.rs
@@ -17,7 +17,9 @@ impl std::fmt::Debug for Hbb {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Hbb")
            .field("xyxy", &[self.x, self.y, self.xmax(), self.ymax()])
-            .field("meta", &self.meta)
+            .field("id", &self.meta.id())
+            .field("name", &self.meta.name())
+            .field("confidence", &self.meta.confidence())
            .finish()
    }
 }
--- a/src/inference/image.rs
+++ b/src/inference/image.rs
@@ -308,12 +308,12 @@ impl Image {
            ));
        }

-        let (mut resizer, options) = build_resizer_filter(filter)?;
+        let (mut resizer, config) = build_resizer_filter(filter)?;
        let x: DynamicImage = self.to_dyn();

        if let ResizeMode::FitExact = mode {
            let mut dst = FImage::new(tw, th, PixelType::U8x3);
-            resizer.resize(&x, &mut dst, &options)?;
+            resizer.resize(&x, &mut dst, &config)?;
            trans_info = trans_info
                .with_height_scale(th as f32 / h0 as f32)
                .with_width_scale(tw as f32 / w0 as f32);
@@ -362,7 +362,7 @@ impl Image {
            };

            let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
-            resizer.resize(&x, &mut dst_cropped, &options)?;
+            resizer.resize(&x, &mut dst_cropped, &config)?;

            Ok((Self::from_u8s(&dst.into_vec(), tw, th)?, trans_info))
        }
--- a/src/inference/keypoint.rs
+++ b/src/inference/keypoint.rs
@@ -22,7 +22,6 @@ impl std::fmt::Debug for Keypoint {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Keypoint")
            .field("xy", &[self.x, self.y])
-            .field("uid", &self.meta.uid())
            .field("id", &self.meta.id())
            .field("name", &self.meta.name())
            .field("confidence", &self.meta.confidence())
--- a/src/inference/mask.rs
+++ b/src/inference/mask.rs
@@ -20,7 +20,6 @@ impl std::fmt::Debug for Mask {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Mask")
            .field("dimensions", &self.dimensions())
-            .field("uid", &self.meta.uid())
            .field("id", &self.meta.id())
            .field("name", &self.meta.name())
            .field("confidence", &self.meta.confidence())
--- a/src/inference/obb.rs
+++ b/src/inference/obb.rs
@@ -13,7 +13,7 @@ pub struct Obb {
 impl std::fmt::Debug for Obb {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Obb")
-            .field("uid", &self.meta.uid())
+            .field("vertices", &self.vertices)
            .field("id", &self.meta.id())
            .field("name", &self.meta.name())
            .field("confidence", &self.meta.confidence())
--- a/src/inference/polygon.rs
+++ b/src/inference/polygon.rs
@@ -27,8 +27,7 @@ impl Default for Polygon {
 impl std::fmt::Debug for Polygon {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Polygon")
-            .field("count", &self.count())
-            .field("uid", &self.meta.uid())
+            .field("n_points", &self.count())
            .field("id", &self.meta.id())
            .field("name", &self.meta.name())
            .field("confidence", &self.meta.confidence())
--- a/src/inference/prob.rs
+++ b/src/inference/prob.rs
@@ -31,7 +31,7 @@ impl Prob {
                    .with_confidence(confidence);

                if let Some(names) = names {
-                    if id < names.len() {
+                    if id < names.len() { // guard the index: `names` may be non-empty yet shorter than `id + 1`
                        meta = meta.with_name(names[id]);
                    }
                }
--- a/src/io/dataloader.rs
+++ b/src/io/dataloader.rs
@@ -367,14 +367,14 @@ impl DataLoader {
    fn load_image_paths_from_folder(source: &str, exts: &[&str]) -> Result<Vec<PathBuf>> {
        let source_path = Path::new(source);
        let mut paths: Vec<PathBuf> = Vec::new();
-        let options = MatchOptions {
+        let config = MatchOptions {
            case_sensitive: false,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };
        for ext in exts.iter() {
            let pattern = source_path.join(format!("*.{}", ext));
-            let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), options)?
+            let paths_: Vec<PathBuf> = glob_with(pattern.to_str().unwrap(), config)?
                .filter_map(|entry| entry.ok())
                .collect();
            paths.extend(paths_);
@@ -393,12 +393,12 @@ impl DataLoader {
    }

    fn glob(pattern: &str, sort: bool, case_sensitive: bool) -> anyhow::Result<Vec<PathBuf>> {
-        let options = MatchOptions {
+        let config = MatchOptions {
            case_sensitive,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };
-        let mut paths: Vec<PathBuf> = glob_with(pattern, options)?
+        let mut paths: Vec<PathBuf> = glob_with(pattern, config)?
            .filter_map(|entry| entry.ok())
            .collect();

@@ -479,7 +479,7 @@ impl DataLoader {
        self
    }

-    pub fn with_batch_size(mut self, x: usize) -> Self {
+    pub fn with_batch_size_all(mut self, x: usize) -> Self {
        self.batch_size = x;
        self
    }
--- a/src/models/beit/config.rs
+++ b/src/models/beit/config.rs
@@ -1,10 +1,8 @@
-use crate::NAMES_IMAGENET_1K;
-
 /// Model configuration for `BEiT`
-impl crate::Options {
+impl crate::Config {
    pub fn beit() -> Self {
        Self::default()
-            .with_model_name("beit")
+            .with_name("beit")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
@@ -13,7 +11,7 @@ impl crate::Options {
            .with_image_std(&[0.5, 0.5, 0.5])
            .with_normalize(true)
            .with_apply_softmax(true)
-            .with_class_names(&NAMES_IMAGENET_1K)
+            .with_class_names(&crate::NAMES_IMAGENET_1K)
    }

    pub fn beit_base() -> Self {
--- a/src/models/ben2/config.rs
+++ b/src/models/ben2/config.rs
@@ -1,5 +1,5 @@
 /// Model configuration for `BEN2`
-impl crate::Options {
+impl crate::Config {
    pub fn ben2_base() -> Self {
        Self::rmbg().with_model_file("ben2-base.onnx")
    }
--- a/src/models/blip/config.rs
+++ b/src/models/blip/config.rs
@@ -1,34 +1,24 @@
 /// Model configuration for `BLIP`
-impl crate::Options {
-    pub fn blip() -> Self {
-        Self::default().with_model_name("blip").with_batch_size(1)
-    }
-
+impl crate::Config {
    #[allow(clippy::excessive_precision)]
-    pub fn blip_visual() -> Self {
-        Self::blip()
-            .with_model_kind(crate::Kind::Vision)
-            .with_model_ixx(0, 2, 384.into())
-            .with_model_ixx(0, 3, 384.into())
+    pub fn blip() -> Self {
+        Self::default()
+            .with_name("blip")
+            .with_batch_size_all(1)
+            .with_visual_ixx(0, 1, 3.into())
+            .with_visual_ixx(0, 2, 384.into())
+            .with_visual_ixx(0, 3, 384.into())
            .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
            .with_image_std(&[0.26862954, 0.26130258, 0.27577711])
-            .with_resize_filter("Bilinear")
-            .with_normalize(true)
    }

-    pub fn blip_textual() -> Self {
-        Self::blip().with_model_kind(crate::Kind::Language)
-    }
-
-    pub fn blip_v1_base_caption_visual() -> Self {
-        Self::blip_visual()
-            .with_model_version(1.into())
-            .with_model_file("v1-base-caption-visual.onnx")
-    }
-
-    pub fn blip_v1_base_caption_textual() -> Self {
-        Self::blip_textual()
-            .with_model_version(1.into())
-            .with_model_file("v1-base-caption-textual.onnx")
+    pub fn blip_v1_base_caption() -> Self {
+        Self::blip()
+            .with_version(1.into())
+            .with_visual_file("v1-base-caption-visual.onnx")
+            .with_textual_file("v1-base-caption-textual.onnx")
+            .with_tokenizer_file("blip/tokenizer.json")
+            .with_tokenizer_config_file("blip/tokenizer_config.json")
+            .with_special_tokens_map_file("blip/special_tokens_map.json")
    }
 }
--- a/src/models/blip/impl.rs
+++ b/src/models/blip/impl.rs
@@ -2,26 +2,34 @@ use aksr::Builder;
 use anyhow::Result;
 use ndarray::{s, Axis};

-use crate::{
-    elapsed,
-    models::{BaseModelTextual, BaseModelVisual},
-    Image, LogitsSampler, Options, Ts, Xs, X, Y,
-};
+use crate::{elapsed, Config, Engine, Image, LogitsSampler, Processor, Ts, Xs, X, Y};

 #[derive(Debug, Builder)]
 pub struct Blip {
-    visual: BaseModelVisual,
-    textual: BaseModelTextual,
-    ts: Ts,
+    visual: Engine,
+    textual: Engine,
+    batch: usize,
+    height: usize,
+    width: usize,
+    processor: Processor,
    max_length: usize,
    eos_token_id: u32,
+    ts: Ts,
 }

 impl Blip {
-    pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
-        let visual = BaseModelVisual::new(options_visual)?;
-        let textual = BaseModelTextual::new(options_textual)?;
-        let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
+    pub fn new(config: Config) -> Result<Self> {
+        let visual = Engine::try_from_config(&config.visual)?;
+        let textual = Engine::try_from_config(&config.textual)?;
+        let (batch, height, width) = (
+            visual.batch().opt(),
+            visual.try_height().unwrap_or(&384.into()).opt(),
+            visual.try_width().unwrap_or(&384.into()).opt(),
+        );
+        let ts = Ts::merge(&[visual.ts(), textual.ts()]);
+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
        let max_length = 512;
        let eos_token_id = 102;

@@ -31,17 +39,24 @@ impl Blip {
            ts,
            max_length,
            eos_token_id,
+            batch,
+            height,
+            width,
+            processor,
        })
    }

    pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
-        self.visual.encode(xs)
+        // Preprocess the images into the visual engine's input.
+        let inputs = self.processor.process_images(xs)?;
+        // Track the actual batch size: `encode_texts` replicates the prompt ids `self.batch()` times.
+        self.batch = xs.len();
+        let outputs = self.visual.run(inputs.into())?;
+
+        // The first engine output is returned as the image embeddings.
+        Ok(outputs[0].to_owned())
    }

    pub fn encode_texts(&mut self, text: Option<&str>) -> Result<Vec<Vec<f32>>> {
        let input_ids = self
-            .textual
-            .processor()
+            .processor
            .encode_text_ids(text.unwrap_or_default(), false)?;
        Ok(vec![input_ids.clone(); self.batch()])
    }
@@ -70,11 +85,11 @@ impl Blip {
            let input_ids_attn_mask = X::ones(input_ids_nd.dims());

            // decode
-            let outputs = self.textual.inference(Xs::from(vec![
+            let outputs = self.textual.run(Xs::from(vec![
                input_ids_nd,
                input_ids_attn_mask,
                image_embeds.clone(),
-                X::ones(&[self.visual().batch(), image_embeds.dims()[1]]), // image_embeds_attn_mask
+                X::ones(&[self.batch(), image_embeds.dims()[1]]),
            ]))?;

            // decode each token for each batch
@@ -102,7 +117,7 @@ impl Blip {
        }

        // batch decode
-        let texts = self.textual.processor().decode_tokens_batch(
+        let texts = self.processor.decode_tokens_batch(
            &token_ids
                .into_iter()
                .map(|v| v.into_iter().map(|x| x as u32).collect::<Vec<_>>())
@@ -114,7 +129,6 @@ impl Blip {
            .into_iter()
            .map(|x| Y::default().with_texts(&[&x]))
            .collect::<Vec<_>>();
-        // .into();

        Ok(ys)
    }
@@ -122,8 +136,4 @@ impl Blip {
    pub fn summary(&mut self) {
        self.ts.summary();
    }
-
-    pub fn batch(&self) -> usize {
-        self.visual.batch() as _
-    }
 }
--- a/src/models/clip/config.rs
+++ b/src/models/clip/config.rs
@@ -1,71 +1,57 @@
-use crate::Kind;
-
 /// Model configuration for `CLIP`
-impl crate::Options {
+impl crate::Config {
    pub fn clip() -> Self {
        Self::default()
-            .with_model_name("clip")
-            .with_model_ixx(0, 0, 1.into())
-    }
-
-    pub fn clip_visual() -> Self {
-        Self::clip()
-            .with_model_kind(Kind::Vision)
-            .with_model_ixx(0, 2, 224.into())
-            .with_model_ixx(0, 3, 224.into())
+            .with_name("clip")
+            .with_batch_size_all(1)
+            .with_visual_ixx(0, 1, 3.into())
+            .with_visual_ixx(0, 2, 224.into())
+            .with_visual_ixx(0, 3, 224.into())
            .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
            .with_image_std(&[0.26862954, 0.2613026, 0.2757771])
-    }
-
-    pub fn clip_textual() -> Self {
-        Self::clip()
-            .with_model_kind(Kind::Language)
            .with_model_max_length(77)
+            .with_tokenizer_file("clip/tokenizer.json")
+            .with_tokenizer_config_file("clip/tokenizer_config.json")
+            .with_special_tokens_map_file("clip/special_tokens_map.json")
+            .with_config_file("clip/config.json")
    }

-    pub fn clip_vit_b16_visual() -> Self {
-        Self::clip_visual().with_model_file("vit-b16-visual.onnx")
+    pub fn clip_vit_b16() -> Self {
+        Self::clip()
+            .with_visual_file("vit-b16-visual.onnx")
+            .with_textual_file("vit-b16-textual.onnx")
    }

-    pub fn clip_vit_b16_textual() -> Self {
-        Self::clip_textual().with_model_file("vit-b16-textual.onnx")
+    pub fn clip_vit_b32() -> Self {
+        Self::clip()
+            .with_visual_file("vit-b32-visual.onnx")
+            .with_textual_file("vit-b32-textual.onnx")
    }

-    pub fn clip_vit_b32_visual() -> Self {
-        Self::clip_visual().with_model_file("vit-b32-visual.onnx")
+    pub fn clip_vit_l14() -> Self {
+        Self::clip()
+            .with_visual_file("vit-l14-visual.onnx")
+            .with_textual_file("vit-l14-textual.onnx")
    }

-    pub fn clip_vit_b32_textual() -> Self {
-        Self::clip_textual().with_model_file("vit-b32-textual.onnx")
-    }
-
-    pub fn clip_vit_l14_visual() -> Self {
-        Self::clip_visual().with_model_file("vit-l14-visual.onnx")
-    }
-
-    pub fn clip_vit_l14_textual() -> Self {
-        Self::clip_textual().with_model_file("vit-l14-textual.onnx")
+    pub fn jina_clip() -> Self {
+        Self::default()
+            .with_name("jina-clip-v1")
+            .with_batch_size_all(1)
+            .with_visual_ixx(0, 1, 3.into())
+            .with_visual_ixx(0, 2, 224.into())
+            .with_visual_ixx(0, 3, 224.into())
+            .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
+            .with_image_std(&[0.26862954, 0.2613026, 0.2757771])
+            .with_tokenizer_file("jina-clip-v1/tokenizer.json")
+            .with_tokenizer_config_file("jina-clip-v1/tokenizer_config.json")
+            .with_special_tokens_map_file("jina-clip-v1/special_tokens_map.json")
+            .with_config_file("jina-clip-v1/config.json")
    }

    pub fn jina_clip_v1() -> Self {
-        Self::default()
-            .with_model_name("jina-clip-v1")
-            .with_model_ixx(0, 0, 1.into())
-    }
-
-    pub fn jina_clip_v1_visual() -> Self {
-        Self::jina_clip_v1()
-            .with_model_kind(Kind::Vision)
-            .with_model_ixx(0, 2, 224.into())
-            .with_model_ixx(0, 3, 224.into())
-            .with_image_mean(&[0.48145466, 0.4578275, 0.40821073])
-            .with_image_std(&[0.26862954, 0.2613026, 0.2757771])
-            .with_model_file("visual.onnx")
-    }
-
-    pub fn jina_clip_v1_textual() -> Self {
-        Self::jina_clip_v1()
-            .with_model_kind(Kind::Language)
-            .with_model_file("textual.onnx")
+        Self::jina_clip()
+            .with_visual_file("visual.onnx")
+            .with_textual_file("textual.onnx")
    }
 }
--- a/src/models/clip/impl.rs
+++ b/src/models/clip/impl.rs
@@ -2,11 +2,12 @@ use aksr::Builder;
 use anyhow::Result;
 use ndarray::Array2;

-use crate::{elapsed, Engine, Image, Options, Processor, Ts, Xs, X};
+use crate::{elapsed, Config, Engine, Image, Processor, Ts, X};

 #[derive(Debug, Builder)]
-pub struct ClipVisual {
-    engine: Engine,
+pub struct Clip {
+    visual: Engine,
+    textual: Engine,
    height: usize,
    width: usize,
    batch: usize,
@@ -14,22 +15,23 @@ pub struct ClipVisual {
    ts: Ts,
 }

-impl ClipVisual {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
-        let (batch, height, width, ts) = (
-            engine.batch().opt(),
-            engine.try_height().unwrap_or(&224.into()).opt(),
-            engine.try_width().unwrap_or(&224.into()).opt(),
-            engine.ts.clone(),
+impl Clip {
+    pub fn new(config: Config) -> Result<Self> {
+        let visual = Engine::try_from_config(&config.visual)?;
+        let textual = Engine::try_from_config(&config.textual)?;
+        let (batch, height, width) = (
+            visual.batch().opt(),
+            visual.try_height().unwrap_or(&224.into()).opt(),
+            visual.try_width().unwrap_or(&224.into()).opt(),
        );
-        let processor = options
-            .to_processor()?
+        let ts = Ts::merge(&[visual.ts(), textual.ts()]);
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

        Ok(Self {
-            engine,
+            textual,
+            visual,
            height,
            width,
            batch,
@@ -38,111 +40,39 @@ impl ClipVisual {
        })
    }

-    pub fn preprocess(&mut self, xs: &[Image]) -> Result<Xs> {
-        let x = self.processor.process_images(xs)?;
-
-        Ok(x.into())
-    }
-
-    pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-        self.engine.run(xs)
-    }
-
    pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
-        let xs = elapsed!("visual-preprocess", self.ts, { self.preprocess(xs)? });
-        let xs = elapsed!("visual-inference", self.ts, { self.inference(xs)? });
+        let xs = elapsed!("visual-preprocess", self.ts, {
+            self.processor.process_images(xs)?
+        });
+        let xs = elapsed!("visual-inference", self.ts, { self.visual.run(xs.into())? });
        let x = elapsed!("visual-postprocess", self.ts, { xs[0].to_owned() });

        Ok(x)
    }
-}
-
-#[derive(Debug, Builder)]
-pub struct ClipTextual {
-    engine: Engine,
-    batch: usize,
-    processor: Processor,
-    ts: Ts,
-}
-
-impl ClipTextual {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
-        let (batch, ts) = (engine.batch().opt(), engine.ts.clone());
-        let processor = options.to_processor()?;
-
-        Ok(Self {
-            engine,
-            batch,
-            processor,
-            ts,
-        })
-    }
-
-    pub fn preprocess(&self, xs: &[&str]) -> Result<Xs> {
-        let encodings: Vec<f32> = self
-            .processor
-            .encode_texts_ids(xs, false)? // skip_special_tokens
-            .into_iter()
-            .flatten()
-            .collect();
-
-        let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
-            .into_dyn()
-            .into();
-
-        Ok(x.into())
-    }
-
-    pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-        self.engine.run(xs)
-    }

    pub fn encode_texts(&mut self, xs: &[&str]) -> Result<X> {
-        let xs = elapsed!("textual-preprocess", self.ts, { self.preprocess(xs)? });
-        let xs = elapsed!("textual-inference", self.ts, { self.inference(xs)? });
+        // Reject an empty batch up front: the per-text length computed below divides by
+        // `xs.len()` and would otherwise panic with a division by zero.
+        anyhow::ensure!(!xs.is_empty(), "No texts provided to `encode_texts`");
+
+        let xs = elapsed!("textual-preprocess", self.ts, {
+            let encodings: Vec<f32> = self
+                .processor
+                .encode_texts_ids(xs, false)? // skip_special_tokens
+                .into_iter()
+                .flatten()
+                .collect();
+
+            // Reshape the flattened ids into one row per input text; `from_shape_vec`
+            // returns an error if the element count does not fit that shape.
+            let x: X = Array2::from_shape_vec((xs.len(), encodings.len() / xs.len()), encodings)?
+                .into_dyn()
+                .into();
+            x
+        });
+        let xs = elapsed!("textual-inference", self.ts, {
+            self.textual.run(xs.into())?
+        });
        let x = elapsed!("textual-postprocess", self.ts, { xs[0].to_owned() });

        Ok(x)
    }
-}
-
-#[derive(Debug, Builder)]
-pub struct Clip {
-    textual: ClipTextual,
-    visual: ClipVisual,
-    ts: Ts,
-}
-
-impl Clip {
-    pub fn new(options_visual: Options, options_textual: Options) -> Result<Self> {
-        let visual = ClipVisual::new(options_visual)?;
-        let textual = ClipTextual::new(options_textual)?;
-        // let ts = Ts::merge(&[visual.engine().ts(), textual.engine().ts()]);
-        let ts = Ts::default();
-
-        Ok(Self {
-            textual,
-            visual,
-            ts,
-        })
-    }
-
-    pub fn encode_images(&mut self, xs: &[Image]) -> Result<X> {
-        let x = elapsed!("encode_images", self.ts, { self.visual.encode_images(xs)? });
-        Ok(x)
-    }
-
-    pub fn encode_texts(&mut self, xs: &[&str]) -> Result<X> {
-        let x = elapsed!("encode_texts", self.ts, { self.textual.encode_texts(xs)? });
-        Ok(x)
-    }

    pub fn summary(&mut self) {
-        // self.ts.clear();
-        // self.ts = Ts::merge(&[&self.ts, self.visual.ts(), self.textual.ts()]);
        self.ts.summary();
-        self.visual.ts().summary();
-        self.textual.ts().summary();
    }
 }
--- a/src/models/convnext/config.rs
+++ b/src/models/convnext/config.rs
@@ -1,10 +1,10 @@
 use crate::NAMES_IMAGENET_1K;

 /// Model configuration for `ConvNeXt`
-impl crate::Options {
+impl crate::Config {
    pub fn convnext() -> Self {
        Self::default()
-            .with_model_name("convnext")
+            .with_name("convnext")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
@@ -13,6 +13,7 @@ impl crate::Options {
            .with_image_std(&[0.229, 0.224, 0.225])
            .with_normalize(true)
            .with_apply_softmax(true)
+            .with_topk(5)
            .with_class_names(&NAMES_IMAGENET_1K)
    }

--- a/src/models/d_fine/config.rs
+++ b/src/models/d_fine/config.rs
@@ -1,7 +1,7 @@
 /// Model configuration for `d_fine`
-impl crate::Options {
+impl crate::Config {
    pub fn d_fine() -> Self {
-        Self::rtdetr().with_model_name("d-fine")
+        Self::rtdetr().with_name("d-fine")
    }

    pub fn d_fine_n_coco() -> Self {
--- a/src/models/db/config.rs
+++ b/src/models/db/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for [DB](https://github.com/MhLiao/DB) and [PaddleOCR-Det](https://github.com/PaddlePaddle/PaddleOCR)
-impl crate::Options {
+impl crate::Config {
    pub fn db() -> Self {
        Self::default()
-            .with_model_name("db")
+            .with_name("db")
            .with_model_ixx(0, 0, (1, 1, 8).into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, (608, 960, 1600).into())
@@ -11,7 +11,7 @@ impl crate::Options {
            .with_normalize(true)
            .with_image_mean(&[0.485, 0.456, 0.406])
            .with_image_std(&[0.229, 0.224, 0.225])
-            .with_binary_thresh(0.2)
+            .with_db_binary_thresh(0.2)
            .with_class_confs(&[0.35])
            .with_min_width(5.0)
            .with_min_height(12.0)
--- a/src/models/db/impl.rs
+++ b/src/models/db/impl.rs
@@ -4,7 +4,7 @@ use ndarray::Axis;
 use rayon::prelude::*;

 use crate::{
-    elapsed, DynConf, Engine, Hbb, Image, Mask, Obb, Ops, Options, Polygon, Processor, Ts, Xs, Y,
+    elapsed, Config, DynConf, Engine, Hbb, Image, Mask, Obb, Ops, Polygon, Processor, Ts, Xs, Y,
 };

 #[derive(Debug, Builder)]
@@ -24,8 +24,8 @@ pub struct DB {
 }

 impl DB {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts, spec) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&960.into()).opt(),
@@ -33,15 +33,14 @@ impl DB {
            engine.ts.clone(),
            engine.spec().to_owned(),
        );
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-        let confs = DynConf::new(options.class_confs(), 1);
-        let binary_thresh = options.binary_thresh().unwrap_or(0.2);
-        let unclip_ratio = options.unclip_ratio().unwrap_or(1.5);
-        let min_width = options.min_width().unwrap_or(12.0);
-        let min_height = options.min_height().unwrap_or(5.0);
+        let confs = DynConf::new(config.class_confs(), 1);
+        let binary_thresh = config.db_binary_thresh().unwrap_or(0.2);
+        let unclip_ratio = config.db_unclip_ratio().unwrap_or(1.5);
+        let min_width = config.min_width().unwrap_or(12.0);
+        let min_height = config.min_height().unwrap_or(5.0);

        Ok(Self {
            engine,
--- a/src/models/deim/config.rs
+++ b/src/models/deim/config.rs
@@ -1,7 +1,7 @@
 /// Model configuration for `DEIM`
-impl crate::Options {
+impl crate::Config {
    pub fn deim() -> Self {
-        Self::d_fine().with_model_name("deim")
+        Self::d_fine().with_name("deim")
    }

    pub fn deim_dfine_s_coco() -> Self {
--- a/src/models/deit/config.rs
+++ b/src/models/deit/config.rs
@@ -1,10 +1,10 @@
 use crate::NAMES_IMAGENET_1K;

 /// Model configuration for `DeiT`
-impl crate::Options {
+impl crate::Config {
    pub fn deit() -> Self {
        Self::default()
-            .with_model_name("deit")
+            .with_name("deit")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
--- a/src/models/depth_anything/config.rs
+++ b/src/models/depth_anything/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for `DepthAnything`
-impl crate::Options {
+impl crate::Config {
    pub fn depth_anything() -> Self {
        Self::default()
-            .with_model_name("depth-anything")
+            .with_name("depth-anything")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, (384, 518, 1024).into())
@@ -14,26 +14,26 @@ impl crate::Options {
    }

    pub fn depth_anything_s() -> Self {
-        Self::depth_anything().with_model_scale(crate::Scale::S)
+        Self::depth_anything().with_scale(crate::Scale::S)
    }

    pub fn depth_anything_v1() -> Self {
-        Self::depth_anything().with_model_version(1.into())
+        Self::depth_anything().with_version(1.into())
    }

    pub fn depth_anything_v2() -> Self {
-        Self::depth_anything().with_model_version(2.into())
+        Self::depth_anything().with_version(2.into())
    }

    pub fn depth_anything_v1_small() -> Self {
        Self::depth_anything_v1()
-            .with_model_scale(crate::Scale::S)
+            .with_scale(crate::Scale::S)
            .with_model_file("v1-s.onnx")
    }

    pub fn depth_anything_v2_small() -> Self {
        Self::depth_anything_v2()
-            .with_model_scale(crate::Scale::S)
+            .with_scale(crate::Scale::S)
            .with_model_file("v2-s.onnx")
    }
 }
--- a/src/models/depth_anything/impl.rs
+++ b/src/models/depth_anything/impl.rs
@@ -1,7 +1,7 @@
 use aksr::Builder;
 use anyhow::Result;

-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};

 #[derive(Debug, Builder)]
 pub struct DepthAnything {
@@ -15,8 +15,8 @@ pub struct DepthAnything {
 }

 impl DepthAnything {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();

        let (batch, height, width, ts) = (
@@ -25,9 +25,7 @@ impl DepthAnything {
            engine.try_width().unwrap_or(&518.into()).opt(),
            engine.ts().clone(),
        );
-
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

--- a/src/models/depth_pro/config.rs
+++ b/src/models/depth_pro/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for `DepthPro`
-impl crate::Options {
+impl crate::Config {
    pub fn depth_pro() -> Self {
        Self::default()
-            .with_model_name("depth-pro")
+            .with_name("depth-pro")
            .with_model_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 1536.into())
@@ -11,17 +11,6 @@ impl crate::Options {
            .with_image_std(&[0.5, 0.5, 0.5])
            .with_resize_mode(crate::ResizeMode::FitExact)
            .with_normalize(true)
+            .with_model_file("model.onnx")
    }
-
-    // pub fn depth_pro_q4f16() -> Self {
-    //     Self::depth_pro().with_model_file("q4f16.onnx")
-    // }
-
-    // pub fn depth_pro_fp16() -> Self {
-    //     Self::depth_pro().with_model_file("fp16.onnx")
-    // }
-
-    // pub fn depth_pro_bnb4() -> Self {
-    //     Self::depth_pro().with_model_file("bnb4.onnx")
-    // }
 }
--- a/src/models/depth_pro/impl.rs
+++ b/src/models/depth_pro/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
 use anyhow::Result;
 use ndarray::Axis;

-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};

 #[derive(Builder, Debug)]
 pub struct DepthPro {
@@ -16,8 +16,8 @@ pub struct DepthPro {
 }

 impl DepthPro {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -25,8 +25,7 @@ impl DepthPro {
            engine.try_width().unwrap_or(&512.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

--- a/src/models/dinov2/config.rs
+++ b/src/models/dinov2/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for `DINOv2`
-impl crate::Options {
+impl crate::Config {
    pub fn dinov2() -> Self {
        Self::default()
-            .with_model_name("dinov2")
+            .with_name("dinov2")
            .with_model_ixx(0, 0, (1, 1, 8).into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
@@ -16,13 +16,13 @@ impl crate::Options {

    pub fn dinov2_small() -> Self {
        Self::dinov2()
-            .with_model_scale(crate::Scale::S)
+            .with_scale(crate::Scale::S)
            .with_model_file("s.onnx")
    }

    pub fn dinov2_base() -> Self {
        Self::dinov2()
-            .with_model_scale(crate::Scale::B)
+            .with_scale(crate::Scale::B)
            .with_model_file("b.onnx")
    }
 }
--- a/src/models/dinov2/impl.rs
+++ b/src/models/dinov2/impl.rs
@@ -1,7 +1,7 @@
 use aksr::Builder;
 use anyhow::Result;

-use crate::{elapsed, Engine, Image, Options, Processor, Scale, Ts, Xs, X};
+use crate::{elapsed, Config, Engine, Image, Processor, Scale, Ts, Xs, X};

 #[derive(Builder, Debug)]
 pub struct DINOv2 {
@@ -15,15 +15,15 @@ pub struct DINOv2 {
 }

 impl DINOv2 {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&384.into()).opt(),
            engine.try_width().unwrap_or(&384.into()).opt(),
            engine.ts.clone(),
        );
-        let dim = match options.model_scale() {
+        let dim = match &config.scale {
            Some(Scale::S) => 384,
            Some(Scale::B) => 768,
            Some(Scale::L) => 1024,
@@ -31,8 +31,7 @@ impl DINOv2 {
            Some(x) => anyhow::bail!("Unsupported scale: {:?}", x),
            None => anyhow::bail!("No model scale specified"),
        };
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

--- a/src/models/fast/config.rs
+++ b/src/models/fast/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://github.com/czczup/FAST)
-impl crate::Options {
+impl crate::Config {
    pub fn fast() -> Self {
        Self::db()
-            .with_model_name("fast")
+            .with_name("fast")
            .with_image_mean(&[0.798, 0.785, 0.772])
            .with_image_std(&[0.264, 0.2749, 0.287])
    }
--- a/src/models/fastvit/config.rs
+++ b/src/models/fastvit/config.rs
@@ -1,10 +1,10 @@
 use crate::NAMES_IMAGENET_1K;

 /// Model configuration for `FastViT`
-impl crate::Options {
+impl crate::Config {
    pub fn fastvit() -> Self {
        Self::default()
-            .with_model_name("fastvit")
+            .with_name("fastvit")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
--- a/src/models/florence2/config.rs
+++ b/src/models/florence2/config.rs
@@ -1,59 +1,31 @@
 /// Model configuration for `Florence2`
-impl crate::Options {
+impl crate::Config {
    pub fn florence2() -> Self {
        Self::default()
-            .with_model_name("florence2")
-            .with_batch_size(1)
-    }
-
-    pub fn florence2_visual() -> Self {
-        Self::florence2()
-            .with_model_kind(crate::Kind::Vision)
-            .with_model_ixx(0, 2, 768.into())
-            .with_model_ixx(0, 3, 768.into())
+            .with_name("florence2")
+            .with_batch_size_all(1)
+            .with_visual_ixx(0, 1, 3.into())
+            .with_visual_ixx(0, 2, 768.into())
+            .with_visual_ixx(0, 3, 768.into())
            .with_image_mean(&[0.485, 0.456, 0.406])
            .with_image_std(&[0.229, 0.224, 0.225])
-            .with_resize_filter("Bilinear")
-            .with_normalize(true)
    }

-    pub fn florence2_textual() -> Self {
-        Self::florence2().with_model_kind(crate::Kind::Language)
+    pub fn florence2_base() -> Self {
+        Self::florence2()
+            .with_scale(crate::Scale::B)
+            .with_visual_file("base-vision-encoder.onnx")
+            .with_textual_file("base-embed-tokens.onnx")
+            .with_textual_encoder_file("base-encoder.onnx")
+            .with_textual_decoder_file("base-decoder.onnx")
+            .with_textual_decoder_merged_file("base-decoder-merged.onnx")
+            .with_tokenizer_file("florence2/tokenizer.json")
+            .with_config_file("florence2/config.json")
+            .with_special_tokens_map_file("florence2/special_tokens_map.json")
+            .with_tokenizer_config_file("florence2/tokenizer_config.json")
    }

-    pub fn florence2_visual_base() -> Self {
-        Self::florence2_visual().with_model_scale(crate::Scale::B)
-    }
-
-    pub fn florence2_textual_base() -> Self {
-        Self::florence2_textual().with_model_scale(crate::Scale::B)
-    }
-
-    pub fn florence2_visual_large() -> Self {
-        Self::florence2_visual().with_model_scale(crate::Scale::L)
-    }
-
-    pub fn florence2_textual_large() -> Self {
-        Self::florence2_textual().with_model_scale(crate::Scale::L)
-    }
-
-    pub fn florence2_visual_encoder_base() -> Self {
-        Self::florence2_visual_base().with_model_file("base-vision-encoder.onnx")
-    }
-
-    pub fn florence2_textual_embed_base() -> Self {
-        Self::florence2_textual_base().with_model_file("base-embed-tokens.onnx")
-    }
-
-    pub fn florence2_texual_encoder_base() -> Self {
-        Self::florence2_textual_base().with_model_file("base-encoder.onnx")
-    }
-
-    pub fn florence2_texual_decoder_base() -> Self {
-        Self::florence2_textual_base().with_model_file("base-decoder.onnx")
-    }
-
-    pub fn florence2_texual_decoder_merged_base() -> Self {
-        Self::florence2_textual_base().with_model_file("base-decoder-merged.onnx")
+    pub fn florence2_large() -> Self {
+        // No model files are configured for the large variant in this impl block yet.
+        // Panic with an explicit, actionable message instead of a bare `todo!()`.
+        unimplemented!("Florence2 large is not configured yet; use `Config::florence2_base()`")
    }
 }
--- a/src/models/florence2/impl.rs
+++ b/src/models/florence2/impl.rs
@@ -4,51 +4,56 @@ use ndarray::{s, Axis};
 use rayon::prelude::*;

 use crate::{
-    elapsed,
-    models::{BaseModelTextual, BaseModelVisual, Quantizer},
-    Hbb, Image, LogitsSampler, Options, Polygon, Scale, Task, Ts, Xs, X, Y,
+    elapsed, models::Quantizer, Config, Engine, Hbb, Image, LogitsSampler, Polygon, Processor,
+    Scale, Task, Ts, Xs, X, Y,
 };

 #[derive(Debug, Builder)]
 pub struct Florence2 {
-    pub vision_encoder: BaseModelVisual,
-    pub text_embed: BaseModelTextual,
-    pub encoder: BaseModelTextual,
-    pub decoder: BaseModelTextual,
-    pub decoder_merged: BaseModelTextual,
+    pub vision_encoder: Engine,
+    pub text_embed: Engine,
+    pub encoder: Engine,
+    pub decoder: Engine,
+    pub decoder_merged: Engine,
    ts: Ts,
    quantizer: Quantizer,
    max_length: usize,
    eos_token_id: u32,
    decoder_start_token_id: u32,
    n_kvs: usize,
+    height: usize,
+    width: usize,
+    batch: usize,
+    processor: Processor,
 }

 impl Florence2 {
-    pub fn new(
-        options_vision_encoder: Options,
-        options_text_embed: Options,
-        options_encoder: Options,
-        options_decoder: Options,
-        options_decoder_merged: Options,
-    ) -> Result<Self> {
-        let vision_encoder = BaseModelVisual::new(options_vision_encoder)?;
-        let text_embed = BaseModelTextual::new(options_text_embed)?;
-        let encoder = BaseModelTextual::new(options_encoder)?;
-        let decoder = BaseModelTextual::new(options_decoder)?;
-        let decoder_merged = BaseModelTextual::new(options_decoder_merged)?;
+    pub fn new(config: Config) -> Result<Self> {
+        let vision_encoder = Engine::try_from_config(&config.visual)?;
+        let text_embed = Engine::try_from_config(&config.textual)?;
+        let encoder = Engine::try_from_config(&config.textual_encoder)?;
+        let decoder = Engine::try_from_config(&config.textual_decoder)?;
+        let decoder_merged = Engine::try_from_config(&config.textual_decoder_merged)?;
+        let (batch, height, width) = (
+            vision_encoder.batch().opt(),
+            vision_encoder.try_height().unwrap_or(&1024.into()).opt(),
+            vision_encoder.try_width().unwrap_or(&1024.into()).opt(),
+        );
+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
        let quantizer = Quantizer::default();
        let ts = Ts::merge(&[
-            vision_encoder.engine().ts(),
-            text_embed.engine().ts(),
-            encoder.engine().ts(),
-            decoder.engine().ts(),
-            decoder_merged.engine().ts(),
+            vision_encoder.ts(),
+            text_embed.ts(),
+            encoder.ts(),
+            decoder.ts(),
+            decoder_merged.ts(),
        ]);
        let max_length = 1024;
        let eos_token_id = 2;
        let decoder_start_token_id = 2;
-        let n_kvs = match decoder.scale() {
+        let n_kvs = match config.scale {
            Some(Scale::B) => 6,
            Some(Scale::L) => 12,
-            _ => unimplemented!(),
+            _ => anyhow::bail!("Florence2: unsupported scale {:?}", config.scale),
@@ -66,6 +71,10 @@ impl Florence2 {
            eos_token_id,
            decoder_start_token_id,
            n_kvs,
+            batch,
+            height,
+            width,
+            processor,
        })
    }

@@ -97,12 +106,12 @@ impl Florence2 {
            .map(|im| {
                let text = Self::process_task(task, im.height() as _, im.width() as _)
                    .prompt_for_florence2()?;
-                let ids = self.text_embed.processor().encode_text_ids(&text, true)?;
+                let ids = self.processor.encode_text_ids(&text, true)?;
                X::from(ids).insert_axis(0)
            })
            .collect::<Result<Vec<_>, _>>()?;
        let x = X::concat(&xs, 0)?;
-        let xs = self.text_embed.inference(x.into())?;
+        let xs = self.text_embed.run(x.into())?;
        let x = xs[0].to_owned();

        Ok(x)
@@ -110,7 +119,10 @@ impl Florence2 {

    pub fn forward(&mut self, xs_visual: &[Image], x_textual: &Task) -> Result<Vec<Y>> {
        let visual_embeddings = elapsed!("visual-encode", self.ts, {
-            self.vision_encoder.encode(xs_visual)?
+            let xs = self.processor.process_images(xs_visual)?;
+            self.batch = xs_visual.len(); // actual batch size, used for the attention mask
+            let xs = self.vision_encoder.run(xs.into())?;
+            xs[0].to_owned()
        });

        let textual_embedding = elapsed!("textual-encode", self.ts, {
@@ -141,7 +153,7 @@ impl Florence2 {
        let attention_mask = X::ones(&[self.batch(), inputs_embeds.dims()[1]]);

        // encoder
-        let last_hidden_state = self.encoder.inference(Xs::from(vec![
+        let last_hidden_state = self.encoder.run(Xs::from(vec![
            attention_mask.clone(),
            inputs_embeds.clone(),
        ]))?[0]
@@ -150,7 +162,7 @@ impl Florence2 {
        // decoder
        let inputs_embeds = inputs_embeds.slice(s![.., -1.., ..]);
        let inputs_embeds = X::from(inputs_embeds.to_owned().into_dyn());
-        let mut decoder_outputs = self.decoder.inference(Xs::from(vec![
+        let mut decoder_outputs = self.decoder.run(Xs::from(vec![
            attention_mask.clone(),
            last_hidden_state.clone(),
            inputs_embeds,
@@ -215,7 +227,7 @@ impl Florence2 {

            // decode
            let next_tokens = X::from(last_tokens.clone()).insert_axis(1)?;
-            let inputs_embeds = &self.text_embed.inference(Xs::from(next_tokens))?[0].clone();
+            let inputs_embeds = &self.text_embed.run(Xs::from(next_tokens))?[0].clone();
            let use_cache = X::ones(&[1]);
            let mut xs = vec![
                attention_mask.clone(),
@@ -229,13 +241,13 @@ impl Florence2 {
                xs.push(encoder_kvs[i * 2 + 1].clone());
            }
            xs.push(use_cache);
-            decoder_outputs = self.decoder_merged.inference(xs.into())?;
+            decoder_outputs = self.decoder_merged.run(xs.into())?;
        }

        // batch decode
        let texts = self
-            .text_embed
-            .processor()
+            // `text_embed` is a bare `Engine`; tokens are decoded by `self.processor`
+            .processor
            .decode_tokens_batch(&token_ids, false)?;

        Ok(texts)
@@ -416,10 +428,6 @@ impl Florence2 {
        Ok(ys)
    }

-    pub fn batch(&self) -> usize {
-        self.vision_encoder.batch() as _
-    }
-
    pub fn summary(&mut self) {
        self.ts.summary();
    }
--- a/src/models/grounding_dino/config.rs
+++ b/src/models/grounding_dino/config.rs
@@ -1,9 +1,8 @@
 /// Model configuration for `GroundingDino`
-impl crate::Options {
+impl crate::Config {
    pub fn grounding_dino() -> Self {
        Self::default()
-            .with_model_name("grounding-dino")
-            .with_model_kind(crate::Kind::VisionLanguage)
+            .with_name("grounding-dino")
            .with_model_ixx(0, 0, 1.into()) // TODO: current onnx model does not support bs > 1
            .with_model_ixx(0, 2, 800.into()) // TODO: matters
            .with_model_ixx(0, 3, 1200.into()) // TODO: matters
@@ -11,9 +10,10 @@ impl crate::Options {
            .with_resize_filter("CatmullRom")
            .with_image_mean(&[0.485, 0.456, 0.406])
            .with_image_std(&[0.229, 0.224, 0.225])
-            .with_normalize(true)
-            .with_class_confs(&[0.25])
-            .with_text_confs(&[0.25])
+            .with_tokenizer_file("grounding-dino/tokenizer.json")
+            .with_config_file("grounding-dino/config.json")
+            .with_special_tokens_map_file("grounding-dino/special_tokens_map.json")
+            .with_tokenizer_config_file("grounding-dino/tokenizer_config.json")
    }

    pub fn grounding_dino_tiny() -> Self {
--- a/src/models/grounding_dino/impl.rs
+++ b/src/models/grounding_dino/impl.rs
@@ -4,7 +4,7 @@ use ndarray::{s, Array2, Axis};
 use rayon::prelude::*;
 use std::fmt::Write;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};

 #[derive(Builder, Debug)]
 pub struct GroundingDINO {
@@ -24,8 +24,8 @@ pub struct GroundingDINO {
 }

 impl GroundingDINO {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -33,31 +33,29 @@ impl GroundingDINO {
            engine.try_width().unwrap_or(&1200.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
-            .with_image_width(width as _)
-            .with_image_height(height as _);
-        let class_names = options
+        let class_names: Vec<_> = config
            .text_names
-            .as_ref()
-            .and_then(|v| {
-                let v: Vec<_> = v
-                    .iter()
-                    .map(|s| s.trim().to_ascii_lowercase())
-                    .filter(|s| !s.is_empty())
-                    .collect();
-                (!v.is_empty()).then_some(v)
-            })
-            .ok_or_else(|| anyhow::anyhow!("No valid class names were provided in the options. Ensure the 'text_names' field is non-empty and contains valid class names."))?;
+            .iter()
+            .map(|s| s.trim().to_ascii_lowercase())
+            .filter(|s| !s.is_empty())
+            .collect();
+        if class_names.is_empty() {
+            anyhow::bail!(
+                "No valid class names were provided in the config. Ensure the 'text_names' field is non-empty and contains valid class names."
+            );
+        }
        let text_prompt = class_names.iter().fold(String::new(), |mut acc, text| {
            write!(&mut acc, "{}.", text).unwrap();
            acc
        });
+        let confs_visual = DynConf::new(config.class_confs(), class_names.len());
+        let confs_textual = DynConf::new(config.text_confs(), class_names.len());
+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
        let token_ids = processor.encode_text_ids(&text_prompt, true)?;
        let tokens = processor.encode_text_tokens(&text_prompt, true)?;
        let class_ids_map = Self::process_class_ids(&tokens);
-        let confs_visual = DynConf::new(options.class_confs(), class_names.len());
-        let confs_textual = DynConf::new(options.text_confs(), class_names.len());

        Ok(Self {
            engine,
--- a/src/models/linknet/config.rs
+++ b/src/models/linknet/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/abs/1707.03718)
-impl crate::Options {
+impl crate::Config {
    pub fn linknet() -> Self {
        Self::fast()
-            .with_model_name("linknet")
+            .with_name("linknet")
            .with_image_mean(&[0.798, 0.785, 0.772])
            .with_image_std(&[0.264, 0.2749, 0.287])
    }
--- a/src/models/mobileone/config.rs
+++ b/src/models/mobileone/config.rs
@@ -1,10 +1,10 @@
 use crate::NAMES_IMAGENET_1K;

 /// Model configuration for `MobileOne`
-impl crate::Options {
+impl crate::Config {
    pub fn mobileone() -> Self {
        Self::default()
-            .with_model_name("mobileone")
+            .with_name("mobileone")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 224.into())
--- a/src/models/modnet/config.rs
+++ b/src/models/modnet/config.rs
@@ -1,8 +1,8 @@
 /// Model configuration for `MODNet`
-impl crate::Options {
+impl crate::Config {
    pub fn modnet() -> Self {
        Self::default()
-            .with_model_name("modnet")
+            .with_name("modnet")
            .with_model_ixx(0, 0, 1.into())
            .with_model_ixx(0, 2, (416, 512, 800).into())
            .with_model_ixx(0, 3, (416, 512, 800).into())
--- a/src/models/modnet/impl.rs
+++ b/src/models/modnet/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
 use anyhow::Result;
 use ndarray::Axis;

-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};

 #[derive(Builder, Debug)]
 pub struct MODNet {
@@ -16,8 +16,8 @@ pub struct MODNet {
 }

 impl MODNet {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -25,8 +25,7 @@ impl MODNet {
            engine.try_width().unwrap_or(&512.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

--- a/src/models/moondream2/config.rs
+++ b/src/models/moondream2/config.rs
@@ -1,117 +1,47 @@
 /// Model configuration for `moondream2`
-impl crate::Options {
+impl crate::Config {
    pub fn moondream2() -> Self {
        Self::default()
-            .with_model_name("moondream2")
-            .with_model_num_dry_run(0)
+            .with_name("moondream2")
+            .with_visual_encoder_ixx(0, 0, (1, 3, 4).into()) // patch count
+            .with_image_mean(&[0.5, 0.5, 0.5])
+            .with_image_std(&[0.5, 0.5, 0.5])
+            .with_resize_mode(crate::ResizeMode::FitExact)
+            .with_resize_filter("catmullrom")
+            .with_visual_projection_ixx(0, 0, 1.into())
+            .with_textual_encoder_ixx(0, 0, 1.into())
+            .with_textual_decoder_ixx(0, 0, 1.into())
+            .with_size_encoder_ixx(0, 0, 1.into())
+            .with_size_decoder_ixx(0, 0, 1.into())
+            .with_coord_encoder_ixx(0, 0, 1.into())
+            .with_coord_decoder_ixx(0, 0, 1.into())
+            .with_tokenizer_file("moondream2/tokenizer.json")
+            .with_tokenizer_config_file("moondream2/tokenizer_config.json")
    }

    pub fn moondream2_0_5b() -> Self {
-        Self::moondream2().with_model_scale(crate::Scale::Billion(0.5))
+        Self::moondream2()
+            .with_scale(crate::Scale::Billion(0.5))
+            .with_visual_encoder_file("0.5b-vision-encoder.onnx")
+            .with_visual_projection_file("0.5b-vision-projection.onnx")
+            .with_textual_decoder_file("0.5b-text-decoder.onnx")
+            .with_textual_encoder_file("0.5b-text-encoder.onnx")
+            .with_coord_encoder_file("0.5b-coord-encoder.onnx")
+            .with_coord_decoder_file("0.5b-coord-decoder.onnx")
+            .with_size_encoder_file("0.5b-size-encoder.onnx")
+            .with_size_decoder_file("0.5b-size-decoder.onnx")
    }

-    pub fn moondream2_0_5b_vision_encoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_model_ixx(0, 0, (1, 3, 4).into()) // patch count
-            .with_model_kind(crate::Kind::Vision)
-            .with_image_mean(&[0.5, 0.5, 0.5])
-            .with_image_std(&[0.5, 0.5, 0.5])
-            .with_normalize(true)
-            .with_resize_mode(crate::ResizeMode::FitExact)
-            .with_resize_filter("catmullrom")
-            .with_model_file("0.5b-vision-encoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_vision_projection() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_kind(crate::Kind::Vision)
-            .with_model_file("0.5b-vision-projection.onnx")
-    }
-
-    pub fn moondream2_0_5b_text_decoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_kind(crate::Kind::Language)
-            .with_model_file("0.5b-text-decoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_text_encoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_kind(crate::Kind::Language)
-            .with_model_file("0.5b-text-encoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_coord_encoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_file("0.5b-coord-encoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_coord_decoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_file("0.5b-coord-decoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_size_encoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_file("0.5b-size-encoder.onnx")
-    }
-
-    pub fn moondream2_0_5b_size_decoder() -> Self {
-        Self::moondream2_0_5b()
-            .with_batch_size(1)
-            .with_model_file("0.5b-size-decoder.onnx")
-    }
-
-    pub fn moondream2_2b_vision_encoder() -> Self {
-        Self::moondream2_0_5b_vision_encoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-vision-encoder.onnx")
-    }
-
-    pub fn moondream2_2b_vision_projection() -> Self {
-        Self::moondream2_0_5b_vision_projection()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-vision-projection.onnx")
-    }
-
-    pub fn moondream2_2b_text_decoder() -> Self {
-        Self::moondream2_0_5b_text_decoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-text-decoder.onnx")
-    }
-
-    pub fn moondream2_2b_text_encoder() -> Self {
-        Self::moondream2_0_5b_text_encoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-text-encoder.onnx")
-    }
-
-    pub fn moondream2_2b_coord_encoder() -> Self {
-        Self::moondream2_0_5b_coord_encoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-coord-encoder.onnx")
-    }
-
-    pub fn moondream2_2b_coord_decoder() -> Self {
-        Self::moondream2_0_5b_coord_decoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-coord-decoder.onnx")
-    }
-
-    pub fn moondream2_2b_size_encoder() -> Self {
-        Self::moondream2_0_5b_size_encoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-size-encoder.onnx")
-    }
-
-    pub fn moondream2_2b_size_decoder() -> Self {
-        Self::moondream2_0_5b_size_decoder()
-            .with_model_scale(crate::Scale::Billion(2.))
-            .with_model_file("2b-size-decoder.onnx")
+    pub fn moondream2_2b() -> Self {
+        Self::moondream2()
+            .with_scale(crate::Scale::Billion(2.))
+            .with_visual_encoder_file("2b-vision-encoder.onnx")
+            .with_visual_projection_file("2b-vision-projection.onnx")
+            .with_textual_decoder_file("2b-text-decoder.onnx")
+            .with_textual_encoder_file("2b-text-encoder.onnx")
+            .with_coord_encoder_file("2b-coord-encoder.onnx")
+            .with_coord_decoder_file("2b-coord-decoder.onnx")
+            .with_size_encoder_file("2b-size-encoder.onnx")
+            .with_size_decoder_file("2b-size-decoder.onnx")
    }
 }
--- a/src/models/moondream2/impl.rs
+++ b/src/models/moondream2/impl.rs
@@ -5,66 +5,57 @@ use ndarray::{s, Array, Array2, Array3, Axis, IxDyn};
 use ndarray_npy::ReadNpyExt;

 use crate::{
-    BaseModelTextual, DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, Options, Processor,
-    Scale, Task, Ts, Xs, X, Y,
+    Config, DType, Engine, Hbb, Hub, Image, Keypoint, LogitsSampler, Processor, Scale, Task, Xs, X,
+    Y,
 };

 #[derive(Builder, Debug)]
 pub struct Moondream2 {
-    vision_encoder: VisionEncoder,
-    vision_projection: VisionProjection,
-    pub text_decoder: BaseModelTextual,
-    text_encoder: BaseModelTextual,
-    coord_decoder: Option<BaseModelTextual>,
-    coord_encoder: Option<BaseModelTextual>,
-    size_decoder: Option<BaseModelTextual>,
-    size_encoder: Option<BaseModelTextual>,
+    vision_encoder: Engine,
+    vision_projection: Engine,
+    text_decoder: Engine,
+    text_encoder: Engine,
+    coord_decoder: Option<Engine>,
+    coord_encoder: Option<Engine>,
+    size_decoder: Option<Engine>,
+    size_encoder: Option<Engine>,
    initial_kv_cache: X, // TODO: use f16
    scale: Scale,
    dtype: DType,
    max_length: usize,
    eos_token_id: u32,
    max_objects: usize,
+    num_patch: usize,
+    patch_size: usize,
+    processor: Processor,
+    seq_len: usize,
 }

 impl Moondream2 {
-    // TODO
-    #[allow(clippy::too_many_arguments)]
-    pub fn new(
-        options_vision_encoder: Options,
-        options_vision_projection: Options,
-        options_text_encoder: Options,
-        options_text_decoder: Options,
-        options_coord_encoder: Option<Options>,
-        options_coord_decoder: Option<Options>,
-        options_size_encoder: Option<Options>,
-        options_size_decoder: Option<Options>,
-    ) -> Result<Self> {
+    pub fn new(config: Config) -> Result<Self> {
        let max_length = 2048;
        let max_objects = 50;
        let eos_token_id = 50256;
-        let dtype = options_vision_encoder.model_dtype;
-        let scale = options_vision_encoder
-            .model_scale
-            .clone()
-            .unwrap_or(Scale::Billion(0.5));
+        let dtype = config.visual_encoder.dtype;
+        let scale = config.scale.clone().unwrap_or(Scale::Billion(0.5));
        let initial_kv_cache: X = KVCache::new(&scale, &dtype)?.0.into();
-        let vision_encoder = VisionEncoder::new(options_vision_encoder)?;
-        let vision_projection = VisionProjection::new(options_vision_projection)?;
-        let text_decoder = BaseModelTextual::new(options_text_decoder)?;
-        let text_encoder = BaseModelTextual::new(options_text_encoder)?;
-        let coord_decoder = options_coord_decoder
-            .map(BaseModelTextual::new)
-            .transpose()?;
-        let coord_encoder = options_coord_encoder
-            .map(BaseModelTextual::new)
-            .transpose()?;
-        let size_decoder = options_size_decoder
-            .map(BaseModelTextual::new)
-            .transpose()?;
-        let size_encoder = options_size_encoder
-            .map(BaseModelTextual::new)
-            .transpose()?;
+        let vision_encoder = Engine::try_from_config(&config.visual_encoder)?;
+        let vision_projection = Engine::try_from_config(&config.visual_projection)?;
+        let text_decoder = Engine::try_from_config(&config.textual_decoder)?;
+        let text_encoder = Engine::try_from_config(&config.textual_encoder)?;
+        let coord_decoder = Engine::try_from_config(&config.coord_decoder).ok(); // NOTE(review): `.ok()` silently drops load errors
+        let coord_encoder = Engine::try_from_config(&config.coord_encoder).ok();
+        let size_decoder = Engine::try_from_config(&config.size_decoder).ok();
+        let size_encoder = Engine::try_from_config(&config.size_encoder).ok();
+        let (num_patch, patch_size, _ts) = (
+            vision_encoder.batch().opt(),
+            vision_encoder.try_height().unwrap_or(&378.into()).opt(),
+            vision_encoder.ts.clone(),
+        );
+        let seq_len = vision_projection.inputs_minoptmax[0][1].opt();
+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(patch_size as _)
+            .with_image_height(patch_size as _);

        Ok(Self {
            vision_encoder,
@@ -81,12 +72,16 @@ impl Moondream2 {
            eos_token_id,
            scale,
            dtype,
+            num_patch,
+            patch_size,
+            processor,
+            seq_len,
        })
    }

    pub fn encode_image(&mut self, x: &Image) -> Result<X> {
-        let patches_emb = self.vision_encoder.encode(x)?.clone().insert_axis(0)?;
-        let image_embedding = self.vision_projection.inference(patches_emb.into())?[0].to_owned();
+        let patches_emb = self.encode(x)?.clone().insert_axis(0)?;
+        let image_embedding = self.vision_projection.run(patches_emb.into())?[0].to_owned();

        Ok(image_embedding)
    }
@@ -119,12 +114,7 @@ impl Moondream2 {
            Task::Vqa(query) => {
                let input_ids: Vec<_> = [198., 198., 24361., 25.]
                    .iter()
-                    .chain(
-                        &self
-                            .text_encoder
-                            .processor()
-                            .encode_text_ids(query, false)?,
-                    )
+                    .chain(&self.processor.encode_text_ids(query, false)?)
                    .chain(&[198., 198., 33706., 25.])
                    .cloned()
                    .collect();
@@ -139,8 +129,7 @@ impl Moondream2 {
                    .iter()
                    .chain(
                        &self
-                            .text_encoder
-                            .processor()
+                            .processor
                            .encode_text_ids(&format!(" {}", object), false)?,
                    )
                    .chain(&[628.])
@@ -156,8 +145,7 @@ impl Moondream2 {
                    .iter()
                    .chain(
                        &self
-                            .text_encoder
-                            .processor()
+                            .processor
                            .encode_text_ids(&format!(" {}", object), false)?,
                    )
                    .chain(&[628.])
@@ -174,10 +162,10 @@ impl Moondream2 {

    fn generate_text(&mut self, input_ids: &[f32], kv_cache: Array<f32, IxDyn>) -> Result<String> {
        let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
-        let mut input_embeds = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
+        let mut input_embeds = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
        let logits_sampler = LogitsSampler::new();
        let mut token_ids: Vec<u32> = Vec::new();
-        let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
+        let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
        let mut inc = input_embeds.shape()[1];
        let mut kv_cache = kv_cache.clone();

@@ -192,7 +180,7 @@ impl Moondream2 {
                    .into_dyn()
                    .into(),
            ]);
-            let decoder_outputs = self.text_decoder.inference(input)?;
+            let decoder_outputs = self.text_decoder.run(input)?;

            // update
            let logits = &decoder_outputs["logits"];
@@ -221,13 +209,10 @@ impl Moondream2 {

            // encode
            let next_tokens = X::from(vec![token_id as f32]).insert_axis(1)?;
-            input_embeds = self.text_encoder.inference(Xs::from(next_tokens))?[0].to_owned();
+            input_embeds = self.text_encoder.run(Xs::from(next_tokens))?[0].to_owned();
        }

-        let text = self
-            .text_encoder
-            .processor()
-            .decode_tokens(&token_ids, true)?;
+        let text = self.processor.decode_tokens(&token_ids, true)?;

        Ok(text)
    }
@@ -242,16 +227,16 @@ impl Moondream2 {
        let mut y_bboxes: Vec<Hbb> = Vec::new();
        let mut y_kpts: Vec<Vec<Keypoint>> = Vec::new();
        let (image_height, image_width) = (
-            self.vision_encoder.processor.images_transform_info[0].height_src,
-            self.vision_encoder.processor.images_transform_info[0].width_src,
+            self.processor.images_transform_info[0].height_src,
+            self.processor.images_transform_info[0].width_src,
        );

-        let mut pos = self.vision_projection.seq_len() + self.initial_kv_cache.shape()[4];
+        let mut pos = self.seq_len + self.initial_kv_cache.shape()[4];
        let logits_sampler = LogitsSampler::new();

        // initial input_embeds
        let input_ids = X::from(input_ids.to_vec()).insert_axis(0)?;
-        let mut hidden = self.text_encoder.inference(Xs::from(input_ids))?[0].to_owned();
+        let mut hidden = self.text_encoder.run(Xs::from(input_ids))?[0].to_owned();
        let mut kv_cache = kv_cache;

        // generate
@@ -273,12 +258,7 @@ impl Moondream2 {

            // cx
            let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
-            let cx = self
-                .coord_decoder
-                .as_mut()
-                .unwrap()
-                .inference(Xs::from(input))?[0]
-                .clone(); // [1024]
+            let cx = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [1024]
            let ratio = cx.shape()[0] as f32;
            let cx = logits_sampler
                .decode(cx.as_slice().context("Failed to get slice for `cx`")?)?
@@ -288,7 +268,7 @@ impl Moondream2 {
                .coord_encoder
                .as_mut()
                .unwrap()
-                .inference(Xs::from(X::from(vec![cx])))?[0]
+                .run(Xs::from(X::from(vec![cx])))?[0]
                .clone()
                .insert_axis(0)?
                .insert_axis(0)?;
@@ -296,12 +276,7 @@ impl Moondream2 {
            // cy
            let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
            let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
-            let cy = self
-                .coord_decoder
-                .as_mut()
-                .unwrap()
-                .inference(Xs::from(input))?[0]
-                .clone();
+            let cy = self.coord_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone();
            let ratio = cy.shape()[0] as f32;

            let cy = logits_sampler
@@ -313,7 +288,7 @@ impl Moondream2 {
                .coord_encoder
                .as_mut()
                .unwrap()
-                .inference(Xs::from(X::from(vec![cy])))?[0]
+                .run(Xs::from(X::from(vec![cy])))?[0]
                .clone()
                .insert_axis(0)?
                .insert_axis(0)?;
@@ -324,6 +299,7 @@ impl Moondream2 {
                    cy * image_height as f32,
                ))
                .with_id(0)
+                .with_confidence(1.)
                .with_name(object)]);

                // keep?
@@ -334,12 +310,7 @@ impl Moondream2 {
                // wh
                let _logits = self.run_decoder(&mut hidden, &mut kv_cache, &mut pos)?;
                let input: X = hidden.slice(s![0, -1, ..]).into_owned().into_dyn().into();
-                let size = self
-                    .size_decoder
-                    .as_mut()
-                    .unwrap()
-                    .inference(Xs::from(input))?[0]
-                    .clone(); // [2, 1024]
+                let size = self.size_decoder.as_mut().unwrap().run(Xs::from(input))?[0].clone(); // [2, 1024]

                let ratio = size.shape()[1] as f32;
                let w = logits_sampler.decode(
@@ -361,7 +332,7 @@ impl Moondream2 {
                    .size_encoder
                    .as_mut()
                    .unwrap()
-                    .inference(Xs::from(X::from(vec![w, h])))?[0]
+                    .run(Xs::from(X::from(vec![w, h])))?[0]
                    .clone()
                    .insert_axis(0)?
                    .insert_axis(0)?; // [1024]
@@ -392,7 +363,7 @@ impl Moondream2 {
    }

    fn prepare_kv_cache(&mut self, image_embedding: &X) -> Result<Array<f32, IxDyn>> {
-        let kv_cache_new = self.text_decoder.inference(Xs::from(vec![
+        let kv_cache_new = self.text_decoder.run(Xs::from(vec![
            image_embedding.clone(),
            self.initial_kv_cache.clone(),
        ]))?["new_kv_cache"]
@@ -421,7 +392,7 @@ impl Moondream2 {
        kv_cache: &mut Array<f32, IxDyn>,
        pos: &mut usize,
    ) -> Result<X> {
-        let decoder_outputs = self.text_decoder.inference(Xs::from(vec![
+        let decoder_outputs = self.text_decoder.run(Xs::from(vec![
            input_embeds.clone(),
            kv_cache
                .slice(s![.., .., .., .., ..*pos, ..])
@@ -442,38 +413,6 @@ impl Moondream2 {

        Ok(decoder_outputs["logits"].to_owned())
    }
-}
-
-#[derive(Debug, Builder)]
-pub struct VisionEncoder {
-    engine: Engine,
-    num_patch: usize,
-    patch_size: usize,
-    processor: Processor,
-    ts: Ts,
-}
-
-impl VisionEncoder {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
-        let (num_patch, patch_size, ts) = (
-            engine.batch().opt(),
-            engine.try_height().unwrap_or(&378.into()).opt(),
-            engine.ts.clone(),
-        );
-        let processor = options
-            .to_processor()?
-            .with_image_width(patch_size as _)
-            .with_image_height(patch_size as _);
-
-        Ok(Self {
-            engine,
-            patch_size,
-            num_patch,
-            processor,
-            ts,
-        })
-    }

    fn create_patches(image: &Image, image_patch_size: usize) -> (Vec<Image>, (u32, u32)) {
        let mut patches = vec![image.clone()];
@@ -515,10 +454,6 @@ impl VisionEncoder {
        (patches, selected_template)
    }

-    pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-        self.engine.run(xs)
-    }
-
    pub fn encode(&mut self, x: &Image) -> Result<X> {
        let (patches, selected_template) = Self::create_patches(x, self.patch_size);
        let patches = self.processor.process_images(&patches)?;
@@ -526,7 +461,7 @@ impl VisionEncoder {
            (selected_template.0 as usize),
            (selected_template.1 as usize),
        );
-        let patch_emb = self.inference(patches.clone().into())?[0].clone();
+        let patch_emb = self.vision_encoder.run(patches.clone().into())?[0].clone();
        let patch_emb = patch_emb.clone().0.into_dimensionality::<ndarray::Ix3>()?;
        let patch_emb = Self::process_patch_emb(patch_emb, template)?;
        let patch_emb = X::from(patch_emb.into_dyn()); // TODO .insert_axis(x),
@@ -608,30 +543,6 @@ impl VisionEncoder {
    }
 }

-#[derive(Debug, Builder)]
-pub struct VisionProjection {
-    engine: Engine,
-    seq_len: usize,
-    ts: Ts,
-}
-
-impl VisionProjection {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
-        let (seq_len, ts) = (engine.inputs_minoptmax[0][1].opt(), engine.ts.clone());
-
-        Ok(Self {
-            engine,
-            seq_len,
-            ts,
-        })
-    }
-
-    pub fn inference(&mut self, xs: Xs) -> Result<Xs> {
-        self.engine.run(xs)
-    }
-}
-
 #[derive(Builder, Debug)]
 struct KVCache(pub Array<f32, IxDyn>);

--- a/src/models/owl/config.rs
+++ b/src/models/owl/config.rs
@@ -1,11 +1,10 @@
 /// Model configuration for `OWLv2`
-impl crate::Options {
+impl crate::Config {
    pub fn owlv2() -> Self {
        Self::default()
-            .with_model_name("owlv2")
-            .with_model_kind(crate::Kind::VisionLanguage)
+            .with_name("owlv2")
            // 1st & 3rd: text
-            .with_model_ixx(0, 0, (1, 1, 1).into()) // TODO
+            .with_model_ixx(0, 0, (1, 1, 1).into())
            .with_model_ixx(0, 1, 1.into())
            .with_model_ixx(2, 0, (1, 1, 1).into())
            .with_model_ixx(2, 1, 1.into())
@@ -21,6 +20,7 @@ impl crate::Options {
            .with_normalize(true)
            .with_class_confs(&[0.1])
            .with_model_num_dry_run(0)
+            .with_tokenizer_file("owlv2/tokenizer.json")
    }

    pub fn owlv2_base() -> Self {
--- a/src/models/owl/impl.rs
+++ b/src/models/owl/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use ndarray::{s, Axis};
 use rayon::prelude::*;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};

 #[derive(Debug, Builder)]
 pub struct OWLv2 {
@@ -22,8 +22,8 @@ pub struct OWLv2 {
 }

 impl OWLv2 {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&960.into()).opt(),
@@ -31,20 +31,19 @@ impl OWLv2 {
            engine.ts.clone(),
        );
        let spec = engine.spec().to_owned();
-        let processor = options
-            .to_processor()?
-            .with_image_width(width as _)
-            .with_image_height(height as _);
-        let names: Vec<String> = options
-            .class_names()
-            .expect("No class names specified.")
-            .iter()
-            .map(|x| x.to_string())
-            .collect();
+        let names: Vec<String> = config.text_names().to_vec();
+        if names.is_empty() {
+            anyhow::bail!(
+                "No valid class names were provided in the config. Ensure the 'text_names' field is non-empty and contains valid class names."
+            );
+        }
        let names_with_prompt: Vec<String> =
            names.iter().map(|x| format!("a photo of {}", x)).collect();
        let n = names.len();
-        let confs = DynConf::new(options.class_confs(), n);
+        let confs = DynConf::new(config.class_confs(), n);
+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
        let input_ids: Vec<f32> = processor
            .encode_texts_ids(
                &names_with_prompt
--- a/src/models/picodet/config.rs
+++ b/src/models/picodet/config.rs
@@ -4,11 +4,11 @@ use crate::{
 };

 /// Model configuration for `PicoDet`
-impl crate::Options {
+impl crate::Config {
    pub fn picodet() -> Self {
        Self::default()
-            .with_model_name("picodet")
-            .with_batch_size(1) // TODO: ONNX model's batch size seems always = 1
+            .with_name("picodet")
+            .with_batch_size_all(1) // TODO: ONNX model's batch size seems always = 1
            .with_model_ixx(0, 2, 640.into())
            .with_model_ixx(0, 3, 640.into())
            .with_model_ixx(1, 0, (1, 1, 8).into())
--- a/src/models/picodet/impl.rs
+++ b/src/models/picodet/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use ndarray::Axis;
 use rayon::prelude::*;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};

 #[derive(Debug, Builder)]
 pub struct PicoDet {
@@ -19,8 +19,8 @@ pub struct PicoDet {
 }

 impl PicoDet {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&640.into()).opt(),
@@ -28,15 +28,11 @@ impl PicoDet {
            engine.ts.clone(),
        );
        let spec = engine.spec().to_owned();
-        let processor = options
-            .to_processor()?
+        let names: Vec<String> = config.class_names().to_vec();
+        let confs = DynConf::new(config.class_confs(), names.len());
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-        let names = options
-            .class_names()
-            .expect("No class names are specified.")
-            .to_vec();
-        let confs = DynConf::new(options.class_confs(), names.len());

        Ok(Self {
            engine,
@@ -95,14 +91,15 @@ impl PicoDet {
                    return None;
                }
                let (x1, y1, x2, y2) = (pred[2], pred[3], pred[4], pred[5]);
+                let mut hbb = Hbb::default()
+                    .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
+                    .with_confidence(confidence)
+                    .with_id(class_id);
+                if !self.names.is_empty() {
+                    hbb = hbb.with_name(&self.names[class_id]);
+                }

-                Some(
-                    Hbb::default()
-                        .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
-                        .with_confidence(confidence)
-                        .with_id(class_id)
-                        .with_name(&self.names[class_id]),
-                )
+                Some(hbb)
            })
            .collect();

--- a/src/models/pipeline/basemodel.rs
+++ b/src/models/pipeline/basemodel.rs
@@ -2,8 +2,7 @@ use aksr::Builder;
 use anyhow::Result;

 use crate::{
-    elapsed, DType, Device, Engine, Image, Kind, Options, Processor, Scale, Task, Ts, Version, Xs,
-    X,
+    elapsed, Config, DType, Device, Engine, Image, Processor, Scale, Task, Ts, Version, Xs, X,
 };

 #[derive(Debug, Builder)]
@@ -20,7 +19,6 @@ pub struct BaseModelVisual {
    dtype: DType,
    task: Option<Task>,
    scale: Option<Scale>,
-    kind: Option<Kind>,
    version: Option<Version>,
 }

@@ -29,8 +27,8 @@ impl BaseModelVisual {
        self.ts.summary();
    }

-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let err_msg = "You need to specify the image height and image width for visual model.";
        let (batch, height, width, ts, spec) = (
            engine.batch().opt(),
@@ -39,18 +37,15 @@ impl BaseModelVisual {
            engine.ts.clone(),
            engine.spec().to_owned(),
        );
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-
-        let device = options.model_device;
-        let task = options.model_task;
-        let scale = options.model_scale;
-        let dtype = options.model_dtype;
-        let kind = options.model_kind;
-        let name = options.model_name;
-        let version = options.model_version;
+        let device = config.model.device;
+        let task = config.task;
+        let scale = config.scale;
+        let dtype = config.model.dtype;
+        let name = config.name;
+        let version = config.version;

        Ok(Self {
            engine,
@@ -63,7 +58,6 @@ impl BaseModelVisual {
            dtype,
            task,
            scale,
-            kind,
            device,
            version,
            name,
@@ -101,7 +95,6 @@ pub struct BaseModelTextual {
    dtype: DType,
    task: Option<Task>,
    scale: Option<Scale>,
-    kind: Option<Kind>,
    version: Option<Version>,
 }

@@ -110,21 +103,20 @@ impl BaseModelTextual {
        self.ts.summary();
    }

-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, ts, spec) = (
            engine.batch().opt(),
            engine.ts.clone(),
            engine.spec().to_owned(),
        );
-        let processor = options.to_processor()?;
-        let device = options.model_device;
-        let task = options.model_task;
-        let scale = options.model_scale;
-        let dtype = options.model_dtype;
-        let kind = options.model_kind;
-        let name = options.model_name;
-        let version = options.model_version;
+        let processor = Processor::try_from_config(&config.processor)?;
+        let device = config.model.device;
+        let dtype = config.model.dtype;
+        let task = config.task;
+        let scale = config.scale;
+        let name = config.name;
+        let version = config.version;

        Ok(Self {
            engine,
@@ -135,7 +127,6 @@ impl BaseModelTextual {
            dtype,
            task,
            scale,
-            kind,
            device,
            version,
            name,
--- a/src/models/pipeline/image_classifier.rs
+++ b/src/models/pipeline/image_classifier.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use ndarray::Axis;
 use rayon::prelude::*;

-use crate::{elapsed, DynConf, Engine, Image, Options, Prob, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Prob, Processor, Ts, Xs, Y};

 #[derive(Debug, Builder)]
 pub struct ImageClassifier {
@@ -12,19 +12,24 @@ pub struct ImageClassifier {
    width: usize,
    batch: usize,
    apply_softmax: bool,
-    ts: Ts,
    processor: Processor,
-    confs: DynConf,
-    nc: usize,
    names: Vec<String>,
    spec: String,
+    topk: usize,
+    ts: Ts,
 }

-impl TryFrom<Options> for ImageClassifier {
+impl TryFrom<Config> for ImageClassifier {
    type Error = anyhow::Error;

-    fn try_from(options: Options) -> Result<Self, Self::Error> {
-        let engine = options.to_engine()?;
+    fn try_from(config: Config) -> Result<Self, Self::Error> {
+        Self::new(config)
+    }
+}
+
+impl ImageClassifier {
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -32,50 +37,27 @@ impl TryFrom<Options> for ImageClassifier {
            engine.try_width().unwrap_or(&224.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
+        let names = config.class_names.to_vec();
+        let apply_softmax = config.apply_softmax.unwrap_or_default();
+        let topk = config.topk.unwrap_or(5);
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-        let (nc, names) = match (options.nc(), options.class_names()) {
-            (Some(nc), Some(names)) => {
-                if nc != names.len() {
-                    anyhow::bail!(
-                        "The length of the input class names: {} is inconsistent with the number of classes: {}.",
-                        names.len(),
-                        nc
-                    );
-                }
-                (nc, names.to_vec())
-            }
-            (Some(nc), None) => (
-                nc,
-                (0..nc).map(|x| format!("# {}", x)).collect::<Vec<String>>(),
-            ),
-            (None, Some(names)) => (names.len(), names.to_vec()),
-            (None, None) => {
-                anyhow::bail!("Neither class names nor class numbers were specified.");
-            }
-        };
-        let confs = DynConf::new(options.class_confs(), nc);
-        let apply_softmax = options.apply_softmax.unwrap_or_default();

        Ok(Self {
            engine,
            height,
            width,
            batch,
-            nc,
            ts,
            spec,
            processor,
-            confs,
            names,
            apply_softmax,
+            topk,
        })
    }
-}

-impl ImageClassifier {
    pub fn summary(&mut self) {
        self.ts.summary();
    }
@@ -113,7 +95,7 @@ impl ImageClassifier {
                let probs = Prob::new_probs(
                    &logits.into_raw_vec_and_offset().0,
                    Some(&self.names.iter().map(|x| x.as_str()).collect::<Vec<_>>()),
-                    3,
+                    self.topk,
                );

                Some(Y::default().with_probs(&probs))
--- a/src/models/rfdetr/config.rs
+++ b/src/models/rfdetr/config.rs
@@ -1,18 +1,17 @@
 use crate::NAMES_COCO_91;

-/// Model configuration for `RT-DETR`
+/// Model configuration for `RF-DETR`
-impl crate::Options {
+impl crate::Config {
    pub fn rfdetr() -> Self {
        Self::default()
-            .with_model_name("rfdetr")
-            .with_batch_size(1)
+            .with_name("rfdetr")
+            .with_model_ixx(0, 0, 1.into())
+            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 560.into())
            .with_model_ixx(0, 3, 560.into())
            .with_resize_mode(crate::ResizeMode::FitAdaptive)
-            .with_normalize(true)
            .with_image_mean(&[0.485, 0.456, 0.406])
            .with_image_std(&[0.229, 0.224, 0.225])
-            .with_class_confs(&[0.25])
            .with_class_names(&NAMES_COCO_91)
    }

--- a/src/models/rfdetr/impl.rs
+++ b/src/models/rfdetr/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use ndarray::{s, Axis};
 use rayon::prelude::*;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, Y};

 #[derive(Debug, Builder)]
 pub struct RFDETR {
@@ -19,8 +19,8 @@ pub struct RFDETR {
 }

 impl RFDETR {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&560.into()).opt(),
@@ -28,16 +28,11 @@ impl RFDETR {
            engine.ts.clone(),
        );
        let spec = engine.spec().to_owned();
-        let processor = options
-            .to_processor()?
+        let names: Vec<String> = config.class_names().to_vec();
+        let confs = DynConf::new(config.class_confs(), names.len());
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-        let names = options
-            .class_names()
-            .expect("No class names specified.")
-            .to_vec();
-        let confs = DynConf::new(options.class_confs(), names.len());
-
        Ok(Self {
            engine,
            height,
@@ -107,14 +102,15 @@ impl RFDETR {
                        let y = cy - h / 2.;
                        let x = x.max(0.0).min(image_width as _);
                        let y = y.max(0.0).min(image_height as _);
+                        let mut hbb = Hbb::default()
+                            .with_xywh(x, y, w, h)
+                            .with_confidence(conf)
+                            .with_id(class_id as _);
+                        if !self.names.is_empty() {
+                            hbb = hbb.with_name(&self.names[class_id]);
+                        }

-                        Some(
-                            Hbb::default()
-                                .with_xywh(x, y, w, h)
-                                .with_confidence(conf)
-                                .with_id(class_id as _)
-                                .with_name(&self.names[class_id]),
-                        )
+                        Some(hbb)
                    })
                    .collect();

--- a/src/models/rmbg/config.rs
+++ b/src/models/rmbg/config.rs
@@ -1,9 +1,10 @@
 /// Model configuration for `RMBG`
-impl crate::Options {
+impl crate::Config {
    pub fn rmbg() -> Self {
        Self::default()
-            .with_model_name("rmbg")
+            .with_name("rmbg")
            .with_model_ixx(0, 0, 1.into())
+            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 1024.into())
            .with_model_ixx(0, 3, 1024.into())
    }
--- a/src/models/rmbg/impl.rs
+++ b/src/models/rmbg/impl.rs
@@ -1,7 +1,7 @@
 use aksr::Builder;
 use anyhow::Result;

-use crate::{elapsed, Engine, Image, Mask, Ops, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, Engine, Image, Mask, Ops, Processor, Ts, Xs, Y};

 #[derive(Builder, Debug)]
 pub struct RMBG {
@@ -15,8 +15,8 @@ pub struct RMBG {
 }

 impl RMBG {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -24,8 +24,7 @@ impl RMBG {
            engine.try_width().unwrap_or(&1024.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

@@ -63,7 +62,6 @@ impl RMBG {
    fn postprocess(&mut self, xs: Xs) -> Result<Vec<Y>> {
        let mut ys: Vec<Y> = Vec::new();
        for (idx, luma) in xs[0].axis_iter(ndarray::Axis(0)).enumerate() {
-            // image size
            let (h1, w1) = (
                self.processor.images_transform_info[idx].height_src,
                self.processor.images_transform_info[idx].width_src,
--- a/src/models/rtdetr/config.rs
+++ b/src/models/rtdetr/config.rs
@@ -1,15 +1,15 @@
 use crate::NAMES_COCO_80;

 /// Model configuration for `RT-DETR`
-impl crate::Options {
+impl crate::Config {
    pub fn rtdetr() -> Self {
        Self::default()
-            .with_model_name("rtdetr")
-            .with_batch_size(1)
+            .with_name("rtdetr")
+            .with_model_ixx(0, 0, 1.into())
+            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 640.into())
            .with_model_ixx(0, 3, 640.into())
            .with_resize_mode(crate::ResizeMode::FitAdaptive)
-            .with_normalize(true)
            .with_class_confs(&[0.5])
            .with_class_names(&NAMES_COCO_80)
    }
--- a/src/models/rtdetr/impl.rs
+++ b/src/models/rtdetr/impl.rs
@@ -3,7 +3,7 @@ use anyhow::Result;
 use ndarray::{s, Axis};
 use rayon::prelude::*;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Options, Processor, Ts, Xs, X, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Processor, Ts, Xs, X, Y};

 #[derive(Debug, Builder)]
 pub struct RTDETR {
@@ -19,8 +19,8 @@ pub struct RTDETR {
 }

 impl RTDETR {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let (batch, height, width, ts) = (
            engine.batch().opt(),
            engine.try_height().unwrap_or(&640.into()).opt(),
@@ -28,15 +28,11 @@ impl RTDETR {
            engine.ts.clone(),
        );
        let spec = engine.spec().to_owned();
-        let processor = options
-            .to_processor()?
+        let names: Vec<String> = config.class_names().to_vec();
+        let confs = DynConf::new(config.class_confs(), names.len());
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);
-        let names = options
-            .class_names()
-            .expect("No class names specified.")
-            .to_vec();
-        let confs = DynConf::new(options.class_confs(), names.len());

        Ok(Self {
            engine,
@@ -87,14 +83,12 @@ impl RTDETR {
            .enumerate()
            .filter_map(|(idx, ((labels, boxes), scores))| {
                let ratio = self.processor.images_transform_info[idx].height_scale;
-
                let mut y_bboxes = Vec::new();
                for (i, &score) in scores.iter().enumerate() {
                    let class_id = labels[i] as usize;
                    if score < self.confs[class_id] {
                        continue;
                    }
-
                    let xyxy = boxes.slice(s![i, ..]);
                    let (x1, y1, x2, y2) = (
                        xyxy[0] / ratio,
@@ -102,14 +96,14 @@ impl RTDETR {
                        xyxy[2] / ratio,
                        xyxy[3] / ratio,
                    );
-
-                    y_bboxes.push(
-                        Hbb::default()
-                            .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
-                            .with_confidence(score)
-                            .with_id(class_id)
-                            .with_name(&self.names[class_id]),
-                    );
+                    let mut hbb = Hbb::default()
+                        .with_xyxy(x1.max(0.0f32), y1.max(0.0f32), x2, y2)
+                        .with_confidence(score)
+                        .with_id(class_id);
+                    if !self.names.is_empty() {
+                        hbb = hbb.with_name(&self.names[class_id]);
+                    }
+                    y_bboxes.push(hbb);
                }

                let mut y = Y::default();
--- a/src/models/rtmo/config.rs
+++ b/src/models/rtmo/config.rs
@@ -1,9 +1,10 @@
 /// Model configuration for `RTMO`
-impl crate::Options {
+impl crate::Config {
    pub fn rtmo() -> Self {
        Self::default()
-            .with_model_name("rtmo")
+            .with_name("rtmo")
            .with_model_ixx(0, 0, 1.into())
+            .with_model_ixx(0, 1, 3.into())
            .with_model_ixx(0, 2, 640.into())
            .with_model_ixx(0, 3, 640.into())
            .with_resize_mode(crate::ResizeMode::FitAdaptive)
--- a/src/models/rtmo/impl.rs
+++ b/src/models/rtmo/impl.rs
@@ -2,7 +2,7 @@ use aksr::Builder;
 use anyhow::Result;
 use ndarray::Axis;

-use crate::{elapsed, DynConf, Engine, Hbb, Image, Keypoint, Options, Processor, Ts, Xs, Y};
+use crate::{elapsed, Config, DynConf, Engine, Hbb, Image, Keypoint, Processor, Ts, Xs, Y};

 #[derive(Builder, Debug)]
 pub struct RTMO {
@@ -18,8 +18,8 @@ pub struct RTMO {
 }

 impl RTMO {
-    pub fn new(options: Options) -> Result<Self> {
-        let engine = options.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let engine = Engine::try_from_config(&config.model)?;
        let spec = engine.spec().to_string();
        let (batch, height, width, ts) = (
            engine.batch().opt(),
@@ -27,15 +27,13 @@ impl RTMO {
            engine.try_width().unwrap_or(&512.into()).opt(),
            engine.ts().clone(),
        );
-        let processor = options
-            .to_processor()?
+        let nk = config.nk().unwrap_or(17);
+        let confs = DynConf::new(config.class_confs(), 1);
+        let kconfs = DynConf::new(config.keypoint_confs(), nk);
+        let processor = Processor::try_from_config(&config.processor)?
            .with_image_width(width as _)
            .with_image_height(height as _);

-        let nk = options.nk().unwrap_or(17);
-        let confs = DynConf::new(options.class_confs(), 1);
-        let kconfs = DynConf::new(options.keypoint_confs(), nk);
-
        Ok(Self {
            engine,
            height,
--- a/src/models/sam/config.rs
+++ b/src/models/sam/config.rs
@@ -1,100 +1,73 @@
-use crate::{models::SamKind, Options};
+use crate::{models::SamKind, Config};

 /// Model configuration for `Segment Anything Model`
-impl Options {
+impl Config {
    pub fn sam() -> Self {
        Self::default()
-            .with_model_name("sam")
-            .with_model_ixx(0, 0, 1.into())
-    }
-
-    pub fn sam_encoder() -> Self {
-        Self::sam()
-            .with_model_ixx(0, 2, 1024.into())
-            .with_model_ixx(0, 3, 1024.into())
+            .with_name("sam")
+            .with_encoder_ixx(0, 0, 1.into())
+            .with_encoder_ixx(0, 1, 3.into())
+            .with_encoder_ixx(0, 2, 1024.into())
+            .with_encoder_ixx(0, 3, 1024.into())
            .with_resize_mode(crate::ResizeMode::FitAdaptive)
            .with_resize_filter("Bilinear")
            .with_image_mean(&[123.5, 116.5, 103.5])
            .with_image_std(&[58.5, 57.0, 57.5])
            .with_normalize(false)
            .with_sam_kind(SamKind::Sam)
-            .with_low_res_mask(false)
+            .with_sam_low_res_mask(false)
            .with_find_contours(true)
    }

-    pub fn sam_decoder() -> Self {
+    pub fn sam_v1_base() -> Self {
        Self::sam()
+            .with_encoder_file("sam-vit-b-encoder.onnx")
+            .with_decoder_file("sam-vit-b-decoder.onnx")
    }

-    pub fn sam_v1_base_encoder() -> Self {
-        Self::sam_encoder().with_model_file("sam-vit-b-encoder.onnx")
+    // pub fn sam_v1_base_singlemask_decoder() -> Self {
+    //     Self::sam().with_decoder_file("sam-vit-b-decoder-singlemask.onnx")
+    // }
+
+    pub fn sam2_tiny() -> Self {
+        Self::sam()
+            .with_encoder_file("sam2-hiera-tiny-encoder.onnx")
+            .with_sam_kind(SamKind::Sam2)
+            .with_decoder_file("sam2-hiera-tiny-decoder.onnx")
    }

-    pub fn sam_v1_base_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam-vit-b-decoder.onnx")
-    }
-
-    pub fn sam_v1_base_singlemask_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam-vit-b-decoder-singlemask.onnx")
-    }
-
-    pub fn sam2_tiny_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("sam2-hiera-tiny-encoder.onnx")
+    pub fn sam2_small() -> Self {
+        Self::sam()
+            .with_encoder_file("sam2-hiera-small-encoder.onnx")
+            .with_decoder_file("sam2-hiera-small-decoder.onnx")
            .with_sam_kind(SamKind::Sam2)
    }

-    pub fn sam2_tiny_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam2-hiera-tiny-decoder.onnx")
-    }
-
-    pub fn sam2_small_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("sam2-hiera-small-encoder.onnx")
+    pub fn sam2_base_plus() -> Self {
+        Self::sam()
+            .with_encoder_file("sam2-hiera-base-plus-encoder.onnx")
+            .with_decoder_file("sam2-hiera-base-plus-decoder.onnx")
            .with_sam_kind(SamKind::Sam2)
    }

-    pub fn sam2_small_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam2-hiera-small-decoder.onnx")
-    }
-
-    pub fn sam2_base_plus_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("sam2-hiera-base-plus-encoder.onnx")
-            .with_sam_kind(SamKind::Sam2)
-    }
-
-    pub fn sam2_base_plus_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam2-hiera-base-plus-decoder.onnx")
-    }
-
-    pub fn mobile_sam_tiny_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("mobile-sam-vit-t-encoder.onnx")
+    pub fn mobile_sam_tiny() -> Self {
+        Self::sam()
+            .with_encoder_file("mobile-sam-vit-t-encoder.onnx")
            .with_sam_kind(SamKind::MobileSam)
+            .with_decoder_file("mobile-sam-vit-t-decoder.onnx")
    }

-    pub fn mobile_sam_tiny_decoder() -> Self {
-        Self::sam_decoder().with_model_file("mobile-sam-vit-t-decoder.onnx")
-    }
-
-    pub fn sam_hq_tiny_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("sam-hq-vit-t-encoder.onnx")
+    pub fn sam_hq_tiny() -> Self {
+        Self::sam()
+            .with_encoder_file("sam-hq-vit-t-encoder.onnx")
            .with_sam_kind(SamKind::SamHq)
+            .with_decoder_file("sam-hq-vit-t-decoder.onnx")
    }

-    pub fn sam_hq_tiny_decoder() -> Self {
-        Self::sam_decoder().with_model_file("sam-hq-vit-t-decoder.onnx")
-    }
-
-    pub fn edge_sam_3x_encoder() -> Self {
-        Self::sam_encoder()
-            .with_model_file("edge-sam-3x-encoder.onnx")
+    pub fn edge_sam_3x() -> Self {
+        Self::sam()
+            .with_encoder_file("edge-sam-3x-encoder.onnx")
+            .with_decoder_file("edge-sam-3x-decoder.onnx")
            .with_sam_kind(SamKind::EdgeSam)
    }
-
-    pub fn edge_sam_3x_decoder() -> Self {
-        Self::sam_decoder().with_model_file("edge-sam-3x-decoder.onnx")
-    }
 }
--- a/src/models/sam/impl.rs
+++ b/src/models/sam/impl.rs
@@ -4,8 +4,7 @@ use ndarray::{s, Axis};
 use rand::prelude::*;

 use crate::{
-    elapsed, DynConf, Engine, Image, Mask, Ops, Options, Polygon, Processor, SamPrompt, Ts, Xs, X,
-    Y,
+    elapsed, Config, DynConf, Engine, Image, Mask, Ops, Polygon, Processor, SamPrompt, Ts, Xs, X, Y,
 };

 #[derive(Debug, Clone)]
@@ -49,9 +48,10 @@ pub struct SAM {
 }

 impl SAM {
-    pub fn new(options_encoder: Options, options_decoder: Options) -> Result<Self> {
-        let encoder = options_encoder.to_engine()?;
-        let decoder = options_decoder.to_engine()?;
+    pub fn new(config: Config) -> Result<Self> {
+        let encoder = Engine::try_from_config(&config.encoder)?;
+        let decoder = Engine::try_from_config(&config.decoder)?;
+
        let (batch, height, width) = (
            encoder.batch().opt(),
            encoder.try_height().unwrap_or(&1024.into()).opt(),
@@ -60,24 +60,23 @@ impl SAM {
        let ts = Ts::merge(&[encoder.ts(), decoder.ts()]);
        let spec = encoder.spec().to_owned();

-        let processor = options_encoder
-            .to_processor()?
-            .with_image_width(width as _)
-            .with_image_height(height as _);
-
-        let conf = DynConf::new(options_encoder.class_confs(), 1);
-        let find_contours = options_encoder.find_contours;
-        let kind = match options_encoder.sam_kind {
+        let conf = DynConf::new(config.class_confs(), 1);
+        let find_contours = config.find_contours;
+        let kind = match config.sam_kind {
            Some(x) => x,
            None => anyhow::bail!("Error: no clear `SamKind` specified."),
        };
        let use_low_res_mask = match kind {
            SamKind::Sam | SamKind::MobileSam | SamKind::SamHq => {
-                options_encoder.low_res_mask.unwrap_or(false)
+                config.sam_low_res_mask.unwrap_or(false)
            }
            SamKind::EdgeSam | SamKind::Sam2 => true,
        };

+        let processor = Processor::try_from_config(&config.processor)?
+            .with_image_width(width as _)
+            .with_image_height(height as _);
+
        Ok(Self {
            encoder,
            decoder,
--- a/src/models/sam2/config.rs
+++ b/src/models/sam2/config.rs
@@ -1,50 +1,28 @@
-use crate::Options;
+use crate::Config;

 /// Model configuration for `SAM2.1`
-impl Options {
-    pub fn sam2_encoder() -> Self {
+impl Config {
+    pub fn sam2_1_tiny() -> Self {
        Self::sam()
-            .with_model_ixx(0, 2, 1024.into())
-            .with_model_ixx(0, 3, 1024.into())
-            .with_resize_mode(crate::ResizeMode::FitAdaptive)
-            .with_resize_filter("Bilinear")
-            .with_image_mean(&[0.485, 0.456, 0.406])
-            .with_image_std(&[0.229, 0.224, 0.225])
+            .with_encoder_file("sam2.1-hiera-tiny-encoder.onnx")
+            .with_decoder_file("sam2.1-hiera-tiny-decoder.onnx")
    }

-    pub fn sam2_decoder() -> Self {
+    pub fn sam2_1_small() -> Self {
        Self::sam()
+            .with_encoder_file("sam2.1-hiera-small-encoder.onnx")
+            .with_decoder_file("sam2.1-hiera-small-decoder.onnx")
    }

-    pub fn sam2_1_tiny_encoder() -> Self {
-        Self::sam2_encoder().with_model_file("sam2.1-hiera-tiny-encoder.onnx")
+    pub fn sam2_1_base_plus() -> Self {
+        Self::sam()
+            .with_encoder_file("sam2.1-hiera-base-plus-encoder.onnx")
+            .with_decoder_file("sam2.1-hiera-base-plus-decoder.onnx")
    }

-    pub fn sam2_1_tiny_decoder() -> Self {
-        Self::sam2_decoder().with_model_file("sam2.1-hiera-tiny-decoder.onnx")
-    }
-
-    pub fn sam2_1_small_encoder() -> Self {
-        Self::sam2_encoder().with_model_file("sam2.1-hiera-small-encoder.onnx")
-    }
-
-    pub fn sam2_1_small_decoder() -> Self {
-        Self::sam2_decoder().with_model_file("sam2.1-hiera-small-decoder.onnx")
-    }
-
-    pub fn sam2_1_base_plus_encoder() -> Self {
-        Self::sam2_encoder().with_model_file("sam2.1-hiera-base-plus-encoder.onnx")
-    }
-
-    pub fn sam2_1_base_plus_decoder() -> Self {
-        Self::sam2_decoder().with_model_file("sam2.1-hiera-base-plus-decoder.onnx")
-    }
-
-    pub fn sam2_1_large_encoder() -> Self {
-        Self::sam2_encoder().with_model_file("sam2.1-hiera-large-encoder.onnx")
-    }
-
-    pub fn sam2_1_large_decoder() -> Self {
-        Self::sam2_decoder().with_model_file("sam2.1-hiera-large-decoder.onnx")
+    pub fn sam2_1_large() -> Self {
+        Self::sam()
+            .with_encoder_file("sam2.1-hiera-large-encoder.onnx")
+            .with_decoder_file("sam2.1-hiera-large-decoder.onnx")
    }
 }
--- a/Show More
+++ b/Show More