// Mirror of https://github.com/mii443/usls.git (synced 2025-08-22 15:45:41 +00:00).
// Rust source file: 158 lines, 5.1 KiB.
use anyhow::Result;
|
|
use usls::{models::Moondream2, Annotator, DataLoader, Options, Scale, Task};
|
|
|
|
#[derive(argh::FromArgs)]
|
|
/// Example
|
|
struct Args {
|
|
/// device
|
|
#[argh(option, default = "String::from(\"cpu:0\")")]
|
|
device: String,
|
|
|
|
/// source image
|
|
#[argh(
|
|
option,
|
|
default = "vec![
|
|
String::from(\"./assets/bus.jpg\"),
|
|
String::from(\"images/green-car.jpg\"),
|
|
]"
|
|
)]
|
|
source: Vec<String>,
|
|
|
|
/// dtype
|
|
#[argh(option, default = "String::from(\"int4\")")]
|
|
dtype: String,
|
|
|
|
/// scale
|
|
#[argh(option, default = "String::from(\"0.5b\")")]
|
|
scale: String,
|
|
|
|
/// task
|
|
#[argh(option, default = "String::from(\"Caption: 0\")")]
|
|
task: String,
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
tracing_subscriber::fmt()
|
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
|
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
|
.init();
|
|
let args: Args = argh::from_env();
|
|
|
|
// build model
|
|
let (
|
|
options_vision_encoder,
|
|
options_vision_projection,
|
|
options_text_decoder,
|
|
options_text_encoder,
|
|
options_coord_decoder,
|
|
options_coord_encoder,
|
|
options_size_decoder,
|
|
options_size_encoder,
|
|
) = match args.scale.as_str().try_into()? {
|
|
Scale::Billion(2.) => (
|
|
Options::moondream2_2b_vision_encoder(),
|
|
Options::moondream2_2b_vision_projection(),
|
|
Options::moondream2_2b_text_decoder(),
|
|
Options::moondream2_2b_text_encoder(),
|
|
Options::moondream2_2b_coord_decoder(),
|
|
Options::moondream2_2b_coord_encoder(),
|
|
Options::moondream2_2b_size_decoder(),
|
|
Options::moondream2_2b_size_encoder(),
|
|
),
|
|
Scale::Billion(0.5) => (
|
|
Options::moondream2_0_5b_vision_encoder(),
|
|
Options::moondream2_0_5b_vision_projection(),
|
|
Options::moondream2_0_5b_text_decoder(),
|
|
Options::moondream2_0_5b_text_encoder(),
|
|
Options::moondream2_0_5b_coord_decoder(),
|
|
Options::moondream2_0_5b_coord_encoder(),
|
|
Options::moondream2_0_5b_size_decoder(),
|
|
Options::moondream2_0_5b_size_encoder(),
|
|
),
|
|
_ => unimplemented!(),
|
|
};
|
|
|
|
let mut model = Moondream2::new(
|
|
options_vision_encoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
options_vision_projection
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
options_text_encoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
options_text_decoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
Some(
|
|
options_coord_encoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
),
|
|
Some(
|
|
options_coord_decoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
),
|
|
Some(
|
|
options_size_encoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
),
|
|
Some(
|
|
options_size_decoder
|
|
.with_model_dtype(args.dtype.as_str().try_into()?)
|
|
.with_model_device(args.device.as_str().try_into()?)
|
|
.commit()?,
|
|
),
|
|
)?;
|
|
|
|
// load images
|
|
let xs = DataLoader::try_read_batch(&args.source)?;
|
|
|
|
// run with task
|
|
let task: Task = args.task.as_str().try_into()?;
|
|
let ys = model.forward(&xs, &task)?;
|
|
|
|
// annotate
|
|
match task {
|
|
Task::Caption(_) => {
|
|
println!("{}:", task);
|
|
for (i, y) in ys.iter().enumerate() {
|
|
if let Some(texts) = y.texts() {
|
|
println!("Image {}: {:?}\n", i, texts[0]);
|
|
}
|
|
}
|
|
}
|
|
Task::Vqa(query) => {
|
|
println!("Question: {}", query);
|
|
for (i, y) in ys.iter().enumerate() {
|
|
if let Some(texts) = y.texts() {
|
|
println!("Image {}: {:?}\n", i, texts[0]);
|
|
}
|
|
}
|
|
}
|
|
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
|
|
println!("{:?}", ys);
|
|
let annotator = Annotator::default()
|
|
.with_bboxes_thickness(4)
|
|
.without_bboxes_conf(true)
|
|
.with_keypoints_radius(6)
|
|
.with_keypoints_name(true)
|
|
.with_saveout("moondream2");
|
|
annotator.annotate(&xs, &ys);
|
|
}
|
|
_ => unimplemented!("Unsupported moondream2 task."),
|
|
}
|
|
|
|
Ok(())
|
|
}
|