mirror of
https://github.com/mii443/usls.git
synced 2025-12-03 11:08:20 +00:00
Add moondream2
* Add moondream2 * Update README.md
This commit is contained in:
10
examples/moondream2/README.md
Normal file
10
examples/moondream2/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
## Quick Start
|
||||
|
||||
```shell
|
||||
cargo run -r -F cuda --example moondream2 -- --device 'cuda:0' --dtype i8 --scale 2b --task vqa:"What's in this image?"
|
||||
cargo run -r -F cuda --example moondream2 -- --device 'cuda:0' --dtype i8 --scale 2b --task cap:0
|
||||
cargo run -r -F cuda --example moondream2 -- --device 'cuda:0' --dtype i8 --scale 2b --task cap:1
|
||||
cargo run -r -F cuda --example moondream2 -- --device 'cuda:0' --dtype i8 --scale 2b --task open-od:person
|
||||
cargo run -r -F cuda --example moondream2 -- --device 'cuda:0' --dtype i8 --scale 2b --task open-kpt:person
|
||||
```
|
||||
|
||||
157
examples/moondream2/main.rs
Normal file
157
examples/moondream2/main.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use anyhow::Result;
|
||||
use usls::{models::Moondream2, Annotator, DataLoader, Options, Scale, Task};
|
||||
|
||||
#[derive(argh::FromArgs)]
|
||||
/// Example
|
||||
struct Args {
|
||||
/// device
|
||||
#[argh(option, default = "String::from(\"cpu:0\")")]
|
||||
device: String,
|
||||
|
||||
/// source image
|
||||
#[argh(
|
||||
option,
|
||||
default = "vec![
|
||||
String::from(\"./assets/bus.jpg\"),
|
||||
String::from(\"images/green-car.jpg\"),
|
||||
]"
|
||||
)]
|
||||
source: Vec<String>,
|
||||
|
||||
/// dtype
|
||||
#[argh(option, default = "String::from(\"int4\")")]
|
||||
dtype: String,
|
||||
|
||||
/// scale
|
||||
#[argh(option, default = "String::from(\"0.5b\")")]
|
||||
scale: String,
|
||||
|
||||
/// task
|
||||
#[argh(option, default = "String::from(\"Caption: 0\")")]
|
||||
task: String,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
|
||||
.init();
|
||||
let args: Args = argh::from_env();
|
||||
|
||||
// build model
|
||||
let (
|
||||
options_vision_encoder,
|
||||
options_vision_projection,
|
||||
options_text_decoder,
|
||||
options_text_encoder,
|
||||
options_coord_decoder,
|
||||
options_coord_encoder,
|
||||
options_size_decoder,
|
||||
options_size_encoder,
|
||||
) = match args.scale.as_str().try_into()? {
|
||||
Scale::Billion(2.) => (
|
||||
Options::moondream2_2b_vision_encoder(),
|
||||
Options::moondream2_2b_vision_projection(),
|
||||
Options::moondream2_2b_text_decoder(),
|
||||
Options::moondream2_2b_text_encoder(),
|
||||
Options::moondream2_2b_coord_decoder(),
|
||||
Options::moondream2_2b_coord_encoder(),
|
||||
Options::moondream2_2b_size_decoder(),
|
||||
Options::moondream2_2b_size_encoder(),
|
||||
),
|
||||
Scale::Billion(0.5) => (
|
||||
Options::moondream2_0_5b_vision_encoder(),
|
||||
Options::moondream2_0_5b_vision_projection(),
|
||||
Options::moondream2_0_5b_text_decoder(),
|
||||
Options::moondream2_0_5b_text_encoder(),
|
||||
Options::moondream2_0_5b_coord_decoder(),
|
||||
Options::moondream2_0_5b_coord_encoder(),
|
||||
Options::moondream2_0_5b_size_decoder(),
|
||||
Options::moondream2_0_5b_size_encoder(),
|
||||
),
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
|
||||
let mut model = Moondream2::new(
|
||||
options_vision_encoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
options_vision_projection
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
options_text_encoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
options_text_decoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
Some(
|
||||
options_coord_encoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
),
|
||||
Some(
|
||||
options_coord_decoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
),
|
||||
Some(
|
||||
options_size_encoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
),
|
||||
Some(
|
||||
options_size_decoder
|
||||
.with_model_dtype(args.dtype.as_str().try_into()?)
|
||||
.with_model_device(args.device.as_str().try_into()?)
|
||||
.commit()?,
|
||||
),
|
||||
)?;
|
||||
|
||||
// load images
|
||||
let xs = DataLoader::try_read_batch(&args.source)?;
|
||||
|
||||
// run with task
|
||||
let task: Task = args.task.as_str().try_into()?;
|
||||
let ys = model.forward(&xs, &task)?;
|
||||
|
||||
// annotate
|
||||
match task {
|
||||
Task::Caption(_) => {
|
||||
println!("{}:", task);
|
||||
for (i, y) in ys.iter().enumerate() {
|
||||
if let Some(texts) = y.texts() {
|
||||
println!("Image {}: {:?}\n", i, texts[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
Task::Vqa(query) => {
|
||||
println!("Question: {}", query);
|
||||
for (i, y) in ys.iter().enumerate() {
|
||||
if let Some(texts) = y.texts() {
|
||||
println!("Image {}: {:?}\n", i, texts[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
Task::OpenSetDetection(_) | Task::OpenSetKeypointsDetection(_) => {
|
||||
println!("{:?}", ys);
|
||||
let annotator = Annotator::default()
|
||||
.with_bboxes_thickness(4)
|
||||
.without_bboxes_conf(true)
|
||||
.with_keypoints_radius(6)
|
||||
.with_keypoints_name(true)
|
||||
.with_saveout("moondream2");
|
||||
annotator.annotate(&xs, &ys);
|
||||
}
|
||||
_ => unimplemented!("Unsupported moondream2 task."),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user