🐍 v0.1.0 (#53)

Jamjamjon
2025-01-12 16:59:57 +08:00
committed by GitHub
parent 4e932c4910
commit 0f2d84b8c5
256 changed files with 12485 additions and 9088 deletions


@@ -3,18 +3,13 @@ This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute
 ## Quick Start
 ```shell
-cargo run -r --example clip
+cargo run -r -F cuda --example clip -- --device cuda:0
 ```
 ## Results
 ```shell
-(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
-[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
-(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
-[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
-(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
-[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
+(99.9675%) ./examples/clip/images/carrot.jpg => Some carrots
+(99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
+(100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant
 ```
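Since the reworked example's `--device` option defaults to `cpu:0` (see the `Args` struct in main.rs below), a plain CPU build should also work; the explicit form would be:

```shell
# Assumed CPU-only invocation; `--device cpu:0` is the default and can be omitted.
cargo run -r --example clip -- --device cpu:0
```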

(Binary image file updated: 176 KiB before, 176 KiB after.)


@@ -1,43 +1,54 @@
-use usls::{models::Clip, DataLoader, Options};
+use anyhow::Result;
+use usls::{models::Clip, DataLoader, Ops, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // visual
-    let options_visual = Options::default().with_model("clip/visual-base-dyn.onnx")?;
+#[derive(argh::FromArgs)]
+/// CLIP Example
+struct Args {
+    /// device
+    #[argh(option, default = "String::from(\"cpu:0\")")]
+    device: String,
+}
-    // textual
-    let options_textual = Options::default()
-        .with_model("clip/textual-base-dyn.onnx")?
-        .with_tokenizer("clip/tokenizer.json")?;
+fn main() -> Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+        .init();
+    let args: Args = argh::from_env();
     // build model
+    let options_visual = Options::jina_clip_v1_visual()
+        // clip_vit_b32_visual()
+        .with_model_device(args.device.as_str().try_into()?)
+        .commit()?;
+    let options_textual = Options::jina_clip_v1_textual()
+        // clip_vit_b32_textual()
+        .with_model_device(args.device.as_str().try_into()?)
+        .commit()?;
     let mut model = Clip::new(options_visual, options_textual)?;
     // texts
     let texts = vec![
-        "A photo of a dinosaur ".to_string(),
-        "A photo of a cat".to_string(),
-        "A photo of a dog".to_string(),
-        "几个胡萝卜".to_string(),
-        "There are some playing cards on a striped table cloth".to_string(),
-        "There is a doll with red hair and a clock on a table".to_string(),
-        "Some people holding wine glasses in a restaurant".to_string(),
+        "A photo of a dinosaur",
+        "A photo of a cat",
+        "A photo of a dog",
+        "Some carrots",
+        "There are some playing cards on a striped table cloth",
+        "There is a doll with red hair and a clock on a table",
+        "Some people holding wine glasses in a restaurant",
     ];
     let feats_text = model.encode_texts(&texts)?; // [n, ndim]
-    // load image
+    // load images
     let dl = DataLoader::new("./examples/clip/images")?.build()?;
-    // loop
+    // run
     for (images, paths) in dl {
-        let feats_image = model.encode_images(&images).unwrap();
+        let feats_image = model.encode_images(&images)?;
         // use image to query texts
-        let matrix = match feats_image.embedding() {
-            Some(x) => x.dot2(feats_text.embedding().unwrap())?,
-            None => continue,
-        };
+        let matrix = Ops::dot2(&feats_image, &feats_text)?;
         // summary
         for i in 0..paths.len() {
             let probs = &matrix[i];
             let (id, &score) = probs
@@ -52,7 +63,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                 paths[i].display(),
                 &texts[id]
             );
-            println!("{:?}\n", probs);
         }
     }
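The percentages in the Results section read like a softmax over each image's similarity row against all texts: the rows the old code printed via `println!("{:?}\n", probs)` sum to 1, and the reported percentage sits at the index of the matched caption. Below is a minimal, self-contained sketch of that scoring step, assuming L2-normalized embeddings, one similarity row per image from `Ops::dot2`, and a CLIP-style logit scale (~100) applied before the softmax; every name in the sketch is an illustrative stand-in, not the usls API.

```rust
// Illustrative sketch only: cosine similarity (dot product of L2-normalized
// vectors) followed by a softmax, which is how the printed probabilities
// appear to be produced. Not the usls API.
fn dot(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

fn l2_normalize(v: &[f32]) -> Vec<f32> {
    let norm = dot(v, v).sqrt().max(f32::EPSILON);
    v.iter().map(|x| x / norm).collect()
}

fn softmax(xs: &[f32]) -> Vec<f32> {
    let max = xs.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    let exps: Vec<f32> = xs.iter().map(|x| (x - max).exp()).collect();
    let sum: f32 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

fn main() {
    // Toy embeddings: one image against three texts (real ones are ~512-d).
    let image = l2_normalize(&[0.8, 0.1, 0.2]);
    let texts = [
        l2_normalize(&[0.7, 0.2, 0.1]), // closest to the image
        l2_normalize(&[0.0, 1.0, 0.0]),
        l2_normalize(&[0.1, 0.0, 1.0]),
    ];

    // One row of the image-to-text similarity matrix, scaled by an assumed
    // CLIP-style logit scale before the softmax.
    let sims: Vec<f32> = texts.iter().map(|t| 100.0 * dot(&image, t)).collect();
    let probs = softmax(&sims);

    // Same argmax/printing pattern as the example's summary loop.
    let (id, score) = probs
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.total_cmp(b.1))
        .unwrap();
    println!("({:.4}%) best text index: {}", score * 100.0, id);
}
```

In the real example, the row comes from `Ops::dot2(&feats_image, &feats_text)` and the argmax index selects which caption gets printed next to each image path.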