🐍 v0.1.0 (#53)

Jamjamjon
2025-01-12 16:59:57 +08:00
committed by GitHub
parent 4e932c4910
commit 0f2d84b8c5
256 changed files with 12485 additions and 9088 deletions


@@ -3,18 +3,13 @@ This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute
 ## Quick Start
 ```shell
-cargo run -r --example clip
+cargo run -r -F cuda --example clip -- --device cuda:0
 ```
 ## Results
 ```shell
-(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
-[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
-(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
-[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
-(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
-[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
+(99.9675%) ./examples/clip/images/carrot.jpg => Some carrots
+(99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
+(100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant
 ```
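Since the reworked example's `--device` option defaults to `cpu:0` (see the `Args` struct in main.rs below), a plain CPU build should also work; the explicit form would be:

```shell
# Assumed CPU-only invocation; `--device cpu:0` is the default and can be omitted.
cargo run -r --example clip -- --device cpu:0
```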

(Binary image file updated: 176 KiB before, 176 KiB after.)


@@ -1,43 +1,54 @@
-use usls::{models::Clip, DataLoader, Options};
+use anyhow::Result;
+use usls::{models::Clip, DataLoader, Ops, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // visual
-    let options_visual = Options::default().with_model("clip/visual-base-dyn.onnx")?;
+#[derive(argh::FromArgs)]
+/// CLIP Example
+struct Args {
+    /// device
+    #[argh(option, default = "String::from(\"cpu:0\")")]
+    device: String,
+}
-    // textual
-    let options_textual = Options::default()
-        .with_model("clip/textual-base-dyn.onnx")?
-        .with_tokenizer("clip/tokenizer.json")?;
+fn main() -> Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+        .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+        .init();
+    let args: Args = argh::from_env();
     // build model
+    let options_visual = Options::jina_clip_v1_visual()
+        // clip_vit_b32_visual()
+        .with_model_device(args.device.as_str().try_into()?)
+        .commit()?;
+    let options_textual = Options::jina_clip_v1_textual()
+        // clip_vit_b32_textual()
+        .with_model_device(args.device.as_str().try_into()?)
+        .commit()?;
     let mut model = Clip::new(options_visual, options_textual)?;
     // texts
     let texts = vec![
-        "A photo of a dinosaur ".to_string(),
-        "A photo of a cat".to_string(),
-        "A photo of a dog".to_string(),
-        "几个胡萝卜".to_string(),
-        "There are some playing cards on a striped table cloth".to_string(),
-        "There is a doll with red hair and a clock on a table".to_string(),
-        "Some people holding wine glasses in a restaurant".to_string(),
+        "A photo of a dinosaur",
+        "A photo of a cat",
+        "A photo of a dog",
+        "Some carrots",
+        "There are some playing cards on a striped table cloth",
+        "There is a doll with red hair and a clock on a table",
+        "Some people holding wine glasses in a restaurant",
     ];
     let feats_text = model.encode_texts(&texts)?; // [n, ndim]
-    // load image
+    // load images
     let dl = DataLoader::new("./examples/clip/images")?.build()?;
-    // loop
+    // run
     for (images, paths) in dl {
-        let feats_image = model.encode_images(&images).unwrap();
+        let feats_image = model.encode_images(&images)?;
         // use image to query texts
-        let matrix = match feats_image.embedding() {
-            Some(x) => x.dot2(feats_text.embedding().unwrap())?,
-            None => continue,
-        };
+        let matrix = Ops::dot2(&feats_image, &feats_text)?;
         // summary
         for i in 0..paths.len() {
             let probs = &matrix[i];
             let (id, &score) = probs
@@ -52,7 +63,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                 paths[i].display(),
                 &texts[id]
             );
-            println!("{:?}\n", probs);
         }
     }
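The percentages in the Results section read like a softmax over each image's similarity row against all texts: the rows the old code printed via `println!("{:?}\n", probs)` sum to 1, and the reported percentage sits at the index of the matched caption. Below is a minimal, self-contained sketch of that scoring step, assuming L2-normalized embeddings, one similarity row per image from `Ops::dot2`, and a CLIP-style logit scale (~100) applied before the softmax; every name in the sketch is an illustrative stand-in, not the usls API.

```rust
// Illustrative sketch only: cosine similarity (dot product of L2-normalized
// vectors) followed by a softmax, which is how the printed probabilities
// appear to be produced. Not the usls API.
fn dot(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

fn l2_normalize(v: &[f32]) -> Vec<f32> {
    let norm = dot(v, v).sqrt().max(f32::EPSILON);
    v.iter().map(|x| x / norm).collect()
}

fn softmax(xs: &[f32]) -> Vec<f32> {
    let max = xs.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    let exps: Vec<f32> = xs.iter().map(|x| (x - max).exp()).collect();
    let sum: f32 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

fn main() {
    // Toy embeddings: one image against three texts (real ones are ~512-d).
    let image = l2_normalize(&[0.8, 0.1, 0.2]);
    let texts = [
        l2_normalize(&[0.7, 0.2, 0.1]), // closest to the image
        l2_normalize(&[0.0, 1.0, 0.0]),
        l2_normalize(&[0.1, 0.0, 1.0]),
    ];

    // One row of the image-to-text similarity matrix, scaled by an assumed
    // CLIP-style logit scale before the softmax.
    let sims: Vec<f32> = texts.iter().map(|t| 100.0 * dot(&image, t)).collect();
    let probs = softmax(&sims);

    // Same argmax/printing pattern as the example's summary loop.
    let (id, score) = probs
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.total_cmp(b.1))
        .unwrap();
    println!("({:.4}%) best text index: {}", score * 100.0, id);
}
```

In the real example, the row comes from `Ops::dot2(&feats_image, &feats_text)` and the argmax index selects which caption gets printed next to each image path.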