Adjust outputs (#16)

This commit is contained in:
Jamjamjon
2024-06-01 16:58:17 +08:00
committed by GitHub
parent a65e2aff7b
commit 20577e4242
9 changed files with 40 additions and 28 deletions

View File

@ -39,7 +39,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let feats_image = model.encode_images(&images).unwrap();
// use image to query texts
let matrix = feats_image.dot2(&feats_text)?;
let matrix = match feats_image.embedding() {
Some(x) => x.dot2(feats_text.embedding().unwrap())?,
None => continue,
};
// summary
for i in 0..paths.len() {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

View File

@ -1,4 +1,4 @@
use usls::{models::Dinov2, Options};
use usls::{models::Dinov2, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
@ -7,8 +7,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 1).into())
.with_i02((224, 224, 224).into())
.with_i03((224, 224, 224).into());
let _model = Dinov2::new(options)?;
println!("TODO...");
let mut model = Dinov2::new(options)?;
let x = vec![DataLoader::try_read("./examples/dinov2/images/1.jpg")?];
let y = model.run(&x)?;
println!("{y:?}");
// query from vector
// let ys = model.query_from_vec(

View File

@ -4,11 +4,6 @@ use std::time::Duration;
/// Container pairing a `usize` field `n` with a vector of `Duration` values `ts`.
// NOTE(review): presumably `ts` holds accumulated timing samples and `n` a count —
// confirm against `impl Ts`, which is not visible here.
pub struct Ts {
n: usize,
ts: Vec<Duration>,
// pub t0: Duration,
// pub t1: Duration,
// pub t2: Duration,
// pub t3: Duration,
// pub t4: Duration,
}
impl Ts {

View File

@ -42,7 +42,7 @@ impl Blip {
})
}
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize(
xs,
self.height.opt as u32,
@ -56,7 +56,7 @@ impl Blip {
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
Ok(Embedding::new(ys[0].to_owned()))
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
}
pub fn caption(
@ -67,8 +67,9 @@ impl Blip {
) -> Result<Vec<Y>> {
let mut ys: Vec<Y> = Vec::new();
let image_embeds = self.encode_images(x)?;
let image_embeds = image_embeds.embedding().unwrap();
let image_embeds_attn_mask: Array<f32, IxDyn> =
Array::ones((1, image_embeds.embedding().shape()[1])).into_dyn();
Array::ones((1, image_embeds.data().shape()[1])).into_dyn();
let mut y_text = String::new();
// conditional
@ -104,7 +105,7 @@ impl Blip {
let y = self.textual.run(&[
input_ids_nd,
input_ids_attn_mask,
image_embeds.embedding().to_owned(),
image_embeds.data().to_owned(),
image_embeds_attn_mask.to_owned(),
])?; // N, length, vocab_size
let y = y[0].slice(s!(0, -1.., ..));

View File

@ -1,4 +1,4 @@
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine};
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{Array, Array2, IxDyn};
@ -52,7 +52,7 @@ impl Clip {
})
}
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize(
xs,
self.height.opt as u32,
@ -66,10 +66,10 @@ impl Clip {
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
Ok(Embedding::new(ys[0].to_owned()))
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
}
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Embedding> {
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Y> {
let encodings = self
.tokenizer
.encode_batch(texts.to_owned(), false)
@ -80,7 +80,7 @@ impl Clip {
.collect();
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
let ys = self.textual.run(&[xs])?;
Ok(Embedding::new(ys[0].to_owned()))
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
}
pub fn batch_visual(&self) -> usize {

View File

@ -1,4 +1,4 @@
use crate::{ops, MinOptMax, Options, OrtEngine};
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{Array, IxDyn};
@ -48,22 +48,21 @@ impl Dinov2 {
})
}
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize(
xs,
self.height.opt as u32,
self.width.opt as u32,
"lanczos3",
)?;
let xs_ = ops::normalize(xs_, 0.0, 255.0);
let xs_ = ops::normalize(xs_, 0., 255.);
let xs_ = ops::standardize(
xs_,
&[0.48145466, 0.4578275, 0.40821073],
&[0.26862954, 0.2613026, 0.2757771],
);
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
let ys = ys[0].to_owned();
Ok(ys)
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
}
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {

View File

@ -7,9 +7,7 @@ pub struct Embedding(Array<f32, IxDyn>);
impl std::fmt::Debug for Embedding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Embedding")
.field("Shape", &self.0.shape())
.finish()
f.debug_struct("").field("Shape", &self.0.shape()).finish()
}
}
@ -23,7 +21,7 @@ impl Embedding {
self
}
pub fn embedding(&self) -> &Array<f32, IxDyn> {
pub fn data(&self) -> &Array<f32, IxDyn> {
&self.0
}

View File

@ -1,4 +1,4 @@
use crate::{Bbox, Keypoint, Mask, Mbr, Polygon, Prob};
use crate::{Bbox, Embedding, Keypoint, Mask, Mbr, Polygon, Prob};
#[derive(Clone, PartialEq, Default)]
pub struct Y {
@ -9,6 +9,7 @@ pub struct Y {
polygons: Option<Vec<Polygon>>,
texts: Option<Vec<String>>,
masks: Option<Vec<Mask>>,
embedding: Option<Embedding>,
}
impl std::fmt::Debug for Y {
@ -47,6 +48,9 @@ impl std::fmt::Debug for Y {
f.field("Masks", &x);
}
}
if let Some(x) = &self.embedding {
f.field("Embedding", &x);
}
f.finish()
}
}
@ -71,11 +75,17 @@ impl Y {
self.mbrs = Some(mbrs.to_vec());
self
}
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
self.bboxes = Some(bboxes.to_vec());
self
}
pub fn with_embedding(mut self, embedding: Embedding) -> Self {
self.embedding = Some(embedding);
self
}
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
self.keypoints = Some(keypoints.to_vec());
self
@ -114,6 +124,10 @@ impl Y {
self.texts.as_ref()
}
/// Returns a shared reference to the stored embedding, or `None` when the
/// `embedding` field has not been set.
pub fn embedding(&self) -> Option<&Embedding> {
self.embedding.as_ref()
}
pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self {
match &mut self.bboxes {
None => self,