Adjust outputs (#16)

This commit is contained in:
Jamjamjon
2024-06-01 16:58:17 +08:00
committed by GitHub
parent a65e2aff7b
commit 20577e4242
9 changed files with 40 additions and 28 deletions

View File

@@ -39,7 +39,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let feats_image = model.encode_images(&images).unwrap(); let feats_image = model.encode_images(&images).unwrap();
// use image to query texts // use image to query texts
let matrix = feats_image.dot2(&feats_text)?; let matrix = match feats_image.embedding() {
Some(x) => x.dot2(feats_text.embedding().unwrap())?,
None => continue,
};
// summary // summary
for i in 0..paths.len() { for i in 0..paths.len() {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

View File

@@ -1,4 +1,4 @@
use usls::{models::Dinov2, Options}; use usls::{models::Dinov2, DataLoader, Options};
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model // build model
@@ -7,8 +7,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_i00((1, 1, 1).into()) .with_i00((1, 1, 1).into())
.with_i02((224, 224, 224).into()) .with_i02((224, 224, 224).into())
.with_i03((224, 224, 224).into()); .with_i03((224, 224, 224).into());
let _model = Dinov2::new(options)?; let mut model = Dinov2::new(options)?;
println!("TODO..."); let x = vec![DataLoader::try_read("./examples/dinov2/images/1.jpg")?];
let y = model.run(&x)?;
println!("{y:?}");
// query from vector // query from vector
// let ys = model.query_from_vec( // let ys = model.query_from_vec(

View File

@@ -4,11 +4,6 @@ use std::time::Duration;
pub struct Ts { pub struct Ts {
n: usize, n: usize,
ts: Vec<Duration>, ts: Vec<Duration>,
// pub t0: Duration,
// pub t1: Duration,
// pub t2: Duration,
// pub t3: Duration,
// pub t4: Duration,
} }
impl Ts { impl Ts {

View File

@@ -42,7 +42,7 @@ impl Blip {
}) })
} }
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> { pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize( let xs_ = ops::resize(
xs, xs,
self.height.opt as u32, self.height.opt as u32,
@@ -56,7 +56,7 @@ impl Blip {
&[0.26862954, 0.2613026, 0.2757771], &[0.26862954, 0.2613026, 0.2757771],
); );
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?; let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
Ok(Embedding::new(ys[0].to_owned())) Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
} }
pub fn caption( pub fn caption(
@@ -67,8 +67,9 @@ impl Blip {
) -> Result<Vec<Y>> { ) -> Result<Vec<Y>> {
let mut ys: Vec<Y> = Vec::new(); let mut ys: Vec<Y> = Vec::new();
let image_embeds = self.encode_images(x)?; let image_embeds = self.encode_images(x)?;
let image_embeds = image_embeds.embedding().unwrap();
let image_embeds_attn_mask: Array<f32, IxDyn> = let image_embeds_attn_mask: Array<f32, IxDyn> =
Array::ones((1, image_embeds.embedding().shape()[1])).into_dyn(); Array::ones((1, image_embeds.data().shape()[1])).into_dyn();
let mut y_text = String::new(); let mut y_text = String::new();
// conditional // conditional
@@ -104,7 +105,7 @@ impl Blip {
let y = self.textual.run(&[ let y = self.textual.run(&[
input_ids_nd, input_ids_nd,
input_ids_attn_mask, input_ids_attn_mask,
image_embeds.embedding().to_owned(), image_embeds.data().to_owned(),
image_embeds_attn_mask.to_owned(), image_embeds_attn_mask.to_owned(),
])?; // N, length, vocab_size ])?; // N, length, vocab_size
let y = y[0].slice(s!(0, -1.., ..)); let y = y[0].slice(s!(0, -1.., ..));

View File

@@ -1,4 +1,4 @@
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine}; use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
use anyhow::Result; use anyhow::Result;
use image::DynamicImage; use image::DynamicImage;
use ndarray::{Array, Array2, IxDyn}; use ndarray::{Array, Array2, IxDyn};
@@ -52,7 +52,7 @@ impl Clip {
}) })
} }
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> { pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize( let xs_ = ops::resize(
xs, xs,
self.height.opt as u32, self.height.opt as u32,
@@ -66,10 +66,10 @@ impl Clip {
&[0.26862954, 0.2613026, 0.2757771], &[0.26862954, 0.2613026, 0.2757771],
); );
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?; let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
Ok(Embedding::new(ys[0].to_owned())) Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
} }
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Embedding> { pub fn encode_texts(&mut self, texts: &[String]) -> Result<Y> {
let encodings = self let encodings = self
.tokenizer .tokenizer
.encode_batch(texts.to_owned(), false) .encode_batch(texts.to_owned(), false)
@@ -80,7 +80,7 @@ impl Clip {
.collect(); .collect();
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn(); let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
let ys = self.textual.run(&[xs])?; let ys = self.textual.run(&[xs])?;
Ok(Embedding::new(ys[0].to_owned())) Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
} }
pub fn batch_visual(&self) -> usize { pub fn batch_visual(&self) -> usize {

View File

@@ -1,4 +1,4 @@
use crate::{ops, MinOptMax, Options, OrtEngine}; use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
use anyhow::Result; use anyhow::Result;
use image::DynamicImage; use image::DynamicImage;
use ndarray::{Array, IxDyn}; use ndarray::{Array, IxDyn};
@@ -48,22 +48,21 @@ impl Dinov2 {
}) })
} }
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> { pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Y> {
let xs_ = ops::resize( let xs_ = ops::resize(
xs, xs,
self.height.opt as u32, self.height.opt as u32,
self.width.opt as u32, self.width.opt as u32,
"lanczos3", "lanczos3",
)?; )?;
let xs_ = ops::normalize(xs_, 0.0, 255.0); let xs_ = ops::normalize(xs_, 0., 255.);
let xs_ = ops::standardize( let xs_ = ops::standardize(
xs_, xs_,
&[0.48145466, 0.4578275, 0.40821073], &[0.48145466, 0.4578275, 0.40821073],
&[0.26862954, 0.2613026, 0.2757771], &[0.26862954, 0.2613026, 0.2757771],
); );
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?; let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
let ys = ys[0].to_owned(); Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
Ok(ys)
} }
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> { // pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {

View File

@@ -7,9 +7,7 @@ pub struct Embedding(Array<f32, IxDyn>);
impl std::fmt::Debug for Embedding { impl std::fmt::Debug for Embedding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Embedding") f.debug_struct("").field("Shape", &self.0.shape()).finish()
.field("Shape", &self.0.shape())
.finish()
} }
} }
@@ -23,7 +21,7 @@ impl Embedding {
self self
} }
pub fn embedding(&self) -> &Array<f32, IxDyn> { pub fn data(&self) -> &Array<f32, IxDyn> {
&self.0 &self.0
} }

View File

@@ -1,4 +1,4 @@
use crate::{Bbox, Keypoint, Mask, Mbr, Polygon, Prob}; use crate::{Bbox, Embedding, Keypoint, Mask, Mbr, Polygon, Prob};
#[derive(Clone, PartialEq, Default)] #[derive(Clone, PartialEq, Default)]
pub struct Y { pub struct Y {
@@ -9,6 +9,7 @@ pub struct Y {
polygons: Option<Vec<Polygon>>, polygons: Option<Vec<Polygon>>,
texts: Option<Vec<String>>, texts: Option<Vec<String>>,
masks: Option<Vec<Mask>>, masks: Option<Vec<Mask>>,
embedding: Option<Embedding>,
} }
impl std::fmt::Debug for Y { impl std::fmt::Debug for Y {
@@ -47,6 +48,9 @@ impl std::fmt::Debug for Y {
f.field("Masks", &x); f.field("Masks", &x);
} }
} }
if let Some(x) = &self.embedding {
f.field("Embedding", &x);
}
f.finish() f.finish()
} }
} }
@@ -71,11 +75,17 @@ impl Y {
self.mbrs = Some(mbrs.to_vec()); self.mbrs = Some(mbrs.to_vec());
self self
} }
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self { pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
self.bboxes = Some(bboxes.to_vec()); self.bboxes = Some(bboxes.to_vec());
self self
} }
pub fn with_embedding(mut self, embedding: Embedding) -> Self {
self.embedding = Some(embedding);
self
}
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self { pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
self.keypoints = Some(keypoints.to_vec()); self.keypoints = Some(keypoints.to_vec());
self self
@@ -114,6 +124,10 @@ impl Y {
self.texts.as_ref() self.texts.as_ref()
} }
pub fn embedding(&self) -> Option<&Embedding> {
self.embedding.as_ref()
}
pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self { pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self {
match &mut self.bboxes { match &mut self.bboxes {
None => self, None => self,