mirror of
https://github.com/mii443/usls.git
synced 2025-08-22 15:45:41 +00:00
Adjust outputs (#16)
This commit is contained in:
@ -39,7 +39,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let feats_image = model.encode_images(&images).unwrap();
|
||||
|
||||
// use image to query texts
|
||||
let matrix = feats_image.dot2(&feats_text)?;
|
||||
let matrix = match feats_image.embedding() {
|
||||
Some(x) => x.dot2(feats_text.embedding().unwrap())?,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// summary
|
||||
for i in 0..paths.len() {
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 134 KiB |
@ -1,4 +1,4 @@
|
||||
use usls::{models::Dinov2, Options};
|
||||
use usls::{models::Dinov2, DataLoader, Options};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// build model
|
||||
@ -7,8 +7,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
.with_i00((1, 1, 1).into())
|
||||
.with_i02((224, 224, 224).into())
|
||||
.with_i03((224, 224, 224).into());
|
||||
let _model = Dinov2::new(options)?;
|
||||
println!("TODO...");
|
||||
let mut model = Dinov2::new(options)?;
|
||||
let x = vec![DataLoader::try_read("./examples/dinov2/images/1.jpg")?];
|
||||
let y = model.run(&x)?;
|
||||
println!("{y:?}");
|
||||
|
||||
// query from vector
|
||||
// let ys = model.query_from_vec(
|
||||
|
@ -4,11 +4,6 @@ use std::time::Duration;
|
||||
pub struct Ts {
|
||||
n: usize,
|
||||
ts: Vec<Duration>,
|
||||
// pub t0: Duration,
|
||||
// pub t1: Duration,
|
||||
// pub t2: Duration,
|
||||
// pub t3: Duration,
|
||||
// pub t4: Duration,
|
||||
}
|
||||
|
||||
impl Ts {
|
||||
|
@ -42,7 +42,7 @@ impl Blip {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
|
||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||
let xs_ = ops::resize(
|
||||
xs,
|
||||
self.height.opt as u32,
|
||||
@ -56,7 +56,7 @@ impl Blip {
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
Ok(Embedding::new(ys[0].to_owned()))
|
||||
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||
}
|
||||
|
||||
pub fn caption(
|
||||
@ -67,8 +67,9 @@ impl Blip {
|
||||
) -> Result<Vec<Y>> {
|
||||
let mut ys: Vec<Y> = Vec::new();
|
||||
let image_embeds = self.encode_images(x)?;
|
||||
let image_embeds = image_embeds.embedding().unwrap();
|
||||
let image_embeds_attn_mask: Array<f32, IxDyn> =
|
||||
Array::ones((1, image_embeds.embedding().shape()[1])).into_dyn();
|
||||
Array::ones((1, image_embeds.data().shape()[1])).into_dyn();
|
||||
let mut y_text = String::new();
|
||||
|
||||
// conditional
|
||||
@ -104,7 +105,7 @@ impl Blip {
|
||||
let y = self.textual.run(&[
|
||||
input_ids_nd,
|
||||
input_ids_attn_mask,
|
||||
image_embeds.embedding().to_owned(),
|
||||
image_embeds.data().to_owned(),
|
||||
image_embeds_attn_mask.to_owned(),
|
||||
])?; // N, length, vocab_size
|
||||
let y = y[0].slice(s!(0, -1.., ..));
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine};
|
||||
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, Array2, IxDyn};
|
||||
@ -52,7 +52,7 @@ impl Clip {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
|
||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||
let xs_ = ops::resize(
|
||||
xs,
|
||||
self.height.opt as u32,
|
||||
@ -66,10 +66,10 @@ impl Clip {
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||
Ok(Embedding::new(ys[0].to_owned()))
|
||||
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||
}
|
||||
|
||||
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Embedding> {
|
||||
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Y> {
|
||||
let encodings = self
|
||||
.tokenizer
|
||||
.encode_batch(texts.to_owned(), false)
|
||||
@ -80,7 +80,7 @@ impl Clip {
|
||||
.collect();
|
||||
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
|
||||
let ys = self.textual.run(&[xs])?;
|
||||
Ok(Embedding::new(ys[0].to_owned()))
|
||||
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||
}
|
||||
|
||||
pub fn batch_visual(&self) -> usize {
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::{ops, MinOptMax, Options, OrtEngine};
|
||||
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, IxDyn};
|
||||
@ -48,22 +48,21 @@ impl Dinov2 {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||
let xs_ = ops::resize(
|
||||
xs,
|
||||
self.height.opt as u32,
|
||||
self.width.opt as u32,
|
||||
"lanczos3",
|
||||
)?;
|
||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
||||
let xs_ = ops::normalize(xs_, 0., 255.);
|
||||
let xs_ = ops::standardize(
|
||||
xs_,
|
||||
&[0.48145466, 0.4578275, 0.40821073],
|
||||
&[0.26862954, 0.2613026, 0.2757771],
|
||||
);
|
||||
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
|
||||
let ys = ys[0].to_owned();
|
||||
Ok(ys)
|
||||
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||
}
|
||||
|
||||
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
|
||||
|
@ -7,9 +7,7 @@ pub struct Embedding(Array<f32, IxDyn>);
|
||||
|
||||
impl std::fmt::Debug for Embedding {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Embedding")
|
||||
.field("Shape", &self.0.shape())
|
||||
.finish()
|
||||
f.debug_struct("").field("Shape", &self.0.shape()).finish()
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,7 +21,7 @@ impl Embedding {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn embedding(&self) -> &Array<f32, IxDyn> {
|
||||
pub fn data(&self) -> &Array<f32, IxDyn> {
|
||||
&self.0
|
||||
}
|
||||
|
||||
|
16
src/ys/y.rs
16
src/ys/y.rs
@ -1,4 +1,4 @@
|
||||
use crate::{Bbox, Keypoint, Mask, Mbr, Polygon, Prob};
|
||||
use crate::{Bbox, Embedding, Keypoint, Mask, Mbr, Polygon, Prob};
|
||||
|
||||
#[derive(Clone, PartialEq, Default)]
|
||||
pub struct Y {
|
||||
@ -9,6 +9,7 @@ pub struct Y {
|
||||
polygons: Option<Vec<Polygon>>,
|
||||
texts: Option<Vec<String>>,
|
||||
masks: Option<Vec<Mask>>,
|
||||
embedding: Option<Embedding>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Y {
|
||||
@ -47,6 +48,9 @@ impl std::fmt::Debug for Y {
|
||||
f.field("Masks", &x);
|
||||
}
|
||||
}
|
||||
if let Some(x) = &self.embedding {
|
||||
f.field("Embedding", &x);
|
||||
}
|
||||
f.finish()
|
||||
}
|
||||
}
|
||||
@ -71,11 +75,17 @@ impl Y {
|
||||
self.mbrs = Some(mbrs.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
|
||||
self.bboxes = Some(bboxes.to_vec());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_embedding(mut self, embedding: Embedding) -> Self {
|
||||
self.embedding = Some(embedding);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
|
||||
self.keypoints = Some(keypoints.to_vec());
|
||||
self
|
||||
@ -114,6 +124,10 @@ impl Y {
|
||||
self.texts.as_ref()
|
||||
}
|
||||
|
||||
pub fn embedding(&self) -> Option<&Embedding> {
|
||||
self.embedding.as_ref()
|
||||
}
|
||||
|
||||
pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self {
|
||||
match &mut self.bboxes {
|
||||
None => self,
|
||||
|
Reference in New Issue
Block a user