mirror of
https://github.com/mii443/usls.git
synced 2025-12-03 11:08:20 +00:00
Adjust outputs (#16)
This commit is contained in:
@@ -39,7 +39,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
let feats_image = model.encode_images(&images).unwrap();
|
let feats_image = model.encode_images(&images).unwrap();
|
||||||
|
|
||||||
// use image to query texts
|
// use image to query texts
|
||||||
let matrix = feats_image.dot2(&feats_text)?;
|
let matrix = match feats_image.embedding() {
|
||||||
|
Some(x) => x.dot2(feats_text.embedding().unwrap())?,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
// summary
|
// summary
|
||||||
for i in 0..paths.len() {
|
for i in 0..paths.len() {
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 134 KiB |
@@ -1,4 +1,4 @@
|
|||||||
use usls::{models::Dinov2, Options};
|
use usls::{models::Dinov2, DataLoader, Options};
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
// build model
|
// build model
|
||||||
@@ -7,8 +7,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
.with_i00((1, 1, 1).into())
|
.with_i00((1, 1, 1).into())
|
||||||
.with_i02((224, 224, 224).into())
|
.with_i02((224, 224, 224).into())
|
||||||
.with_i03((224, 224, 224).into());
|
.with_i03((224, 224, 224).into());
|
||||||
let _model = Dinov2::new(options)?;
|
let mut model = Dinov2::new(options)?;
|
||||||
println!("TODO...");
|
let x = vec![DataLoader::try_read("./examples/dinov2/images/1.jpg")?];
|
||||||
|
let y = model.run(&x)?;
|
||||||
|
println!("{y:?}");
|
||||||
|
|
||||||
// query from vector
|
// query from vector
|
||||||
// let ys = model.query_from_vec(
|
// let ys = model.query_from_vec(
|
||||||
|
|||||||
@@ -4,11 +4,6 @@ use std::time::Duration;
|
|||||||
pub struct Ts {
|
pub struct Ts {
|
||||||
n: usize,
|
n: usize,
|
||||||
ts: Vec<Duration>,
|
ts: Vec<Duration>,
|
||||||
// pub t0: Duration,
|
|
||||||
// pub t1: Duration,
|
|
||||||
// pub t2: Duration,
|
|
||||||
// pub t3: Duration,
|
|
||||||
// pub t4: Duration,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ts {
|
impl Ts {
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ impl Blip {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
|
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||||
let xs_ = ops::resize(
|
let xs_ = ops::resize(
|
||||||
xs,
|
xs,
|
||||||
self.height.opt as u32,
|
self.height.opt as u32,
|
||||||
@@ -56,7 +56,7 @@ impl Blip {
|
|||||||
&[0.26862954, 0.2613026, 0.2757771],
|
&[0.26862954, 0.2613026, 0.2757771],
|
||||||
);
|
);
|
||||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||||
Ok(Embedding::new(ys[0].to_owned()))
|
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn caption(
|
pub fn caption(
|
||||||
@@ -67,8 +67,9 @@ impl Blip {
|
|||||||
) -> Result<Vec<Y>> {
|
) -> Result<Vec<Y>> {
|
||||||
let mut ys: Vec<Y> = Vec::new();
|
let mut ys: Vec<Y> = Vec::new();
|
||||||
let image_embeds = self.encode_images(x)?;
|
let image_embeds = self.encode_images(x)?;
|
||||||
|
let image_embeds = image_embeds.embedding().unwrap();
|
||||||
let image_embeds_attn_mask: Array<f32, IxDyn> =
|
let image_embeds_attn_mask: Array<f32, IxDyn> =
|
||||||
Array::ones((1, image_embeds.embedding().shape()[1])).into_dyn();
|
Array::ones((1, image_embeds.data().shape()[1])).into_dyn();
|
||||||
let mut y_text = String::new();
|
let mut y_text = String::new();
|
||||||
|
|
||||||
// conditional
|
// conditional
|
||||||
@@ -104,7 +105,7 @@ impl Blip {
|
|||||||
let y = self.textual.run(&[
|
let y = self.textual.run(&[
|
||||||
input_ids_nd,
|
input_ids_nd,
|
||||||
input_ids_attn_mask,
|
input_ids_attn_mask,
|
||||||
image_embeds.embedding().to_owned(),
|
image_embeds.data().to_owned(),
|
||||||
image_embeds_attn_mask.to_owned(),
|
image_embeds_attn_mask.to_owned(),
|
||||||
])?; // N, length, vocab_size
|
])?; // N, length, vocab_size
|
||||||
let y = y[0].slice(s!(0, -1.., ..));
|
let y = y[0].slice(s!(0, -1.., ..));
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine};
|
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use image::DynamicImage;
|
use image::DynamicImage;
|
||||||
use ndarray::{Array, Array2, IxDyn};
|
use ndarray::{Array, Array2, IxDyn};
|
||||||
@@ -52,7 +52,7 @@ impl Clip {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Embedding> {
|
pub fn encode_images(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||||
let xs_ = ops::resize(
|
let xs_ = ops::resize(
|
||||||
xs,
|
xs,
|
||||||
self.height.opt as u32,
|
self.height.opt as u32,
|
||||||
@@ -66,10 +66,10 @@ impl Clip {
|
|||||||
&[0.26862954, 0.2613026, 0.2757771],
|
&[0.26862954, 0.2613026, 0.2757771],
|
||||||
);
|
);
|
||||||
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
let ys: Vec<Array<f32, IxDyn>> = self.visual.run(&[xs_])?;
|
||||||
Ok(Embedding::new(ys[0].to_owned()))
|
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Embedding> {
|
pub fn encode_texts(&mut self, texts: &[String]) -> Result<Y> {
|
||||||
let encodings = self
|
let encodings = self
|
||||||
.tokenizer
|
.tokenizer
|
||||||
.encode_batch(texts.to_owned(), false)
|
.encode_batch(texts.to_owned(), false)
|
||||||
@@ -80,7 +80,7 @@ impl Clip {
|
|||||||
.collect();
|
.collect();
|
||||||
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
|
let xs = Array2::from_shape_vec((texts.len(), self.context_length), xs)?.into_dyn();
|
||||||
let ys = self.textual.run(&[xs])?;
|
let ys = self.textual.run(&[xs])?;
|
||||||
Ok(Embedding::new(ys[0].to_owned()))
|
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn batch_visual(&self) -> usize {
|
pub fn batch_visual(&self) -> usize {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::{ops, MinOptMax, Options, OrtEngine};
|
use crate::{ops, Embedding, MinOptMax, Options, OrtEngine, Y};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use image::DynamicImage;
|
use image::DynamicImage;
|
||||||
use ndarray::{Array, IxDyn};
|
use ndarray::{Array, IxDyn};
|
||||||
@@ -48,22 +48,21 @@ impl Dinov2 {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Array<f32, IxDyn>> {
|
pub fn run(&mut self, xs: &[DynamicImage]) -> Result<Y> {
|
||||||
let xs_ = ops::resize(
|
let xs_ = ops::resize(
|
||||||
xs,
|
xs,
|
||||||
self.height.opt as u32,
|
self.height.opt as u32,
|
||||||
self.width.opt as u32,
|
self.width.opt as u32,
|
||||||
"lanczos3",
|
"lanczos3",
|
||||||
)?;
|
)?;
|
||||||
let xs_ = ops::normalize(xs_, 0.0, 255.0);
|
let xs_ = ops::normalize(xs_, 0., 255.);
|
||||||
let xs_ = ops::standardize(
|
let xs_ = ops::standardize(
|
||||||
xs_,
|
xs_,
|
||||||
&[0.48145466, 0.4578275, 0.40821073],
|
&[0.48145466, 0.4578275, 0.40821073],
|
||||||
&[0.26862954, 0.2613026, 0.2757771],
|
&[0.26862954, 0.2613026, 0.2757771],
|
||||||
);
|
);
|
||||||
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
|
let ys: Vec<Array<f32, IxDyn>> = self.engine.run(&[xs_])?;
|
||||||
let ys = ys[0].to_owned();
|
Ok(Y::default().with_embedding(Embedding::new(ys[0].to_owned())))
|
||||||
Ok(ys)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
|
// pub fn build_index(&self, metric: Metric) -> Result<usearch::Index> {
|
||||||
|
|||||||
@@ -7,9 +7,7 @@ pub struct Embedding(Array<f32, IxDyn>);
|
|||||||
|
|
||||||
impl std::fmt::Debug for Embedding {
|
impl std::fmt::Debug for Embedding {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
f.debug_struct("Embedding")
|
f.debug_struct("").field("Shape", &self.0.shape()).finish()
|
||||||
.field("Shape", &self.0.shape())
|
|
||||||
.finish()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -23,7 +21,7 @@ impl Embedding {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embedding(&self) -> &Array<f32, IxDyn> {
|
pub fn data(&self) -> &Array<f32, IxDyn> {
|
||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
16
src/ys/y.rs
16
src/ys/y.rs
@@ -1,4 +1,4 @@
|
|||||||
use crate::{Bbox, Keypoint, Mask, Mbr, Polygon, Prob};
|
use crate::{Bbox, Embedding, Keypoint, Mask, Mbr, Polygon, Prob};
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Default)]
|
#[derive(Clone, PartialEq, Default)]
|
||||||
pub struct Y {
|
pub struct Y {
|
||||||
@@ -9,6 +9,7 @@ pub struct Y {
|
|||||||
polygons: Option<Vec<Polygon>>,
|
polygons: Option<Vec<Polygon>>,
|
||||||
texts: Option<Vec<String>>,
|
texts: Option<Vec<String>>,
|
||||||
masks: Option<Vec<Mask>>,
|
masks: Option<Vec<Mask>>,
|
||||||
|
embedding: Option<Embedding>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for Y {
|
impl std::fmt::Debug for Y {
|
||||||
@@ -47,6 +48,9 @@ impl std::fmt::Debug for Y {
|
|||||||
f.field("Masks", &x);
|
f.field("Masks", &x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(x) = &self.embedding {
|
||||||
|
f.field("Embedding", &x);
|
||||||
|
}
|
||||||
f.finish()
|
f.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -71,11 +75,17 @@ impl Y {
|
|||||||
self.mbrs = Some(mbrs.to_vec());
|
self.mbrs = Some(mbrs.to_vec());
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
|
pub fn with_bboxes(mut self, bboxes: &[Bbox]) -> Self {
|
||||||
self.bboxes = Some(bboxes.to_vec());
|
self.bboxes = Some(bboxes.to_vec());
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn with_embedding(mut self, embedding: Embedding) -> Self {
|
||||||
|
self.embedding = Some(embedding);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
|
pub fn with_keypoints(mut self, keypoints: &[Vec<Keypoint>]) -> Self {
|
||||||
self.keypoints = Some(keypoints.to_vec());
|
self.keypoints = Some(keypoints.to_vec());
|
||||||
self
|
self
|
||||||
@@ -114,6 +124,10 @@ impl Y {
|
|||||||
self.texts.as_ref()
|
self.texts.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn embedding(&self) -> Option<&Embedding> {
|
||||||
|
self.embedding.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self {
|
pub fn apply_bboxes_nms(mut self, iou_threshold: f32) -> Self {
|
||||||
match &mut self.bboxes {
|
match &mut self.bboxes {
|
||||||
None => self,
|
None => self,
|
||||||
|
|||||||
Reference in New Issue
Block a user