Add parallel acceleration for image processing

2025-12-03 02:58:22 +00:00 · 2025-01-14 22:56:10 +08:00
parent f511a475ac
commit afd1deb1f4
3 changed files with 171 additions and 35 deletions
--- a/src/misc/ops.rs
+++ b/src/misc/ops.rs
@@ -7,7 +7,8 @@ use fast_image_resize::{
    FilterType, ResizeAlg, ResizeOptions, Resizer,
 };
 use image::{DynamicImage, GenericImageView};
-use ndarray::{concatenate, s, Array, Array3, Axis, IntoDimension, Ix2, IxDyn};
+use ndarray::{concatenate, s, Array, Array3, ArrayView1, Axis, IntoDimension, Ix2, IxDyn, Zip};
 use rayon::prelude::*;
 pub enum Ops<'a> {
@@ -27,7 +28,7 @@ pub enum Ops<'a> {
 }
 impl Ops<'_> {
-    pub fn normalize(x: Array<f32, IxDyn>, min: f32, max: f32) -> Result<Array<f32, IxDyn>> {
+    pub fn normalize(x: &mut Array<f32, IxDyn>, min: f32, max: f32) -> Result<()> {
        if min >= max {
            anyhow::bail!(
                "Invalid range in `normalize`: `min` ({}) must be less than `max` ({}).",
@@ -35,7 +36,10 @@ impl Ops<'_> {
                max
            );
        }
-        Ok((x - min) / (max - min))
+        let range = max - min;
        x.par_mapv_inplace(|x| (x - min) / range);
        Ok(())
    }
    pub fn sigmoid(x: Array<f32, IxDyn>) -> Array<f32, IxDyn> {
@@ -74,11 +78,11 @@ impl Ops<'_> {
    }
    pub fn standardize(
-        x: Array<f32, IxDyn>,
+        x: &mut Array<f32, IxDyn>,
-        mean: &[f32],
+        mean: ArrayView1<f32>,
-        std: &[f32],
+        std: ArrayView1<f32>,
        dim: usize,
-    ) -> Result<Array<f32, IxDyn>> {
+    ) -> Result<()> {
        if mean.len() != std.len() {
            anyhow::bail!(
                "`standardize`: `mean` and `std` lengths are not equal. Mean length: {}, Std length: {}.",
@@ -86,6 +90,7 @@ impl Ops<'_> {
                std.len()
            );
        }
        let shape = x.shape();
        if dim >= shape.len() || shape[dim] != mean.len() {
            anyhow::bail!(
@@ -95,11 +100,20 @@ impl Ops<'_> {
                mean.len()
            );
        }
-        let mut shape = vec![1; shape.len()];
+        let mean_broadcast = mean.broadcast(shape).ok_or_else(|| {
-        shape[dim] = mean.len();
+            anyhow::anyhow!("Failed to broadcast `mean` to the shape of the input array.")
-        let mean = Array::from_shape_vec(shape.clone(), mean.to_vec())?;
+        })?;
-        let std = Array::from_shape_vec(shape, std.to_vec())?;
+        let std_broadcast = std.broadcast(shape).ok_or_else(|| {
-        Ok((x - mean) / std)
+            anyhow::anyhow!("Failed to broadcast `std` to the shape of the input array.")
        })?;
        Zip::from(x)
            .and(mean_broadcast)
            .and(std_broadcast)
            .par_for_each(|x_val, &mean_val, &std_val| {
                *x_val = (*x_val - mean_val) / std_val;
            });
        Ok(())
    }
    pub fn permute(x: Array<f32, IxDyn>, shape: &[usize]) -> Result<Array<f32, IxDyn>> {
--- a/src/misc/processor.rs
+++ b/src/misc/processor.rs
@@ -6,6 +6,8 @@ use fast_image_resize::{
 };
 use image::{DynamicImage, GenericImageView};
 use ndarray::{s, Array, Axis};
 use rayon::prelude::*;
 use std::sync::Mutex;
 use tokenizers::{Encoding, Tokenizer};
 use crate::{LogitsSampler, X};
@@ -67,10 +69,10 @@ impl Processor {
    }
    pub fn process_images(&mut self, xs: &[DynamicImage]) -> Result<X> {
-        // reset
+        // self.reset_image0_status();
-        self.reset_image0_status();
+        let (mut x, image0s_size, scale_factors_hw) = self.par_resize(xs)?;
-
+        self.image0s_size = image0s_size;
-        let mut x = self.resize_batch(xs)?;
+        self.scale_factors_hw = scale_factors_hw;
        if self.do_normalize {
            x = x.normalize(0., 255.)?;
        }
@@ -85,6 +87,7 @@ impl Processor {
        if self.unsigned {
            x = x.unsigned();
        }
        Ok(x)
    }
@@ -341,26 +344,144 @@ impl Processor {
        Ok(y.into())
    }
-    pub fn resize_batch(&mut self, xs: &[DynamicImage]) -> Result<X> {
+    #[allow(clippy::type_complexity)]
-        // TODO: par resize
+    pub fn resize2(&self, x: &DynamicImage) -> Result<(X, (u32, u32), Vec<f32>)> {
-        if xs.is_empty() {
+        if self.image_width + self.image_height == 0 {
-            anyhow::bail!("Found no input images.")
+            anyhow::bail!(
                "Invalid target height: {} or width: {}.",
                self.image_height,
                self.image_width
            );
        }
-        let mut ys = Array::ones((
+        let image0s_size: (u32, u32); // original image height and width
-            xs.len(),
+        let scale_factors_hw: Vec<f32>;
-            self.image_height as usize,
+
-            self.image_width as usize,
+        let buffer = match x.dimensions() {
-            3,
+            (w, h) if (w, h) == (self.image_height, self.image_width) => {
-        ))
+                image0s_size = (h, w);
                scale_factors_hw = vec![1., 1.];
                x.to_rgb8().into_raw()
            }
            (w0, h0) => {
                image0s_size = (h0, w0);
                let (mut resizer, options) = Self::build_resizer_filter(self.resize_filter)?;
                if let ResizeMode::FitExact = self.resize_mode {
                    let mut dst = Image::new(self.image_width, self.image_height, PixelType::U8x3);
                    resizer.resize(x, &mut dst, &options)?;
                    scale_factors_hw = vec![
                        (self.image_height as f32 / h0 as f32),
                        (self.image_width as f32 / w0 as f32),
                    ];
                    dst.into_vec()
                } else {
                    let (w, h) = match self.resize_mode {
                        ResizeMode::Letterbox | ResizeMode::FitAdaptive => {
                            let r = (self.image_width as f32 / w0 as f32)
                                .min(self.image_height as f32 / h0 as f32);
                            scale_factors_hw = vec![r, r];
                            (
                                (w0 as f32 * r).round() as u32,
                                (h0 as f32 * r).round() as u32,
                            )
                        }
                        ResizeMode::FitHeight => {
                            let r = self.image_height as f32 / h0 as f32;
                            scale_factors_hw = vec![1.0, r];
                            ((r * w0 as f32).round() as u32, self.image_height)
                        }
                        ResizeMode::FitWidth => {
                            // scale factor
                            let r = self.image_width as f32 / w0 as f32;
                            scale_factors_hw = vec![r, 1.0];
                            (self.image_width, (r * h0 as f32).round() as u32)
                        }
                        _ => unreachable!(),
                    };
                    let mut dst = Image::from_vec_u8(
                        self.image_width,
                        self.image_height,
                        vec![
                            self.padding_value;
                            3 * self.image_height as usize * self.image_width as usize
                        ],
                        PixelType::U8x3,
                    )?;
                    let (l, t) = if let ResizeMode::Letterbox = self.resize_mode {
                        if w == self.image_width {
                            (0, (self.image_height - h) / 2)
                        } else {
                            ((self.image_width - w) / 2, 0)
                        }
                    } else {
                        (0, 0)
                    };
                    let mut dst_cropped = CroppedImageMut::new(&mut dst, l, t, w, h)?;
                    resizer.resize(x, &mut dst_cropped, &options)?;
                    dst.into_vec()
                }
            }
        };
        let y = Array::from_shape_vec(
            (self.image_height as usize, self.image_width as usize, 3),
            buffer,
        )?
        .mapv(|x| x as f32)
        .into_dyn();
-        xs.iter().enumerate().try_for_each(|(idx, x)| {
+        Ok((y.into(), image0s_size, scale_factors_hw))
-            let y = self.resize(x)?;
+    }
            ys.slice_mut(s![idx, .., .., ..]).assign(&y);
            anyhow::Ok(())
        })?;
-        Ok(ys.into())
+    #[allow(clippy::type_complexity)]
    pub fn par_resize(&self, xs: &[DynamicImage]) -> Result<(X, Vec<(u32, u32)>, Vec<Vec<f32>>)> {
        match xs.len() {
            0 => anyhow::bail!("Found no input images."),
            1 => {
                let (y, image0_size, scale_factors) = self.resize2(&xs[0])?;
                Ok((y.insert_axis(0)?, vec![image0_size], vec![scale_factors]))
            }
            _ => {
                let ys = Mutex::new(
                    Array::zeros((
                        xs.len(),
                        self.image_height as usize,
                        self.image_width as usize,
                        3,
                    ))
                    .into_dyn(),
                );
                let results: Result<Vec<((u32, u32), Vec<f32>)>> = xs
                    .par_iter()
                    .enumerate()
                    .map(|(idx, x)| {
                        let (y, image0_size, scale_factors) = self.resize2(x)?;
                        {
                            let mut ys_guard = ys
                                .lock()
                                .map_err(|e| anyhow::anyhow!("Mutex lock error: {e}"))?;
                            ys_guard.slice_mut(s![idx, .., .., ..]).assign(&y);
                        }
                        Ok((image0_size, scale_factors))
                    })
                    .collect();
                let (image0s_size, scale_factors_hw): (Vec<_>, Vec<_>) =
                    results?.into_iter().unzip();
                let ys_inner = ys
                    .into_inner()
                    .map_err(|e| anyhow::anyhow!("Mutex into_inner error: {e}"))?;
                Ok((ys_inner.into(), image0s_size, scale_factors_hw))
            }
        }
    }
 }
--- a/src/xy/x.rs
+++ b/src/xy/x.rs
@@ -172,12 +172,13 @@ impl X {
    }
    pub fn normalize(mut self, min_: f32, max_: f32) -> Result<Self> {
-        self.0 = Ops::normalize(self.0, min_, max_)?;
+        Ops::normalize(&mut self.0, min_, max_)?;
        Ok(self)
    }
    pub fn standardize(mut self, mean: &[f32], std: &[f32], dim: usize) -> Result<Self> {
-        self.0 = Ops::standardize(self.0, mean, std, dim)?;
+        Ops::standardize(&mut self.0, mean.into(), std.into(), dim)?;
        Ok(self)
    }
@@ -187,7 +188,7 @@ impl X {
    }
    pub fn unsigned(mut self) -> Self {
-        self.0 = self.0.mapv(|x| if x < 0.0 { 0.0 } else { x });
+        self.0.par_mapv_inplace(|x| if x < 0.0 { 0.0 } else { x });
        self
    }