From a38539c6a098d1aff1a31fca65e05a99a61b4d67 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 3 Dec 2024 21:50:48 +0000 Subject: [PATCH 1/9] Initial implementation of 1D symmetrical filtering for gaussian --- src/dynimage.rs | 4 +- src/imageops/filter_1d.rs | 967 ++++++++++++++++++++++++++++++++++++++ src/imageops/mod.rs | 5 +- src/imageops/sample.rs | 197 +++++++- 4 files changed, 1167 insertions(+), 6 deletions(-) create mode 100644 src/imageops/filter_1d.rs diff --git a/src/dynimage.rs b/src/dynimage.rs index e04944ad5e..ee0d15327a 100644 --- a/src/dynimage.rs +++ b/src/dynimage.rs @@ -15,6 +15,7 @@ use crate::error::{ImageError, ImageResult, ParameterError, ParameterErrorKind}; use crate::flat::FlatSamples; use crate::image::{GenericImage, GenericImageView, ImageDecoder, ImageEncoder, ImageFormat}; use crate::image_reader::free_functions; +use crate::imageops::gaussian_blur_dyn_image; use crate::math::resize_dimensions; use crate::metadata::Orientation; use crate::traits::Pixel; @@ -829,7 +830,8 @@ impl DynamicImage { /// accurate version. #[must_use] pub fn blur(&self, sigma: f32) -> DynamicImage { - dynamic_map!(*self, ref p => imageops::blur(p, sigma)) + gaussian_blur_dyn_image(self, sigma) + // dynamic_map!(*self, ref p => imageops::blur(p, sigma)) } /// Performs a fast blur on this image. diff --git a/src/imageops/filter_1d.rs b/src/imageops/filter_1d.rs new file mode 100644 index 0000000000..d70cf25221 --- /dev/null +++ b/src/imageops/filter_1d.rs @@ -0,0 +1,967 @@ +#![forbid(unsafe_code)] +use crate::error::{LimitError, LimitErrorKind}; +use crate::ImageError; +use num_traits::{AsPrimitive, MulAdd}; +use std::ops::{Add, Mul}; + +#[cfg(any( + all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "fma" + ), + all(target_arch = "aarch64", target_feature = "neon") +))] +#[inline(always)] +/// Uses fused multiply add when available +/// +/// It is important not to call it if FMA flag is not turned on, +/// Rust compiles extremely slow case in this case (probably does software FMA?), +/// and we want to avoid this +fn mla + Add + MulAdd>( + acc: T, + a: T, + b: T, +) -> T { + MulAdd::mul_add(a, b, acc) +} + +#[inline(always)] +#[cfg(not(any( + all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "fma" + ), + all(target_arch = "aarch64", target_feature = "neon") +)))] +fn mla + Add + MulAdd>( + acc: T, + a: T, + b: T, +) -> T { + acc + a * b +} + +struct RowArena { + arena: Vec, +} + +#[derive(Debug, Clone, Copy)] +struct KernelShape { + width: usize, + height: usize, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct FilterImageSize { + pub(crate) width: usize, + pub(crate) height: usize, +} + +/// Pads an image row with *clamp* strategy +/// +/// This method copies real content into center of new buffer +/// and filling leading and trailing physical padded parts with *clamp* strategy +fn make_arena_row( + image: &[T], + source_y: usize, + image_size: FilterImageSize, + kernel_size: KernelShape, +) -> RowArena +where + T: Default + Copy + Send + Sync + 'static, + f64: AsPrimitive, +{ + assert_eq!( + image.len(), + COMPONENTS * image_size.width * image_size.height + ); + + let pad_w = kernel_size.width / 2; + + let arena_width = image_size.width * COMPONENTS + pad_w * 2 * COMPONENTS; + let mut row = vec![T::default(); arena_width]; + + let source_offset = source_y * image_size.width * COMPONENTS; + + let source_row = &image[source_offset..(source_offset + image_size.width * COMPONENTS)]; + + let row_dst = + &mut row[pad_w * COMPONENTS..(pad_w * COMPONENTS + image_size.width * COMPONENTS)]; + + for (dst, src) in row_dst.iter_mut().zip(source_row.iter()) { + *dst = *src; + } + + for (x, dst) in (0..pad_w).zip(row.chunks_exact_mut(COMPONENTS)) { + let old_x = x.saturating_sub(pad_w).min(image_size.width - 1); + let old_px = old_x * COMPONENTS; + let src_iter = &source_row[old_px..(old_px + COMPONENTS)]; + for (dst, src) in dst.iter_mut().zip(src_iter.iter()) { + *dst = *src; + } + } + + for (x, dst) in + (image_size.width..(image_size.width + pad_w)).zip(row.chunks_exact_mut(COMPONENTS).rev()) + { + let old_x = x.max(0).min(image_size.width - 1); + let old_px = old_x * COMPONENTS; + let src_iter = &source_row[old_px..(old_px + COMPONENTS)]; + for (dst, src) in dst.iter_mut().zip(src_iter.iter()) { + *dst = *src; + } + } + + RowArena { arena: row } +} + +#[derive(Clone)] +struct ArenaColumns +where + T: Copy, +{ + pub top_pad: Vec, + pub bottom_pad: Vec, +} + +/// Pads a column image with *clamp* strategy +/// +/// This method is used for *virtual* column padding. +/// It produces two arrays that represents virtual image top part and bottom +fn make_columns_arenas( + image: &[T], + image_size: FilterImageSize, + kernel_size: KernelShape, +) -> ArenaColumns +where + T: Default + Copy + Send + Sync + 'static, + f64: AsPrimitive, +{ + assert_eq!( + image.len(), + COMPONENTS * image_size.width * image_size.height + ); + let pad_h = kernel_size.height / 2; + + let mut top_pad = vec![T::default(); pad_h * image_size.width * COMPONENTS]; + let mut bottom_pad = vec![T::default(); pad_h * image_size.width * COMPONENTS]; + + let top_pad_stride = image_size.width * COMPONENTS; + + for (ky, dst) in (0..pad_h).zip(top_pad.chunks_exact_mut(top_pad_stride)) { + for (kx, dst) in (0..image_size.width).zip(dst.chunks_exact_mut(COMPONENTS)) { + let y = ky.saturating_sub(pad_h).min(image_size.height - 1); + let v_src = y * top_pad_stride + kx * COMPONENTS; + + let src_iter = &image[v_src..(v_src + COMPONENTS)]; + for (dst, src) in dst.iter_mut().zip(src_iter.iter()) { + *dst = *src; + } + } + } + + let bottom_iter_dst = bottom_pad.chunks_exact_mut(top_pad_stride); + + for (ky, dst) in (0..pad_h).zip(bottom_iter_dst) { + for (kx, dst) in (0..image_size.width).zip(dst.chunks_exact_mut(COMPONENTS)) { + let y = (ky + image_size.height).min(image_size.height - 1); + let v_src = y * top_pad_stride + kx * COMPONENTS; + let src_iter = &image[v_src..(v_src + COMPONENTS)]; + for (dst, src) in dst.iter_mut().zip(src_iter.iter()) { + *dst = *src; + } + } + } + + ArenaColumns { + top_pad, + bottom_pad, + } +} + +trait ToStorage { + fn to_(self) -> S; +} + +const PRECISION: i32 = 15; + +impl ToStorage for i32 { + #[inline(always)] + #[allow(clippy::manual_clamp)] + fn to_(self) -> u8 { + ((self + (1 << (PRECISION - 1))) >> PRECISION) + .min(255) + .max(0) as u8 + } +} + +impl ToStorage for f32 { + #[inline(always)] + fn to_(self) -> u16 { + self.round().min(u16::MAX as f32).max(0f32) as u16 + } +} + +impl ToStorage for f32 { + #[inline(always)] + fn to_(self) -> f32 { + self + } +} + +#[derive(Copy, Clone)] +struct Arena<'a, T> { + src: &'a [T], + components: usize, +} + +#[repr(C)] +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Copy)] +struct ColorGroup { + r: J, + g: J, + b: J, + a: J, +} + +macro_rules! ld_group { + ($store: expr, $channels: expr, $offset: expr) => {{ + if $channels == 1 { + ColorGroup { + r: $store[$offset].as_(), + g: 0.as_(), + b: 0.as_(), + a: 0.as_(), + } + } else if $channels == 2 { + ColorGroup { + r: $store[$offset].as_(), + g: $store[$offset + 1].as_(), + b: 0.as_(), + a: 0.as_(), + } + } else if $channels == 3 { + ColorGroup { + r: $store[$offset].as_(), + g: $store[$offset + 1].as_(), + b: $store[$offset + 2].as_(), + a: 0.as_(), + } + } else if $channels == 4 { + ColorGroup { + r: $store[$offset].as_(), + g: $store[$offset + 1].as_(), + b: $store[$offset + 2].as_(), + a: $store[$offset + 3].as_(), + } + } else { + unimplemented!() + } + }}; +} + +impl ColorGroup +where + J: Copy + Default, +{ + #[inline] + fn from_components(r: J, g: J, b: J, a: J) -> ColorGroup { + ColorGroup { r, g, b, a } + } +} + +impl Mul for ColorGroup +where + J: Copy + Mul + Default + 'static, +{ + type Output = Self; + + #[inline] + fn mul(self, rhs: J) -> Self::Output { + if COMPS == 1 { + ColorGroup::from_components(self.r * rhs, self.g, self.b, self.a) + } else if COMPS == 2 { + ColorGroup::from_components(self.r * rhs, self.g * rhs, self.b, self.a) + } else if COMPS == 3 { + ColorGroup::from_components(self.r * rhs, self.g * rhs, self.b * rhs, self.a) + } else if COMPS == 4 { + ColorGroup::from_components(self.r * rhs, self.g * rhs, self.b * rhs, self.a * rhs) + } else { + unimplemented!() + } + } +} + +impl MulAdd, J> for ColorGroup +where + J: Copy + MulAdd + Default + 'static + Mul + Add, +{ + type Output = Self; + + #[inline] + fn mul_add(self, a: ColorGroup, b: J) -> Self::Output { + if COMPS == 1 { + ColorGroup::from_components(mla(a.r, self.r, b), self.g, self.b, self.a) + } else if COMPS == 2 { + ColorGroup::from_components(mla(a.r, self.r, b), mla(a.g, self.g, b), self.b, self.a) + } else if COMPS == 3 { + ColorGroup::from_components( + mla(a.r, self.r, b), + mla(a.g, self.g, b), + mla(a.b, self.b, b), + self.a, + ) + } else if COMPS == 4 { + ColorGroup::from_components( + mla(a.r, self.r, b), + mla(a.g, self.g, b), + mla(a.b, self.b, b), + mla(a.a, self.a, b), + ) + } else { + unimplemented!(); + } + } +} + +impl Add> for ColorGroup +where + J: Copy + Add + Default + 'static, +{ + type Output = Self; + + #[inline] + fn add(self, rhs: ColorGroup) -> Self::Output { + if COMPS == 1 { + ColorGroup::from_components(self.r + rhs.r, self.g, self.b, self.a) + } else if COMPS == 2 { + ColorGroup::from_components(self.r + rhs.r, self.g + rhs.g, self.b, self.a) + } else if COMPS == 3 { + ColorGroup::from_components(self.r + rhs.r, self.g + rhs.g, self.b + rhs.b, self.a) + } else if COMPS == 4 { + ColorGroup::from_components( + self.r + rhs.r, + self.g + rhs.g, + self.b + rhs.b, + self.a + rhs.a, + ) + } else { + unimplemented!(); + } + } +} + +macro_rules! st_group { + ($store: expr, $channels: expr, $offset: expr, $vl: expr) => {{ + if $channels == 1 { + $store[$offset] = $vl.r.to_(); + } else if $channels == 2 { + $store[$offset] = $vl.r.to_(); + $store[$offset + 1] = $vl.g.to_(); + } else if $channels == 3 { + $store[$offset] = $vl.r.to_(); + $store[$offset + 1] = $vl.g.to_(); + $store[$offset + 2] = $vl.b.to_(); + } else if $channels == 4 { + $store[$offset] = $vl.r.to_(); + $store[$offset + 1] = $vl.g.to_(); + $store[$offset + 2] = $vl.b.to_(); + $store[$offset + 3] = $vl.a.to_(); + } else { + unimplemented!() + } + }}; +} + +/// Performs column convolution pass for symmetrical filter +/// +/// Common convolution formula O(x,y)=∑K(k)⋅I(x,y+k); where sums goes from 0...R +/// when filter is symmetric that we can half kernel reads by using formula +/// O(x,y)=(∑K(k)⋅(I(x,y+k) + I(x,y+(R-k)))) + K(R/2)⋅I(x,y+R/2); where sums goes from 0...R/2 +fn filter_symmetric_column( + arena: Arena, + arena_src: &[&[T]], + dst_row: &mut [T], + image_size: FilterImageSize, + kernel: &[F], +) where + T: Copy + AsPrimitive, + F: ToStorage + + Mul + + MulAdd + + Add + + Default + + Copy + + 'static, +{ + let dst_stride = image_size.width * arena.components; + + let length = kernel.len(); + let half_len = length / 2; + + let mut cx = 0usize; + + while cx + 32 < dst_stride { + let coeff = kernel[half_len]; + + let mut store: [F; 32] = [F::default(); 32]; + + let v_src = &arena_src[half_len][cx..(cx + 32)]; + + for (dst, src) in store.iter_mut().zip(v_src) { + *dst = src.as_().mul(coeff); + } + + for (i, &coeff) in kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &arena_src[i][cx..(cx + 32)]; + let bw = &arena_src[rollback][cx..(cx + 32)]; + + for ((dst, fw), bw) in store.iter_mut().zip(fw).zip(bw) { + *dst = mla(*dst, fw.as_().add(bw.as_()), coeff); + } + } + + let shaped_dst = &mut dst_row[cx..(cx + 32)]; + for (src, dst) in store.iter().zip(shaped_dst.iter_mut()) { + *dst = src.to_(); + } + + cx += 32; + } + + while cx + 16 < dst_stride { + let coeff = kernel[half_len]; + + let mut store: [F; 16] = [F::default(); 16]; + + let v_src = &arena_src[half_len][cx..(cx + 16)]; + + for (dst, src) in store.iter_mut().zip(v_src) { + *dst = src.as_().mul(coeff); + } + + for (i, &coeff) in kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &arena_src[i][cx..(cx + 16)]; + let bw = &arena_src[rollback][cx..(cx + 16)]; + + for ((dst, fw), bw) in store.iter_mut().zip(fw).zip(bw) { + *dst = mla(*dst, fw.as_().add(bw.as_()), coeff); + } + } + + let shaped_dst = &mut dst_row[cx..(cx + 16)]; + for (src, dst) in store.iter().zip(shaped_dst.iter_mut()) { + *dst = src.to_(); + } + + cx += 16; + } + + while cx + 4 < dst_stride { + let coeff = kernel[half_len]; + + let v_src = &arena_src[half_len][cx..(cx + 4)]; + + let mut k0 = v_src[0].as_().mul(coeff); + let mut k1 = v_src[1].as_().mul(coeff); + let mut k2 = v_src[2].as_().mul(coeff); + let mut k3 = v_src[3].as_().mul(coeff); + + for (i, &coeff) in kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &arena_src[i][cx..(cx + 4)]; + let bw = &arena_src[rollback][cx..(cx + 4)]; + k0 = mla(k0, fw[0].as_().add(bw[0].as_()), coeff); + k1 = mla(k1, fw[1].as_().add(bw[1].as_()), coeff); + k2 = mla(k2, fw[2].as_().add(bw[2].as_()), coeff); + k3 = mla(k3, fw[3].as_().add(bw[3].as_()), coeff); + } + + let shaped_dst = &mut dst_row[cx..(cx + 4)]; + shaped_dst[0] = k0.to_(); + shaped_dst[1] = k1.to_(); + shaped_dst[2] = k2.to_(); + shaped_dst[3] = k3.to_(); + cx += 4; + } + + #[allow(clippy::needless_range_loop)] + for x in cx..dst_stride { + let coeff = kernel[half_len]; + + let v_src = &arena_src[half_len][x..(x + 1)]; + + let mut k0 = v_src[0].as_().mul(coeff); + + for (i, &coeff) in kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &arena_src[i][x..(x + 1)]; + let bw = &arena_src[rollback][x..(x + 1)]; + k0 = mla(k0, fw[0].as_().add(bw[0].as_()), coeff); + } + + dst_row[x] = k0.to_(); + } +} + +/// Performs horizontal convolution for row +/// +/// Common convolution formula O(x,y)=∑K(k)⋅I(x+k,y); where sums goes from 0...R +/// when filter is symmetric that we can half kernel reads by using formula +/// O(x,y)=(∑K(k)⋅(I(x+k,y) + I(x+(R-k),y))) + K(R/2)⋅I(x+R/2,y); where sums goes from 0...R/2 +/// +/// It is important to use color groups because for whole group one weight must be applied +fn filter_symmetric_row( + arena: Arena, + dst_row: &mut [T], + image_size: FilterImageSize, + scanned_kernel: &[F], +) where + T: Copy + AsPrimitive + Default, + F: ToStorage + + Mul + + MulAdd + + Default + + Add + + Copy + + 'static, + i32: AsPrimitive, +{ + let width = image_size.width; + + let src = arena.src; + + let length = scanned_kernel.len(); + let half_len = length / 2; + + let mut cx = 0usize; + + while cx + 8 < width { + let v_cx = cx * N; + let src = &src[cx * N..(v_cx + length * N + N * 8)]; + let coeff = scanned_kernel[half_len]; + + let mut k0: ColorGroup = ld_group!(src, N, half_len * N).mul(coeff); + let mut k1: ColorGroup = ld_group!(src, N, half_len * N + N).mul(coeff); + let mut k2: ColorGroup = ld_group!(src, N, half_len * N + N * 2).mul(coeff); + let mut k3: ColorGroup = ld_group!(src, N, half_len * N + N * 3).mul(coeff); + let mut k4: ColorGroup = ld_group!(src, N, half_len * N + N * 4).mul(coeff); + let mut k5: ColorGroup = ld_group!(src, N, half_len * N + N * 5).mul(coeff); + let mut k6: ColorGroup = ld_group!(src, N, half_len * N + N * 6).mul(coeff); + let mut k7: ColorGroup = ld_group!(src, N, half_len * N + N * 7).mul(coeff); + + for (i, &coeff) in scanned_kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &src[(i * N)..((i + 8) * N)]; + let bw = &src[(rollback * N)..((rollback + 8) * N)]; + k0 = ld_group!(fw, N, 0) + .add(ld_group!(bw, N, 0)) + .mul_add(k0, coeff); + k1 = ld_group!(fw, N, N) + .add(ld_group!(bw, N, N)) + .mul_add(k1, coeff); + k2 = ld_group!(fw, N, 2 * N) + .add(ld_group!(bw, N, 2 * N)) + .mul_add(k2, coeff); + k3 = ld_group!(fw, N, 3 * N) + .add(ld_group!(bw, N, 3 * N)) + .mul_add(k3, coeff); + k4 = ld_group!(fw, N, 4 * N) + .add(ld_group!(bw, N, 4 * N)) + .mul_add(k4, coeff); + k5 = ld_group!(fw, N, 5 * N) + .add(ld_group!(bw, N, 5 * N)) + .mul_add(k5, coeff); + k6 = ld_group!(fw, N, 6 * N) + .add(ld_group!(bw, N, 6 * N)) + .mul_add(k6, coeff); + k7 = ld_group!(fw, N, 7 * N) + .add(ld_group!(bw, N, 7 * N)) + .mul_add(k7, coeff); + } + + let dst_offset = cx * N; + + let shaped_dst = &mut dst_row[dst_offset..(dst_offset + N * 8)]; + st_group!(shaped_dst, N, 0, k0); + st_group!(shaped_dst, N, N, k1); + st_group!(shaped_dst, N, N * 2, k2); + st_group!(shaped_dst, N, N * 3, k3); + st_group!(shaped_dst, N, N * 4, k4); + st_group!(shaped_dst, N, N * 5, k5); + st_group!(shaped_dst, N, N * 6, k6); + st_group!(shaped_dst, N, N * 7, k7); + cx += 8; + } + + while cx + 4 < width { + let v_cx = cx * N; + let src = &src[cx * N..(v_cx + length * N + N * 4)]; + let coeff = scanned_kernel[half_len]; + + let mut k0: ColorGroup = ld_group!(src, N, half_len * N).mul(coeff); + let mut k1: ColorGroup = ld_group!(src, N, half_len * N + N).mul(coeff); + let mut k2: ColorGroup = ld_group!(src, N, half_len * N + N * 2).mul(coeff); + let mut k3: ColorGroup = ld_group!(src, N, half_len * N + N * 3).mul(coeff); + + for (i, &coeff) in scanned_kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &src[(i * N)..((i + 4) * N)]; + let bw = &src[(rollback * N)..((rollback + 4) * N)]; + k0 = ld_group!(fw, N, 0) + .add(ld_group!(bw, N, 0)) + .mul_add(k0, coeff); + k1 = ld_group!(fw, N, N) + .add(ld_group!(bw, N, N)) + .mul_add(k1, coeff); + k2 = ld_group!(fw, N, 2 * N) + .add(ld_group!(bw, N, 2 * N)) + .mul_add(k2, coeff); + k3 = ld_group!(fw, N, 3 * N) + .add(ld_group!(bw, N, 3 * N)) + .mul_add(k3, coeff); + } + + let dst_offset = cx * N; + let shaped_dst = &mut dst_row[dst_offset..(dst_offset + N * 4)]; + st_group!(shaped_dst, N, 0, k0); + st_group!(shaped_dst, N, N, k1); + st_group!(shaped_dst, N, N * 2, k2); + st_group!(shaped_dst, N, N * 3, k3); + cx += 4; + } + + #[allow(clippy::needless_range_loop)] + for x in cx..width { + let v_cx = x * N; + let src = &src[v_cx..(v_cx + length * N)]; + let coeff = scanned_kernel[half_len]; + + let mut k0: ColorGroup = ld_group!(src, N, half_len * N).mul(coeff); + + for (i, &coeff) in scanned_kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw = &src[(i * N)..((i + 1) * N)]; + let bw = &src[(rollback * N)..((rollback + 1) * N)]; + k0 = ld_group!(fw, N, 0) + .add(ld_group!(bw, N, 0)) + .mul_add(k0, coeff); + } + + let dst_offset = x * N; + let shaped_dst = &mut dst_row[dst_offset..(dst_offset + N)]; + st_group!(shaped_dst, N, 0, k0); + } +} + +trait KernelTransformer { + fn transform(input: F) -> I; +} + +impl KernelTransformer for u8 { + fn transform(input: f32) -> i32 { + const SCALE: f32 = (1 << PRECISION) as f32; + (input * SCALE).round() as i32 + } +} + +impl KernelTransformer for f32 { + fn transform(input: f32) -> f32 { + input + } +} + +impl KernelTransformer for u16 { + fn transform(input: f32) -> f32 { + input + } +} + +/// Removes meaningless values from kernel preserving symmetry +fn prepare_symmetric_kernel(kernel: &[I]) -> Vec +where + i32: AsPrimitive, +{ + let zeros: I = 0i32.as_(); + let mut new_kernel = kernel.to_vec(); + while new_kernel.len() > 2 + && (new_kernel.last().unwrap().eq(&zeros) && new_kernel.first().unwrap().eq(&zeros)) + { + new_kernel.remove(0); + new_kernel.remove(new_kernel.len() - 1); + } + + new_kernel +} + +/// Performs 2D separable convolution on the image +/// +/// Currently implemented only for symmetrical filters +/// +/// # Arguments +/// +/// * `image`: Single plane image +/// * `destination`: Destination image +/// * `image_size`: Image size see [FilterImageSize] +/// * `row_kernel`: Row kernel, *size must be odd*! +/// * `column_kernel`: Column kernel, *size must be odd*! +fn filter_1d( + image: &[T], + destination: &mut [T], + image_size: FilterImageSize, + row_kernel: &[F], + column_kernel: &[F], +) -> Result<(), ImageError> +where + T: Copy + AsPrimitive + Default + Send + Sync + KernelTransformer + AsPrimitive, + F: Default + 'static + Copy, + I: ToStorage + + Mul + + Add + + MulAdd + + Send + + Sync + + PartialEq + + Default + + 'static + + Copy, + i32: AsPrimitive + AsPrimitive, + f64: AsPrimitive, +{ + if image.len() != image_size.width * image_size.height * N { + return Err(ImageError::Limits(LimitError::from_kind( + LimitErrorKind::DimensionError, + ))); + } + if destination.len() != image_size.width * image_size.height * N { + return Err(ImageError::Limits(LimitError::from_kind( + LimitErrorKind::DimensionError, + ))); + } + + assert_ne!(row_kernel.len() & 1, 0, "Row kernel length must be odd"); + assert_ne!( + column_kernel.len() & 1, + 0, + "Column kernel length must be odd" + ); + + let mut scanned_row_kernel = row_kernel + .iter() + .map(|&x| T::transform(x)) + .collect::>(); + let mut scanned_column_kernel = column_kernel + .iter() + .map(|&x| T::transform(x)) + .collect::>(); + + scanned_row_kernel = prepare_symmetric_kernel(&scanned_row_kernel); + scanned_column_kernel = prepare_symmetric_kernel(&scanned_column_kernel); + + let zeros: I = 0i32.as_(); + + if scanned_row_kernel.is_empty() + || scanned_column_kernel.is_empty() + || (scanned_row_kernel.len() == 1 && scanned_row_kernel.first().unwrap().eq(&zeros)) + || (scanned_column_kernel.len() == 1 && scanned_column_kernel.first().unwrap().eq(&zeros)) + { + destination.fill(T::default()); + return Ok(()); + } + + let mut transient_image = vec![T::default(); image_size.width * image_size.height * N]; + + for (y, dst) in transient_image + .chunks_exact_mut(image_size.width * N) + .enumerate() + { + // This is important to perform padding before convolution. + // That approach allows to iterate without unnecessary branching + // and highly effective from low sized kernels to reasonable huge kernels. + // It is especially more effective for low sized kernels than copying + // specifically for big sized images. + // If image row is `asdfgh` then this method with clamp will produce row + // `aaa asdfgh hhh` padded exactly on half kernel size + let arena = make_arena_row::( + image, + y, + image_size, + KernelShape { + width: row_kernel.len(), + height: 0, + }, + ); + + filter_symmetric_row::( + Arena { + src: &arena.arena, + components: N, + }, + dst, + image_size, + &scanned_row_kernel, + ); + } + + let column_kernel_shape = KernelShape { + width: 0, + height: scanned_column_kernel.len(), + }; + + // This is important to perform padding before convolution. + // That approach allows to iterate without unnecessary branching + // and highly effective from low sized kernels to reasonable huge kernels. + // It is especially more effective for low sized kernels than copying + // specifically for big sized images. + // `This is virtual padding!` that means it produces two non-contiguous arrays. + // They will virtually replace image row, when convolution kernel goes out of bounds on Y coordinate. + let column_arena_k = + make_columns_arenas::(transient_image.as_slice(), image_size, column_kernel_shape); + + let top_pad = column_arena_k.top_pad.as_slice(); + let bottom_pad = column_arena_k.bottom_pad.as_slice(); + + let pad_h = column_kernel_shape.height / 2; + + let transient_image_slice = transient_image.as_slice(); + + let src_stride = image_size.width * N; + + for (y, dst) in destination + .chunks_exact_mut(image_size.width * N) + .enumerate() + { + let mut brows: Vec<&[T]> = vec![&transient_image_slice[0..]; column_kernel_shape.height]; + + for (k, row) in (0..column_kernel_shape.height).zip(brows.iter_mut()) { + if (y as i64 - pad_h as i64 + k as i64) < 0 { + *row = &top_pad[(pad_h - k - 1) * src_stride..]; + } else if (y as i64 - pad_h as i64 + k as i64) as usize >= image_size.height { + *row = &bottom_pad[(k - pad_h - 1) * src_stride..]; + } else { + let fy = (y as i64 + k as i64 - pad_h as i64) as usize; + let start_offset = src_stride * fy; + *row = &transient_image_slice[start_offset..(start_offset + src_stride)]; + } + } + + let empty_vec = vec![]; + + let brows_slice = brows.as_slice(); + + filter_symmetric_column( + Arena { + src: &empty_vec, + components: N, + }, + brows_slice, + dst, + image_size, + &scanned_column_kernel, + ); + } + + Ok(()) +} + +pub(crate) fn filter_1d_plane( + image: &[u8], + destination: &mut [u8], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_la( + image: &[u8], + destination: &mut [u8], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgb( + image: &[u8], + destination: &mut [u8], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgba( + image: &[u8], + destination: &mut [u8], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgb_f32( + image: &[f32], + destination: &mut [f32], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgba_f32( + image: &[f32], + destination: &mut [f32], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgb_u16( + image: &[u16], + destination: &mut [u16], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_rgba_u16( + image: &[u16], + destination: &mut [u16], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_la_u16( + image: &[u16], + destination: &mut [u16], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_plane_u16( + image: &[u16], + destination: &mut [u16], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} diff --git a/src/imageops/mod.rs b/src/imageops/mod.rs index 61a742e038..82b46133e5 100644 --- a/src/imageops/mod.rs +++ b/src/imageops/mod.rs @@ -17,8 +17,8 @@ pub use self::affine::{ /// Image sampling pub use self::sample::{ - blur, filter3x3, interpolate_bilinear, interpolate_nearest, resize, sample_bilinear, - sample_nearest, thumbnail, unsharpen, + blur, filter3x3, gaussian_blur_dyn_image, interpolate_bilinear, interpolate_nearest, resize, + sample_bilinear, sample_nearest, thumbnail, unsharpen, }; /// Color operations @@ -32,6 +32,7 @@ mod affine; // https://github.com/rust-lang/rust/issues/18241 pub mod colorops; mod fast_blur; +mod filter_1d; mod sample; pub use fast_blur::fast_blur; diff --git a/src/imageops/sample.rs b/src/imageops/sample.rs index 1f6ce53a77..4dab66510d 100644 --- a/src/imageops/sample.rs +++ b/src/imageops/sample.rs @@ -3,14 +3,23 @@ // See http://cs.brown.edu/courses/cs123/lectures/08_Image_Processing_IV.pdf // for some of the theory behind image scaling and convolution -use std::f32; - use num_traits::{NumCast, ToPrimitive, Zero}; +use std::f32; +use std::ops::Mul; +use crate::buffer_::{Gray16Image, GrayAlpha16Image, Rgb16Image, Rgba16Image}; use crate::image::{GenericImage, GenericImageView}; +use crate::imageops::filter_1d::{ + filter_1d_la, filter_1d_la_u16, filter_1d_plane, filter_1d_plane_u16, filter_1d_rgb, + filter_1d_rgb_f32, filter_1d_rgb_u16, filter_1d_rgba, filter_1d_rgba_f32, filter_1d_rgba_u16, + FilterImageSize, +}; use crate::traits::{Enlargeable, Pixel, Primitive}; use crate::utils::clamp; -use crate::{ImageBuffer, Rgba32FImage}; +use crate::{ + DynamicImage, GrayAlphaImage, GrayImage, ImageBuffer, Rgb32FImage, RgbImage, Rgba32FImage, + RgbaImage, +}; /// Available Sampling Filters. /// @@ -1013,6 +1022,188 @@ where horizontal_sample(&tmp, width, &mut method) } +/// In previous implementation sigma means radius, which is not the same one +pub fn gaussian_blur_dyn_image(image: &DynamicImage, radius: f32) -> DynamicImage { + fn get_sigma_size(kernel_size: usize) -> f32 { + 0.3f32 * ((kernel_size as f32 - 1.) * 0.5f32 - 1f32) + 0.8f32 + } + + fn get_gaussian_kernel_1d(width: usize, sigma: f32) -> Vec { + let mut sum_norm: f32 = 0f32; + let mut kernel = vec![0f32; width]; + let scale = 1f32 / (f32::sqrt(2f32 * f32::consts::PI) * sigma); + let mean = (width / 2) as f32; + + for (x, weight) in kernel.iter_mut().enumerate() { + let new_weight = + f32::exp(-0.5f32 * f32::powf((x as f32 - mean) / sigma, 2.0f32)) * scale; + *weight = new_weight; + sum_norm += new_weight; + } + + if sum_norm != 0f32 { + let sum_scale = 1f32 / sum_norm; + for weight in kernel.iter_mut() { + *weight = weight.mul(sum_scale); + } + } + + kernel + } + + let kernel_size = radius.max(1.) as usize * 2 + 1; + let sigma = get_sigma_size(kernel_size); + let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, sigma); + + let filter_image_size = FilterImageSize { + width: image.width() as usize, + height: image.height() as usize, + }; + + match image { + DynamicImage::ImageLuma8(img) => { + let mut dest_image = vec![0u8; img.len()]; + filter_1d_plane( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageLuma8( + GrayImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageLumaA8(img) => { + let mut dest_image = vec![0u8; img.len()]; + filter_1d_la( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageLumaA8( + GrayAlphaImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgb8(img) => { + let mut dest_image = vec![0u8; img.len()]; + filter_1d_rgb( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgb8( + RgbImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgba8(img) => { + let mut dest_image = vec![0u8; img.len()]; + filter_1d_rgba( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgba8( + RgbaImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageLuma16(img) => { + let mut dest_image = vec![0u16; img.len()]; + filter_1d_plane_u16( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageLuma16( + Gray16Image::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageLumaA16(img) => { + let mut dest_image = vec![0u16; img.len()]; + filter_1d_la_u16( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageLumaA16( + GrayAlpha16Image::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgb16(img) => { + let mut dest_image = vec![0u16; img.len()]; + filter_1d_rgb_u16( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgb16( + Rgb16Image::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgba16(img) => { + let mut dest_image = vec![0u16; img.len()]; + filter_1d_rgba_u16( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgba16( + Rgba16Image::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgb32F(img) => { + let mut dest_image = vec![0f32; img.len()]; + filter_1d_rgb_f32( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgb32F( + Rgb32FImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + DynamicImage::ImageRgba32F(img) => { + let mut dest_image = vec![0f32; img.len()]; + filter_1d_rgba_f32( + img.as_raw(), + &mut dest_image, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + DynamicImage::ImageRgba32F( + Rgba32FImage::from_raw(img.width(), img.height(), dest_image).unwrap(), + ) + } + } +} + /// Performs an unsharpen mask on the supplied image. /// ```sigma``` is the amount to blur the image by. /// ```threshold``` is the threshold for minimal brightness change that will be sharpened. From b5fa826c8608c401a667bfaa066835ce56f7ff43 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 3 Dec 2024 22:14:13 +0000 Subject: [PATCH 2/9] Loop unrolling changed --- src/imageops/filter_1d.rs | 70 ++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/src/imageops/filter_1d.rs b/src/imageops/filter_1d.rs index d70cf25221..f09971ea63 100644 --- a/src/imageops/filter_1d.rs +++ b/src/imageops/filter_1d.rs @@ -2,7 +2,9 @@ use crate::error::{LimitError, LimitErrorKind}; use crate::ImageError; use num_traits::{AsPrimitive, MulAdd}; +use std::mem::size_of; use std::ops::{Add, Mul}; +use std::time::Instant; #[cfg(any( all( @@ -401,33 +403,55 @@ fn filter_symmetric_column( let mut cx = 0usize; - while cx + 32 < dst_stride { - let coeff = kernel[half_len]; + if size_of::() == 1 { + while cx + 32 < dst_stride { + let coeff = kernel[half_len]; - let mut store: [F; 32] = [F::default(); 32]; + let mut store0: [F; 16] = [F::default(); 16]; + let mut store1: [F; 16] = [F::default(); 16]; - let v_src = &arena_src[half_len][cx..(cx + 32)]; + let v_src0 = &arena_src[half_len][cx..(cx + 16)]; + let v_src1 = &arena_src[half_len][(cx + 16)..(cx + 32)]; - for (dst, src) in store.iter_mut().zip(v_src) { - *dst = src.as_().mul(coeff); - } + for (dst, src) in store0.iter_mut().zip(v_src0) { + *dst = src.as_().mul(coeff); + } + for (dst, src) in store1.iter_mut().zip(v_src1) { + *dst = src.as_().mul(coeff); + } - for (i, &coeff) in kernel.iter().take(half_len).enumerate() { - let rollback = length - i - 1; - let fw = &arena_src[i][cx..(cx + 32)]; - let bw = &arena_src[rollback][cx..(cx + 32)]; + for (i, &coeff) in kernel.iter().take(half_len).enumerate() { + let rollback = length - i - 1; + let fw_src = arena_src[i]; + let rw_src = arena_src[rollback]; + let fw0 = &fw_src[cx..(cx + 16)]; + let bw0 = &rw_src[cx..(cx + 16)]; + let fw1 = &fw_src[(cx + 16)..(cx + 32)]; + let bw1 = &rw_src[(cx + 16)..(cx + 32)]; + + for ((dst, fw), bw) in store0.iter_mut().zip(fw0).zip(bw0) { + *dst = mla(*dst, fw.as_().add(bw.as_()), coeff); + } + + for ((dst, fw), bw) in store1.iter_mut().zip(fw1).zip(bw1) { + *dst = mla(*dst, fw.as_().add(bw.as_()), coeff); + } + } - for ((dst, fw), bw) in store.iter_mut().zip(fw).zip(bw) { - *dst = mla(*dst, fw.as_().add(bw.as_()), coeff); + let shaped_dst0 = &mut dst_row[cx..(cx + 16)]; + + for (src, dst) in store0.iter().zip(shaped_dst0.iter_mut()) { + *dst = src.to_(); } - } - let shaped_dst = &mut dst_row[cx..(cx + 32)]; - for (src, dst) in store.iter().zip(shaped_dst.iter_mut()) { - *dst = src.to_(); - } + let shaped_dst1 = &mut dst_row[(cx + 16)..(cx + 32)]; + + for (src, dst) in store1.iter().zip(shaped_dst1.iter_mut()) { + *dst = src.to_(); + } - cx += 32; + cx += 32; + } } while cx + 16 < dst_stride { @@ -773,6 +797,8 @@ where let mut transient_image = vec![T::default(); image_size.width * image_size.height * N]; + let start_time = Instant::now(); + for (y, dst) in transient_image .chunks_exact_mut(image_size.width * N) .enumerate() @@ -805,6 +831,8 @@ where ); } + println!("Horizontal time {:?}", start_time.elapsed()); + let column_kernel_shape = KernelShape { width: 0, height: scanned_column_kernel.len(), @@ -829,6 +857,8 @@ where let src_stride = image_size.width * N; + let start_time = Instant::now(); + for (y, dst) in destination .chunks_exact_mut(image_size.width * N) .enumerate() @@ -863,6 +893,8 @@ where ); } + println!("Vertical time {:?}", start_time.elapsed()); + Ok(()) } From 1c73c52f4137162f0b1cc11197f36f1d46e5f6b6 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 3 Dec 2024 22:34:11 +0000 Subject: [PATCH 3/9] Removed time --- src/imageops/filter_1d.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/imageops/filter_1d.rs b/src/imageops/filter_1d.rs index f09971ea63..e7bc137ec6 100644 --- a/src/imageops/filter_1d.rs +++ b/src/imageops/filter_1d.rs @@ -4,7 +4,6 @@ use crate::ImageError; use num_traits::{AsPrimitive, MulAdd}; use std::mem::size_of; use std::ops::{Add, Mul}; -use std::time::Instant; #[cfg(any( all( @@ -797,8 +796,6 @@ where let mut transient_image = vec![T::default(); image_size.width * image_size.height * N]; - let start_time = Instant::now(); - for (y, dst) in transient_image .chunks_exact_mut(image_size.width * N) .enumerate() @@ -831,8 +828,6 @@ where ); } - println!("Horizontal time {:?}", start_time.elapsed()); - let column_kernel_shape = KernelShape { width: 0, height: scanned_column_kernel.len(), @@ -857,8 +852,6 @@ where let src_stride = image_size.width * N; - let start_time = Instant::now(); - for (y, dst) in destination .chunks_exact_mut(image_size.width * N) .enumerate() @@ -893,8 +886,6 @@ where ); } - println!("Vertical time {:?}", start_time.elapsed()); - Ok(()) } From ed9078ab0d2962f69d168fc937ffc308885c3c65 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Thu, 5 Dec 2024 10:03:58 +0000 Subject: [PATCH 4/9] Fix clippy --- src/imageops/filter_1d.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imageops/filter_1d.rs b/src/imageops/filter_1d.rs index e7bc137ec6..8fb2a66fdc 100644 --- a/src/imageops/filter_1d.rs +++ b/src/imageops/filter_1d.rs @@ -121,8 +121,8 @@ struct ArenaColumns where T: Copy, { - pub top_pad: Vec, - pub bottom_pad: Vec, + top_pad: Vec, + bottom_pad: Vec, } /// Pads a column image with *clamp* strategy From 64022dcdec8f4b391ec5d78f8e67f1d43dddf157 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 17 Dec 2024 00:07:35 +0000 Subject: [PATCH 5/9] Handle generic image --- src/imageops/filter_1d.rs | 20 ++++ src/imageops/mod.rs | 3 +- src/imageops/sample.rs | 232 ++++++++++++++++++++++++++++++-------- 3 files changed, 207 insertions(+), 48 deletions(-) diff --git a/src/imageops/filter_1d.rs b/src/imageops/filter_1d.rs index 8fb2a66fdc..2081faa562 100644 --- a/src/imageops/filter_1d.rs +++ b/src/imageops/filter_1d.rs @@ -929,6 +929,26 @@ pub(crate) fn filter_1d_rgba( filter_1d::(image, destination, image_size, row_kernel, column_kernel) } +pub(crate) fn filter_1d_la_f32( + image: &[f32], + destination: &mut [f32], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + +pub(crate) fn filter_1d_plane_f32( + image: &[f32], + destination: &mut [f32], + image_size: FilterImageSize, + row_kernel: &[f32], + column_kernel: &[f32], +) -> Result<(), ImageError> { + filter_1d::(image, destination, image_size, row_kernel, column_kernel) +} + pub(crate) fn filter_1d_rgb_f32( image: &[f32], destination: &mut [f32], diff --git a/src/imageops/mod.rs b/src/imageops/mod.rs index 82b46133e5..587ca6bed1 100644 --- a/src/imageops/mod.rs +++ b/src/imageops/mod.rs @@ -17,9 +17,10 @@ pub use self::affine::{ /// Image sampling pub use self::sample::{ - blur, filter3x3, gaussian_blur_dyn_image, interpolate_bilinear, interpolate_nearest, resize, + blur, filter3x3, interpolate_bilinear, interpolate_nearest, resize, sample_bilinear, sample_nearest, thumbnail, unsharpen, }; +pub(crate) use self::sample::gaussian_blur_dyn_image; /// Color operations pub use self::colorops::{ diff --git a/src/imageops/sample.rs b/src/imageops/sample.rs index 5d3949b4d0..573cf90524 100644 --- a/src/imageops/sample.rs +++ b/src/imageops/sample.rs @@ -10,9 +10,9 @@ use std::ops::Mul; use crate::buffer_::{Gray16Image, GrayAlpha16Image, Rgb16Image, Rgba16Image}; use crate::image::{GenericImage, GenericImageView}; use crate::imageops::filter_1d::{ - filter_1d_la, filter_1d_la_u16, filter_1d_plane, filter_1d_plane_u16, filter_1d_rgb, - filter_1d_rgb_f32, filter_1d_rgb_u16, filter_1d_rgba, filter_1d_rgba_f32, filter_1d_rgba_u16, - FilterImageSize, + filter_1d_la, filter_1d_la_f32, filter_1d_la_u16, filter_1d_plane, filter_1d_plane_f32, + filter_1d_plane_u16, filter_1d_rgb, filter_1d_rgb_f32, filter_1d_rgb_u16, filter_1d_rgba, + filter_1d_rgba_f32, filter_1d_rgba_u16, FilterImageSize, }; use crate::traits::{Enlargeable, Pixel, Primitive}; use crate::utils::clamp; @@ -1002,58 +1002,53 @@ pub fn blur( where I::Pixel: 'static, { - let sigma = if sigma <= 0.0 { 1.0 } else { sigma }; + // let sigma = if sigma <= 0.0 { 1.0 } else { sigma }; + // + // let mut method = Filter { + // kernel: Box::new(|x| gaussian(x, sigma)), + // support: 2.0 * sigma, + // }; + // + // let (width, height) = image.dimensions(); + // let is_empty = width == 0 || height == 0; + // + // if is_empty { + // return ImageBuffer::new(width, height); + // } + // + // // Keep width and height the same for horizontal and + // // vertical sampling. + // // Note: tmp is not necessarily actually Rgba + // let tmp: Rgba32FImage = vertical_sample(image, height, &mut method); + // horizontal_sample(&tmp, width, &mut method) + gaussian_blur_indirect(image, sigma) +} - let mut method = Filter { - kernel: Box::new(|x| gaussian(x, sigma)), - support: 2.0 * sigma, - }; +fn get_gaussian_kernel_1d(width: usize, sigma: f32) -> Vec { + let mut sum_norm: f32 = 0f32; + let mut kernel = vec![0f32; width]; + let scale = 1f32 / (f32::sqrt(2f32 * f32::consts::PI) * sigma); + let mean = (width / 2) as f32; - let (width, height) = image.dimensions(); - let is_empty = width == 0 || height == 0; + for (x, weight) in kernel.iter_mut().enumerate() { + let new_weight = f32::exp(-0.5f32 * f32::powf((x as f32 - mean) / sigma, 2.0f32)) * scale; + *weight = new_weight; + sum_norm += new_weight; + } - if is_empty { - return ImageBuffer::new(width, height); + if sum_norm != 0f32 { + let sum_scale = 1f32 / sum_norm; + for weight in kernel.iter_mut() { + *weight = weight.mul(sum_scale); + } } - // Keep width and height the same for horizontal and - // vertical sampling. - // Note: tmp is not necessarily actually Rgba - let tmp: Rgba32FImage = vertical_sample(image, height, &mut method); - horizontal_sample(&tmp, width, &mut method) + kernel } /// In previous implementation sigma means radius, which is not the same one -pub fn gaussian_blur_dyn_image(image: &DynamicImage, radius: f32) -> DynamicImage { - fn get_sigma_size(kernel_size: usize) -> f32 { - 0.3f32 * ((kernel_size as f32 - 1.) * 0.5f32 - 1f32) + 0.8f32 - } - - fn get_gaussian_kernel_1d(width: usize, sigma: f32) -> Vec { - let mut sum_norm: f32 = 0f32; - let mut kernel = vec![0f32; width]; - let scale = 1f32 / (f32::sqrt(2f32 * f32::consts::PI) * sigma); - let mean = (width / 2) as f32; - - for (x, weight) in kernel.iter_mut().enumerate() { - let new_weight = - f32::exp(-0.5f32 * f32::powf((x as f32 - mean) / sigma, 2.0f32)) * scale; - *weight = new_weight; - sum_norm += new_weight; - } - - if sum_norm != 0f32 { - let sum_scale = 1f32 / sum_norm; - for weight in kernel.iter_mut() { - *weight = weight.mul(sum_scale); - } - } - - kernel - } - - let kernel_size = radius.max(1.) as usize * 2 + 1; - let sigma = get_sigma_size(kernel_size); +pub(crate) fn gaussian_blur_dyn_image(image: &DynamicImage, sigma: f32) -> DynamicImage { + let kernel_size = sigma.max(1.) as usize * 2 + 1; let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, sigma); let filter_image_size = FilterImageSize { @@ -1205,6 +1200,149 @@ pub fn gaussian_blur_dyn_image(image: &DynamicImage, radius: f32) -> DynamicImag } } +pub(crate) fn gaussian_blur_indirect( + image: &I, + sigma: f32, +) -> ImageBuffer::Subpixel>> +where + I::Pixel: 'static, +{ + match I::Pixel::CHANNEL_COUNT { + 1 => gaussian_blur_indirect_impl::(image, sigma), + 2 => gaussian_blur_indirect_impl::(image, sigma), + 3 => gaussian_blur_indirect_impl::(image, sigma), + 4 => gaussian_blur_indirect_impl::(image, sigma), + _ => unimplemented!(), + } +} + +fn gaussian_blur_indirect_impl( + image: &I, + sigma: f32, +) -> ImageBuffer::Subpixel>> +where + I::Pixel: 'static, +{ + let mut transient = vec![0f32; image.width() as usize * image.height() as usize * CN]; + for (pixel, dst) in image.pixels().zip(transient.chunks_exact_mut(CN)) { + let px = pixel.2.channels(); + match CN { + 1 => { + dst[0] = NumCast::from(px[0]).unwrap(); + } + 2 => { + dst[0] = NumCast::from(px[0]).unwrap(); + dst[1] = NumCast::from(px[1]).unwrap(); + } + 3 => { + dst[0] = NumCast::from(px[0]).unwrap(); + dst[1] = NumCast::from(px[1]).unwrap(); + dst[2] = NumCast::from(px[2]).unwrap(); + } + 4 => { + dst[0] = NumCast::from(px[0]).unwrap(); + dst[1] = NumCast::from(px[1]).unwrap(); + dst[2] = NumCast::from(px[2]).unwrap(); + dst[3] = NumCast::from(px[3]).unwrap(); + } + _ => unreachable!(), + } + } + + let mut transient_dst = vec![0f32; image.width() as usize * image.height() as usize * CN]; + + let kernel_size = sigma.max(1.) as usize * 2 + 1; + let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, sigma); + + let filter_image_size = FilterImageSize { + width: image.width() as usize, + height: image.height() as usize, + }; + + match CN { + 1 => { + filter_1d_plane_f32( + &transient, + &mut transient_dst, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + } + 2 => { + filter_1d_la_f32( + &transient, + &mut transient_dst, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + } + 3 => { + filter_1d_rgb_f32( + &transient, + &mut transient_dst, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + } + 4 => { + filter_1d_rgba_f32( + &transient, + &mut transient_dst, + filter_image_size, + &gaussian_kernel, + &gaussian_kernel, + ) + .unwrap(); + } + _ => unreachable!(), + } + + let mut out = ImageBuffer::new(image.width(), image.height()); + for (dst, src) in out.pixels_mut().zip(transient_dst.chunks_exact_mut(CN)) { + match CN { + 1 => { + let v0 = NumCast::from(FloatNearest(src[0])).unwrap(); + #[allow(deprecated)] + let t = Pixel::from_channels(v0, v0, v0, v0); + *dst = t; + } + 2 => { + let v0 = NumCast::from(FloatNearest(src[0])).unwrap(); + let v1 = NumCast::from(FloatNearest(src[1])).unwrap(); + #[allow(deprecated)] + let t = Pixel::from_channels(v0, v1, v0, v0); + *dst = t; + } + 3 => { + let v0 = NumCast::from(FloatNearest(src[0])).unwrap(); + let v1 = NumCast::from(FloatNearest(src[1])).unwrap(); + let v2 = NumCast::from(FloatNearest(src[2])).unwrap(); + #[allow(deprecated)] + let t = Pixel::from_channels(v0, v1, v2, v0); + *dst = t; + } + 4 => { + let v0 = NumCast::from(FloatNearest(src[0])).unwrap(); + let v1 = NumCast::from(FloatNearest(src[1])).unwrap(); + let v2 = NumCast::from(FloatNearest(src[2])).unwrap(); + let v3 = NumCast::from(FloatNearest(src[3])).unwrap(); + #[allow(deprecated)] + let t = Pixel::from_channels(v0, v1, v2, v3); + *dst = t; + } + _ => unreachable!(), + } + } + + out +} + /// Performs an unsharpen mask on the supplied image. /// ```sigma``` is the amount to blur the image by. /// ```threshold``` is the threshold for minimal brightness change that will be sharpened. From c87304e58a5fe1011dbc5b83d7c56db783017dc6 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 17 Dec 2024 00:14:02 +0000 Subject: [PATCH 6/9] Fix base sigma level --- src/imageops/sample.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/imageops/sample.rs b/src/imageops/sample.rs index 573cf90524..e66e9a5d01 100644 --- a/src/imageops/sample.rs +++ b/src/imageops/sample.rs @@ -1048,8 +1048,9 @@ fn get_gaussian_kernel_1d(width: usize, sigma: f32) -> Vec { /// In previous implementation sigma means radius, which is not the same one pub(crate) fn gaussian_blur_dyn_image(image: &DynamicImage, sigma: f32) -> DynamicImage { - let kernel_size = sigma.max(1.) as usize * 2 + 1; - let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, sigma); + let min_sigma = sigma.max(0.1); + let kernel_size = min_sigma as usize * 2 + 1; + let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, min_sigma); let filter_image_size = FilterImageSize { width: image.width() as usize, @@ -1251,8 +1252,10 @@ where let mut transient_dst = vec![0f32; image.width() as usize * image.height() as usize * CN]; - let kernel_size = sigma.max(1.) as usize * 2 + 1; - let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, sigma); + let min_sigma = sigma.max(0.1); + + let kernel_size = min_sigma as usize * 2 + 1; + let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, min_sigma); let filter_image_size = FilterImageSize { width: image.width() as usize, From 581362e06327879e97a5aab95c54e47ef0cf7d06 Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 17 Dec 2024 00:15:02 +0000 Subject: [PATCH 7/9] Cargo fmt --- src/imageops/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/imageops/mod.rs b/src/imageops/mod.rs index 587ca6bed1..fba6be4a31 100644 --- a/src/imageops/mod.rs +++ b/src/imageops/mod.rs @@ -15,12 +15,12 @@ pub use self::affine::{ rotate90, rotate90_in, }; +pub(crate) use self::sample::gaussian_blur_dyn_image; /// Image sampling pub use self::sample::{ - blur, filter3x3, interpolate_bilinear, interpolate_nearest, resize, - sample_bilinear, sample_nearest, thumbnail, unsharpen, + blur, filter3x3, interpolate_bilinear, interpolate_nearest, resize, sample_bilinear, + sample_nearest, thumbnail, unsharpen, }; -pub(crate) use self::sample::gaussian_blur_dyn_image; /// Color operations pub use self::colorops::{ From fea58bd935e8766a147084d9f9ea3496e56d251c Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 17 Dec 2024 00:32:34 +0000 Subject: [PATCH 8/9] Drop unnecessary public --- src/imageops/sample.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imageops/sample.rs b/src/imageops/sample.rs index e66e9a5d01..d4dd8665b2 100644 --- a/src/imageops/sample.rs +++ b/src/imageops/sample.rs @@ -1201,7 +1201,7 @@ pub(crate) fn gaussian_blur_dyn_image(image: &DynamicImage, sigma: f32) -> Dynam } } -pub(crate) fn gaussian_blur_indirect( +fn gaussian_blur_indirect( image: &I, sigma: f32, ) -> ImageBuffer::Subpixel>> From b7cf70aac93e2d22f0aa0a5783e5af623bb94add Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Tue, 17 Dec 2024 00:34:29 +0000 Subject: [PATCH 9/9] Drop old methods --- src/dynimage.rs | 1 - src/imageops/sample.rs | 24 ++---------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/src/dynimage.rs b/src/dynimage.rs index ee0d15327a..a86b8a85f0 100644 --- a/src/dynimage.rs +++ b/src/dynimage.rs @@ -831,7 +831,6 @@ impl DynamicImage { #[must_use] pub fn blur(&self, sigma: f32) -> DynamicImage { gaussian_blur_dyn_image(self, sigma) - // dynamic_map!(*self, ref p => imageops::blur(p, sigma)) } /// Performs a fast blur on this image. diff --git a/src/imageops/sample.rs b/src/imageops/sample.rs index d4dd8665b2..9b0a0743f7 100644 --- a/src/imageops/sample.rs +++ b/src/imageops/sample.rs @@ -1002,25 +1002,6 @@ pub fn blur( where I::Pixel: 'static, { - // let sigma = if sigma <= 0.0 { 1.0 } else { sigma }; - // - // let mut method = Filter { - // kernel: Box::new(|x| gaussian(x, sigma)), - // support: 2.0 * sigma, - // }; - // - // let (width, height) = image.dimensions(); - // let is_empty = width == 0 || height == 0; - // - // if is_empty { - // return ImageBuffer::new(width, height); - // } - // - // // Keep width and height the same for horizontal and - // // vertical sampling. - // // Note: tmp is not necessarily actually Rgba - // let tmp: Rgba32FImage = vertical_sample(image, height, &mut method); - // horizontal_sample(&tmp, width, &mut method) gaussian_blur_indirect(image, sigma) } @@ -1046,9 +1027,8 @@ fn get_gaussian_kernel_1d(width: usize, sigma: f32) -> Vec { kernel } -/// In previous implementation sigma means radius, which is not the same one pub(crate) fn gaussian_blur_dyn_image(image: &DynamicImage, sigma: f32) -> DynamicImage { - let min_sigma = sigma.max(0.1); + let min_sigma = sigma.max(1.0f32); let kernel_size = min_sigma as usize * 2 + 1; let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, min_sigma); @@ -1252,7 +1232,7 @@ where let mut transient_dst = vec![0f32; image.width() as usize * image.height() as usize * CN]; - let min_sigma = sigma.max(0.1); + let min_sigma = sigma.max(1.0); let kernel_size = min_sigma as usize * 2 + 1; let gaussian_kernel = get_gaussian_kernel_1d(kernel_size, min_sigma);