From bf2c26b5d9d99973d98a850084ea62af32b403e4 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Thu, 21 Sep 2023 17:30:12 +0000 Subject: [PATCH 1/3] Scaffolding for direct benchmarking of `crate::filter::unfilter`. --- Cargo.toml | 6 +++++ benches/unfilter.rs | 56 +++++++++++++++++++++++++++++++++++++++++++ src/benchable_apis.rs | 17 +++++++++++++ src/common.rs | 22 ++++++++++------- src/lib.rs | 3 +++ 5 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 benches/unfilter.rs create mode 100644 src/benchable_apis.rs diff --git a/Cargo.toml b/Cargo.toml index 37e7e5a7..fcc45e18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,3 +44,9 @@ benchmarks = [] path = "benches/decoder.rs" name = "decoder" harness = false + +[[bench]] +path = "benches/unfilter.rs" +name = "unfilter" +harness = false +required-features = ["benchmarks"] diff --git a/benches/unfilter.rs b/benches/unfilter.rs new file mode 100644 index 00000000..2f6e1f2f --- /dev/null +++ b/benches/unfilter.rs @@ -0,0 +1,56 @@ +//! Usage example: +//! +//! ``` +//! $ alias bench="rustup run nightly cargo bench" +//! $ bench --bench=unfilter --features=benchmarks -- --save-baseline my_baseline +//! ... tweak something, say the Sub filter ... +//! $ bench --bench=unfilter --features=benchmarks -- filter=Sub --baseline my_baseline +//! 
``` + +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use png::benchable_apis::unfilter; +use png::FilterType; +use rand::Rng; + +fn unfilter_all(c: &mut Criterion) { + let bpps = [1, 2, 3, 4, 6, 8]; + let filters = [ + FilterType::Sub, + FilterType::Up, + FilterType::Avg, + FilterType::Paeth, + ]; + for &filter in filters.iter() { + for &bpp in bpps.iter() { + bench_unfilter(c, filter, bpp); + } + } +} + +criterion_group!(benches, unfilter_all); +criterion_main!(benches); + +fn bench_unfilter(c: &mut Criterion, filter: FilterType, bpp: u8) { + let mut group = c.benchmark_group("unfilter"); + + fn get_random_bytes<R: Rng>(rng: &mut R, n: usize) -> Vec<u8> { + use rand::Fill; + let mut result = vec![0u8; n]; + result.as_mut_slice().try_fill(rng).unwrap(); + result + } + let mut rng = rand::thread_rng(); + let row_size = 4096 * (bpp as usize); + let two_rows = get_random_bytes(&mut rng, row_size * 2); + + group.throughput(Throughput::Bytes(row_size as u64)); + group.bench_with_input( + format!("filter={filter:?}/bpp={bpp}"), + &two_rows, + |b, two_rows| { + let (prev_row, curr_row) = two_rows.split_at(row_size); + let mut curr_row = curr_row.to_vec(); + b.iter(|| unfilter(filter, bpp, prev_row, curr_row.as_mut_slice())); + }, + ); +} diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs new file mode 100644 index 00000000..442b6ac5 --- /dev/null +++ b/src/benchable_apis.rs @@ -0,0 +1,17 @@ +//! Development-time-only helper module for exporting private APIs so that they can be benchmarked. +//! This module is gated behind the "benchmarks" feature. + +use crate::common::BytesPerPixel; +use crate::filter::FilterType; + +/// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only +/// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`. 
+pub fn unfilter( + filter: FilterType, + tbpp: u8, + previous: &[u8], + current: &mut [u8], +) { + let tbpp = BytesPerPixel::for_prediction(tbpp as usize); + crate::filter::unfilter(filter, tbpp, previous, current) +} diff --git a/src/common.rs b/src/common.rs index 6e5dbffe..8455d7aa 100644 --- a/src/common.rs +++ b/src/common.rs @@ -594,15 +594,7 @@ impl Info<'_> { /// has the consequence that the number of possible values is rather small. To make this fact /// more obvious in the type system and the optimizer we use an explicit enum here. pub(crate) fn bpp_in_prediction(&self) -> BytesPerPixel { - match self.bytes_per_pixel() { - 1 => BytesPerPixel::One, - 2 => BytesPerPixel::Two, - 3 => BytesPerPixel::Three, - 4 => BytesPerPixel::Four, - 6 => BytesPerPixel::Six, // Only rgb×16bit - 8 => BytesPerPixel::Eight, // Only rgba×16bit - _ => unreachable!("Not a possible byte rounded pixel width"), - } + BytesPerPixel::for_prediction(self.bytes_per_pixel()) } /// Returns the number of bytes needed for one deinterlaced image. 
@@ -695,6 +687,18 @@ impl Info<'_> { } impl BytesPerPixel { + pub(crate) fn for_prediction(bpp: usize) -> Self { + match bpp { + 1 => BytesPerPixel::One, + 2 => BytesPerPixel::Two, + 3 => BytesPerPixel::Three, + 4 => BytesPerPixel::Four, + 6 => BytesPerPixel::Six, // Only rgb×16bit + 8 => BytesPerPixel::Eight, // Only rgba×16bit + _ => unreachable!("Not a possible byte rounded pixel width"), + } + } + pub(crate) fn into_usize(self) -> usize { self as usize } diff --git a/src/lib.rs b/src/lib.rs index b3bb15b1..1bcfdb99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,3 +79,6 @@ pub use crate::decoder::{ }; pub use crate::encoder::{Encoder, EncodingError, StreamWriter, Writer}; pub use crate::filter::{AdaptiveFilterType, FilterType}; + +#[cfg(feature = "benchmarks")] +pub mod benchable_apis; From 324d1179b78ab14b2c15df74d791302b7f474dbe Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Thu, 21 Sep 2023 22:05:55 +0000 Subject: [PATCH 2/3] cargo fmt --- src/benchable_apis.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs index 442b6ac5..2e47829c 100644 --- a/src/benchable_apis.rs +++ b/src/benchable_apis.rs @@ -6,12 +6,7 @@ use crate::filter::FilterType; /// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only /// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`. 
-pub fn unfilter( - filter: FilterType, - tbpp: u8, - previous: &[u8], - current: &mut [u8], -) { +pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) { let tbpp = BytesPerPixel::for_prediction(tbpp as usize); crate::filter::unfilter(filter, tbpp, previous, current) } From 452ae89337810cbb216520e1a364537bab764c01 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Fri, 22 Sep 2023 19:03:47 +0000 Subject: [PATCH 3/3] Renaming `for_prediction` into `from_usize` --- src/benchable_apis.rs | 2 +- src/common.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs index 2e47829c..0be8134f 100644 --- a/src/benchable_apis.rs +++ b/src/benchable_apis.rs @@ -7,6 +7,6 @@ use crate::filter::FilterType; /// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only /// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`. pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) { - let tbpp = BytesPerPixel::for_prediction(tbpp as usize); + let tbpp = BytesPerPixel::from_usize(tbpp as usize); crate::filter::unfilter(filter, tbpp, previous, current) } diff --git a/src/common.rs b/src/common.rs index 8455d7aa..400aca11 100644 --- a/src/common.rs +++ b/src/common.rs @@ -594,7 +594,7 @@ impl Info<'_> { /// has the consequence that the number of possible values is rather small. To make this fact /// more obvious in the type system and the optimizer we use an explicit enum here. pub(crate) fn bpp_in_prediction(&self) -> BytesPerPixel { - BytesPerPixel::for_prediction(self.bytes_per_pixel()) + BytesPerPixel::from_usize(self.bytes_per_pixel()) } /// Returns the number of bytes needed for one deinterlaced image. @@ -687,7 +687,7 @@ impl Info<'_> { } impl BytesPerPixel { - pub(crate) fn for_prediction(bpp: usize) -> Self { + pub(crate) fn from_usize(bpp: usize) -> Self { match bpp { 1 => BytesPerPixel::One, 2 => BytesPerPixel::Two,