From bf2c26b5d9d99973d98a850084ea62af32b403e4 Mon Sep 17 00:00:00 2001
From: Lukasz Anforowicz <lukasza@chromium.org>
Date: Thu, 21 Sep 2023 17:30:12 +0000
Subject: [PATCH 1/3] Scaffolding for direct benchmarking of
 `crate::filter::unfilter`.

---
 Cargo.toml            |  6 +++++
 benches/unfilter.rs   | 56 +++++++++++++++++++++++++++++++++++++++++++
 src/benchable_apis.rs | 17 +++++++++++++
 src/common.rs         | 22 ++++++++++-------
 src/lib.rs            |  3 +++
 5 files changed, 95 insertions(+), 9 deletions(-)
 create mode 100644 benches/unfilter.rs
 create mode 100644 src/benchable_apis.rs
diff --git a/Cargo.toml b/Cargo.toml
index 37e7e5a7..fcc45e18 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,3 +44,9 @@ benchmarks = []
 path = "benches/decoder.rs"
 name = "decoder"
 harness = false
+
+[[bench]]
+path = "benches/unfilter.rs"
+name = "unfilter"
+harness = false
+required-features = ["benchmarks"]
diff --git a/benches/unfilter.rs b/benches/unfilter.rs
new file mode 100644
index 00000000..2f6e1f2f
--- /dev/null
+++ b/benches/unfilter.rs
@@ -0,0 +1,56 @@
+//! Usage example:
+//!
+//! ```text
+//! $ alias bench="rustup run nightly cargo bench"
+//! $ bench --bench=unfilter --features=benchmarks -- --save-baseline my_baseline
+//! ... tweak something, say the Sub filter ...
+//! $ bench --bench=unfilter --features=benchmarks -- filter=Sub --baseline my_baseline
+//! ```
+
+use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use png::benchable_apis::unfilter;
+use png::FilterType;
+use rand::Rng;
+
+/// Registers one `unfilter` benchmark for every filter type / `bpp` combination below.
+fn unfilter_all(c: &mut Criterion) {
+    let filters = [
+        FilterType::Sub,
+        FilterType::Up,
+        FilterType::Avg,
+        FilterType::Paeth,
+    ];
+    for filter in filters {
+        for bpp in [1, 2, 3, 4, 6, 8] {
+            bench_unfilter(c, filter, bpp);
+        }
+    }
+}
+
+criterion_group!(benches, unfilter_all);
+criterion_main!(benches);
+
+/// Benchmarks `unfilter` for one `filter`/`bpp` pair over rows of `4096 * bpp` random bytes.
+fn bench_unfilter(c: &mut Criterion, filter: FilterType, bpp: u8) {
+    fn get_random_bytes<R: Rng>(rng: &mut R, n: usize) -> Vec<u8> {
+        use rand::Fill;
+        let mut result = vec![0u8; n];
+        result.as_mut_slice().try_fill(rng).unwrap();
+        result
+    }
+    let row_size = 4096 * usize::from(bpp);
+    let two_rows = get_random_bytes(&mut rand::thread_rng(), row_size * 2);
+    let mut group = c.benchmark_group("unfilter");
+    group.throughput(Throughput::Bytes(row_size as u64));
+    group.bench_with_input(
+        format!("filter={filter:?}/bpp={bpp}"),
+        &two_rows,
+        |b, two_rows| {
+            let (prev_row, curr_row) = two_rows.split_at(row_size);
+            let mut curr_row = curr_row.to_vec();
+            // `curr_row` is built once and handed out mutably on every iteration.
+            b.iter(|| unfilter(filter, bpp, prev_row, curr_row.as_mut_slice()));
+        },
+    );
+    group.finish();
+}
diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs
new file mode 100644
index 00000000..442b6ac5
--- /dev/null
+++ b/src/benchable_apis.rs
@@ -0,0 +1,17 @@
+//! Development-time-only helper module for exporting private APIs so that they can be benchmarked.
+//! This module is gated behind the "benchmarks" feature.
+
+use crate::common::BytesPerPixel;
+use crate::filter::FilterType;
+
+/// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only
+/// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`.
+pub fn unfilter(
+    filter: FilterType,
+    tbpp: u8,
+    previous: &[u8],
+    current: &mut [u8],
+) {
+    let tbpp = BytesPerPixel::for_prediction(tbpp as usize);
+    crate::filter::unfilter(filter, tbpp, previous, current)
+}
diff --git a/src/common.rs b/src/common.rs
index 6e5dbffe..8455d7aa 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -594,15 +594,7 @@ impl Info<'_> {
     /// has the consequence that the number of possible values is rather small. To make this fact
     /// more obvious in the type system and the optimizer we use an explicit enum here.
     pub(crate) fn bpp_in_prediction(&self) -> BytesPerPixel {
-        match self.bytes_per_pixel() {
-            1 => BytesPerPixel::One,
-            2 => BytesPerPixel::Two,
-            3 => BytesPerPixel::Three,
-            4 => BytesPerPixel::Four,
-            6 => BytesPerPixel::Six,   // Only rgb×16bit
-            8 => BytesPerPixel::Eight, // Only rgba×16bit
-            _ => unreachable!("Not a possible byte rounded pixel width"),
-        }
+        BytesPerPixel::for_prediction(self.bytes_per_pixel())
     }
 
     /// Returns the number of bytes needed for one deinterlaced image.
@@ -695,6 +687,18 @@ impl Info<'_> {
 }
 
 impl BytesPerPixel {
+    pub(crate) fn for_prediction(bpp: usize) -> Self {
+        match bpp {
+            1 => BytesPerPixel::One,
+            2 => BytesPerPixel::Two,
+            3 => BytesPerPixel::Three,
+            4 => BytesPerPixel::Four,
+            6 => BytesPerPixel::Six,   // Only rgb×16bit
+            8 => BytesPerPixel::Eight, // Only rgba×16bit
+            _ => unreachable!("Not a possible byte rounded pixel width"),
+        }
+    }
+
     pub(crate) fn into_usize(self) -> usize {
         self as usize
     }
diff --git a/src/lib.rs b/src/lib.rs
index b3bb15b1..1bcfdb99 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -79,3 +79,6 @@ pub use crate::decoder::{
 };
 pub use crate::encoder::{Encoder, EncodingError, StreamWriter, Writer};
 pub use crate::filter::{AdaptiveFilterType, FilterType};
+
+#[cfg(feature = "benchmarks")]
+pub mod benchable_apis;

From 324d1179b78ab14b2c15df74d791302b7f474dbe Mon Sep 17 00:00:00 2001
From: Lukasz Anforowicz <lukasza@chromium.org>
Date: Thu, 21 Sep 2023 22:05:55 +0000
Subject: [PATCH 2/3] cargo fmt

---
 src/benchable_apis.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs
index 442b6ac5..2e47829c 100644
--- a/src/benchable_apis.rs
+++ b/src/benchable_apis.rs
@@ -6,12 +6,7 @@ use crate::filter::FilterType;
 
 /// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only
 /// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`.
-pub fn unfilter(
-    filter: FilterType,
-    tbpp: u8,
-    previous: &[u8],
-    current: &mut [u8],
-) {
+pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) {
     let tbpp = BytesPerPixel::for_prediction(tbpp as usize);
     crate::filter::unfilter(filter, tbpp, previous, current)
 }

From 452ae89337810cbb216520e1a364537bab764c01 Mon Sep 17 00:00:00 2001
From: Lukasz Anforowicz <lukasza@chromium.org>
Date: Fri, 22 Sep 2023 19:03:47 +0000
Subject: [PATCH 3/3] Renaming `for_prediction` to `from_usize`

---
 src/benchable_apis.rs | 2 +-
 src/common.rs         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs
index 2e47829c..0be8134f 100644
--- a/src/benchable_apis.rs
+++ b/src/benchable_apis.rs
@@ -7,6 +7,6 @@ use crate::filter::FilterType;
 /// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only
 /// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`.
 pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) {
-    let tbpp = BytesPerPixel::for_prediction(tbpp as usize);
+    let tbpp = BytesPerPixel::from_usize(tbpp as usize);
     crate::filter::unfilter(filter, tbpp, previous, current)
 }
diff --git a/src/common.rs b/src/common.rs
index 8455d7aa..400aca11 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -594,7 +594,7 @@ impl Info<'_> {
     /// has the consequence that the number of possible values is rather small. To make this fact
     /// more obvious in the type system and the optimizer we use an explicit enum here.
     pub(crate) fn bpp_in_prediction(&self) -> BytesPerPixel {
-        BytesPerPixel::for_prediction(self.bytes_per_pixel())
+        BytesPerPixel::from_usize(self.bytes_per_pixel())
     }
 
     /// Returns the number of bytes needed for one deinterlaced image.
@@ -687,7 +687,7 @@ impl Info<'_> {
 }
 
 impl BytesPerPixel {
-    pub(crate) fn for_prediction(bpp: usize) -> Self {
+    pub(crate) fn from_usize(bpp: usize) -> Self {
         match bpp {
             1 => BytesPerPixel::One,
             2 => BytesPerPixel::Two,