From 6ab4609c51585e363d185797453df2d7c82bb1f5 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 15 Sep 2024 19:32:15 +0200 Subject: [PATCH] fix order with `repr(C)` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark 1 (161 runs): ./uncompress-baseline rs silesia-small.tar.gz measurement mean ± σ min … max outliers delta wall_time 31.0ms ± 1.16ms 30.2ms … 43.6ms 4 ( 2%) 0% peak_rss 24.1MB ± 65.2KB 23.9MB … 24.1MB 0 ( 0%) 0% cpu_cycles 90.1M ± 3.33M 89.4M … 129M 12 ( 7%) 0% instructions 273M ± 409 273M … 273M 2 ( 1%) 0% cache_references 2.11M ± 101K 2.04M … 2.99M 8 ( 5%) 0% cache_misses 29.9K ± 2.87K 27.4K … 61.0K 4 ( 2%) 0% branch_misses 953K ± 942 951K … 961K 10 ( 6%) 0% Benchmark 2 (163 runs): ./target/release/examples/blogpost-uncompress rs silesia-small.tar.gz measurement mean ± σ min … max outliers delta wall_time 30.7ms ± 304us 30.1ms … 32.3ms 1 ( 1%) - 1.0% ± 0.6% peak_rss 24.1MB ± 63.2KB 24.0MB … 24.1MB 0 ( 0%) + 0.0% ± 0.1% cpu_cycles 88.3M ± 563K 88.0M … 94.2M 10 ( 6%) ⚡- 2.0% ± 0.6% instructions 267M ± 266 267M … 267M 0 ( 0%) ⚡- 2.4% ± 0.0% cache_references 2.10M ± 105K 2.05M … 3.20M 5 ( 3%) - 0.5% ± 1.1% cache_misses 26.6K ± 1.28K 24.3K … 31.1K 6 ( 4%) ⚡- 11.0% ± 1.6% branch_misses 953K ± 818 952K … 960K 6 ( 4%) + 0.1% ± 0.0% --- zlib-rs/src/inflate.rs | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/zlib-rs/src/inflate.rs b/zlib-rs/src/inflate.rs index cf79a69..8bf8feb 100644 --- a/zlib-rs/src/inflate.rs +++ b/zlib-rs/src/inflate.rs @@ -305,11 +305,6 @@ impl Flags { self.0 & other.0 != 0 } - #[inline(always)] - pub(crate) fn union(&mut self, other: Self) { - *self = Self(self.0 | other.0); - } - #[inline(always)] pub(crate) fn update(&mut self, other: Self, value: bool) { if value { @@ -320,8 +315,7 @@ impl Flags { } } -const _SIZE: [u8; 14608] = [0; core::mem::size_of::()]; - +#[repr(C, align(64))] pub(crate) struct State<'a> { /// Current inflate mode mode: Mode, @@ -338,11 +332,11 @@ pub(crate) struct State<'a> { /// - bit 2 true to validate check value wrap: u8, + flush: InflateFlush, + // allocated window if needed (capacity == 0 if unused) window: Window<'a>, - _padding0: usize, - // /// number of code length code lengths ncode: usize, @@ -381,21 +375,16 @@ pub(crate) struct State<'a> { in_available: usize, out_available: usize, - /// temporary storage space for code lengths - lens: [u16; 320], - /// work area for code table building - work: [u16; 288], - - error_message: Option<&'static str>, - flush: InflateFlush, + gzip_flags: i32, checksum: u32, crc_fold: Crc32Fold, + error_message: Option<&'static str>, + /// place to store gzip header if needed head: Option<&'a mut gz_header>, dmax: usize, - gzip_flags: i32, /// table for length/literal codes len_table: Table, @@ -406,6 +395,11 @@ pub(crate) struct State<'a> { codes_codes: [Code; crate::ENOUGH_LENS], len_codes: [Code; crate::ENOUGH_LENS], dist_codes: [Code; crate::ENOUGH_DISTS], + + /// temporary storage space for code lengths + lens: [u16; 320], + /// work area for code table building + work: [u16; 288], } impl<'a> State<'a> { @@ -455,7 +449,6 @@ impl<'a> State<'a> { checksum: 0, crc_fold: Crc32Fold::new(), - _padding0: 0, dmax: 0, gzip_flags: 0, @@ -2155,7 +2148,6 @@ pub unsafe fn copy<'a>( flush: state.flush, checksum: state.checksum, crc_fold: state.crc_fold, - _padding0: state._padding0, dmax: state.dmax, gzip_flags: state.gzip_flags, codes_codes: state.codes_codes,