From a5b79a18c45608f9bcce0998cf0863cc9fb2f2e8 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Sun, 9 May 2021 00:10:58 +0300 Subject: [PATCH 1/2] Use memchr::memchr{,2} for archive parsing This makes parsing of the archive headers significantly faster. The `ar` example adjusted to parse the same archive 1 million times, when run with the rlib of the `object` crate itself produces the following metrics: 788.19 msec task-clock:u # 0.998 CPUs utilized 2,502,967,113 cycles:u # 3.176 GHz 7,780,571,392 instructions:u # 3.11 insn per cycle In contrast to the following for the old code: 1,061.09 msec task-clock:u # 0.998 CPUs utilized 3,374,141,510 cycles:u # 3.180 GHz 12,012,570,139 instructions:u # 3.56 insn per cycle This results in a reduction of about 1B cycles, or 25% reduction in wall clock time. Originally `perf` would show a heavy hotspot (in the area of 50% of the total runtime) in `parse_sysv_extended_name`. --- Cargo.toml | 5 +++-- src/read/archive.rs | 19 ++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5e3f6e5f..09c02f23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ crc32fast = { version = "1.2", optional = true } flate2 = { version = "1", optional = true } indexmap = { version = "1.1", optional = true } wasmparser = { version = "0.57", optional = true } +memchr = { version = "2.4", optional = true, default-features = false } # Internal feature, only used when building as part of libstd, not part of the # stable interface of this crate. @@ -45,7 +46,7 @@ write = ["write_core", "coff", "elf", "macho"] # Enable things that require libstd. # Currently, this provides an `Error` implementation. -std = [] +std = ["memchr/std"] # Enable decompression of compressed sections. # This feature is not required if you want to do your own decompression. compression = ["flate2", "std"] @@ -58,7 +59,7 @@ unaligned = [] #======================================= # File format features. -archive = [] +archive = ["memchr"] coff = [] elf = [] macho = [] diff --git a/src/read/archive.rs b/src/read/archive.rs index fb6dd362..97924bc6 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -197,13 +197,13 @@ impl<'data> ArchiveMember<'data> { parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size) .read_error("Invalid archive extended name length")? } else if header.name[0] == b'/' { - let name_len = - (header.name.iter().position(|&x| x == b' ')).unwrap_or_else(|| header.name.len()); + let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len()); &header.name[..name_len] } else { - let name_len = (header.name.iter().position(|&x| x == b'/')) - .or_else(|| header.name.iter().position(|&x| x == b' ')) - .unwrap_or_else(|| header.name.len()); + // FIXME: can `memchr::memchr2` be used here? + let name_len = memchr::memchr(b'/', &header.name) + .or_else(|| memchr::memchr(b' ', &header.name)) + .unwrap_or(header.name.len()); &header.name[..name_len] }; @@ -268,10 +268,7 @@ impl<'data> ArchiveMember<'data> { // Ignores bytes starting from the first space. fn parse_u64_digits(digits: &[u8], radix: u32) -> Option { - let len = digits - .iter() - .position(|&x| x == b' ') - .unwrap_or_else(|| digits.len()); + let len = memchr::memchr(b' ', digits).unwrap_or(digits.len()); let digits = &digits[..len]; if digits.is_empty() { return None; @@ -290,7 +287,7 @@ fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result< let offset = parse_u64_digits(digits, 10).ok_or(())?; let offset = offset.try_into().map_err(|_| ())?; let name_data = names.get(offset..).ok_or(())?; - let name = match name_data.iter().position(|&x| x == b'/' || x == 0) { + let name = match memchr::memchr2(b'/', 0, name_data) { Some(len) => &name_data[..len], None => name_data, }; @@ -307,7 +304,7 @@ fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( let len = parse_u64_digits(digits, 10).ok_or(())?; *size = size.checked_sub(len).ok_or(())?; let name_data = data.read_bytes(offset, len)?; - let name = match name_data.iter().position(|&x| x == 0) { + let name = match memchr::memchr(0, name_data) { Some(len) => &name_data[..len], None => name_data, }; From 5bb34e7b6ba8aebf7221ba00e78a77a72273dda1 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Sun, 9 May 2021 00:34:11 +0300 Subject: [PATCH 2/2] Further reduce the runtime of parse_u64_digits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here instead of figuring out the extents of the integer ahead of time we check for the spaces while we compute the number itself. This further reduces the runtime of the beforementioned case (see previous commit) to: 580.57 msec task-clock:u # 0.997 CPUs utilized 1,843,957,595 cycles:u # 3.176 GHz 5,901,570,558 instructions:u # 3.20 insn per cycle `perf report` still shows that the most of the time is spent parsing sysv archive names (which makes sense – its pretty much all the program does after all!). --- src/read/archive.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/read/archive.rs b/src/read/archive.rs index 97924bc6..2f507cb9 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -268,17 +268,19 @@ impl<'data> ArchiveMember<'data> { // Ignores bytes starting from the first space. fn parse_u64_digits(digits: &[u8], radix: u32) -> Option { - let len = memchr::memchr(b' ', digits).unwrap_or(digits.len()); - let digits = &digits[..len]; - if digits.is_empty() { + if let [b' ', ..] = digits { return None; } let mut result: u64 = 0; for &c in digits { - let x = (c as char).to_digit(radix)?; - result = result - .checked_mul(u64::from(radix))? - .checked_add(u64::from(x))?; + if c == b' ' { + return Some(result); + } else { + let x = (c as char).to_digit(radix)?; + result = result + .checked_mul(u64::from(radix))? + .checked_add(u64::from(x))?; + } } Some(result) } @@ -287,7 +289,7 @@ fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result< let offset = parse_u64_digits(digits, 10).ok_or(())?; let offset = offset.try_into().map_err(|_| ())?; let name_data = names.get(offset..).ok_or(())?; - let name = match memchr::memchr2(b'/', 0, name_data) { + let name = match memchr::memchr2(b'/', b'\0', name_data) { Some(len) => &name_data[..len], None => name_data, }; @@ -304,7 +306,7 @@ fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( let len = parse_u64_digits(digits, 10).ok_or(())?; *size = size.checked_sub(len).ok_or(())?; let name_data = data.read_bytes(offset, len)?; - let name = match memchr::memchr(0, name_data) { + let name = match memchr::memchr(b'\0', name_data) { Some(len) => &name_data[..len], None => name_data, };