From a5b79a18c45608f9bcce0998cf0863cc9fb2f2e8 Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Sun, 9 May 2021 00:10:58 +0300
Subject: [PATCH 1/2] Use memchr::memchr{,2} for archive parsing

This makes parsing of the archive headers significantly faster. The `ar`
example adjusted to parse the same archive 1 million times, when run
with the rlib of the `object` crate itself produces the following
metrics:

    788.19 msec     task-clock:u              #    0.998 CPUs utilized
 2,502,967,113      cycles:u                  #    3.176 GHz
 7,780,571,392      instructions:u            #    3.11  insn per cycle

In contrast to the following for the old code:

  1,061.09 msec     task-clock:u              #    0.998 CPUs utilized
 3,374,141,510      cycles:u                  #    3.180 GHz
12,012,570,139      instructions:u            #    3.56  insn per cycle

This results in a reduction of about 1B cycles, or 25% reduction in wall
clock time.

Originally `perf` would show a heavy hotspot (in the area of 50% of the
total runtime) in `parse_sysv_extended_name`.
---
 Cargo.toml          |  5 +++--
 src/read/archive.rs | 19 ++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 5e3f6e5f..09c02f23 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ crc32fast = { version = "1.2", optional = true }
 flate2 = { version = "1", optional = true }
 indexmap = { version = "1.1", optional = true }
 wasmparser = { version = "0.57", optional = true }
+memchr = { version = "2.4", optional = true, default-features = false }
 
 # Internal feature, only used when building as part of libstd, not part of the
 # stable interface of this crate.
@@ -45,7 +46,7 @@ write = ["write_core", "coff", "elf", "macho"]
 
 # Enable things that require libstd.
 # Currently, this provides an `Error` implementation.
-std = []
+std = ["memchr/std"]
 # Enable decompression of compressed sections.
 # This feature is not required if you want to do your own decompression.
 compression = ["flate2", "std"]
@@ -58,7 +59,7 @@ unaligned = []
 
 #=======================================
 # File format features.
-archive = []
+archive = ["memchr"]
 coff = []
 elf = []
 macho = []
diff --git a/src/read/archive.rs b/src/read/archive.rs
index fb6dd362..97924bc6 100644
--- a/src/read/archive.rs
+++ b/src/read/archive.rs
@@ -197,13 +197,13 @@ impl<'data> ArchiveMember<'data> {
             parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size)
                 .read_error("Invalid archive extended name length")?
         } else if header.name[0] == b'/' {
-            let name_len =
-                (header.name.iter().position(|&x| x == b' ')).unwrap_or_else(|| header.name.len());
+            let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len());
             &header.name[..name_len]
         } else {
-            let name_len = (header.name.iter().position(|&x| x == b'/'))
-                .or_else(|| header.name.iter().position(|&x| x == b' '))
-                .unwrap_or_else(|| header.name.len());
+            // FIXME: can `memchr::memchr2` be used here?
+            let name_len = memchr::memchr(b'/', &header.name)
+                .or_else(|| memchr::memchr(b' ', &header.name))
+                .unwrap_or(header.name.len());
             &header.name[..name_len]
         };
 
@@ -268,10 +268,7 @@ impl<'data> ArchiveMember<'data> {
 
 // Ignores bytes starting from the first space.
 fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
-    let len = digits
-        .iter()
-        .position(|&x| x == b' ')
-        .unwrap_or_else(|| digits.len());
+    let len = memchr::memchr(b' ', digits).unwrap_or(digits.len());
     let digits = &digits[..len];
     if digits.is_empty() {
         return None;
@@ -290,7 +287,7 @@ fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<
     let offset = parse_u64_digits(digits, 10).ok_or(())?;
     let offset = offset.try_into().map_err(|_| ())?;
     let name_data = names.get(offset..).ok_or(())?;
-    let name = match name_data.iter().position(|&x| x == b'/' || x == 0) {
+    let name = match memchr::memchr2(b'/', 0, name_data) {
         Some(len) => &name_data[..len],
         None => name_data,
     };
@@ -307,7 +304,7 @@ fn parse_bsd_extended_name<'data, R: ReadRef<'data>>(
     let len = parse_u64_digits(digits, 10).ok_or(())?;
     *size = size.checked_sub(len).ok_or(())?;
     let name_data = data.read_bytes(offset, len)?;
-    let name = match name_data.iter().position(|&x| x == 0) {
+    let name = match memchr::memchr(0, name_data) {
         Some(len) => &name_data[..len],
         None => name_data,
     };

From 5bb34e7b6ba8aebf7221ba00e78a77a72273dda1 Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Sun, 9 May 2021 00:34:11 +0300
Subject: [PATCH 2/2] Further reduce the runtime of parse_u64_digits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Here instead of figuring out the extents of the integer ahead of time we
check for the spaces while we compute the number itself. This further
reduces the runtime of the beforementioned case (see previous commit) to:

    580.57 msec     task-clock:u              #    0.997 CPUs utilized
 1,843,957,595      cycles:u                  #    3.176 GHz
 5,901,570,558      instructions:u            #    3.20  insn per cycle

`perf report` still shows that the most of the time is spent parsing
sysv archive names (which makes sense – its pretty much all the program
does after all!).
---
 src/read/archive.rs | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/read/archive.rs b/src/read/archive.rs
index 97924bc6..2f507cb9 100644
--- a/src/read/archive.rs
+++ b/src/read/archive.rs
@@ -268,17 +268,19 @@ impl<'data> ArchiveMember<'data> {
 
 // Ignores bytes starting from the first space.
 fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
-    let len = memchr::memchr(b' ', digits).unwrap_or(digits.len());
-    let digits = &digits[..len];
-    if digits.is_empty() {
+    if let [b' ', ..] = digits {
         return None;
     }
     let mut result: u64 = 0;
     for &c in digits {
-        let x = (c as char).to_digit(radix)?;
-        result = result
-            .checked_mul(u64::from(radix))?
-            .checked_add(u64::from(x))?;
+        if c == b' ' {
+            return Some(result);
+        } else {
+            let x = (c as char).to_digit(radix)?;
+            result = result
+                .checked_mul(u64::from(radix))?
+                .checked_add(u64::from(x))?;
+        }
     }
     Some(result)
 }
@@ -287,7 +289,7 @@ fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<
     let offset = parse_u64_digits(digits, 10).ok_or(())?;
     let offset = offset.try_into().map_err(|_| ())?;
     let name_data = names.get(offset..).ok_or(())?;
-    let name = match memchr::memchr2(b'/', 0, name_data) {
+    let name = match memchr::memchr2(b'/', b'\0', name_data) {
         Some(len) => &name_data[..len],
         None => name_data,
     };
@@ -304,7 +306,7 @@ fn parse_bsd_extended_name<'data, R: ReadRef<'data>>(
     let len = parse_u64_digits(digits, 10).ok_or(())?;
     *size = size.checked_sub(len).ok_or(())?;
     let name_data = data.read_bytes(offset, len)?;
-    let name = match memchr::memchr(0, name_data) {
+    let name = match memchr::memchr(b'\0', name_data) {
         Some(len) => &name_data[..len],
         None => name_data,
     };