From 2bf0df777b7712d1b719cd5ac7cce63176b7384c Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 01:02:19 -0600
Subject: [PATCH 01/14] Move rust memchr impl to libcore

---
 src/libcore/slice/memchr.rs      | 224 ++++++++++++++++++++++++++++++
 src/libcore/slice/mod.rs         |   5 +
 src/libstd/lib.rs                |   1 +
 src/libstd/sys/redox/memchr.rs   |   2 +-
 src/libstd/sys/unix/memchr.rs    |   2 +-
 src/libstd/sys/wasm/memchr.rs    |   2 +-
 src/libstd/sys/windows/memchr.rs |   2 +-
 src/libstd/sys_common/memchr.rs  | 227 -------------------------------
 src/libstd/sys_common/mod.rs     |   1 -
 9 files changed, 234 insertions(+), 232 deletions(-)
 create mode 100644 src/libcore/slice/memchr.rs
 delete mode 100644 src/libstd/sys_common/memchr.rs
diff --git a/src/libcore/slice/memchr.rs b/src/libcore/slice/memchr.rs
new file mode 100644
index 0000000000000..252a258c30456
--- /dev/null
+++ b/src/libcore/slice/memchr.rs
@@ -0,0 +1,224 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//
+// Original implementation taken from rust-memchr
+// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
+
+use cmp;
+use mem;
+
+const LO_U64: u64 = 0x0101010101010101;
+const HI_U64: u64 = 0x8080808080808080;
+
+// Truncate the 64-bit masks to the target's `usize` width.
+const LO_USIZE: usize = LO_U64 as usize;
+const HI_USIZE: usize = HI_U64 as usize;
+
+/// Return `true` if `x` contains any zero byte.
+///
+/// From *Matters Computational*, J. Arndt
+///
+/// "The idea is to subtract one from each of the bytes and then look for
+/// bytes where the borrow propagated all the way to the most significant
+/// bit."
+#[inline]
+fn contains_zero_byte(x: usize) -> bool {
+    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
+}
+
+#[cfg(target_pointer_width = "32")]
+#[inline]
+fn repeat_byte(b: u8) -> usize {
+    let mut rep = (b as usize) << 8 | b as usize;
+    rep = rep << 16 | rep;
+    rep
+}
+
+#[cfg(target_pointer_width = "64")]
+#[inline]
+fn repeat_byte(b: u8) -> usize {
+    let mut rep = (b as usize) << 8 | b as usize;
+    rep = rep << 16 | rep;
+    rep = rep << 32 | rep;
+    rep
+}
+
+/// Return the first index matching the byte `x` in `text`.
+pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Scan for a single byte value by reading two `usize` words at a time.
+    //
+    // Split `text` in three parts
+    // - unaligned initial part, before the first word aligned address in text
+    // - body, scan by 2 words at a time
+    // - the last remaining part, < 2 word size
+    let len = text.len();
+    let ptr = text.as_ptr();
+    let usize_bytes = mem::size_of::<usize>();
+
+    // search up to an aligned boundary
+    let mut offset = ptr.align_offset(usize_bytes);
+    if offset > 0 {
+        offset = cmp::min(offset, len);
+        if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
+            return Some(index);
+        }
+    }
+
+    // search the body of the text
+    let repeated_x = repeat_byte(x);
+
+    if len >= 2 * usize_bytes {
+        while offset <= len - 2 * usize_bytes {
+            unsafe {
+                let u = *(ptr.offset(offset as isize) as *const usize);
+                let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
+
+                // break if there is a matching byte
+                let zu = contains_zero_byte(u ^ repeated_x);
+                let zv = contains_zero_byte(v ^ repeated_x);
+                if zu || zv {
+                    break;
+                }
+            }
+            offset += usize_bytes * 2;
+        }
+    }
+
+    // find the byte after the point the body loop stopped
+    text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
+}
+
+/// Return the last index matching the byte `x` in `text`.
+pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Scan for a single byte value by reading two `usize` words at a time.
+    //
+    // Split `text` in three parts
+    // - unaligned tail, after the last word aligned address in text
+    // - body, scan by 2 words at a time
+    // - the first remaining bytes, < 2 word size
+    let len = text.len();
+    let ptr = text.as_ptr();
+    let usize_bytes = mem::size_of::<usize>();
+
+    // search to an aligned boundary
+    let end_align = (ptr as usize + len) & (usize_bytes - 1);
+    let mut offset;
+    if end_align > 0 {
+        offset = if end_align >= len { 0 } else { len - end_align };
+        if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
+            return Some(offset + index);
+        }
+    } else {
+        offset = len;
+    }
+
+    // search the body of the text
+    let repeated_x = repeat_byte(x);
+
+    while offset >= 2 * usize_bytes {
+        unsafe {
+            let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize);
+            let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize);
+
+            // break if there is a matching byte
+            let zu = contains_zero_byte(u ^ repeated_x);
+            let zv = contains_zero_byte(v ^ repeated_x);
+            if zu || zv {
+                break;
+            }
+        }
+        offset -= 2 * usize_bytes;
+    }
+
+    // find the byte before the point the body loop stopped
+    text[..offset].iter().rposition(|elt| *elt == x)
+}
+
+// test fallback implementations on all platforms
+#[test]
+fn matches_one() {
+    assert_eq!(Some(0), memchr(b'a', b"a"));
+}
+
+#[test]
+fn matches_begin() {
+    assert_eq!(Some(0), memchr(b'a', b"aaaa"));
+}
+
+#[test]
+fn matches_end() {
+    assert_eq!(Some(4), memchr(b'z', b"aaaaz"));
+}
+
+#[test]
+fn matches_nul() {
+    assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00"));
+}
+
+#[test]
+fn matches_past_nul() {
+    assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z"));
+}
+
+#[test]
+fn no_match_empty() {
+    assert_eq!(None, memchr(b'a', b""));
+}
+
+#[test]
+fn no_match() {
+    assert_eq!(None, memchr(b'a', b"xyz"));
+}
+
+#[test]
+fn matches_one_reversed() {
+    assert_eq!(Some(0), memrchr(b'a', b"a"));
+}
+
+#[test]
+fn matches_begin_reversed() {
+    assert_eq!(Some(3), memrchr(b'a', b"aaaa"));
+}
+
+#[test]
+fn matches_end_reversed() {
+    assert_eq!(Some(0), memrchr(b'z', b"zaaaa"));
+}
+
+#[test]
+fn matches_nul_reversed() {
+    assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00"));
+}
+
+#[test]
+fn matches_past_nul_reversed() {
+    assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa"));
+}
+
+#[test]
+fn no_match_empty_reversed() {
+    assert_eq!(None, memrchr(b'a', b""));
+}
+
+#[test]
+fn no_match_reversed() {
+    assert_eq!(None, memrchr(b'a', b"xyz"));
+}
+
+#[test]
+fn each_alignment_reversed() {
+    let mut data = [1u8; 64];
+    let needle = 2;
+    let pos = 40;
+    data[pos] = needle;
+    for start in 0..16 {
+        assert_eq!(Some(pos - start), memrchr(needle, &data[start..]));
+    }
+}
diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs
index 49c51f4f04fdc..e4da1b7e5f5d8 100644
--- a/src/libcore/slice/mod.rs
+++ b/src/libcore/slice/mod.rs
@@ -50,6 +50,11 @@ use mem;
 use marker::{Copy, Send, Sync, Sized, self};
 use iter_private::TrustedRandomAccess;
 
+#[unstable(feature = "slice_internals", issue = "0",
+           reason = "exposed from core to be reused in std; use the memchr crate")]
+/// Pure Rust memchr implementation, taken from rust-memchr
+pub mod memchr;
+
 mod rotate;
 mod sort;
 
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index 12e6231136e16..536757336cd8b 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -302,6 +302,7 @@
 #![feature(sip_hash_13)]
 #![feature(slice_bytes)]
 #![feature(slice_concat_ext)]
+#![feature(slice_internals)]
 #![feature(slice_patterns)]
 #![feature(staged_api)]
 #![feature(stmt_expr_attributes)]
diff --git a/src/libstd/sys/redox/memchr.rs b/src/libstd/sys/redox/memchr.rs
index 4c314b7a47258..873b33535025b 100644
--- a/src/libstd/sys/redox/memchr.rs
+++ b/src/libstd/sys/redox/memchr.rs
@@ -11,4 +11,4 @@
 // Original implementation taken from rust-memchr
 // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
 
-pub use sys_common::memchr::fallback::{memchr, memrchr};
+pub use core::slice::memchr::{memchr, memrchr};
diff --git a/src/libstd/sys/unix/memchr.rs b/src/libstd/sys/unix/memchr.rs
index aed04703ea117..f49adc24163ca 100644
--- a/src/libstd/sys/unix/memchr.rs
+++ b/src/libstd/sys/unix/memchr.rs
@@ -50,7 +50,7 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> {
 
     #[cfg(not(target_os = "linux"))]
     fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
-        ::sys_common::memchr::fallback::memrchr(needle, haystack)
+        ::core::slice::memchr::memrchr(needle, haystack)
     }
 
     memrchr_specific(needle, haystack)
diff --git a/src/libstd/sys/wasm/memchr.rs b/src/libstd/sys/wasm/memchr.rs
index e611d94af30b1..964e35994139b 100644
--- a/src/libstd/sys/wasm/memchr.rs
+++ b/src/libstd/sys/wasm/memchr.rs
@@ -8,4 +8,4 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-pub use sys_common::memchr::fallback::{memchr, memrchr};
+pub use core::slice::memchr::{memchr, memrchr};
diff --git a/src/libstd/sys/windows/memchr.rs b/src/libstd/sys/windows/memchr.rs
index 5a5386acaa531..fa7c816fd02ea 100644
--- a/src/libstd/sys/windows/memchr.rs
+++ b/src/libstd/sys/windows/memchr.rs
@@ -12,4 +12,4 @@
 // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
 
 // Fallback memchr is fastest on windows
-pub use sys_common::memchr::fallback::{memchr, memrchr};
+pub use core::slice::memchr::{memchr, memrchr};
diff --git a/src/libstd/sys_common/memchr.rs b/src/libstd/sys_common/memchr.rs
deleted file mode 100644
index 50f998eb4867d..0000000000000
--- a/src/libstd/sys_common/memchr.rs
+++ /dev/null
@@ -1,227 +0,0 @@
-// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-//
-// Original implementation taken from rust-memchr
-// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
-
-#[allow(dead_code)]
-pub mod fallback {
-    use cmp;
-    use mem;
-
-    const LO_U64: u64 = 0x0101010101010101;
-    const HI_U64: u64 = 0x8080808080808080;
-
-    // use truncation
-    const LO_USIZE: usize = LO_U64 as usize;
-    const HI_USIZE: usize = HI_U64 as usize;
-
-    /// Return `true` if `x` contains any zero byte.
-    ///
-    /// From *Matters Computational*, J. Arndt
-    ///
-    /// "The idea is to subtract one from each of the bytes and then look for
-    /// bytes where the borrow propagated all the way to the most significant
-    /// bit."
-    #[inline]
-    fn contains_zero_byte(x: usize) -> bool {
-        x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
-    }
-
-    #[cfg(target_pointer_width = "32")]
-    #[inline]
-    fn repeat_byte(b: u8) -> usize {
-        let mut rep = (b as usize) << 8 | b as usize;
-        rep = rep << 16 | rep;
-        rep
-    }
-
-    #[cfg(target_pointer_width = "64")]
-    #[inline]
-    fn repeat_byte(b: u8) -> usize {
-        let mut rep = (b as usize) << 8 | b as usize;
-        rep = rep << 16 | rep;
-        rep = rep << 32 | rep;
-        rep
-    }
-
-    /// Return the first index matching the byte `a` in `text`.
-    pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
-        // Scan for a single byte value by reading two `usize` words at a time.
-        //
-        // Split `text` in three parts
-        // - unaligned initial part, before the first word aligned address in text
-        // - body, scan by 2 words at a time
-        // - the last remaining part, < 2 word size
-        let len = text.len();
-        let ptr = text.as_ptr();
-        let usize_bytes = mem::size_of::<usize>();
-
-        // search up to an aligned boundary
-        let mut offset = ptr.align_offset(usize_bytes);
-        if offset > 0 {
-            offset = cmp::min(offset, len);
-            if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
-                return Some(index);
-            }
-        }
-
-        // search the body of the text
-        let repeated_x = repeat_byte(x);
-
-        if len >= 2 * usize_bytes {
-            while offset <= len - 2 * usize_bytes {
-                unsafe {
-                    let u = *(ptr.offset(offset as isize) as *const usize);
-                    let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
-
-                    // break if there is a matching byte
-                    let zu = contains_zero_byte(u ^ repeated_x);
-                    let zv = contains_zero_byte(v ^ repeated_x);
-                    if zu || zv {
-                        break;
-                    }
-                }
-                offset += usize_bytes * 2;
-            }
-        }
-
-        // find the byte after the point the body loop stopped
-        text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
-    }
-
-    /// Return the last index matching the byte `a` in `text`.
-    pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
-        // Scan for a single byte value by reading two `usize` words at a time.
-        //
-        // Split `text` in three parts
-        // - unaligned tail, after the last word aligned address in text
-        // - body, scan by 2 words at a time
-        // - the first remaining bytes, < 2 word size
-        let len = text.len();
-        let ptr = text.as_ptr();
-        let usize_bytes = mem::size_of::<usize>();
-
-        // search to an aligned boundary
-        let end_align = (ptr as usize + len) & (usize_bytes - 1);
-        let mut offset;
-        if end_align > 0 {
-            offset = if end_align >= len { 0 } else { len - end_align };
-            if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
-                return Some(offset + index);
-            }
-        } else {
-            offset = len;
-        }
-
-        // search the body of the text
-        let repeated_x = repeat_byte(x);
-
-        while offset >= 2 * usize_bytes {
-            unsafe {
-                let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize);
-                let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize);
-
-                // break if there is a matching byte
-                let zu = contains_zero_byte(u ^ repeated_x);
-                let zv = contains_zero_byte(v ^ repeated_x);
-                if zu || zv {
-                    break;
-                }
-            }
-            offset -= 2 * usize_bytes;
-        }
-
-        // find the byte before the point the body loop stopped
-        text[..offset].iter().rposition(|elt| *elt == x)
-    }
-
-    // test fallback implementations on all platforms
-    #[test]
-    fn matches_one() {
-        assert_eq!(Some(0), memchr(b'a', b"a"));
-    }
-
-    #[test]
-    fn matches_begin() {
-        assert_eq!(Some(0), memchr(b'a', b"aaaa"));
-    }
-
-    #[test]
-    fn matches_end() {
-        assert_eq!(Some(4), memchr(b'z', b"aaaaz"));
-    }
-
-    #[test]
-    fn matches_nul() {
-        assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00"));
-    }
-
-    #[test]
-    fn matches_past_nul() {
-        assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z"));
-    }
-
-    #[test]
-    fn no_match_empty() {
-        assert_eq!(None, memchr(b'a', b""));
-    }
-
-    #[test]
-    fn no_match() {
-        assert_eq!(None, memchr(b'a', b"xyz"));
-    }
-
-    #[test]
-    fn matches_one_reversed() {
-        assert_eq!(Some(0), memrchr(b'a', b"a"));
-    }
-
-    #[test]
-    fn matches_begin_reversed() {
-        assert_eq!(Some(3), memrchr(b'a', b"aaaa"));
-    }
-
-    #[test]
-    fn matches_end_reversed() {
-        assert_eq!(Some(0), memrchr(b'z', b"zaaaa"));
-    }
-
-    #[test]
-    fn matches_nul_reversed() {
-        assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00"));
-    }
-
-    #[test]
-    fn matches_past_nul_reversed() {
-        assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa"));
-    }
-
-    #[test]
-    fn no_match_empty_reversed() {
-        assert_eq!(None, memrchr(b'a', b""));
-    }
-
-    #[test]
-    fn no_match_reversed() {
-        assert_eq!(None, memrchr(b'a', b"xyz"));
-    }
-
-    #[test]
-    fn each_alignment_reversed() {
-        let mut data = [1u8; 64];
-        let needle = 2;
-        let pos = 40;
-        data[pos] = needle;
-        for start in 0..16 {
-            assert_eq!(Some(pos - start), memrchr(needle, &data[start..]));
-        }
-    }
-}
diff --git a/src/libstd/sys_common/mod.rs b/src/libstd/sys_common/mod.rs
index 14e5697b94e57..534fcf4d11bbb 100644
--- a/src/libstd/sys_common/mod.rs
+++ b/src/libstd/sys_common/mod.rs
@@ -33,7 +33,6 @@ pub mod at_exit_imp;
 pub mod backtrace;
 pub mod condvar;
 pub mod io;
-pub mod memchr;
 pub mod mutex;
 pub mod poison;
 pub mod remutex;

From f8f28886e0d98c9cbd6cb3a719f9014960ec1d24 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 09:11:42 -0600
Subject: [PATCH 02/14] Use memchr in [u8]::contains

---
 src/libcore/slice/mod.rs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs
index e4da1b7e5f5d8..346ee27331121 100644
--- a/src/libcore/slice/mod.rs
+++ b/src/libcore/slice/mod.rs
@@ -624,7 +624,7 @@ impl<T> SliceExt for [T] {
 
     #[inline]
     fn contains(&self, x: &T) -> bool where T: PartialEq {
-        self.iter().any(|elt| *x == *elt)
+        x.slice_contains(self)
     }
 
     #[inline]
@@ -2619,3 +2619,19 @@ unsafe impl<'a, T> TrustedRandomAccess for IterMut<'a, T> {
     }
     fn may_have_side_effect() -> bool { false }
 }
+
+trait SliceContains: Sized {
+    fn slice_contains(&self, x: &[Self]) -> bool;
+}
+
+impl<T> SliceContains for T where T: PartialEq {
+    default fn slice_contains(&self, x: &[Self]) -> bool {
+        x.iter().any(|y| *y == *self)
+    }
+}
+
+impl SliceContains for u8 {
+    fn slice_contains(&self, x: &[Self]) -> bool {
+        memchr::memchr(*self, x).is_some()
+    }
+}

From 1d818a4d8c3fa5b15ad2e2ab30531316565d556c Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 10:40:11 -0600
Subject: [PATCH 03/14] Support 16 bit platforms

---
 src/libcore/slice/memchr.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/libcore/slice/memchr.rs b/src/libcore/slice/memchr.rs
index 252a258c30456..00183be97e751 100644
--- a/src/libcore/slice/memchr.rs
+++ b/src/libcore/slice/memchr.rs
@@ -33,6 +33,12 @@ fn contains_zero_byte(x: usize) -> bool {
     x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
 }
 
+#[cfg(target_pointer_width = "16")]
+#[inline]
+fn repeat_byte(b: u8) -> usize {
+    (b as usize) << 8 | b as usize
+}
+
 #[cfg(target_pointer_width = "32")]
 #[inline]
 fn repeat_byte(b: u8) -> usize {

From 4550ea79f004215af1490e2c269a16d46b890b9f Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 13:11:48 -0600
Subject: [PATCH 04/14] Remove the unused ascii_only field in CharEqSearcher

---
 src/libcore/str/pattern.rs | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index edb7bed4520fb..3200cfc498236 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -241,23 +241,16 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 #[doc(hidden)]
 trait CharEq {
     fn matches(&mut self, c: char) -> bool;
-    fn only_ascii(&self) -> bool;
 }
 
 impl CharEq for char {
     #[inline]
     fn matches(&mut self, c: char) -> bool { *self == c }
-
-    #[inline]
-    fn only_ascii(&self) -> bool { (*self as u32) < 128 }
 }
 
 impl<F> CharEq for F where F: FnMut(char) -> bool {
     #[inline]
     fn matches(&mut self, c: char) -> bool { (*self)(c) }
-
-    #[inline]
-    fn only_ascii(&self) -> bool { false }
 }
 
 impl<'a> CharEq for &'a [char] {
@@ -265,11 +258,6 @@ impl<'a> CharEq for &'a [char] {
     fn matches(&mut self, c: char) -> bool {
         self.iter().any(|&m| { let mut m = m; m.matches(c) })
     }
-
-    #[inline]
-    fn only_ascii(&self) -> bool {
-        self.iter().all(|m| m.only_ascii())
-    }
 }
 
 struct CharEqPattern<C: CharEq>(C);
@@ -279,8 +267,6 @@ struct CharEqSearcher<'a, C: CharEq> {
     char_eq: C,
     haystack: &'a str,
     char_indices: super::CharIndices<'a>,
-    #[allow(dead_code)]
-    ascii_only: bool,
 }
 
 impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
@@ -289,7 +275,6 @@ impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
     #[inline]
     fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
         CharEqSearcher {
-            ascii_only: self.0.only_ascii(),
             haystack,
             char_eq: self.0,
             char_indices: haystack.char_indices(),
@@ -499,7 +484,6 @@ impl<'a, F> fmt::Debug for CharPredicateSearcher<'a, F>
         f.debug_struct("CharPredicateSearcher")
             .field("haystack", &self.0.haystack)
             .field("char_indices", &self.0.char_indices)
-            .field("ascii_only", &self.0.ascii_only)
             .finish()
     }
 }

From 72cab5e3263343502aeb1f21a8a17c7f7e917a50 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 14:36:49 -0600
Subject: [PATCH 05/14] Split out char searcher from MultiCharSearcher

---
 src/libcore/str/pattern.rs | 87 +++++++++++++++++++++++++-------------
 1 file changed, 58 insertions(+), 29 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 3200cfc498236..9dc828518278e 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -235,46 +235,41 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
 pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 
 /////////////////////////////////////////////////////////////////////////////
-// Impl for a CharEq wrapper
+// Impl for a MultiCharEq wrapper
 /////////////////////////////////////////////////////////////////////////////
 
 #[doc(hidden)]
-trait CharEq {
+trait MultiCharEq {
     fn matches(&mut self, c: char) -> bool;
 }
 
-impl CharEq for char {
-    #[inline]
-    fn matches(&mut self, c: char) -> bool { *self == c }
-}
-
-impl<F> CharEq for F where F: FnMut(char) -> bool {
+impl<F> MultiCharEq for F where F: FnMut(char) -> bool {
     #[inline]
     fn matches(&mut self, c: char) -> bool { (*self)(c) }
 }
 
-impl<'a> CharEq for &'a [char] {
+impl<'a> MultiCharEq for &'a [char] {
     #[inline]
     fn matches(&mut self, c: char) -> bool {
-        self.iter().any(|&m| { let mut m = m; m.matches(c) })
+        self.iter().any(|&m| { m == c })
     }
 }
 
-struct CharEqPattern<C: CharEq>(C);
+struct MultiCharEqPattern<C: MultiCharEq>(C);
 
 #[derive(Clone, Debug)]
-struct CharEqSearcher<'a, C: CharEq> {
+struct MultiCharEqSearcher<'a, C: MultiCharEq> {
     char_eq: C,
     haystack: &'a str,
     char_indices: super::CharIndices<'a>,
 }
 
-impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
-    type Searcher = CharEqSearcher<'a, C>;
+impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern<C> {
+    type Searcher = MultiCharEqSearcher<'a, C>;
 
     #[inline]
-    fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
-        CharEqSearcher {
+    fn into_searcher(self, haystack: &'a str) -> MultiCharEqSearcher<'a, C> {
+        MultiCharEqSearcher {
             haystack,
             char_eq: self.0,
             char_indices: haystack.char_indices(),
@@ -282,7 +277,7 @@ impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
     }
 }
 
-unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
+unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
     #[inline]
     fn haystack(&self) -> &'a str {
         self.haystack
@@ -307,7 +302,7 @@ unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
     }
 }
 
-unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
+unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> {
     #[inline]
     fn next_back(&mut self) -> SearchStep {
         let s = &mut self.char_indices;
@@ -327,7 +322,7 @@ unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
     }
 }
 
-impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
+impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {}
 
 /////////////////////////////////////////////////////////////////////////////
 
@@ -400,14 +395,40 @@ macro_rules! searcher_methods {
 
 /// Associated type for `<char as Pattern<'a>>::Searcher`.
 #[derive(Clone, Debug)]
-pub struct CharSearcher<'a>(<CharEqPattern<char> as Pattern<'a>>::Searcher);
+pub struct CharSearcher<'a>(&'a str);
 
 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
-    searcher_methods!(forward);
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        unimplemented!();
+    }
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_reject(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
 }
 
 unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
-    searcher_methods!(reverse);
+    #[inline]
+    fn next_back(&mut self) -> SearchStep {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
 }
 
 impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
@@ -418,7 +439,7 @@ impl<'a> Pattern<'a> for char {
 
     #[inline]
     fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
-        CharSearcher(CharEqPattern(self).into_searcher(haystack))
+        CharSearcher(haystack)
     }
 
     #[inline]
@@ -433,13 +454,21 @@ impl<'a> Pattern<'a> for char {
 
     #[inline]
     fn is_prefix_of(self, haystack: &'a str) -> bool {
-        CharEqPattern(self).is_prefix_of(haystack)
+        if let Some(ch) = haystack.chars().next() {
+            self == ch
+        } else {
+            false
+        }
     }
 
     #[inline]
     fn is_suffix_of(self, haystack: &'a str) -> bool where Self::Searcher: ReverseSearcher<'a>
     {
-        CharEqPattern(self).is_suffix_of(haystack)
+        if let Some(ch) = haystack.chars().next_back() {
+            self == ch
+        } else {
+            false
+        }
     }
 }
 
@@ -451,7 +480,7 @@ impl<'a> Pattern<'a> for char {
 
 /// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
 #[derive(Clone, Debug)]
-pub struct CharSliceSearcher<'a, 'b>(<CharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
+pub struct CharSliceSearcher<'a, 'b>(<MultiCharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
 
 unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
     searcher_methods!(forward);
@@ -465,7 +494,7 @@ impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
 
 /// Searches for chars that are equal to any of the chars in the array
 impl<'a, 'b> Pattern<'a> for &'b [char] {
-    pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher);
+    pattern_methods!(CharSliceSearcher<'a, 'b>, MultiCharEqPattern, CharSliceSearcher);
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -474,7 +503,7 @@ impl<'a, 'b> Pattern<'a> for &'b [char] {
 
 /// Associated type for `<F as Pattern<'a>>::Searcher`.
 #[derive(Clone)]
-pub struct CharPredicateSearcher<'a, F>(<CharEqPattern<F> as Pattern<'a>>::Searcher)
+pub struct CharPredicateSearcher<'a, F>(<MultiCharEqPattern<F> as Pattern<'a>>::Searcher)
     where F: FnMut(char) -> bool;
 
 impl<'a, F> fmt::Debug for CharPredicateSearcher<'a, F>
@@ -504,7 +533,7 @@ impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F>
 
 /// Searches for chars that match the given predicate
 impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
-    pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher);
+    pattern_methods!(CharPredicateSearcher<'a, F>, MultiCharEqPattern, CharPredicateSearcher);
 }
 
 /////////////////////////////////////////////////////////////////////////////

From 585ad9ff30e579e929bca2b1221367cc440aa377 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 14:37:35 -0600
Subject: [PATCH 06/14] Move CharSearcher to its own section in the file

---
 src/libcore/str/pattern.rs | 167 +++++++++++++++++++------------------
 1 file changed, 84 insertions(+), 83 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 9dc828518278e..b1b66c9f8d8b3 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -234,6 +234,90 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
 /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
 pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 
+
+/////////////////////////////////////////////////////////////////////////////
+// Impl for char
+/////////////////////////////////////////////////////////////////////////////
+
+/// Associated type for `<char as Pattern<'a>>::Searcher`.
+#[derive(Clone, Debug)]
+pub struct CharSearcher<'a>(&'a str);
+
+unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        unimplemented!();
+    }
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_reject(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+}
+
+unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
+    #[inline]
+    fn next_back(&mut self) -> SearchStep {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+    #[inline]
+    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
+        unimplemented!();
+    }
+}
+
+impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
+
+/// Searches for chars that are equal to a given char
+impl<'a> Pattern<'a> for char {
+    type Searcher = CharSearcher<'a>;
+
+    #[inline]
+    fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
+        CharSearcher(haystack)
+    }
+
+    #[inline]
+    fn is_contained_in(self, haystack: &'a str) -> bool {
+        if (self as u32) < 128 {
+            haystack.as_bytes().contains(&(self as u8))
+        } else {
+            let mut buffer = [0u8; 4];
+            self.encode_utf8(&mut buffer).is_contained_in(haystack)
+        }
+    }
+
+    #[inline]
+    fn is_prefix_of(self, haystack: &'a str) -> bool {
+        if let Some(ch) = haystack.chars().next() {
+            self == ch
+        } else {
+            false
+        }
+    }
+
+    #[inline]
+    fn is_suffix_of(self, haystack: &'a str) -> bool where Self::Searcher: ReverseSearcher<'a>
+    {
+        if let Some(ch) = haystack.chars().next_back() {
+            self == ch
+        } else {
+            false
+        }
+    }
+}
+
 /////////////////////////////////////////////////////////////////////////////
 // Impl for a MultiCharEq wrapper
 /////////////////////////////////////////////////////////////////////////////
@@ -389,89 +473,6 @@ macro_rules! searcher_methods {
     }
 }
 
-/////////////////////////////////////////////////////////////////////////////
-// Impl for char
-/////////////////////////////////////////////////////////////////////////////
-
-/// Associated type for `<char as Pattern<'a>>::Searcher`.
-#[derive(Clone, Debug)]
-pub struct CharSearcher<'a>(&'a str);
-
-unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
-    #[inline]
-    fn haystack(&self) -> &'a str {
-        unimplemented!();
-    }
-    #[inline]
-    fn next(&mut self) -> SearchStep {
-        unimplemented!();
-    }
-    #[inline]
-    fn next_match(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
-    #[inline]
-    fn next_reject(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
-}
-
-unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
-    #[inline]
-    fn next_back(&mut self) -> SearchStep {
-        unimplemented!();
-    }
-    #[inline]
-    fn next_match_back(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
-    #[inline]
-    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
-}
-
-impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
-
-/// Searches for chars that are equal to a given char
-impl<'a> Pattern<'a> for char {
-    type Searcher = CharSearcher<'a>;
-
-    #[inline]
-    fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
-        CharSearcher(haystack)
-    }
-
-    #[inline]
-    fn is_contained_in(self, haystack: &'a str) -> bool {
-        if (self as u32) < 128 {
-            haystack.as_bytes().contains(&(self as u8))
-        } else {
-            let mut buffer = [0u8; 4];
-            self.encode_utf8(&mut buffer).is_contained_in(haystack)
-        }
-    }
-
-    #[inline]
-    fn is_prefix_of(self, haystack: &'a str) -> bool {
-        if let Some(ch) = haystack.chars().next() {
-            self == ch
-        } else {
-            false
-        }
-    }
-
-    #[inline]
-    fn is_suffix_of(self, haystack: &'a str) -> bool where Self::Searcher: ReverseSearcher<'a>
-    {
-        if let Some(ch) = haystack.chars().next_back() {
-            self == ch
-        } else {
-            false
-        }
-    }
-}
-
 /////////////////////////////////////////////////////////////////////////////
 // Impl for &[char]
 /////////////////////////////////////////////////////////////////////////////

From d9dc44a5e9857864905e1cdbf40ab9ac617f65e7 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Wed, 13 Dec 2017 15:26:27 -0600
Subject: [PATCH 07/14] Fill in forward searcher impl for char

---
 src/libcore/str/pattern.rs | 78 +++++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 13 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index b1b66c9f8d8b3..3f24374223cb6 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -19,6 +19,7 @@
 
 use cmp;
 use fmt;
+use slice::memchr;
 use usize;
 
 // Pattern
@@ -241,25 +242,66 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 
 /// Associated type for `<char as Pattern<'a>>::Searcher`.
 #[derive(Clone, Debug)]
-pub struct CharSearcher<'a>(&'a str);
+pub struct CharSearcher<'a> {
+    haystack: &'a str,
+    // invariant: `finger` must be a valid utf8 byte index of `haystack`
+    finger: usize,
+    needle: char,
+    // For ascii chars
+    // invariant: must be an ASCII byte (no high bit)
+    single_byte: Option<u8>,
+}
 
 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
     #[inline]
     fn haystack(&self) -> &'a str {
-        unimplemented!();
+        self.haystack
     }
     #[inline]
     fn next(&mut self) -> SearchStep {
-        unimplemented!();
+        let old_finger = self.finger;
+        let slice = unsafe { self.haystack.get_unchecked(old_finger..) };
+        let mut iter = slice.chars();
+        let old_len = iter.iter.len();
+        if let Some(ch) = iter.next() {
+            // add byte offset of current character
+            // without recalculating
+            self.finger += iter.iter.len() - old_len;
+            if ch == self.needle {
+                SearchStep::Match(old_finger, self.finger)
+            } else {
+                SearchStep::Reject(old_finger, self.finger)
+            }
+        } else {
+            SearchStep::Done
+        }
     }
     #[inline]
     fn next_match(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
-    #[inline]
-    fn next_reject(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
+        if let Some(byte) = self.single_byte {
+            let old_finger = self.finger;
+            let slice = unsafe { self.haystack.get_unchecked(old_finger..) };
+            let bytes = slice.as_bytes();
+            if let Some(index) = memchr::memchr(byte, bytes) {
+                // index is the index of a valid ASCII byte,
+                // so we can add one to it
+                self.finger += index + 1;
+                Some((index, self.finger))
+            } else {
+                None
+            }
+        } else {
+            loop {
+                match self.next() {
+                    SearchStep::Match(a, b) => break Some((a, b)),
+                    SearchStep::Done => break None,
+                    _ => continue,
+                }
+            }
+        }
     }
+
+    // let next_reject use the default implementation from the Searcher trait
 }
 
 unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
@@ -271,10 +313,8 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
     fn next_match_back(&mut self) -> Option<(usize, usize)> {
         unimplemented!();
     }
-    #[inline]
-    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
-    }
+
+    // let next_reject_back use the default implementation from the Searcher trait
 }
 
 impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
@@ -285,7 +325,19 @@ impl<'a> Pattern<'a> for char {
 
     #[inline]
     fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
-        CharSearcher(haystack)
+        let single_byte = if self.len_utf8() == 1 {
+            let mut storage = [0];
+            self.encode_utf8(&mut storage);
+            Some(storage[0])
+        } else {
+            None
+        };
+        CharSearcher {
+            haystack,
+            finger: 0,
+            needle: self,
+            single_byte,
+        }
     }
 
     #[inline]

From f865164030ccd167a9e9f9fae665373fb58295fb Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Thu, 14 Dec 2017 14:10:10 -0600
Subject: [PATCH 08/14] Fill in reverse searcher impl for char

---
 src/libcore/str/pattern.rs | 56 ++++++++++++++++++++++++++++++++++----
 1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 3f24374223cb6..54e426893bc7e 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -128,6 +128,11 @@ pub unsafe trait Searcher<'a> {
     fn next(&mut self) -> SearchStep;
 
     /// Find the next `Match` result. See `next()`
+    ///
+    /// Unlike next(), there is no guarantee that the returned ranges
+    /// of this and next_reject will overlap. This will return (start_match, end_match),
+    /// where start_match is the index of where the match begins, and end_match is
+    /// the index after the end of the match.
     #[inline]
     fn next_match(&mut self) -> Option<(usize, usize)> {
         loop {
@@ -139,7 +144,10 @@ pub unsafe trait Searcher<'a> {
         }
     }
 
-    /// Find the next `Reject` result. See `next()`
+    /// Find the next `Reject` result. See `next()` and `next_match()`
+    ///
+    /// Unlike next(), there is no guarantee that the returned ranges
+    /// of this and next_match will overlap.
     #[inline]
     fn next_reject(&mut self) -> Option<(usize, usize)> {
         loop {
@@ -244,8 +252,9 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 #[derive(Clone, Debug)]
 pub struct CharSearcher<'a> {
     haystack: &'a str,
-    // invariant: `finger` must be a valid utf8 byte index of `haystack`
+    // invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
     finger: usize,
+    finger_back: usize,
     needle: char,
     // For ascii chars
     // invariant: must be an ASCII byte (no high bit)
@@ -266,7 +275,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
         if let Some(ch) = iter.next() {
             // add byte offset of current character
             // without recalculating
-            self.finger += iter.iter.len() - old_len;
+            self.finger += old_len - iter.iter.len();
             if ch == self.needle {
                 SearchStep::Match(old_finger, self.finger)
             } else {
@@ -286,7 +295,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
                 // index is the index of a valid ASCII byte,
                 // so we can add one to it
                 self.finger += index + 1;
-                Some((index, self.finger))
+                Some((self.finger - 1, self.finger))
             } else {
                 None
             }
@@ -307,11 +316,45 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
 unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
     #[inline]
     fn next_back(&mut self) -> SearchStep {
-        unimplemented!();
+        let old_finger = self.finger_back;
+        let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
+        let mut iter = slice.chars();
+        let old_len = iter.iter.len();
+        if let Some(ch) = iter.next_back() {
+            // subtract byte offset of current character
+            // without recalculating
+            self.finger_back -= old_len - iter.iter.len();
+            if ch == self.needle {
+                SearchStep::Match(self.finger_back, old_finger)
+            } else {
+                SearchStep::Reject(self.finger_back, old_finger)
+            }
+        } else {
+            SearchStep::Done
+        }
     }
     #[inline]
     fn next_match_back(&mut self) -> Option<(usize, usize)> {
-        unimplemented!();
+        if let Some(byte) = self.single_byte {
+            let old_finger = self.finger_back;
+            let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
+            let bytes = slice.as_bytes();
+            if let Some(index) = memchr::memrchr(byte, bytes) {
+                // index is the index of a valid ASCII byte
+                self.finger_back = index;
+                Some((self.finger_back, self.finger_back + 1))
+            } else {
+                None
+            }
+        } else {
+            loop {
+                match self.next_back() {
+                    SearchStep::Match(a, b) => break Some((a, b)),
+                    SearchStep::Done => break None,
+                    _ => continue,
+                }
+            }
+        }
     }
 
     // let next_reject_back use the default implementation from the Searcher trait
@@ -335,6 +378,7 @@ impl<'a> Pattern<'a> for char {
         CharSearcher {
             haystack,
             finger: 0,
+            finger_back: haystack.len(),
             needle: self,
             single_byte,
         }

From 75c07a37ff352607523a3c7a4e8bc3809949cb4c Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Sat, 16 Dec 2017 22:17:27 -0600
Subject: [PATCH 09/14] Add memchr search support for multibyte characters

---
 src/libcore/str/pattern.rs | 150 +++++++++++++++++++++++++------------
 1 file changed, 102 insertions(+), 48 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 54e426893bc7e..e44799bb9c5ab 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -252,13 +252,28 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 #[derive(Clone, Debug)]
 pub struct CharSearcher<'a> {
     haystack: &'a str,
-    // invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
+    // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
+    // This invariant can be broken *within* next_match and next_match_back, however
+    // they must exit with fingers on valid code point boundaries.
+
+    /// `finger` is the current byte index of the forward search.
+    /// Imagine that it exists before the byte at its index, i.e.
+    /// haystack[finger] is the first byte of the slice we must inspect during
+    /// forward searching
     finger: usize,
+    /// `finger_back` is the current byte index of the reverse search.
+    /// Imagine that it exists after the byte at its index, i.e.
+    /// haystack[finger_back - 1] is the last byte of the slice we must inspect during
+    /// forward searching (and thus the first byte to be inspected when calling next_back())
     finger_back: usize,
+    /// The character being searched for
     needle: char,
-    // For ascii chars
-    // invariant: must be an ASCII byte (no high bit)
-    single_byte: Option<u8>,
+
+    // safety invariant: `utf8_size` must be less than 5
+    /// The number of bytes `needle` takes up when encoded in utf8
+    utf8_size: usize,
+    /// A utf8 encoded copy of the `needle`
+    utf8_encoded: [u8; 4],
 }
 
 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
@@ -269,12 +284,12 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
     #[inline]
     fn next(&mut self) -> SearchStep {
         let old_finger = self.finger;
-        let slice = unsafe { self.haystack.get_unchecked(old_finger..) };
+        let slice = unsafe { self.haystack.get_unchecked(old_finger..self.haystack.len()) };
         let mut iter = slice.chars();
         let old_len = iter.iter.len();
         if let Some(ch) = iter.next() {
             // add byte offset of current character
-            // without recalculating
+            // without re-encoding as utf-8
             self.finger += old_len - iter.iter.len();
             if ch == self.needle {
                 SearchStep::Match(old_finger, self.finger)
@@ -287,25 +302,44 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
     }
     #[inline]
     fn next_match(&mut self) -> Option<(usize, usize)> {
-        if let Some(byte) = self.single_byte {
-            let old_finger = self.finger;
-            let slice = unsafe { self.haystack.get_unchecked(old_finger..) };
-            let bytes = slice.as_bytes();
-            if let Some(index) = memchr::memchr(byte, bytes) {
-                // index is the index of a valid ASCII byte,
-                // so we can add one to it
-                self.finger += index + 1;
-                Some((self.finger - 1, self.finger))
+        loop {
+            // get the haystack after the last character found
+            let bytes = if let Some(slice) = self.haystack.as_bytes().get(self.finger..) {
+                slice
             } else {
-                None
-            }
-        } else {
-            loop {
-                match self.next() {
-                    SearchStep::Match(a, b) => break Some((a, b)),
-                    SearchStep::Done => break None,
-                    _ => continue,
+                return None;
+            };
+            // the last byte of the utf8 encoded needle
+            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
+            if let Some(index) = memchr::memchr(last_byte, bytes) {
+                // The new finger is the index of the byte we found,
+                // plus one, since we memchr'd for the last byte of the character.
+                //
+                // Note that this doesn't always give us a finger on a UTF8 boundary.
+                // If we *didn't* find our character
+                // we may have indexed to the non-last byte of a 3-byte or 4-byte character.
+                // We can't just skip to the next valid starting byte because a character like
+                // ꁁ (U+A041 YI SYLLABLE PA), utf-8 `EA 81 81` will have us always find
+                // the second byte when searching for the third.
+                //
+                // However, this is totally okay. While we have the invariant that
+                // self.finger is on a UTF8 boundary, this invariant is not relied upon
+                // within this method (it is relied upon in CharSearcher::next()).
+                //
+                // We only exit this method when we reach the end of the string, or if we
+                // find something. When we find something the `finger` will be set
+                // to a UTF8 boundary.
+                self.finger += index + 1;
+                let found_char = self.finger - self.utf8_size;
+                if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
+                    if slice == &self.utf8_encoded[0..self.utf8_size] {
+                        return Some((found_char, self.finger));
+                    }
                 }
+            } else {
+                // found nothing, exit
+                self.finger = self.haystack.len();
+                return None;
             }
         }
     }
@@ -322,7 +356,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
         let old_len = iter.iter.len();
         if let Some(ch) = iter.next_back() {
             // subtract byte offset of current character
-            // without recalculating
+            // without re-encoding as utf-8
             self.finger_back -= old_len - iter.iter.len();
             if ch == self.needle {
                 SearchStep::Match(self.finger_back, old_finger)
@@ -335,24 +369,47 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
     }
     #[inline]
     fn next_match_back(&mut self) -> Option<(usize, usize)> {
-        if let Some(byte) = self.single_byte {
-            let old_finger = self.finger_back;
-            let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
-            let bytes = slice.as_bytes();
-            if let Some(index) = memchr::memrchr(byte, bytes) {
-                // index is the index of a valid ASCII byte
-                self.finger_back = index;
-                Some((self.finger_back, self.finger_back + 1))
+        let haystack = self.haystack.as_bytes();
+        loop {
+            // get the haystack up to but not including the last character searched
+            let bytes = if let Some(slice) = haystack.get(..self.finger_back) {
+                slice
             } else {
-                None
-            }
-        } else {
-            loop {
-                match self.next_back() {
-                    SearchStep::Match(a, b) => break Some((a, b)),
-                    SearchStep::Done => break None,
-                    _ => continue,
+                return None;
+            };
+            // the last byte of the utf8 encoded needle
+            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
+            if let Some(index) = memchr::memrchr(last_byte, bytes) {
+                // memrchr will return the index of the byte we wish to
+                // find. In case of an ASCII character, this is indeed
+                // where we wish our new finger to be ("after" the found
+                // char in the paradigm of reverse iteration). For
+                // multibyte chars we need to skip down by the number of more
+                // bytes they have than ASCII
+                let found_char = index - (self.utf8_size - 1);
+                if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
+                    if slice == &self.utf8_encoded[0..self.utf8_size] {
+                        // move finger to before the character found (i.e. at its start index)
+                        self.finger_back = found_char;
+                        return Some((self.finger_back, self.finger_back + self.utf8_size));
+                    }
                 }
+                // We can't use finger_back = index - size + 1 here. If we found the last byte
+                // of a different-sized character (or the middle byte of a different character)
+                // we need to bump the finger_back down to `index`. This similarly makes
+                // `finger_back` have the potential to no longer be on a boundary,
+                // but this is OK since we only exit this function on a boundary
+                // or when the haystack has been searched completely.
+                //
+                // Unlike next_match this does not
+                // have the problem of repeated bytes in utf-8 because
+                // we're searching for the last byte, and we can only have
+                // found the last byte when searching in reverse.
+                self.finger_back = index;
+            } else {
+                self.finger_back = 0;
+                // found nothing, exit
+                return None;
             }
         }
     }
@@ -368,19 +425,16 @@ impl<'a> Pattern<'a> for char {
 
     #[inline]
     fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
-        let single_byte = if self.len_utf8() == 1 {
-            let mut storage = [0];
-            self.encode_utf8(&mut storage);
-            Some(storage[0])
-        } else {
-            None
-        };
+        let mut utf8_encoded = [0; 4];
+        self.encode_utf8(&mut utf8_encoded);
+        let utf8_size = self.len_utf8();
         CharSearcher {
             haystack,
             finger: 0,
             finger_back: haystack.len(),
             needle: self,
-            single_byte,
+            utf8_size,
+            utf8_encoded
         }
     }
 

From efcc447ebfafde91eba51ae04cdb8b0b776f8ac8 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Sun, 17 Dec 2017 14:44:03 -0800
Subject: [PATCH 10/14] Add simple test for pattern API

---
 src/libcore/tests/lib.rs     |  2 +
 src/libcore/tests/pattern.rs | 76 ++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 src/libcore/tests/pattern.rs

diff --git a/src/libcore/tests/lib.rs b/src/libcore/tests/lib.rs
index 0e445cdac358a..c4b85b829812c 100644
--- a/src/libcore/tests/lib.rs
+++ b/src/libcore/tests/lib.rs
@@ -28,6 +28,7 @@
 #![feature(iter_rfind)]
 #![feature(iter_rfold)]
 #![feature(nonzero)]
+#![feature(pattern)]
 #![feature(raw)]
 #![feature(refcell_replace_swap)]
 #![feature(sip_hash_13)]
@@ -61,6 +62,7 @@ mod nonzero;
 mod num;
 mod ops;
 mod option;
+mod pattern;
 mod ptr;
 mod result;
 mod slice;
diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
new file mode 100644
index 0000000000000..e12f0bc9e5f6d
--- /dev/null
+++ b/src/libcore/tests/pattern.rs
@@ -0,0 +1,76 @@
+use std::str::pattern::*;
+
+// This macro makes it easier to write
+// tests that do a series of iterations
+macro_rules! search_asserts {
+    ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
+        let mut searcher = $needle.into_searcher($haystack);
+        let arr = [$( Step::from(searcher.$func()) ),+];
+        assert_eq!(&arr[..], &$result, $testname);
+    }
+}
+
+/// Combined enum for the results of next() and next_match()/next_reject()
+#[derive(Debug, PartialEq, Eq)]
+enum Step {
+    // variant names purposely chosen to
+    // be the same length for easy alignment
+    Matches(usize, usize),
+    Rejects(usize, usize),
+    InRange(usize, usize),
+    Done
+}
+
+use Step::*;
+
+impl From<SearchStep> for Step {
+    fn from(x: SearchStep) -> Self {
+        match x {
+            SearchStep::Match(a, b) => Matches(a, b),
+            SearchStep::Reject(a, b) => Rejects(a, b),
+            SearchStep::Done => Done
+        }
+    }
+}
+
+impl From<Option<(usize, usize)>> for Step {
+    fn from(x: Option<(usize, usize)>) -> Self {
+        match x {
+            Some((a, b)) => InRange(a, b),
+            None => Done
+        }
+    }
+}
+
+#[test]
+fn test_simple_iteration() {
+    search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
+        // a            b              c              d              e              a              b              c              d              EOF 
+        [next,          next,          next,          next,          next,          next,          next,          next,          next,          next],
+        [Matches(0, 1), Rejects(1, 2), Rejects(2, 3), Rejects(3, 4), Rejects(4, 5), Matches(5, 6), Rejects(6, 7), Rejects(7, 8), Rejects(8, 9), Done]
+    );
+
+    search_asserts! ("abcdeabcd", 'a', "reverse iteration for ASCII string",
+        // d            c              b              a            e                d              c              b              a             EOF
+        [next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back],
+        [Rejects(8, 9), Rejects(7, 8), Rejects(6, 7), Matches(5, 6), Rejects(4, 5), Rejects(3, 4), Rejects(2, 3), Rejects(1, 2), Matches(0, 1), Done]
+    );
+
+    search_asserts! ("我爱我的猫", '我', "forward iteration for Chinese string",
+        // 我           爱             我             的              猫               EOF
+        [next,          next,          next,          next,           next,            next],
+        [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done]
+    );
+
+    search_asserts! ("我的猫说meow", 'm', "forward iteration for mixed string",
+        // 我           的             猫             说              m                e                o                w                EOF
+        [next,          next,          next,          next,           next,            next,            next,            next,            next],
+        [Rejects(0, 3), Rejects(3, 6), Rejects(6, 9), Rejects(9, 12), Matches(12, 13), Rejects(13, 14), Rejects(14, 15), Rejects(15, 16), Done]
+    );
+
+    search_asserts! ("我的猫说meow", '猫', "reverse iteration for mixed string",
+        // w             o                 e                m                说              猫             的             我             EOF
+        [next_back,       next_back,       next_back,       next_back,       next_back,      next_back,      next_back,    next_back,     next_back],
+        [Rejects(15, 16), Rejects(14, 15), Rejects(13, 14), Rejects(12, 13), Rejects(9, 12), Matches(6, 9), Rejects(3, 6), Rejects(0, 3), Done]
+    );
+}

From bc5535557662fb7851d80ff1538b5518af921571 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Sun, 17 Dec 2017 15:05:29 -0800
Subject: [PATCH 11/14] Add simple search test for pattern API

---
 src/libcore/tests/pattern.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
index e12f0bc9e5f6d..bb0a618f6b870 100644
--- a/src/libcore/tests/pattern.rs
+++ b/src/libcore/tests/pattern.rs
@@ -74,3 +74,27 @@ fn test_simple_iteration() {
         [Rejects(15, 16), Rejects(14, 15), Rejects(13, 14), Rejects(12, 13), Rejects(9, 12), Matches(6, 9), Rejects(3, 6), Rejects(0, 3), Done]
     );
 }
+
+#[test]
+fn test_simple_search() {
+    search_asserts!("abcdeabcdeabcde", 'a', "next_match for ASCII string",
+        [next_match,    next_match,    next_match,      next_match],
+        [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done]
+    );
+
+    search_asserts!("abcdeabcdeabcde", 'a', "next_match_back for ASCII string",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(10, 11), InRange(5, 6),   InRange(0, 1),   Done]
+    );
+
+    search_asserts!("abcdeab", 'a', "next_reject for ASCII string",
+        [next_reject,   next_reject,   next_match,    next_reject,   next_reject],
+        [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done]
+    );
+
+    search_asserts!("abcdeabcdeabcde", 'a', "next_reject_back for ASCII string",
+        [next_reject_back, next_reject_back, next_match_back, next_reject_back, next_reject_back, next_reject_back],
+        [InRange(14, 15),  InRange(13, 14),  InRange(10, 11), InRange(9, 10),   InRange(8, 9),    InRange(7, 8)]
+    );
+}
+

From 9b92a4419d6a76a9de6d56adb3084d97e3e31d20 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Mon, 18 Dec 2017 03:48:07 -0800
Subject: [PATCH 12/14] Add stresstests for shared bytes for pattern API

---
 src/libcore/tests/pattern.rs | 154 ++++++++++++++++++++++++++++++++++-
 1 file changed, 153 insertions(+), 1 deletion(-)

diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
index bb0a618f6b870..7fe274a79ed08 100644
--- a/src/libcore/tests/pattern.rs
+++ b/src/libcore/tests/pattern.rs
@@ -21,7 +21,7 @@ enum Step {
     Done
 }
 
-use Step::*;
+use self::Step::*;
 
 impl From<SearchStep> for Step {
     fn from(x: SearchStep) -> Self {
@@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
     }
 }
 
+// XXXManishearth these tests focus on single-character searching  (CharSearcher)
+// and on next()/next_match(), not next_reject(). This is because
+// the memchr changes make next_match() for single chars complex, but next_reject()
+// continues to use next() under the hood. We should add more test cases for all
+// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
+
 #[test]
 fn test_simple_iteration() {
     search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
@@ -98,3 +104,149 @@ fn test_simple_search() {
     );
 }
 
+// Á, 각, ก, 😁 all end in 0x81
+// 🁀, ᘀ do not end in 0x81 but contain the byte
+// ꁁ has 0x81 as its second and third bytes.
+//
+// The memchr-using implementations of next_match
+// and next_match_back temporarily violate
+// the property that the search is always on a unicode boundary,
+// which is fine as long as this never reaches next() or next_back().
+// So we test if next() is correct after each next_match() as well.
+const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
+
+#[test]
+fn test_stress_indices() {
+    // this isn't really a test, more of documentation on the indices of each character in the stresstest string
+
+    search_asserts!(STRESS, 'x', "Indices of characters in stress test",
+        [next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next],
+        [Rejects(0, 2), // Á
+         Rejects(2, 3), // a
+         Rejects(3, 7), // 🁀
+         Rejects(7, 8), // b
+         Rejects(8, 10), // Á
+         Rejects(10, 13), // ꁁ
+         Rejects(13, 14), // f
+         Rejects(14, 15), // g
+         Rejects(15, 19), // 😁
+         Rejects(19, 22), // 각
+         Rejects(22, 25), // ก
+         Rejects(25, 28), // ᘀ
+         Rejects(28, 31), // 각
+         Rejects(31, 32), // a
+         Rejects(32, 34), // Á
+         Rejects(34, 37), // 각
+         Rejects(37, 40), // ꁁ
+         Rejects(40, 43), // ก
+         Rejects(43, 47), // 😁
+         Rejects(47, 48), // a
+         Done]
+    );
+}
+
+#[test]
+fn test_forward_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character",
+        [next_match,    next_match,     next_match,      next_match],
+        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works",
+        [next_match,    next,          next_match,     next,             next_match,     next,            next_match],
+        [InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character",
+        [next_match,      next,            next_match,      next_match,      next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match,      next,            next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+}
+
+#[test]
+fn test_reverse_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(32, 34), InRange(8, 10),  InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,     next_match_back, next_back],
+        [InRange(32, 34), Rejects(31, 32), InRange(8, 10),  Rejects(7, 8), InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character",
+        [next_match_back, next_back,        next_match_back, next_match_back, next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31),  InRange(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+}

From 85919a0b5f474783cb56cd433292865a40539665 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Fri, 22 Dec 2017 11:19:50 +0530
Subject: [PATCH 13/14] Pass tidy for tests

---
 src/libcore/tests/pattern.rs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
index 7fe274a79ed08..d0fd15263b219 100644
--- a/src/libcore/tests/pattern.rs
+++ b/src/libcore/tests/pattern.rs
@@ -1,3 +1,13 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
 use std::str::pattern::*;
 
 // This macro makes it easier to write
@@ -42,7 +52,9 @@ impl From<Option<(usize, usize)>> for Step {
     }
 }
 
-// XXXManishearth these tests focus on single-character searching  (CharSearcher)
+// ignore-tidy-linelength
+
+// FIXME(Manishearth) these tests focus on single-character searching  (CharSearcher)
 // and on next()/next_match(), not next_reject(). This is because
 // the memchr changes make next_match() for single chars complex, but next_reject()
 // continues to use next() under the hood. We should add more test cases for all
@@ -51,7 +63,7 @@ impl From<Option<(usize, usize)>> for Step {
 #[test]
 fn test_simple_iteration() {
     search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
-        // a            b              c              d              e              a              b              c              d              EOF 
+        // a            b              c              d              e              a              b              c              d              EOF
         [next,          next,          next,          next,          next,          next,          next,          next,          next,          next],
         [Matches(0, 1), Rejects(1, 2), Rejects(2, 3), Rejects(3, 4), Rejects(4, 5), Matches(5, 6), Rejects(6, 7), Rejects(7, 8), Rejects(8, 9), Done]
     );

From 5cf55165fae5c8538db5c00e252ad9ba42aaf246 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar <manishsmail@gmail.com>
Date: Mon, 1 Jan 2018 19:55:21 +0530
Subject: [PATCH 14/14] handle overflow/underflow in index offsets

---
 src/libcore/str/pattern.rs | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index e44799bb9c5ab..677c0ecc33d7f 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -330,10 +330,12 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
                 // find something. When we find something the `finger` will be set
                 // to a UTF8 boundary.
                 self.finger += index + 1;
-                let found_char = self.finger - self.utf8_size;
-                if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
-                    if slice == &self.utf8_encoded[0..self.utf8_size] {
-                        return Some((found_char, self.finger));
+                if self.finger >= self.utf8_size {
+                    let found_char = self.finger - self.utf8_size;
+                    if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
+                        if slice == &self.utf8_encoded[0..self.utf8_size] {
+                            return Some((found_char, self.finger));
+                        }
                     }
                 }
             } else {
@@ -386,12 +388,15 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
                 // char in the paradigm of reverse iteration). For
                 // multibyte chars we need to skip down by the number of more
                 // bytes they have than ASCII
-                let found_char = index - (self.utf8_size - 1);
-                if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
-                    if slice == &self.utf8_encoded[0..self.utf8_size] {
-                        // move finger to before the character found (i.e. at its start index)
-                        self.finger_back = found_char;
-                        return Some((self.finger_back, self.finger_back + self.utf8_size));
+                let shift = self.utf8_size - 1;
+                if index >= shift {
+                    let found_char = index - shift;
+                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
+                        if slice == &self.utf8_encoded[0..self.utf8_size] {
+                            // move finger to before the character found (i.e. at its start index)
+                            self.finger_back = found_char;
+                            return Some((self.finger_back, self.finger_back + self.utf8_size));
+                        }
                     }
                 }
                 // We can't use finger_back = index - size + 1 here. If we found the last char