From 9419e9265950a16f873dbed49c715fd7ea4e08e7 Mon Sep 17 00:00:00 2001 From: nham Date: Tue, 19 Aug 2014 15:20:51 -0400 Subject: [PATCH 1/2] Fix TwoWaySearcher to work when used with periodic needles. There is a check in TwoWaySearcher::new to determine whether the needle is periodic. This is needed because during searching when a match fails, we cannot advance the position by the entire length of the needle when it is periodic, but can only advance by the length of the period. The reason "bananas".contains("nana") (and similar searches) were returning false was because the periodicity check was wrong. Closes #16589 --- src/libcore/str.rs | 7 ++++++- src/libcoretest/str.rs | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 800e2dcc27893..3c489719496d4 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -419,6 +419,8 @@ struct TwoWaySearcher { memory: uint } +// This is the Two-Way search algorithm, which was introduced in the paper: +// Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675. impl TwoWaySearcher { fn new(needle: &[u8]) -> TwoWaySearcher { let (critPos1, period1) = TwoWaySearcher::maximal_suffix(needle, false); @@ -437,7 +439,10 @@ impl TwoWaySearcher { let byteset = needle.iter() .fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a); - if needle.slice_to(critPos) == needle.slice_from(needle.len() - critPos) { + // Check if the needle is periodic. If so, during searching when we + // find a mismatch, we must only advance the position by the length + // of the period, not the length of the entire needle + if needle.slice_to(critPos) == needle.slice(period, period + critPos) { TwoWaySearcher { critPos: critPos, period: period, diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index bac8d509b1314..be2275dcd4a02 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -8,7 +8,27 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +fn check_contains_all_substrings(s: &str) { + assert!(s.contains("")); + for i in range(0, s.len()) { + for j in range(i+1, s.len() + 1) { + assert!(s.contains(s.slice(i, j))); + } + } +} + #[test] fn strslice_issue_16589() { assert!("bananas".contains("nana")); + + // prior to the fix for #16589, x.contains("abcdabcd") returned false + // test all substrings for good measure + check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd"); +} + + +#[test] +fn test_strslice_contains() { + let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'"; + check_contains_all_substrings(x); } From 9a43492f59bfc38ed819e361c3cf99aa7b972e15 Mon Sep 17 00:00:00 2001 From: nham Date: Wed, 20 Aug 2014 13:57:54 -0400 Subject: [PATCH 2/2] Improve TwoWaySearcher comments. --- src/libcore/str.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 3c489719496d4..363ceaaa2b0dd 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -439,9 +439,13 @@ impl TwoWaySearcher { let byteset = needle.iter() .fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a); - // Check if the needle is periodic. If so, during searching when we - // find a mismatch, we must only advance the position by the length - // of the period, not the length of the entire needle + + // The logic here (calculating critPos and period, the final if statement to see which + // period to use for the TwoWaySearcher) is essentially an implementation of the + // "small-period" function from the paper (p. 670) + // + // In the paper they check whether `needle.slice_to(critPos)` is a suffix of + // `needle.slice(critPos, critPos + period)`, which is precisely what this does if needle.slice_to(critPos) == needle.slice(period, period + critPos) { TwoWaySearcher { critPos: critPos, @@ -513,6 +517,9 @@ impl TwoWaySearcher { } } + // returns (i, p) where i is the "critical position", the starting index of + // of maximal suffix, and p is the period of the suffix + // see p. 668 of the paper #[inline] fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) { let mut left = -1; // Corresponds to i in the paper