Skip to content

Commit 03fd90b

Browse files
committed
auto merge of #16612 : nham/rust/twoway_searcher_fix, r=alexcrichton
There is a check in TwoWaySearcher::new to determine whether the needle is periodic. This is needed because during searching when a match fails, we cannot advance the position by the entire length of the needle when it is periodic, but can only advance by the length of the period. The reason "bananas".contains("nana") (and similar searches) were returning false was because the periodicity check was wrong. Closes #16589 Also, thanks to @gankro, who came up with many buggy examples.
2 parents 3e3209a + 9a43492 commit 03fd90b

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

src/libcore/str.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,8 @@ struct TwoWaySearcher {
419419
memory: uint
420420
}
421421

422+
// This is the Two-Way search algorithm, which was introduced in the paper:
423+
// Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
422424
impl TwoWaySearcher {
423425
fn new(needle: &[u8]) -> TwoWaySearcher {
424426
let (critPos1, period1) = TwoWaySearcher::maximal_suffix(needle, false);
@@ -437,7 +439,14 @@ impl TwoWaySearcher {
437439
let byteset = needle.iter()
438440
.fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);
439441

440-
if needle.slice_to(critPos) == needle.slice_from(needle.len() - critPos) {
442+
443+
// The logic here (calculating critPos and period, the final if statement to see which
444+
// period to use for the TwoWaySearcher) is essentially an implementation of the
445+
// "small-period" function from the paper (p. 670)
446+
//
447+
// In the paper they check whether `needle.slice_to(critPos)` is a suffix of
448+
// `needle.slice(critPos, critPos + period)`, which is precisely what this does
449+
if needle.slice_to(critPos) == needle.slice(period, period + critPos) {
441450
TwoWaySearcher {
442451
critPos: critPos,
443452
period: period,
@@ -508,6 +517,9 @@ impl TwoWaySearcher {
508517
}
509518
}
510519

520+
// returns (i, p) where i is the "critical position", the starting index of
521+
// of maximal suffix, and p is the period of the suffix
522+
// see p. 668 of the paper
511523
#[inline]
512524
fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
513525
let mut left = -1; // Corresponds to i in the paper

src/libcoretest/str.rs

+20
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,27 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
fn check_contains_all_substrings(s: &str) {
12+
assert!(s.contains(""));
13+
for i in range(0, s.len()) {
14+
for j in range(i+1, s.len() + 1) {
15+
assert!(s.contains(s.slice(i, j)));
16+
}
17+
}
18+
}
19+
1120
#[test]
1221
fn strslice_issue_16589() {
1322
assert!("bananas".contains("nana"));
23+
24+
// prior to the fix for #16589, x.contains("abcdabcd") returned false
25+
// test all substrings for good measure
26+
check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd");
27+
}
28+
29+
30+
#[test]
31+
fn test_strslice_contains() {
32+
let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'";
33+
check_contains_all_substrings(x);
1434
}

0 commit comments

Comments
 (0)