Skip to content

Commit 9b92a44

Browse files
committed
Add stresstests for shared bytes for pattern API
1 parent bc55355 commit 9b92a44

File tree

1 file changed

+153
-1
lines changed

1 file changed

+153
-1
lines changed

src/libcore/tests/pattern.rs

+153-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ enum Step {
2121
Done
2222
}
2323

24-
use Step::*;
24+
use self::Step::*;
2525

2626
impl From<SearchStep> for Step {
2727
fn from(x: SearchStep) -> Self {
@@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
4242
}
4343
}
4444

45+
// XXXManishearth these tests focus on single-character searching (CharSearcher)
46+
// and on next()/next_match(), not next_reject(). This is because
47+
// the memchr changes make next_match() for single chars complex, but next_reject()
48+
// continues to use next() under the hood. We should add more test cases for all
49+
// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
50+
4551
#[test]
4652
fn test_simple_iteration() {
4753
search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
@@ -98,3 +104,149 @@ fn test_simple_search() {
98104
);
99105
}
100106

107+
// Á, 각, ก, 😁 all end in 0x81
108+
// 🁀, ᘀ do not end in 0x81 but contain the byte
109+
// ꁁ has 0x81 as its second and third bytes.
110+
//
111+
// The memchr-using implementation of next_match
112+
// and next_match_back temporarily violate
113+
// the property that the search is always on a unicode boundary,
114+
// which is fine as long as this never reaches next() or next_back().
115+
// So we test if next() is correct after each next_match() as well.
116+
const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
117+
118+
#[test]
119+
fn test_stress_indices() {
120+
// Not really a test: it documents the byte range of every character in STRESS. The needle 'x' never occurs in STRESS, so each next() step is a Rejects until Done.
121+
122+
search_asserts!(STRESS, 'x', "Indices of characters in stress test",
123+
[next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next],
124+
[Rejects(0, 2), // Á
125+
Rejects(2, 3), // a
126+
Rejects(3, 7), // 🁀
127+
Rejects(7, 8), // b
128+
Rejects(8, 10), // Á
129+
Rejects(10, 13), // ꁁ
130+
Rejects(13, 14), // f
131+
Rejects(14, 15), // g
132+
Rejects(15, 19), // 😁
133+
Rejects(19, 22), // 각
134+
Rejects(22, 25), // ก
135+
Rejects(25, 28), // ᘀ
136+
Rejects(28, 31), // 각
137+
Rejects(31, 32), // a
138+
Rejects(32, 34), // Á
139+
Rejects(34, 37), // 각
140+
Rejects(37, 40), // ꁁ
141+
Rejects(40, 43), // ก
142+
Rejects(43, 47), // 😁
143+
Rejects(47, 48), // a
144+
Done]
145+
);
146+
}
147+
148+
// Forward searches over STRESS for needles whose UTF-8 encodings share bytes with their neighbours.
// Expected ranges are byte offsets (see test_stress_indices); next() calls are interleaved with
// next_match() to check that the searcher is back on a character boundary after each match.
// NOTE(review): for 'ก', '😁' and 'ꁁ' the "check if next() still works" invocation is identical
// to the one before it (same method list, same expectations); only the message differs.
#[test]
149+
fn test_forward_search_shared_bytes() {
150+
search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character",
151+
[next_match, next_match, next_match, next_match],
152+
[InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
153+
);
154+
155+
search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works",
156+
[next_match, next, next_match, next, next_match, next, next_match],
157+
[InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done]
158+
);
159+
160+
search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character",
161+
[next_match, next, next_match, next_match, next_match],
162+
[InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
163+
);
164+
165+
search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works",
166+
[next_match, next, next_match, next, next_match, next, next_match],
167+
[InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done]
168+
);
169+
170+
search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character",
171+
[next_match, next, next_match, next, next_match],
172+
[InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
173+
);
174+
175+
search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works",
176+
[next_match, next, next_match, next, next_match],
177+
[InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
178+
);
179+
180+
search_asserts!(STRESS, '😁', "Forward search for four-byte emoji",
181+
[next_match, next, next_match, next, next_match],
182+
[InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
183+
);
184+
185+
search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works",
186+
[next_match, next, next_match, next, next_match],
187+
[InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
188+
);
189+
190+
search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes",
191+
[next_match, next, next_match, next, next_match],
192+
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
193+
);
194+
195+
search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
196+
[next_match, next, next_match, next, next_match],
197+
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
198+
);
199+
}
200+
201+
// Reverse searches over STRESS for needles whose UTF-8 encodings share bytes with their neighbours.
// Expected ranges are byte offsets (see test_stress_indices); next_back() calls are interleaved with
// next_match_back() to check that the searcher is back on a character boundary after each match.
// NOTE(review): for 'ก', '😁' and 'ꁁ' the "check if next_back() still works" invocation is identical
// to the one before it (same method list, same expectations); only the message differs.
#[test]
202+
fn test_reverse_search_shared_bytes() {
203+
search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character",
204+
[next_match_back, next_match_back, next_match_back, next_match_back],
205+
[InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
206+
);
207+
208+
search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works",
209+
[next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
210+
[InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
211+
);
212+
213+
search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character",
214+
[next_match_back, next_back, next_match_back, next_match_back, next_match_back],
215+
[InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
216+
);
217+
218+
search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works",
219+
[next_match_back, next_back, next_match_back, next_back, next_match_back, next_back, next_match_back],
220+
[InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done]
221+
);
222+
223+
search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character",
224+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
225+
[InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
226+
);
227+
228+
search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works",
229+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
230+
[InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
231+
);
232+
233+
search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji",
234+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
235+
[InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
236+
);
237+
238+
search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works",
239+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
240+
[InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
241+
);
242+
243+
search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes",
244+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
245+
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
246+
);
247+
248+
search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
249+
[next_match_back, next_back, next_match_back, next_back, next_match_back],
250+
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
251+
);
252+
}

0 commit comments

Comments
 (0)