@@ -21,7 +21,7 @@ enum Step {
21
21
Done
22
22
}
23
23
24
- use Step :: * ;
24
+ use self :: Step :: * ;
25
25
26
26
impl From < SearchStep > for Step {
27
27
fn from ( x : SearchStep ) -> Self {
@@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
42
42
}
43
43
}
44
44
45
+ // XXXManishearth these tests focus on single-character searching (CharSearcher)
46
+ // and on next()/next_match(), not next_reject(). This is because
47
+ // the memchr changes make next_match() for single chars complex, but next_reject()
48
+ // continues to use next() under the hood. We should add more test cases for all
49
+ // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
50
+
45
51
#[ test]
46
52
fn test_simple_iteration ( ) {
47
53
search_asserts ! ( "abcdeabcd" , 'a' , "forward iteration for ASCII string" ,
@@ -98,3 +104,149 @@ fn test_simple_search() {
98
104
) ;
99
105
}
100
106
107
+ // Á, 각, ก, 😁 all end in 0x81
108
+ // 🁀, ᘀ do not end in 0x81 but contain the byte
109
+ // ꁁ has 0x81 as its second and third bytes.
110
+ //
111
+ // The memchr-using implementation of next_match
112
+ // and next_match_back temporarily violates
113
+ // the property that the search is always on a unicode boundary,
114
+ // which is fine as long as this never reaches next() or next_back().
115
+ // So we test if next() is correct after each next_match() as well.
116
+ const STRESS : & str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a" ;
117
+
118
// Not so much a test as executable documentation: it pins down the byte
// range occupied by every character of STRESS, so that the offsets used by
// the other stress tests can be checked against a single table.
//
// The needle 'x' does not occur in STRESS, so each of the twenty next()
// calls is expected to reject exactly one character, and the twenty-first
// is expected to report Done.
#[test]
fn test_stress_indices() {
    search_asserts!(
        STRESS,
        'x',
        "Indices of characters in stress test",
        [
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next
        ],
        [
            Rejects(0, 2),   // Á  (2 bytes)
            Rejects(2, 3),   // a
            Rejects(3, 7),   // 🁀 (4 bytes)
            Rejects(7, 8),   // b
            Rejects(8, 10),  // Á
            Rejects(10, 13), // ꁁ (3 bytes)
            Rejects(13, 14), // f
            Rejects(14, 15), // g
            Rejects(15, 19), // 😁 (4 bytes)
            Rejects(19, 22), // 각 (3 bytes)
            Rejects(22, 25), // ก (3 bytes)
            Rejects(25, 28), // ᘀ (3 bytes)
            Rejects(28, 31), // 각
            Rejects(31, 32), // a
            Rejects(32, 34), // Á
            Rejects(34, 37), // 각
            Rejects(37, 40), // ꁁ
            Rejects(40, 43), // ก
            Rejects(43, 47), // 😁
            Rejects(47, 48), // a
            Done
        ]
    );
}
147
+
148
// Forward searches over STRESS for needles whose UTF-8 encodings share bytes
// with other characters in the haystack.
//
// Every needle gets two cases:
//   * a "plain" case that only issues back-to-back next_match() calls, and
//   * a "check if next() still works" case that interleaves next() after
//     each next_match(), to confirm the searcher is left on a character
//     boundary.
//
// Previously the plain cases for ก, 😁 and ꁁ repeated the interleaved
// sequence verbatim (and the plain 각 case contained a stray next()), so
// consecutive next_match() calls were only covered for Á. The plain cases
// now use next_match() exclusively; expected ranges follow the index table
// in test_stress_indices.
#[test]
fn test_forward_search_shared_bytes() {
    // Á: two bytes, occurs at 0..2, 8..10 and 32..34.
    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character",
        [next_match, next_match, next_match, next_match],
        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(0, 2),
            Rejects(2, 3),
            InRange(8, 10),
            Rejects(10, 13),
            InRange(32, 34),
            Rejects(34, 37),
            Done
        ]
    );

    // 각: three bytes, occurs at 19..22, 28..31 and 34..37.
    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character",
        [next_match, next_match, next_match, next_match],
        [InRange(19, 22), InRange(28, 31), InRange(34, 37), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(19, 22),
            Rejects(22, 25),
            InRange(28, 31),
            Rejects(31, 32),
            InRange(34, 37),
            Rejects(37, 40),
            Done
        ]
    );

    // ก: three bytes, occurs at 22..25 and 40..43.
    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character",
        [next_match, next_match, next_match],
        [InRange(22, 25), InRange(40, 43), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [
            InRange(22, 25),
            Rejects(25, 28),
            InRange(40, 43),
            Rejects(43, 47),
            Done
        ]
    );

    // 😁: four bytes, occurs at 15..19 and 43..47.
    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji",
        [next_match, next_match, next_match],
        [InRange(15, 19), InRange(43, 47), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [
            InRange(15, 19),
            Rejects(19, 22),
            InRange(43, 47),
            Rejects(47, 48),
            Done
        ]
    );

    // ꁁ: three bytes with a repeated trailing byte, occurs at 10..13 and 37..40.
    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes",
        [next_match, next_match, next_match],
        [InRange(10, 13), InRange(37, 40), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [
            InRange(10, 13),
            Rejects(13, 14),
            InRange(37, 40),
            Rejects(40, 43),
            Done
        ]
    );
}
200
+
201
// Reverse searches over STRESS for needles whose UTF-8 encodings share bytes
// with other characters in the haystack.
//
// Every needle gets two cases:
//   * a "plain" case that only issues back-to-back next_match_back() calls,
//     and
//   * a "check if next_back() still works" case that interleaves next_back()
//     after each next_match_back(), to confirm the searcher is left on a
//     character boundary.
//
// Previously the plain cases for ก, 😁 and ꁁ repeated the interleaved
// sequence verbatim (and the plain 각 case contained a stray next_back()),
// so consecutive next_match_back() calls were only covered for Á. The plain
// cases now use next_match_back() exclusively; expected ranges follow the
// index table in test_stress_indices.
#[test]
fn test_reverse_search_shared_bytes() {
    // Á: two bytes, occurs at 0..2, 8..10 and 32..34.
    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
        [
            InRange(32, 34),
            Rejects(31, 32),
            InRange(8, 10),
            Rejects(7, 8),
            InRange(0, 2),
            Done
        ]
    );

    // 각: three bytes, occurs at 19..22, 28..31 and 34..37.
    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [InRange(34, 37), InRange(28, 31), InRange(19, 22), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character; check if next_back() still works",
        [
            next_match_back, next_back, next_match_back, next_back,
            next_match_back, next_back, next_match_back
        ],
        [
            InRange(34, 37),
            Rejects(32, 34),
            InRange(28, 31),
            Rejects(25, 28),
            InRange(19, 22),
            Rejects(15, 19),
            Done
        ]
    );

    // ก: three bytes, occurs at 22..25 and 40..43.
    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character",
        [next_match_back, next_match_back, next_match_back],
        [InRange(40, 43), InRange(22, 25), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [
            InRange(40, 43),
            Rejects(37, 40),
            InRange(22, 25),
            Rejects(19, 22),
            Done
        ]
    );

    // 😁: four bytes, occurs at 15..19 and 43..47.
    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji",
        [next_match_back, next_match_back, next_match_back],
        [InRange(43, 47), InRange(15, 19), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [
            InRange(43, 47),
            Rejects(40, 43),
            InRange(15, 19),
            Rejects(14, 15),
            Done
        ]
    );

    // ꁁ: three bytes with a repeated trailing byte, occurs at 10..13 and 37..40.
    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes",
        [next_match_back, next_match_back, next_match_back],
        [InRange(37, 40), InRange(10, 13), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [
            InRange(37, 40),
            Rejects(34, 37),
            InRange(10, 13),
            Rejects(8, 10),
            Done
        ]
    );
}
0 commit comments