18
18
19
19
use exec:: ProgramCache ;
20
20
use input:: { Input , InputAt } ;
21
- use prog:: { InstPtr , Program } ;
21
+ use prog:: { BytesInst , InstPtr , InstTrait , Program , UnicodeInst } ;
22
22
use re_trait:: Slot ;
23
23
24
24
type Bits = u32 ;
@@ -41,8 +41,8 @@ pub fn should_exec(num_insts: usize, text_len: usize) -> bool {
41
41
42
42
/// A backtracking matching engine.
43
43
#[ derive( Debug ) ]
44
- pub struct Bounded < ' a , ' m , ' r , ' s , I > {
45
- prog : & ' r Program ,
44
+ pub struct Bounded < ' a , ' m , ' r , ' s , I , P : InstTrait > {
45
+ prog : & ' r Program < P > ,
46
46
input : I ,
47
47
matches : & ' m mut [ bool ] ,
48
48
slots : & ' s mut [ Slot ] ,
@@ -59,7 +59,7 @@ pub struct Cache {
59
59
60
60
impl Cache {
61
61
/// Create new empty cache for the backtracking engine.
62
- pub fn new ( _prog : & Program ) -> Self {
62
+ pub fn new < I : InstTrait > ( _prog : & Program < I > ) -> Self {
63
63
Cache { jobs : vec ! [ ] , visited : vec ! [ ] }
64
64
}
65
65
}
@@ -76,13 +76,15 @@ enum Job {
76
76
SaveRestore { slot : usize , old_pos : Option < usize > } ,
77
77
}
78
78
79
- impl < ' a , ' m , ' r , ' s , I : Input > Bounded < ' a , ' m , ' r , ' s , I > {
79
+ impl < ' a , ' m , ' r , ' s , I : Input , P : InstTrait + Step >
80
+ Bounded < ' a , ' m , ' r , ' s , I , P >
81
+ {
80
82
/// Execute the backtracking matching engine.
81
83
///
82
84
/// If there's a match, `exec` returns `true` and populates the given
83
85
/// captures accordingly.
84
86
pub fn exec (
85
- prog : & ' r Program ,
87
+ prog : & ' r Program < P > ,
86
88
cache : & ProgramCache ,
87
89
matches : & ' m mut [ bool ] ,
88
90
slots : & ' s mut [ Slot ] ,
@@ -93,14 +95,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
93
95
let mut cache = cache. borrow_mut ( ) ;
94
96
let cache = & mut cache. backtrack ;
95
97
let start = input. at ( start) ;
96
- let mut b = Bounded {
98
+ Bounded {
97
99
prog : prog,
98
100
input : input,
99
101
matches : matches,
100
102
slots : slots,
101
103
m : cache,
102
- } ;
103
- b . exec_ ( start, end)
104
+ }
105
+ . exec_ ( start, end)
104
106
}
105
107
106
108
/// Clears the cache such that the backtracking engine can be executed
@@ -196,7 +198,6 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
196
198
}
197
199
198
200
fn step ( & mut self , mut ip : InstPtr , mut at : InputAt ) -> bool {
199
- use prog:: Inst :: * ;
200
201
loop {
201
202
// This loop is an optimization to avoid constantly pushing/popping
202
203
// from the stack. Namely, if we're pushing a job only to run it
@@ -205,64 +206,12 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
205
206
if self . has_visited ( ip, at) {
206
207
return false ;
207
208
}
208
- match self . prog [ ip] {
209
- Match ( slot) => {
210
- if slot < self . matches . len ( ) {
211
- self . matches [ slot] = true ;
212
- }
213
- return true ;
214
- }
215
- Save ( ref inst) => {
216
- if let Some ( & old_pos) = self . slots . get ( inst. slot ) {
217
- // If this path doesn't work out, then we save the old
218
- // capture index (if one exists) in an alternate
219
- // job. If the next path fails, then the alternate
220
- // job is popped and the old capture index is restored.
221
- self . m . jobs . push ( Job :: SaveRestore {
222
- slot : inst. slot ,
223
- old_pos : old_pos,
224
- } ) ;
225
- self . slots [ inst. slot ] = Some ( at. pos ( ) ) ;
226
- }
227
- ip = inst. goto ;
228
- }
229
- Split ( ref inst) => {
230
- self . m . jobs . push ( Job :: Inst { ip : inst. goto2 , at : at } ) ;
231
- ip = inst. goto1 ;
232
- }
233
- EmptyLook ( ref inst) => {
234
- if self . input . is_empty_match ( at, inst) {
235
- ip = inst. goto ;
236
- } else {
237
- return false ;
238
- }
239
- }
240
- Char ( ref inst) => {
241
- if inst. c == at. char ( ) {
242
- ip = inst. goto ;
243
- at = self . input . at ( at. next_pos ( ) ) ;
244
- } else {
245
- return false ;
246
- }
247
- }
248
- Ranges ( ref inst) => {
249
- if inst. matches ( at. char ( ) ) {
250
- ip = inst. goto ;
251
- at = self . input . at ( at. next_pos ( ) ) ;
252
- } else {
253
- return false ;
254
- }
255
- }
256
- Bytes ( ref inst) => {
257
- if let Some ( b) = at. byte ( ) {
258
- if inst. matches ( b) {
259
- ip = inst. goto ;
260
- at = self . input . at ( at. next_pos ( ) ) ;
261
- continue ;
262
- }
263
- }
264
- return false ;
209
+ match self . prog [ ip] . step ( self , at) {
210
+ Ok ( ( next_ip, next_at) ) => {
211
+ ip = next_ip;
212
+ at = next_at;
265
213
}
214
+ Err ( res) => return res,
266
215
}
267
216
}
268
217
}
@@ -280,6 +229,125 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
280
229
}
281
230
}
282
231
232
+ pub trait Step : InstTrait + Sized {
233
+ fn step < I : Input > (
234
+ & self ,
235
+ bounded : & mut Bounded < ' _ , ' _ , ' _ , ' _ , I , Self > ,
236
+ at : InputAt ,
237
+ ) -> Result < ( InstPtr , InputAt ) , bool > ;
238
+ }
239
+
240
+ impl Step for UnicodeInst {
241
+ fn step < I : Input > (
242
+ & self ,
243
+ bounded : & mut Bounded < ' _ , ' _ , ' _ , ' _ , I , Self > ,
244
+ mut at : InputAt ,
245
+ ) -> Result < ( InstPtr , InputAt ) , bool > {
246
+ use prog:: UnicodeInst :: * ;
247
+ match * self {
248
+ Match ( slot) => {
249
+ if slot < bounded. matches . len ( ) {
250
+ bounded. matches [ slot] = true ;
251
+ }
252
+ Err ( true )
253
+ }
254
+ Save ( ref inst) => {
255
+ if let Some ( & old_pos) = bounded. slots . get ( inst. slot ) {
256
+ // If this path doesn't work out, then we save the old
257
+ // capture index (if one exists) in an alternate
258
+ // job. If the next path fails, then the alternate
259
+ // job is popped and the old capture index is restored.
260
+ bounded. m . jobs . push ( Job :: SaveRestore {
261
+ slot : inst. slot ,
262
+ old_pos : old_pos,
263
+ } ) ;
264
+ bounded. slots [ inst. slot ] = Some ( at. pos ( ) ) ;
265
+ }
266
+ Ok ( ( inst. goto , at) )
267
+ }
268
+ Split ( ref inst) => {
269
+ bounded. m . jobs . push ( Job :: Inst { ip : inst. goto2 , at : at } ) ;
270
+ Ok ( ( inst. goto1 , at) )
271
+ }
272
+ EmptyLook ( ref inst) => {
273
+ if bounded. input . is_empty_match ( at, inst) {
274
+ Ok ( ( inst. goto , at) )
275
+ } else {
276
+ Err ( false )
277
+ }
278
+ }
279
+ Char ( ref inst) => {
280
+ if inst. c == at. char ( ) {
281
+ at = bounded. input . at ( at. next_pos ( ) ) ;
282
+ Ok ( ( inst. goto , at) )
283
+ } else {
284
+ Err ( false )
285
+ }
286
+ }
287
+ Ranges ( ref inst) => {
288
+ if inst. matches ( at. char ( ) ) {
289
+ at = bounded. input . at ( at. next_pos ( ) ) ;
290
+ Ok ( ( inst. goto , at) )
291
+ } else {
292
+ Err ( false )
293
+ }
294
+ }
295
+ }
296
+ }
297
+ }
298
+
299
+ impl Step for BytesInst {
300
+ fn step < I : Input > (
301
+ & self ,
302
+ bounded : & mut Bounded < ' _ , ' _ , ' _ , ' _ , I , Self > ,
303
+ mut at : InputAt ,
304
+ ) -> Result < ( InstPtr , InputAt ) , bool > {
305
+ use prog:: BytesInst :: * ;
306
+ match * self {
307
+ Match ( slot) => {
308
+ if slot < bounded. matches . len ( ) {
309
+ bounded. matches [ slot] = true ;
310
+ }
311
+ Err ( true )
312
+ }
313
+ Save ( ref inst) => {
314
+ if let Some ( & old_pos) = bounded. slots . get ( inst. slot ) {
315
+ // If this path doesn't work out, then we save the old
316
+ // capture index (if one exists) in an alternate
317
+ // job. If the next path fails, then the alternate
318
+ // job is popped and the old capture index is restored.
319
+ bounded. m . jobs . push ( Job :: SaveRestore {
320
+ slot : inst. slot ,
321
+ old_pos : old_pos,
322
+ } ) ;
323
+ bounded. slots [ inst. slot ] = Some ( at. pos ( ) ) ;
324
+ }
325
+ Ok ( ( inst. goto , at) )
326
+ }
327
+ Split ( ref inst) => {
328
+ bounded. m . jobs . push ( Job :: Inst { ip : inst. goto2 , at : at } ) ;
329
+ Ok ( ( inst. goto1 , at) )
330
+ }
331
+ EmptyLook ( ref inst) => {
332
+ if bounded. input . is_empty_match ( at, inst) {
333
+ Ok ( ( inst. goto , at) )
334
+ } else {
335
+ Err ( false )
336
+ }
337
+ }
338
+ Bytes ( ref inst) => {
339
+ if let Some ( b) = at. byte ( ) {
340
+ if inst. matches ( b) {
341
+ at = bounded. input . at ( at. next_pos ( ) ) ;
342
+ return Ok ( ( inst. goto , at) ) ;
343
+ }
344
+ }
345
+ Err ( false )
346
+ }
347
+ }
348
+ }
349
+ }
350
+
283
351
fn usize_to_u32 ( n : usize ) -> u32 {
284
352
if ( n as u64 ) > ( :: std:: u32:: MAX as u64 ) {
285
353
panic ! ( "BUG: {} is too big to fit into u32" , n)
0 commit comments