Skip to content

Commit b45cfc4

Browse files
committed
Split Inst enum into BytesInst and UnicodeInst enums
1 parent cc0f2c9 commit b45cfc4

File tree

7 files changed

+772
-365
lines changed

7 files changed

+772
-365
lines changed

src/backtrack.rs

+135-67
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
use exec::ProgramCache;
2020
use input::{Input, InputAt};
21-
use prog::{InstPtr, Program};
21+
use prog::{BytesInst, InstPtr, InstTrait, Program, UnicodeInst};
2222
use re_trait::Slot;
2323

2424
type Bits = u32;
@@ -41,8 +41,8 @@ pub fn should_exec(num_insts: usize, text_len: usize) -> bool {
4141

4242
/// A backtracking matching engine.
4343
#[derive(Debug)]
44-
pub struct Bounded<'a, 'm, 'r, 's, I> {
45-
prog: &'r Program,
44+
pub struct Bounded<'a, 'm, 'r, 's, I, P: InstTrait> {
45+
prog: &'r Program<P>,
4646
input: I,
4747
matches: &'m mut [bool],
4848
slots: &'s mut [Slot],
@@ -59,7 +59,7 @@ pub struct Cache {
5959

6060
impl Cache {
6161
/// Create new empty cache for the backtracking engine.
62-
pub fn new(_prog: &Program) -> Self {
62+
pub fn new<I: InstTrait>(_prog: &Program<I>) -> Self {
6363
Cache { jobs: vec![], visited: vec![] }
6464
}
6565
}
@@ -76,13 +76,15 @@ enum Job {
7676
SaveRestore { slot: usize, old_pos: Option<usize> },
7777
}
7878

79-
impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
79+
impl<'a, 'm, 'r, 's, I: Input, P: InstTrait + Step>
80+
Bounded<'a, 'm, 'r, 's, I, P>
81+
{
8082
/// Execute the backtracking matching engine.
8183
///
8284
/// If there's a match, `exec` returns `true` and populates the given
8385
/// captures accordingly.
8486
pub fn exec(
85-
prog: &'r Program,
87+
prog: &'r Program<P>,
8688
cache: &ProgramCache,
8789
matches: &'m mut [bool],
8890
slots: &'s mut [Slot],
@@ -93,14 +95,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
9395
let mut cache = cache.borrow_mut();
9496
let cache = &mut cache.backtrack;
9597
let start = input.at(start);
96-
let mut b = Bounded {
98+
Bounded {
9799
prog: prog,
98100
input: input,
99101
matches: matches,
100102
slots: slots,
101103
m: cache,
102-
};
103-
b.exec_(start, end)
104+
}
105+
.exec_(start, end)
104106
}
105107

106108
/// Clears the cache such that the backtracking engine can be executed
@@ -196,7 +198,6 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
196198
}
197199

198200
fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
199-
use prog::Inst::*;
200201
loop {
201202
// This loop is an optimization to avoid constantly pushing/popping
202203
// from the stack. Namely, if we're pushing a job only to run it
@@ -205,64 +206,12 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
205206
if self.has_visited(ip, at) {
206207
return false;
207208
}
208-
match self.prog[ip] {
209-
Match(slot) => {
210-
if slot < self.matches.len() {
211-
self.matches[slot] = true;
212-
}
213-
return true;
214-
}
215-
Save(ref inst) => {
216-
if let Some(&old_pos) = self.slots.get(inst.slot) {
217-
// If this path doesn't work out, then we save the old
218-
// capture index (if one exists) in an alternate
219-
// job. If the next path fails, then the alternate
220-
// job is popped and the old capture index is restored.
221-
self.m.jobs.push(Job::SaveRestore {
222-
slot: inst.slot,
223-
old_pos: old_pos,
224-
});
225-
self.slots[inst.slot] = Some(at.pos());
226-
}
227-
ip = inst.goto;
228-
}
229-
Split(ref inst) => {
230-
self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
231-
ip = inst.goto1;
232-
}
233-
EmptyLook(ref inst) => {
234-
if self.input.is_empty_match(at, inst) {
235-
ip = inst.goto;
236-
} else {
237-
return false;
238-
}
239-
}
240-
Char(ref inst) => {
241-
if inst.c == at.char() {
242-
ip = inst.goto;
243-
at = self.input.at(at.next_pos());
244-
} else {
245-
return false;
246-
}
247-
}
248-
Ranges(ref inst) => {
249-
if inst.matches(at.char()) {
250-
ip = inst.goto;
251-
at = self.input.at(at.next_pos());
252-
} else {
253-
return false;
254-
}
255-
}
256-
Bytes(ref inst) => {
257-
if let Some(b) = at.byte() {
258-
if inst.matches(b) {
259-
ip = inst.goto;
260-
at = self.input.at(at.next_pos());
261-
continue;
262-
}
263-
}
264-
return false;
209+
match self.prog[ip].step(self, at) {
210+
Ok((next_ip, next_at)) => {
211+
ip = next_ip;
212+
at = next_at;
265213
}
214+
Err(res) => return res,
266215
}
267216
}
268217
}
@@ -280,6 +229,125 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
280229
}
281230
}
282231

232+
pub trait Step: InstTrait + Sized {
233+
fn step<I: Input>(
234+
&self,
235+
bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
236+
at: InputAt,
237+
) -> Result<(InstPtr, InputAt), bool>;
238+
}
239+
240+
impl Step for UnicodeInst {
241+
fn step<I: Input>(
242+
&self,
243+
bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
244+
mut at: InputAt,
245+
) -> Result<(InstPtr, InputAt), bool> {
246+
use prog::UnicodeInst::*;
247+
match *self {
248+
Match(slot) => {
249+
if slot < bounded.matches.len() {
250+
bounded.matches[slot] = true;
251+
}
252+
Err(true)
253+
}
254+
Save(ref inst) => {
255+
if let Some(&old_pos) = bounded.slots.get(inst.slot) {
256+
// If this path doesn't work out, then we save the old
257+
// capture index (if one exists) in an alternate
258+
// job. If the next path fails, then the alternate
259+
// job is popped and the old capture index is restored.
260+
bounded.m.jobs.push(Job::SaveRestore {
261+
slot: inst.slot,
262+
old_pos: old_pos,
263+
});
264+
bounded.slots[inst.slot] = Some(at.pos());
265+
}
266+
Ok((inst.goto, at))
267+
}
268+
Split(ref inst) => {
269+
bounded.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
270+
Ok((inst.goto1, at))
271+
}
272+
EmptyLook(ref inst) => {
273+
if bounded.input.is_empty_match(at, inst) {
274+
Ok((inst.goto, at))
275+
} else {
276+
Err(false)
277+
}
278+
}
279+
Char(ref inst) => {
280+
if inst.c == at.char() {
281+
at = bounded.input.at(at.next_pos());
282+
Ok((inst.goto, at))
283+
} else {
284+
Err(false)
285+
}
286+
}
287+
Ranges(ref inst) => {
288+
if inst.matches(at.char()) {
289+
at = bounded.input.at(at.next_pos());
290+
Ok((inst.goto, at))
291+
} else {
292+
Err(false)
293+
}
294+
}
295+
}
296+
}
297+
}
298+
299+
impl Step for BytesInst {
300+
fn step<I: Input>(
301+
&self,
302+
bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
303+
mut at: InputAt,
304+
) -> Result<(InstPtr, InputAt), bool> {
305+
use prog::BytesInst::*;
306+
match *self {
307+
Match(slot) => {
308+
if slot < bounded.matches.len() {
309+
bounded.matches[slot] = true;
310+
}
311+
Err(true)
312+
}
313+
Save(ref inst) => {
314+
if let Some(&old_pos) = bounded.slots.get(inst.slot) {
315+
// If this path doesn't work out, then we save the old
316+
// capture index (if one exists) in an alternate
317+
// job. If the next path fails, then the alternate
318+
// job is popped and the old capture index is restored.
319+
bounded.m.jobs.push(Job::SaveRestore {
320+
slot: inst.slot,
321+
old_pos: old_pos,
322+
});
323+
bounded.slots[inst.slot] = Some(at.pos());
324+
}
325+
Ok((inst.goto, at))
326+
}
327+
Split(ref inst) => {
328+
bounded.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
329+
Ok((inst.goto1, at))
330+
}
331+
EmptyLook(ref inst) => {
332+
if bounded.input.is_empty_match(at, inst) {
333+
Ok((inst.goto, at))
334+
} else {
335+
Err(false)
336+
}
337+
}
338+
Bytes(ref inst) => {
339+
if let Some(b) = at.byte() {
340+
if inst.matches(b) {
341+
at = bounded.input.at(at.next_pos());
342+
return Ok((inst.goto, at));
343+
}
344+
}
345+
Err(false)
346+
}
347+
}
348+
}
349+
}
350+
283351
fn usize_to_u32(n: usize) -> u32 {
284352
if (n as u64) > (::std::u32::MAX as u64) {
285353
panic!("BUG: {} is too big to fit into u32", n)

0 commit comments

Comments
 (0)