Skip to content

Commit

Permalink
feat: add possessive repetitions
Browse files Browse the repository at this point in the history
  • Loading branch information
bergmannjg committed Jun 24, 2024
1 parent c245b6f commit 53df957
Show file tree
Hide file tree
Showing 12 changed files with 303 additions and 75 deletions.
43 changes: 37 additions & 6 deletions Regex/Backtrack.lean
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,33 @@ private def encodeChar? (c: Option Char) : String :=
@[inline] private def step_fail (state : SearchState n) : SearchState n :=
withMsg (fun _ => s!"{state.sid}: Fail") state

/-- eat frames until State `sid` found -/
@[inline] private def step_eat_until (sid next : Fin n) (state : SearchState n) : SearchState n :=
let stack := state.stack |> List.dropWhile (fun s => match s with | .Step f' _ => sid != f' | _ => true)

match stack with
| .Step _ _ :: stack' =>
withMsg (fun _ => s!"{state.sid}: EatUntil {sid} stack {stack'} => {next}") {state with stack := stack', sid := next }
| _ =>
withMsg (fun _ => s!"{state.sid}: EatUntil failed ") state

/-- eat frames inclusive last occurunce of State `sid` -/
@[inline] private def step_eat_to_last (sid next : Fin n) (state : SearchState n) : SearchState n :=
let index := state.stack |> List.reverse |> List.findIdx (fun s => match s with | .Step f' _ => sid = f' | _ => false)

if index < state.stack.length then
let index := state.stack.length -index
let stack := state.stack |> List.drop index

withMsg (fun _ => s!"{state.sid}: EatToLast {sid} stack {stack} => {next}") {state with stack := stack, sid := next }
else withMsg (fun _ => s!"{state.sid}: EatToLast {sid} stack {state.stack} => {next}") {state with sid := next }
--withMsg (fun _ => s!"{state.sid}: EatToLast failed ") state

@[inline] private def step_eat (mode : Checked.EatMode n) (next : Fin n) (state : SearchState n) : SearchState n :=
match mode with
| .Until sid => step_eat_until sid next state
| .ToLast sid => step_eat_to_last sid next state

@[inline] private def step_change_frame_step (f t : Fin n) (state : SearchState n) : SearchState n :=
let stack := state.stack |> List.dropWhile (fun s => match s with | .Step f' _ => f != f' | _ => true)
match stack with
Expand Down Expand Up @@ -491,14 +518,17 @@ private def encodeChar? (c: Option Char) : String :=

@[inline] private def step_byterange (trans : Checked.Transition n) (state : SearchState n)
: SearchState n :=
if state.at.atStop then state
if state.at.atStop then
(withMsg (fun _ =>
s!"{state.sid}: ByteRange failed at stop")
state)
else if state.at.curr?.any (Checked.Transition.matches trans) then
let next := state.at.next
(withMsg (fun _ => s!"{state.sid}: ByteRange '{encodeChar? state.at.curr?}' matched at charpos {state.at} -> {trans.next}")
(withMsg (fun _ => s!"{state.sid}: ByteRange matched '{encodeChar? state.at.curr?}' at charpos {state.at} -> {trans.next}")
{state with sid := trans.next, «at» := next})
else
(withMsg (fun _ =>
s!"{state.sid}: ByteRange '{encodeChar? state.at.curr?}' failed at charpos {state.at}")
s!"{state.sid}: ByteRange failed match '{encodeChar? state.at.curr?}' at charpos {state.at}")
state)

@[inline] private def step_backreference_loop (s : String) (i : Nat) (case_insensitive : Bool) (cp : CharPos)
Expand Down Expand Up @@ -645,6 +675,7 @@ private def encodeChar? (c: Option Char) : String :=
| .Empty next => step_empty next searchState
| .NextChar offset next => step_next_char offset next searchState
| .Fail => step_fail searchState
| .Eat s next => step_eat s next searchState
| .ChangeFrameStep f t => step_change_frame_step f t searchState
| .RemoveFrameStep s => step_remove_frame_step s searchState
| .Look look next => step_look look next searchState
Expand Down Expand Up @@ -801,7 +832,7 @@ theorem toNextFrameStep_true_lt_or_eq_lt (nfa : Checked.NFA) (s s1 : SearchState
if h : slot < state.slots.size
then
let state := {state with slots := state.slots.set ⟨slot,h⟩ offset, stack := stack}
let state := (withMsg (fun _ => s!"{state.sid}: RestoreCapture stack {stack} slots {state.slots}") state)
let state := (withMsg (fun _ => s!"{state.sid}: Backtrack.RestoreCapture stack {stack} slots {state.slots}") state)
(true, state)
else (false, state)

Expand All @@ -825,7 +856,7 @@ theorem toNextFrameRestoreCapture_true_lt_or_eq_lt (slot : Nat) (offset : ℕ ×
toNextFrameStep nfa
{state with sid := sid, «at» := «at», stack := stack,
msgs := if state.logEnabled
then state.msgs.push s!"{state.sid}: Step stack {stack} at pos {state.at} -> {sid}"
then state.msgs.push s!"{state.sid}: Backtrack.Step stack {stack} at pos {state.at} -> {sid}"
else state.msgs}
| .RestoreCapture _ slot offset => toNextFrameRestoreCapture slot offset stack state
| none =>
Expand All @@ -845,7 +876,7 @@ theorem toNextFrame_true_lt (nfa : Checked.NFA) (s s1 : SearchState nfa.n)
slots := s.slots, logEnabled := s.logEnabled,
recentCaptures := s.recentCaptures
msgs := if s.logEnabled
then s.msgs.push s!"{s.sid}: Step stack {stack} at pos {s.at} -> {sid}"
then s.msgs.push s!"{s.sid}: Backtrack.Step stack {stack} at pos {s.at} -> {sid}"
else s.msgs, halfMatch := s.halfMatch,
backtracks := s.backtracks }
have h1 : state.countVisited < s1.countVisited ∨
Expand Down
157 changes: 125 additions & 32 deletions Regex/Compiler.lean
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ private def patch («from» «to» : Unchecked.StateID) : CompilerM PUnit := do
| .Empty _ => set (states.set ⟨«from», h⟩ (Unchecked.State.Empty «to»))
| .NextChar offset _ => set (states.set ⟨«from», h⟩ (Unchecked.State.NextChar offset «to»))
| .Fail => Except.error s!"patch states .Fail unexpected"
| .Eat (.Until s) n =>
if s = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.Eat (.Until «to») n))
else if n = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.Eat (.Until s) «to»))
else Except.error "patch states, .Eat s and n not null"
| .Eat (.ToLast s) n =>
if s = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.Eat (.ToLast «to») n))
else if n = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.Eat (.ToLast s) «to»))
else Except.error "patch states, .Eat s and n not null"
| .ChangeFrameStep f t =>
if f = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.ChangeFrameStep «to» t))
else if t = 0 then set (states.set ⟨«from», h⟩ (Unchecked.State.ChangeFrameStep f «to»))
Expand Down Expand Up @@ -100,6 +108,9 @@ private def add_empty : CompilerM Unchecked.StateID :=
private def add_fail : CompilerM Unchecked.StateID :=
push (Unchecked.State.Fail)

private def add_eat (mode : Unchecked.EatMode) : CompilerM Unchecked.StateID :=
push (Unchecked.State.Eat mode 0)

private def add_change_state : CompilerM Unchecked.StateID :=
push (Unchecked.State.ChangeFrameStep 0 0)

Expand Down Expand Up @@ -207,57 +218,128 @@ private def c_exactly (hir : Hir) (n : Nat) : CompilerM ThompsonRef := do
termination_by sizeOf hir + sizeOf n

/-- Compile the given expression such that it may be matched `n` or more times -/
private def c_at_least (hir : Hir) (n : Nat) (greedy : Bool) : CompilerM ThompsonRef := do
private def c_at_least (hir : Hir) (n : Nat) (greedy : Bool) (possessive : Bool) : CompilerM ThompsonRef := do
if n = 0 then
/- compile x* as (x+)?, which preserves the correct preference order
if x can match the empty string. -/
let compiled ← c hir
let plus ← (if greedy then add_union else add_union_reverse)
patch compiled.end plus
patch plus compiled.start

let question ← (if greedy then add_union else add_union_reverse)
let empty ← add_empty
patch question compiled.start
patch question empty
patch plus empty
if possessive then
let plus ← add_union
patch compiled.end plus
patch plus compiled.start

let question ← add_union
let eat ← add_eat (Unchecked.EatMode.Until 0)
let empty ← add_empty

patch question compiled.start
patch plus eat
patch eat empty
patch eat empty
patch question empty

pure ⟨question, empty⟩
else
let plus ← (if greedy then add_union else add_union_reverse)
patch compiled.end plus
patch plus compiled.start

let question ← (if greedy then add_union else add_union_reverse)
let empty ← add_empty
patch question compiled.start
patch question empty
patch plus empty

pure ⟨question, empty⟩
pure ⟨question, empty⟩
else if n = 1 then
let compiled ← c hir
let union ← (if greedy then add_union else add_union_reverse)
patch compiled.end union
patch union compiled.start
if possessive then
let union ← add_union
let eat ← add_eat (Unchecked.EatMode.ToLast 0)
let empty ← add_empty

pure ⟨compiled.start, union⟩
patch compiled.end union
patch union compiled.start
patch union eat
patch eat eat
patch eat empty

pure ⟨compiled.start, empty⟩
else
let union ← (if greedy then add_union else add_union_reverse)
patch compiled.end union
patch union compiled.start

pure ⟨compiled.start, union⟩
else
let «prefix» ← c_exactly hir (n-1)
let last ← c hir
let union ← (if greedy then add_union else add_union_reverse)
patch «prefix».end last.start
patch last.end union
patch union last.start
if possessive then
let union ← add_union
let eat ← add_eat (Unchecked.EatMode.ToLast 0)
let empty ← add_empty

patch «prefix».end last.start
patch last.end union
patch union last.start
patch union eat
patch eat eat
patch eat empty

pure ⟨«prefix».start, empty⟩
else
let union ← (if greedy then add_union else add_union_reverse)

patch «prefix».end last.start
patch last.end union
patch union last.start

pure ⟨«prefix».start, union⟩
pure ⟨«prefix».start, union⟩
termination_by sizeOf hir + sizeOf n + 1

/-- Compile the given expression such that it matches at least `min` times,
but no more than `max` times.-/
private def c_bounded (hir : Hir) (min max : Nat) (greedy : Bool) (_ : min ≤ max)
private def c_bounded (hir : Hir) (min max : Nat) (greedy : Bool) (possessive : Bool)
: CompilerM ThompsonRef := do
if h : 0 < max then
let «prefix» ← c_exactly hir min
if min = max then pure «prefix»
if min = max then
if possessive then
let union ← add_union
let eat ← add_eat (Unchecked.EatMode.ToLast 0)
let empty ← add_empty

patch union «prefix».start
patch «prefix».end eat
patch union eat
patch eat eat
patch eat empty
pure ⟨union, empty⟩
else
pure «prefix»
else
let empty ← add_empty
let prev_end ← (List.replicate (max-min) 0).foldlM (init := «prefix».end)
(fun prev_end _ => do
let union ← (if greedy then add_union else add_union_reverse)
let compiled ← c hir
patch prev_end union
patch union compiled.start
patch union empty
pure compiled.end)
if possessive then
let union ← add_union
let eat ← add_eat (Unchecked.EatMode.ToLast 0)

patch prev_end union
patch union compiled.start
patch union eat
patch eat eat
patch eat empty

pure compiled.end
else
let union ← (if greedy then add_union else add_union_reverse)
patch prev_end union
patch union compiled.start
patch union empty
pure compiled.end)
patch prev_end empty
pure ⟨«prefix».start, empty⟩
else c_empty
Expand Down Expand Up @@ -328,15 +410,26 @@ termination_by sizeOf look

private def c_repetition (rep : Repetition) : CompilerM ThompsonRef := do
match rep with
| .mk 0 (some 1) greedy h => do
| .mk 0 (some 1) greedy false h => do
let union ← if greedy then add_union else add_union_reverse
let compiled ← c h
let empty ← add_empty
patch union compiled.start
patch union empty
patch compiled.end empty
pure ⟨union, empty⟩
| .mk min none greedy sub => do
| .mk 0 (some 1) _ true h => do
let union ← add_union
let compiled ← c h
let eat ← add_eat (Unchecked.EatMode.Until 0)
let empty ← add_empty
patch union compiled.start
patch union empty
patch compiled.end eat
patch eat empty
patch eat empty
pure ⟨union, empty⟩
| .mk min none greedy possessive sub => do
let cannotMatchEmptyString : Bool :=
match sub.properties.minimum_len with
| some n => n > 0
Expand All @@ -348,9 +441,9 @@ private def c_repetition (rep : Repetition) : CompilerM ThompsonRef := do
patch union compiled.start
patch compiled.end union
pure ⟨union, union⟩
else c_at_least sub min greedy
| .mk min (some max) greedy sub =>
if h : min ≤ max then c_bounded sub min max greedy h else c_empty
else c_at_least sub min greedy possessive
| .mk min (some max) greedy possessive sub =>
if min ≤ max then c_bounded sub min max greedy possessive else c_empty
termination_by sizeOf rep

private def c (hir : Hir) : CompilerM ThompsonRef :=
Expand Down Expand Up @@ -402,7 +495,7 @@ private def startsWithStart (hir : Hir) : Bool :=
/-- Compile the HIR expression given. -/
private def compile' (anchored : Bool) (expr : Hir) : CompilerM PUnit := do
let unanchored_prefix ← if anchored then c_empty
else c_repetition <| Repetition.mk 0 none false (dot Dot.AnyChar)
else c_repetition <| Repetition.mk 0 none false false (dot Dot.AnyChar)
let one ← c_cap (Capture.mk 0 none expr)
let match_state_id ← add_match 0
patch one.end match_state_id
Expand Down
Loading

0 comments on commit 53df957

Please sign in to comment.