Skip to content

Commit

Permalink
feat: use Parsec
Browse files Browse the repository at this point in the history
  • Loading branch information
bergmannjg committed Jul 15, 2024
1 parent 24c928d commit abc349c
Show file tree
Hide file tree
Showing 16 changed files with 1,566 additions and 1,377 deletions.
44 changes: 38 additions & 6 deletions Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ structure RegexOptions where
path : Option String := none
unanchoredPrefix : Bool := true
unanchoredPrefixSimulation : Bool := false
flavor : Syntax.Flavor := Syntax.Flavor.Pcre
experimental : Bool := false

abbrev CliMainM := ExceptT CliError MainM
abbrev CliStateM := StateT RegexOptions CliMainM
Expand Down Expand Up @@ -55,13 +57,17 @@ def regexShortOption : (opt : Char) → CliM PUnit
| 's' => modifyThe RegexOptions ({· with unanchoredPrefixSimulation := true})
| 'u' => modifyThe RegexOptions ({· with unescape := true})
| 'v' => modifyThe RegexOptions ({· with verbosity := Lake.Verbosity.verbose})
| 'x' => modifyThe RegexOptions ({· with experimental := true})
| opt => throw <| CliError.unknownShortOption opt

def regexLongOption : (opt : String) → CliM PUnit
| "--help" => modifyThe RegexOptions ({· with wantsHelp := true})
| "--verbose" => modifyThe RegexOptions ({· with verbosity := Lake.Verbosity.verbose})
| "--no-unanchored-prefix" => modifyThe RegexOptions ({· with unanchoredPrefix := false})
| "--unanchored-prefix-simulation" => modifyThe RegexOptions ({· with unanchoredPrefixSimulation := true})
| "--flavor" => do
let flavor ← takeOptArg "--flavor" "path"
modifyThe RegexOptions ({· with flavor := if flavor = "Rust" then Syntax.Flavor.Rust else Syntax.Flavor.Pcre})
| opt => throw <| CliError.unknownLongOption opt

def regexOption :=
Expand Down Expand Up @@ -114,25 +120,50 @@ where
| '\\' :: '\\' :: tail => '\\' :: (loop tail acc)
| head :: tail => head :: (loop tail acc)

def grammar : CliM PUnit := do
processOptions regexOption
let opts ← getThe RegexOptions
match ← takeArg? with
| some re =>
let g ← Regex.Grammar.parse re opts.flavor
IO.println s!"Grammar {opts.flavor}\n{g}"
| none => throw <| CliError.missingArg "re"

def grammarT : CliM PUnit := do
processOptions regexOption
let opts ← getThe RegexOptions
match ← takeArg? with
| some re =>
let g ← Regex.Grammar.parse re opts.flavor
let g' ← Regex.Grammar.translate g
IO.println s!"Grammar translated\n{g'}"
| none => throw <| CliError.missingArg "re"

def ast : CliM PUnit := do
processOptions regexOption
let opts ← getThe RegexOptions
match ← takeArg? with
| some re =>
let ast ← AstItems.parse re default
IO.println s!"Ast\n{ast}"
let ast ← AstItems.parse re opts.flavor
IO.println s!"Ast{if opts.experimental then " experimental" else ""} {opts.flavor}\n{ast}"
| none => throw <| CliError.missingArg "re"

def hir : CliM PUnit := do
processOptions regexOption
let opts ← getThe RegexOptions
match ← takeArg? with
| some re =>
let ast ← AstItems.parse re default
let ast ← AstItems.parse re opts.flavor
let hir ← Syntax.translate default ast
IO.println s!"Hir\n{hir}"
| none => throw <| CliError.missingArg "re"

def compile : CliM PUnit := do
processOptions regexOption
let opts ← getThe RegexOptions
match ← takeArg? with
| some pat =>
let re ← build pat default default default
let re ← build pat opts.flavor default default
IO.println s!"{re.nfa}"
| none => throw <| CliError.missingArg "re"

Expand All @@ -156,13 +187,12 @@ def captures : CliM PUnit := do
let chars := haystack.data |> List.map (fun (c : Char) => (UInt32.intAsString c.val))
IO.println s!"re '{re}' haystack chars '{chars}'"

let flavor : Syntax.Flavor := default
let flags : Syntax.Flags := default
let config := {(default:Compiler.Config) with
unanchored_prefix := opts.unanchoredPrefix
unanchored_prefix_simulation := opts.unanchoredPrefixSimulation }

let regex ← Regex.build re flavor flags config
let regex ← Regex.build re opts.flavor flags config
if isVerbose then IO.println s!"nfa {regex.nfa}"
if isVerbose then IO.println s!"nfa unanchored_prefix_in_backtrack {regex.nfa.unanchored_prefix_in_backtrack}"

Expand Down Expand Up @@ -192,6 +222,8 @@ def captures : CliM PUnit := do
end regex

def regexCli : (cmd : String) → CliM PUnit
| "grammar" => regex.grammar
| "grammarT" => regex.grammarT
| "ast" => regex.ast
| "hir" => regex.hir
| "compile" => regex.compile
Expand Down
3 changes: 3 additions & 0 deletions Regex.lean
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import Regex.Syntax.Grammar.Grammar
import Regex.Syntax.Grammar.Translate
import Regex.Syntax.Ast.Parser
import Regex.Syntax.Hir
import Regex.Nfa
import Regex.Regex
Expand Down
3 changes: 2 additions & 1 deletion Regex/Regex.lean
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import Regex.Syntax.Flavor
import Regex.Unicode
import Regex.Nfa
import Regex.Backtrack
import Regex.Utils
import Regex.Syntax.Ast.Parse
import Regex.Syntax.Ast.Parser
import Regex.Syntax.Translate

open NFA
Expand Down
92 changes: 83 additions & 9 deletions Regex/Syntax/Ast/Ast.lean
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ namespace Literal
def toString (lit : Literal) : String :=
s!"Literal {spanToString lit.span} {lit.kind} '{UInt32.intAsString lit.c.val}'"

def toLiteral (c : Char) (p : String) (f t : String.Pos): Literal :=
⟨⟨p, f, t⟩, .Verbatim, c⟩

end Literal

instance : ToString Literal where
Expand Down Expand Up @@ -914,14 +917,23 @@ termination_by sizeOf r

def toStringClassSetItem (r : ClassSetItem) (col : Nat) : String :=
match r with
| .Empty _ => s!"Empty"
| .Literal lit => s!"{lit}"
| .Range range => s!"{range}"
| .Ascii cls => s!"{cls}"
| .Perl cls => s!"{cls}"
| .Unicode cls => s!"{cls}"
| .Bracketed ⟨_, neg, _⟩ => s!"Bracketed {neg}"
| .Union union => s!"Union {toStringClassSetUnion union col}"
| .Empty _ => s!"ItemEmpty"
| .Literal lit => s!"Item {lit}"
| .Range range => s!"Item {range}"
| .Ascii cls => s!"Item {cls}"
| .Perl cls => s!"Item {cls}"
| .Unicode cls => s!"Item {cls}"
| .Bracketed ⟨_, neg, cls⟩ =>
match cls with
| .Item item => s!"Bracketed Item {neg} {ClassSetItem.toStringClassSetItem item col}"
| .BinaryOp ⟨_, kind, lhs, rhs⟩ =>
let kind :=
match kind with
| .Intersection => s!"Intersection"
| .Difference => s!"Difference"
| .SymmetricDifference => s!"SymmetricDifference"
s!"Item Bracketed BinaryOp {neg} {kind}"
| .Union union => s!"Item Union {toStringClassSetUnion union col}"
termination_by sizeOf r

end
Expand Down Expand Up @@ -951,7 +963,7 @@ def toString (r : ClassSet) (col : Nat) : String :=
let pre := "\n" ++ (Char.multiple ' ' col "")

match r with
| .Item item => s!"{ClassSetItem.toStringClassSetItem item col}"
| .Item item => s!"ClassSet {ClassSetItem.toStringClassSetItem item col}"
| .BinaryOp ⟨_, kind, lhs, rhs⟩ =>
let op := s!"BinaryOp {ClassSetBinaryOpKind.toString kind}"
let lhs := s!"lhs {ClassSet.toString lhs col}"
Expand Down Expand Up @@ -1052,3 +1064,65 @@ partial def find (ast : Ast) (p : Ast -> Bool) : Option Ast :=
| _ => none

| _ => none

def get_fixed_width (pattern : String) (ast : Ast) : Except String Nat := do
match ast with
| .Literal _ => pure 1
| .Dot _ => pure 1
| .ClassBracketed _ => pure 1
| .ClassUnicode _ => pure 1
| .ClassPerl _ => pure 1
| .Assertion _ => pure 0
| .Flags _ => pure 0
| .Concat concat =>
match concat with
| ⟨_, asts⟩ =>
let width ← asts.attach.foldlM (init := 0) (fun s (ast : { x // x ∈ asts}) => do
have : sizeOf ast.val < sizeOf asts := Array.sizeOf_lt_of_mem ast.property
let width ← get_fixed_width pattern ast.val
pure (s + width))
pure width
| .Alternation alt =>
match alt with
| ⟨_, asts⟩ =>
let widths ← asts.attach.mapM (fun (ast : { x // x ∈ asts}) => do
have : sizeOf ast.val < sizeOf asts := Array.sizeOf_lt_of_mem ast.property
let width ← get_fixed_width pattern ast.val
pure width)
if h : 0 < widths.size
then
let width := widths.get ⟨0, h⟩
if Array.all widths (fun w => w = width)
then pure width
else throw (toError pattern .FixedWidtExcpected)
else throw (toError pattern .FixedWidtExcpected)
| .Repetition rep =>
match rep with
| AstItems.Repetition.mk _ (RepetitionOp.mk _ (.Range (.Exactly n))) _ _ _ => pure n
| _ => throw (toError pattern .FixedWidtExcpected)
| .Group ⟨_, GroupKind.CaptureIndex _ _, ast⟩ =>
let width ← get_fixed_width pattern ast
pure width
| .Group ⟨_, GroupKind.Lookaround _, _⟩ =>
pure 0
| _ => throw (toError pattern .FixedWidtExcpected)
termination_by sizeOf ast

def checkBackRefence (b : Nat) (ast: Ast) : Except String Bool := do
let check (ast : Ast) :=
match ast with
| .Group ⟨_, AstItems.GroupKind.CaptureIndex b' _, _⟩ => b = b'
| _ => false

match AstItems.find ast check with
| some ast =>
match get_fixed_width "" ast with
| Except.ok _ => pure true
| Except.error _ => Except.error s!"fixed width capture group of backreference {b} expected"
| none => Except.error s!"capture group {b} not found"

/-- capture groups with a backreference should have fixed width -/
def checkBackRefences (b : Nat) (ast: Ast) : Except String Bool := do
if b = 0 then pure true
else
if ← checkBackRefence b ast then checkBackRefences (b - 1) ast else pure false
Loading

0 comments on commit abc349c

Please sign in to comment.