-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
445c979
commit 61e54e8
Showing
9 changed files
with
863 additions
and
846 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
import Regex | ||
|
||
namespace RegexTest | ||
|
||
/-- A span of contiguous bytes, from start to end, represented via byte -/ | ||
structure Span where | ||
start: Nat | ||
«end»: Nat | ||
|
||
instance : ToString Span where | ||
toString s := s!"{s.start} {s.end}" | ||
|
||
/-- Captures represents a single group of captured matches from a regex search. -/ | ||
structure Captures where | ||
groups: Array $ Option Span | ||
|
||
instance : ToString Captures where | ||
toString s := s!"Captures {s.groups}" | ||
|
||
namespace Sum | ||
|
||
def val (v : Sum String (Array String)) : String := | ||
match v with | ||
| .inl s => s | ||
| .inr arr => arr[0]! | ||
|
||
end Sum | ||
|
||
instance : ToString $ Sum String (Array String) where | ||
toString v := | ||
match v with | ||
| .inl s => s!"{s}" | ||
| .inr arr => s!"{arr}" | ||
|
||
end RegexTest | ||
|
||
/-- A regex test describes the inputs and expected outputs of a regex match. -/ | ||
structure RegexTest where | ||
name : String | ||
regex : Sum String (Array String) | ||
haystack : String | ||
«matches» : Array RegexTest.Captures | ||
/-- An optional field whose value is a table with `start` and `end` fields-/ | ||
bounds : Option $ Array Nat | ||
/-- An optional field that specifies a limit on the number of matches. -/ | ||
«match-limit» : Option Nat | ||
/-- Whether to execute an anchored search or not. -/ | ||
anchored : Option Bool | ||
/-- Whether to match the regex case insensitively. -/ | ||
«case-insensitive» : Option Bool | ||
/-- When enabled, the haystack is unescaped. -/ | ||
unescape : Option Bool | ||
compiles : Option Bool | ||
/-- When enabled, the regex pattern should be compiled with its | ||
corresponding Unicode mode enabled. -/ | ||
unicode : Option Bool | ||
/-- When this is enabled, all regex match substrings should be entirely valid UTF-8. -/ | ||
utf8 : Option Bool | ||
/-- May be one of `all`, `leftmost-first` or `leftmost-longest`. -/ | ||
«match-kind» : Option String | ||
/-- May be one of `earliest`, `leftmost` or `overlapping`. -/ | ||
«search-kind» : Option String | ||
/-- This sets the line terminator used by the multi-line assertions -/ | ||
«line-terminator» : Option String | ||
|
||
namespace RegexTest | ||
|
||
def isMatch (t : RegexTest) : Bool := | ||
if h : 0 < t.matches.size | ||
then (t.matches[0]'h).groups.size > 0 | ||
else false | ||
|
||
structure RegexTests where | ||
test : Array RegexTest | ||
|
||
namespace String | ||
|
||
def containsSubstr (s pattern : String) : Bool := | ||
(s.splitOn pattern).length > 1 | ||
|
||
end String | ||
|
||
def checkFlagIsFalse (f : Option Bool) : Bool := | ||
match f with | some v => !v | none => false | ||
|
||
private def escape (s : String) : String := | ||
(s.replace "\n" "\\n").replace "\r" "\\r" | ||
|
||
instance : ToString RegexTest where | ||
toString s := | ||
let str := s!"{s.name} '{s.regex}' '{escape s.haystack}' {s.matches}" | ||
let str := str ++ (if s.anchored.isSome then " anchored" else "") | ||
let str := str ++ (if s.«case-insensitive».isSome then " case-insensitive" else "") | ||
let str := str ++ (if s.unescape.isSome then " unescape" else "") | ||
let str := str ++ (if s.unicode.isSome && !s.unicode.getD true then " !unicode" else "") | ||
let str := str ++ (if String.containsSubstr (Sum.val s.regex) "(?" then " flags" else "") | ||
let str := str ++ (if checkFlagIsFalse s.compiles then " !compiles" else "") | ||
str | ||
|
||
instance : ToString RegexTests where | ||
toString s := s!"{s.test}" | ||
|
||
def unescapeStr (s : String) : String := | ||
⟨loop s.data []⟩ | ||
where | ||
toChar (a b : Char) : Char := | ||
match Char.decodeHexDigit a, Char.decodeHexDigit b with | ||
| some n, some m => | ||
let val := 16*n+m | ||
if h : UInt32.isValidChar val then ⟨val, h⟩ else ⟨0, by simp_arith⟩ | ||
| _, _ => ⟨0, by simp_arith⟩ | ||
loop (chars : List Char) (acc : List Char) : List Char := | ||
match chars with | ||
| [] => acc | ||
| '\\' :: 'x' :: a :: b :: tail => (toChar a b) :: (loop tail acc) | ||
| '\\' :: 'n' :: tail => '\n' :: (loop tail acc) | ||
| head :: tail => head :: (loop tail acc) | ||
|
||
def checkCompiles (t : RegexTest) : Bool := | ||
let flags : Syntax.Flags := default | ||
let config : Compiler.Config := default | ||
match Regex.build (Sum.val t.regex) flags config with | ||
| Except.ok _ => true | ||
| Except.error _ => false | ||
|
||
def captures (t : RegexTest) : Except String (Array Regex.Captures) := do | ||
let flags : Syntax.Flags := default | ||
let config : Compiler.Config := default | ||
|
||
let flags := {flags with case_insensitive := t.«case-insensitive»} | ||
let config := {config with unanchored_prefix := !t.anchored.getD false} | ||
|
||
let haystack := if t.unescape.getD false then unescapeStr t.haystack else t.haystack | ||
let re ← Regex.build (Sum.val t.regex) flags config | ||
Except.ok (Regex.all_captures haystack.toSubstring re) | ||
|
||
def checkMatches (arr : Array Regex.Captures) (t : RegexTest) : Bool := | ||
let match_limit := t.«match-limit».getD 1000 | ||
let arr := arr |> Array.toList |> List.take match_limit |> List.toArray | ||
|
||
if arr.size != t.matches.size then false | ||
else | ||
let idx := Array.mapIdx arr (fun i v => (i, v)) | ||
Array.all idx (fun (i, m) => | ||
if h : i.val < t.matches.size | ||
then | ||
let t_matches := (t.matches[i.val]'h).groups | ||
let idx := Array.mapIdx (m.matches) (fun i v => (i, v)) | ||
Array.all idx (fun (i, v) => | ||
match t_matches.get? i.val, v with | ||
| some (some span), some v => | ||
span.start = v.startPos.byteIdx && span.end = v.stopPos.byteIdx | ||
| some none, none => true | ||
| _, _ => false) | ||
else false) | ||
|
||
private def captureToString (r : Regex.Captures) : String := | ||
r.matches |> Array.map (fun m => | ||
match m with | ||
| some m => s!"({m.startPos}, {m.stopPos}), " | ||
| none => "none") | ||
|> Array.toList | ||
|> String.join | ||
|> fun s => | ||
let s := if s.endsWith ", " | ||
then ((String.toSubstring s).dropRight 2).toString | ||
else s | ||
"[" ++ s ++ "]" | ||
|
||
private def capturesToString (arr : Array Regex.Captures) : String := | ||
arr | ||
|> Array.map (fun c => captureToString c ++ ", ") | ||
|> Array.toList | ||
|> String.join | ||
|> fun s => | ||
let s := if s.endsWith ", " | ||
then ((String.toSubstring s).dropRight 2).toString | ||
else s | ||
"[" ++ s ++ "]" | ||
|
||
/-- ignore test, feature not implemented -/ | ||
def ignoreTest (t : RegexTest) : Bool := | ||
checkFlagIsFalse t.unicode | ||
|| checkFlagIsFalse t.utf8 | ||
|| t.bounds.isSome -- no api | ||
|| t.«line-terminator».isSome -- Config | ||
|| t.«search-kind».any (· != "leftmost") -- only leftmost is valid for BoundedBacktracker | ||
|| t.«match-kind».any (· = "all") -- Sets | ||
|| match t.regex with | .inr _ => true | .inl _ => false -- Sets | ||
|
||
def testItem (filename : String) (t : RegexTest) : IO (Nat × Nat × Nat) := do | ||
if checkFlagIsFalse t.compiles | ||
then | ||
if checkCompiles t | ||
then | ||
IO.println s!"RegexTest: {t}" | ||
IO.println s!" should not compile" | ||
pure (0, 1, 0) | ||
else pure (1, 0, 0) | ||
else if ignoreTest t | ||
then | ||
pure (0, 0, 1) | ||
else | ||
match captures t with | ||
| Except.ok result => | ||
if result.size = 0 | ||
then | ||
if RegexTest.isMatch t | ||
then | ||
IO.println s!"RegexTest({filename}): {t}" | ||
IO.println s!" no match found, expected {t.matches}" | ||
pure (0, 1, 0) | ||
else | ||
pure (1, 0, 0) | ||
else | ||
if checkMatches result t | ||
then | ||
pure (1, 0, 0) | ||
else | ||
IO.println s!"RegexTest({filename}): {t}" | ||
IO.println s!" expected size {t.matches.size}, actual {result.size} " | ||
IO.println s!" match different, expected {t.matches}, actual {capturesToString result}" | ||
pure (0, 1, 0) | ||
| Except.error e => | ||
IO.println s!"RegexTest{filename}: {t}" | ||
IO.println s!" error {e}" | ||
pure (0, 1, 0) | ||
|
||
def testItems (filename : String) (items : Array RegexTest) : IO (Nat × Nat× Nat) := do | ||
items |> Array.foldlM (init := (0, 0, 0)) (fun (succeeds, failures, ignored) RegexTest => do | ||
let (succeed, failure, ignore) ← testItem filename RegexTest | ||
pure (succeeds + succeed, failures + failure, ignore + ignored)) | ||
|
||
end RegexTest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import Regex | ||
|
||
import Test.Interval | ||
import Test.Ast | ||
import Test.Hir | ||
import Test.Compiler | ||
|
||
import RegexTest | ||
import TomlLoader | ||
|
||
open Lean System | ||
|
||
open Regex | ||
|
||
def test (path : FilePath): IO (Nat × Nat × Nat) := do | ||
let filename : String := path.fileName.getD "" | ||
if #["no-unicode.toml", "regex-lite.toml", "utf8.toml"].contains filename | ||
then pure (0, 0, 0) else | ||
let tests ← Loader.loadToml path | ||
let (succeeds, failures, ignored) ← RegexTest.testItems filename tests | ||
IO.println s!"succeeds {succeeds} failures {failures} ignored {ignored} in file {path}" | ||
pure (succeeds, failures, ignored) | ||
|
||
def summary (arr : Array (Nat × Nat × Nat)) : IO UInt32 := do | ||
let (succeeds, failures, ignored) := arr |> Array.foldl | ||
(fun acc v => (acc.1+v.1, acc.2.1+v.2.1, acc.2.2+v.2.2) ) (0, 0, 0) | ||
IO.println s!"succeeds {succeeds} failures {failures} ignored {ignored} total" | ||
pure (if failures > 0 then 1 else 0) | ||
|
||
def testAll (path : FilePath): IO UInt32 := do | ||
if ← System.FilePath.isDir path | ||
then | ||
(← System.FilePath.walkDir path) | ||
|> Array.filter (fun f => f.toString.endsWith "toml") | ||
|> Array.mapM (fun file => test file) | ||
|> fun arr => do summary (← arr) | ||
else | ||
IO.println s!"no such directory '{path}'" | ||
pure 1 | ||
|
||
def main (args : List String): IO UInt32 := do | ||
let exitcode ← | ||
try | ||
match args with | ||
| [] => pure <| ← testAll "testdata" | ||
| ["--toml", path] => pure <| ← summary #[← test path] | ||
| ["--all", path] => pure <| ← testAll path | ||
| _ => | ||
IO.println s!"usage: Test [--toml <path>] [--all path]" | ||
pure 1 | ||
catch e => | ||
IO.println s!"Error: {e}" | ||
pure 1 | ||
|
||
pure exitcode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import Regex | ||
|
||
open Syntax | ||
open AstItems | ||
open Regex | ||
|
||
namespace Test.Ast | ||
|
||
private def toString (x : Except String Ast) : String := | ||
match x with | ||
| Except.ok ast => s!"{ast}" | ||
| Except.error e => s!"Error {e}" | ||
|
||
private def astOf'a' : Ast := | ||
Ast.Literal ⟨String.toSpan "a" 0 1, LiteralKind.Verbatim, 'a'⟩ | ||
|
||
private def astOf'a?' : Ast := | ||
Ast.Repetition | ||
(Repetition.mk | ||
(String.toSpan "a?" 0 2) | ||
⟨String.toSpan "a?" 1 2, RepetitionKind.ZeroOrOne⟩ | ||
true | ||
(Ast.Literal ⟨String.toSpan "a?" 0 1, LiteralKind.Verbatim, 'a'⟩)) | ||
|
||
private def astOf'ab' : Ast := | ||
Ast.Concat | ||
(Concat.mk | ||
(String.toSpan "ab" 0 2) | ||
#[Ast.Literal ⟨String.toSpan "ab" 0 1, LiteralKind.Verbatim, 'a'⟩, | ||
Ast.Literal ⟨String.toSpan "ab" 1 2, LiteralKind.Verbatim, 'b'⟩]) | ||
|
||
private def «astOf'[a]'» : Ast := | ||
Ast.ClassBracketed | ||
(ClassBracketed.mk | ||
(String.toSpan "[a]" 0 3) | ||
false | ||
(ClassSet.Item (ClassSetItem.Literal ⟨String.toSpan "[a]" 1 2, LiteralKind.Verbatim, 'a'⟩))) | ||
|
||
private def «astOf'[a-b]'» : Ast := | ||
Ast.ClassBracketed | ||
(ClassBracketed.mk | ||
(String.toSpan "[a-b]" 0 5) | ||
false | ||
(ClassSet.Item (ClassSetItem.Range ⟨ | ||
String.toSpan "[a-b]" 1 4, | ||
⟨String.toSpan "[a-b]" 1 2, LiteralKind.Verbatim, 'a'⟩, | ||
⟨String.toSpan "[a-b]" 3 4, LiteralKind.Verbatim, 'b'⟩, | ||
by simp_arith⟩))) | ||
|
||
private def «astOf'a|b'» : Ast := | ||
Ast.Alternation | ||
(Alternation.mk | ||
(String.toSpan "[a|b]" 0 3) | ||
#[Ast.Literal ⟨String.toSpan "a|b" 0 1, LiteralKind.Verbatim, 'a'⟩, | ||
Ast.Literal ⟨String.toSpan "a|b" 2 3, LiteralKind.Verbatim, 'b'⟩]) | ||
|
||
private def «astOf'(a)'» : Ast := | ||
Syntax.AstItems.Ast.Group | ||
(Syntax.AstItems.Group.mk | ||
(String.toSpan "(a)" 0 3) | ||
(GroupKind.CaptureIndex 1) | ||
(Ast.Literal ⟨String.toSpan "(a)" 1 2, LiteralKind.Verbatim, 'a'⟩)) | ||
|
||
example : (parse "a" |> toString) = s!"{astOf'a'}" := by native_decide | ||
|
||
example : (parse "a?" |> toString) = s!"{astOf'a?'}" := by native_decide | ||
|
||
example : (parse "ab" |> toString) = s!"{astOf'ab'}" := by native_decide | ||
|
||
example : (parse "[a]" |> toString) = s!"{«astOf'[a]'»}" := by native_decide | ||
|
||
example : (parse "[a-b]" |> toString) = s!"{«astOf'[a-b]'»}" := by native_decide | ||
|
||
example : (parse "a|b" |> toString) = s!"{«astOf'a|b'»}" := by native_decide | ||
|
||
example : (parse "(a)" |> toString) = s!"{«astOf'(a)'»}" := by native_decide | ||
|
||
end Test.Ast |
Oops, something went wrong.