Skip to content

Commit

Permalink
refactor: split tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bergmannjg committed May 28, 2024
1 parent 445c979 commit 61e54e8
Show file tree
Hide file tree
Showing 9 changed files with 863 additions and 846 deletions.
844 changes: 0 additions & 844 deletions Test.lean

This file was deleted.

8 changes: 6 additions & 2 deletions lakefile.lean
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ lean_exe benchmark {
buildType := .debug
}

lean_lib Test where
srcDir := "test"
roots := #[`Test, `RegexTest, `TomlLoader]

@[test_runner]
lean_exe test {
lean_exe test where
srcDir := "test"
root := `Test
}
234 changes: 234 additions & 0 deletions test/RegexTest.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import Regex

namespace RegexTest

/-- A span of contiguous bytes, from start to end, represented via byte -/
structure Span where
start: Nat
«end»: Nat

instance : ToString Span where
toString s := s!"{s.start} {s.end}"

/-- Captures represents a single group of captured matches from a regex search. -/
structure Captures where
groups: Array $ Option Span

instance : ToString Captures where
toString s := s!"Captures {s.groups}"

namespace Sum

def val (v : Sum String (Array String)) : String :=
match v with
| .inl s => s
| .inr arr => arr[0]!

end Sum

instance : ToString $ Sum String (Array String) where
toString v :=
match v with
| .inl s => s!"{s}"
| .inr arr => s!"{arr}"

end RegexTest

/-- A regex test describes the inputs and expected outputs of a regex match. -/
structure RegexTest where
name : String
regex : Sum String (Array String)
haystack : String
«matches» : Array RegexTest.Captures
/-- An optional field whose value is a table with `start` and `end` fields-/
bounds : Option $ Array Nat
/-- An optional field that specifies a limit on the number of matches. -/
«match-limit» : Option Nat
/-- Whether to execute an anchored search or not. -/
anchored : Option Bool
/-- Whether to match the regex case insensitively. -/
«case-insensitive» : Option Bool
/-- When enabled, the haystack is unescaped. -/
unescape : Option Bool
compiles : Option Bool
/-- When enabled, the regex pattern should be compiled with its
corresponding Unicode mode enabled. -/
unicode : Option Bool
/-- When this is enabled, all regex match substrings should be entirely valid UTF-8. -/
utf8 : Option Bool
/-- May be one of `all`, `leftmost-first` or `leftmost-longest`. -/
«match-kind» : Option String
/-- May be one of `earliest`, `leftmost` or `overlapping`. -/
«search-kind» : Option String
/-- This sets the line terminator used by the multi-line assertions -/
«line-terminator» : Option String

namespace RegexTest

def isMatch (t : RegexTest) : Bool :=
if h : 0 < t.matches.size
then (t.matches[0]'h).groups.size > 0
else false

structure RegexTests where
test : Array RegexTest

namespace String

def containsSubstr (s pattern : String) : Bool :=
(s.splitOn pattern).length > 1

end String

def checkFlagIsFalse (f : Option Bool) : Bool :=
match f with | some v => !v | none => false

private def escape (s : String) : String :=
(s.replace "\n" "\\n").replace "\r" "\\r"

instance : ToString RegexTest where
toString s :=
let str := s!"{s.name} '{s.regex}' '{escape s.haystack}' {s.matches}"
let str := str ++ (if s.anchored.isSome then " anchored" else "")
let str := str ++ (if s.«case-insensitive».isSome then " case-insensitive" else "")
let str := str ++ (if s.unescape.isSome then " unescape" else "")
let str := str ++ (if s.unicode.isSome && !s.unicode.getD true then " !unicode" else "")
let str := str ++ (if String.containsSubstr (Sum.val s.regex) "(?" then " flags" else "")
let str := str ++ (if checkFlagIsFalse s.compiles then " !compiles" else "")
str

instance : ToString RegexTests where
toString s := s!"{s.test}"

def unescapeStr (s : String) : String :=
⟨loop s.data []⟩
where
toChar (a b : Char) : Char :=
match Char.decodeHexDigit a, Char.decodeHexDigit b with
| some n, some m =>
let val := 16*n+m
if h : UInt32.isValidChar val then ⟨val, h⟩ else0, by simp_arith⟩
| _, _ => ⟨0, by simp_arith⟩
loop (chars : List Char) (acc : List Char) : List Char :=
match chars with
| [] => acc
| '\\' :: 'x' :: a :: b :: tail => (toChar a b) :: (loop tail acc)
| '\\' :: 'n' :: tail => '\n' :: (loop tail acc)
| head :: tail => head :: (loop tail acc)

def checkCompiles (t : RegexTest) : Bool :=
let flags : Syntax.Flags := default
let config : Compiler.Config := default
match Regex.build (Sum.val t.regex) flags config with
| Except.ok _ => true
| Except.error _ => false

def captures (t : RegexTest) : Except String (Array Regex.Captures) := do
let flags : Syntax.Flags := default
let config : Compiler.Config := default

let flags := {flags with case_insensitive := t.«case-insensitive»}
let config := {config with unanchored_prefix := !t.anchored.getD false}

let haystack := if t.unescape.getD false then unescapeStr t.haystack else t.haystack
let re ← Regex.build (Sum.val t.regex) flags config
Except.ok (Regex.all_captures haystack.toSubstring re)

def checkMatches (arr : Array Regex.Captures) (t : RegexTest) : Bool :=
let match_limit := t.«match-limit».getD 1000
let arr := arr |> Array.toList |> List.take match_limit |> List.toArray

if arr.size != t.matches.size then false
else
let idx := Array.mapIdx arr (fun i v => (i, v))
Array.all idx (fun (i, m) =>
if h : i.val < t.matches.size
then
let t_matches := (t.matches[i.val]'h).groups
let idx := Array.mapIdx (m.matches) (fun i v => (i, v))
Array.all idx (fun (i, v) =>
match t_matches.get? i.val, v with
| some (some span), some v =>
span.start = v.startPos.byteIdx && span.end = v.stopPos.byteIdx
| some none, none => true
| _, _ => false)
else false)

private def captureToString (r : Regex.Captures) : String :=
r.matches |> Array.map (fun m =>
match m with
| some m => s!"({m.startPos}, {m.stopPos}), "
| none => "none")
|> Array.toList
|> String.join
|> fun s =>
let s := if s.endsWith ", "
then ((String.toSubstring s).dropRight 2).toString
else s
"[" ++ s ++ "]"

private def capturesToString (arr : Array Regex.Captures) : String :=
arr
|> Array.map (fun c => captureToString c ++ ", ")
|> Array.toList
|> String.join
|> fun s =>
let s := if s.endsWith ", "
then ((String.toSubstring s).dropRight 2).toString
else s
"[" ++ s ++ "]"

/-- ignore test, feature not implemented -/
def ignoreTest (t : RegexTest) : Bool :=
checkFlagIsFalse t.unicode
|| checkFlagIsFalse t.utf8
|| t.bounds.isSome -- no api
|| t.«line-terminator».isSome -- Config
|| t.«search-kind».any (· != "leftmost") -- only leftmost is valid for BoundedBacktracker
|| t.«match-kind».any (· = "all") -- Sets
|| match t.regex with | .inr _ => true | .inl _ => false -- Sets

def testItem (filename : String) (t : RegexTest) : IO (Nat × Nat × Nat) := do
if checkFlagIsFalse t.compiles
then
if checkCompiles t
then
IO.println s!"RegexTest: {t}"
IO.println s!" should not compile"
pure (0, 1, 0)
else pure (1, 0, 0)
else if ignoreTest t
then
pure (0, 0, 1)
else
match captures t with
| Except.ok result =>
if result.size = 0
then
if RegexTest.isMatch t
then
IO.println s!"RegexTest({filename}): {t}"
IO.println s!" no match found, expected {t.matches}"
pure (0, 1, 0)
else
pure (1, 0, 0)
else
if checkMatches result t
then
pure (1, 0, 0)
else
IO.println s!"RegexTest({filename}): {t}"
IO.println s!" expected size {t.matches.size}, actual {result.size} "
IO.println s!" match different, expected {t.matches}, actual {capturesToString result}"
pure (0, 1, 0)
| Except.error e =>
IO.println s!"RegexTest{filename}: {t}"
IO.println s!" error {e}"
pure (0, 1, 0)

def testItems (filename : String) (items : Array RegexTest) : IO (Nat × Nat× Nat) := do
items |> Array.foldlM (init := (0, 0, 0)) (fun (succeeds, failures, ignored) RegexTest => do
let (succeed, failure, ignore) ← testItem filename RegexTest
pure (succeeds + succeed, failures + failure, ignore + ignored))

end RegexTest
55 changes: 55 additions & 0 deletions test/Test.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import Regex

import Test.Interval
import Test.Ast
import Test.Hir
import Test.Compiler

import RegexTest
import TomlLoader

open Lean System

open Regex

def test (path : FilePath): IO (Nat × Nat × Nat) := do
let filename : String := path.fileName.getD ""
if #["no-unicode.toml", "regex-lite.toml", "utf8.toml"].contains filename
then pure (0, 0, 0) else
let tests ← Loader.loadToml path
let (succeeds, failures, ignored) ← RegexTest.testItems filename tests
IO.println s!"succeeds {succeeds} failures {failures} ignored {ignored} in file {path}"
pure (succeeds, failures, ignored)

def summary (arr : Array (Nat × Nat × Nat)) : IO UInt32 := do
let (succeeds, failures, ignored) := arr |> Array.foldl
(fun acc v => (acc.1+v.1, acc.2.1+v.2.1, acc.2.2+v.2.2) ) (0, 0, 0)
IO.println s!"succeeds {succeeds} failures {failures} ignored {ignored} total"
pure (if failures > 0 then 1 else 0)

def testAll (path : FilePath): IO UInt32 := do
if ← System.FilePath.isDir path
then
(← System.FilePath.walkDir path)
|> Array.filter (fun f => f.toString.endsWith "toml")
|> Array.mapM (fun file => test file)
|> fun arr => do summary (← arr)
else
IO.println s!"no such directory '{path}'"
pure 1

def main (args : List String): IO UInt32 := do
let exitcode ←
try
match args with
| [] => pure <| ← testAll "testdata"
| ["--toml", path] => pure <| ← summary #[← test path]
| ["--all", path] => pure <| ← testAll path
| _ =>
IO.println s!"usage: Test [--toml <path>] [--all path]"
pure 1
catch e =>
IO.println s!"Error: {e}"
pure 1

pure exitcode
78 changes: 78 additions & 0 deletions test/Test/Ast.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import Regex

open Syntax
open AstItems
open Regex

namespace Test.Ast

private def toString (x : Except String Ast) : String :=
match x with
| Except.ok ast => s!"{ast}"
| Except.error e => s!"Error {e}"

private def astOf'a' : Ast :=
Ast.Literal ⟨String.toSpan "a" 0 1, LiteralKind.Verbatim, 'a'

private def astOf'a?' : Ast :=
Ast.Repetition
(Repetition.mk
(String.toSpan "a?" 0 2)
⟨String.toSpan "a?" 1 2, RepetitionKind.ZeroOrOne⟩
true
(Ast.Literal ⟨String.toSpan "a?" 0 1, LiteralKind.Verbatim, 'a'⟩))

private def astOf'ab' : Ast :=
Ast.Concat
(Concat.mk
(String.toSpan "ab" 0 2)
#[Ast.Literal ⟨String.toSpan "ab" 0 1, LiteralKind.Verbatim, 'a'⟩,
Ast.Literal ⟨String.toSpan "ab" 1 2, LiteralKind.Verbatim, 'b'⟩])

private def «astOf'[a]'» : Ast :=
Ast.ClassBracketed
(ClassBracketed.mk
(String.toSpan "[a]" 0 3)
false
(ClassSet.Item (ClassSetItem.Literal ⟨String.toSpan "[a]" 1 2, LiteralKind.Verbatim, 'a'⟩)))

private def «astOf'[a-b]'» : Ast :=
Ast.ClassBracketed
(ClassBracketed.mk
(String.toSpan "[a-b]" 0 5)
false
(ClassSet.Item (ClassSetItem.Range ⟨
String.toSpan "[a-b]" 1 4,
⟨String.toSpan "[a-b]" 1 2, LiteralKind.Verbatim, 'a'⟩,
⟨String.toSpan "[a-b]" 3 4, LiteralKind.Verbatim, 'b'⟩,
by simp_arith⟩)))

private def «astOf'a|b'» : Ast :=
Ast.Alternation
(Alternation.mk
(String.toSpan "[a|b]" 0 3)
#[Ast.Literal ⟨String.toSpan "a|b" 0 1, LiteralKind.Verbatim, 'a'⟩,
Ast.Literal ⟨String.toSpan "a|b" 2 3, LiteralKind.Verbatim, 'b'⟩])

private def «astOf'(a)'» : Ast :=
Syntax.AstItems.Ast.Group
(Syntax.AstItems.Group.mk
(String.toSpan "(a)" 0 3)
(GroupKind.CaptureIndex 1)
(Ast.Literal ⟨String.toSpan "(a)" 1 2, LiteralKind.Verbatim, 'a'⟩))

example : (parse "a" |> toString) = s!"{astOf'a'}" := by native_decide

example : (parse "a?" |> toString) = s!"{astOf'a?'}" := by native_decide

example : (parse "ab" |> toString) = s!"{astOf'ab'}" := by native_decide

example : (parse "[a]" |> toString) = s!"{«astOf'[a]'»}" := by native_decide

example : (parse "[a-b]" |> toString) = s!"{«astOf'[a-b]'»}" := by native_decide

example : (parse "a|b" |> toString) = s!"{«astOf'a|b'»}" := by native_decide

example : (parse "(a)" |> toString) = s!"{«astOf'(a)'»}" := by native_decide

end Test.Ast
Loading

0 comments on commit 61e54e8

Please sign in to comment.