Skip to content

Commit

Permalink
NFA macro + better NFA matcher (#58)
Browse files Browse the repository at this point in the history
* NFA macro
* better NFA matcher
* benchmarks
  • Loading branch information
nitely authored Apr 4, 2020
1 parent 2d96bab commit 0570009
Show file tree
Hide file tree
Showing 32 changed files with 214,091 additions and 3,598 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
nimcache/
src/regex
src/regex.js
tests/tests
tests/tests.js
docs/ugh
bin/*
bench/bench
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
services:
- docker
env:
- NIM=0.19.0
- NIM=0.19.6
- NIM=0.20.0
- NIM=0.20.2
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
v0.14
==================

* Drop Nim 0.19.0 support (0.19.6 is supported)

v0.13.1
==================

Expand Down
16 changes: 16 additions & 0 deletions bench/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## Benchmarks

Run first:

```
nimble install nimbench
nimble develop
```

Run benchmarks:

```
nim c -r -d:release bench/bench.nim
```

> Try `-d:danger` as well, but `-d:release` is what most users will build with, so it is the more representative setting.
112 changes: 112 additions & 0 deletions bench/bench.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import nimbench
import unicode
from re import nil
from regex import nil

# Benchmark haystack: a run of 'a', the literal "sol", then a run of 'b'.
# Each run is 100_001 characters long.
const runLen = 100_001

var text = ""
for _ in 1 .. runLen:
  text.add 'a'
text.add "sol"
for _ in 1 .. runLen:
  text.add 'b'
#text.add("ฅ")

# `re` version of the "sol" pattern, explicitly anchored with `^`/`$`.
var pattern2 = re.re"^\w*sol\w*$"

# Matches `text` against `pattern2` `m` times using `re.match`.
bench(re_sol, m):
  var matched = false
  for _ in 0 ..< m:
    matched = re.match(text, pattern2)
  # Keep the result observable so the loop is not optimized out.
  doNotOptimizeAway(matched)

# `regex` version of the "sol" pattern, built as a `const` (compile time).
const pattern4 = regex.re(r"\w*sol\w*") #, {regex.RegexFlag.reAscii})

# Matches `text` against `pattern4` `m` times using `regex.match`.
benchRelative(regex_sol, m):
  var captures: regex.RegexMatch
  for _ in 0 ..< m:
    discard regex.match(text, pattern4, captures)
  # Keep the match object observable so the loop is not optimized out.
  doNotOptimizeAway(captures)

# Short phone-number-like input shared by the `*_nums` benchmarks.
var dummyTextNums = """650-253-0001"""

# `re` pattern: three digit groups separated by dashes, anchored.
var pattern_nums = re.re"^[0-9]+-[0-9]+-[0-9]+$"

# Matches `dummyTextNums` against `pattern_nums` `m` times with `re.match`.
bench(re_nums, m):
  var matched = false
  for _ in 0 ..< m:
    matched = re.match(dummyTextNums, pattern_nums)
  doNotOptimizeAway(matched)

# `regex` pattern for three dash-separated digit groups, built as a `const`.
const n_pattern_nums = regex.re"[0-9]+-[0-9]+-[0-9]+"

# Matches `dummyTextNums` against `n_pattern_nums` `m` times with
# `regex.match`.
benchRelative(regex_nums, m):
  var captures: regex.RegexMatch
  for _ in 0 ..< m:
    discard regex.match(dummyTextNums, n_pattern_nums, captures)
  doNotOptimizeAway(captures)

# `re` pattern: leading digits, one arbitrary character, then anything.
var pattern_nums2 = re.re"^[0-9]+..*$"

# Matches `dummyTextNums` against `pattern_nums2` `m` times with `re.match`.
bench(re_nums2, m):
  var matched = false
  for _ in 0 ..< m:
    matched = re.match(dummyTextNums, pattern_nums2)
  doNotOptimizeAway(matched)

# `regex` pattern: leading digits, one arbitrary character, then anything.
const n_pattern_nums2 = regex.re"[0-9]+..*"

# Matches `dummyTextNums` against `n_pattern_nums2` `m` times with
# `regex.match`.
benchRelative(regex_nums2, m):
  var captures: regex.RegexMatch
  for _ in 0 ..< m:
    discard regex.match(dummyTextNums, n_pattern_nums2, captures)
  doNotOptimizeAway(captures)

# `re` alternation of short literals; of these, `text` contains "sol".
var lits_find_re = re.re"do|re|mi|fa|sol"

# Searches `text` for `lits_find_re` `m` times with `re.find`.
bench(re_lits_find, m):
  var pos = 0
  for _ in 0 ..< m:
    pos = re.find(text, lits_find_re)
  doNotOptimizeAway(pos)

# `regex` alternation of short literals, built as a `const`.
const lits_find = regex.re"do|re|mi|fa|sol"

# Searches `text` for `lits_find` `m` times with `regex.find`.
benchRelative(regex_lits_find, m):
  var found: regex.RegexMatch
  for _ in 0 ..< m:
    discard regex.find(text, lits_find, found)
  doNotOptimizeAway(found)

# Corpus for the find-all benchmarks, read into a `const` via `staticRead`.
const bench_text = staticRead("input-text.txt")

# `re` pattern for e-mail-like tokens.
var email_find_all_re = re.re"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"

# Counts the matches of `email_find_all_re` in `bench_text`, `m` times,
# checking on every pass that exactly 92 matches are found.
bench(re_email_find_all, m):
  var d = 0
  for i in 0 ..< m:
    # Restart the count on every pass: a running total across passes only
    # equals 92 when `m` is 1, so the check would fail for any larger `m`.
    d = 0
    for _ in re.findAll(bench_text, email_find_all_re):
      d += 1
    doAssert d == 92
  # Keep the count observable so the loops are not optimized out.
  doNotOptimizeAway(d)

# `regex` pattern for e-mail-like tokens, built as a `const`; the pattern
# starts with the inline flag `(?-u)`.
const email_find_all = regex.re"(?-u)[\w\.+-]+@[\w\.-]+\.[\w\.-]+"

# Counts the matches of `email_find_all` in `bench_text`, `m` times,
# checking on every pass that exactly 92 matches are found.
benchRelative(email_find_all, m):
  var d = 0
  for i in 0 ..< m:
    # Restart the count on every pass: a running total across passes only
    # equals 92 when `m` is 1, so the check would fail for any larger `m`.
    d = 0
    for _ in regex.findAll(bench_text, email_find_all):
      d += 1
    doAssert d == 92
  # Keep the count observable so the loops are not optimized out.
  doNotOptimizeAway(d)

# Disabled code: a `when false` branch is never compiled.
# NOTE(review): indentation is not visible here, so it is unclear whether
# `bench(dummy, m)` below also belongs to the `when false` branch -- confirm.
when false:
# Iterates over `text.runes`, calling `memoryClobber()` per rune; `m` is not
# used in the body. Presumably the reason for `import unicode` -- verify.
bench(runes, m):
for i in text.runes:
memoryClobber()

# Loop that only calls `memoryClobber()` `m` times.
bench(dummy, m):
for i in 0 ..< m:
memoryClobber()

# Run the benchmarks only when this file is compiled as the main module.
when isMainModule:
  runBenchmarks()
Loading

0 comments on commit 0570009

Please sign in to comment.