Skip to content

Commit

Permalink
Merge pull request #9755 from flaviut/nre-missing-match
Browse files Browse the repository at this point in the history
Fix for #9243, nre returns "" instead of nil for missing matches
  • Loading branch information
Araq authored Dec 12, 2018
2 parents c682671 + c0a47f7 commit 3a70d18
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 83 deletions.
19 changes: 19 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,25 @@

- `options.UnpackError` is no longer a ref type and inherits from `System.Defect` instead of `System.ValueError`.

- nre's `RegexMatch.{captureBounds,captures}[]` no longer return `Option` or
`nil`/`""`, respectivly. Use the newly added `n in p.captures` method to
check if a group is captured, otherwise you'll recieve an exception.

- nre's `RegexMatch.{captureBounds,captures}.toTable` no longer accept a
default parameter. Instead uncaptured entries are left empty. Use
`Table.getOrDefault()` if you need defaults.

- nre's `RegexMatch.captures.{items,toSeq}` now returns an `Option[string]`
instead of a `string`. With the removal of `nil` strings, this is the only
way to indicate a missing match. Inside your loops, instead of `capture ==
""` or `capture == nil`, use `capture.isSome` to check if a capture is
present, and `capture.get` to get its value.

- nre's `replace()` no longer throws `ValueError` when the replacement string
has missing captures. It instead throws `KeyError` for named captures, and
`IndexError` for un-named captures. This is consistant with
`RegexMatch.{captureBounds,captures}[]`.

#### Breaking changes in the compiler

- The compiler now implements the "generic symbol prepass" for `when` statements
Expand Down
131 changes: 78 additions & 53 deletions lib/impure/nre.nim
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ runnableExamples:
let hasVowel = firstVowel.isSome()
if hasVowel:
let matchBounds = firstVowel.get().captureBounds[-1]
doAssert matchBounds.get().a == 1
doAssert matchBounds.a == 1


# Type definitions {{{
Expand Down Expand Up @@ -167,14 +167,15 @@ type
## - ``"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"``
## - ``"abc".match(re"(\w)\w").get.captures[-1] == "ab"``
##
## ``captureBounds[]: Option[HSlice[int, int]]``
## ``captureBounds[]: HSlice[int, int]``
## gets the bounds of the given capture according to the same rules as
## the above. If the capture is not filled, then ``None`` is returned.
## The bounds are both inclusive.
##
## - ``"abc".match(re"(\w)").get.captureBounds[0].get == 0 .. 0``
## - ``"abc".match(re"").get.captureBounds[-1].get == 0 .. -1``
## - ``"abc".match(re"abc").get.captureBounds[-1].get == 0 .. 2``
## - ``"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0``
## - ``0 in "abc".match(re"(\w)").get.captureBounds == true``
## - ``"abc".match(re"").get.captureBounds[-1] == 0 .. -1``
## - ``"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2``
##
## ``match: string``
## the full text of the match.
Expand Down Expand Up @@ -227,9 +228,10 @@ runnableExamples:
doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"

doAssert "abc".match(re"(\w)").get.captureBounds[0].get == 0 .. 0
doAssert "abc".match(re"").get.captureBounds[-1].get == 0 .. -1
doAssert "abc".match(re"abc").get.captureBounds[-1].get == 0 .. 2
doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
# }}}

proc getinfo[T](pattern: Regex, opt: cint): T =
Expand Down Expand Up @@ -269,78 +271,99 @@ proc matchesCrLf(pattern: Regex): bool =
# }}}

# Capture accessors {{{
proc captureBounds*(pattern: RegexMatch): CaptureBounds = return CaptureBounds(pattern)
func captureBounds*(pattern: RegexMatch): CaptureBounds = return CaptureBounds(pattern)

proc captures*(pattern: RegexMatch): Captures = return Captures(pattern)
func captures*(pattern: RegexMatch): Captures = return Captures(pattern)

proc `[]`*(pattern: CaptureBounds, i: int): Option[HSlice[int, int]] =
func contains*(pattern: CaptureBounds, i: int): bool =
let pattern = RegexMatch(pattern)
if pattern.pcreMatchBounds[i + 1].a != -1:
let bounds = pattern.pcreMatchBounds[i + 1]
return some(int(bounds.a) .. int(bounds.b-1))
else:
return none(HSlice[int, int])
pattern.pcreMatchBounds[i + 1].a != -1

func contains*(pattern: Captures, i: int): bool =
i in CaptureBounds(pattern)

proc `[]`*(pattern: Captures, i: int): string =
func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
let pattern = RegexMatch(pattern)
if not (i in pattern.captureBounds):
raise newException(IndexError, "Group '" & $i & "' was not captured")

let bounds = pattern.pcreMatchBounds[i + 1]
int(bounds.a)..int(bounds.b-1)

func `[]`*(pattern: Captures, i: int): string =
let pattern = RegexMatch(pattern)
let bounds = pattern.captureBounds[i]

if bounds.isSome:
let bounds = bounds.get
return pattern.str.substr(bounds.a, bounds.b)
else:
return ""
pattern.str.substr(bounds.a, bounds.b)

proc match*(pattern: RegexMatch): string =
func match*(pattern: RegexMatch): string =
return pattern.captures[-1]

proc matchBounds*(pattern: RegexMatch): HSlice[int, int] =
return pattern.captureBounds[-1].get
func matchBounds*(pattern: RegexMatch): HSlice[int, int] =
return pattern.captureBounds[-1]

func contains*(pattern: CaptureBounds, name: string): bool =
let pattern = RegexMatch(pattern)
let nameToId = pattern.pattern.captureNameToId
if not (name in nameToId):
return false
nameToId[name] in pattern.captureBounds

func contains*(pattern: Captures, name: string): bool =
name in CaptureBounds(pattern)

proc `[]`*(pattern: CaptureBounds, name: string): Option[HSlice[int, int]] =
func checkNamedCaptured(pattern: RegexMatch, name: string): void =
if not (name in pattern.captureBounds):
raise newException(KeyError, "Group '" & name & "' was not captured")

func `[]`*(pattern: CaptureBounds, name: string): HSlice[int, int] =
let pattern = RegexMatch(pattern)
return pattern.captureBounds[pattern.pattern.captureNameToId.fget(name)]
checkNamedCaptured(pattern, name)
pattern.captureBounds[pattern.pattern.captureNameToId[name]]

proc `[]`*(pattern: Captures, name: string): string =
func `[]`*(pattern: Captures, name: string): string =
let pattern = RegexMatch(pattern)
return pattern.captures[pattern.pattern.captureNameToId.fget(name)]
checkNamedCaptured(pattern, name)
return pattern.captures[pattern.pattern.captureNameToId[name]]

template toTableImpl(cond: untyped) {.dirty.} =
template toTableImpl() {.dirty.} =
for key in RegexMatch(pattern).pattern.captureNameId.keys:
let nextVal = pattern[key]
if cond:
result[key] = default
else:
result[key] = nextVal
if key in pattern:
result[key] = pattern[key]

proc toTable*(pattern: Captures, default: string = ""): Table[string, string] =
func toTable*(pattern: Captures): Table[string, string] =
result = initTable[string, string]()
toTableImpl(nextVal.len == 0)
toTableImpl()

proc toTable*(pattern: CaptureBounds, default = none(HSlice[int, int])):
Table[string, Option[HSlice[int, int]]] =
result = initTable[string, Option[HSlice[int, int]]]()
toTableImpl(nextVal.isNone)
func toTable*(pattern: CaptureBounds): Table[string, HSlice[int, int]] =
result = initTable[string, HSlice[int, int]]()
toTableImpl()

template itemsImpl(cond: untyped) {.dirty.} =
template itemsImpl() {.dirty.} =
for i in 0 ..< RegexMatch(pattern).pattern.captureCount:
let nextVal = pattern[i]
# done in this roundabout way to avoid multiple yields (potential code
# bloat)
let nextYieldVal = if cond: default else: nextVal
yield nextYieldVal
let nextYieldVal = if i in pattern:
some(pattern[i])
else:
default

yield nextYieldVal

iterator items*(pattern: CaptureBounds, default = none(HSlice[int, int])): Option[HSlice[int, int]] =
itemsImpl(nextVal.isNone)
iterator items*(pattern: CaptureBounds,
default = none(HSlice[int, int])): Option[HSlice[int, int]] =
itemsImpl()

iterator items*(pattern: Captures, default: string = ""): string =
itemsImpl(nextVal.len == 0)
iterator items*(pattern: Captures,
default: Option[string] = none(string)): Option[string] =
itemsImpl()

proc toSeq*(pattern: CaptureBounds, default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
proc toSeq*(pattern: CaptureBounds,
default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
accumulateResult(pattern.items(default))

proc toSeq*(pattern: Captures, default: string = ""): seq[string] =
proc toSeq*(pattern: Captures,
default: Option[string] = none(string)): seq[Option[string]] =
accumulateResult(pattern.items(default))

proc `$`*(pattern: RegexMatch): string =
Expand Down Expand Up @@ -652,7 +675,8 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]

for cap in match.captures:
# if there are captures, include them in the result
result.add(cap)
if cap.isSome:
result.add(cap.get)

if splits == maxSplit - 1:
break
Expand Down Expand Up @@ -706,7 +730,8 @@ proc replace*(str: string, pattern: Regex,
## - ``$#`` - first capture
## - ``$0`` - full match
##
## If a given capture is missing, a ``ValueError`` exception is thrown.
## If a given capture is missing, ``IndexError`` thrown for un-named captures
## and ``KeyError`` for named captures.
replaceImpl(str, pattern, subproc(match))

proc replace*(str: string, pattern: Regex,
Expand Down
16 changes: 4 additions & 12 deletions lib/impure/nre/private/util.nim
Original file line number Diff line number Diff line change
@@ -1,17 +1,9 @@
## INTERNAL FILE FOR USE ONLY BY nre.nim.
import tables

proc fget*[K, V](self: Table[K, V], key: K): V =
if self.hasKey(key):
return self[key]
else:
raise newException(KeyError, "Key does not exist in table: " & $key)

const Ident = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
const StartIdent = Ident - {'0'..'9'}

template checkNil(arg: string): string = arg

template formatStr*(howExpr, namegetter, idgetter): untyped =
let how = howExpr
var val = newStringOfCap(how.len)
Expand All @@ -28,7 +20,7 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
i += 2
elif how[i + 1] == '#':
var id {.inject.} = lastNum
val.add(checkNil(idgetter))
val.add(idgetter)
lastNum += 1
i += 2
elif how[i + 1] in {'0'..'9'}:
Expand All @@ -37,23 +29,23 @@ template formatStr*(howExpr, namegetter, idgetter): untyped =
while i < how.len and how[i] in {'0'..'9'}:
id += (id * 10) + (ord(how[i]) - ord('0'))
i += 1
val.add(checkNil(idgetter))
val.add(idgetter)
lastNum = id + 1
elif how[i + 1] in StartIdent:
i += 1
var name {.inject.} = ""
while i < how.len and how[i] in Ident:
name.add(how[i])
i += 1
val.add(checkNil(namegetter))
val.add(namegetter)
elif how[i + 1] == '{':
i += 2
var name {.inject.} = ""
while i < how.len and how[i] != '}':
name.add(how[i])
i += 1
i += 1
val.add(checkNil(namegetter))
val.add(namegetter)
else:
raise newException(Exception, "Syntax error in format string at " & $i)
val
31 changes: 18 additions & 13 deletions tests/stdlib/nre/captures.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,36 @@ suite "captures":
test "capture bounds are correct":
let ex1 = re("([0-9])")
check("1 23".find(ex1).matchBounds == 0 .. 0)
check("1 23".find(ex1).captureBounds[0].get == 0 .. 0)
check("1 23".find(ex1).captureBounds[0] == 0 .. 0)
check("1 23".find(ex1, 1).matchBounds == 2 .. 2)
check("1 23".find(ex1, 3).matchBounds == 3 .. 3)

let ex2 = re("()()()()()()()()()()([0-9])")
check("824".find(ex2).captureBounds[0].get == 0 .. -1)
check("824".find(ex2).captureBounds[10].get == 0 .. 0)
check("824".find(ex2).captureBounds[0] == 0 .. -1)
check("824".find(ex2).captureBounds[10] == 0 .. 0)

let ex3 = re("([0-9]+)")
check("824".find(ex3).captureBounds[0].get == 0 .. 2)
check("824".find(ex3).captureBounds[0] == 0 .. 2)

test "named captures":
let ex1 = "foobar".find(re("(?<foo>foo)(?<bar>bar)"))
check(ex1.captures["foo"] == "foo")
check(ex1.captures["bar"] == "bar")

let ex2 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
check("foo" in ex2.captureBounds)
check(ex2.captures["foo"] == "foo")
check(ex2.captures["bar"] == "")
check(not ("bar" in ex2.captures))
expect KeyError:
discard ex2.captures["bar"]

test "named capture bounds":
let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
check(ex1.captureBounds["foo"] == some(0..2))
check(ex1.captureBounds["bar"] == none(Slice[int]))
check("foo" in ex1.captureBounds)
check(ex1.captureBounds["foo"] == 0..2)
check(not ("bar" in ex1.captures))
expect KeyError:
discard ex1.captures["bar"]

test "capture count":
let ex1 = re("(?<foo>foo)(?<bar>bar)?")
Expand All @@ -41,19 +47,18 @@ suite "captures":

test "named capture table":
let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
check(ex1.captures.toTable == {"foo" : "foo", "bar" : ""}.toTable())
check(ex1.captureBounds.toTable == {"foo" : some(0..2), "bar" : none(Slice[int])}.toTable())
check(ex1.captures.toTable("") == {"foo" : "foo", "bar" : ""}.toTable())
check(ex1.captures.toTable == {"foo" : "foo"}.toTable())
check(ex1.captureBounds.toTable == {"foo" : 0..2}.toTable())

let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
check(ex2.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable())

test "capture sequence":
let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
check(ex1.captures.toSeq == @["foo", ""])
check(ex1.captures.toSeq == @[some("foo"), none(string)])
check(ex1.captureBounds.toSeq == @[some(0..2), none(Slice[int])])
check(ex1.captures.toSeq("") == @["foo", ""])
check(ex1.captures.toSeq(some("")) == @[some("foo"), some("")])

let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
check(ex2.captures.toSeq == @["foo", "bar"])
check(ex2.captures.toSeq == @[some("foo"), some("bar")])

6 changes: 3 additions & 3 deletions tests/stdlib/nre/match.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ suite "match":
check("abc".match(re"(\w)").captures[0] == "a")
check("abc".match(re"(?<letter>\w)").captures["letter"] == "a")
check("abc".match(re"(\w)\w").captures[-1] == "ab")
check("abc".match(re"(\w)").captureBounds[0].get == 0 .. 0)
check("abc".match(re"").captureBounds[-1].get == 0 .. -1)
check("abc".match(re"abc").captureBounds[-1].get == 0 .. 2)
check("abc".match(re"(\w)").captureBounds[0] == 0 .. 0)
check("abc".match(re"").captureBounds[-1] == 0 .. -1)
check("abc".match(re"abc").captureBounds[-1] == 0 .. 2)

test "match test cases":
check("123".match(re"").matchBounds == 0 .. -1)
6 changes: 4 additions & 2 deletions tests/stdlib/nre/replace.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@ suite "replace":
check("123".replace(re"(?<foo>\d)(\d)", "${foo}$#$#") == "1123")

test "replacing missing captures should throw instead of segfaulting":
discard "ab".replace(re"(a)|(b)", "$1$2")
discard "b".replace(re"(a)?(b)", "$1$2")
expect IndexError: discard "ab".replace(re"(a)|(b)", "$1$2")
expect IndexError: discard "b".replace(re"(a)?(b)", "$1$2")
expect KeyError: discard "b".replace(re"(a)?", "${foo}")
expect KeyError: discard "b".replace(re"(?<foo>a)?", "${foo}")

0 comments on commit 3a70d18

Please sign in to comment.