Skip to content

Commit

Permalink
close nim-lang#14284 document semantics for start for re,nre; improve…
Browse files Browse the repository at this point in the history
… examples (nim-lang#14483)
  • Loading branch information
timotheecour authored May 28, 2020
1 parent fe7a2d6 commit 17d08ff
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 196 deletions.
28 changes: 17 additions & 11 deletions lib/impure/nre.nim
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ runnableExamples:
let matchBounds = firstVowel.get().captureBounds[-1]
doAssert matchBounds.a == 1

## As with module `re`, unless specified otherwise, the `start` parameter in
## each proc indicates where the scan starts, but outputs are relative to the
## start of the input string, not to `start`:
doAssert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
doAssert find("uxabc", re"ab", start = 3).isNone

from pcre import nil
import nre/private/util
import tables
Expand Down Expand Up @@ -222,15 +228,15 @@ type
## code.

runnableExamples:
# This MUST be kept in sync with the examples in RegexMatch
doAssert "abc".match(re"(\w)").get.captures[0] == "a"
doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
# This MUST be kept in sync with the examples in RegexMatch
doAssert "abc".match(re"(\w)").get.captures[0] == "a"
doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"

doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2


proc destroyRegex(pattern: Regex) =
Expand Down Expand Up @@ -614,9 +620,9 @@ proc contains*(str: string, pattern: Regex, start = 0, endpos = int.high): bool
## This function is equivalent to ``isSome(str.find(pattern, start, endpos))``.
##
runnableExamples:
doAssert "abc".contains(re"bc") == true
doAssert "abc".contains(re"cd") == false
doAssert "abc".contains(re"a", start = 1) == false
doAssert "abc".contains(re"bc")
doAssert not "abc".contains(re"cd")
doAssert not "abc".contains(re"a", start = 1)

return isSome(str.find(pattern, start, endpos))

Expand Down
235 changes: 50 additions & 185 deletions lib/impure/re.nim
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ when defined(js):
## .. include:: ../../doc/regexprs.txt
##

runnableExamples:
## Unless specified otherwise, the `start` parameter in each proc indicates
## where the scan starts, but outputs are relative to the start of the input
## string, not to `start`:
doAssert find("uxabc", re"(?<=x|y)ab", start = 1) == 2 # lookbehind assertion
doAssert find("uxabc", re"ab", start = 3) == -1 # we're past `start` => not found
doAssert not match("xabc", re"^abc$", start = 1)
# can't match start of string since we're starting at 1

import
pcre, strutils, rtarrays

Expand Down Expand Up @@ -209,11 +218,8 @@ proc findBounds*(s: string, pattern: Regex,
## If it does not match, ``(-1,0)`` is returned.
##
## Note: there is a speed improvement if the matches do not need to be captured.
##
## Example:
##
## .. code-block:: nim
## assert findBounds("01234abc89", re"abc") == (5,7)
runnableExamples:
assert findBounds("01234abc89", re"abc") == (5,7)
result = findBounds(cstring(s), pattern, start, s.len)

proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, flags: cint): cint =
Expand Down Expand Up @@ -244,12 +250,10 @@ proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
## if there is no match, ``-1`` is returned. Note that a match length
## of zero can happen.
##
## Example:
##
## .. code-block:: nim
## echo matchLen("abcdefg", re"cde", 2) # => 3
## echo matchLen("abcdefg", re"abcde") # => 5
## echo matchLen("abcdefg", re"cde") # => -1
runnableExamples:
doAssert matchLen("abcdefg", re"cde", 2) == 3
doAssert matchLen("abcdefg", re"abcde") == 5
doAssert matchLen("abcdefg", re"cde") == -1
result = matchOrFind(cstring(s), pattern, start.cint, s.len.cint, pcre.ANCHORED)

proc matchLen*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int {.inline.} =
Expand All @@ -269,13 +273,11 @@ proc match*(s: string, pattern: Regex, matches: var openArray[string],
## match, nothing is written into ``matches`` and ``false`` is
## returned.
##
## Example:
##
## .. code-block:: nim
## var matches: array[2, string]
## if match("abcdefg", re"c(d)ef(g)", matches, 2):
## for s in matches:
## echo s # => d g
runnableExamples:
import sequtils
var matches: array[2, string]
if match("abcdefg", re"c(d)ef(g)", matches, 2):
doAssert toSeq(matches) == @["d", "g"]
result = matchLen(cstring(s), pattern, matches, start, s.len) != -1

proc match*(buf: cstring, pattern: Regex, matches: var openArray[string],
Expand Down Expand Up @@ -327,14 +329,15 @@ proc find*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int =

proc find*(s: string, pattern: Regex, start = 0): int {.inline.} =
## returns the starting position of ``pattern`` in ``s``. If it does not
## match, ``-1`` is returned.
##
## Example:
##
## .. code-block:: nim
## echo find("abcdefg", re"cde") # => 2
## echo find("abcdefg", re"abc") # => 0
## echo find("abcdefg", re"zz") # => -1
## match, ``-1`` is returned. We start the scan at `start`.
runnableExamples:
doAssert find("abcdefg", re"cde") == 2
doAssert find("abcdefg", re"abc") == 0
doAssert find("abcdefg", re"zz") == -1 # not found
doAssert find("abcdefg", re"cde", start = 2) == 2 # still 2
doAssert find("abcdefg", re"cde", start = 3) == -1 # we're past the start position
doAssert find("xabc", re"(?<=x|y)abc", start = 1) == 1
# lookbehind assertion `(?<=x|y)` can look behind `start`
result = find(cstring(s), pattern, start, s.len)

iterator findAll*(s: string, pattern: Regex, start = 0): string =
Expand Down Expand Up @@ -389,21 +392,17 @@ when not defined(nimhygiene):
template `=~` *(s: string, pattern: Regex): untyped =
## This calls ``match`` with an implicit declared ``matches`` array that
## can be used in the scope of the ``=~`` call:
##
## .. code-block:: nim
##
## if line =~ re"\s*(\w+)\s*\=\s*(\w+)":
## # matches a key=value pair:
## echo("Key: ", matches[0])
## echo("Value: ", matches[1])
## elif line =~ re"\s*(\#.*)":
## # matches a comment
## # note that the implicit ``matches`` array is different from the
## # ``matches`` array of the first branch
## echo("comment: ", matches[0])
## else:
## echo("syntax error")
##
runnableExamples:
proc parse(line: string): string =
if line =~ re"\s*(\w+)\s*\=\s*(\w+)": # matches a key=value pair:
result = $(matches[0], matches[1])
elif line =~ re"\s*(\#.*)": # matches a comment
# note: this implicit ``matches`` array is distinct from the 1st branch's
result = $(matches[0],)
else: doAssert false
doAssert not declared(matches)
doAssert parse("NAME = LENA") == """("NAME", "LENA")"""
doAssert parse(" # comment ... ") == """("# comment ... ",)"""
bind MaxSubpatterns
when not declaredInScope(matches):
var matches {.inject.}: array[MaxSubpatterns, string]
Expand Down Expand Up @@ -432,17 +431,9 @@ proc endsWith*(s: string, suffix: Regex): bool {.inline.} =
proc replace*(s: string, sub: Regex, by = ""): string =
## Replaces ``sub`` in ``s`` by the string ``by``. Captures cannot be
## accessed in ``by``.
##
## Example:
##
## .. code-block:: nim
## "var1=key; var2=key2".replace(re"(\w+)=(\w+)")
##
## Results in:
##
## .. code-block:: nim
##
## "; "
runnableExamples:
doAssert "var1=key; var2=key2".replace(re"(\w+)=(\w+)") == "; "
doAssert "var1=key; var2=key2".replace(re"(\w+)=(\w+)", "?") == "?; ?"
result = ""
var prev = 0
while prev < s.len:
Expand All @@ -457,17 +448,9 @@ proc replace*(s: string, sub: Regex, by = ""): string =
proc replacef*(s: string, sub: Regex, by: string): string =
## Replaces ``sub`` in ``s`` by the string ``by``. Captures can be accessed in ``by``
## with the notation ``$i`` and ``$#`` (see strutils.\`%\`).
##
## Example:
##
## .. code-block:: nim
## "var1=key; var2=key2".replacef(re"(\w+)=(\w+)", "$1<-$2$2")
##
## Results in:
##
## .. code-block:: nim
##
## "var1<-keykey; var2<-key2key2"
runnableExamples:
doAssert "var1=key; var2=key2".replacef(re"(\w+)=(\w+)", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2"
result = ""
var caps: array[MaxSubpatterns, string]
var prev = 0
Expand Down Expand Up @@ -520,23 +503,10 @@ iterator split*(s: string, sep: Regex; maxsplit = -1): string =
##
## Substrings are separated by the regular expression ``sep``
## (and the portion matched by ``sep`` is not returned).
##
## Example:
##
## .. code-block:: nim
## for word in split("00232this02939is39an22example111", re"\d+"):
## writeLine(stdout, word)
##
## Results in:
##
## .. code-block:: nim
## ""
## "this"
## "is"
## "an"
## "example"
## ""
##
runnableExamples:
import sequtils
doAssert toSeq(split("00232this02939is39an22example111", re"\d+")) ==
@["", "this", "is", "an", "example", ""]
var last = 0
var splits = maxsplit
var x: int
Expand Down Expand Up @@ -576,108 +546,3 @@ proc escapeRe*(s: string): string =
else:
result.add("\\x")
result.add(toHex(ord(c), 2))

when isMainModule:
doAssert match("(a b c)", rex"\( .* \)")
doAssert match("WHiLe", re("while", {reIgnoreCase}))

doAssert "0158787".match(re"\d+")
doAssert "ABC 0232".match(re"\w+\s+\d+")
doAssert "ABC".match(rex"\d+ | \w+")

{.push warnings:off.}
doAssert matchLen("key", re"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b") == 3
{.pop.}

var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+"
doAssert matchLen("key1= cal9", pattern) == 11

doAssert find("_____abc_______", re"abc") == 5
doAssert findBounds("_____abc_______", re"abc") == (5,7)

var matches: array[6, string]
if match("abcdefg", re"c(d)ef(g)", matches, 2):
doAssert matches[0] == "d"
doAssert matches[1] == "g"
else:
doAssert false

if "abc" =~ re"(a)bcxyz|(\w+)":
doAssert matches[1] == "abc"
else:
doAssert false

if "abc" =~ re"(cba)?.*":
doAssert matches[0] == ""
else: doAssert false

if "abc" =~ re"().*":
doAssert matches[0] == ""
else: doAssert false

doAssert "var1=key; var2=key2".endsWith(re"\w+=\w+")
doAssert("var1=key; var2=key2".replacef(re"(\w+)=(\w+)", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2")
doAssert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") ==
"$1<-$2$2; $1<-$2$2")

var accum: seq[string] = @[]
for word in split("00232this02939is39an22example111", re"\d+"):
accum.add(word)
doAssert(accum == @["", "this", "is", "an", "example", ""])

accum = @[]
for word in split("00232this02939is39an22example111", re"\d+", maxsplit=2):
accum.add(word)
doAssert(accum == @["", "this", "is39an22example111"])

accum = @[]
for word in split("AAA : : BBB", re"\s*:\s*"):
accum.add(word)
doAssert(accum == @["AAA", "", "BBB"])

doAssert(split("abc", re"") == @["a", "b", "c"])
doAssert(split("", re"") == @[])

doAssert(split("a;b;c", re";") == @["a", "b", "c"])
doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
doAssert(split("00232this02939is39an22example111", re"\d+", maxsplit=2) == @["", "this", "is39an22example111"])


for x in findAll("abcdef", re"^{.}", 3):
doAssert x == "d"
accum = @[]
for x in findAll("abcdef", re".", 3):
accum.add(x)
doAssert(accum == @["d", "e", "f"])

doAssert("XYZ".find(re"^\d*") == 0)
doAssert("XYZ".match(re"^\d*") == true)

block:
var matches: array[16, string]
if match("abcdefghijklmnop", re"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)(p)", matches):
for i in 0..matches.high:
doAssert matches[i] == $chr(i + 'a'.ord)
else:
doAssert false

block: # Buffer based RE
var cs: cstring = "_____abc_______"
doAssert(cs.find(re"abc", bufSize=15) == 5)
doAssert(cs.matchLen(re"_*abc", bufSize=15) == 8)
doAssert(cs.matchLen(re"abc", start=5, bufSize=15) == 3)
doAssert(cs.matchLen(re"abc", start=5, bufSize=7) == -1)
doAssert(cs.matchLen(re"abc_*", start=5, bufSize=10) == 5)
var accum: seq[string] = @[]
for x in cs.findAll(re"[a-z]", start=3, bufSize=15):
accum.add($x)
doAssert(accum == @["a","b","c"])

block:
# bug #9306
doAssert replace("bar", re"^", "foo") == "foobar"
doAssert replace("foo", re"", "-") == "-foo"
doAssert replace("foo", re"$", "bar") == "foobar"
Loading

0 comments on commit 17d08ff

Please sign in to comment.