Skip to content

Commit

Permalink
code revision after comments
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Oct 16, 2017
1 parent 437c9c2 commit c24f155
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* The `idx` argument in `ismatch(r::Regex, s::AbstractString[, idx::Integer])` now
specifies the index at which to start the search. Previously it was undocumented and
was interpreted as an offset from the start of the string (default `0`) rather than an index.

* `getindex(s::String, r::UnitRange{Int})` now throws `UnicodeError` if `last(r)`
is not a valid index into `s` ([#22572]).

Expand Down
35 changes: 20 additions & 15 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,10 @@ end
getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]

"""
ismatch(r::Regex, s::AbstractString) -> Bool
ismatch(r::Regex, s::AbstractString[, idx::Integer]) -> Bool
Test whether a string contains a match of the given regular expression.
The optional `idx` argument specifies an index at which to start the search.
# Examples
```jldoctest
Expand All @@ -154,22 +155,25 @@ r"a.a"
julia> ismatch(rx, "aba")
true
julia> ismatch(rx, "aba", 2)
true
julia> ismatch(rx, "abba")
false
julia> rx("aba")
true
```
"""
function ismatch(r::Regex, s::AbstractString, offset::Integer=0)
function ismatch(r::Regex, s::AbstractString, idx::Integer=start(s))
# NOTE(review): the two headers above and the two `PCRE.exec` lines below appear to be
# the before/after sides of one change shown together (the `offset` form next to the
# `idx` form). Presumably only the `idx` lines belong to the revised method — confirm
# against the repository.
#
# Calls `compile(r)`, converts `s` to a `String`, and returns the result of `PCRE.exec`
# run with `r.match_options` and `r.match_data`. The `offset` form passes its third
# argument through unchanged; the `idx` form passes `Csize_t(idx-1)` instead.
compile(r)
return PCRE.exec(r.regex, String(s), offset, r.match_options,
return PCRE.exec(r.regex, String(s), Csize_t(idx-1), r.match_options,
r.match_data)
end

# `ismatch` method for `SubString{String}`: unlike the `AbstractString` method, `s` is
# handed to `PCRE.exec` directly, without a `String(s)` conversion.
# NOTE(review): the paired header lines and paired `PCRE.exec` lines look like the
# before/after sides of the diff (`offset` default `0` vs. `idx` default `1`, passed on as
# `Csize_t(idx-1)`) — confirm which pair is current.
function ismatch(r::Regex, s::SubString{String}, offset::Integer=0)
function ismatch(r::Regex, s::SubString{String}, idx::Integer=1)
compile(r)
return PCRE.exec(r.regex, s, offset, r.match_options,
return PCRE.exec(r.regex, s, Csize_t(idx-1), r.match_options,
r.match_data)
end

Expand All @@ -181,7 +185,7 @@ end
Search for the first match of the regular expression `r` in `s` and return a `RegexMatch`
object containing the match, or nothing if the match failed. The matching substring can be
retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
`m.captures` The optional `idx` argument specifies an index at which to start the search.
`m.captures`. The optional `idx` argument specifies an index at which to start the search.
# Examples
```jldoctest
Expand All @@ -208,7 +212,7 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
add_opts::UInt32=UInt32(0))
compile(re)
opts = re.match_options | add_opts
if !PCRE.exec(re.regex, str, idx-1, opts, re.match_data)
if !PCRE.exec(re.regex, str, Csize_t(idx-1), opts, re.match_data)
return nothing
end
ovec = re.ovec
Expand All @@ -223,7 +227,8 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
RegexMatch(mat, cap, ovec[1]+1, off, re)
end

# Fallback `match` methods for any `AbstractString`: the three-argument form converts `s`
# to a `String` and forwards to `match(r, String(s), i)`; the two-argument form supplies
# `start(s)` as the index.
# NOTE(review): the first definition (default argument `i=start(s)`) and the two
# definitions after it appear to be the before/after sides of the same change — confirm
# which set is current.
match(r::Regex, s::AbstractString, i::Integer=start(s)) = match(r, String(s), i)
match(r::Regex, s::AbstractString) = match(r, s, start(s))
match(r::Regex, s::AbstractString, i::Integer) = match(r, String(s), i)

"""
matchall(r::Regex, s::AbstractString[, overlap::Bool=false]) -> Vector{AbstractString}
Expand Down Expand Up @@ -252,7 +257,7 @@ function matchall(re::Regex, str::Union{SubString{String}, String}, overlap::Boo
regex = compile(re).regex
n = sizeof(str)
matches = SubString{String}[]
offset = UInt32(0)
offset = Csize_t(0)
opts = re.match_options
opts_nonempty = opts | PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART
prevempty = false
Expand All @@ -261,7 +266,7 @@ function matchall(re::Regex, str::Union{SubString{String}, String}, overlap::Boo
result = PCRE.exec(regex, str, offset, prevempty ? opts_nonempty : opts, re.match_data)
if !result
if prevempty && offset < n
offset = UInt32(nextind(str, offset + 1) - 1)
offset = Csize_t(nextind(str, offset + 1) - 1)
prevempty = false
continue
else
Expand All @@ -273,7 +278,7 @@ function matchall(re::Regex, str::Union{SubString{String}, String}, overlap::Boo
prevempty = offset == ovec[2]
if overlap
if !prevempty
offset = UInt32(ovec[1]+1)
offset = Csize_t(ovec[1]+1)
end
else
offset = ovec[2]
Expand All @@ -286,13 +291,13 @@ matchall(re::Regex, str::AbstractString, overlap::Bool=false) =
matchall(re, String(str), overlap)

function search(str::Union{String,SubString{String}}, re::Regex, idx::Integer)
# Searches `str` for `re`; `idx` is validated and then handed to `PCRE.exec` as `idx-1`.
# NOTE(review): this span contains two alternative index checks and two alternative
# `PCRE.exec` lines, presumably the before/after sides of the diff — confirm which apply.
#
# Check variant A: throw `BoundsError(str, idx)` unless
# `start(str) <= idx <= sizeof(str)`, then throw `UnicodeError` when
# `is_valid_continuation` is true for the code unit at `idx`.
start(str) <= idx <= sizeof(str) || throw(BoundsError(str, idx))
# NOTE(review): the next line has an unmatched closing parenthesis.
@inbounds b = codeunit(str, idx))
is_valid_continuation(b) && throw(UnicodeError(UTF_ERR_INVALID_INDEX, idx, b))
# Check variant B: only reject an `idx` greater than `nextind(str, endof(str))`.
if idx > nextind(str,endof(str))
throw(BoundsError())
end

opts = re.match_options
compile(re)
# When `PCRE.exec` succeeds, return the range
# `(Int(re.ovec[1])+1):prevind(str, Int(re.ovec[2])+1)`; otherwise return the empty
# range `0:-1`.
PCRE.exec(re.regex, str, idx-1, opts, re.match_data) ?
PCRE.exec(re.regex, str, Csize_t(idx-1), opts, re.match_data) ?
((Int(re.ovec[1])+1):prevind(str,Int(re.ovec[2])+1)) : (0:-1)
end
# Fallback for any `AbstractString`: converts `s` to a `String` and forwards to
# `search(String(s), r, idx)`.
search(s::AbstractString, r::Regex, idx::Integer) = search(String(s), r, idx)
Expand Down
2 changes: 1 addition & 1 deletion test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ show(buf, r"")
# A pattern and subject that both contain an embedded NUL character must still match.
@test ismatch(Regex("^a\0b\$"), "a\0b")

# Searching an empty string yields the empty range `0:-1`.
@test search("", r"ABC") == 0:-1
# NOTE(review): the next two lines state conflicting expectations for the same call
# (`ArgumentError` vs. `0:-1`); they look like the before/after sides of the diff —
# confirm which one is current.
@test_throws ArgumentError search("", r"ABC", 1)
@test search("", r"ABC", 1) == 0:-1
@test search("_ABC_", r"A.C") == 2:4
# NOTE(review): this assertion repeats the call from the previous line but expects a
# different result; one of the two looks mistyped — verify against the repository.
@test search("_ABC_", r"A.C") == 0:-1

Expand Down

0 comments on commit c24f155

Please sign in to comment.