Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add *(::Union{Regex, AbstractString, AbstractChar}...) #23422

Merged
merged 10 commits into from
Apr 29, 2019
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ New library functions
Standard library changes
------------------------

* Cmd interpolation (``` `$(x::Cmd) a b c` ```) now propagates `x`'s process flags (environment, flags, working directory, etc.) if `x` is the first interpolant and errors otherwise ([#24353]).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bad rebase?

Copy link
Member

@StefanKarpinski StefanKarpinski Apr 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Fixed in #31874.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooops, sorry about that!

* `Regex` can now be multiplied (`*`) and exponentiated (`^`), like strings ([#23422]).

#### LinearAlgebra

Expand Down
104 changes: 104 additions & 0 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -517,3 +517,107 @@ function hash(r::Regex, h::UInt)
h = hash(r.compile_options, h)
h = hash(r.match_options, h)
end

## String operations ##

"""
    *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex
    *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex

Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref).
String and character arguments must be matched exactly in the resulting regex,
meaning that the contained characters are devoid of any special meaning
(they are quoted with "\\Q" and "\\E").

Throw an `ArgumentError` if the regex arguments do not all share the same match
options and the same compile options other than the `i`, `m`, `s` and `x` flags.

!!! compat "Julia 1.3"
    This method requires at least Julia 1.3.

# Examples
```jldoctest
julia> match(r"Hello|Good bye" * ' ' * "world", "Hello world")
RegexMatch("Hello world")

julia> r = r"a|b" * "c|d"
r"(?:a|b)\\Qc|d\\E"

julia> match(r, "ac") === nothing
true

julia> match(r, "ac|d")
RegexMatch("ac|d")
```
"""
function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...)
    mask = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED # imsx
    match_opts   = nothing # all args must agree on this
    compile_opts = nothing # all args must agree on this
    shared = mask # imsx flags set on every regex argument seen so far
    for r in (r1, rs...)
        r isa Regex || continue
        if match_opts === nothing
            # first regex encountered: it fixes the options the others must agree with
            match_opts = r.match_options
            compile_opts = r.compile_options & ~mask
        else
            r.match_options == match_opts &&
                r.compile_options & ~mask == compile_opts ||
                throw(ArgumentError("cannot multiply regexes: incompatible options"))
        end
        shared &= r.compile_options
    end
    # imsx flags that are not common to all regexes are pushed down into each wrapped
    # sub-pattern; the common ones stay as options of the resulting regex
    unshared = mask & ~shared
    pattern = string(wrap_string(r1, unshared), wrap_string.(rs, Ref(unshared))...)
    return Regex(pattern, compile_opts | shared, match_opts)
end

*(r::Regex) = r # avoids wrapping r in a useless subpattern

# Wrap the pattern of `r` in a "(?<flags>:...)" group, where <flags> are the letters
# of the compile options of `r` selected by `unshared`.
function wrap_string(r::Regex, unshared::UInt32)
    flags = regex_opts_str(r.compile_options & unshared)
    return string("(?", flags, ':', r.pattern, ')')
end

# Quote `s` between "\Q" and "\E". A raw"\E" inside `s` would close the quoted span
# early, so its '\' and 'E' are split across two distinct \Q...\E groups.
function wrap_string(s::AbstractString, ::UInt32)
    quoted = replace(s, raw"\E" => raw"\\E\QE")
    return string("\\Q", quoted, "\\E")
end

# Quote a single character between "\Q" and "\E".
function wrap_string(s::AbstractChar, ::UInt32)
    return string("\\Q", s, "\\E")
end

# Look up the flag letters for `opts`, building the lookup table on first use.
function regex_opts_str(opts)
    table = isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex()
    return table[opts]
end

# UInt32 to String mapping for some compile options
const _regex_opts_str = Ref{ImmutableDict{UInt32,String}}()

# Build the table mapping each of the 16 combinations of the CASELESS, MULTILINE,
# DOTALL and EXTENDED options to its letters (in "imsx" order), store it in
# `_regex_opts_str` and return it.
function init_regex()
    # (selector bit within 0:15, PCRE option, flag letter)
    flags = ((1, PCRE.CASELESS, 'i'),
             (2, PCRE.MULTILINE, 'm'),
             (4, PCRE.DOTALL, 's'),
             (8, PCRE.EXTENDED, 'x'))
    table = ImmutableDict{UInt32,String}()
    for combo in 0:15
        opt = UInt32(0)
        str = ""
        for (bit, flag, letter) in flags
            if combo & bit != 0
                opt |= flag
                str *= letter
            end
        end
        table = ImmutableDict(table, opt => str)
    end
    return _regex_opts_str[] = table
end


"""
    ^(s::Regex, n::Integer)

Repeat a regex `n` times.

Throw an `ArgumentError` if `n` is negative.

!!! compat "Julia 1.3"
    This method requires at least Julia 1.3.

# Examples
```jldoctest
julia> r"Test "^2
r"(?:Test ){2}"

julia> match(r"Test "^2, "Test Test ")
RegexMatch("Test Test ")
```
"""
function ^(r::Regex, i::Integer)
    # A negative count would produce e.g. "{-1}", which is not a quantifier; reject it
    # here rather than silently building a regex that does not repeat `r`.
    i >= 0 || throw(ArgumentError("can't repeat a regex $i times"))
    return Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options)
end
6 changes: 3 additions & 3 deletions stdlib/REPL/src/REPL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ function setup_interface(
oldpos = firstindex(input)
firstline = true
isprompt_paste = false
jl_prompt_len = 7 # "julia> "
while oldpos <= lastindex(input) # loop until all lines have been executed
if JL_PROMPT_PASTE[]
# Check if the next statement starts with "julia> ", in that case
Expand All @@ -934,7 +935,6 @@ function setup_interface(
oldpos >= sizeof(input) && return
end
# Check if input line starts with "julia> ", remove it if we are in prompt paste mode
jl_prompt_len = 7
if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT)
isprompt_paste = true
oldpos += jl_prompt_len
Expand All @@ -959,7 +959,7 @@ function setup_interface(
tail = lstrip(tail)
end
if isprompt_paste # remove indentation spaces corresponding to the prompt
tail = replace(tail, r"^ {7}"m => "") # 7: jl_prompt_len
tail = replace(tail, r"^"m * ' '^jl_prompt_len => "")
end
LineEdit.replace_line(s, tail, true)
LineEdit.refresh_line(s)
Expand All @@ -969,7 +969,7 @@ function setup_interface(
line = strip(input[oldpos:prevind(input, pos)])
if !isempty(line)
if isprompt_paste # remove indentation spaces corresponding to the prompt
line = replace(line, r"^ {7}"m => "") # 7: jl_prompt_len
line = replace(line, r"^"m * ' '^jl_prompt_len => "")
end
# put the line on the screen and history
LineEdit.replace_line(s, line)
Expand Down
40 changes: 40 additions & 0 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,46 @@
@test !endswith("abc", r"C")
@test endswith("abc", r"C"i)

@testset "multiplication & exponentiation" begin
    # unary `*` returns the regex itself, without wrapping it
    @test *(r"a") == r"a"

    # regexes are wrapped in (?:...) groups, strings and chars are quoted with \Q...\E
    @test r"a" * r"b" == r"(?:a)(?:b)"
    @test r"a" * "b"  == r"(?:a)\Qb\E"
    @test r"a" * 'b'  == r"(?:a)\Qb\E"
    @test "a"  * r"b" == r"\Qa\E(?:b)"
    @test 'a'  * r"b" == r"\Qa\E(?:b)"
    for a = (r"a", "a", 'a'),
        b = (r"b", "b", 'b'),
        c = (r"c", "c", 'c')
        # products without any regex are plain string concatenations: not tested here
        a isa Regex || b isa Regex || c isa Regex || continue
        @test match(a * b * c, "abc") !== nothing
    end
    for s = ["thiscat", "thishat", "thatcat", "thathat"]
        @test match(r"this|that" * r"cat|hat", s) !== nothing
    end

    # flags common to all regex arguments stay on the resulting regex
    @test r"a"i * r"b"i == r"(?:a)(?:b)"i
    @test r"a"i * "b"   == r"(?:a)\Qb\E"i
    @test r"a"i * 'b'   == r"(?:a)\Qb\E"i
    @test "a"   * r"b"i == r"\Qa\E(?:b)"i
    @test 'a'   * r"b"i == r"\Qa\E(?:b)"i

    # flags which are not common are moved into the corresponding sub-pattern
    @test r"a"i  * r"b"m  == r"(?i:a)(?m:b)"
    @test r"a"im * r"b"m  == r"(?i:a)(?:b)"m
    @test r"a"im * r"b"im == r"(?:a)(?:b)"im
    @test r"a"im * r"b"i  == r"(?m:a)(?:b)"i

    # a string containing raw"\E" must still be matched literally
    r = r"" * raw"a\Eb|c"
    @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
    @test match(r, raw"c") === nothing

    # error for really incompatible options
    @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
    @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS, Base.DEFAULT_MATCH_OPTS & ~Base.PCRE.NO_UTF_CHECK)

    @test r"this|that"^2 == r"(?:this|that){2}"
end

# Test that PCRE throws the correct kind of error
# TODO: Uncomment this once the corresponding change has propagated to CI
#@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
Expand Down