From a9b756eaf85676866971dd9bb0499d6a77cc0d85 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Wed, 23 Aug 2017 22:49:50 +0200 Subject: [PATCH 01/10] add *(::Union{Regex, AbstractString, Char}...) --- NEWS.md | 2 +- base/regex.jl | 46 +++++++++++++++++++++++++++++++++++++++++ stdlib/REPL/src/REPL.jl | 6 +++--- test/regex.jl | 23 +++++++++++++++++++++ 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index e5d3619986d08..74232e692af2d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,7 +24,7 @@ New library functions Standard library changes ------------------------ -* Cmd interpolation (``` `$(x::Cmd) a b c` ``` where) now propagates `x`'s process flags (environment, flags, working directory, etc) if `x` is the first interpolant and errors otherwise ([#24353]). +* `Regex` can now be multiplied (`*`) and exponentiated (`^`), like strings ([#23422]). #### LinearAlgebra diff --git a/base/regex.jl b/base/regex.jl index 3bee6dbd64947..24614fb976fa0 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -517,3 +517,49 @@ function hash(r::Regex, h::UInt) h = hash(r.compile_options, h) h = hash(r.match_options, h) end + +## String operations ## + +unwrap_string(r::Regex) = r.pattern +unwrap_string(s::Union{AbstractString,AbstractChar}) = s + +""" + *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex + *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex + +Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref). + +!!! compat "Julia 1.2" + This method requires at least Julia 1.2. + +# Examples +```jldoctest +julia> r"Hello " * "world" +r"Hello world" + +julia> 'j' * r"ulia" +r"julia" +``` +""" +function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...) + opts = unique((r.compile_options, r.match_options) for r in (r1, rs...) if r isa Regex) + length(opts) == 1 || + throw(ArgumentError("cannot multiply regexes with incompatible options")) + Regex(string(unwrap_string(r1), unwrap_string.(rs)...), opts[1][1], opts[1][2]) +end + +""" + ^(s::Regex, n::Integer) + +Repeat a regex `n` times. + +!!! compat "Julia 1.2" + This method requires at least Julia 1.2. + +# Examples +```jldoctest +julia> r"Test "^3 +r"Test Test Test " +``` +""" +^(r::Regex, i::Integer) = Regex(r.pattern^i, r.compile_options, r.match_options) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index f119ab3f8d7c4..0fc71c88d285d 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -925,6 +925,7 @@ function setup_interface( oldpos = firstindex(input) firstline = true isprompt_paste = false + jl_prompt_len = 7 # "julia> " while oldpos <= lastindex(input) # loop until all lines have been executed if JL_PROMPT_PASTE[] # Check if the next statement starts with "julia> ", in that case @@ -934,7 +935,6 @@ function setup_interface( oldpos >= sizeof(input) && return end # Check if input line starts with "julia> ", remove it if we are in prompt paste mode - jl_prompt_len = 7 if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT) isprompt_paste = true oldpos += jl_prompt_len @@ -959,7 +959,7 @@ function setup_interface( tail = lstrip(tail) end if isprompt_paste # remove indentation spaces corresponding to the prompt - tail = replace(tail, r"^ {7}"m => "") # 7: jl_prompt_len + tail = replace(tail, r"^"m * ' '^jl_prompt_len => "") end LineEdit.replace_line(s, tail, true) LineEdit.refresh_line(s) @@ -969,7 +969,7 @@ function setup_interface( line = strip(input[oldpos:prevind(input, pos)]) if !isempty(line) if isprompt_paste # remove indentation spaces corresponding to the prompt - line = replace(line, r"^ {7}"m => "") # 7: jl_prompt_len + line = replace(line, r"^"m * ' '^jl_prompt_len => "") end # put the line on the screen and history LineEdit.replace_line(s, line) diff --git a/test/regex.jl b/test/regex.jl index cb3fa965f8a50..f73c2028469d4 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -78,6 +78,29 @@ @test !endswith("abc", r"C") @test endswith("abc", r"C"i) + @testset "multiplication & exponentiation" begin + @test r"a" * r"b" == r"ab" + @test r"a" * "b" == r"ab" + @test r"a" * 'b' == r"ab" + @test "a" * r"b" == r"ab" + @test 'a' * r"b" == r"ab" + for a = (r"a", "a", 'a'), + b = (r"b", "b", 'b'), + c = (r"c", "c", 'c') + a isa Regex || b isa Regex || c isa Regex || continue + @test a * b * c == r"abc" + end + @test r"a"i * r"b"i == r"ab"i + @test r"a"i * "b" == r"ab"i + @test r"a"i * 'b' == r"ab"i + @test "a" * r"b"i == r"ab"i + @test 'a' * r"b"i == r"ab"i + @test_throws ArgumentError r"a"i * r"b" + @test_throws ArgumentError r"a" * r"b"i + + @test r"abc"^ 2 == r"abcabc" + end + # Test that PCRE throws the correct kind of error # TODO: Uncomment this once the corresponding change has propagated to CI #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32) From 8883776539ae3e302ee993a4a4f01efcff0558d9 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Wed, 3 Apr 2019 14:02:56 +0200 Subject: [PATCH 02/10] use internal option setting and non-capturing subpatterns --- base/regex.jl | 60 ++++++++++++++++++++++++++++++++++++++++++++------- test/regex.jl | 41 ++++++++++++++++++++++------------- 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 24614fb976fa0..08c38d39114e2 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -520,9 +520,6 @@ end ## String operations ## -unwrap_string(r::Regex) = r.pattern -unwrap_string(s::Union{AbstractString,AbstractChar}) = s - """ *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex @@ -542,12 +539,59 @@ r"julia" ``` """ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...) - opts = unique((r.compile_options, r.match_options) for r in (r1, rs...) if r isa Regex) - length(opts) == 1 || - throw(ArgumentError("cannot multiply regexes with incompatible options")) - Regex(string(unwrap_string(r1), unwrap_string.(rs)...), opts[1][1], opts[1][2]) + mask = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED # imsx + match_opts = typemax(UInt32) # all args must agree on this + compile_opts = typemax(UInt32) # all args must agree on this + shared = mask + for r in (r1, rs...) + r isa Regex || continue + if match_opts == typemax(UInt32) + match_opts = r.match_options + compile_opts = r.compile_options & ~mask + else + r.match_options == match_opts && + r.compile_options & ~mask == compile_opts || + throw(ArgumentError("cannot multiply regexes: incompatible options")) + end + shared &= r.compile_options & mask + end + unshared = mask & ~shared + Regex(string(unwrap_string(r1, unshared), unwrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts) +end + +unwrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') +unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("(?:", s, ')') + +regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts] + +# UInt32 to String mapping for some compile options +const _regex_opts_str = Ref{ImmutableDict{UInt32,String}}() + +init_regex() = _regex_opts_str[] = foldl(0:15, init=ImmutableDict{UInt32,String}()) do d, o + opt = UInt32(0) + str = "" + if o & 1 != 0 + opt |= PCRE.CASELESS + str *= 'i' + end + if o & 2 != 0 + opt |= PCRE.MULTILINE + str *= 'm' + end + if o & 4 != 0 + opt |= PCRE.DOTALL + str *= 's' + end + if o & 8 != 0 + opt |= PCRE.EXTENDED + str *= 'x' + end + ImmutableDict(d, opt => str) end + + + """ ^(s::Regex, n::Integer) @@ -562,4 +606,4 @@ julia> r"Test "^3 r"Test Test Test " ``` """ -^(r::Regex, i::Integer) = Regex(r.pattern^i, r.compile_options, r.match_options) +^(r::Regex, i::Integer) = Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options) diff --git a/test/regex.jl b/test/regex.jl index f73c2028469d4..0de40a9100834 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -79,26 +79,37 @@ @test endswith("abc", r"C"i) @testset "multiplication & exponentiation" begin - @test r"a" * r"b" == r"ab" - @test r"a" * "b" == r"ab" - @test r"a" * 'b' == r"ab" - @test "a" * r"b" == r"ab" - @test 'a' * r"b" == r"ab" + @test r"a" * r"b" == r"(?:a)(?:b)" + @test r"a" * "b" == r"(?:a)(?:b)" + @test r"a" * 'b' == r"(?:a)(?:b)" + @test "a" * r"b" == r"(?:a)(?:b)" + @test 'a' * r"b" == r"(?:a)(?:b)" for a = (r"a", "a", 'a'), b = (r"b", "b", 'b'), c = (r"c", "c", 'c') a isa Regex || b isa Regex || c isa Regex || continue - @test a * b * c == r"abc" + @test a * b * c == r"(?:a)(?:b)(?:c)" end - @test r"a"i * r"b"i == r"ab"i - @test r"a"i * "b" == r"ab"i - @test r"a"i * 'b' == r"ab"i - @test "a" * r"b"i == r"ab"i - @test 'a' * r"b"i == r"ab"i - @test_throws ArgumentError r"a"i * r"b" - @test_throws ArgumentError r"a" * r"b"i - - @test r"abc"^ 2 == r"abcabc" + for s = ["thiscat", "thishat", "thatcat", "thathat"] + @test match(r"this|that" * r"cat|hat", s) !== nothing + end + + @test r"a"i * r"b"i == r"(?:a)(?:b)"i + @test r"a"i * "b" == r"(?:a)(?:b)"i + @test r"a"i * 'b' == r"(?:a)(?:b)"i + @test "a" * r"b"i == r"(?:a)(?:b)"i + @test 'a' * r"b"i == r"(?:a)(?:b)"i + + @test r"a"i * r"b"m == r"(?i:a)(?m:b)" + @test r"a"im * r"b"m == r"(?i:a)(?:b)"m + @test r"a"im * r"b"im == r"(?:a)(?:b)"im + @test r"a"im * r"b"i == r"(?m:a)(?:b)"i + + # error for really incompatible options + @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS) + @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS, Base.DEFAULT_MATCH_OPTS & ~Base.PCRE.NO_UTF_CHECK) + + @test r"this|that"^2 == r"(?:this|that){2}" end # Test that PCRE throws the correct kind of error From 1d90dbcd9cb5cb810a58a66a8facb49e4fecaa22 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Wed, 3 Apr 2019 14:19:51 +0200 Subject: [PATCH 03/10] do nothing for one argument --- base/regex.jl | 2 ++ test/regex.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index 08c38d39114e2..3e48f1f6c81a9 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -559,6 +559,8 @@ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,Abstrac Regex(string(unwrap_string(r1, unshared), unwrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts) end +*(r::Regex) = r # avoids wrapping r in a useless subpattern + unwrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("(?:", s, ')') diff --git a/test/regex.jl b/test/regex.jl index 0de40a9100834..2dad9f52d1dea 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -79,6 +79,8 @@ @test endswith("abc", r"C"i) @testset "multiplication & exponentiation" begin + @test *(r"a") == r"a" + @test r"a" * r"b" == r"(?:a)(?:b)" @test r"a" * "b" == r"(?:a)(?:b)" @test r"a" * 'b' == r"(?:a)(?:b)" From c8182c9efc165bb3d46e60395ac86ee2e744a11e Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Wed, 3 Apr 2019 17:27:26 +0200 Subject: [PATCH 04/10] don't wrap strings and chars --- base/regex.jl | 17 +++++++---------- test/regex.jl | 18 +++++++++--------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 3e48f1f6c81a9..dde3b2129a3e0 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -531,21 +531,18 @@ Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref). # Examples ```jldoctest -julia> r"Hello " * "world" -r"Hello world" - -julia> 'j' * r"ulia" -r"julia" +julia> r"Hello|Good bye" * ' ' * "world" +r"(?:Hello|Good bye) world" ``` """ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...) mask = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED # imsx - match_opts = typemax(UInt32) # all args must agree on this - compile_opts = typemax(UInt32) # all args must agree on this + match_opts = nothing # all args must agree on this + compile_opts = nothing # all args must agree on this shared = mask for r in (r1, rs...) r isa Regex || continue - if match_opts == typemax(UInt32) + if match_opts == nothing match_opts = r.match_options compile_opts = r.compile_options & ~mask else @@ -562,7 +559,7 @@ end *(r::Regex) = r # avoids wrapping r in a useless subpattern unwrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') -unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("(?:", s, ')') +unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = s # no need to wrap in subpattern regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts] @@ -605,7 +602,7 @@ Repeat a regex `n` times. # Examples ```jldoctest julia> r"Test "^3 -r"Test Test Test " +r"(?:Test ){3}" ``` """ ^(r::Regex, i::Integer) = Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options) diff --git a/test/regex.jl b/test/regex.jl index 2dad9f52d1dea..ca3c1eb5a1dca 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -82,25 +82,25 @@ @test *(r"a") == r"a" @test r"a" * r"b" == r"(?:a)(?:b)" - @test r"a" * "b" == r"(?:a)(?:b)" - @test r"a" * 'b' == r"(?:a)(?:b)" - @test "a" * r"b" == r"(?:a)(?:b)" - @test 'a' * r"b" == r"(?:a)(?:b)" + @test r"a" * "b" == r"(?:a)b" + @test r"a" * 'b' == r"(?:a)b" + @test "a" * r"b" == r"a(?:b)" + @test 'a' * r"b" == r"a(?:b)" for a = (r"a", "a", 'a'), b = (r"b", "b", 'b'), c = (r"c", "c", 'c') a isa Regex || b isa Regex || c isa Regex || continue - @test a * b * c == r"(?:a)(?:b)(?:c)" + @test match(a * b * c, "abc") !== nothing end for s = ["thiscat", "thishat", "thatcat", "thathat"] @test match(r"this|that" * r"cat|hat", s) !== nothing end @test r"a"i * r"b"i == r"(?:a)(?:b)"i - @test r"a"i * "b" == r"(?:a)(?:b)"i - @test r"a"i * 'b' == r"(?:a)(?:b)"i - @test "a" * r"b"i == r"(?:a)(?:b)"i - @test 'a' * r"b"i == r"(?:a)(?:b)"i + @test r"a"i * "b" == r"(?:a)b"i + @test r"a"i * 'b' == r"(?:a)b"i + @test "a" * r"b"i == r"a(?:b)"i + @test 'a' * r"b"i == r"a(?:b)"i @test r"a"i * r"b"m == r"(?i:a)(?m:b)" @test r"a"im * r"b"m == r"(?i:a)(?:b)"m From e6eda3268c274defd5a31ffa3d8c8939f68199bc Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 4 Apr 2019 16:37:52 +0200 Subject: [PATCH 05/10] use quotation for strings and chars --- base/regex.jl | 14 +++++++++++++- test/regex.jl | 16 ++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index dde3b2129a3e0..a19f6110a357a 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -525,6 +525,9 @@ end *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref). +String and character arguments must be matched exactly in the resulting regex, +meaning that the contained characters are devoid of any special meaning +(they are quoted with "\\Q" and "\\E"). !!! compat "Julia 1.2" This method requires at least Julia 1.2. @@ -533,6 +536,15 @@ Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref). ```jldoctest julia> r"Hello|Good bye" * ' ' * "world" r"(?:Hello|Good bye) world" + +julia> r = r"a|b" * "c|d" +r"(?:a|b)\\Qc|d\\E" + +match(r, "ac") == nothing +true + +julia> match(r, "ac|d") +RegexMatch("ac|d") ``` """ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...) @@ -559,7 +571,7 @@ end *(r::Regex) = r # avoids wrapping r in a useless subpattern unwrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') -unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = s # no need to wrap in subpattern +unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("\\Q", s, "\\E") regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts] diff --git a/test/regex.jl b/test/regex.jl index ca3c1eb5a1dca..56dfef0666bb4 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -82,10 +82,10 @@ @test *(r"a") == r"a" @test r"a" * r"b" == r"(?:a)(?:b)" - @test r"a" * "b" == r"(?:a)b" - @test r"a" * 'b' == r"(?:a)b" - @test "a" * r"b" == r"a(?:b)" - @test 'a' * r"b" == r"a(?:b)" + @test r"a" * "b" == r"(?:a)\Qb\E" + @test r"a" * 'b' == r"(?:a)\Qb\E" + @test "a" * r"b" == r"\Qa\E(?:b)" + @test 'a' * r"b" == r"\Qa\E(?:b)" for a = (r"a", "a", 'a'), b = (r"b", "b", 'b'), c = (r"c", "c", 'c') @@ -97,10 +97,10 @@ end @test r"a"i * r"b"i == r"(?:a)(?:b)"i - @test r"a"i * "b" == r"(?:a)b"i - @test r"a"i * 'b' == r"(?:a)b"i - @test "a" * r"b"i == r"a(?:b)"i - @test 'a' * r"b"i == r"a(?:b)"i + @test r"a"i * "b" == r"(?:a)\Qb\E"i + @test r"a"i * 'b' == r"(?:a)\Qb\E"i + @test "a" * r"b"i == r"\Qa\E(?:b)"i + @test 'a' * r"b"i == r"\Qa\E(?:b)"i @test r"a"i * r"b"m == r"(?i:a)(?m:b)" @test r"a"im * r"b"m == r"(?i:a)(?:b)"m From abbc367155a4a9e498f0631985a94cfaf9e9fd4a Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 4 Apr 2019 16:45:46 +0200 Subject: [PATCH 06/10] remove useless operation --- base/regex.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/regex.jl b/base/regex.jl index a19f6110a357a..00234ade2fe1a 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -562,7 +562,7 @@ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,Abstrac r.compile_options & ~mask == compile_opts || throw(ArgumentError("cannot multiply regexes: incompatible options")) end - shared &= r.compile_options & mask + shared &= r.compile_options end unshared = mask & ~shared Regex(string(unwrap_string(r1, unshared), unwrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts) From dfb6b2ea50c1b98212eb973605c1a7b30381494e Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 4 Apr 2019 16:50:09 +0200 Subject: [PATCH 07/10] update wrong example in docstring --- base/regex.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 00234ade2fe1a..5b4df4999f4a1 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -534,13 +534,13 @@ meaning that the contained characters are devoid of any special meaning # Examples ```jldoctest -julia> r"Hello|Good bye" * ' ' * "world" -r"(?:Hello|Good bye) world" +julia> match(r"Hello|Good bye" * ' ' * "world", "Hello world") +RegexMatch("Hello world") julia> r = r"a|b" * "c|d" r"(?:a|b)\\Qc|d\\E" -match(r, "ac") == nothing +julia> match(r, "ac") == nothing true julia> match(r, "ac|d") From a17e926a11c1ddfa87c09ef83e413b435e7e80be Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 4 Apr 2019 16:58:27 +0200 Subject: [PATCH 08/10] add example and rename unwrap_string -> wrap_string --- base/regex.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 5b4df4999f4a1..cde0bac13eaa9 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -565,13 +565,13 @@ function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,Abstrac shared &= r.compile_options end unshared = mask & ~shared - Regex(string(unwrap_string(r1, unshared), unwrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts) + Regex(string(wrap_string(r1, unshared), wrap_string.(rs, Ref(unshared))...), compile_opts | shared, match_opts) end *(r::Regex) = r # avoids wrapping r in a useless subpattern -unwrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') -unwrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("\\Q", s, "\\E") +wrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') +wrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("\\Q", s, "\\E") regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts] @@ -601,8 +601,6 @@ init_regex() = _regex_opts_str[] = foldl(0:15, init=ImmutableDict{UInt32,String} end - - """ ^(s::Regex, n::Integer) @@ -613,8 +611,11 @@ Repeat a regex `n` times. # Examples ```jldoctest -julia> r"Test "^3 -r"(?:Test ){3}" +julia> r"Test "^2 +r"(?:Test ){2}" + +julia> match(r"Test "^2, "Test Test ") +RegexMatch("Test Test ") ``` """ ^(r::Regex, i::Integer) = Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options) From 03b11caf8e5e0ec922e9d80eea0cb4f34a8ba9f6 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Fri, 5 Apr 2019 19:01:53 +0200 Subject: [PATCH 09/10] fix case where a string contains "\E" --- base/regex.jl | 4 +++- test/regex.jl | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/base/regex.jl b/base/regex.jl index cde0bac13eaa9..1e5b2a91163ff 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -571,7 +571,9 @@ end *(r::Regex) = r # avoids wrapping r in a useless subpattern wrap_string(r::Regex, unshared::UInt32) = string("(?", regex_opts_str(r.compile_options & unshared), ':', r.pattern, ')') -wrap_string(s::Union{AbstractString,AbstractChar}, ::UInt32) = string("\\Q", s, "\\E") +# if s contains raw"\E", split '\' and 'E' within two distinct \Q...\E groups: +wrap_string(s::AbstractString, ::UInt32) = string("\\Q", replace(s, raw"\E" => raw"\\E\QE"), "\\E") +wrap_string(s::AbstractChar, ::UInt32) = string("\\Q", s, "\\E") regex_opts_str(opts) = (isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex())[opts] diff --git a/test/regex.jl b/test/regex.jl index 56dfef0666bb4..76c36b76edf84 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -107,6 +107,10 @@ @test r"a"im * r"b"im == r"(?:a)(?:b)"im @test r"a"im * r"b"i == r"(?m:a)(?:b)"i + r = r"" * raw"a\Eb|c" + @test match(r, raw"a\Eb|c").match == raw"a\Eb|c" + @test match(r, raw"c") == nothing + # error for really incompatible options @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS) @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS, Base.DEFAULT_MATCH_OPTS & ~Base.PCRE.NO_UTF_CHECK) From 83821165d555bd97738c86fae8a25de7360cee52 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Thu, 25 Apr 2019 15:33:27 +0200 Subject: [PATCH 10/10] update to 1.3 --- base/regex.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 1e5b2a91163ff..084328a4a80f0 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -529,8 +529,8 @@ String and character arguments must be matched exactly in the resulting regex, meaning that the contained characters are devoid of any special meaning (they are quoted with "\\Q" and "\\E"). -!!! compat "Julia 1.2" - This method requires at least Julia 1.2. +!!! compat "Julia 1.3" + This method requires at least Julia 1.3. # Examples ```jldoctest @@ -608,8 +608,8 @@ end Repeat a regex `n` times. -!!! compat "Julia 1.2" - This method requires at least Julia 1.2. +!!! compat "Julia 1.3" + This method requires at least Julia 1.3. # Examples ```jldoctest