From c19fb8209a82f47980a12107462042d9305ab921 Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Thu, 11 Oct 2018 15:01:27 +0800 Subject: [PATCH 01/14] Add `startswith`, `endswith` for `Regex` --- base/regex.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index 384d6ca3dfa21..f0825ebe38689 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -174,6 +174,16 @@ function occursin(r::Regex, s::SubString; offset::Integer=0) r.match_data) end +function startswith(s::AbstractString, r::Regex) + rr = Regex("^"*r.pattern, r.compile_options, r.match_options) + return occursin(rr, s) +end + +function endswith(s::AbstractString, r::Regex) + rr = Regex(r.pattern*"\$", r.compile_options, r.match_options) + return occursin(rr, s) +end + """ match(r::Regex, s::AbstractString[, idx::Integer[, addopts]]) From 3193b3e7dcab619a38fa4da93bcdd8d59dd5e490 Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Wed, 24 Oct 2018 15:55:09 +0800 Subject: [PATCH 02/14] Add tests --- test/regex.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/regex.jl b/test/regex.jl index bb665259f5b6c..0fb81e4ff79cb 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -61,3 +61,6 @@ end # 'a' flag to disable UCP @test match(r"\w+", "Düsseldorf").match == "Düsseldorf" @test match(r"\w+"a, "Düsseldorf").match == "D" + +@test startswith("abc", r"a") +@test endswith("abc", r"c") From 6560e5c9e3e14d622fb94cd6cfbd163345551d7c Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Thu, 25 Oct 2018 14:27:09 +0800 Subject: [PATCH 03/14] Use ANCHORED and ENDANCHORED instead of manipulating pattern --- base/pcre.jl | 1 + base/regex.jl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/base/pcre.jl b/base/pcre.jl index 5955c3880f6d5..3eb99a8629e91 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -37,6 +37,7 @@ const COMPILE_MASK = CASELESS | DOLLAR_ENDONLY | DOTALL | + ENDANCHORED | EXTENDED | FIRSTLINE | MULTILINE | diff --git a/base/regex.jl b/base/regex.jl index f0825ebe38689..f7af1b2ead4b1 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -175,12 +175,12 @@ function occursin(r::Regex, s::SubString; offset::Integer=0) end function startswith(s::AbstractString, r::Regex) - rr = Regex("^"*r.pattern, r.compile_options, r.match_options) + rr = Regex(r.pattern, r.compile_options | PCRE.ANCHORED, r.match_options) return occursin(rr, s) end function endswith(s::AbstractString, r::Regex) - rr = Regex(r.pattern*"\$", r.compile_options, r.match_options) + rr = Regex(r.pattern, r.compile_options | PCRE.ENDANCHORED, r.match_options) return occursin(rr, s) end From 16d6cd3278326f86bd7cb61d4af8763f8d41ef8d Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Thu, 25 Oct 2018 14:49:52 +0800 Subject: [PATCH 04/14] More tests --- test/regex.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/regex.jl b/test/regex.jl index 0fb81e4ff79cb..ada082084839c 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -64,3 +64,12 @@ end @test startswith("abc", r"a") @test endswith("abc", r"c") +@test !startswith("abc", r"b") +@test !startswith("abc", r"c") +@test !endswith("abc", r"a") +@test !endswith("abc", r"b") + +@test !startswith("abc", r"A") +@test startswith("abc", r"A"i) +@test !endswith("abc", r"C") +@test endswith("abc", r"C"i) From 0246784e7f7a9b41c2d69b59c5d15f5c110df1cf Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Thu, 25 Oct 2018 23:05:13 +0800 Subject: [PATCH 05/14] Add docstrings --- base/regex.jl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index f7af1b2ead4b1..3f036c8305d29 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -174,11 +174,37 @@ function occursin(r::Regex, s::SubString; offset::Integer=0) r.match_data) end +""" + startswith(s::AbstractString, prefix::Regex) + +Return `true` if `s` starts with the Regex pattern, `prefix`. + +See also [`endswith`](@ref). + +# Examples +```jldoctest +julia> startswith("JuliaLang", r"Julia|Romeo") +true +``` +""" function startswith(s::AbstractString, r::Regex) rr = Regex(r.pattern, r.compile_options | PCRE.ANCHORED, r.match_options) return occursin(rr, s) end +""" + endswith(s::AbstractString, suffix::Regex) + +Return `true` if `s` ends with the Regex pattern, `suffix`. + +See also [`startswith`](@ref). + +# Examples +```jldoctest +julia> endswith("JuliaLang", r"Lang|Roberts") +true +``` +""" function endswith(s::AbstractString, r::Regex) rr = Regex(r.pattern, r.compile_options | PCRE.ENDANCHORED, r.match_options) return occursin(rr, s) From 4bad579640b87ea6f8333497973a8c7a20f5ab0e Mon Sep 17 00:00:00 2001 From: chethega Date: Sun, 28 Oct 2018 10:21:36 +0800 Subject: [PATCH 06/14] Update base/regex.jl Co-Authored-By: dalum <17059936+dalum@users.noreply.github.com> --- base/regex.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index 3f036c8305d29..87b0da242f28d 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -206,6 +206,16 @@ true ``` """ function endswith(s::AbstractString, r::Regex) + compile(r) + return PCRE.exec(r.regex, String(s), 0, r.match_options | PCRE.ENDANCHORED, + r.match_data) +end + +function endswith(s::SubString, r::Regex) + compile(r) + return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED, + r.match_data) +end rr = Regex(r.pattern, r.compile_options | PCRE.ENDANCHORED, r.match_options) return occursin(rr, s) end From 8556e590a96c862ecbea16151a100093c6eaad23 Mon Sep 17 00:00:00 2001 From: chethega Date: Sun, 28 Oct 2018 10:21:44 +0800 Subject: [PATCH 07/14] Update base/regex.jl Co-Authored-By: dalum <17059936+dalum@users.noreply.github.com> --- base/regex.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index 87b0da242f28d..66bfedd71806e 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -188,6 +188,16 @@ true ``` """ function startswith(s::AbstractString, r::Regex) + compile(r) + return PCRE.exec(r.regex, String(s), 0, r.match_options | PCRE.ANCHORED, + r.match_data) +end + +function startswith(s::SubString, r::Regex) + compile(r) + return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ANCHORED, + r.match_data) +end rr = Regex(r.pattern, r.compile_options | PCRE.ANCHORED, r.match_options) return occursin(rr, s) end From 8f21ef3e73a86091e4101e22977a3abd150ab454 Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Mon, 29 Oct 2018 22:18:13 +0800 Subject: [PATCH 08/14] Update regex.jl --- base/regex.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 66bfedd71806e..58a8afb6f0a68 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -198,9 +198,6 @@ function startswith(s::SubString, r::Regex) return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ANCHORED, r.match_data) end - rr = Regex(r.pattern, r.compile_options | PCRE.ANCHORED, r.match_options) - return occursin(rr, s) -end """ endswith(s::AbstractString, suffix::Regex) @@ -226,9 +223,6 @@ function endswith(s::SubString, r::Regex) return PCRE.exec(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED, r.match_data) end - rr = Regex(r.pattern, r.compile_options | PCRE.ENDANCHORED, r.match_options) - return occursin(rr, s) -end """ match(r::Regex, s::AbstractString[, idx::Integer[, addopts]]) From 1ff3b90702898872a2901dc53c88b2904753fdd2 Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Tue, 30 Oct 2018 15:37:03 +0800 Subject: [PATCH 09/14] Update docstrings --- base/regex.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 58a8afb6f0a68..8bbfdc7eae37f 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -177,9 +177,11 @@ end """ startswith(s::AbstractString, prefix::Regex) -Return `true` if `s` starts with the Regex pattern, `prefix`. +Return `true` if `s` starts with the regex pattern, `prefix`. Note: +the regex version of `occursin` is recommended over `startswith` in +performance critical situations. -See also [`endswith`](@ref). +See also [`occursin`](@ref) and [`endswith`](@ref). # Examples ```jldoctest @@ -202,9 +204,11 @@ end """ endswith(s::AbstractString, suffix::Regex) -Return `true` if `s` ends with the Regex pattern, `suffix`. +Return `true` if `s` ends with the regex pattern, `suffix`. Note: the +regex version of `occursin` is recommended over `endswith` in +performance critical situations. -See also [`startswith`](@ref). +See also [`occursin`](@ref) and [`startswith`](@ref). # Examples ```jldoctest From aebe2d4152f9de66a147416049274aa1e8ed28c9 Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Tue, 30 Oct 2018 15:41:22 +0800 Subject: [PATCH 10/14] Update regex.jl --- base/regex.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 8bbfdc7eae37f..c155ca373649d 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -178,7 +178,7 @@ end startswith(s::AbstractString, prefix::Regex) Return `true` if `s` starts with the regex pattern, `prefix`. Note: -the regex version of `occursin` is recommended over `startswith` in +`occursin` is recommended over the regex version of `startswith` in performance critical situations. See also [`occursin`](@ref) and [`endswith`](@ref). @@ -204,8 +204,8 @@ end """ endswith(s::AbstractString, suffix::Regex) -Return `true` if `s` ends with the regex pattern, `suffix`. Note: the -regex version of `occursin` is recommended over `endswith` in +Return `true` if `s` ends with the regex pattern, `suffix`. Note: +`occursin` is recommended over the regex version of `endswith` in performance critical situations. See also [`occursin`](@ref) and [`startswith`](@ref). From e0a0097d654e5063068dcb21e85d663199e1e7aa Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Tue, 30 Oct 2018 15:52:44 +0800 Subject: [PATCH 11/14] Update regex.jl --- base/regex.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index c155ca373649d..b213f95d71b8f 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -177,9 +177,11 @@ end """ startswith(s::AbstractString, prefix::Regex) -Return `true` if `s` starts with the regex pattern, `prefix`. Note: -`occursin` is recommended over the regex version of `startswith` in -performance critical situations. +Return `true` if `s` starts with the regex pattern, `prefix`. + +!!! note + `occursin` is recommended over the regex version of `startswith` + in performance critical situations. See also [`occursin`](@ref) and [`endswith`](@ref). @@ -204,9 +206,11 @@ end """ endswith(s::AbstractString, suffix::Regex) -Return `true` if `s` ends with the regex pattern, `suffix`. Note: -`occursin` is recommended over the regex version of `endswith` in -performance critical situations. +Return `true` if `s` ends with the regex pattern, `suffix`. + +!!! note + `occursin` is recommended over the regex version of `endswith` in + performance critical situations. See also [`occursin`](@ref) and [`startswith`](@ref). From fc03281c8b476e4629794e9f355417ed8cc0c808 Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Sat, 3 Nov 2018 14:40:41 +0800 Subject: [PATCH 12/14] Update regex.jl --- base/regex.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index b213f95d71b8f..5736ce57321f8 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -180,8 +180,10 @@ end Return `true` if `s` starts with the regex pattern, `prefix`. !!! note - `occursin` is recommended over the regex version of `startswith` - in performance critical situations. + `startswith` does not compile the anchoring into the regular + expression, but instead passes the anchoring as + `match_option` to PCRE. If compile time is amortized, + `occursin(r"^...", s)` is faster than `startswith(s, r"...")`. See also [`occursin`](@ref) and [`endswith`](@ref). @@ -209,8 +211,10 @@ end Return `true` if `s` ends with the regex pattern, `suffix`. !!! note - `occursin` is recommended over the regex version of `endswith` in - performance critical situations. + `endswith` does not compile the anchoring into the regular + expression, but instead passes the anchoring as + `match_option` to PCRE. If compile time is amortized, + `occursin(r"...$", s)` is faster than `endswith(s, r"...")`. See also [`occursin`](@ref) and [`startswith`](@ref). From b0d24ccc94bc3f9854b30ebd45046e96ddc9ad0d Mon Sep 17 00:00:00 2001 From: Sakse <17059936+dalum@users.noreply.github.com> Date: Fri, 1 Feb 2019 13:43:01 +0100 Subject: [PATCH 13/14] Update regex.jl Add newline --- test/regex.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/regex.jl b/test/regex.jl index afb67aaaa3d5a..e7b227d2a99e9 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -82,4 +82,4 @@ # Test that PCRE throws the correct kind of error # TODO: Uncomment this once the corresponding change has propagated to CI #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32) -end \ No newline at end of file +end From aa274842c38a1e36849ceeda56366a2ec29053ac Mon Sep 17 00:00:00 2001 From: Sakse Dalum Date: Fri, 1 Feb 2019 15:21:55 +0100 Subject: [PATCH 14/14] Fix docstring interpolation error --- base/regex.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/regex.jl b/base/regex.jl index 5736ce57321f8..d952f1f6ba4b8 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -214,7 +214,7 @@ Return `true` if `s` ends with the regex pattern, `suffix`. `endswith` does not compile the anchoring into the regular expression, but instead passes the anchoring as `match_option` to PCRE. If compile time is amortized, - `occursin(r"...$", s)` is faster than `endswith(s, r"...")`. + `occursin(r"...\$", s)` is faster than `endswith(s, r"...")`. See also [`occursin`](@ref) and [`startswith`](@ref).