From 596e81d5c1b793ee7506d874dad00eb56eede324 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 9 Feb 2023 23:19:26 -0500 Subject: [PATCH 1/9] add replace(io, str, patterns...) --- NEWS.md | 2 ++ base/strings/util.jl | 65 ++++++++++++++++++++++++++++++++++------- doc/src/base/strings.md | 2 +- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 631806296eebd..8907275925155 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ New library functions New library features -------------------- +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). Standard library changes ------------------------ diff --git a/base/strings/util.jl b/base/strings/util.jl index 7a42d7fecfc91..20654f9dc93d6 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -700,8 +700,8 @@ _free_pat_replacer(x) = nothing _pat_replacer(x::AbstractChar) = isequal(x) _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x) -function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N - count == 0 && return str +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) n = 1 e1 = nextind(str, lastindex(str)) # sizeof(str) @@ -716,11 +716,12 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I r isa Int && (r = r:r) # findnext / performance fix return r end - if all(>(e1), map(first, rs)) - foreach(_free_pat_replacer, patterns) - return str - end - out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return patterns, replaces, rs, all(>(e1), map(first, rs)) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_finish(out::IO, str, count::Int, + patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? r = rs[p] @@ -756,12 +757,38 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I end foreach(_free_pat_replacer, patterns) write(out, SubString(str, i)) - return String(take!(out)) + return out end +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_io(out::IO, retval, str, pat_repl::Pair...; count::Integer=typemax(Int)) + if count == 0 + write(out, str) + return out + end + patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + write(out, str) + return out + end + return _replace_finish(out, str, count, patterns, replaces, rs) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_str(str, pat_repl::Pair...; count::Integer=typemax(Int)) + count == 0 && return str + patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + return str + end + out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return String(take!(_replace_finish(out, str, count, patterns, replaces, rs))) +end """ - replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) + replace([out::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. @@ -774,6 +801,11 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`). +The return value is a new string after the replacements. If the `out::IO` argument +is supplied, the transformed string is instead written to `out` (returning `out`). +(For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use +a pre-allocated buffer array in-place.) + Multiple patterns can be specified, and they will be applied left-to-right simultaneously, so only one pattern will be applied to any character, and the patterns will only be applied to the input text, not the replacements. @@ -781,6 +813,9 @@ patterns will only be applied to the input text, not the replacements. !!! compat "Julia 1.7" Support for multiple patterns requires version 1.7. +!!! compat "Julia 1.10" + The `out::IO` argument requires version 1.10. + # Examples ```jldoctest julia> replace("Python is a programming language.", "Python" => "Julia") @@ -799,8 +834,18 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") "bca" ``` """ +replace(out::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = + _replace_io(out, String(s), pat_f..., count=count) + replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - replace(String(s), pat_f..., count=count) + _replace_str(String(s), pat_f..., count=count) + +# no copy needed for SubString{String} +replace(out::IO, s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = + _replace_io(out, s, pat_f..., count=count) +replace(s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = + _replace_str(s, pat_f..., count=count) + # TODO: allow transform as the first argument to replace? diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 263c0019788c3..226e8aa92a989 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -51,7 +51,7 @@ Base.findlast(::AbstractChar, ::AbstractString) Base.findprev(::AbstractString, ::AbstractString, ::Integer) Base.occursin Base.reverse(::Union{String,SubString{String}}) -Base.replace(s::AbstractString, ::Pair...) +Base.replace(::IO, s::AbstractString, ::Pair...) Base.eachsplit Base.split Base.rsplit From 8422f84e3db3f13dee0897ae9b8a950ee4493496 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 07:46:16 -0500 Subject: [PATCH 2/9] optimize SubString methods in a different PR --- base/strings/util.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 20654f9dc93d6..a2a35f9610edf 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -840,12 +840,6 @@ replace(out::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = _replace_str(String(s), pat_f..., count=count) -# no copy needed for SubString{String} -replace(out::IO, s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = - _replace_io(out, s, pat_f..., count=count) -replace(s::SubString{String}, pat_f::Pair...; count=typemax(Int)) = - _replace_str(s, pat_f..., count=count) - # TODO: allow transform as the first argument to replace? From a17af9afc07a7d70d15097490f9e34ffe374b671 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 07:51:48 -0500 Subject: [PATCH 3/9] bugfix --- base/strings/util.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index a2a35f9610edf..a6df3f6ef810d 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -703,9 +703,7 @@ _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar # note: leave str untyped here to make it easier for packages like StringViews to hook in function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) - n = 1 - e1 = nextind(str, lastindex(str)) # sizeof(str) - i = a = firstindex(str) + e1 = sizeof(str)+1 # nextind(str, lastindex(str)) patterns = map(p -> _pat_replacer(first(p)), pat_repl) replaces = map(last, pat_repl) rs = map(patterns) do p @@ -722,6 +720,9 @@ end # note: leave str untyped here to make it easier for packages like StringViews to hook in function _replace_finish(out::IO, str, count::Int, patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N + n = 1 + e1 = sizeof(str)+1 # nextind(str, lastindex(str)) + i = a = firstindex(str) while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? r = rs[p] From 96a12779e282ca74b52ed94d44cf8e8b2d612cba Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 07:53:07 -0500 Subject: [PATCH 4/9] rename out -> io --- base/strings/util.jl | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index a6df3f6ef810d..0bf08edba0f1e 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -718,7 +718,7 @@ function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_finish(out::IO, str, count::Int, +function _replace_finish(io::IO, str, count::Int, patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N n = 1 e1 = sizeof(str)+1 # nextind(str, lastindex(str)) @@ -729,10 +729,10 @@ function _replace_finish(out::IO, str, count::Int, j, k = first(r), last(r) j > e1 && break if i == a || i <= k - # copy out preserved portion - GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i)) - # copy out replacement string - _replace(out, replaces[p], str, r, patterns[p]) + # copy io preserved portion + GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i)) + # copy io replacement string + _replace(io, replaces[p], str, r, patterns[p]) end if k < j i = j @@ -757,23 +757,23 @@ function _replace_finish(out::IO, str, count::Int, n += 1 end foreach(_free_pat_replacer, patterns) - write(out, SubString(str, i)) - return out + write(io, SubString(str, i)) + return io end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_io(out::IO, retval, str, pat_repl::Pair...; count::Integer=typemax(Int)) +function _replace_io(io::IO, retval, str, pat_repl::Pair...; count::Integer=typemax(Int)) if count == 0 - write(out, str) - return out + write(io, str) + return io end patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) if notfound foreach(_free_pat_replacer, patterns) - write(out, str) - return out + write(io, str) + return io end - return _replace_finish(out, str, count, patterns, replaces, rs) + return _replace_finish(io, str, count, patterns, replaces, rs) end # note: leave str untyped here to make it easier for packages like StringViews to hook in @@ -789,7 +789,7 @@ function _replace_str(str, pat_repl::Pair...; count::Integer=typemax(Int)) end """ - replace([out::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) + replace([io::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. @@ -802,8 +802,8 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`). -The return value is a new string after the replacements. If the `out::IO` argument -is supplied, the transformed string is instead written to `out` (returning `out`). +The return value is a new string after the replacements. If the `io::IO` argument +is supplied, the transformed string is instead written to `io` (returning `io`). (For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use a pre-allocated buffer array in-place.) @@ -815,7 +815,7 @@ patterns will only be applied to the input text, not the replacements. Support for multiple patterns requires version 1.7. !!! compat "Julia 1.10" - The `out::IO` argument requires version 1.10. + The `io::IO` argument requires version 1.10. # Examples ```jldoctest @@ -835,8 +835,8 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") "bca" ``` """ -replace(out::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - _replace_io(out, String(s), pat_f..., count=count) +replace(io::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = + _replace_io(io, String(s), pat_f..., count=count) replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = _replace_str(String(s), pat_f..., count=count) From 26ae60136f84a925d148b12805aa67b04f6daa20 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 08:00:09 -0500 Subject: [PATCH 5/9] bugfix --- base/strings/util.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 0bf08edba0f1e..7b273db7b00b2 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -703,25 +703,24 @@ _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar # note: leave str untyped here to make it easier for packages like StringViews to hook in function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) - e1 = sizeof(str)+1 # nextind(str, lastindex(str)) + e1 = nextind(str, lastindex(str)) # sizeof(str)+1 patterns = map(p -> _pat_replacer(first(p)), pat_repl) replaces = map(last, pat_repl) rs = map(patterns) do p - r = findnext(p, str, a) + r = findfirst(p, str) if r === nothing || first(r) == 0 return e1+1:0 end r isa Int && (r = r:r) # findnext / performance fix return r end - return patterns, replaces, rs, all(>(e1), map(first, rs)) + return e1, patterns, replaces, rs, all(>(e1), map(first, rs)) end # note: leave str untyped here to make it easier for packages like StringViews to hook in function _replace_finish(io::IO, str, count::Int, - patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N + e1::Int, patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N n = 1 - e1 = sizeof(str)+1 # nextind(str, lastindex(str)) i = a = firstindex(str) while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? @@ -767,25 +766,25 @@ function _replace_io(io::IO, retval, str, pat_repl::Pair...; count::Integer=type write(io, str) return io end - patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) if notfound foreach(_free_pat_replacer, patterns) write(io, str) return io end - return _replace_finish(io, str, count, patterns, replaces, rs) + return _replace_finish(io, str, count, e1, patterns, replaces, rs) end # note: leave str untyped here to make it easier for packages like StringViews to hook in function _replace_str(str, pat_repl::Pair...; count::Integer=typemax(Int)) count == 0 && return str - patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) if notfound foreach(_free_pat_replacer, patterns) return str end out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) - return String(take!(_replace_finish(out, str, count, patterns, replaces, rs))) + return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs))) end """ From 166e0dcbea3b5df2294acd3a0c74d10d47cb0965 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 08:14:45 -0500 Subject: [PATCH 6/9] bugfix --- base/strings/util.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 7b273db7b00b2..bdc44d5eeaabf 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -704,10 +704,11 @@ _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) e1 = nextind(str, lastindex(str)) # sizeof(str)+1 + a = firstindex(str) patterns = map(p -> _pat_replacer(first(p)), pat_repl) replaces = map(last, pat_repl) rs = map(patterns) do p - r = findfirst(p, str) + r = findnext(p, str, a) if r === nothing || first(r) == 0 return e1+1:0 end @@ -718,8 +719,7 @@ function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_finish(io::IO, str, count::Int, - e1::Int, patterns::NTuple{N}, replaces::NTuple{N}, rs::NTuple{N}) where N +function _replace_finish(io::IO, str, count::Int, e1, patterns, replaces, rs) n = 1 i = a = firstindex(str) while true From 87ad92c0a0f3ee12bd29746e7b303b428d77d0bd Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 08:37:08 -0500 Subject: [PATCH 7/9] bugfixes and cleanups --- base/strings/util.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index bdc44d5eeaabf..c818d723d688f 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -719,7 +719,8 @@ function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_finish(io::IO, str, count::Int, e1, patterns, replaces, rs) +function _replace_finish(io::IO, str, count::Int, + e1::Int, patterns::Tuple, replaces::Tuple, rs::Tuple) n = 1 i = a = firstindex(str) while true @@ -728,9 +729,9 @@ function _replace_finish(io::IO, str, count::Int, e1, patterns, replaces, rs) j, k = first(r), last(r) j > e1 && break if i == a || i <= k - # copy io preserved portion + # copy out preserved portion GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i)) - # copy io replacement string + # copy out replacement string _replace(io, replaces[p], str, r, patterns[p]) end if k < j @@ -761,7 +762,7 @@ function _replace_finish(io::IO, str, count::Int, e1, patterns, replaces, rs) end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_io(io::IO, retval, str, pat_repl::Pair...; count::Integer=typemax(Int)) +function _replace_(io::IO, str, pat_repl::NTuple{N, Pair}, count::Int) where N if count == 0 write(io, str) return io @@ -776,7 +777,7 @@ function _replace_io(io::IO, retval, str, pat_repl::Pair...; count::Integer=type end # note: leave str untyped here to make it easier for packages like StringViews to hook in -function _replace_str(str, pat_repl::Pair...; count::Integer=typemax(Int)) +function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N count == 0 && return str e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) if notfound @@ -835,10 +836,10 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") ``` """ replace(io::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - _replace_io(io, String(s), pat_f..., count=count) + _replace_(io, String(s), pat_f, Int(count)) replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - _replace_str(String(s), pat_f..., count=count) + _replace_(String(s), pat_f, Int(count)) # TODO: allow transform as the first argument to replace? From 41ea3d821f8813ab78a1328ffc6d7f20bd9fc7ad Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 10 Feb 2023 19:14:38 -0500 Subject: [PATCH 8/9] test replace(io, ...) API --- test/strings/util.jl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/strings/util.jl b/test/strings/util.jl index 5218310c5c1c7..fd237594f4ad5 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -333,6 +333,24 @@ end # Issue 36953 @test replace("abc", "" => "_", count=1) == "_abc" + # tests for io::IO API (in addition to internals exercised above): + let buf = IOBuffer() + replace(buf, "aaa", 'a' => 'z', count=0) + replace(buf, "aaa", 'a' => 'z', count=1) + replace(buf, "bbb", 'a' => 'z') + replace(buf, "aaa", 'a' => 'z') + @test String(take!(buf)) == "aaazaabbbzzz" + end + let tempfile = tempname() + open(tempfile, "w") do f + replace(f, "aaa", 'a' => 'z', count=0) + replace(f, "aaa", 'a' => 'z', count=1) + replace(f, "bbb", 'a' => 'z') + replace(f, "aaa", 'a' => 'z') + print(f, "\n") + end + @test read(tempfile, String) == "aaazaabbbzzz\n" + end end @testset "replace many" begin From 46d01ae437d37aa07dd4c7b2a15ffe7f031802a0 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 11 Feb 2023 10:45:32 -0500 Subject: [PATCH 9/9] rm tempfile --- test/strings/util.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/strings/util.jl b/test/strings/util.jl index fd237594f4ad5..8b58c2f36d8c4 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -342,14 +342,18 @@ end @test String(take!(buf)) == "aaazaabbbzzz" end let tempfile = tempname() - open(tempfile, "w") do f - replace(f, "aaa", 'a' => 'z', count=0) - replace(f, "aaa", 'a' => 'z', count=1) - replace(f, "bbb", 'a' => 'z') - replace(f, "aaa", 'a' => 'z') - print(f, "\n") + try + open(tempfile, "w") do f + replace(f, "aaa", 'a' => 'z', count=0) + replace(f, "aaa", 'a' => 'z', count=1) + replace(f, "bbb", 'a' => 'z') + replace(f, "aaa", 'a' => 'z') + print(f, "\n") + end + @test read(tempfile, String) == "aaazaabbbzzz\n" + finally + rm(tempfile, force=true) end - @test read(tempfile, String) == "aaazaabbbzzz\n" end end