Skip to content

Commit

Permalink
add replace(io, str, patterns...) (#48625)
Browse files Browse the repository at this point in the history
(cherry picked from commit ce1b420)
  • Loading branch information
stevengj authored and KristofferC committed Jul 18, 2023
1 parent 208e928 commit 388c734
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 17 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ New library features
* A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]).
* `printstyled` now supports italic output ([#45164]).
* `parent` and `parentindices` support `SubString`s
* `replace(string, pattern...)` now supports an optional `IO` argument to
write the output to a stream rather than returning a string ([#48625]).

Standard library changes
------------------------
Expand Down
72 changes: 56 additions & 16 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -700,12 +700,11 @@ _free_pat_replacer(x) = nothing
_pat_replacer(x::AbstractChar) = isequal(x)
_pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x)

function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N
count == 0 && return str
# note: leave str untyped here to make it easier for packages like StringViews to hook in
function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N
count < 0 && throw(DomainError(count, "`count` must be non-negative."))
n = 1
e1 = nextind(str, lastindex(str)) # sizeof(str)
i = a = firstindex(str)
e1 = nextind(str, lastindex(str)) # sizeof(str)+1
a = firstindex(str)
patterns = map(p -> _pat_replacer(first(p)), pat_repl)
replaces = map(last, pat_repl)
rs = map(patterns) do p
Expand All @@ -716,21 +715,24 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I
r isa Int && (r = r:r) # findnext / performance fix
return r
end
if all(>(e1), map(first, rs))
foreach(_free_pat_replacer, patterns)
return str
end
out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
return e1, patterns, replaces, rs, all(>(e1), map(first, rs))
end

# note: leave str untyped here to make it easier for packages like StringViews to hook in
function _replace_finish(io::IO, str, count::Int,
e1::Int, patterns::Tuple, replaces::Tuple, rs::Tuple)
n = 1
i = a = firstindex(str)
while true
p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
r = rs[p]
j, k = first(r), last(r)
j > e1 && break
if i == a || i <= k
# copy out preserved portion
GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i))
GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i))
# copy out replacement string
_replace(out, replaces[p], str, r, patterns[p])
_replace(io, replaces[p], str, r, patterns[p])
end
if k < j
i = j
Expand All @@ -755,13 +757,39 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I
n += 1
end
foreach(_free_pat_replacer, patterns)
write(out, SubString(str, i))
return String(take!(out))
write(io, SubString(str, i))
return io
end

# Write `str` to `io` with at most `count` replacements from `pat_repl` applied,
# and return `io`.
# note: `str` is deliberately left untyped so that packages like StringViews can hook in
function _replace_(io::IO, str, pat_repl::NTuple{N, Pair}, count::Int) where N
    # zero replacements requested: pass the input through untouched
    count == 0 && (write(io, str); return io)
    e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count)
    if !notfound
        return _replace_finish(io, str, count, e1, patterns, replaces, rs)
    end
    # `_replace_init` reported that none of the patterns has a match in `str`:
    # run `_free_pat_replacer` on every pattern, then copy `str` verbatim
    foreach(_free_pat_replacer, patterns)
    write(io, str)
    return io
end

# Return `str` with at most `count` replacements from `pat_repl` applied.
# `str` itself is returned (no copy) when `count == 0` or when nothing matches.
# note: `str` is deliberately left untyped so that packages like StringViews can hook in
function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N
    if count == 0
        return str
    end
    e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count)
    if notfound
        # nothing to replace: run `_free_pat_replacer` on every pattern and hand back the input
        foreach(_free_pat_replacer, patterns)
        return str
    end
    # size the scratch buffer slightly larger than the input (factor 1.2)
    capacity = floor(Int, 1.2sizeof(str))
    buf = IOBuffer(sizehint=capacity)
    filled = _replace_finish(buf, str, count, e1, patterns, replaces, rs)
    return String(take!(filled))
end

"""
replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer])
replace([io::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer])
Search for the given pattern `pat` in `s`, and replace each occurrence with `r`.
If `count` is provided, replace at most `count` occurrences.
Expand All @@ -774,13 +802,21 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then
references in `r` are replaced with the corresponding matched text.
To remove instances of `pat` from `s`, set `r` to the empty `String` (`""`).
The return value is a new string after the replacements. If the `io::IO` argument
is supplied, the transformed string is instead written to `io` (returning `io`).
(For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use
a pre-allocated buffer array in-place.)
Multiple patterns can be specified, and they will be applied left-to-right
simultaneously, so only one pattern will be applied to any character, and the
patterns will only be applied to the input text, not the replacements.
!!! compat "Julia 1.7"
Support for multiple patterns requires version 1.7.
!!! compat "Julia 1.10"
The `io::IO` argument requires version 1.10.
# Examples
```jldoctest
julia> replace("Python is a programming language.", "Python" => "Julia")
Expand All @@ -799,8 +835,12 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a")
"bca"
```
"""
function replace(io::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int))
    # convert the arguments to the concrete types the internal worker expects:
    # a `String` input and an `Int` replacement limit
    str = String(s)
    limit = Int(count)
    return _replace_(io, str, pat_f, limit)
end

replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) =
replace(String(s), pat_f..., count=count)
_replace_(String(s), pat_f, Int(count))


# TODO: allow transform as the first argument to replace?

Expand Down
2 changes: 1 addition & 1 deletion doc/src/base/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Base.findlast(::AbstractChar, ::AbstractString)
Base.findprev(::AbstractString, ::AbstractString, ::Integer)
Base.occursin
Base.reverse(::Union{String,SubString{String}})
Base.replace(s::AbstractString, ::Pair...)
Base.replace(::IO, s::AbstractString, ::Pair...)
Base.eachsplit
Base.split
Base.rsplit
Expand Down
22 changes: 22 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,28 @@ end
# Issue 36953
@test replace("abc", "" => "_", count=1) == "_abc"

# tests for io::IO API (in addition to internals exercised above):
let io = IOBuffer()
    # every call appends to the same buffer, so the expected value is the
    # concatenation of the four individual results
    replace(io, "aaa", 'a' => 'z'; count=0)  # count=0: input written unchanged
    replace(io, "aaa", 'a' => 'z'; count=1)  # only the first match is replaced
    replace(io, "bbb", 'a' => 'z')           # pattern does not occur
    replace(io, "aaa", 'a' => 'z')           # every match is replaced
    written = String(take!(io))
    @test written == "aaazaabbbzzz"
end
# same sequence as the IOBuffer case, but writing to a file stream
let path = tempname()
    try
        open(path, "w") do out
            replace(out, "aaa", 'a' => 'z'; count=0)
            replace(out, "aaa", 'a' => 'z'; count=1)
            replace(out, "bbb", 'a' => 'z')
            replace(out, "aaa", 'a' => 'z')
            print(out, "\n")
        end
        contents = read(path, String)
        @test contents == "aaazaabbbzzz\n"
    finally
        # always remove the temporary file, even if the test body threw
        rm(path; force=true)
    end
end
end

@testset "replace many" begin
Expand Down

0 comments on commit 388c734

Please sign in to comment.