From 03c073c91ee8f0c25b5970cddc7582c5bd242942 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 14 Feb 2018 21:15:50 +0100 Subject: [PATCH 1/4] Rework replace and replace! Introduce a new replace!(new::Callable, res::T, A::T, count::Union{Nothing,Int}) method which custom types can implement to support all replace and replace! methods automatically, instead of the current replace!(new::Callable, A::T, count::Int). This offers several advantages: - For arrays, instead of copying the input and then replace elements, we can do the copy and replace operations at the same time, which is quite faster for arrays when count=nothing. - For dicts and sets, copying up-front is still faster as long as most original elements are preserved, but for replace(), we can apply replacements directly instead of storing a them in a temporary vector. - When the LHS of a pair contains a singleton type, we can subtract it from the element type of the result, e.g. Union{T,Missing} becomes T. Also simplify the dispatch logic by removing the internal _replace! method in favor of replace!. --- base/set.jl | 121 ++++++++++++++++++++++++++++++++++++--------------- test/sets.jl | 9 ++++ 2 files changed, 94 insertions(+), 36 deletions(-) diff --git a/base/set.jl b/base/set.jl index 5775af5f3fca4..44117a77ceb2a 100644 --- a/base/set.jl +++ b/base/set.jl @@ -574,8 +574,14 @@ _copy_oftype(x::AbstractArray{T}, ::Type{T}) where {T} = copy(x) _copy_oftype(x::AbstractDict{K,V}, ::Type{Pair{K,V}}) where {K,V} = copy(x) _copy_oftype(x::AbstractSet{T}, ::Type{T}) where {T} = copy(x) +_similar_or_copy(x::Any) = similar(x) +_similar_or_copy(x::Any, ::Type{T}) where {T} = similar(x, T) +# Make a copy on construction since it is faster than inserting elements separately +_similar_or_copy(x::Union{AbstractDict,AbstractSet}) = copy(x) +_similar_or_copy(x::Union{AbstractDict,AbstractSet}, ::Type{T}) where {T} = _copy_oftype(x, T) + # to make replace/replace! work for a new container type Cont, only -# replace!(new::Callable, A::Cont; count::Integer=typemax(Int)) +# replace!(new::Callable, res::Cont, A::Cont; count::Integer=typemax(Int)) # has to be implemented """ @@ -600,16 +606,17 @@ julia> replace!(Set([1, 2, 3]), 1=>0) Set([0, 2, 3]) ``` """ -replace!(A, old_new::Pair...; count::Integer=typemax(Int)) = _replace!(A, count, old_new) +replace!(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing) = + replace!(A, A, count, old_new) -function _replace!(A, count::Integer, old_new::Tuple{Vararg{Pair}}) +function replace!(res, A, count::Union{Integer,Nothing}, old_new::Tuple{Vararg{Pair}}) @inline function new(x) for o_n in old_new isequal(first(o_n), x) && return last(o_n) end return x # no replace end - replace!(new, A, count=count) + replace!(new, res, A, count) end """ @@ -630,7 +637,7 @@ julia> replace!(isodd, A, 0, count=2) 1 ``` """ -replace!(pred::Callable, A, new; count::Integer=typemax(Int)) = +replace!(pred::Callable, A, new; count::Union{Integer,Nothing}=nothing) = replace!(x -> ifelse(pred(x), new, x), A, count=count) """ @@ -661,9 +668,14 @@ Set([6, 12]) ``` """ function replace!(new::Callable, A::Union{AbstractArray,AbstractDict,AbstractSet}; - count::Integer=typemax(Int)) - count < 0 && throw(DomainError(count, "`count` must not be negative")) - count != 0 && _replace!(new, A, min(count, typemax(Int)) % Int) + count::Union{Integer,Nothing}=nothing) + if count === nothing + replace!(new, A, A, nothing) + elseif count < 0 + throw(DomainError(count, "`count` must not be negative")) + elseif count != 0 + replace!(new, A, A, min(count, typemax(Int)) % Int) + end A end @@ -686,16 +698,33 @@ julia> replace([1, 2, 1, 3], 1=>0, 2=>4, count=2) 3 ``` """ -function replace(A, old_new::Pair...; count::Integer=typemax(Int)) +function replace(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing) V = promote_valuetype(old_new...) - T = promote_type(eltype(A), V) - _replace!(_copy_oftype(A, T), count, old_new) + if count isa Nothing + T = promote_type(subtract_singletontype(eltype(A), old_new...), V) + replace!(_similar_or_copy(A, T), A, nothing, old_new) + else + U = promote_type(eltype(A), V) + replace!(_similar_or_copy(A, U), A, min(count, typemax(Int)) % Int, old_new) + end end promote_valuetype(x::Pair{K, V}) where {K, V} = V promote_valuetype(x::Pair{K, V}, y::Pair...) where {K, V} = promote_type(V, promote_valuetype(y...)) +# Subtract singleton types which are going to be replaced +@pure issingletontype(::Type{T}) where {T} = isdefined(T, :instance) +function subtract_singletontype(::Type{T}, x::Pair{K}) where {T, K} + if issingletontype(K) # singleton type + Core.Compiler.typesubtract(T, K) + else + T + end +end +subtract_singletontype(::Type{T}, x::Pair{K}, y::Pair...) where {T, K} = + subtract_singletontype(subtract_singletontype(T, y...), x) + """ replace(pred::Function, A, new; [count::Integer]) @@ -713,9 +742,10 @@ julia> replace(isodd, [1, 2, 3, 1], 0, count=2) 1 ``` """ -function replace(pred::Callable, A, new; count::Integer=typemax(Int)) +function replace(pred::Callable, A, new; count::Union{Integer,Nothing}=nothing) T = promote_type(eltype(A), typeof(new)) - replace!(pred, _copy_oftype(A, T), new, count=count) + replace!(x -> ifelse(pred(x), new, x), _similar_or_copy(A, T), A, + count === nothing ? nothing : min(count, typemax(Int)) % Int) end """ @@ -742,7 +772,9 @@ Dict{Int64,Int64} with 2 entries: 1 => 3 ``` """ -replace(new::Callable, A; count::Integer=typemax(Int)) = replace!(new, copy(A), count=count) +replace(new::Callable, A; count::Union{Integer,Nothing}=nothing) = + replace!(new, _similar_or_copy(A), A, + count === nothing ? nothing : min(count, typemax(Int)) % Int) # Handle ambiguities replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b))) @@ -757,36 +789,53 @@ replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, askey(k, ::AbstractDict) = k.first askey(k, ::AbstractSet) = k -function _replace!(new::Callable, A::Union{AbstractDict,AbstractSet}, count::Int) - repl = Pair{eltype(A),eltype(A)}[] +function replace!(new::Callable, res::T, A::T, + count::Union{Int,Nothing}) where T<:Union{AbstractDict,AbstractSet} c = 0 - for x in A - y = new(x) - if x !== y - push!(repl, x => y) - c += 1 + if res === A + repl = Pair{eltype(A),eltype(A)}[] + for x in A + y = new(x) + if x !== y + push!(repl, x => y) + c += 1 + end + c == count && break + end + for oldnew in repl + pop!(res, askey(first(oldnew), res)) + end + for oldnew in repl + push!(res, last(oldnew)) + end + else + for x in A + y = new(x) + if x !== y + pop!(res, askey(x, res)) + push!(res, y) + c += 1 + end + c == count && break end - c == count && break - end - for oldnew in repl - pop!(A, askey(first(oldnew), A)) - end - for oldnew in repl - push!(A, last(oldnew)) end + res end -### AbstractArray +### replace! for AbstractArray -function _replace!(new::Callable, A::AbstractArray, count::Int) +function replace!(new::Callable, res::AbstractArray, A::AbstractArray, + count::Union{Int,Nothing}) c = 0 for i in eachindex(A) @inbounds Ai = A[i] - y = new(Ai) - if Ai !== y - @inbounds A[i] = y - c += 1 + if count === nothing || c < count + y = new(Ai) + @inbounds res[i] = y + c += (Ai !== y) + else + @inbounds res[i] = Ai end - c == count && break end -end + res +end \ No newline at end of file diff --git a/test/sets.jl b/test/sets.jl index 554cb4c2d03dc..5aae8fdea67e6 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -548,6 +548,15 @@ end x = @inferred replace(x -> x > 1, [1, 2], missing) @test isequal(x, [1, missing]) && x isa Vector{Union{Int, Missing}} + x = @inferred replace([1, missing], missing=>2) + @test x == [1, 2] && x isa Vector{Int} + x = @inferred replace([1, missing], missing=>2, count=1) + @test x == [1, 2] && x isa Vector{Union{Int, Missing}} + x = @inferred replace([1, missing], missing=>missing) + @test isequal(x, [1, missing]) && x isa Vector{Union{Int, Missing}} + x = @inferred replace([1, missing], missing=>2, 1=>missing) + @test isequal(x, [missing, 2]) && x isa Vector{Union{Int, Missing}} + # test that isequal is used @test replace([NaN, 1.0], NaN=>0.0) == [0.0, 1.0] @test replace([1, missing], missing=>0) == [1, 0] From 184c1bda07dcec73957292918c6150a6a806af86 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Fri, 23 Mar 2018 09:02:52 +0100 Subject: [PATCH 2/4] alternative with less nothingness --- base/set.jl | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/base/set.jl b/base/set.jl index 44117a77ceb2a..3a6e79688e241 100644 --- a/base/set.jl +++ b/base/set.jl @@ -606,17 +606,17 @@ julia> replace!(Set([1, 2, 3]), 1=>0) Set([0, 2, 3]) ``` """ -replace!(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing) = - replace!(A, A, count, old_new) +replace!(A, old_new::Pair...; count::Integer=typemax(Int)) = + _replace!(A, A, count, old_new) -function replace!(res, A, count::Union{Integer,Nothing}, old_new::Tuple{Vararg{Pair}}) +function _replace!(res, A, count::Integer, old_new::Tuple{Vararg{Pair}}) @inline function new(x) for o_n in old_new isequal(first(o_n), x) && return last(o_n) end return x # no replace end - replace!(new, res, A, count) + replaceimpl!(new, res, A, count=count) end """ @@ -637,7 +637,7 @@ julia> replace!(isodd, A, 0, count=2) 1 ``` """ -replace!(pred::Callable, A, new; count::Union{Integer,Nothing}=nothing) = +replace!(pred::Callable, A, new; count::Integer=typemax(Int)) = replace!(x -> ifelse(pred(x), new, x), A, count=count) """ @@ -667,18 +667,23 @@ julia> replace!(x->2x, Set([3, 6])) Set([6, 12]) ``` """ -function replace!(new::Callable, A::Union{AbstractArray,AbstractDict,AbstractSet}; - count::Union{Integer,Nothing}=nothing) - if count === nothing - replace!(new, A, A, nothing) - elseif count < 0 +function replaceimpl!(new::Callable, res::Union{AbstractArray,AbstractDict,AbstractSet}, + A::Union{AbstractArray,AbstractDict,AbstractSet}; + count::Integer=typemax(Int)) + if count < 0 throw(DomainError(count, "`count` must not be negative")) elseif count != 0 - replace!(new, A, A, min(count, typemax(Int)) % Int) + if count >= length(A) + _replace!(new, res, A, nothing) + else + _replace!(new, res, A, min(count, typemax(Int)) % Int) + end end A end +replace!(new::Callable, A, count::Integer=typemax(Int)) = replaceimpl!(new, A, A, count=count) + """ replace(A, old_new::Pair...; [count::Integer]) @@ -702,10 +707,10 @@ function replace(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing) V = promote_valuetype(old_new...) if count isa Nothing T = promote_type(subtract_singletontype(eltype(A), old_new...), V) - replace!(_similar_or_copy(A, T), A, nothing, old_new) + _replace!(_similar_or_copy(A, T), A, typemax(Int), old_new) else U = promote_type(eltype(A), V) - replace!(_similar_or_copy(A, U), A, min(count, typemax(Int)) % Int, old_new) + _replace!(_similar_or_copy(A, U), A, count, old_new) end end @@ -742,10 +747,9 @@ julia> replace(isodd, [1, 2, 3, 1], 0, count=2) 1 ``` """ -function replace(pred::Callable, A, new; count::Union{Integer,Nothing}=nothing) +function replace(pred::Callable, A, new; count::Integer=typemax(Int)) T = promote_type(eltype(A), typeof(new)) - replace!(x -> ifelse(pred(x), new, x), _similar_or_copy(A, T), A, - count === nothing ? nothing : min(count, typemax(Int)) % Int) + replaceimpl!(x -> ifelse(pred(x), new, x), _similar_or_copy(A, T), A, count=count) end """ @@ -772,9 +776,8 @@ Dict{Int64,Int64} with 2 entries: 1 => 3 ``` """ -replace(new::Callable, A; count::Union{Integer,Nothing}=nothing) = - replace!(new, _similar_or_copy(A), A, - count === nothing ? nothing : min(count, typemax(Int)) % Int) +replace(new::Callable, A; count::Integer=typemax(Int)) = + replaceimpl!(new, _similar_or_copy(A), A, count=count) # Handle ambiguities replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b))) @@ -789,8 +792,8 @@ replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, askey(k, ::AbstractDict) = k.first askey(k, ::AbstractSet) = k -function replace!(new::Callable, res::T, A::T, - count::Union{Int,Nothing}) where T<:Union{AbstractDict,AbstractSet} +function _replace!(new::Callable, res::T, A::T, + count::Union{Int,Nothing}) where T<:Union{AbstractDict,AbstractSet} c = 0 if res === A repl = Pair{eltype(A),eltype(A)}[] @@ -824,12 +827,12 @@ end ### replace! for AbstractArray -function replace!(new::Callable, res::AbstractArray, A::AbstractArray, +function _replace!(new::Callable, res::AbstractArray, A::AbstractArray, count::Union{Int,Nothing}) c = 0 for i in eachindex(A) @inbounds Ai = A[i] - if count === nothing || c < count + if count === nothing c < count y = new(Ai) @inbounds res[i] = y c += (Ai !== y) @@ -838,4 +841,4 @@ function replace!(new::Callable, res::AbstractArray, A::AbstractArray, end end res -end \ No newline at end of file +end From 06624abfc822f0bbed54534976e60cad38a92ae6 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 24 Mar 2018 13:09:04 +0100 Subject: [PATCH 3/4] More fixes --- base/set.jl | 74 ++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/base/set.jl b/base/set.jl index 3a6e79688e241..3eaf49a6c8423 100644 --- a/base/set.jl +++ b/base/set.jl @@ -564,6 +564,11 @@ convert(::Type{T}, s::AbstractSet) where {T<:AbstractSet} = T(s) ## replace/replace! ## +function check_count(count::Integer) + count < 0 && throw(DomainError(count, "`count` must not be negative (got $count)")) + return min(count, typemax(Int)) % Int +end + # TODO: use copy!, which is currently unavailable from here since it is defined in Future _copy_oftype(x, ::Type{T}) where {T} = copyto!(similar(x, T), x) # TODO: use similar() once deprecation is removed and it preserves keys @@ -581,7 +586,7 @@ _similar_or_copy(x::Union{AbstractDict,AbstractSet}) = copy(x) _similar_or_copy(x::Union{AbstractDict,AbstractSet}, ::Type{T}) where {T} = _copy_oftype(x, T) # to make replace/replace! work for a new container type Cont, only -# replace!(new::Callable, res::Cont, A::Cont; count::Integer=typemax(Int)) +# _replace!(new::Callable, res::Cont, A::Cont, count::Int) # has to be implemented """ @@ -607,16 +612,16 @@ Set([0, 2, 3]) ``` """ replace!(A, old_new::Pair...; count::Integer=typemax(Int)) = - _replace!(A, A, count, old_new) + replace_pairs!(A, A, check_count(count), old_new) -function _replace!(res, A, count::Integer, old_new::Tuple{Vararg{Pair}}) +function replace_pairs!(res, A, count::Int, old_new::Tuple{Vararg{Pair}}) @inline function new(x) for o_n in old_new isequal(first(o_n), x) && return last(o_n) end return x # no replace end - replaceimpl!(new, res, A, count=count) + _replace!(new, res, A, count) end """ @@ -638,7 +643,7 @@ julia> replace!(isodd, A, 0, count=2) ``` """ replace!(pred::Callable, A, new; count::Integer=typemax(Int)) = - replace!(x -> ifelse(pred(x), new, x), A, count=count) + replace!(x -> ifelse(pred(x), new, x), A, count=check_count(count)) """ replace!(new::Function, A; [count::Integer]) @@ -667,22 +672,8 @@ julia> replace!(x->2x, Set([3, 6])) Set([6, 12]) ``` """ -function replaceimpl!(new::Callable, res::Union{AbstractArray,AbstractDict,AbstractSet}, - A::Union{AbstractArray,AbstractDict,AbstractSet}; - count::Integer=typemax(Int)) - if count < 0 - throw(DomainError(count, "`count` must not be negative")) - elseif count != 0 - if count >= length(A) - _replace!(new, res, A, nothing) - else - _replace!(new, res, A, min(count, typemax(Int)) % Int) - end - end - A -end - -replace!(new::Callable, A, count::Integer=typemax(Int)) = replaceimpl!(new, A, A, count=count) +replace!(new::Callable, A; count::Integer=typemax(Int)) = + _replace!(new, A, A, check_count(count)) """ replace(A, old_new::Pair...; [count::Integer]) @@ -707,10 +698,10 @@ function replace(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing) V = promote_valuetype(old_new...) if count isa Nothing T = promote_type(subtract_singletontype(eltype(A), old_new...), V) - _replace!(_similar_or_copy(A, T), A, typemax(Int), old_new) + replace_pairs!(_similar_or_copy(A, T), A, typemax(Int), old_new) else U = promote_type(eltype(A), V) - _replace!(_similar_or_copy(A, U), A, count, old_new) + replace_pairs!(_similar_or_copy(A, U), A, check_count(count), old_new) end end @@ -719,9 +710,10 @@ promote_valuetype(x::Pair{K, V}, y::Pair...) where {K, V} = promote_type(V, promote_valuetype(y...)) # Subtract singleton types which are going to be replaced -@pure issingletontype(::Type{T}) where {T} = isdefined(T, :instance) +@pure issingletontype(T::DataType) = isdefined(T, :instance) +issingletontype(::Type) = false function subtract_singletontype(::Type{T}, x::Pair{K}) where {T, K} - if issingletontype(K) # singleton type + if issingletontype(K) Core.Compiler.typesubtract(T, K) else T @@ -749,7 +741,7 @@ julia> replace(isodd, [1, 2, 3, 1], 0, count=2) """ function replace(pred::Callable, A, new; count::Integer=typemax(Int)) T = promote_type(eltype(A), typeof(new)) - replaceimpl!(x -> ifelse(pred(x), new, x), _similar_or_copy(A, T), A, count=count) + _replace!(x -> ifelse(pred(x), new, x), _similar_or_copy(A, T), A, check_count(count)) end """ @@ -777,7 +769,7 @@ Dict{Int64,Int64} with 2 entries: ``` """ replace(new::Callable, A; count::Integer=typemax(Int)) = - replaceimpl!(new, _similar_or_copy(A), A, count=count) + _replace!(new, _similar_or_copy(A), A, check_count(count)) # Handle ambiguities replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b))) @@ -786,16 +778,15 @@ replace(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace, (a replace(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b, c))) replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, c))) - ### replace! for AbstractDict/AbstractSet askey(k, ::AbstractDict) = k.first askey(k, ::AbstractSet) = k function _replace!(new::Callable, res::T, A::T, - count::Union{Int,Nothing}) where T<:Union{AbstractDict,AbstractSet} + count::Int) where T<:Union{AbstractDict,AbstractSet} c = 0 - if res === A + if res === A # cannot replace elements while iterating over A repl = Pair{eltype(A),eltype(A)}[] for x in A y = new(x) @@ -827,17 +818,24 @@ end ### replace! for AbstractArray -function _replace!(new::Callable, res::AbstractArray, A::AbstractArray, - count::Union{Int,Nothing}) +function _replace!(new::Callable, res::AbstractArray, A::AbstractArray, count::Int) c = 0 - for i in eachindex(A) - @inbounds Ai = A[i] - if count === nothing c < count + if count >= length(A) # simpler loop allows for SIMD + for i in eachindex(A) + @inbounds Ai = A[i] y = new(Ai) @inbounds res[i] = y - c += (Ai !== y) - else - @inbounds res[i] = Ai + end + else + for i in eachindex(A) + @inbounds Ai = A[i] + if c < count + y = new(Ai) + @inbounds res[i] = y + c += (Ai !== y) + else + @inbounds res[i] = Ai + end end end res From 1b3f5ba06b2c4afac190a85aede32533d51d545e Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 11 Apr 2018 15:17:10 +0200 Subject: [PATCH 4/4] Mention computation of type in docstring --- base/set.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/base/set.jl b/base/set.jl index 3eaf49a6c8423..0b20134003ba7 100644 --- a/base/set.jl +++ b/base/set.jl @@ -682,6 +682,14 @@ Return a copy of collection `A` where, for each pair `old=>new` in `old_new`, all occurrences of `old` are replaced by `new`. Equality is determined using [`isequal`](@ref). If `count` is specified, then replace at most `count` occurrences in total. + +The element type of the result is chosen using promotion (see [`promote_type`](@ref)) +based on the element type of `A` and on the types of the `new` values in pairs. +If `count` is omitted and the element type of `A` is a `Union`, the element type +of the result will not include singleton types which are replaced with values of +a different type: for example, `Union{T,Missing}` will become `T` if `missing` is +replaced. + See also [`replace!`](@ref). # Examples @@ -692,6 +700,11 @@ julia> replace([1, 2, 1, 3], 1=>0, 2=>4, count=2) 4 1 3 + +julia> replace([1, missing], missing=>0) +2-element Array{Int64,1}: + 1 + 0 ``` """ function replace(A, old_new::Pair...; count::Union{Integer,Nothing}=nothing)