From f38a7348fc724d7124659ebbaec710357f3c04ef Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Tue, 27 Feb 2018 13:16:45 -0600 Subject: [PATCH] More thorough aliasing detection in nonscalar copy, broadcast and assignment (#25890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit puts together the beginnings of an "interface" for detecting and avoiding aliasing between arrays before performing some mutating operation on one of them. `A′ = unalias(dest, A)` is the main entry point; it checks to see if `dest` and `A` might alias one another, and then either returns `A` or a copy of `A`. When it returns a copy of `A`, it absolutely must preserve the same type. As such, this function calls an internal `unaliascopy` function, which defaults to `copy` but ensures it returns the same type. `mightalias(A, B)` is a quick and conservative check to see if two arrays alias eachother. Instead of requiring every array to know about every other array type, it simply asks both arrays for their `dataids` and then looks for an empty intersection. Finally, `dataids(A)` returns a tuple of `UInt`s that describe something about the region of memory the array occupies. It defaults to simply `(objectid(A),)`. A custom implementation for an array with multiple fields of arrays would look something like `(dataids(A.a)..., dataids(A.b)..., ...)`. Fixes #21693, fixes #15209, and a pre-req to get tests passing for #24368 (since we had spot checks for `Array` aliasing, but moving to broadcast means we need a slightly more generic solution). These functions remain un-exported for now since I'd like us to use them a bit more before we officially support them for all custom arrays. --- base/abstractarray.jl | 104 +++++++++++++++++++++ base/array.jl | 16 ++-- base/broadcast.jl | 16 +++- base/multidimensional.jl | 85 +++++++++++++---- base/reinterpretarray.jl | 1 + base/reshapedarray.jl | 3 + base/subarray.jl | 45 ++++----- stdlib/LinearAlgebra/src/adjtrans.jl | 3 + stdlib/LinearAlgebra/test/adjtrans.jl | 13 +++ stdlib/Serialization/src/Serialization.jl | 25 +---- stdlib/SparseArrays/src/higherorderfns.jl | 12 ++- stdlib/SparseArrays/src/sparsematrix.jl | 5 + stdlib/SparseArrays/src/sparsevector.jl | 4 + stdlib/SparseArrays/test/higherorderfns.jl | 70 +++++++++++++- test/arrayops.jl | 77 ++++++++++++++- 15 files changed, 395 insertions(+), 84 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index ff81756a8cc2f..2ca029d8b2d16 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1047,6 +1047,110 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe r end +""" + parent(A) + +Returns the "parent array" of an array view type (e.g., `SubArray`), or the array itself if +it is not a view. + +# Examples +```jldoctest +julia> A = [1 2; 3 4] +2×2 Array{Int64,2}: + 1 2 + 3 4 + +julia> V = view(A, 1:2, :) +2×2 view(::Array{Int64,2}, 1:2, :) with eltype Int64: + 1 2 + 3 4 + +julia> parent(V) +2×2 Array{Int64,2}: + 1 2 + 3 4 +``` +""" +parent(a::AbstractArray) = a + +## rudimentary aliasing detection ## +""" + Base.unalias(dest, A) + +Return either `A` or a copy of `A` in a rough effort to prevent modifications to `dest` from +affecting the returned object. No guarantees are provided. + +Custom arrays that wrap or use fields containing arrays that might alias against other +external objects should provide a [`Base.dataids`](@ref) implementation. + +This function must return an object of exactly the same type as `A` for performance and type +stability. Mutable custom arrays for which [`copy(A)`](@ref) is not `typeof(A)` should +provide a [`Base.unaliascopy`](@ref) implementation. + +See also [`Base.mightalias`](@ref). +""" +unalias(dest, A::AbstractArray) = mightalias(dest, A) ? unaliascopy(A) : A +unalias(dest, A::AbstractRange) = A +unalias(dest, A) = A + +""" + Base.unaliascopy(A) + +Make a preventative copy of `A` in an operation where `A` [`Base.mightalias`](@ref) against +another array in order to preserve consistent semantics as that other array is mutated. + +This must return an object of the same type as `A` to preserve optimal performance in the +much more common case where aliasing does not occur. By default, +`unaliascopy(A::AbstractArray)` will attempt to use [`copy(A)`](@ref), but in cases where +`copy(A)` is not a `typeof(A)`, then the array should provide a custom implementation of +`Base.unaliascopy(A)`. +""" +unaliascopy(A::Array) = copy(A) +unaliascopy(A::AbstractArray)::typeof(A) = (@_noinline_meta; _unaliascopy(A, copy(A))) +_unaliascopy(A::T, C::T) where {T} = C +_unaliascopy(A, C) = throw(ArgumentError(""" + an array of type `$(typeof(A).name)` shares memory with another argument and must + make a preventative copy of itself in order to maintain consistent semantics, + but `copy(A)` returns a new array of type `$(typeof(C))`. To fix, implement: + `Base.unaliascopy(A::$(typeof(A).name))::typeof(A)`""")) +unaliascopy(A) = A + +""" + Base.mightalias(A::AbstractArray, B::AbstractArray) + +Perform a conservative test to check if arrays `A` and `B` might share the same memory. + +By default, this simply checks if either of the arrays reference the same memory +regions, as identified by their [`Base.dataids`](@ref). +""" +mightalias(A::AbstractArray, B::AbstractArray) = !_isdisjoint(dataids(A), dataids(B)) +mightalias(x, y) = false + +_isdisjoint(as::Tuple{}, bs::Tuple{}) = true +_isdisjoint(as::Tuple{}, bs::Tuple{Any}) = true +_isdisjoint(as::Tuple{}, bs::Tuple) = true +_isdisjoint(as::Tuple{Any}, bs::Tuple{}) = true +_isdisjoint(as::Tuple{Any}, bs::Tuple{Any}) = as[1] != bs[1] +_isdisjoint(as::Tuple{Any}, bs::Tuple) = !(as[1] in bs) +_isdisjoint(as::Tuple, bs::Tuple{}) = true +_isdisjoint(as::Tuple, bs::Tuple{Any}) = !(bs[1] in as) +_isdisjoint(as::Tuple, bs::Tuple) = !(as[1] in bs) && _isdisjoint(tail(as), bs) + +""" + Base.dataids(A::AbstractArray) + +Return a tuple of `UInt`s that represent the mutable data segments of an array. + +Custom arrays that would like to opt-in to aliasing detection of their component +parts can specialize this method to return the concatenation of the `dataids` of +their component parts. A typical definition for an array that wraps a parent is +`Base.dataids(C::CustomArray) = dataids(C.parent)`. +""" +dataids(A::AbstractArray) = (UInt(objectid(A)),) +dataids(A::Array) = (UInt(pointer(A)),) +dataids(::AbstractRange) = () +dataids(x) = () + ## get (getindex with a default value) ## RangeVecIntList{A<:AbstractVector{Int}} = Union{Tuple{Vararg{Union{AbstractRange, AbstractVector{Int}}}}, diff --git a/base/array.jl b/base/array.jl index 70f721e6b7be7..e5c191667d2a4 100644 --- a/base/array.jl +++ b/base/array.jl @@ -692,8 +692,8 @@ function setindex! end # These are redundant with the abstract fallbacks but needed for bootstrap function setindex!(A::Array, x, I::AbstractVector{Int}) @_propagate_inbounds_meta - A === I && (I = copy(I)) - for i in I + I′ = unalias(A, I) + for i in I′ A[i] = x end return A @@ -701,15 +701,11 @@ end function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int}) @_propagate_inbounds_meta @boundscheck setindex_shape_check(X, length(I)) + X′ = unalias(A, X) + I′ = unalias(A, I) count = 1 - if X === A - X = copy(X) - I===A && (I = X::typeof(I)) - elseif I === A - I = copy(I) - end - for i in I - @inbounds x = X[count] + for i in I′ + @inbounds x = X′[count] A[i] = x count += 1 end diff --git a/base/broadcast.jl b/base/broadcast.jl index 131e468ede19a..f2c550c65aba9 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -5,7 +5,7 @@ module Broadcast using .Base.Cartesian using .Base: Indices, OneTo, linearindices, tail, to_shape, _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, - isoperator, promote_typejoin + isoperator, promote_typejoin, unalias import .Base: broadcast, broadcast! export BroadcastStyle, broadcast_indices, broadcast_similar, broadcast_getindex, broadcast_setindex!, dotview, @__dot__ @@ -472,13 +472,23 @@ end return dest end +# For broadcasted assignments like `broadcast!(f, A, ..., A, ...)`, where `A` +# appears on both the LHS and the RHS of the `.=`, then we know we're only +# going to make one pass through the array, and even though `A` is aliasing +# against itself, the mutations won't affect the result as the indices on the +# LHS and RHS will always match. This is not true in general, but with the `.op=` +# syntax it's fairly common for an argument to be `===` a source. +broadcast_unalias(dest, src) = dest === src ? src : unalias(dest, src) + # This indirection allows size-dependent implementations. @inline function _broadcast!(f, C, A, Bs::Vararg{Any,N}) where N shape = broadcast_indices(C) @boundscheck check_broadcast_indices(shape, A, Bs...) - keeps, Idefaults = map_newindexer(shape, A, Bs) + A′ = broadcast_unalias(C, A) + Bs′ = map(B->broadcast_unalias(C, B), Bs) + keeps, Idefaults = map_newindexer(shape, A′, Bs′) iter = CartesianIndices(shape) - _broadcast!(f, C, keeps, Idefaults, A, Bs, Val(N), iter) + _broadcast!(f, C, keeps, Idefaults, A′, Bs′, Val(N), iter) return C end diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 5147f7f9f058d..ebb5cf6347a97 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -675,21 +675,19 @@ _iterable(v) = Iterators.repeated(v) @generated function _unsafe_setindex!(::IndexStyle, A::AbstractArray, x, I::Union{Real,AbstractArray}...) N = length(I) quote - X = _iterable(x) - @nexprs $N d->(I_d = I[d]) + x′ = _iterable(unalias(A, x)) + @nexprs $N d->(I_d = unalias(A, I[d])) idxlens = @ncall $N index_lengths I - @ncall $N setindex_shape_check X (d->idxlens[d]) - Xs = start(X) + @ncall $N setindex_shape_check x′ (d->idxlens[d]) + xs = start(x′) @inbounds @nloops $N i d->I_d begin - v, Xs = next(X, Xs) + v, xs = next(x′, xs) @ncall $N setindex! A v i end A end end -## - # see discussion in #18364 ... we try not to widen type of the resulting array # from cumsum or cumprod, but in some cases (+, Bool) we may not have a choice. rcum_promote_type(op, ::Type{T}, ::Type{S}) where {T,S<:Number} = promote_op(op, T, S) @@ -1091,6 +1089,57 @@ end ### from abstractarray.jl +# In the common case where we have two views into the same parent, aliasing checks +# are _much_ easier and more important to get right +function mightalias(A::SubArray{T,<:Any,P}, B::SubArray{T,<:Any,P}) where {T,P} + if !_parentsmatch(A.parent, B.parent) + # We cannot do any better than the usual dataids check + return !_isdisjoint(dataids(A), dataids(B)) + end + # Now we know that A.parent === B.parent. This means that the indices of A + # and B are the same length and indexing into the same dimensions. We can + # just walk through them and check for overlaps: O(ndims(A)). We must finally + # ensure that the indices don't alias with either parent + return _indicesmightoverlap(A.indices, B.indices) || + !_isdisjoint(dataids(A.parent), _splatmap(dataids, B.indices)) || + !_isdisjoint(dataids(B.parent), _splatmap(dataids, A.indices)) +end +_parentsmatch(A::AbstractArray, B::AbstractArray) = A === B +# Two reshape(::Array)s of the same size aren't `===` because they have different headers +_parentsmatch(A::Array, B::Array) = pointer(A) == pointer(B) && size(A) == size(B) + +_indicesmightoverlap(A::Tuple{}, B::Tuple{}) = true +_indicesmightoverlap(A::Tuple{}, B::Tuple) = error("malformed subarray") +_indicesmightoverlap(A::Tuple, B::Tuple{}) = error("malformed subarray") +# For ranges, it's relatively cheap to construct the intersection +@inline function _indicesmightoverlap(A::Tuple{AbstractRange, Vararg{Any}}, B::Tuple{AbstractRange, Vararg{Any}}) + !isempty(intersect(A[1], B[1])) ? _indicesmightoverlap(tail(A), tail(B)) : false +end +# But in the common AbstractUnitRange case, there's an even faster shortcut +@inline function _indicesmightoverlap(A::Tuple{AbstractUnitRange, Vararg{Any}}, B::Tuple{AbstractUnitRange, Vararg{Any}}) + max(first(A[1]),first(B[1])) <= min(last(A[1]),last(B[1])) ? _indicesmightoverlap(tail(A), tail(B)) : false +end +# And we can check scalars against eachother and scalars against arrays quite easily +@inline _indicesmightoverlap(A::Tuple{Real, Vararg{Any}}, B::Tuple{Real, Vararg{Any}}) = + A[1] == B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +@inline _indicesmightoverlap(A::Tuple{Real, Vararg{Any}}, B::Tuple{AbstractArray, Vararg{Any}}) = + A[1] in B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +@inline _indicesmightoverlap(A::Tuple{AbstractArray, Vararg{Any}}, B::Tuple{Real, Vararg{Any}}) = + B[1] in A[1] ? _indicesmightoverlap(tail(A), tail(B)) : false +# And small arrays are quick, too +@inline function _indicesmightoverlap(A::Tuple{AbstractArray, Vararg{Any}}, B::Tuple{AbstractArray, Vararg{Any}}) + if length(A[1]) == 1 + return A[1][1] in B[1] ? _indicesmightoverlap(tail(A), tail(B)) : false + elseif length(B[1]) == 1 + return B[1][1] in A[1] ? _indicesmightoverlap(tail(A), tail(B)) : false + else + # But checking larger arrays requires O(m*n) and is too much work + return true + end +end +# And in general, checking the intersection is too much work +_indicesmightoverlap(A::Tuple{Any, Vararg{Any}}, B::Tuple{Any, Vararg{Any}}) = true + """ fill!(A, x) @@ -1161,33 +1210,35 @@ julia> y copyto!(dest, src) function copyto!(dest::AbstractArray{T,N}, src::AbstractArray{T,N}) where {T,N} - @boundscheck checkbounds(dest, axes(src)...) - for I in eachindex(IndexStyle(src,dest), src) - @inbounds dest[I] = src[I] + checkbounds(dest, axes(src)...) + src′ = unalias(dest, src) + for I in eachindex(IndexStyle(src′,dest), src′) + @inbounds dest[I] = src′[I] end dest end function copyto!(dest::AbstractArray{T1,N}, Rdest::CartesianIndices{N}, - src::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} + src::AbstractArray{T2,N}, Rsrc::CartesianIndices{N}) where {T1,T2,N} isempty(Rdest) && return dest if size(Rdest) != size(Rsrc) throw(ArgumentError("source and destination must have same size (got $(size(Rsrc)) and $(size(Rdest)))")) end - @boundscheck checkbounds(dest, first(Rdest)) - @boundscheck checkbounds(dest, last(Rdest)) - @boundscheck checkbounds(src, first(Rsrc)) - @boundscheck checkbounds(src, last(Rsrc)) + checkbounds(dest, first(Rdest)) + checkbounds(dest, last(Rdest)) + checkbounds(src, first(Rsrc)) + checkbounds(src, last(Rsrc)) + src′ = unalias(dest, src) ΔI = first(Rdest) - first(Rsrc) if @generated quote @nloops $N i (n->Rsrc.indices[n]) begin - @inbounds @nref($N,dest,n->i_n+ΔI[n]) = @nref($N,src,i) + @inbounds @nref($N,dest,n->i_n+ΔI[n]) = @nref($N,src′,i) end end else for I in Rsrc - @inbounds dest[I + ΔI] = src[I] + @inbounds dest[I + ΔI] = src′[I] end end dest diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 81a45af7bc33f..efbec54c1e7e2 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -36,6 +36,7 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N} end parent(a::ReinterpretArray) = a.parent +dataids(a::ReinterpretArray) = dataids(a.parent) function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S} psize = size(a.parent) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index e7749b9a97960..2c94ea834c8a8 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -180,6 +180,9 @@ parent(A::ReshapedArray) = A.parent parentindices(A::ReshapedArray) = map(s->1:s, size(parent(A))) reinterpret(::Type{T}, A::ReshapedArray, dims::Dims) where {T} = reinterpret(T, parent(A), dims) +unaliascopy(A::ReshapedArray) = typeof(A)(unaliascopy(A.parent), A.dims, A.mi) +dataids(A::ReshapedArray) = dataids(A.parent) + @inline ind2sub_rs(::Tuple{}, i::Int) = i @inline ind2sub_rs(strds, i) = _ind2sub_rs(strds, i - 1) @inline _ind2sub_rs(::Tuple{}, ind) = (ind + 1,) diff --git a/base/subarray.jl b/base/subarray.jl index b93b9f3881550..d975a07a27581 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -59,34 +59,9 @@ similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims) sizeof(V::SubArray) = length(V) * sizeof(eltype(V)) -""" - parent(A) - -Returns the "parent array" of an array view type (e.g., `SubArray`), or the array itself if -it is not a view. - -# Examples -```jldoctest -julia> A = [1 2; 3 4] -2×2 Array{Int64,2}: - 1 2 - 3 4 - -julia> V = view(A, 1:2, :) -2×2 view(::Array{Int64,2}, 1:2, :) with eltype Int64: - 1 2 - 3 4 - -julia> parent(V) -2×2 Array{Int64,2}: - 1 2 - 3 4 -``` -""" parent(V::SubArray) = V.parent parentindices(V::SubArray) = V.indices -parent(a::AbstractArray) = a """ parentindices(A) @@ -94,6 +69,24 @@ From an array view `A`, returns the corresponding indices in the parent. """ parentindices(a::AbstractArray) = ntuple(i->OneTo(size(a,i)), ndims(a)) +## Aliasing detection +dataids(A::SubArray) = (dataids(A.parent)..., _splatmap(dataids, A.indices)...) +_splatmap(f, ::Tuple{}) = () +_splatmap(f, t::Tuple) = (f(t[1])..., _splatmap(f, tail(t))...) +unaliascopy(A::SubArray) = typeof(A)(unaliascopy(A.parent), map(unaliascopy, A.indices), A.offset1, A.stride1) + +# When the parent is an Array we can trim the size down a bit. In the future this +# could possibly be extended to any mutable array. +function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} + dest = Array{T}(uninitialized, index_lengths(V.indices...)) + copyto!(dest, V) + SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD)) +end +# Transform indices to be "dense" +_trimmedindex(i::Real) = oftype(i, 1) +_trimmedindex(i::AbstractUnitRange) = i +_trimmedindex(i::AbstractArray) = oftype(i, reshape(linearindices(i), axes(i))) + ## SubArray creation # We always assume that the dimensionality of the parent matches the number of # indices that end up getting passed to it, so we store the parent as a @@ -135,7 +128,7 @@ julia> A # Note A has changed even though we modified b """ function view(A::AbstractArray, I::Vararg{Any,N}) where {N} @_inline_meta - J = to_indices(A, I) + J = map(i->unalias(A,i), to_indices(A, I)) @boundscheck checkbounds(A, J...) unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...) end diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 45063f983268a..7e9c4c4bfdaaf 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -47,6 +47,9 @@ end Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) +Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) +Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) + # wrapping lowercase quasi-constructors """ adjoint(A) diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl index 0d5f3362c4ee5..9dd5b068a5dcd 100644 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ b/stdlib/LinearAlgebra/test/adjtrans.jl @@ -447,4 +447,17 @@ end @test adjoint!(b, a) === b end +@testset "aliasing with adjoint and transpose" begin + A = collect(reshape(1:25, 5, 5)) .+ rand.().*im + B = copy(A) + B .= B' + @test B == A' + B = copy(A) + B .= transpose(B) + @test B == transpose(A) + B = copy(A) + B .= B .* B' + @test B == A .* A' +end + end # module TestAdjointTranspose diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index cd202ab90c6cf..42ec3cd38408b 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -11,7 +11,7 @@ module Serialization import Base: GMP, Bottom, unsafe_convert, uncompressed_ast import Core: svec, SimpleVector -using Base: ViewIndex, Slice, index_lengths, unwrap_unionall +using Base: unaliascopy, unwrap_unionall using Core.IR export serialize, deserialize, AbstractSerializer, Serializer @@ -274,29 +274,12 @@ function serialize(s::AbstractSerializer, a::Array) end function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Array} - b = trimmedsubarray(a) + # SubArray's copy only selects the relevant data (and reduces the size) but does not + # preserve the type of the argument. This internal function does both: + b = unaliascopy(a) serialize_any(s, b) end -function trimmedsubarray(V::SubArray{T,N,A}) where {T,N,A<:Array} - dest = Array{eltype(V)}(uninitialized, trimmedsize(V)) - copyto!(dest, V) - _trimmedsubarray(dest, V, (), V.indices...) -end - -trimmedsize(V) = index_lengths(V.indices...) - -function _trimmedsubarray(A, V::SubArray{T,N,P,I,LD}, newindices) where {T,N,P,I,LD} - LD && return SubArray{T,N,P,I,LD}(A, newindices, Base.compute_offset1(A, 1, newindices), 1) - SubArray{T,N,P,I,LD}(A, newindices, 0, 0) -end -_trimmedsubarray(A, V, newindices, index::ViewIndex, indices...) = _trimmedsubarray(A, V, (newindices..., trimmedindex(V.parent, length(newindices)+1, index)), indices...) - -trimmedindex(P, d, i::Real) = oftype(i, 1) -trimmedindex(P, d, i::Colon) = i -trimmedindex(P, d, i::Slice) = i -trimmedindex(P, d, i::AbstractArray) = oftype(i, reshape(linearindices(i), axes(i))) - function serialize(s::AbstractSerializer, ss::String) len = sizeof(ss) if len <= 255 diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl index 4e7cc6a88656a..abbbd9dfb92df 100644 --- a/stdlib/SparseArrays/src/higherorderfns.jl +++ b/stdlib/SparseArrays/src/higherorderfns.jl @@ -1008,12 +1008,14 @@ function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, A::SparseVecOrMat, Bs:: if f isa typeof(identity) && N == 0 && Base.axes(dest) == Base.axes(A) return copyto!(dest, A) end - _aresameshape(dest, A, Bs...) && return _noshapecheck_map!(f, dest, A, Bs...) - Base.Broadcast.check_broadcast_indices(axes(dest), A, Bs...) - fofzeros = f(_zeros_eltypes(A, Bs...)...) + A′ = Base.unalias(dest, A) + Bs′ = map(B->Base.unalias(dest, B), Bs) + _aresameshape(dest, A′, Bs′...) && return _noshapecheck_map!(f, dest, A′, Bs′...) + Base.Broadcast.check_broadcast_indices(axes(dest), A′, Bs′...) + fofzeros = f(_zeros_eltypes(A′, Bs′...)...) fpreszeros = _iszero(fofzeros) - fpreszeros ? _broadcast_zeropres!(f, dest, A, Bs...) : - _broadcast_notzeropres!(f, fofzeros, dest, A, Bs...) + fpreszeros ? _broadcast_zeropres!(f, dest, A′, Bs′...) : + _broadcast_notzeropres!(f, fofzeros, dest, A′, Bs′...) return dest end function broadcast!(f::Tf, dest::SparseVecOrMat, ::SPVM, mixedsrcargs::Vararg{Any,N}) where {Tf,N} diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 168871503da70..f4b8a8a549e34 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -260,6 +260,11 @@ function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC}) return SparseMatrixCSC(mS, nS, colptr, rowval, nzval) end +## Alias detection and prevention +using Base: dataids, unaliascopy +Base.dataids(S::SparseMatrixCSC) = (dataids(S.colptr)..., dataids(S.rowval)..., dataids(S.nzval)...) +Base.unaliascopy(S::SparseMatrixCSC) = typeof(S)(S.m, S.n, unaliascopy(S.colptr), unaliascopy(S.rowval), unaliascopy(S.nzval)) + ## Constructors copy(S::SparseMatrixCSC) = diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl index e4d89ab7e442a..da090dbece4b3 100644 --- a/stdlib/SparseArrays/src/sparsevector.jl +++ b/stdlib/SparseArrays/src/sparsevector.jl @@ -92,6 +92,10 @@ similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer) where {TvNew, similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} = _sparsesimilar(S, TvNew, TiNew, (m, n)) +## Alias detection and prevention +using Base: dataids, unaliascopy +Base.dataids(S::SparseVector) = (dataids(S.nzind)..., dataids(S.nzval)...) +Base.unaliascopy(S::SparseVector) = typeof(S)(S.n, unaliascopy(S.nzind), unaliascopy(S.nzval)) ### Construct empty sparse vector diff --git a/stdlib/SparseArrays/test/higherorderfns.jl b/stdlib/SparseArrays/test/higherorderfns.jl index 536eddb48e657..fdbb120519de9 100644 --- a/stdlib/SparseArrays/test/higherorderfns.jl +++ b/stdlib/SparseArrays/test/higherorderfns.jl @@ -554,4 +554,72 @@ end @test spzeros(1,2) .* spzeros(0,1) == zeros(0,2) end -end # module \ No newline at end of file +@testset "aliasing and indexed assignment or broadcast!" begin + A = sparsevec([0, 0, 1, 1]) + B = sparsevec([1, 1, 0, 0]) + A .+= B + @test A == sparse([1,1,1,1]) + + A = sprandn(10, 10, 0.1) + fA = Array(A) + b = randn(10); + broadcast!(/, A, A, b) + @test A == fA ./ Array(b) + + a = sparse([1,3,5]) + b = sparse([3,1,2]) + a[b] = a + @test a == [3,5,1] + a = sparse([3,2,1]) + a[a] = [4,5,6] + @test a == [6,5,4] + + A = sparse([1,2,3,4]) + V = view(A, A) + @test V == A + V[1] = 2 + @test V == A == [2,2,3,4] + V[1] = 2^30 + @test V == A == [2^30, 2, 3, 4] + + A = sparse([2,1,4,3]) + V = view(A, :) + A[V] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(view(A, :), 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(A, 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + # And broadcasting + a = sparse([1,3,5]) + b = sparse([3,1,2]) + a[b] .= a + @test a == [3,5,1] + a = sparse([3,2,1]) + a[a] .= [4,5,6] + @test a == [6,5,4] + + A = sparse([2,1,4,3]) + V = view(A, :) + A[V] .= (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(view(A, :), 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 + + A = sparse([2,1,4,3]) + R = reshape(A, 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 +end + +end # module diff --git a/test/arrayops.jl b/test/arrayops.jl index fa94995359e3e..4e0118a20808b 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -1015,7 +1015,7 @@ end @test a == [8,3,8] end -@testset "assigning an array into itself" begin +@testset "assigning an array into itself and other aliasing issues" begin a = [1,3,5] b = [3,1,2] a[b] = a @@ -1023,6 +1023,81 @@ end a = [3,2,1] a[a] = [4,5,6] @test a == [6,5,4] + + A = [1,2,3,4] + V = view(A, A) + @test V == A + V[1] = 2 + @test V == A == [2,2,3,4] + V[1] = 2^30 + @test V == A == [2^30, 2, 3, 4] + + A = [2,1,4,3] + V = view(A, :) + A[V] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(view(A, :), 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(A, 2, 2) + A[R] = (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + # And broadcasting + a = [1,3,5] + b = [3,1,2] + a[b] .= a + @test a == [3,5,1] + a = [3,2,1] + a[a] .= [4,5,6] + @test a == [6,5,4] + + A = [2,1,4,3] + V = view(A, :) + A[V] .= (1:4) .+ 2^30 + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(view(A, :), 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 + + A = [2,1,4,3] + R = reshape(A, 2, 2) + A[R] .= reshape((1:4) .+ 2^30, 2, 2) + @test A == [2,1,4,3] .+ 2^30 +end + +@testset "Base.mightalias unit tests" begin + using Base: mightalias + A = rand(5,4) + @test @views mightalias(A[:], A[:]) + @test @views mightalias(A[:,:], A[:,:]) + @test @views mightalias(A[1:2,1:2], A[1:2,1:2]) + @test @views !mightalias(A[3:4,1:2], A[1:2,:]) + @test @views mightalias(A[3,1:1], A) + @test @views mightalias(A[3,1:1], A[:]) + @test @views mightalias(A[3,1:1], A[:,:]) + @test @views mightalias(A, A[3,1:1]) + @test @views mightalias(A[:], A[3,1:1]) + @test @views mightalias(A[:,:], A[3,1:1]) + + B = reshape(A,10,2) + @test mightalias(A, A) + @test mightalias(A, B) + @test mightalias(B, A) + @test @views mightalias(B[:], A[:]) + @test @views mightalias(B[1:2], A[1:2]) + @test @views !mightalias(B[1:end÷2], A[end÷2+1:end]) + + AA = [[1],[2]] + @test @views mightalias(AA, AA[:]) + @test @views mightalias(AA[:], AA[:]) + @test @views mightalias(AA[1:1], AA[1:2]) end @testset "lexicographic comparison" begin