diff --git a/base/array.jl b/base/array.jl index 32c543ff12638..bec9e91580d50 100644 --- a/base/array.jl +++ b/base/array.jl @@ -637,7 +637,7 @@ function _collect(::Type{T}, itr, isz::SizeUnknown) where T end # make a collection similar to `c` and appropriate for collecting `itr` -_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T) +_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T, shp) _similar_shape(itr, ::SizeUnknown) = nothing _similar_shape(itr, ::HasLength) = length(itr)::Integer @@ -699,6 +699,12 @@ collect(A::AbstractArray) = _collect_indices(axes(A), A) collect_similar(cont, itr) = _collect(cont, itr, IteratorEltype(itr), IteratorSize(itr)) +struct _Allocator{T} + f::T +end +similar(f::_Allocator, ::Type{T}, dims) where {T} = f.f(T, dims) +collect_allocator(f::F, itr) where {F} = _collect(_Allocator(f), itr, IteratorEltype(itr), IteratorSize(itr)) + _collect(cont, itr, ::HasEltype, isz::Union{HasLength,HasShape}) = copyto!(_similar_for(cont, eltype(itr), itr, isz, _similar_shape(itr, isz)), itr) diff --git a/base/broadcast.jl b/base/broadcast.jl index 57eac7f3a094c..bf6a235b7229f 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -234,6 +234,9 @@ Base.similar(::Broadcasted{ArrayConflict}, ::Type{ElType}, dims) where ElType = similar(Array{ElType, length(dims)}, dims) Base.similar(::Broadcasted{ArrayConflict}, ::Type{Bool}, dims) = similar(BitArray, dims) +# As well as the default behavior +Base.similar(::Broadcasted, ::Type{ElType}, dims) where ElType = + similar(Array{ElType, length(dims)}, dims) @inline Base.axes(bc::Broadcasted) = _axes(bc, bc.axes) _axes(::Broadcasted, axes::Tuple) = axes diff --git a/base/deprecated.jl b/base/deprecated.jl index f88a53526aa37..50e62dfdb8f04 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -531,4 +531,8 @@ end @deprecate stat(fd::Integer) stat(RawFD(fd)) +function reducedim_initarray end +const _dep_message_reducedim_initarray = ", these internals have been removed. To customize the array returned by dimensional reductions, implement mapreduce_similar instead" +deprecate(Base, :reducedim_initarray) + # END 1.12 deprecations diff --git a/base/fastmath.jl b/base/fastmath.jl index b82d613f1fc76..b757f16809f4d 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -403,11 +403,6 @@ minimum_fast(a; kw...) = Base.reduce(min_fast, a; kw...) maximum_fast(f, a; kw...) = Base.mapreduce(f, max_fast, a; kw...) minimum_fast(f, a; kw...) = Base.mapreduce(f, min_fast, a; kw...) -Base.reducedim_init(f, ::typeof(max_fast), A::AbstractArray, region) = - Base.reducedim_init(f, max, A::AbstractArray, region) -Base.reducedim_init(f, ::typeof(min_fast), A::AbstractArray, region) = - Base.reducedim_init(f, min, A::AbstractArray, region) - maximum!_fast(r::AbstractArray, A::AbstractArray; kw...) = maximum!_fast(identity, r, A; kw...) minimum!_fast(r::AbstractArray, A::AbstractArray; kw...) = diff --git a/base/reduce.jl b/base/reduce.jl index bbfd66e5686ed..c9a7744a2755a 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -245,8 +245,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...) @noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer, blksize::Int) if ifirst == ilast - @inbounds a1 = A[ifirst] - return mapreduce_first(f, op, a1) + throw(AssertionError("mapreduce_impl must not be called with only one element")) elseif ilast - ifirst < blksize # sequential portion @inbounds a1 = A[ifirst] @@ -348,6 +347,7 @@ reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:BitUnsignedSmall} = one(UI reduce_empty(op::BottomRF, ::Type{T}) where {T} = reduce_empty(op.rf, T) reduce_empty(op::MappingRF, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf, T) +reduce_empty(op::MappingRF{<:Any,<:BottomRF}, ::Type{T}) where {T} = mapreduce_empty(op.f, op.rf.rf, T) reduce_empty(op::FilteringRF, ::Type{T}) where {T} = reduce_empty(op.rf, T) reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T) @@ -1335,7 +1335,9 @@ end ## count -_bool(f) = x->f(x)::Bool +_assert_bool(x) = x::Bool +_bool(f) = _assert_bool ∘ f +mapreduce_empty(::ComposedFunction{typeof(_assert_bool), <:Any}, ::typeof(add_sum), itr) = false+false """ count([f=identity,] itr; init=0) -> Integer diff --git a/base/reducedim.jl b/base/reducedim.jl index 4ab786804ff4c..b3379835167c3 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -2,7 +2,7 @@ ## Functions to compute the reduced shape -# for reductions that expand 0 dims to 1 +# for reductions that don't reduce over a zero-length dimension reduced_index(i::OneTo{T}) where {T} = OneTo(one(T)) reduced_index(i::Union{Slice, IdentityUnitRange}) = oftype(i, first(i):first(i)) reduced_index(i::AbstractUnitRange) = @@ -13,9 +13,19 @@ reduced_index for this index type or report this as an issue. """ )) reduced_indices(a::AbstractArrayOrBroadcasted, region) = reduced_indices(axes(a), region) +reduced_indices_array(a::AbstractArrayOrBroadcasted, r::AbstractArray) = reduced_indices_axes(axes(a), axes(r), a, r) -# for reductions that keep 0 dims as 0 +# for reductions that keep 0-length reduced dims as 0-length reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region) +reduced_indices0(a::AbstractArray, r::AbstractArray) = reduced_indices(a, r) + +function reduced_indices_axes(axs::Tuple, raxs::Tuple, a, r) + length(raxs[1]) in (1,length(axs[1])) || throw(DimensionMismatch("cannot reduce $(size(a)) array into an array of $(size(r))")) + (raxs[1], reduced_indices_axes(tail(axs), tail(raxs), a, r)...) +end +reduced_indices_axes(axs::Tuple{}, raxs::Tuple{}, a, r) = () +reduced_indices_axes(axs::Tuple{}, raxs::Tuple, a, r) = throw(DimensionMismatch("cannot reduce $(size(a)) array into an array of $(size(r))")) +reduced_indices_axes(axs::Tuple, raxs::Tuple{}, a, r) = (reduced_index(axs[1]), reduced_indices_axes(tail(axs), (), a, r)...) function reduced_indices(axs::Indices{N}, region) where N _check_valid_region(region) @@ -27,244 +37,73 @@ function reduced_indices0(axs::Indices{N}, region) where N ntuple(d -> d in region && !isempty(axs[d]) ? reduced_index(axs[d]) : axs[d], Val(N)) end -function _check_valid_region(region) - for d in region - isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers")) - Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d")) - end +# The inverse of reduced_indices +inner_indices(a::AbstractArrayOrBroadcasted, region) = inner_indices(axes(a), region) +function inner_indices(axs::Indices{N}, region) where N + ntuple(d -> d in region ? axs[d] : reduced_index(axs[d]), Val(N)) end -###### Generic reduction functions ##### +# Given an outer and an inner cartesian index, merge them depending on the dims +sliceall(x) = @inbounds x[begin:end] +sliceat(x, y) = @inbounds x[y:y] -## initialization -# initarray! is only called by sum!, prod!, etc. -for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one)) - @eval initarray!(a::AbstractArray{T}, ::Any, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a) -end +mergeindices(b, x::CartesianIndices, y::CartesianIndices) = CartesianIndices(map((b,x,y)->sliceall(ifelse(b, x, y)), b, x.indices, y.indices)) +mergeindices(b, x::CartesianIndex, y::CartesianIndices) = mergeindices(b, CartesianIndices(map(sliceat, y.indices, x.I)), CartesianIndices(map(sliceall, y.indices))) +mergeindices(b, x::CartesianIndex, y::CartesianIndex) = CartesianIndex(map((b,x,y)->ifelse(b, x, y), b, x.I, y.I)) -initarray!(a::AbstractArray{T}, f, ::Union{typeof(min),typeof(max),typeof(_extrema_rf)}, - init::Bool, src::AbstractArray) where {T} = (init && mapfirst!(f, a, src); a) - -for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false)) - @eval initarray!(a::AbstractArray, ::Any, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a) -end - -# reducedim_initarray is called by -reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init) -reducedim_initarray(A::AbstractArrayOrBroadcasted, region, init::T) where {T} = reducedim_initarray(A, region, init, T) - -# TODO: better way to handle reducedim initialization -# -# The current scheme is basically following Steven G. Johnson's original implementation -# -promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b)) -promote_union(T) = T - -_realtype(::Type{<:Complex}) = Real -_realtype(::Type{Complex{T}}) where T<:Real = T -_realtype(T::Type) = T -_realtype(::Union{typeof(abs),typeof(abs2)}, T) = _realtype(T) -_realtype(::Any, T) = T - -function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region) - _reducedim_init(f, op, zero, sum, A, region) -end -function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region) - _reducedim_init(f, op, one, prod, A, region) -end -function _reducedim_init(f, op, fv, fop, A, region) - T = _realtype(f, promote_union(eltype(A))) - if T !== Any && applicable(zero, T) - x = f(zero(T)) - z = op(fv(x), fv(x)) - Tr = z isa T ? T : typeof(z) - else - z = fv(fop(f, A)) - Tr = typeof(z) - end - return reducedim_initarray(A, region, z, Tr) -end - -# initialization when computing minima and maxima requires a little care -for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min, :(-Inf), :typemin)) - @eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region) - # First compute the reduce indices. This will throw an ArgumentError - # if any region is invalid - ri = reduced_indices(A, region) - - # Next, throw if reduction is over a region with length zero - any(i -> isempty(axes(A, i)), region) && _empty_reduce_error() - - # Make a view of the first slice of the region - A1 = view(A, ri...) - - if isempty(A1) - # If the slice is empty just return non-view version as the initial array - return map(f, A1) - else - # otherwise use the min/max of the first slice as initial value - v0 = mapreduce(f, $f2, A1) - - T = _realtype(f, promote_union(eltype(A))) - Tr = v0 isa T ? T : typeof(v0) - - # but NaNs, missing and unordered values need to be avoided as initial values - if v0 isa Number && isnan(v0) - # v0 is NaN - v0 = oftype(v0, $initval) - elseif isunordered(v0) - # v0 is missing or a third-party unordered value - Tnm = nonmissingtype(Tr) - if Tnm <: Union{BitInteger, IEEEFloat, BigFloat} - v0 = $typeextreme(Tnm) - elseif !all(isunordered, A1) - v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) - end - end - # v0 may have changed type. - Tr = v0 isa T ? T : typeof(v0) - - return reducedim_initarray(A, region, v0, Tr) - end - end -end - -function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray, region) - # First compute the reduce indices. This will throw an ArgumentError - # if any region is invalid - ri = reduced_indices(A, region) - - # Next, throw if reduction is over a region with length zero - any(i -> isempty(axes(A, i)), region) && _empty_reduce_error() - - # Make a view of the first slice of the region - A1 = view(A, ri...) - - isempty(A1) && return map(f, A1) - # use the max/min of the first slice as initial value for non-empty cases - v0 = reverse(mapreduce(f, op, A1)) # turn minmax to maxmin - - T = _realtype(f.f, promote_union(eltype(A))) - Tmin = v0[1] isa T ? T : typeof(v0[1]) - Tmax = v0[2] isa T ? T : typeof(v0[2]) - - # but NaNs and missing need to be avoided as initial values - if v0[1] isa Number && isnan(v0[1]) - # v0 is NaN - v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) - elseif isunordered(v0[1]) - # v0 is missing or a third-party unordered value - Tminnm = nonmissingtype(Tmin) - Tmaxnm = nonmissingtype(Tmax) - if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} && - Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat} - v0 = (typemax(Tminnm), typemin(Tmaxnm)) - elseif !all(isunordered, A1) - v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) - end +function _check_valid_region(region) + for d in region + isa(d, Integer) || throw(ArgumentError("reduced dimension(s) must be integers")) + Int(d) < 1 && throw(ArgumentError("region dimension(s) must be ≥ 1, got $d")) end - # v0 may have changed type. - Tmin = v0[1] isa T ? T : typeof(v0[1]) - Tmax = v0[2] isa T ? T : typeof(v0[2]) - - return reducedim_initarray(A, region, v0, Tuple{Tmin,Tmax}) end -reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} = - reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T)) - -reducedim_init(f, op::typeof(&), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, true) -reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reducedim_initarray(A, region, false) - -# specialize to make initialization more efficient for common cases - -let - BitIntFloat = Union{BitInteger, IEEEFloat} - T = Union{ - Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]..., - Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...} - - global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region) - z = zero(f(zero(eltype(A)))) - reducedim_initarray(A, region, op(z, z)) - end - global function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region) - u = one(f(one(eltype(A)))) - reducedim_initarray(A, region, op(u, u)) - end -end +reducedim1(R, A) = length(axes1(R)) == 1 ## generic (map)reduction has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear() has_fast_linear_indexing(a::AbstractVector) = true -function check_reducedims(R, A) - # Check whether R has compatible dimensions w.r.t. A for reduction - # +_linear_reduction_length(A, Rax) = _linear_reduction_length(IndexStyle(A), A, Rax) +_linear_reduction_length(_, _, _) = 0 +function _linear_reduction_length(::IndexLinear, A, Rax) # It returns an integer value (useful for choosing implementation) # - If it reduces only along leading dimensions, e.g. sum(A, dims=1) or sum(A, dims=(1,2)), # it returns the length of the leading slice. For the two examples above, # it will be size(A, 1) or size(A, 1) * size(A, 2). # - Otherwise, e.g. sum(A, dims=2) or sum(A, dims=(1,3)), it returns 0. # - ndims(R) <= ndims(A) || throw(DimensionMismatch("cannot reduce $(ndims(A))-dimensional array to $(ndims(R)) dimensions")) + Aax = axes(A) lsiz = 1 - had_nonreduc = false - for i = 1:ndims(A) - Ri, Ai = axes(R, i), axes(A, i) + for i = 1:length(Aax) + Ri, Ai = get(Rax, i, OneTo(1)), Aax[i] sRi, sAi = length(Ri), length(Ai) if sRi == 1 - if sAi > 1 - if had_nonreduc - lsiz = 0 # to reduce along i, but some previous dimensions were non-reducing - else - lsiz *= sAi # if lsiz was set to zero, it will stay to be zero - end - end + lsiz *= sAi else - Ri == Ai || throw(DimensionMismatch("reduction on array with indices $(axes(A)) with output with indices $(axes(R))")) - had_nonreduc = true + return 0 end end return lsiz end -""" -Extract first entry of slices of array A into existing array R. -""" -copyfirst!(R::AbstractArray, A::AbstractArray) = mapfirst!(identity, R, A) - -function mapfirst!(f::F, R::AbstractArray, A::AbstractArray{<:Any,N}) where {N, F} - lsiz = check_reducedims(R, A) - t = _firstreducedslice(axes(R), axes(A)) - map!(f, R, view(A, t...)) -end -# We know that the axes of R and A are compatible, but R might have a different number of -# dimensions than A, which is trickier than it seems due to offset arrays and type stability -_firstreducedslice(::Tuple{}, a::Tuple{}) = () -_firstreducedslice(::Tuple, ::Tuple{}) = () -@inline _firstreducedslice(::Tuple{}, a::Tuple) = (_firstslice(a[1]), _firstreducedslice((), tail(a))...) -@inline _firstreducedslice(r::Tuple, a::Tuple) = (length(r[1])==1 ? _firstslice(a[1]) : r[1], _firstreducedslice(tail(r), tail(a))...) -_firstslice(i::OneTo) = OneTo(1) -_firstslice(i::Slice) = Slice(_firstslice(i.indices)) -_firstslice(i) = i[firstindex(i):firstindex(i)] - -function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) - lsiz = check_reducedims(R,A) +# !!! this internal function assumes that R is valid for use as an operand to `op`, and only +# visits values in A within the shape passed in `Aax`. +function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted, Aax=axes(A)) isempty(A) && return R + lsiz = _linear_reduction_length(A, axes(R)) - if has_fast_linear_indexing(A) && lsiz > 16 + if has_fast_linear_indexing(A) && lsiz > 16 && axes(A) === Aax # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) ibase = first(LinearIndices(A))-1 - for i = 1:nslices + for i in eachindex(R) # TODO: add tests for this change @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)) ibase += lsiz end return R end - indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(R)) # handle d=1 manually + indsAt, indsRt = safe_tail(Aax), safe_tail(axes(R)) # handle d=1 manually keep, Idefault = Broadcast.shapeindexer(indsRt) if reducedim1(R, A) # keep the accumulator as a local variable when reducing along the first dimension @@ -272,15 +111,15 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) @inbounds for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) r = R[i1,IR] - @simd for i in axes(A, 1) - r = op(r, f(A[i, IA])) + @simd for i in Aax[1] + r = op(r, f(A[i, IA])) # this could also be done pairwise end R[i1,IR] = r end else @inbounds for IA in CartesianIndices(indsAt) IR = Broadcast.newindex(IA, keep, Idefault) - @simd for i in axes(A, 1) + @simd for i in Aax[1] R[i,IR] = op(R[i,IR], f(A[i,IA])) end end @@ -288,10 +127,233 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) return R end +if isdefined(Core, :Compiler) + _mapreduce_might_widen(_, _, _, _) = false + function _mapreduce_might_widen(f::F, op::G, A::T, ::Nothing) where {F,G,T} + return !isconcretetype(Core.Compiler.return_type(x->op(f(first(x)), f(first(x))), Tuple{T})) + end +else + _mapreduce_might_widen(_, _, _, ::Nothing) = true + _mapreduce_might_widen(_, _, _, _) = false +end + +mapreduce_similar(A, ::Type{T}, dims) where {T} = similar(A, T, dims) +function _mapreduce_similar_curried(A) + return function(::Type{T}, dims) where {T} + mapreduce_similar(A, T, dims) + end +end + +#### Dimensional reduction implementation #### +# +# There are four (!!) orthogonal switches that affect the possible code paths here: +# * There's the number of elements in the reduction (n == 0, 1, or 2+) +# * There's the possibility of a passed initial value (specified or not) +# * There's in-place or not +# +# The relatively simpler scalar mapreduce has 2*3 branches: +# * 0 values: +# * Have init? Return it. Otherwise: +# * Return mapreduce_empty (usually an error, sometimes an unambiguous value like 0.0) +# * 1 value (a): +# * Have init? Return op(init, f(a)). Otherwise: +# * Return mapreduce_first (typically f(a)) +# * 2+ values (a1, a2): +# * Have init? Start every chain with op(op(init, f(a1)), f(a2)) and continue. Otherwise: +# * Start every chain with op(f(a1), f(a2)) and continue +# +# In the context of dimensional reduce we _additionally_ need to divine out an element type +# for the returned array. So all of the above branches need to accomodate incremental widening, +# both as more elements are added to each reduction and across the multiple reductions. +# +# There's one additional consideration for the 0 values case — the returned array might or +# might not be empty. In both cases, however, the ability to return an array (and its element +# type) is determined by `mapreduce_empty`. We also don't want to include this consideration +# _inside_ the incremental widening portion as it adds an additional `Union{}` element type +# for the error path. +# +# Finally, actually doing the work wouldn't normally be so tricky — it's just two nested `for` +# loops. The trouble is that the naive double for loop is leaving 100x+ performance on the +# table when it doesn't iterate in a cache-friendly manner. So you might indeed want to update +# a single value in `R` multiple times — and possibly widen as you do so. +# +# It's **_so very tempting_** to do the standard Julian idiom of: +# mapreduce(f, op, A, dims) = mapreduce!(f, op, init(f, op, A, dims), A) +# But that simply doesn't work because there's not _a single way_ to initialize the beginning +# of a reduction. There are SIX. But then we add support for pre-allocated result vectors and +# you even _moreso_ want to be able to just write, e.g., +# sum!(r, A) = mapreduce!(identity, +, first_step!(identity, +, r, A), A) +# But at that point you've _already done the first step_, so you need to be able to _continue_ +# the reduction from an arbitrary point in the iteration. +_mapreducedim_impl(f::Type{F}, op, init, A::AbstractArrayOrBroadcasted, raxs, out) where {F} = + _mapreducedim_impl(x->F(x), op, init, A::AbstractArrayOrBroadcasted, raxs, out) +function _mapreducedim_impl(f, op, init, A::AbstractArrayOrBroadcasted, raxs, out) + # @show f, op, init, A, raxs, out + outer_inds = CartesianIndices(raxs) + a_inds = CartesianIndices(axes(A)) + Aaxs = axes(A) + @assert length(Aaxs) == length(raxs) + # The "outer dimension" of the reduction are the preserved dimensions + is_outer_dim = map((>(1))∘length, raxs) + n = prod(map((b, ax)->ifelse(b, 1, length(ax)), is_outer_dim, Aaxs)) + + # We have special support for returning empty arrays + # isempty(outer_inds) && return collect_allocator(_mapreduce_similar_curried(A), + # (_mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds) for outer_ind in outer_inds)) + (n == 0 || isempty(A)) && return _mapreduce_empty_array(f, op, init, A, raxs, out) + n == 1 && return _mapreduce_one_array(f, op, init, A, raxs, out) + + _mapreduce_might_widen(f, op, A, out) && return collect_allocator(_mapreduce_similar_curried(A), + (_mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds) for outer_ind in outer_inds)) + + # Create the result vector with the first step of the reduction, using the passed output if given + # note that this is using a (potentially) bad cache ordering, so we don't want to continue like this + i1 = first(outer_inds) + inner1 = mergeindices(is_outer_dim, i1, a_inds) + i1, i2 = inner1 + @inbounds a1, a2 = A[i1], A[i2] + v = _mapreduce_start(f, op, init, a1, a2) + R = isnothing(out) ? mapreduce_similar(A, typeof(v), raxs) : out + @inbounds R[i1] = v + for i in Iterators.drop(outer_inds, 1) + innerj = mergeindices(is_outer_dim, i, a_inds) + j1, j2 = innerj + @inbounds c1, c2 = A[j1], A[j2] + @inbounds R[i] = _mapreduce_start(f, op, init, c1, c2) + end + # If we only had two elements we can quickly return + # n == 2 && return R + + lsiz = _linear_reduction_length(A, raxs) + if has_fast_linear_indexing(A) && lsiz > 0 + ibase = first(LinearIndices(A))-1 + if lsiz > 16 + # use mapreduce_impl, which is better tuned to achieve higher performance and works pairwise + for i in eachindex(R) + @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+3, ibase+lsiz)) + ibase += lsiz + end + else + for i in eachindex(R) + @inbounds r = R[i] + @simd for Ai in ibase+3:ibase+lsiz + r = op(r, f(@inbounds(A[Ai]))) + end + @inbounds R[i] = r + ibase += lsiz + end + end + else + # Take care of the smallest cartesian "rectangle" that includes our two peeled elements + small_inner, large_inner = _split2(inner1) + # and we may as well try to do this small part in a cache-advantageous manner + if something(findfirst((>)(1), size(inner1))) < something(findfirst((>)(1), size(outer_inds)), typemax(Int)) + for iR in outer_inds + @inbounds r = R[iR] + inner_inds = mergeindices(is_outer_dim, CartesianIndices(map(sliceat, Aaxs, iR.I)), small_inner) + for iA in Iterators.drop(inner_inds, 2) + r = op(r, f(@inbounds(A[iA]))) + end + @inbounds R[iR] = r + end + else + for iA in Iterators.drop(small_inner, 2) + for iR in outer_inds + iA = mergeindices(is_outer_dim, iR, iA) + v = op(@inbounds(R[iR]), f(@inbounds(A[iA]))) + @inbounds R[iR] = v + end + end + end + # And then if there's anything left, defer to _mapreducedim! + if !isempty(large_inner) + large_inds = mergeindices(is_outer_dim, outer_inds, large_inner) + _mapreducedim!(f, op, R, A, large_inds.indices) + end + end + return R +end + +function _mapreduce_naive_inner_loop(f, op, init, A, is_outer_dim, outer_ind, a_inds) + inner_inds = mergeindices(is_outer_dim, outer_ind, a_inds) + i1, i2 = inner_inds + @inbounds a1, a2 = A[i1], A[i2] + r = _mapreduce_start(f, op, init, a1, a2) + for i in Iterators.drop(inner_inds, 2) + r = op(r, f(@inbounds A[i])) + end + return r +end + +# Split a CartesianIndices into two parts such that we can Iterators.drop 2 elements from the smallest +# rectangular subsection and then iterate more quickly over the larger remainder. +# +# We have a choice in how we do this split: we can take the away the first column, the first *two* rows, or the first page or... +# We want to pick the _smallest_ split. For example, given four non-singleton dimensions, we have a choice of: +# first: (:, :, :, 1:1) or (:, :, 1:1, :) or (:, 1:1, :, :) or (1:2, :, :, :) +# rest: (:, :, :, 2:end) or (:, :, 2:end, :) or (:, 2:end, :, :) or (3:end, :, :, :) +# With dimension lengths of (a, b, c, d), that's minimizing: (a*b*c) vs (a*b*d) vs (a*c*d) vs (2*b*c*d) +# The smallest subsection will always be the one that slices on the _longest_ dimension. +function _split2(ci::CartesianIndices{N}) where {N} + inds = ci.indices + first_non_singleton = something(findfirst((>)(1), size(ci))) + # The first non-singleton dimension "costs half" because we take two elements + lengths = map(length, inds) + weights = setindex(lengths, lengths[first_non_singleton]÷2, first_non_singleton) + sliced_dim = argmax(weights) + + nskip = Int(sliced_dim == first_non_singleton) + return (CartesianIndices(ntuple(d->d==sliced_dim ? inds[d][begin:begin+nskip] : inds[d][begin:end], Val(N))), + CartesianIndices(ntuple(d->d==sliced_dim ? inds[d][begin+nskip+1:end] : inds[d][begin:end], Val(N)))) +end + +_mapreduce_empty_array(_, _, init, A, axes, ::Nothing) = fill!(mapreduce_similar(A, typeof(init), axes), init) +_mapreduce_empty_array(_, _, init, A, axes, out) = fill!(out, init) +function _mapreduce_empty_array(f, op, ::_InitialValue, A, axes, ::Nothing) + init = mapreduce_empty(f, op, eltype(A)) + return fill!(mapreduce_similar(A, typeof(init), axes), init) +end +function _mapreduce_empty_array(f, op, ::_InitialValue, A, axes, out) + init = mapreduce_empty(f, op, eltype(A)) + return fill!(out, init) +end + +_mapreduce_one_array(f, op, init, A, axes, ::Nothing) = collect_allocator(_mapreduce_similar_curried(A), (op(init, f(@inbounds(A[i]))) for i in CartesianIndices(axes))) +_mapreduce_one_array(f, op, init, A, axes, out) = (for i in CartesianIndices(axes); out[i] = op(init, f(@inbounds(A[i]))); end; out) +_mapreduce_one_array(f, op, ::_InitialValue, A, axes, ::Nothing) = collect_allocator(_mapreduce_similar_curried(A), (mapreduce_first(f, op, @inbounds(A[i])) for i in CartesianIndices(axes))) +_mapreduce_one_array(f, op, ::_InitialValue, A, axes, out) = (for i in CartesianIndices(axes); out[i] = mapreduce_first(f, op, @inbounds(A[i])); end; out) + +# Given two initial values in a reduction, compute the beginning of a op chain +_mapreduce_start(f, op, init, a1, a2) = op(op(init, f(a1)), f(a2)) +_mapreduce_start(f, op, ::_InitialValue, a1, a2) = op(f(a1), f(a2)) + +""" + mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init]) + +Compute `mapreduce(f, op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`. + +!!! note + The previous values in `R` are _not_ used as initial values; they are completely ignored +""" +function mapreduce!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue()) + raxs = reduced_indices_array(A,R) + _mapreducedim_impl(f, op, init, A, raxs, R) +end +""" + reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; [init]) + +Compute `reduce(op, A; init, dims)` where `dims` are the singleton dimensions of `R`, storing the result into `R`. + +!!! note + The previous values in `R` are _not_ used as initial values; they are completely ignored +""" +reduce!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted; init=_InitialValue()) = mapreduce!(identity, op, R, A; init) + +# !!! this assumes that R holds the initial values mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted) = - (_mapreducedim!(f, op, R, A); R) + (reduced_indices_array(A, R); _mapreducedim!(f, op, R, A); R) -reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} = +reducedim!(op, R::AbstractArray, A::AbstractArrayOrBroadcasted) = mapreducedim!(identity, op, R, A) """ @@ -329,15 +391,14 @@ mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted... _mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) = mapfoldl_impl(f, op, nt, A) +function _mapreduce_dim(f, op, init, A::AbstractArrayOrBroadcasted, dims) + raxs = reduced_indices(A, dims) + _mapreducedim_impl(f, op, init, A, raxs, nothing) +end + _mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) = _mapreduce(f, op, IndexStyle(A), A) -_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) = - mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A) - -_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) = - mapreducedim!(f, op, reducedim_init(f, op, A, dims), A) - """ reduce(f, A::AbstractArray; dims=:, [init]) @@ -403,11 +464,7 @@ julia> count(<=(2), A, dims=2) 0 ``` """ -count(A::AbstractArrayOrBroadcasted; dims=:, init=0) = count(identity, A; dims, init) -count(f, A::AbstractArrayOrBroadcasted; dims=:, init=0) = _count(f, A, dims, init) - -_count(f, A::AbstractArrayOrBroadcasted, dims::Colon, init) = _simple_count(f, A, init) -_count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_sum, A; dims, init) +count(f, A::AbstractArray; dims=:) """ count!([f=identity,] r, A) @@ -437,9 +494,7 @@ julia> count!(<=(2), [1; 1], A) 0 ``` """ -count!(r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) = count!(identity, r, A; init=init) -count!(f, r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) = - mapreducedim!(_bool(f), add_sum, initarray!(r, f, add_sum, init, A), A) +count!(f, r::AbstractArray, A::AbstractArray) """ sum(A::AbstractArray; dims) @@ -970,8 +1025,9 @@ any!(r, A) for (fname, _fname, op) in [(:sum, :_sum, :add_sum), (:prod, :_prod, :mul_prod), (:maximum, :_maximum, :max), (:minimum, :_minimum, :min), - (:extrema, :_extrema, :_extrema_rf)] - mapf = fname === :extrema ? :(ExtremaMap(f)) : :f + (:count, :_count, :add_sum), (:extrema, :_extrema, :_extrema_rf)] + mapf = fname === :extrema ? :(ExtremaMap(f)) : + fname === :count ? :(_bool(f)) : :f @eval begin # User-facing methods with keyword arguments @inline ($fname)(a::AbstractArray; dims=:, kw...) = ($_fname)(a, dims; kw...) @@ -982,7 +1038,7 @@ for (fname, _fname, op) in [(:sum, :_sum, :add_sum), (:prod, :_prod, ($_fname)(f, a, ::Colon; kw...) = mapreduce($mapf, $op, a; kw...) end end - +# TODO: why are any/all different? any(a::AbstractArray; dims=:) = _any(a, dims) any(f::Function, a::AbstractArray; dims=:) = _any(f, a, dims) _any(a, ::Colon) = _any(identity, a, :) @@ -993,13 +1049,14 @@ _all(a, ::Colon) = _all(identity, a, :) for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod), (:maximum, :max), (:minimum, :min), (:all, :&), (:any, :|), - (:extrema, :_extrema_rf)] + (:count, :add_sum), (:extrema, :_extrema_rf)] fname! = Symbol(fname, '!') _fname = Symbol('_', fname) - mapf = fname === :extrema ? :(ExtremaMap(f)) : :f + mapf = fname === :extrema ? :(ExtremaMap(f)) : + fname === :count ? :(_bool(f)) : :f @eval begin - $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) = - mapreducedim!($mapf, $(op), initarray!(r, $mapf, $(op), init, A), A) + $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) = init ? + mapreduce!($mapf, $op, r, A) : mapreducedim!($mapf, $op, r, A) $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init) $(_fname)(A, dims; kw...) = $(_fname)(identity, A, dims; kw...) @@ -1008,57 +1065,62 @@ for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod), end ##### findmin & findmax ##### -# The initial values of Rval are not used if the corresponding indices in Rind are 0. -# -function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N} - (isempty(Rval) || isempty(A)) && return Rval, Rind - lsiz = check_reducedims(Rval, A) - for i = 1:N - axes(Rval, i) == axes(Rind, i) || throw(DimensionMismatch("Find-reduction: outputs must have the same indices")) - end - # If we're reducing along dimension 1, for efficiency we can make use of a temporary. - # Otherwise, keep the result in Rval/Rind so that we traverse A in storage order. - indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(Rval)) - keep, Idefault = Broadcast.shapeindexer(indsRt) - ks = keys(A) - y = iterate(ks) - zi = zero(eltype(ks)) - if reducedim1(Rval, A) - i1 = first(axes1(Rval)) - @inbounds for IA in CartesianIndices(indsAt) - IR = Broadcast.newindex(IA, keep, Idefault) - tmpRv = Rval[i1,IR] - tmpRi = Rind[i1,IR] - for i in axes(A,1) - k, kss = y::Tuple - tmpAv = f(A[i,IA]) - if tmpRi == zi || op(tmpRv, tmpAv) - tmpRv = tmpAv - tmpRi = k - end - y = iterate(ks, kss) - end - Rval[i1,IR] = tmpRv - Rind[i1,IR] = tmpRi - end - else - @inbounds for IA in CartesianIndices(indsAt) - IR = Broadcast.newindex(IA, keep, Idefault) - for i in axes(A, 1) - k, kss = y::Tuple - tmpAv = f(A[i,IA]) - tmpRv = Rval[i,IR] - tmpRi = Rind[i,IR] - if tmpRi == zi || op(tmpRv, tmpAv) - Rval[i,IR] = tmpAv - Rind[i,IR] = k - end - y = iterate(ks, kss) - end - end - end - Rval, Rind + +struct PairsArray{T,N,A} <: AbstractArray{T,N} + array::A +end +PairsArray(array::AbstractArray{T,N}) where {T, N} = PairsArray{Tuple{keytype(array),T}, N, typeof(array)}(array) +const PairsVector{T,A} = PairsArray{T, 1, A} +IndexStyle(::PairsVector) = IndexLinear() +IndexStyle(::PairsArray) = IndexCartesian() +size(P::PairsArray) = size(P.array) +axes(P::PairsArray) = axes(P.array) +@inline function getindex(P::PairsVector, i::Int) + @boundscheck checkbounds(P, i) + @inbounds (i, P.array[i]) end +@inline function getindex(P::PairsArray{<:Any,N}, I::CartesianIndex{N}) where {N} + @boundscheck checkbounds(P, I) + @inbounds (I, P.array[I]) +end +@propagate_inbounds getindex(P::PairsVector, i::CartesianIndex{1}) = P[i.I[1]] +@propagate_inbounds getindex(P::PairsArray{<:Any,N}, I::Vararg{Int, N}) where {N} = P[CartesianIndex(I)] +# Defer a few functions to the parent array +similar(P::PairsArray, ::Type{T}, dims::Tuple{Union{Int, AbstractUnitRange}, Vararg{Union{Int, AbstractUnitRange}}}) where {T} = similar(P.array, T, dims) +similar(P::PairsArray, ::Type{T}, dims::Tuple{Union{Int, Base.OneTo}, Vararg{Union{Int, Base.OneTo}}}) where {T} = similar(P.array, T, dims) +similar(P::PairsArray, ::Type{T}, dims::Tuple{Int, Vararg{Int}}) where {T} = similar(P.array, T, dims) +similar(P::PairsArray, ::Type{T}, dims::Tuple{}) where {T} = similar(P.array, T, dims) +mapreduce_similar(P::PairsArray, ::Type{T}, dims) where {T} = mapreduce_similar(P.array, T, dims) + +# Use an ad-hoc specialized StructArray to allow in-place AoS->SoA transform +struct ZippedArray{T,N,Style,A,B} <: AbstractArray{T,N} + first::A + second::B +end +function ZippedArray(A::AbstractArray{T,N},B::AbstractArray{S,N}) where {T,S,N} + axes(A) == axes(B) || throw(DimensionMismatch("both arrays must have the same shape")) + # TODO: It'd be better if we could transform a Tuple{Int, Union{Int, Missing}} to Union{Tuple{Int,Int}, Tuple{Int, Missing}} + ZippedArray{Tuple{T,S},N,IndexStyle(A,B),typeof(A),typeof(B)}(A,B) +end +size(Z::ZippedArray) = size(Z.first) +axes(Z::ZippedArray) = axes(Z.first) +IndexStyle(::ZippedArray{<:Any,<:Any,Style}) where {Style} = Style +@inline function getindex(Z::ZippedArray, I::Int...) + @boundscheck checkbounds(Z, I...) + @inbounds (Z.first[I...], Z.second[I...]) +end +@propagate_inbounds setindex!(Z::ZippedArray{T}, v, I::Int...) where {T} = setindex!(Z, convert(T, v), I...) +@inline function setindex!(Z::ZippedArray{T}, v::T, I::Int...) where {T} + @boundscheck checkbounds(Z, I...) + @inbounds Z.first[I...] = v[1] + @inbounds Z.second[I...] = v[2] + return Z +end +_unzip(Z::ZippedArray) = (Z.first, Z.second) +_unzip(A::AbstractArray) = ([a[1] for a in A], [a[2] for a in A]) + +_transform_pair(f) = x-> (x[1], f(x[2])) +_transform_pair(f::typeof(identity)) = f """ findmin!(rval, rind, A) -> (minval, index) @@ -1069,9 +1131,15 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. $(_DOCS_ALIASING_WARNING) """ -function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; +findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmin!(identity, rval, rind, A; init) +function findmin!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) - findminmax!(identity, isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A) + if init + mapreduce!(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A)) + else + mapreducedim!(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A)) + end + return (rval, rind) end """ @@ -1120,19 +1188,16 @@ julia> findmin(abs2, A, dims=2) findmin(f, A::AbstractArray; dims=:) = _findmin(f, A, dims) function _findmin(f, A, region) - ri = reduced_indices0(A, region) - if isempty(A) - if prod(map(length, reduced_indices(A, region))) != 0 - throw(ArgumentError("collection slices must be non-empty")) - end - similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri) + if f === identity + # Fast path with pre-allocated arrays + axs = reduced_indices(A, region) + return findmin!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A) else - fA = f(first(A)) - findminmax!(f, isgreater, fill!(similar(A, _findminmax_inittype(f, A), ri), fA), - zeros(eltype(keys(A)), ri), A) + P = _mapreduce_dim(_transform_pair(f), (x,y)->ifelse(isgreater(x[2], y[2]), y, x), _InitialValue(), PairsArray(A), region) + (inds, vals) = _unzip(P) + return (vals, inds) end end - """ findmax!(rval, rind, A) -> (maxval, index) @@ -1142,9 +1207,15 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. $(_DOCS_ALIASING_WARNING) """ -function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; +findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) = findmax!(identity, rval, rind, A; init) +function findmax!(f, rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) - findminmax!(identity, isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A) + if init + mapreduce!(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A)) + else + mapreducedim!(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), ZippedArray(rind, rval), PairsArray(A)) + end + return (rval, rind) end """ @@ -1191,33 +1262,17 @@ julia> findmax(abs2, A, dims=2) ``` """ findmax(f, A::AbstractArray; dims=:) = _findmax(f, A, dims) - function _findmax(f, A, region) - ri = reduced_indices0(A, region) - if isempty(A) - if prod(map(length, reduced_indices(A, region))) != 0 - throw(ArgumentError("collection slices must be non-empty")) - end - similar(A, promote_op(f, eltype(A)), ri), zeros(eltype(keys(A)), ri) + if f === identity + axs = reduced_indices(A, region) + return findmax!(identity, mapreduce_similar(A, eltype(A), axs), mapreduce_similar(A, keytype(A), axs), A) else - fA = f(first(A)) - findminmax!(f, isless, fill!(similar(A, _findminmax_inittype(f, A), ri), fA), - zeros(eltype(keys(A)), ri), A) + P = _mapreduce_dim(_transform_pair(f), (x,y)->ifelse(isless(x[2], y[2]), y, x), _InitialValue(), PairsArray(A), region) + (inds, vals) = _unzip(P) + return (vals, inds) end end -function _findminmax_inittype(f, A::AbstractArray) - T = _realtype(f, promote_union(eltype(A))) - v0 = f(first(A)) - # First conditional: T is >: typeof(v0), so return it - # Second conditional: handle missing specifically, as most often, f(missing) = missing; - # certainly, some predicate functions return Bool, but not all. - # Else, return the type of the transformation. - Tr = v0 isa T ? T : Missing <: eltype(A) ? Union{Missing, typeof(v0)} : typeof(v0) -end - -reducedim1(R, A) = length(axes1(R)) == 1 - """ argmin(A; dims) -> indices diff --git a/test/reduce.jl b/test/reduce.jl index f5140c8a34bd9..484c5c14636a4 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -666,10 +666,12 @@ x = [j+7 for j in i] end # make sure we specialize on mapfoldl(::Type, ...) -@test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6 +@testset "inference" begin + @test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6 -# issue #39281 -@test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}} + # issue #39281 + @test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}} +end # issue #38627 @testset "overflow in mapreduce" begin @@ -732,3 +734,11 @@ end Val(any(in((:one,:two,:three)),(:four,:three))) end |> only == Val{true} end + +@testset "type stability of internals for mean (etc); Statistics#160" begin + @test (@inferred Missing mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0 + @test (@inferred Union{Missing,Int} mapreduce(identity, (x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0 + + @test (@inferred Missing reduce((x,y)->Base.add_sum(x, y)/1, view([1.0,2.0,missing],1:2), dims=:)) == 3.0 + @test (@inferred Union{Missing,Int} reduce((x,y)->Base.add_sum(x, y)/1, view([1,2,missing],1:2), dims=:)) == 3.0 +end diff --git a/test/reducedim.jl b/test/reducedim.jl index 8f629fa83f28d..11f6975756763 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -2,6 +2,9 @@ using Random +isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") +using .Main.OffsetArrays: OffsetVector, OffsetArray + # main tests # issue #35800 @@ -173,7 +176,7 @@ end x = sum(Real[1.0], dims=1) @test x == [1.0] - @test x isa Vector{Real} + @test x isa Vector{<:Real} x = mapreduce(cos, +, Union{Int,Missing}[1, 2], dims=1) @test x == mapreduce(cos, +, [1, 2], dims=1) @@ -209,19 +212,19 @@ end for f in (minimum, maximum) @test_throws "reducing over an empty collection is not allowed" f(A, dims=1) - @test isequal(f(A, dims=2), zeros(Int, 0, 1)) + @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=2), zeros(Int, 0, 1)) @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2)) - @test isequal(f(A, dims=3), zeros(Int, 0, 1)) + @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=3), zeros(Int, 0, 1)) end for f in (findmin, findmax) - @test_throws ArgumentError f(A, dims=1) - @test isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(A, dims=(1, 2)) - @test isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(abs2, A, dims=1) - @test isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) - @test_throws ArgumentError f(abs2, A, dims=(1, 2)) - @test isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) + @test_throws "reducing over an empty collection is not allowed" f(A, dims=1) + @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) + @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2)) + @test_throws "reducing over an empty collection is not allowed" isequal(f(A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=1) + @test_throws "reducing over an empty collection is not allowed" isequal(f(abs2, A, dims=2), (zeros(Int, 0, 1), zeros(Int, 0, 1))) + @test_throws "reducing over an empty collection is not allowed" f(abs2, A, dims=(1, 2)) + @test_throws "reducing over an empty collection is not allowed" isequal(f(abs2, A, dims=3), (zeros(Int, 0, 1), zeros(Int, 0, 1))) end end @@ -541,6 +544,14 @@ end end end +f44906(x) = maximum(f44906, x); f44906(x::Number) = x +g44906(x) = mapreduce(g44906, max, x); g44906(x::Number) = x +@testset "inference with minimum/maximum, issue #44906" begin + x = [[1, 2], [3, 4]]; + @test @inferred(f44906(x)) == 4 + @test @inferred(g44906(x)) == 4 +end + # issue #6672 @test sum(Real[1 2 3; 4 5.3 7.1], dims=2) == reshape([6, 16.4], 2, 1) @test sum(Any[1 2;3 4], dims=1) == [4 6] @@ -610,9 +621,10 @@ function unordered_test_for_extrema(a; dims_test = ((), 1, 2, (1,2), 3)) for dims in dims_test vext = extrema(a; dims) vmin, vmax = minimum(a; dims), maximum(a; dims) - @test isequal(extrema!(copy(vext), a), vext) + @test isequal(extrema!(similar(vext, NTuple{2, eltype(a)}), a), vext) @test all(x -> isequal(x[1], x[2:3]), zip(vext,vmin,vmax)) end + true end @testset "0.0,-0.0 test for extrema with dims" begin @test extrema([-0.0;0.0], dims = 1)[1] === (-0.0,0.0) @@ -623,13 +635,13 @@ end for T in (Int, Float64, BigFloat, BigInt) Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz)) Aₘ[rand(1:sz*sz, sz)] .= missing - unordered_test_for_extrema(Aₘ) + @test unordered_test_for_extrema(Aₘ) if T <: AbstractFloat Aₙ = map(i -> ismissing(i) ? T(NaN) : i, Aₘ) - unordered_test_for_extrema(Aₙ) + @test unordered_test_for_extrema(Aₙ) p = rand(1:sz*sz, sz) Aₘ[p] .= NaN - unordered_test_for_extrema(Aₘ) + @test unordered_test_for_extrema(Aₘ) end end end @@ -696,3 +708,225 @@ end @test_broken @inferred(maximum(exp, A; dims = 1))[1] === missing @test_broken @inferred(extrema(exp, A; dims = 1))[1] === (missing, missing) end + +@testset "generic sum reductions; issue #31427" begin + add31427(a, b) = a+b + A = rand(1:10, 5, 5) + @test reduce(add31427, A, dims = (1, 2)) == reduce(+, A, dims = (1, 2)) == [sum(A);;] + + As = [rand(5, 4) for _ in 1:2, _ in 1:3] + @test reduce(hcat, reduce(vcat, As, dims=1)) == [As[1,1] As[1,2] As[1,3]; As[2,1] As[2,2] As[2,3]] +end + +@testset "sum with `missing`s; issue #55213 and #32366" begin + @test isequal(sum([0.0 1; 0.0 missing], dims=2), [1.0; missing;;]) + @test sum([0.0 1; 0.0 missing], dims=2)[1] === 1.0 + @test isequal(sum(Any[0.0 1; 0.0 missing], dims=2), [1.0; missing;;]) + @test sum(Any[0.0 1; 0.0 missing], dims=2)[1] === 1.0 + + @test isequal(sum([1 0.0; 0.0 missing], dims=1), [1.0 missing]) + @test sum([1 0.0; 0.0 missing], dims=1)[1] === 1.0 + @test isequal(sum(Any[1 0.0; 0.0 missing], dims=1), [1.0 missing]) + @test sum(Any[1 0.0; 0.0 missing], dims=1)[1] === 1.0 + + @test isequal(sum([true false missing], dims=2), sum([1 0 missing], dims=2)) + @test isequal(sum([true false missing], dims=2), [sum([true false missing]);;]) + @test isequal(sum([true false missing], dims=2), [missing;;]) +end + +@testset "issues #45566 and #47231; initializers" begin + @test reduce(gcd, [1]; dims=1) == [reduce(gcd, [1])] == [1] + + x = reshape(1:6, 2, 3) + @test reduce(+, x, dims=2) == [9;12;;] + @test reduce(-, x, dims=1) == [1-2 3-4 5-6] + @test reduce(/, x, dims=1) == [1/2 3/4 5/6] + @test reduce(^, x, dims=1) == [1^2 3^4 5^6] + # These have arbitrary associativity, but they shouldn't error + @test reduce(-, x, dims=2) in ([(1-3)-5; (2-4)-6;;], [1-(3-5); 2-(4-6);;]) + @test reduce(/, x, dims=2) in ([(1/3)/5; (2/4)/6;;], [1/(3/5); 2/(4/6);;]) + @test reduce(^, x, dims=2) in ([(1^3)^5; (2^4)^6;;], [1^(3^5); 2^(4^6);;]) +end + +@testset "better initializations" begin + @test mapreduce(_ -> pi, +, [1,2]; dims=1) == [mapreduce(_ -> pi, +, [1,2])] == [2pi] + @test mapreduce(_ -> pi, +, [1 2]; dims=2) == [mapreduce(_ -> pi, +, [1 2]);;] == [2pi;;] + @test mapreduce(_ -> pi, +, [1 2]; dims=(1,2)) == [2pi;;] + @test mapreduce(_ -> pi, +, [1]; dims=1) == [mapreduce(_ -> pi, +, [1])] == [pi] + @test mapreduce(_ -> pi, +, [1;;]; dims=2) == [mapreduce(_ -> pi, +, [1;;]);;] == [pi;;] + @test_throws ArgumentError mapreduce(_ -> pi, +, []; dims=1) + @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=2) + @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]; dims=(1,2)) + @test_throws ArgumentError mapreduce(_ -> pi, +, []) + @test_throws ArgumentError mapreduce(_ -> pi, +, [;;]) + + @test mapreduce(x -> log(x-1), +, [2,3,4]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3,4])] == [log(1) + log(2) + log(3)] + @test mapreduce(x -> log(x-1), +, [2,3]; dims=1) == [mapreduce(x -> log(x-1), +, [2,3])] == [log(1) + log(2)] + @test mapreduce(x -> log(x-1), +, [2]; dims=1) == [mapreduce(x -> log(x-1), +, [2])] == [log(1)] + @test mapreduce(x -> log(x-1), +, [2 3 4]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3,4]);;] == [log(1) + log(2) + log(3);;] + @test mapreduce(x -> log(x-1), +, [2 3]; dims=2) == [mapreduce(x -> log(x-1), +, [2,3]);;] == [log(1) + log(2);;] + @test mapreduce(x -> log(x-1), +, [2;;]; dims=2) == [mapreduce(x -> log(x-1), +, [2]);;] == [log(1);;] + @test_throws ArgumentError mapreduce(x -> log(x-1), +, []; dims=1) + @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=2) + @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]; dims=(1,2)) + @test_throws ArgumentError mapreduce(x -> log(x-1), +, []) + @test_throws ArgumentError mapreduce(x -> log(x-1), +, [;;]) + + @test sum(x->sqrt(x-1), ones(5); dims=1) == [sum(x->sqrt(x-1), ones(5))] == [0.0] + @test sum(x->sqrt(x-1), ones(1); dims=1) == [sum(x->sqrt(x-1), ones(1))] == [0.0] + @test_throws ArgumentError sum(x->sqrt(x-1), ones(0); dims=1) + @test_throws ArgumentError sum(x->sqrt(x-1), ones(0)) +end + +@testset "reductions on broadcasted; issue #41054" begin + A = clamp.(randn(3,4), -1, 1) + bc = Base.broadcasted(+, A, 2) + @test sum(bc, dims=1) ≈ sum(A .+ 2, dims=1) + @test mapreduce(sqrt, +, bc, dims=1) ≈ mapreduce(sqrt, +, A .+ 2, dims=1) + + @test sum(bc, dims=2) ≈ sum(A .+ 2, dims=2) + @test mapreduce(sqrt, +, bc, dims=2) ≈ mapreduce(sqrt, +, A .+ 2, dims=2) + + @test sum(bc, dims=(1,2)) ≈ [sum(A .+ 2)] + @test mapreduce(sqrt, +, bc, dims=(1,2)) ≈ [mapreduce(sqrt, +, A .+ 2)] +end + +@testset "reductions over complex values; issue #54920" begin + A = Complex{Int}.(rand(Complex{Int8}, 2, 2, 2)); + @test maximum(abs, A; dims=(1,)) == mapreduce(abs, max, A, dims=(1,)) == [maximum(abs, A[:,1,1]) maximum(abs, A[:,2,1]);;; maximum(abs, A[:,1,2]) maximum(abs, A[:,2,2])] + @test maximum(abs, A; dims=(2,)) == mapreduce(abs, max, A, dims=(2,)) == [maximum(abs, A[1,:,1]); maximum(abs, A[2,:,1]);;; maximum(abs, A[1,:,2]); maximum(abs, A[2,:,2])] + @test maximum(abs, A; dims=(1, 2)) == mapreduce(abs, max, A, dims=(1, 2)) == [maximum(abs, A[:,:,1]);;; maximum(abs, A[:,:,2])] + @test maximum(abs, A; dims=(1, 2, 3)) == mapreduce(abs, max, A, dims=(1, 2, 3)) == [maximum(abs, A);;;] +end + +@testset "bitwise operators on integers; part of issue #45562" begin + @test mapreduce(identity, &, [3,3,3]; dims=1) == [mapreduce(identity, &, [3,3,3])] == [3 & 3 & 3] == [3] + @test mapreduce(identity, |, [3,3,3]; dims=1) == [mapreduce(identity, |, [3,3,3])] == [3 | 3 | 3] == [3] + @test mapreduce(identity, xor, [3,3,3]; dims=1) == [mapreduce(identity, xor, [3,3,3])] == [xor(xor(3, 3), 3)] == [3] + + @test mapreduce(identity, &, [3,7,6]; dims=1) == [mapreduce(identity, &, [3,7,6])] == [3 & 7 & 6] == [2] + @test mapreduce(identity, |, [3,7,6]; dims=1) == [mapreduce(identity, |, [3,7,6])] == [3 | 7 | 6] == [7] + @test mapreduce(identity, xor, [3,7,6]; dims=1) == [mapreduce(identity, xor, [3,7,6])] == [xor(xor(3, 7), 6)] == [2] +end + +@testset "indexing" begin + A = [1 2; 3 4] + B = [5 6; 7 8] + + @test mapreduce(/, +, A, B) ≈ sum(A./B) + @test mapreduce(i -> A[i]/B[i], +, eachindex(A,B)) ≈ sum(A./B) + @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B')) ≈ sum(A./B') + + @test mapreduce(/, +, A, B; dims=1) ≈ sum(A./B; dims=1) + @test mapreduce(i -> A[i]/B[i], +, reshape(eachindex(A,B), size(A)); dims=1) ≈ sum(A./B; dims=1) # BoundsError + @test mapreduce(i -> A[i]/B'[i], +, eachindex(A,B'); dims=1) ≈ sum(A./B'; dims=1) # BoundsError +end + +@testset "more related to issue #26488" begin + @test mapreduce(x -> log10(x-1), +, [11, 101]) ≈ 3.0 + @test mapreduce(x -> log10(x-1), *, [11, 101]) ≈ 2.0 + @test mapreduce(x -> log10(x-1), +, [11, 101]; dims=1) ≈ [3.0] + @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10) ≈ -7.0 + @test mapreduce(x -> log10(x-1), +, [11, 101]; init=-10, dims=1) ≈ [-7.0] + + @test mapreduce(_ -> pi, +, [1,2]) ≈ 2pi + @test mapreduce(_ -> pi, *, [1,2]) ≈ pi^2 + @test mapreduce(_ -> pi, +, [1,2]; dims=1) ≈ [2pi] +end + +_add(x,y) = x+y # this avoids typeof(+) dispatch + +@testset "mapreduce fast paths, dims=:, op=$op, kw=$kw" for op in (+,_add), + kw in ((;), (; init=0.0)) + @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:7; kw...) + @test_broken 0 == @allocated mapreduce(/, op, 1:3, 4:9; kw...) # stops early + @test mapreduce(/, op, 1:3, 4:7; kw...) ≈ reduce(op, map(/, 1:3, 4:7); kw...) + @test mapreduce(/, op, 1:3, 4:9; kw...) ≈ reduce(op, map(/, 1:3, 4:9); kw...) + + A = [1 2; 3 4] + B = [5 6; 7 8] + + @test_broken 0 == @allocated mapreduce(*, op, A, B; kw...) # LinearIndices + @test_broken 0 == @allocated mapreduce(*, op, A, B'; kw...) # CartesianIndices + + @test mapreduce(*, op, A, B; kw...) == reduce(op, map(*, A, B); kw...) + @test mapreduce(*, op, A, B'; kw...) == reduce(op, map(*, A, B'); kw...) + + @test_broken 0 == @allocated mapreduce(*, op, A, 5:7; kw...) # stops early + @test_broken 0 == @allocated mapreduce(*, op, 1:3, B'; kw...) # stops early + @test mapreduce(*, op, A, 5:7; kw...) == reduce(op, map(*, A, 5:7); kw...) + @test mapreduce(*, op, 1:3, B'; kw...) == reduce(op, map(*, 1:3, B'); kw...) + @test mapreduce(*, op, 1:3, B', 10:20; kw...) == reduce(op, map(*, 1:3, B', 10:20); kw...) + + @test_throws DimensionMismatch map(*, A, hcat(B, 9:10)) # same ndims, does not stop early + @test_throws DimensionMismatch mapreduce(*, op, A, hcat(B, 9:10); kw...) +end + +@testset "mapreduce fast paths, dims=$dims, op=$op, kw=$kw" for dims in (1,2,[2],(1,2),3), + op in (+,*,_add), + kw in ((;), (; init=0.0)) + (kw == (;) && op == _add) && continue + + @test mapreduce(/, op, 1:3, 4:7; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:7); dims, kw...) + @test mapreduce(/, op, 1:3, 4:9; dims, kw...) ≈ reduce(op, map(/, 1:3, 4:9); dims, kw...) + + A = [1 2; 3 4] + B = [5 6; 7 8] + @test mapreduce(*, op, A, B; dims, kw...) == reduce(op, map(*, A, B); dims, kw...) # LinearIndices + @test mapreduce(*, op, A, B'; dims, kw...) == reduce(op, map(*, A, B'); dims, kw...) # CartesianIndices + + @test_broken @allocated(mapreduce(*, op, A, B; dims, kw...)) < @allocated(reduce(op, map(*, A, B); dims, kw...)) + @test_broken @allocated(mapreduce(*, op, A, B'; dims, kw...)) < @allocated(reduce(op, map(*, A, B'); dims, kw...)) + + @test mapreduce(*, op, A, 5:7; dims, kw...) == reduce(op, map(*, A, 5:7); dims, kw...) # stops early + @test mapreduce(*, op, 1:3, B'; dims, kw...) == reduce(op, map(*, 1:3, B'); dims, kw...) + + @test_throws DimensionMismatch mapreduce(*, +, A, hcat(B, 9:10); dims, kw...) +end + +struct Infinity21097 <: Number +end +Base.:+(::Infinity21097,::Infinity21097) = Infinity21097() +@testset "don't call zero on unknown numbers, issue #21097" begin + @test reduce(+, [Infinity21097()], init=Infinity21097()) == Infinity21097() + @test reduce(+, [Infinity21097()], init=Infinity21097(), dims=1) == [Infinity21097()] + @test reduce(+, [Infinity21097();;], init=Infinity21097(), dims=2) == [Infinity21097();;] + for len in [2,15,16,17,31,32,33,63,64,65,127,128,129] + for init in ((), (; init=Infinity21097())) + @test reduce(+,fill(Infinity21097(), len); init...) == Infinity21097() + @test reduce(+,fill(Infinity21097(), len, 2); init...) == Infinity21097() + @test reduce(+,fill(Infinity21097(), len, 16); init...) == Infinity21097() + @test reduce(+,fill(Infinity21097(), len); dims = 1, init...) == [Infinity21097()] + @test reduce(+,fill(Infinity21097(), len, 2); dims = 1, init...) == fill(Infinity21097(), 1, 2) + @test reduce(+,fill(Infinity21097(), len, 16); dims = 1, init...) == fill(Infinity21097(), 1, 16) + @test reduce(+,fill(Infinity21097(), len, 2); dims = 2, init...) == fill(Infinity21097(), len, 1) + @test reduce(+,fill(Infinity21097(), len, 16); dims = 2, init...) == fill(Infinity21097(), len, 1) + end + end +end + +@testset "don't assume zero is the neutral element; issue #54875" begin + A = rand(3,4) + @test sum(I -> A[I], CartesianIndices(A); dims=1) == sum(I -> A[I], CartesianIndices(A); dims=1, init=0.) +end + +@testset "cannot construct a value of type Union{} for return result; issue #43731" begin + a = collect(1.0:4) + f(x) = x < 3 ? missing : x + @test ismissing(minimum(f, a)) + @test ismissing(minimum(f, a); dims = 1) +end + +@testset "zero indices are not special, issue #38660" begin + v = OffsetVector([-1, 1], 0:1) + @test findmin(v, dims=1) == OffsetVector.(([-1], [0]), (0:0,)) + + A = rand(3,4,5) + OA = OffsetArray(A, (-1,-2,-3)) + for dims in ((), 1, 2, 3, (1,2), (1,3), (2,3)) + av, ai = findmin(A; dims) + ov, oi = findmin(OA; dims) + @test av == ov.parent + @test ai == oi.parent .+ CartesianIndex(1,2,3) + end +end diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index f8da243da6b63..cb6c7b0ded4ea 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -197,7 +197,6 @@ Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first( # Optimizations @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset) -Base._firstslice(r::IdOffsetRange) = IdOffsetRange(Base._firstslice(r.parent), r.offset) ######################################################################################################## # origin.jl