diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index 94dcc146f0afb..12c6c7f96a280 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -738,73 +738,191 @@ end # macro (.<)(A::SparseMatrixCSC, B::Number) = (.<)(full(A), B) (.<)(A::Number, B::SparseMatrixCSC) = (.<)(A, full(B)) -# Reductions - -# TODO: Should the results of sparse reductions be sparse? -function reducedim{Tv,Ti}(f::Function, A::SparseMatrixCSC{Tv,Ti}, region, v0) - if region == 1 || region == (1,) - - S = Array(Tv, 1, A.n) - @inbounds for i = 1 : A.n - Si = v0 - ccount = 0 - for j = A.colptr[i] : A.colptr[i+1]-1 - Si = f(Si, A.nzval[j]) - ccount += 1 - end - if ccount != A.m; Si = f(Si, zero(Tv)); end - S[i] = Si +## Reductions + +# In general, output of sparse matrix reductions will not be sparse, +# and computing reductions along columns into SparseMatrixCSC is +# non-trivial, so use Arrays for output +Base.reducedim_initarray{R}(A::SparseMatrixCSC, region, v0, ::Type{R}) = + fill!(Array(R,Base.reduced_dims(A,region)), v0) +Base.reducedim_initarray0{R}(A::SparseMatrixCSC, region, v0, ::Type{R}) = + fill!(Array(R,Base.reduced_dims0(A,region)), v0) + +# General mapreduce +function _mapreducezeros(f, op, T::Type, nzeros::Int, v0) + nzeros == 0 && return v0 + + # Reduce over first zero + zeroval = f(zero(T)) + v = op(v0, zeroval) + isequal(v, v0) && return v + + # Reduce over remaining zeros + for i = 2:nzeros + lastv = v + v = op(v, zeroval) + # Bail out early if we reach a fixed point + isequal(v, lastv) && break + end + + v +end + +function Base._mapreduce{T}(f, op, A::SparseMatrixCSC{T}) + z = nnz(A) + n = length(A) + if z == 0 + if n == 0 + Base.mr_empty(f, op, T) + else + _mapreducezeros(f, op, T, n-z-1, f(zero(T))) end - return S - - elseif region == 2 || region == (2,) + else + _mapreducezeros(f, op, T, n-z, Base._mapreduce(f, op, A.nzval)) + end +end - S = fill(v0, A.m, 1) - rcounts = zeros(Ti, A.m) - @inbounds for i = 1 : A.n, j = A.colptr[i] : A.colptr[i+1]-1 - row = A.rowval[j] - S[row] = f(S[row], A.nzval[j]) - rcounts[row] += 1 - end - for i = 1:A.m - if rcounts[i] != A.n; S[i] = f(S[i], zero(Tv)); end - end - return S +# Specialized mapreduce for AddFun/MulFun +_mapreducezeros(f, ::Base.AddFun, T::Type, nzeros::Int, v0) = + nzeros == 0 ? v0 : f(zero(T))*nzeros + v0 +_mapreducezeros(f, ::Base.MulFun, T::Type, nzeros::Int, v0) = + nzeros == 0 ? v0 : f(zero(T))^nzeros * v0 - elseif region == (1,2) +function Base._mapreduce{T}(f, op::Base.MulFun, A::SparseMatrixCSC{T}) + nzeros = length(A)-nnz(A) + if nzeros == 0 + # No zeros, so don't compute f(0) since it might throw + Base._mapreduce(f, op, A.nzval) + else + v = f(zero(T))^(nzeros) + # Bail out early if initial reduction value is zero + v == zero(T) ? v : v*Base._mapreduce(f, op, A.nzval) + end +end - S = v0 - @inbounds for i = 1 : A.n, j = A.colptr[i] : A.colptr[i+1]-1 - S = f(S, A.nzval[j]) +# General mapreducedim +function _mapreducerows!{T}(f, op, R::AbstractArray, A::SparseMatrixCSC{T}) + colptr = A.colptr + rowval = A.rowval + nzval = A.nzval + m, n = size(A) + @inbounds for col = 1:n + r = R[1, col] + @simd for j = colptr[col]:colptr[col+1]-1 + r = op(r, f(nzval[j])) end - if nnz(A) != A.m*A.n; S = f(S, zero(Tv)); end - - return fill(S, 1, 1) - - else - throw(ArgumentError("invalid value for region; must be 1, 2, or (1,2)")) + R[1, col] = _mapreducezeros(f, op, T, m-(colptr[col+1]-colptr[col]), r) end + R end -function maximum{T}(A::SparseMatrixCSC{T}) - isempty(A) && throw(ArgumentError("argument must not be empty")) - (nnz(A) == 0) && (return zero(T)) - m = maximum(A.nzval) - nnz(A)!=length(A) ? max(m,zero(T)) : m +function _mapreducecols!{Tv,Ti}(f, op, R::AbstractArray, A::SparseMatrixCSC{Tv,Ti}) + colptr = A.colptr + rowval = A.rowval + nzval = A.nzval + m, n = size(A) + rownz = fill(convert(Ti, n), m) + @inbounds for col = 1:n + @simd for j = colptr[col]:colptr[col+1]-1 + row = rowval[j] + R[row, 1] = op(R[row, 1], f(nzval[j])) + rownz[row] -= 1 + end + end + @inbounds for i = 1:m + R[i, 1] = _mapreducezeros(f, op, Tv, rownz[i], R[i, 1]) + end + R end -maximum{T}(A::SparseMatrixCSC{T}, region) = - isempty(A) ? similar(A, reduced_dims0(A,region)) : reducedim(Base.scalarmax,A,region,typemin(T)) +function Base._mapreducedim!{T}(f, op, R::AbstractArray, A::SparseMatrixCSC{T}) + lsiz = Base.check_reducedims(R,A) + isempty(A) && return R -function minimum{T}(A::SparseMatrixCSC{T}) - isempty(A) && throw(ArgumentError("argument must not be empty")) - (nnz(A) == 0) && (return zero(T)) - m = minimum(A.nzval) - nnz(A)!=length(A) ? min(m,zero(T)) : m + if size(R, 1) == size(R, 2) == 1 + # Reduction along both columns and rows + R[1, 1] = mapreduce(f, op, A) + elseif size(R, 1) == 1 + # Reduction along rows + _mapreducerows!(f, op, R, A) + elseif size(R, 2) == 1 + # Reduction along columns + _mapreducecols!(f, op, R, A) + else + # Reduction along a dimension > 2 + # Compute op(R, f(A)) + m, n = size(A) + nzval = A.nzval + if length(nzval) == m*n + # No zeros, so don't compute f(0) since it might throw + for col = 1:n + @simd for row = 1:size(A, 1) + @inbounds R[row, col] = op(R[row, col], f(nzval[(col-1)*m+row])) + end + end + else + colptr = A.colptr + rowval = A.rowval + zeroval = f(zero(T)) + @inbounds for col = 1:n + lastrow = 0 + for j = colptr[col]:colptr[col+1]-1 + row = rowval[j] + @simd for i = lastrow+1:row-1 # Zeros before this nonzero + R[i, col] = op(R[i, col], zeroval) + end + R[row, col] = op(R[row, col], f(nzval[j])) + lastrow = row + end + @simd for i = lastrow+1:m # Zeros at end + R[i, col] = op(R[i, col], zeroval) + end + end + end + end + R end -minimum{T}(A::SparseMatrixCSC{T}, region) = - isempty(A) ? similar(A, reduced_dims0(A,region)) : reducedim(Base.scalarmin,A,region,typemax(T)) +# Specialized mapreducedim for AddFun cols to avoid allocating a +# temporary array when f(0) == 0 +function _mapreducecols!{Tv,Ti}(f, op::Base.AddFun, R::AbstractArray, A::SparseMatrixCSC{Tv,Ti}) + nzval = A.nzval + m, n = size(A) + if length(nzval) == m*n + # No zeros, so don't compute f(0) since it might throw + for col = 1:n + @simd for row = 1:size(A, 1) + @inbounds R[row, 1] = op(R[row, 1], f(nzval[(col-1)*m+row])) + end + end + else + colptr = A.colptr + rowval = A.rowval + zeroval = f(zero(Tv)) + if isequal(zeroval, zero(Tv)) + # Case where f(0) == 0 + @inbounds for col = 1:size(A, 2) + @simd for j = colptr[col]:colptr[col+1]-1 + R[rowval[j], 1] += f(nzval[j]) + end + end + else + # Case where f(0) != 0 + rownz = fill(convert(Ti, n), m) + @inbounds for col = 1:size(A, 2) + @simd for j = colptr[col]:colptr[col+1]-1 + row = rowval[j] + R[row, 1] += f(nzval[j]) + rownz[row] -= 1 + end + end + for i = 1:m + R[i, 1] += rownz[i]*zeroval + end + end + end + R +end # findmax/min and indmax/min methods function _findz{Tv,Ti}(A::SparseMatrixCSC{Tv,Ti}, rows=1:A.m, cols=1:A.n) @@ -905,15 +1023,6 @@ findmax{Tv,Ti}(A::SparseMatrixCSC{Tv,Ti}) = (r=findmax(A,(1,2)); (r[1][1], r[2][ indmin(A::SparseMatrixCSC) = findmin(A)[2] indmax(A::SparseMatrixCSC) = findmax(A)[2] - -sum{T}(A::SparseMatrixCSC{T}) = sum(A.nzval) -sum{T}(A::SparseMatrixCSC{T}, region) = reducedim(+,A,region,zero(T)) - -prod{T}(A::SparseMatrixCSC{T}) = nnz(A)!=length(A) ? zero(T) : prod(A.nzval) -prod{T}(A::SparseMatrixCSC{T}, region) = reducedim(*,A,region,one(T)) - -mean(A::SparseMatrixCSC, region::Integer) = sum(A, region) / size(A, region) - #all(A::SparseMatrixCSC{Bool}, region) = reducedim(all,A,region,true) #any(A::SparseMatrixCSC{Bool}, region) = reducedim(any,A,region,false) #sum(A::SparseMatrixCSC{Bool}, region) = reducedim(+,A,region,0,Int) @@ -2400,3 +2509,64 @@ function hash{T}(A::SparseMatrixCSC{T}, h::UInt) h = hashrun(lastnz, runlength, h) # Hash previous run hashrun(0, length(A)-lastidx, h) # Hash zeros at end end + +## Statistics + +# This is the function that does the reduction underlying var/std +function Base.centralize_sumabs2!{S,Tv,Ti}(R::AbstractArray{S}, A::SparseMatrixCSC{Tv,Ti}, means::AbstractArray) + lsiz = Base.check_reducedims(R,A) + size(means) == size(R) || error("size of means must match size of R") + isempty(R) || fill!(R, zero(S)) + isempty(A) && return R + + colptr = A.colptr + rowval = A.rowval + nzval = A.nzval + m = size(A, 1) + n = size(A, 2) + + if size(R, 1) == size(R, 2) == 1 + # Reduction along both columns and rows + R[1, 1] = Base.centralize_sumabs2(A, means[1]) + elseif size(R, 1) == 1 + # Reduction along rows + @inbounds for col = 1:n + mu = means[col] + r = convert(S, (m-colptr[col+1]+colptr[col])*abs2(mu)) + @simd for j = colptr[col]:colptr[col+1]-1 + r += abs2(nzval[j] - mu) + end + R[1, col] = r + end + elseif size(R, 2) == 1 + # Reduction along columns + rownz = fill(convert(Ti, n), m) + @inbounds for col = 1:n + @simd for j = colptr[col]:colptr[col+1]-1 + row = rowval[j] + R[row, 1] += abs2(nzval[j] - means[row]) + rownz[row] -= 1 + end + end + for i = 1:m + R[i, 1] += rownz[i]*abs2(means[i]) + end + else + # Reduction along a dimension > 2 + @inbounds for col = 1:n + lastrow = 0 + @simd for j = colptr[col]:colptr[col+1]-1 + row = rowval[j] + for i = lastrow+1:row-1 + R[i, col] = abs2(means[i, col]) + end + R[row, col] = abs2(nzval[j] - means[row, col]) + lastrow = row + end + for i = lastrow+1:m + R[i, col] = abs2(means[i, col]) + end + end + end + return R +end diff --git a/base/statistics.jl b/base/statistics.jl index 956e61a1ec444..62d5dd3c35c68 100644 --- a/base/statistics.jl +++ b/base/statistics.jl @@ -93,6 +93,8 @@ immutable CentralizedAbs2Fun{T<:Number} <: Func{1} m::T end call(f::CentralizedAbs2Fun, x) = abs2(x - f.m) +centralize_sumabs2(A::AbstractArray, m::Number) = + mapreduce(CentralizedAbs2Fun(m), AddFun(), A) centralize_sumabs2(A::AbstractArray, m::Number, ifirst::Int, ilast::Int) = mapreduce_impl(CentralizedAbs2Fun(m), AddFun(), A, ifirst, ilast) @@ -137,7 +139,7 @@ function varm{T}(A::AbstractArray{T}, m::Number; corrected::Bool=true) n = length(A) n == 0 && return convert(momenttype(T), NaN) n == 1 && return convert(momenttype(T), abs2(A[1] - m)/(1 - Int(corrected))) - return centralize_sumabs2(A, m, 1, n) / (n - Int(corrected)) + return centralize_sumabs2(A, m) / (n - Int(corrected)) end function varm!{S}(R::AbstractArray{S}, A::AbstractArray, m::AbstractArray; corrected::Bool=true) diff --git a/base/sysimg.jl b/base/sysimg.jl index b291b971ab92a..f2865162de948 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -245,13 +245,13 @@ const × = cross include("broadcast.jl") importall .Broadcast +# statistics +include("statistics.jl") + # sparse matrices and sparse linear algebra include("sparse.jl") importall .SparseMatrix -# statistics -include("statistics.jl") - # signal processing include("fftw.jl") include("dsp.jl") diff --git a/doc/stdlib/collections.rst b/doc/stdlib/collections.rst index 385c9b1eea3de..5fd4d013543d5 100644 --- a/doc/stdlib/collections.rst +++ b/doc/stdlib/collections.rst @@ -464,16 +464,18 @@ Iterable Collections :func:`mapreduce` is functionally equivalent to calling ``reduce(op, v0, map(f, itr))``, but will in general execute faster since no intermediate collection needs to be created. See documentation for - :func:`reduce` and ``map``. + :func:`reduce` and :func:`map`. .. doctest:: julia> mapreduce(x->x^2, +, [1:3]) # == 1 + 4 + 9 14 - The associativity of the reduction is implementation-dependent. Use - :func:`mapfoldl` or :func:`mapfoldr` instead for guaranteed left or - right associativity. + The associativity of the reduction is implementation-dependent. + Additionally, some implementations may reuse the return value of + ``f`` for elements that appear multiple times in ``itr``. + Use :func:`mapfoldl` or :func:`mapfoldr` instead for guaranteed + left or right associativity and invocation of ``f`` for every value. .. function:: mapreduce(f, op, itr) diff --git a/test/sparsedir/sparse.jl b/test/sparsedir/sparse.jl index bf7959482fbcb..d7c9f9dfd3d46 100644 --- a/test/sparsedir/sparse.jl +++ b/test/sparsedir/sparse.jl @@ -178,12 +178,46 @@ b = randn(3) @test scale(0.5, dA) == scale!(0.5, copy(sA)) # reductions -@test sum(se33)[1] == 3.0 -@test sum(se33, 1) == [1.0 1.0 1.0] -@test sum(se33, 2) == [1.0 1.0 1.0]' -@test prod(se33)[1] == 0.0 -@test prod(se33, 1) == [0.0 0.0 0.0] -@test prod(se33, 2) == [0.0 0.0 0.0]' +pA = sparse(rand(3, 7)) + +for arr in (se33, sA, pA) + for f in (sum, prod, minimum, maximum, var) + farr = full(arr) + @test_approx_eq f(arr) f(farr) + @test_approx_eq f(arr, 1) f(farr, 1) + @test_approx_eq f(arr, 2) f(farr, 2) + @test_approx_eq f(arr, (1, 2)) [f(farr)] + @test isequal(f(arr, 3), f(farr, 3)) + end +end + +for f in (sum, prod, minimum, maximum) + # Test with a map function that maps to non-zero + for arr in (se33, sA, pA) + @test_approx_eq f(x->x+1, arr) f(arr+1) + end + + # case where f(0) would throw + @test_approx_eq f(x->sqrt(x-1), pA+1) f(sqrt(pA)) + # these actually throw due to #10533 + # @test_approx_eq f(x->sqrt(x-1), pA+1, 1) f(sqrt(pA), 1) + # @test_approx_eq f(x->sqrt(x-1), pA+1, 2) f(sqrt(pA), 2) + # @test_approx_eq f(x->sqrt(x-1), pA+1, 3) f(pA) +end + +# empty cases +@test sum(sparse(Int[])) === 0 +@test prod(sparse(Int[])) === 1 +@test_throws ArgumentError minimum(sparse(Int[])) +@test_throws ArgumentError maximum(sparse(Int[])) +@test var(sparse(Int[])) === NaN + +for f in (sum, prod, minimum, maximum, var) + @test isequal(f(spzeros(0, 1), 1), f(Array(Int, 0, 1), 1)) + @test isequal(f(spzeros(0, 1), 2), f(Array(Int, 0, 1), 2)) + @test isequal(f(spzeros(0, 1), (1, 2)), f(Array(Int, 0, 1), (1, 2))) + @test isequal(f(spzeros(0, 1), 3), f(Array(Int, 0, 1), 3)) +end # spdiagm @test full(spdiagm((ones(2), ones(2)), (0, -1), 3, 3)) ==