From 0277d0a736d653b4df2b10e77e37ee9b434b80de Mon Sep 17 00:00:00 2001
From: Sacha Verweij <sacha@stanford.edu>
Date: Sat, 5 Nov 2016 19:15:03 -0700
Subject: [PATCH] Make broadcast over a single SparseMatrixCSC (and possibly
 broadcast scalars) more generic.

---
 base/sparse/sparsematrix.jl | 123 ++++++++++++++++++++++--------------
 test/sparse/sparse.jl       |  15 ++++-
 2 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
index f7ddb3d70486d4..9f6049c47fa1a3 100644
--- a/base/sparse/sparsematrix.jl
+++ b/base/sparse/sparsematrix.jl
@@ -1390,7 +1390,77 @@ function one{T}(S::SparseMatrixCSC{T})
     speye(T, m)
 end
 
-## Unary arithmetic and boolean operators
+
+## Broadcast operations involving a single sparse matrix and possibly broadcast scalars
+
+function broadcast{Tf}(f::Tf, A::SparseMatrixCSC)
+    fofzero = f(zero(eltype(A)))
+    fpreszero = fofzero == zero(fofzero)
+    return fpreszero ? _broadcast_zeropres(f, A) : _broadcast_notzeropres(f, A)
+end
+"""
+Returns a `SparseMatrixCSC` that shares `A`'s structure but with stored entries
+`broadcast(f, A.nzval)`.
+"""
+function _broadcast_zeropres{Tf}(f::Tf, A::SparseMatrixCSC)
+    Bcolptr = copy(A.colptr); resize!(Bcolptr, A.n + 1)
+    Browval = copy(A.rowval); resize!(Browval, nnz(A))
+    Bnzval = broadcast(f, A.nzval); resize!(Bnzval, nnz(A))
+    return SparseMatrixCSC(A.m, A.n, Bcolptr, Browval, Bnzval)
+    # TODO: The following lines for constructing Bcolptr and Browval might be safer than
+    # those above: If A.colptr and/or A.rowval are shorter than their expected lengths, the
+    # copy!s might catch the error, whereas the former copy/resize! combinations won't. The
+    # lines above are simpler/cleaner though. Thoughts?
+    # Bcolptr = similar(A.colptr, A.n + 1)
+    # copy!(Bcolptr, 1, A.colptr, 1, A.n + 1)
+    # Browval = similar(A.rowval, nnz(A))
+    # copy!(Browval, 1, A.rowval, 1, nnz(A))
+end
+"""
+Returns a (dense) `SparseMatrixCSC` with `f(zero(eltype(A)))` stored in place of all unstored
+entries in `A` and `broadcast(f, A.nzval)` stored in place of all stored entries in `A`.
+"""
+function _broadcast_notzeropres{Tf}(f::Tf, A::SparseMatrixCSC)
+    nnzB = A.m * A.n
+    # Build structure
+    Bcolptr = similar(A.colptr, A.n + 1)
+    copy!(Bcolptr, 1:A.m:(nnzB + 1))
+    Browval = similar(A.rowval, nnzB)
+    for k in 1:A.m:(nnzB - A.m + 1)
+        copy!(Browval, k, 1:A.m)
+    end
+    # Populate values
+    Bnzval = fill(f(zero(eltype(A))), nnzB)
+    @inbounds for (j, jo) in zip(1:A.n, 0:A.m:(nnzB - 1)), k in nzrange(A, j)
+        Bnzval[jo + A.rowval[k]] = f(A.nzval[k])
+    end
+    # NOTE: Combining the fill call into the loop above to avoid multiple sweeps over /
+    # nonsequential access of Bnzval does not appear to improve performance
+    return SparseMatrixCSC(A.m, A.n, Bcolptr, Browval, Bnzval)
+end
+
+# Cover common broadcast operations involving a single sparse matrix and one or more
+# broadcast scalars.
+#
+# TODO: The minimal snippet below is not satisfying: A better solution would achieve
+# the same for (1) all broadcast scalar types (Base.Broadcast.containertype(x) == Any?) and
+# (2) any combination (number, order, type mixture) of broadcast scalars. One might achieve
+# (2) via a generated function. But how one might simultaneously and gracefully achieve (1)
+# eludes me. Thoughts much appreciated.
+#
+broadcast{Tf}(f::Tf, x::Union{Number,Bool}, A::SparseMatrixCSC) = broadcast(y -> f(x, y), A)
+broadcast{Tf}(f::Tf, A::SparseMatrixCSC, y::Union{Number,Bool}) = broadcast(x -> f(x, y), A)
+# NOTE: The following two method definitions work around #19096. These definitions should
+# be folded into the two preceding definitions on resolution of #19096.
+broadcast{Tf,T}(f::Tf, ::Type{T}, A::SparseMatrixCSC) = broadcast(y -> f(T, y), A)
+broadcast{Tf,T}(f::Tf, A::SparseMatrixCSC, ::Type{T}) = broadcast(x -> f(x, T), A)
+
+# TODO: Determine what to do about collapsing the nz2nz_z2z and nz2z_z2z classes. Either (1)
+# the generic unary sparse broadcast behavior should be nz2z_z2z instead of the nz2z_z2z
+# behavior presently above (this would be consistent with existing binary sparse broadcast
+# behavior); or (2) the generic unary broadcast behavior should be nz2nz_z2z as above, and the
+# nz2z_z2z specializations below should ultimately go away in favor of `broadcast`+`dropzeros!`
+# (this would not be consistent with existing binary sparse broadcast behavior).
 
 """
 Helper macro for the unary broadcast definitions below. Takes parent method `fp` and a set
@@ -1439,31 +1509,17 @@ end
     sin, sinh, sind, asin, asinh, asind,
     tan, tanh, tand, atan, atanh, atand,
     sinpi, cosc, ceil, floor, trunc, round)
-broadcast(::typeof(real), A::SparseMatrixCSC) = copy(A)
-broadcast{Tv,Ti}(::typeof(imag), A::SparseMatrixCSC{Tv,Ti}) = spzeros(Tv, Ti, A.m, A.n)
-broadcast{TTv}(::typeof(real), A::SparseMatrixCSC{Complex{TTv}}) = _broadcast_unary_nz2z_z2z_T(real, A, TTv)
-broadcast{TTv}(::typeof(imag), A::SparseMatrixCSC{Complex{TTv}}) = _broadcast_unary_nz2z_z2z_T(imag, A, TTv)
+broadcast{Tf<:typeof(real)}(::Tf, A::SparseMatrixCSC) = copy(A)
+broadcast{Tf<:typeof(imag),Tv,Ti}(::Tf, A::SparseMatrixCSC{Tv,Ti}) = spzeros(Tv, Ti, A.m, A.n)
+broadcast{Tf<:typeof(real),TTv}(::Tf, A::SparseMatrixCSC{Complex{TTv}}) = _broadcast_unary_nz2z_z2z_T(real, A, TTv)
+broadcast{Tf<:typeof(imag),TTv}(::Tf, A::SparseMatrixCSC{Complex{TTv}}) = _broadcast_unary_nz2z_z2z_T(imag, A, TTv)
+# NOTE: Avoiding ambiguities with the general method (broadcast{Tf}(f::Tf, A::SparseMatrixCSC))
+# necessitates the less-than-graceful Tf<:typeof(fn) type parameters in the above definitions
 ceil{To}(::Type{To}, A::SparseMatrixCSC) = _broadcast_unary_nz2z_z2z_T(ceil, A, To)
 floor{To}(::Type{To}, A::SparseMatrixCSC) = _broadcast_unary_nz2z_z2z_T(floor, A, To)
 trunc{To}(::Type{To}, A::SparseMatrixCSC) = _broadcast_unary_nz2z_z2z_T(trunc, A, To)
 round{To}(::Type{To}, A::SparseMatrixCSC) = _broadcast_unary_nz2z_z2z_T(round, A, To)
 
-# Operations that map zeros to zeros and map nonzeros to nonzeros, yielding a sparse matrix
-"""
-Takes unary function `f` that maps zeros to zeros and nonzeros to nonzeros, and returns a
-new `SparseMatrixCSC{TiA,TvB}` `B` generated by applying `f` to each nonzero entry in `A`.
-"""
-function _broadcast_unary_nz2nz_z2z{TvA,TiA,Tf<:Function}(f::Tf, A::SparseMatrixCSC{TvA,TiA})
-    Bcolptr = Vector{TiA}(A.n + 1)
-    Browval = Vector{TiA}(nnz(A))
-    copy!(Bcolptr, 1, A.colptr, 1, A.n + 1)
-    copy!(Browval, 1, A.rowval, 1, nnz(A))
-    Bnzval = broadcast(f, A.nzval)
-    resize!(Bnzval, nnz(A))
-    return SparseMatrixCSC(A.m, A.n, Bcolptr, Browval, Bnzval)
-end
-@_enumerate_childmethods(_broadcast_unary_nz2nz_z2z,
-    log1p, expm1, abs, abs2, conj)
 function conj!(A::SparseMatrixCSC)
     @inbounds @simd for k in 1:nnz(A)
         A.nzval[k] = conj(A.nzval[k])
@@ -1471,31 +1527,6 @@ function conj!(A::SparseMatrixCSC)
     return A
 end
 
-# Operations that map both zeros and nonzeros to zeros, yielding a dense matrix
-"""
-Takes unary function `f` that maps both zeros and nonzeros to nonzeros, constructs a new
-`Matrix{TvB}` `B`, populates all entries of `B` with the result of a single `f(one(zero(Tv)))`
-call, and then, for each stored entry in `A`, calls `f` on the entry's value and stores the
-result in the corresponding location in `B`.
-"""
-function _broadcast_unary_nz2nz_z2nz{Tv}(f::Function, A::SparseMatrixCSC{Tv})
-    B = fill(f(zero(Tv)), size(A))
-    @inbounds for j in 1:A.n
-        for k in nzrange(A, j)
-            i = A.rowval[k]
-            x = A.nzval[k]
-            B[i,j] = f(x)
-        end
-    end
-    return B
-end
-@_enumerate_childmethods(_broadcast_unary_nz2nz_z2nz,
-    log, log2, log10, exp, exp2, exp10, sinc, cospi,
-    cos, cosh, cosd, acos, acosd,
-    cot, coth, cotd, acot, acotd,
-    sec, sech, secd, asech,
-    csc, csch, cscd, acsch)
-
 
 ## Broadcasting kernels specialized for returning a SparseMatrixCSC
 
diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl
index b56ad98a84d97d..0b880237fe69b3 100644
--- a/test/sparse/sparse.jl
+++ b/test/sparse/sparse.jl
@@ -499,7 +499,12 @@ end
         @test R == real.(S)
         @test I == imag.(S)
         @test real.(sparse(R)) == R
-        @test nnz(imag.(sparse(R))) == 0
+        # @test nnz(imag.(sparse(R))) == 0
+        # TODO: Revise the preceding now-failing test: The new generic broadcast
+        # over SparseMatrixCSCs presently behaves like the former nz2nz_z2z-class broadcast
+        # (retains stored entries mapped to zero), so the preceding expression yields four.
+        # Revise once the question of which behavior (nz2nz_z2z or nz2z_z2z) generic
+        # broadcast over a single SparseMatrixCSC should have is resolved.
         @test abs.(S) == abs.(D)
         @test abs2.(S) == abs2.(D)
     end
@@ -1633,5 +1638,11 @@ let
     A = spdiagm(1.0:5.0)
     @test isa(sin.(A), SparseMatrixCSC) # representative for _unary_nz2z_z2z class
     @test isa(abs.(A), SparseMatrixCSC) # representative for _unary_nz2nz_z2z class
-    @test isa(exp.(A), Array) # representative for _unary_nz2nz_z2nz class
+    # TODO: Once the question of whether generic broadcast over a single SparseMatrixCSC
+    # should behave like nz2z_z2z or nz2nz_z2z is resolved, the two tests above should
+    # probably change / be collapsed.
+    #
+    # Also, revise/remove the following now-failing test: broadcast over a single
+    # SparseMatrixCSC now returns a SparseMatrixCSC independent of the broadcast function.
+    # @test isa(exp.(A), Array) # representative for _unary_nz2nz_z2nz class
 end