stricter buffer sizes in SparseMatrixCSC (JuliaLang#40523)

Make length(A.nzval)==nnz(A) and add strict buffer checking (#30662) * Add sizehint!(::SparseMatrixCSC, args...), * Fix illegal SparseMatrixCSC construction in cholmod and linalg. * Remove tests targetting now illegal buffers * Fix invalid buffer creation in kron and more * use widelength in sizehint! to cope with large matrices in 32 bit systems
antoine-levitt · May 9, 2021 · c55ecc7 · c55ecc7
1 parent d96d9d3
commit c55ecc7
Show file tree

Hide file tree

Showing 9 changed files with 215 additions and 277 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -113,6 +113,7 @@ Standard library changes
 
 #### SparseArrays
 
+* new `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).
 
 #### Dates
 

diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl
@@ -8,7 +8,7 @@ import Base: map, map!, broadcast, copy, copyto!, _extrema_dims, _extrema_itr
 
 using Base: front, tail, to_shape
 using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
-                      AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange,
+                      AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange, spzeros,
                       SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros, rowvals, getcolptr, widelength
 using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
 using LinearAlgebra
@@ -132,12 +132,17 @@ function trimstorage!(A::SparseVecOrMat, maxstored)
     resize!(storedvals(A), maxstored)
     return maxstored
 end
+
 function expandstorage!(A::SparseVecOrMat, maxstored)
-    length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
-    length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)
+    if length(storedinds(A)) < maxstored
+        resize!(storedinds(A), maxstored)
+        resize!(storedvals(A), maxstored)
+    end
     return maxstored
 end
 
+_checkbuffers(S::SparseMatrixCSC) = (@assert length(getcolptr(S)) == size(S, 2) + 1 && getcolptr(S)[end] - 1 == length(rowvals(S)) == length(nonzeros(S)); S)
+_checkbuffers(S::SparseVector) = (@assert length(storedvals(S)) == length(storedinds(S)); S)
 
 # (2) map[!] entry points
 map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
@@ -181,7 +186,7 @@ copy(bc::SpBroadcasted1) = _noshapecheck_map(bc.f, bc.args[1])
         storedvals(C)[1] = fofnoargs
         broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
     end
-    return C
+    return _checkbuffers(C)
 end
 
 function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
@@ -224,22 +229,17 @@ _maxnnzfrom(shape::NTuple{2}, A::AbstractSparseMatrixCSC) = nnz(A) * div(shape[1
 @inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
 @inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
 @inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
-@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseVector(shape..., storedinds, storedvals)
-end
-@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
-    pointers = ones(indextype, shape[2] + 1)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
+@inline function _allocres(shape::Union{NTuple{1},NTuple{2}}, indextype, entrytype, maxnnz)
+    X = spzeros(entrytype, indextype, shape)
+    resize!(storedinds(X), maxnnz)
+    resize!(storedvals(X), maxnnz)
+    return X
 end
 
 # (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
 "Stores only the nonzero entries of `map(f, Array(A))` in `C`."
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     Ck = 1
     @inbounds for j in columns(C)
         setcolptr!(C, j, Ck)
@@ -255,7 +255,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 """
 Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
@@ -274,7 +274,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
     end
     # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
     # nonsequential access of storedvals(C) does not appear to improve performance.
-    return C
+    return _checkbuffers(C)
 end
 # helper functions for these methods and some of those below
 @inline _densecoloffsets(A::SparseVector) = 0
@@ -297,7 +297,7 @@ end
 
 # (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
     Ck = 1
@@ -336,7 +336,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVe
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     # Build dense matrix structure in C, expanding storage if necessary
@@ -368,13 +368,13 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
             Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 
 # (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinel = numrows(C) + 1
     Ck = 1
     stopks = _colstartind_all(1, As)
@@ -398,7 +398,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N})
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     # Build dense matrix structure in C, expanding storage if necessary
@@ -421,7 +421,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{Spars
             activerow = min(rows...)
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 # helper methods for map/map! methods just above
@@ -462,7 +462,7 @@ end
 # (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     # C and A cannot have the same shape, as we directed that case to map in broadcast's
     # entry point; here we need efficiently handle only heterogeneous C-A combinations where
     # one or both of C and A has at least one singleton dimension.
@@ -509,7 +509,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     # For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -540,14 +540,14 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 
 # (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
     # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
@@ -711,7 +711,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     # For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -810,7 +810,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 _finishempty!(C::SparseVector) = C
 _finishempty!(C::AbstractSparseMatrixCSC) = (fill!(getcolptr(C), 1); C)
@@ -861,7 +861,7 @@ end
 # (9) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     expandsverts = _expandsvert_all(C, As)
     expandshorzs = _expandshorz_all(C, As)
     rowsentinel = numrows(C) + 1
@@ -909,7 +909,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     isempty(C) && return _finishempty!(C)
@@ -950,7 +950,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 # helper method for broadcast/broadcast! methods just above

diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl
@@ -935,16 +935,15 @@ function triu(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
     end
     rowval = Vector{Ti}(undef, nnz)
     nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
     for col = max(k+1,1) : n
         c1 = getcolptr(S)[col]
-        for c2 in nzrange(A, col)
-            rowvals(A)[c2] = rowvals(S)[c1]
-            nonzeros(A)[c2] = nonzeros(S)[c1]
+        for c2 in colptr[col]:colptr[col+1]-1
+            rowval[c2] = rowvals(S)[c1]
+            nzval[c2] = nonzeros(S)[c1]
             c1 += 1
         end
     end
-    A
+    SparseMatrixCSC(m, n, colptr, rowval, nzval)
 end
 
 function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
@@ -965,17 +964,16 @@ function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
     end
     rowval = Vector{Ti}(undef, nnz)
     nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
     for col = 1 : min(n, m+k)
         c1 = getcolptr(S)[col+1]-1
-        l2 = getcolptr(A)[col+1]-1
-        for c2 = 0 : l2 - getcolptr(A)[col]
-            rowvals(A)[l2 - c2] = rowvals(S)[c1]
-            nonzeros(A)[l2 - c2] = nonzeros(S)[c1]
+        l2 = colptr[col+1]-1
+        for c2 = 0 : l2 - colptr[col]
+            rowval[l2 - c2] = rowvals(S)[c1]
+            nzval[l2 - c2] = nonzeros(S)[c1]
             c1 -= 1
         end
     end
-    A
+    SparseMatrixCSC(m, n, colptr, rowval, nzval)
 end
 
 ## diff
@@ -1340,19 +1338,16 @@ end
 
 ## kron
 @inline function kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
-    nnzC = nnz(A)*nnz(B)
     mA, nA = size(A); mB, nB = size(B)
     mC, nC = mA*mB, nA*nB
 
     rowvalC = rowvals(C)
     nzvalC = nonzeros(C)
     colptrC = getcolptr(C)
 
-    @boundscheck begin
-        length(colptrC) == nC+1 || throw(DimensionMismatch("expect C to be preallocated with $(nC+1) colptrs "))
-        length(rowvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) rowvals"))
-        length(nzvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) nzvals"))
-    end
+    nnzC = nnz(A)*nnz(B)
+    resize!(nzvalC, nnzC)
+    resize!(rowvalC, nnzC)
 
     col = 1
     @inbounds for j = 1:nA
@@ -1381,16 +1376,13 @@ end
 end
 
 @inline function kron!(z::SparseVector, x::SparseVector, y::SparseVector)
-    nnzx = nnz(x); nnzy = nnz(y); nnzz = nnz(z);
+    nnzx = nnz(x); nnzy = nnz(y);
     nzind = nonzeroinds(z)
     nzval = nonzeros(z)
 
-    @boundscheck begin
-        nnzval = length(nzval); nnzind = length(nzind)
-        nnzz = nnzx*nnzy
-        nnzval == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-        nnzind == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-    end
+    nnzz = nnzx*nnzy
+    resize!(nzind, nnzz)
+    resize!(nzval, nnzz)
 
     @inbounds for i = 1:nnzx, j = 1:nnzy
         this_ind = (i-1)*nnzy+j
@@ -1402,17 +1394,12 @@ end
 
 # sparse matrix ⊗ sparse matrix
 function kron(A::AbstractSparseMatrixCSC{T1,S1}, B::AbstractSparseMatrixCSC{T2,S2}) where {T1,S1,T2,S2}
-    nnzC = nnz(A)*nnz(B)
     mA, nA = size(A); mB, nB = size(B)
     mC, nC = mA*mB, nA*nB
     Tv = typeof(one(T1)*one(T2))
     Ti = promote_type(S1,S2)
-    colptrC = Vector{Ti}(undef, nC+1)
-    rowvalC = Vector{Ti}(undef, nnzC)
-    nzvalC  = Vector{Tv}(undef, nnzC)
-    colptrC[1] = 1
-    # skip sparse_check
-    C = SparseMatrixCSC{Tv, Ti}(mC, nC, colptrC, rowvalC, nzvalC)
+    C = spzeros(Tv, Ti, mC, nC)
+    sizehint!(C, nnz(A)*nnz(B))
     return @inbounds kron!(C, A, B)
 end
Original file line number	Diff line number	Diff line change
Expand Up		@@ -113,6 +113,7 @@ Standard library changes

		#### SparseArrays

		* new `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).

		#### Dates

Expand Down