Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some mapreduce improvement #63

Merged
merged 5 commits into from
Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 1 addition & 58 deletions src/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module HigherOrderFns

# This module provides higher order functions specialized for sparse arrays,
# particularly map[!]/broadcast[!] for SparseVectors and SparseMatrixCSCs at present.
import Base: map, map!, broadcast, copy, copyto!, _extrema_dims, _extrema_itr
import Base: map, map!, broadcast, copy, copyto!

using Base: front, tail, to_shape
using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
Expand All @@ -29,7 +29,6 @@ using LinearAlgebra
# (11) Define broadcast[!] methods handling combinations of scalars, sparse vectors/matrices,
# structured matrices, and one- and two-dimensional Arrays.
# (12) Define map[!] methods handling combinations of sparse and structured matrices.
# (13) Define extrema methods optimized for sparse vectors/matrices.


# (0) BroadcastStyle rules and convenience types for dispatch
Expand Down Expand Up @@ -1163,60 +1162,4 @@ map(f::Tf, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N})
map!(f::Tf, C::AbstractSparseMatrixCSC, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N}) where {Tf,N} =
(_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, _sparsifystructured(A), map(_sparsifystructured, Bs)...))


# (13) extrema methods optimized for sparse vectors/matrices.
function _extrema_itr(f, A::SparseVecOrMat)
M = length(A)
iszero(M) && throw(ArgumentError("Sparse array must have at least one element."))
N = nnz(A)
iszero(N) && return f(zero(eltype(A))), f(zero(eltype(A)))
vmin, vmax = _extrema_itr(f, nonzeros(A))
if N != M
f0 = f(zero(eltype(A)))
vmin = min(f0, vmin)
vmax = max(f0, vmax)
end
vmin, vmax
end

function _extrema_dims(f, x::SparseVector, dims)
sz = zeros(1)
for d in dims
sz[d] = 1
end
if sz == [1] && !iszero(length(x))
return [_extrema_itr(f, x)]
end
invoke(_extrema_dims, Tuple{Any, AbstractArray, Any}, f, x, dims)
end

function _extrema_dims(f, A::AbstractSparseMatrix, dims)
sz = zeros(2)
for d in dims
sz[d] = 1
end
if sz == [1, 0] && !iszero(length(A))
T = eltype(A)
B = Array{Tuple{T,T}}(undef, 1, size(A, 2))
@inbounds for col_idx in 1:size(A, 2)
col = @view A[:,col_idx]
fx = (nnz(col) == size(A, 1)) ? f(A[1,col_idx]) : f(zero(T))
B[col_idx] = (fx, fx)
for x in nonzeros(col)
fx = f(x)
if fx < B[col_idx][1]
B[col_idx] = (fx, B[col_idx][2])
elseif fx > B[col_idx][2]
B[col_idx] = (B[col_idx][1], fx)
end
end
end
return B
end
invoke(_extrema_dims, Tuple{Any, AbstractArray, Any}, f, A, dims)
end

_extrema_dims(f, A::SparseVector, ::Colon) = _extrema_itr(f, A)
_extrema_dims(f, A::AbstractSparseMatrix, ::Colon) = _extrema_itr(f, A)

end
20 changes: 13 additions & 7 deletions src/sparsematrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1914,13 +1914,19 @@ function Base._mapreduce(f, op, ::Base.IndexCartesian, A::AbstractSparseMatrixCS
end
end

# Specialized mapreduce for +/*
_mapreducezeros(f, ::typeof(+), ::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? v0 : f(zero(T))*nzeros + v0
_mapreducezeros(f, ::typeof(*), ::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? v0 : f(zero(T))^nzeros * v0

function Base._mapreduce(f, op::typeof(*), A::AbstractSparseMatrixCSC{T}) where T
# Specialized mapreduce for +/*/min/max/_extrema_rf
_mapreducezeros(f, op::Union{typeof(Base.add_sum),typeof(+)}, ::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? op(zero(v0), v0) : op(f(zero(T))*nzeros, v0)
_mapreducezeros(f, op::Union{typeof(Base.mul_prod),typeof(*)},::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? op(one(v0), v0) : op(f(zero(T))^nzeros, v0)
_mapreducezeros(f, op::Union{typeof(min),typeof(max)}, ::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? v0 : op(v0, f(zero(T)))
if isdefined(Base, :_extrema_rf)
_mapreducezeros(f::Base.ExtremaMap, op::typeof(Base._extrema_rf), ::Type{T}, nzeros::Integer, v0) where {T} =
nzeros == 0 ? v0 : op(v0, f(zero(T)))
end

function Base._mapreduce(f, op::typeof(*), ::Base.IndexCartesian, A::AbstractSparseMatrixCSC{T}) where T
nzeros = widelength(A)-nnz(A)
if nzeros == 0
# No zeros, so don't compute f(0) since it might throw
Expand Down
56 changes: 21 additions & 35 deletions src/sparsevector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1392,46 +1392,32 @@ for (fun, mode) in [(:+, 1), (:-, 1), (:*, 0), (:min, 2), (:max, 2)]
end

### Reduction

function sum(f, x::AbstractSparseVector)
n = length(x)
n > 0 || return sum(f, nonzeros(x)) # return zero() of proper type
m = nnz(x)
(m == 0 ? n * f(zero(eltype(x))) :
m == n ? sum(f, nonzeros(x)) :
Base.add_sum((n - m) * f(zero(eltype(x))), sum(f, nonzeros(x))))
end

sum(x::AbstractSparseVector) = sum(nonzeros(x))

function maximum(f, x::AbstractSparseVector)
n = length(x)
if n == 0
if f === abs || f === abs2
return zero(eltype(x)) # preserving maximum(abs/abs2, x) behaviour in 1.0.x
else
throw(ArgumentError("maximum over an empty array is not allowed."))
end
Base.reducedim_initarray(A::AbstractSparseVector, region, v0, ::Type{R}) where {R} =
fill!(Array{R}(undef, Base.to_shape(Base.reduced_indices(A, region))), v0)

function Base._mapreduce(f, op, ::IndexCartesian, A::AbstractSparseVector{T}) where {T}
isempty(A) && return Base.mapreduce_empty(f, op, T)
z = nnz(A)
rest, ini = if z == 0
length(A)-z-1, f(zero(T))
else
length(A)-z, Base.mapreduce_impl(f, op, nonzeros(A), 1, z)
end
m = nnz(x)
(m == 0 ? f(zero(eltype(x))) :
m == n ? maximum(f, nonzeros(x)) :
max(f(zero(eltype(x))), maximum(f, nonzeros(x))))
_mapreducezeros(f, op, T, rest, ini)
end

maximum(x::AbstractSparseVector) = maximum(identity, x)

function minimum(f, x::AbstractSparseVector)
n = length(x)
n > 0 || throw(ArgumentError("minimum over an empty array is not allowed."))
m = nnz(x)
(m == 0 ? f(zero(eltype(x))) :
m == n ? minimum(f, nonzeros(x)) :
min(f(zero(eltype(x))), minimum(f, nonzeros(x))))
function Base.mapreducedim!(f, op, R::AbstractVector, A::AbstractSparseVector)
# dim1 reduction could be safely replaced with a mapreduce
if length(R) == 1
I = firstindex(R)
v = Base._mapreduce(f, op, IndexCartesian(), A)
R[I] = op(R[I], v)
return R
end
# otherwise there's no reduction
map!((x, y) -> op(x, f(y)), R, R, A)
end

minimum(x::AbstractSparseVector) = minimum(identity, x)

for (fun, comp, word) in ((:findmin, :(<), "minimum"), (:findmax, :(>), "maximum"))
@eval function $fun(f, x::AbstractSparseVector{T}) where {T}
n = length(x)
Expand Down
38 changes: 34 additions & 4 deletions test/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -709,24 +709,54 @@ end
@test extrema(f, x) == extrema(f, y)
@test extrema(spzeros(n, n)) == (0.0, 0.0)
@test extrema(spzeros(n)) == (0.0, 0.0)
@test_throws ArgumentError extrema(spzeros(0, 0))
@test_throws ArgumentError extrema(spzeros(0))
# TODO: Remove the temporary skip once https://github.com/JuliaLang/julia/pull/43604 is merged
if isdefined(Base, :_extrema_rf)
@test_throws "reducing over an empty" extrema(spzeros(0, 0))
@test_throws "reducing over an empty" extrema(spzeros(0))
end
@test extrema(sparse(ones(n, n))) == (1.0, 1.0)
@test extrema(sparse(ones(n))) == (1.0, 1.0)
@test extrema(A; dims=:) == extrema(B; dims=:)
@test extrema(A; dims=1) == extrema(B; dims=1)
@test extrema(A; dims=2) == extrema(B; dims=2)
@test extrema(A; dims=(1,2)) == extrema(B; dims=(1,2))
@test extrema(f, A; dims=1) == extrema(f, B; dims=1)
@test extrema(sparse(C); dims=1) == extrema(C; dims=1)
# TODO: Remove the temporary skip once https://github.com/JuliaLang/julia/pull/43604 is merged
if isdefined(Base, :_extrema_rf)
@test_throws "reducing over an empty" extrema(sparse(C); dims=1) == extrema(C; dims=1)
end
@test extrema(A; dims=[]) == extrema(B; dims=[])
@test extrema(x; dims=:) == extrema(y; dims=:)
@test extrema(x; dims=1) == extrema(y; dims=1)
@test extrema(f, x; dims=1) == extrema(f, y; dims=1)
@test_throws BoundsError extrema(sparse(z); dims=1)
# TODO: Remove the temporary skip once https://github.com/JuliaLang/julia/pull/43604 is merged
if isdefined(Base, :_extrema_rf)
@test_throws "reducing over an empty" extrema(sparse(z); dims=1)
end
@test extrema(x; dims=[]) == extrema(y; dims=[])
end

# TODO: Remove the temporary skip once https://github.com/JuliaLang/julia/pull/43604 is merged
if isdefined(Base, :_extrema_rf)
function test_extrema(a; dims_test = ((), 1, 2, (1,2), 3))
for dims in dims_test
vext = extrema(a; dims)
vmin, vmax = minimum(a; dims), maximum(a; dims)
@test all(x -> isequal(x[1], x[2:3]), zip(vext,vmin,vmax))
end
end
@testset "NaN test for sparse extrema" begin
for sz = (3, 10, 100)
A = sprand(sz, sz, 0.3)
A[rand(1:sz^2,sz)] .= NaN
test_extrema(A)
A = sprand(sz*sz, 0.3)
A[rand(1:sz^2,sz)] .= NaN
test_extrema(A; dims_test = ((), 1, 2))
end
end
end

@testset "issue #42670 - error in sparsevec outer product" begin
A = spzeros(Int, 4)
B = copy(A)
Expand Down
4 changes: 2 additions & 2 deletions test/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -871,8 +871,8 @@ end
occursin("collection slices must be non-empty", str)
@test sum(sparse(Int[])) === 0
@test prod(sparse(Int[])) === 1
@test_throws ArgumentError minimum(sparse(Int[]))
@test_throws ArgumentError maximum(sparse(Int[]))
@test_throws "reducing over an empty" minimum(sparse(Int[]))
@test_throws "reducing over an empty" maximum(sparse(Int[]))

for f in (sum, prod)
@test isequal(f(spzeros(0, 1), dims=1), f(Matrix{Int}(I, 0, 1), dims=1))
Expand Down
4 changes: 2 additions & 2 deletions test/sparsevector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -893,8 +893,8 @@ end
end

let x = spzeros(Float64, 0)
@test_throws ArgumentError minimum(t -> true, x)
@test_throws ArgumentError maximum(t -> true, x)
@test_throws "reducing over an empty" minimum(t -> true, x)
@test_throws "reducing over an empty" maximum(t -> true, x)
@test_throws ArgumentError findmin(x)
@test_throws ArgumentError findmax(x)
end
Expand Down