Skip to content

Commit

Permalink
simplify mapreduce_impl
Browse files Browse the repository at this point in the history
this is possible now that Functors are not needed for performance
fixes #16185
  • Loading branch information
vtjnash committed May 5, 2016
1 parent 280310d commit fbaa2c0
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 41 deletions.
8 changes: 4 additions & 4 deletions base/linalg/dense.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ isposdef(x::Number) = imag(x)==0 && real(x) > 0
stride1(x::Array) = 1
stride1(x::StridedVector) = stride(x, 1)::Int

import Base: mapreduce_seq_impl
import Base: mapreduce_impl

mapreduce_seq_impl{T<:BlasReal}(::typeof(abs), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int) =
mapreduce_impl{T<:BlasReal}(::typeof(abs), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int) =
BLAS.asum(ilast-ifirst+1, pointer(a, ifirst), stride1(a))

function mapreduce_seq_impl{T<:BlasReal}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int)
function mapreduce_impl{T<:BlasReal}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int)
n = ilast-ifirst+1
px = pointer(a, ifirst)
incx = stride1(a)
BLAS.dot(n, px, incx, px, incx)
end

function mapreduce_seq_impl{T<:BlasComplex}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int)
function mapreduce_impl{T<:BlasComplex}(::typeof(abs2), ::typeof(+), a::Union{Array{T},StridedVector{T}}, ifirst::Int, ilast::Int)
n = ilast-ifirst+1
px = pointer(a, ifirst)
incx = stride1(a)
Expand Down
58 changes: 21 additions & 37 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,33 +93,36 @@ foldr(op, itr) = mapfoldr(identity, op, itr)

## reduce & mapreduce

# mapreduce_***_impl require ifirst < ilast
function mapreduce_seq_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int)
fx1 = r_promote(op, f(A[ifirst]))
fx2 = f(A[ifirst+=1])
v = op(fx1, fx2)
while ifirst < ilast
@inbounds Ai = A[ifirst+=1]
v = op(v, f(Ai))
end
return v
end

function mapreduce_pairwise_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int)
function mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int=pairwise_blocksize(f, op))
if ifirst + blksize > ilast
return mapreduce_seq_impl(f, op, A, ifirst, ilast)
# sequential portion
fx1 = r_promote(op, f(A[ifirst]))
fx2 = r_promote(op, f(A[ifirst + 1]))
v = op(fx1, fx2)
@simd for i = ifirst + 2 : ilast
@inbounds Ai = A[i]
v = op(v, f(Ai))
end
return v
else
# pairwise portion
imid = (ifirst + ilast) >>> 1
v1 = mapreduce_pairwise_impl(f, op, A, ifirst, imid, blksize)
v2 = mapreduce_pairwise_impl(f, op, A, imid+1, ilast, blksize)
v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
return op(v1, v2)
end
end

mapreduce(f, op, itr) = mapfoldl(f, op, itr)
mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr)
mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) =
mapreduce_pairwise_impl(f, op, A, ifirst, ilast, 1024)

# Note: sum_seq usually uses four or more accumulators after partial
# unrolling, so each accumulator gets at most 256 numbers
pairwise_blocksize(f, op) = 1024

# This combination appears to show a benefit from a larger block size
pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096


# handling empty arrays
mr_empty(f, op, T) = throw(ArgumentError("reducing over an empty collection is not allowed"))
Expand Down Expand Up @@ -228,25 +231,6 @@ mapreduce(f, op::ShortCircuiting, itr::Any) = mapreduce_sc(f,op,itr)

## sum

function mapreduce_seq_impl(f, op::typeof(+), a::AbstractArray, ifirst::Int, ilast::Int)
s = r_promote(op, f(a[ifirst])) + f(a[ifirst+1])
@simd for i = ifirst+2:ilast
@inbounds ai = a[i]
s += f(ai)
end
s
end

# Note: sum_seq usually uses four or more accumulators after partial
# unrolling, so each accumulator gets at most 256 numbers
sum_pairwise_blocksize(f) = 1024

# This appears to show a benefit from a larger block size
sum_pairwise_blocksize(::typeof(abs2)) = 4096

mapreduce_impl(f, op::typeof(+), A::AbstractArray, ifirst::Int, ilast::Int) =
mapreduce_pairwise_impl(f, op, A, ifirst, ilast, sum_pairwise_blocksize(f))

sum(f::Callable, a) = mapreduce(f, +, a)
sum(a) = mapreduce(identity, +, a)
sum(a::AbstractArray{Bool}) = countnz(a)
Expand Down

0 comments on commit fbaa2c0

Please sign in to comment.