Skip to content

Commit

Permalink
Merge pull request #33533 from JuliaLang/mb/fast-partitions
Browse files Browse the repository at this point in the history
Make Iterators.partition split arrays into views for faster and easier parallelism
  • Loading branch information
mbauman authored Nov 1, 2019
2 parents 2f9eebd + 47e49ab commit 3f0e7d6
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 19 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Standard library changes

* Verbose `display` of `Char` (`text/plain` output) now shows the codepoint value in standard-conforming `"U+XXXX"` format ([#33291]).

* `Iterators.partition` now uses views (or smartly re-computed ranges) for partitions of all `AbstractArray`s ([#33533]).

* Sets are now displayed less compactly in the REPL, as a column of elements, like vectors
and dictionaries ([#33300]).

Expand Down
21 changes: 10 additions & 11 deletions base/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -919,20 +919,19 @@ end
length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc)
tmp = Vector{Bool}(undef, bitcache_size)
destc = dest.chunks
ind = cind = 1
cind = 1
bc′ = preprocess(dest, bc)
@simd for I in eachindex(bc′)
@inbounds tmp[ind] = bc′[I]
ind += 1
if ind > bitcache_size
dumpbitcache(destc, cind, tmp)
cind += bitcache_chunks
ind = 1
for P in Iterators.partition(eachindex(bc′), bitcache_size)
ind = 1
@simd for I in P
@inbounds tmp[ind] = bc′[I]
ind += 1
end
@simd for i in ind:bitcache_size
@inbounds tmp[i] = false
end
end
if ind > 1
@inbounds tmp[ind:bitcache_size] .= false
dumpbitcache(destc, cind, tmp)
cind += bitcache_chunks
end
return dest
end
Expand Down
31 changes: 25 additions & 6 deletions base/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1022,37 +1022,56 @@ Iterate over a collection `n` elements at a time.
# Examples
```jldoctest
julia> collect(Iterators.partition([1,2,3,4,5], 2))
3-element Array{Array{Int64,1},1}:
3-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
[1, 2]
[3, 4]
[5]
```
"""
partition(c::T, n::Integer) where {T} = PartitionIterator{T}(c, Int(n))
function partition(c, n::Integer)
    # A partition must hold at least one element; reject zero/negative sizes up front.
    if n < 1
        throw(ArgumentError("cannot create partitions of length $n"))
    end
    # Normalise any Integer subtype to Int before storing it in the iterator.
    return PartitionIterator(c, Int(n))
end

# Iterator state for `partition`: wraps a collection together with the partition size.
struct PartitionIterator{T}
# The collection being partitioned.
c::T
# Maximum number of elements per partition (the trailing partition may hold fewer).
n::Int
end
# Partitions are explicitly a linear indexing operation, so reshape to 1-d immediately
PartitionIterator(A::AbstractArray, n::Int) = PartitionIterator(vec(A), n)
PartitionIterator(v::AbstractVector, n::Int) = PartitionIterator{typeof(v)}(v, n)

eltype(::Type{PartitionIterator{T}}) where {T} = Vector{eltype(T)}
# Arrays use a generic `view`-of-a-`vec`, so we cannot exactly predict what we'll get back
eltype(::Type{PartitionIterator{T}}) where {T<:AbstractArray} = AbstractVector{eltype(T)}
# But for some common implementations in Base we know the answer exactly
eltype(::Type{PartitionIterator{T}}) where {T<:Vector} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}

IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T)
IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:AbstractArray} = EltypeUnknown()
IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Vector} = IteratorEltype(T)

# Size trait of a partition iterator: a wrapped collection that has a shape is reported
# as merely having a length; every other size trait is forwarded unchanged.
partition_iteratorsize(sizetrait) = sizetrait
partition_iteratorsize(::HasShape) = HasLength()
IteratorSize(::Type{PartitionIterator{T}}) where {T} =
    partition_iteratorsize(IteratorSize(T))

IteratorEltype(::Type{<:PartitionIterator{T}}) where {T} = IteratorEltype(T)

# Number of partitions: one per full group of `itr.n` elements, plus one more when a
# shorter trailing group remains.
function length(itr::PartitionIterator)
    total = length(itr.c)
    fullgroups = div(total, itr.n)
    leftover = mod(total, itr.n)
    return leftover > 0 ? fullgroups + 1 : fullgroups
end

function iterate(itr::PartitionIterator{<:Vector}, state=1)
function iterate(itr::PartitionIterator{<:AbstractRange}, state=1)
state > length(itr.c) && return nothing
r = min(state + itr.n - 1, length(itr.c))
return @inbounds itr.c[state:r], r + 1
end

function iterate(itr::PartitionIterator{<:AbstractArray}, state=1)
state > length(itr.c) && return nothing
r = min(state + itr.n - 1, length(itr.c))
return view(itr.c, state:r), r + 1
return @inbounds view(itr.c, state:r), r + 1
end

struct IterationCutShort; end
Expand Down
59 changes: 57 additions & 2 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ module IteratorsMD

import .Base: +, -, *, (:)
import .Base: simd_outer_range, simd_inner_length, simd_index
using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail
using .Base.Iterators: Reverse
using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail,
ReshapedArray, ReshapedArrayLF, OneTo
using .Base.Iterators: Reverse, PartitionIterator

export CartesianIndex, CartesianIndices

Expand Down Expand Up @@ -463,6 +464,60 @@ module IteratorsMD
iterate(iter::Reverse{<:CartesianIndices{0}}, state=false) = state ? nothing : (CartesianIndex(), true)

Base.LinearIndices(inds::CartesianIndices{N,R}) where {N,R} = LinearIndices{N,R}(inds.indices)

# Views of reshaped CartesianIndices are used for partitions — ensure these are fast
# `CartesianPartition` names a non-linear-indexed, `UnitRange`-indexed 1-d view of a 1-d
# reshape of `CartesianIndices`; the specialized `iterate`/SIMD methods dispatch on it.
const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,Tuple{UnitRange{Int}},false}
# Declared partition eltypes for reshaped arrays; the two methods differ only in the
# final `SubArray` type parameter (`true` for `ReshapedArrayLF`, `false` otherwise).
eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArrayLF} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArray} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, false}
# With an eltype method defined above, the eltype trait is forwarded from the parent type.
Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:ReshapedArray} = Iterators.IteratorEltype(T)

# Declared partition eltypes for ranges: `OneTo` partitions are declared as `UnitRange`s,
# the other listed range types are declared to partition into their own type.
eltype(::Type{PartitionIterator{T}}) where {T<:OneTo} = UnitRange{eltype(T)}
eltype(::Type{PartitionIterator{T}}) where {T<:Union{UnitRange, StepRange, StepRangeLen, LinRange}} = T
Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange}} = Iterators.IteratorEltype(T)


# Begin iterating a CartesianPartition. The state is the pair
# (current index, number of elements produced so far).
@inline function iterate(iter::CartesianPartition)
    if isempty(iter)
        return nothing
    end
    start = first(iter)
    return start, (start, 1)
end
# Advance a CartesianPartition: stop once `n` elements have been produced, otherwise
# step the previous index forward within the bounds of the underlying CartesianIndices.
@inline function iterate(iter::CartesianPartition, (state, n))
    if n >= length(iter)
        return nothing
    end
    grid = iter.parent.parent
    nextindex = IteratorsMD.inc(state.I, first(grid).I, last(grid).I)
    return nextindex, (nextindex, n + 1)
end

# Outer (non-vectorized) range for `@simd` loops over a CartesianPartition: a view into the
# CartesianIndices of the trailing dimensions, running from the trailing coordinates of the
# partition's first element to those of its last element.
@inline function simd_outer_range(iter::CartesianPartition)
# In general, the Cartesian Partition might start and stop in the middle of the outer
# dimensions — thus the outer range of a CartesianPartition is itself a
# CartesianPartition.
# `t` holds the index ranges of every dimension except the first.
t = tail(iter.parent.parent.indices)
ci = CartesianIndices(t)
li = LinearIndices(t)
# Convert the trailing coordinates of the first/last elements to linear positions in `t`.
return @inbounds view(ci, li[tail(iter[1].I)...]:li[tail(iter[end].I)...])
end
# Two-dimensional specialization: the outer range is the span of second-dimension
# coordinates between the partition's first and last elements.
function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
# But for two-dimensional Partitions the above is just a simple one-dimensional range
# over the second dimension; we don't need to worry about non-rectangular staggers in
# higher dimensions.
return @inbounds CartesianIndices((iter[1][2]:iter[end][2],))
end
# Length of the inner (first-dimension) run for the outer index `I`. The run spans the
# whole first dimension, except that it is clipped to start at the partition's first
# element and/or end at its last element when `I` matches their trailing coordinates.
@inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex)
    firstdim = iter.parent.parent.indices[1]
    head = @inbounds iter[1].I
    foot = @inbounds iter[end].I
    lo = if I.I == tail(head)
        head[1]
    else
        first(firstdim)
    end
    hi = if I.I == tail(foot)
        foot[1]
    else
        last(firstdim)
    end
    return hi - lo + 1
end
# Reconstruct the full CartesianIndex for a `@simd` inner-loop step.
#
# `Ilast` is the outer index (all dimensions but the first) and `I1` is the 0-based
# position within the inner run for that outer index. For every outer index except the
# one holding the partition's first element the run starts at the first dimension's
# first index; for that one outer index it starts at the partition's first element.
@inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int)
    # I1 is the 0-based distance from the first dimension's offset
    offset = first(iter.parent.parent.indices[1]) # (this is 1 for 1-based arrays)
    # In the first column we need to also add in the iter's starting point (branchlessly).
    # The shift is measured relative to `offset` (not a hard-coded 1) so that
    # CartesianIndices whose first dimension does not start at 1 are handled correctly.
    f = @inbounds iter[1]
    startoffset = (Ilast.I == tail(f.I)) * (f[1] - offset)
    return CartesianIndex((I1 + offset + startoffset, Ilast.I...))
end
end # IteratorsMD


Expand Down
79 changes: 79 additions & 0 deletions test/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,85 @@ for n in [5,6]
[(1,1),(2,2),(3,3),(4,4),(5,5)]
end

# Count the elements of `iter` by driving the iteration protocol to exhaustion.
function iterate_length(iter)
    count = 0
    step = iterate(iter)
    while step !== nothing
        count += 1
        step = iterate(iter, step[2])
    end
    return count
end
# Count the elements of `iter` using an `@simd`-annotated loop, so the count reflects
# the SIMD iteration path rather than the plain `iterate` protocol.
function simd_iterate_length(iter)
    count = 0
    @simd for elem in iter
        count = count + 1
    end
    return count
end
# Total number of iterations an `@simd` loop over `iter` would perform: the inner run
# length summed over every element of the outer range.
function simd_trip_count(iter)
    outer = Base.SimdLoop.simd_outer_range(iter)
    return sum(Base.SimdLoop.simd_inner_length(iter, o) for o in outer)
end
# Collect the elements of `iter` into a preallocated vector via plain iteration.
function iterate_elements(iter)
    out = Vector{eltype(iter)}(undef, length(iter))
    for (slot, elem) in enumerate(iter)
        @inbounds out[slot] = elem
    end
    return out
end
# Collect the elements of `iter` into a preallocated vector using an `@simd` loop.
function simd_iterate_elements(iter)
    out = Vector{eltype(iter)}(undef, length(iter))
    slot = 1
    @simd for elem in iter
        @inbounds out[slot] = elem
        slot = slot + 1
    end
    return out
end
# Collect the elements of `iter` into a preallocated vector by indexing it with each of
# its own indices in turn.
function index_elements(iter)
    out = Vector{eltype(iter)}(undef, length(iter))
    for (slot, idx) in enumerate(eachindex(iter))
        @inbounds out[slot] = iter[idx]
    end
    return out
end

# Runs once for every combination of array shape `dims` (1-, 2- and 3-dimensional) and
# partition size `part`.
@testset "CartesianPartition optimizations" for dims in ((1,), (64,), (101,),
(1,1), (8,8), (11, 13),
(1,1,1), (8, 4, 2), (11, 13, 17)),
part in (1, 7, 8, 11, 63, 64, 65, 142, 143, 144)
P = partition(CartesianIndices(dims), part)
for I in P
# Every way of counting a partition's elements must agree with its `length`.
@test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I)
# Every way of visiting a partition's elements must produce the same sequence.
@test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I)
end
# The partitions, concatenated in order, must match the original indices pairwise.
# NOTE(review): `zip` stops at the shorter input, so this alone does not check that
# both sides have the same length.
@test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), CartesianIndices(dims)))
end
# Partition sizes below 1 must be rejected, for both non-empty and empty inputs; empty
# inputs with a valid size must give an empty partition iterator.
@testset "empty/invalid partitions" begin
@test_throws ArgumentError partition(1:10, 0)
@test_throws ArgumentError partition(1:10, -1)
@test_throws ArgumentError partition(1:0, 0)
@test_throws ArgumentError partition(1:0, -1)
@test isempty(partition(1:0, 1))
@test isempty(partition(CartesianIndices((0,1)), 1))
end
# For each listed collection (ranges of several kinds, a Vector, CartesianIndices and a
# Dict), the declared `eltype` of the partition iterator must be exactly the concrete
# type of the first partition it produces.
@testset "exact partition eltypes" for a in (Base.OneTo(24), 1:24, 1:1:24, LinRange(1,10,24), .1:.1:2.4, Vector(1:24),
CartesianIndices((4, 6)), Dict((1:24) .=> (1:24)))
P = partition(a, 2)
@test eltype(P) === typeof(first(P))
@test Iterators.IteratorEltype(P) == Iterators.HasEltype()
# Arrays are additionally checked after `vec` and after reshaping to 2-d and 3-d.
if a isa AbstractArray
P = partition(vec(a), 2)
@test eltype(P) === typeof(first(P))
P = partition(reshape(a, 6, 4), 2)
@test eltype(P) === typeof(first(P))
P = partition(reshape(a, 2, 3, 4), 2)
@test eltype(P) === typeof(first(P))
end
end

@test join(map(x->string(x...), partition("Hello World!", 5)), "|") ==
"Hello| Worl|d!"

Expand Down

2 comments on commit 3f0e7d6

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.