Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APL indexing #15431

Merged
merged 4 commits into from
May 1, 2016
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
APL indexing
  • Loading branch information
mbauman committed May 1, 2016

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 242b67c70a5c6ae38a0918e22f175039d6e2f5bc
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -101,7 +101,9 @@ Library improvements
* Linear algebra:

* All dimensions indexed by scalars are now dropped, whereas previously only
trailing scalar dimensions would be omitted from the result.
trailing scalar dimensions would be omitted from the result ([#13612]).

* Dimensions indexed by multidimensional arrays add dimensions. More generally, the dimensionality of the result is the sum of the dimensionalities of the indices ([#15431]).

* New `normalize` and `normalize!` convenience functions for normalizing
vectors ([#13681]).
@@ -183,6 +185,7 @@ Deprecated or removed
[#13480]: https://github.com/JuliaLang/julia/issues/13480
[#13496]: https://github.com/JuliaLang/julia/issues/13496
[#13542]: https://github.com/JuliaLang/julia/issues/13542
[#13612]: https://github.com/JuliaLang/julia/issues/13612
[#13680]: https://github.com/JuliaLang/julia/issues/13680
[#13681]: https://github.com/JuliaLang/julia/issues/13681
[#13780]: https://github.com/JuliaLang/julia/issues/13780
@@ -199,6 +202,7 @@ Deprecated or removed
[#15242]: https://github.com/JuliaLang/julia/issues/15242
[#15258]: https://github.com/JuliaLang/julia/issues/15258
[#15409]: https://github.com/JuliaLang/julia/issues/15409
[#15430]: https://github.com/JuliaLang/julia/issues/15431
[#15550]: https://github.com/JuliaLang/julia/issues/15550
[#15609]: https://github.com/JuliaLang/julia/issues/15609
[#15763]: https://github.com/JuliaLang/julia/issues/15763
104 changes: 50 additions & 54 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
@@ -224,31 +224,27 @@ end
# Recursively compute the lengths of a list of indices, without dropping scalars
# These need to be inlined for more than 3 indexes
index_lengths(A::AbstractArray, I::Colon) = (length(A),)
index_lengths(A::AbstractArray, I::AbstractArray{Bool}) = (sum(I),)
index_lengths(A::AbstractArray, I::AbstractArray) = (length(I),)
@inline index_lengths(A::AbstractArray, I...) = index_lengths_dim(A, 1, I...)
index_lengths_dim(A, dim) = ()
index_lengths_dim(A, dim, ::Colon) = (trailingsize(A, dim),)
@inline index_lengths_dim(A, dim, ::Colon, i, I...) = (size(A, dim), index_lengths_dim(A, dim+1, i, I...)...)
@inline index_lengths_dim(A, dim, ::Real, I...) = (1, index_lengths_dim(A, dim+1, I...)...)
@inline index_lengths_dim{N}(A, dim, ::CartesianIndex{N}, I...) = (1, index_shape_dim(A, dim+N, I...)...)
@inline index_lengths_dim(A, dim, i::AbstractArray{Bool}, I...) = (sum(i), index_lengths_dim(A, dim+1, I...)...)
@inline index_lengths_dim(A, dim, i::AbstractArray, I...) = (length(i), index_lengths_dim(A, dim+1, I...)...)
@inline index_lengths_dim(A, dim, i::AbstractArray{Bool}, I...) = (sum(i), index_lengths_dim(A, dim+1, I...)...)
@inline index_lengths_dim{N}(A, dim, i::AbstractArray{CartesianIndex{N}}, I...) = (length(i), index_lengths_dim(A, dim+N, I...)...)

# shape of array to create for getindex() with indexes I, dropping scalars
index_shape(A::AbstractArray, I::AbstractArray) = size(I) # Linear index reshape
index_shape(A::AbstractArray, I::AbstractArray{Bool}) = (sum(I),) # Logical index
index_shape(A::AbstractArray, I::Colon) = (length(A),)
@inline index_shape(A::AbstractArray, I...) = index_shape_dim(A, 1, I...)
index_shape_dim(A, dim, I::Real...) = ()
index_shape_dim(A, dim, ::Colon) = (trailingsize(A, dim),)
@inline index_shape_dim(A, dim, ::Colon, i, I...) = (size(A, dim), index_shape_dim(A, dim+1, i, I...)...)
@inline index_shape_dim(A, dim, ::Real, I...) = (index_shape_dim(A, dim+1, I...)...)
@inline index_shape_dim{N}(A, dim, ::CartesianIndex{N}, I...) = (index_shape_dim(A, dim+N, I...)...)
@inline index_shape_dim(A, dim, i::AbstractVector{Bool}, I...) = (sum(i), index_shape_dim(A, dim+1, I...)...)
@inline index_shape_dim(A, dim, i::AbstractVector, I...) = (length(i), index_shape_dim(A, dim+1, I...)...)
@inline index_shape_dim{N}(A, dim, i::AbstractVector{CartesianIndex{N}}, I...) = (length(i), index_shape_dim(A, dim+N, I...)...)
@inline index_shape_dim(A, dim, i::AbstractArray, I...) = (size(i)..., index_shape_dim(A, dim+1, I...)...)
@inline index_shape_dim(A, dim, i::AbstractArray{Bool}, I...) = (sum(i), index_shape_dim(A, dim+1, I...)...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading this right, does this mean all AbstractArray{Bool} act like they are vectors, for the purpose of determining output dimensionality?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. They can't really do anything else.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One potentially crazy interpretation would be that they should remove dimensions from the parent:

A = rand(1:10, 3,4,5)
A[bitrand(3,4), 1] # would select a vector of random elements from the first page of `A`

That's a separate "feature" though, I think.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that's pretty much the only thing they can do. I'd simply suggest that we document this, it's slightly at odds with the "sum of the dimensions of the indexes".

One additional option is to support AbstractArray{Bool,N} if it's the only index, and otherwise require each dimension to be an AbstractVector{Bool}.

Copy link
Member Author

@mbauman mbauman Apr 23, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's probably a reasonable restriction. As it stands, I have a hard time imagining a real use-case for indexing with a multidimensional logical array that's somewhere between 1- and N- indices unless we allow it to span multiple dimensions. One of the nice things about this APL indexing patch, though, is how it generalizes the first-index behavior and removes special cases like that.

On the other hand, I must say that I really value how restrictive Julia currently is with regards to logical indexing. I frequently hit Matlab bugs where vectorized comparisons of struct arrays (mask = [s.a] == x) will silently drop empty elements, and then it'll silently allow me to index a larger vector with it (e.g., [s(mask).b]). So artificial restrictions here can be very good.

@inline index_shape_dim{N}(A, dim, i::AbstractArray{CartesianIndex{N}}, I...) = (size(i)..., index_shape_dim(A, dim+N, I...)...)

### From abstractarray.jl: Internal multidimensional indexing definitions ###
# These are not defined on directly on getindex to avoid
@@ -264,8 +260,9 @@ end
quote
# This is specifically *not* inlined.
@nexprs $N d->(I_d = to_index(I[d]))
dest = similar(A, @ncall $N index_shape A I)
@ncall $N checksize dest I
shape = @ncall $N index_shape A I
dest = similar(A, shape)
size(dest) == shape || throw_checksize_error(dest, shape)
@ncall $N _unsafe_getindex! dest A I
end
end
@@ -274,10 +271,10 @@ end
# This is inherently a linear operation in the source, but we could potentially
# use fast dividing integers to speed it up.
function _unsafe_getindex(::LinearIndexing, src::AbstractArray, I::AbstractArray{Bool})
# Both index_shape and checksize compute sum(I); manually hoist it out
N = sum(I)
dest = similar(src, (N,))
size(dest) == (N,) || throw(DimensionMismatch())
shape = index_shape(src, I)
dest = similar(src, shape)
size(dest) == shape || throw_checksize_error(dest, shape)

D = eachindex(dest)
Ds = start(D)
for (i, s) in zip(eachindex(I), eachindex(src))
@@ -290,20 +287,8 @@ function _unsafe_getindex(::LinearIndexing, src::AbstractArray, I::AbstractArray
dest
end

# Indexing with an array of indices is inherently linear in the source, but
# might be able to be optimized with fast dividing integers
@inline function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::AbstractArray)
D = eachindex(dest)
Ds = start(D)
for idx in I
d, Ds = next(D, Ds)
@inbounds dest[d] = src[idx]
end
dest
end

# Always index with exactly the indices provided.
@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Union{Real, AbstractVector, Colon}...)
# Always index with the exactly indices provided.
@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Union{Real, AbstractArray, Colon}...)
N = length(I)
quote
$(Expr(:meta, :inline))
@@ -318,26 +303,7 @@ end
end
end

# checksize ensures the output array A is the correct size for the given indices
@noinline throw_checksize_error(A, dim, idx) = throw(DimensionMismatch("index $dim selects $(length(idx)) elements, but size(A, $dim) = $(size(A,dim))"))
@noinline throw_checksize_error(A, dim, idx::AbstractArray{Bool}) = throw(DimensionMismatch("index $dim selects $(sum(idx)) elements, but size(A, $dim) = $(size(A,dim))"))

checksize(A::AbstractArray, I::AbstractArray) = size(A) == size(I) || throw_checksize_error(A, 1, I)
checksize(A::AbstractArray, I::AbstractArray{Bool}) = length(A) == sum(I) || throw_checksize_error(A, 1, I)

@inline checksize(A::AbstractArray, I...) = _checksize(A, 1, I...)
_checksize(A::AbstractArray, dim) = true
# Drop dimensions indexed by scalars, ignore colons
@inline _checksize(A::AbstractArray, dim, ::Real, J...) = _checksize(A, dim, J...)
@inline _checksize(A::AbstractArray, dim, ::Colon, J...) = _checksize(A, dim+1, J...)
@inline function _checksize(A::AbstractArray, dim, I, J...)
size(A, dim) == length(I) || throw_checksize_error(A, dim, I)
_checksize(A, dim+1, J...)
end
@inline function _checksize(A::AbstractArray, dim, I::AbstractVector{Bool}, J...)
size(A, dim) == sum(I) || throw_checksize_error(A, dim, I)
_checksize(A, dim+1, J...)
end
@noinline throw_checksize_error(A, sz) = throw(DimensionMismatch("output array is the wrong size; expected $sz, got $(size(A))"))

## setindex! ##
# For multi-element setindex!, we check bounds, convert the indices (to_index),
@@ -520,11 +486,41 @@ inlinemap(f, t::Tuple{}, s::Tuple) = ()
inlinemap(f, t::Tuple, s::Tuple{}) = ()

# Otherwise, we fall back to the slow div/rem method, using ind2sub.
@inline merge_indexes{N}(V, indexes::NTuple{N}, index) = merge_indexes_div(V, indexes, index, index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...))

@inline merge_indexes_div{N}(V, indexes::NTuple{N}, index::Real, dimlengths) = CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, index)))
merge_indexes_div{N}(V, indexes::NTuple{N}, index, dimlengths) = [CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in index]
merge_indexes_div{N}(V, indexes::NTuple{N}, index::Colon, dimlengths) = [CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in 1:prod(dimlengths)]
@inline merge_indexes{N}(V, indexes::NTuple{N}, index) =
merge_indexes_div(V, indexes, index, index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...))

@inline merge_indexes_div{N}(V, indexes::NTuple{N}, index::Real, dimlengths) =
CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, index)))
merge_indexes_div{N}(V, indexes::NTuple{N}, index::AbstractArray, dimlengths) =
reshape([CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in index], size(index))
merge_indexes_div{N}(V, indexes::NTuple{N}, index::Colon, dimlengths) =
[CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in 1:prod(dimlengths)]

# Merging indices is particularly difficult in the case where we partially linearly
# index through a multidimensional array. It's easiest if we can simply reduce the
# partial indices to a single linear index into the parent index array.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't tried this yet, but I wonder if we can now eliminate the trauma by reshaping the parent first?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's what I threw together this morning: 10076f7 — this reshapes a lazy array of the cartesian indices. I think it'll be a little more efficient since it's only the final index that gets the reshaped treatment.

I can put that commit here, too, but I figured it deserved a bit more benchmarking that I didn't have time to do.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine, let's tackle that issue separately.

function merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple{Colon, Vararg{Colon}})
shape = index_shape(indexes[1], index...)
reshape(merge_indexes(V, indexes, :), (shape[1:end-1]..., shape[end]*prod(index_lengths_dim(V.parent, length(V.indexes)-length(indexes)+2, tail(indexes)...))))
end
@inline merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple{Real, Vararg{Real}}) = merge_indexes(V, indexes, sub2ind(size(indexes[1]), index...))
# In general, it's a little trickier, but we can use the product iterator
# if we replace colons with ranges. This can be optimized further.
function merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple)
I = replace_colons(V, indexes, index)
shp = index_shape(indexes[1], I...) # index_shape does no bounds checking
dimlengths = index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...)
sz = size(indexes[1])
reshape([CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, sub2ind(sz, i...)))) for i in product(I...)], shp)
end
@inline replace_colons(V, indexes, I) = replace_colons_dim(V, indexes, 1, I)
@inline replace_colons_dim(V, indexes, dim, I::Tuple{}) = ()
@inline replace_colons_dim(V, indexes, dim, I::Tuple{Colon}) =
(1:trailingsize(indexes[1], dim)*prod(index_lengths_dim(V.parent, length(V.indexes)-length(indexes)+2, tail(indexes)...)),)
@inline replace_colons_dim(V, indexes, dim, I::Tuple{Colon, Vararg{Any}}) =
(1:size(indexes[1], dim), replace_colons_dim(V, indexes, dim+1, tail(I))...)
@inline replace_colons_dim(V, indexes, dim, I::Tuple{Any, Vararg{Any}}) =
(I[1], replace_colons_dim(V, indexes, dim+1, tail(I))...)


cumsum(A::AbstractArray, axis::Integer=1) = cumsum!(similar(A, Base._cumsum_type(A)), A, axis)
@@ -637,7 +633,7 @@ end
# in the general multidimensional non-scalar case, can we do about 10% better
# in most cases by manually hoisting the bitarray chunks access out of the loop
# (This should really be handled by the compiler or with an immutable BitArray)
@generated function _unsafe_getindex!(X::BitArray, B::BitArray, I::Union{Int,AbstractVector{Int},Colon}...)
@generated function _unsafe_getindex!(X::BitArray, B::BitArray, I::Union{Int,AbstractArray{Int},Colon}...)
N = length(I)
quote
$(Expr(:meta, :inline))
67 changes: 52 additions & 15 deletions base/subarray.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

typealias NonSliceIndex Union{Colon, AbstractVector}
typealias NonSliceIndex Union{Colon, AbstractArray}
typealias ViewIndex Union{Real, NonSliceIndex}
abstract AbstractCartesianIndex{N} # This is a hacky forward declaration for CartesianIndex

@@ -117,24 +117,61 @@ reindex(V, idxs::Tuple{}, subidxs::Tuple{DroppedScalar, Vararg{Any}}) =
reindex(V, idxs::Tuple{}, subidxs::Tuple{Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (subidxs[1], reindex(V, idxs, tail(subidxs))...))

reindex(V, idxs::Tuple{Any}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]],))
reindex(V, idxs::Tuple{Any}, subidxs::Tuple{Any, Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]],))
reindex(V, idxs::Tuple{Any, Any, Vararg{Any}}, subidxs::Tuple{Any}) =
# Skip dropped scalars, so simply peel them off the parent indices and continue
reindex(V, idxs::Tuple{DroppedScalar, Vararg{Any}}, subidxs::Tuple{Vararg{Any}}) =
(@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...))

# Colons simply pass their subindexes straight through
reindex(V, idxs::Tuple{Colon}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (subidxs[1],))
reindex(V, idxs::Tuple{Colon, Vararg{Any}}, subidxs::Tuple{Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (subidxs[1], reindex(V, tail(idxs), tail(subidxs))...))
reindex(V, idxs::Tuple{Colon, Vararg{Any}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),))
# As an optimization, we don't need to merge indices if all trailing indices are dropped scalars
reindex(V, idxs::Tuple{Any, DroppedScalar, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...))
reindex(V, idxs::Tuple{Any, Any, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) =
reindex(V, idxs::Tuple{Colon, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (subidxs[1], tail(idxs)...))

# Re-index into parent vectors with one subindex
reindex(V, idxs::Tuple{AbstractVector}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]],))
reindex(V, idxs::Tuple{AbstractVector, Vararg{Any}}, subidxs::Tuple{Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]], reindex(V, tail(idxs), tail(subidxs))...))
reindex(V, idxs::Tuple{AbstractVector, Vararg{Any}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),))
reindex(V, idxs::Tuple{AbstractVector, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...))

reindex(V, idxs::Tuple{DroppedScalar}, subidxs::Tuple{Any}) = idxs
reindex(V, idxs::Tuple{DroppedScalar}, subidxs::Tuple{Any, Any, Vararg{Any}}) = idxs
reindex(V, idxs::Tuple{DroppedScalar, Any, Vararg{Any}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...))
reindex(V, idxs::Tuple{DroppedScalar, Any, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...))
# Parent matrices are re-indexed with two sub-indices
reindex(V, idxs::Tuple{AbstractMatrix}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]],))
reindex(V, idxs::Tuple{AbstractMatrix}, subidxs::Tuple{Any, Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]],))
reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]], reindex(V, tail(idxs), tail(tail(subidxs)))...))
reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),))
reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any, Any}) =
(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs),))
reindex(V, idxs::Tuple{AbstractMatrix, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...))
reindex(V, idxs::Tuple{AbstractMatrix, Vararg{DroppedScalar}}, subidxs::Tuple{Any, Any}) =
(@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]], tail(idxs)...))

# In general, we index N-dimensional parent arrays with N indices
@generated function reindex{T,N}(V, idxs::Tuple{AbstractArray{T,N}, Vararg{Any}}, subidxs::Tuple{Vararg{Any}})
if length(subidxs.parameters) >= N
subs = [:(subidxs[$d]) for d in 1:N]
tail = [:(subidxs[$d]) for d in N+1:length(subidxs.parameters)]
:(@_propagate_inbounds_meta; (idxs[1][$(subs...)], reindex(V, tail(idxs), ($(tail...),))...))
elseif length(idxs.parameters) == 1
:(@_propagate_inbounds_meta; (idxs[1][subidxs...],))
elseif all(T->T<:DroppedScalar, idxs.parameters[2:end])
:(@_propagate_inbounds_meta; (idxs[1][subidxs...], tail(idxs)...))
else
:(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs),))
end
end

# In general, we simply re-index the parent indices by the provided ones
getindex(V::SubArray) = (@_propagate_inbounds_meta; getindex(V, 1))
Loading