From 242b67c70a5c6ae38a0918e22f175039d6e2f5bc Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Wed, 9 Mar 2016 19:57:59 -0500 Subject: [PATCH 1/4] APL indexing --- NEWS.md | 6 ++- base/multidimensional.jl | 104 +++++++++++++++++++-------------------- base/subarray.jl | 67 +++++++++++++++++++------ doc/manual/arrays.rst | 87 +++++++++++++++++++++----------- test/subarray.jl | 15 +++--- 5 files changed, 173 insertions(+), 106 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3509e3dd93445..4f8c574bb0e29 100644 --- a/NEWS.md +++ b/NEWS.md @@ -101,7 +101,9 @@ Library improvements * Linear algebra: * All dimensions indexed by scalars are now dropped, whereas previously only - trailing scalar dimensions would be omitted from the result. + trailing scalar dimensions would be omitted from the result ([#13612]). + + * Dimensions indexed by multidimensional arrays add dimensions. More generally, the dimensionality of the result is the sum of the dimensionalities of the indices ([#15431]). * New `normalize` and `normalize!` convenience functions for normalizing vectors ([#13681]). 
@@ -183,6 +185,7 @@ Deprecated or removed [#13480]: https://github.com/JuliaLang/julia/issues/13480 [#13496]: https://github.com/JuliaLang/julia/issues/13496 [#13542]: https://github.com/JuliaLang/julia/issues/13542 +[#13612]: https://github.com/JuliaLang/julia/issues/13612 [#13680]: https://github.com/JuliaLang/julia/issues/13680 [#13681]: https://github.com/JuliaLang/julia/issues/13681 [#13780]: https://github.com/JuliaLang/julia/issues/13780 @@ -199,6 +202,7 @@ Deprecated or removed [#15242]: https://github.com/JuliaLang/julia/issues/15242 [#15258]: https://github.com/JuliaLang/julia/issues/15258 [#15409]: https://github.com/JuliaLang/julia/issues/15409 +[#15431]: https://github.com/JuliaLang/julia/issues/15431 [#15550]: https://github.com/JuliaLang/julia/issues/15550 [#15609]: https://github.com/JuliaLang/julia/issues/15609 [#15763]: https://github.com/JuliaLang/julia/issues/15763 diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 2b1127d70d4eb..9f526b7b6566b 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -224,21 +224,17 @@ end # Recursively compute the lengths of a list of indices, without dropping scalars # These need to be inlined for more than 3 indexes index_lengths(A::AbstractArray, I::Colon) = (length(A),) -index_lengths(A::AbstractArray, I::AbstractArray{Bool}) = (sum(I),) -index_lengths(A::AbstractArray, I::AbstractArray) = (length(I),) @inline index_lengths(A::AbstractArray, I...) = index_lengths_dim(A, 1, I...) index_lengths_dim(A, dim) = () index_lengths_dim(A, dim, ::Colon) = (trailingsize(A, dim),) @inline index_lengths_dim(A, dim, ::Colon, i, I...) = (size(A, dim), index_lengths_dim(A, dim+1, i, I...)...) @inline index_lengths_dim(A, dim, ::Real, I...) = (1, index_lengths_dim(A, dim+1, I...)...) @inline index_lengths_dim{N}(A, dim, ::CartesianIndex{N}, I...) = (1, index_shape_dim(A, dim+N, I...)...) -@inline index_lengths_dim(A, dim, i::AbstractArray{Bool}, I...) 
= (sum(i), index_lengths_dim(A, dim+1, I...)...) @inline index_lengths_dim(A, dim, i::AbstractArray, I...) = (length(i), index_lengths_dim(A, dim+1, I...)...) +@inline index_lengths_dim(A, dim, i::AbstractArray{Bool}, I...) = (sum(i), index_lengths_dim(A, dim+1, I...)...) @inline index_lengths_dim{N}(A, dim, i::AbstractArray{CartesianIndex{N}}, I...) = (length(i), index_lengths_dim(A, dim+N, I...)...) # shape of array to create for getindex() with indexes I, dropping scalars -index_shape(A::AbstractArray, I::AbstractArray) = size(I) # Linear index reshape -index_shape(A::AbstractArray, I::AbstractArray{Bool}) = (sum(I),) # Logical index index_shape(A::AbstractArray, I::Colon) = (length(A),) @inline index_shape(A::AbstractArray, I...) = index_shape_dim(A, 1, I...) index_shape_dim(A, dim, I::Real...) = () @@ -246,9 +242,9 @@ index_shape_dim(A, dim, ::Colon) = (trailingsize(A, dim),) @inline index_shape_dim(A, dim, ::Colon, i, I...) = (size(A, dim), index_shape_dim(A, dim+1, i, I...)...) @inline index_shape_dim(A, dim, ::Real, I...) = (index_shape_dim(A, dim+1, I...)...) @inline index_shape_dim{N}(A, dim, ::CartesianIndex{N}, I...) = (index_shape_dim(A, dim+N, I...)...) -@inline index_shape_dim(A, dim, i::AbstractVector{Bool}, I...) = (sum(i), index_shape_dim(A, dim+1, I...)...) -@inline index_shape_dim(A, dim, i::AbstractVector, I...) = (length(i), index_shape_dim(A, dim+1, I...)...) -@inline index_shape_dim{N}(A, dim, i::AbstractVector{CartesianIndex{N}}, I...) = (length(i), index_shape_dim(A, dim+N, I...)...) +@inline index_shape_dim(A, dim, i::AbstractArray, I...) = (size(i)..., index_shape_dim(A, dim+1, I...)...) +@inline index_shape_dim(A, dim, i::AbstractArray{Bool}, I...) = (sum(i), index_shape_dim(A, dim+1, I...)...) +@inline index_shape_dim{N}(A, dim, i::AbstractArray{CartesianIndex{N}}, I...) = (size(i)..., index_shape_dim(A, dim+N, I...)...) 
### From abstractarray.jl: Internal multidimensional indexing definitions ### # These are not defined on directly on getindex to avoid @@ -264,8 +260,9 @@ end quote # This is specifically *not* inlined. @nexprs $N d->(I_d = to_index(I[d])) - dest = similar(A, @ncall $N index_shape A I) - @ncall $N checksize dest I + shape = @ncall $N index_shape A I + dest = similar(A, shape) + size(dest) == shape || throw_checksize_error(dest, shape) @ncall $N _unsafe_getindex! dest A I end end @@ -274,10 +271,10 @@ end # This is inherently a linear operation in the source, but we could potentially # use fast dividing integers to speed it up. function _unsafe_getindex(::LinearIndexing, src::AbstractArray, I::AbstractArray{Bool}) - # Both index_shape and checksize compute sum(I); manually hoist it out - N = sum(I) - dest = similar(src, (N,)) - size(dest) == (N,) || throw(DimensionMismatch()) + shape = index_shape(src, I) + dest = similar(src, shape) + size(dest) == shape || throw_checksize_error(dest, shape) + D = eachindex(dest) Ds = start(D) for (i, s) in zip(eachindex(I), eachindex(src)) @@ -290,20 +287,8 @@ function _unsafe_getindex(::LinearIndexing, src::AbstractArray, I::AbstractArray dest end -# Indexing with an array of indices is inherently linear in the source, but -# might be able to be optimized with fast dividing integers -@inline function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::AbstractArray) - D = eachindex(dest) - Ds = start(D) - for idx in I - d, Ds = next(D, Ds) - @inbounds dest[d] = src[idx] - end - dest -end - -# Always index with exactly the indices provided. -@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Union{Real, AbstractVector, Colon}...) +# Always index with exactly the indices provided. +@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Union{Real, AbstractArray, Colon}...) 
N = length(I) quote $(Expr(:meta, :inline)) @@ -318,26 +303,7 @@ end end end -# checksize ensures the output array A is the correct size for the given indices -@noinline throw_checksize_error(A, dim, idx) = throw(DimensionMismatch("index $dim selects $(length(idx)) elements, but size(A, $dim) = $(size(A,dim))")) -@noinline throw_checksize_error(A, dim, idx::AbstractArray{Bool}) = throw(DimensionMismatch("index $dim selects $(sum(idx)) elements, but size(A, $dim) = $(size(A,dim))")) - -checksize(A::AbstractArray, I::AbstractArray) = size(A) == size(I) || throw_checksize_error(A, 1, I) -checksize(A::AbstractArray, I::AbstractArray{Bool}) = length(A) == sum(I) || throw_checksize_error(A, 1, I) - -@inline checksize(A::AbstractArray, I...) = _checksize(A, 1, I...) -_checksize(A::AbstractArray, dim) = true -# Drop dimensions indexed by scalars, ignore colons -@inline _checksize(A::AbstractArray, dim, ::Real, J...) = _checksize(A, dim, J...) -@inline _checksize(A::AbstractArray, dim, ::Colon, J...) = _checksize(A, dim+1, J...) -@inline function _checksize(A::AbstractArray, dim, I, J...) - size(A, dim) == length(I) || throw_checksize_error(A, dim, I) - _checksize(A, dim+1, J...) -end -@inline function _checksize(A::AbstractArray, dim, I::AbstractVector{Bool}, J...) - size(A, dim) == sum(I) || throw_checksize_error(A, dim, I) - _checksize(A, dim+1, J...) -end +@noinline throw_checksize_error(A, sz) = throw(DimensionMismatch("output array is the wrong size; expected $sz, got $(size(A))")) ## setindex! ## # For multi-element setindex!, we check bounds, convert the indices (to_index), @@ -520,11 +486,41 @@ inlinemap(f, t::Tuple{}, s::Tuple) = () inlinemap(f, t::Tuple, s::Tuple{}) = () # Otherwise, we fall back to the slow div/rem method, using ind2sub. 
-@inline merge_indexes{N}(V, indexes::NTuple{N}, index) = merge_indexes_div(V, indexes, index, index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...)) - -@inline merge_indexes_div{N}(V, indexes::NTuple{N}, index::Real, dimlengths) = CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, index))) -merge_indexes_div{N}(V, indexes::NTuple{N}, index, dimlengths) = [CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in index] -merge_indexes_div{N}(V, indexes::NTuple{N}, index::Colon, dimlengths) = [CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in 1:prod(dimlengths)] +@inline merge_indexes{N}(V, indexes::NTuple{N}, index) = + merge_indexes_div(V, indexes, index, index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...)) + +@inline merge_indexes_div{N}(V, indexes::NTuple{N}, index::Real, dimlengths) = + CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, index))) +merge_indexes_div{N}(V, indexes::NTuple{N}, index::AbstractArray, dimlengths) = + reshape([CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in index], size(index)) +merge_indexes_div{N}(V, indexes::NTuple{N}, index::Colon, dimlengths) = + [CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, i))) for i in 1:prod(dimlengths)] + +# Merging indices is particularly difficult in the case where we partially linearly +# index through a multidimensional array. It's easiest if we can simply reduce the +# partial indices to a single linear index into the parent index array. +function merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple{Colon, Vararg{Colon}}) + shape = index_shape(indexes[1], index...) 
+ reshape(merge_indexes(V, indexes, :), (shape[1:end-1]..., shape[end]*prod(index_lengths_dim(V.parent, length(V.indexes)-length(indexes)+2, tail(indexes)...)))) +end +@inline merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple{Real, Vararg{Real}}) = merge_indexes(V, indexes, sub2ind(size(indexes[1]), index...)) +# In general, it's a little trickier, but we can use the product iterator +# if we replace colons with ranges. This can be optimized further. +function merge_indexes{N}(V, indexes::NTuple{N}, index::Tuple) + I = replace_colons(V, indexes, index) + shp = index_shape(indexes[1], I...) # index_shape does no bounds checking + dimlengths = index_lengths_dim(V.parent, length(V.indexes)-N+1, indexes...) + sz = size(indexes[1]) + reshape([CartesianIndex(inlinemap(getindex, indexes, ind2sub(dimlengths, sub2ind(sz, i...)))) for i in product(I...)], shp) +end +@inline replace_colons(V, indexes, I) = replace_colons_dim(V, indexes, 1, I) +@inline replace_colons_dim(V, indexes, dim, I::Tuple{}) = () +@inline replace_colons_dim(V, indexes, dim, I::Tuple{Colon}) = + (1:trailingsize(indexes[1], dim)*prod(index_lengths_dim(V.parent, length(V.indexes)-length(indexes)+2, tail(indexes)...)),) +@inline replace_colons_dim(V, indexes, dim, I::Tuple{Colon, Vararg{Any}}) = + (1:size(indexes[1], dim), replace_colons_dim(V, indexes, dim+1, tail(I))...) +@inline replace_colons_dim(V, indexes, dim, I::Tuple{Any, Vararg{Any}}) = + (I[1], replace_colons_dim(V, indexes, dim+1, tail(I))...) cumsum(A::AbstractArray, axis::Integer=1) = cumsum!(similar(A, Base._cumsum_type(A)), A, axis) @@ -637,7 +633,7 @@ end # in the general multidimensional non-scalar case, can we do about 10% better # in most cases by manually hoisting the bitarray chunks access out of the loop # (This should really be handled by the compiler or with an immutable BitArray) -@generated function _unsafe_getindex!(X::BitArray, B::BitArray, I::Union{Int,AbstractVector{Int},Colon}...) 
+@generated function _unsafe_getindex!(X::BitArray, B::BitArray, I::Union{Int,AbstractArray{Int},Colon}...) N = length(I) quote $(Expr(:meta, :inline)) diff --git a/base/subarray.jl b/base/subarray.jl index b0cefb240f93b..95f1f7c990422 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license -typealias NonSliceIndex Union{Colon, AbstractVector} +typealias NonSliceIndex Union{Colon, AbstractArray} typealias ViewIndex Union{Real, NonSliceIndex} abstract AbstractCartesianIndex{N} # This is a hacky forward declaration for CartesianIndex @@ -117,24 +117,61 @@ reindex(V, idxs::Tuple{}, subidxs::Tuple{DroppedScalar, Vararg{Any}}) = reindex(V, idxs::Tuple{}, subidxs::Tuple{Any, Vararg{Any}}) = (@_propagate_inbounds_meta; (subidxs[1], reindex(V, idxs, tail(subidxs))...)) -reindex(V, idxs::Tuple{Any}, subidxs::Tuple{Any}) = - (@_propagate_inbounds_meta; (idxs[1][subidxs[1]],)) -reindex(V, idxs::Tuple{Any}, subidxs::Tuple{Any, Any, Vararg{Any}}) = - (@_propagate_inbounds_meta; (idxs[1][subidxs[1]],)) -reindex(V, idxs::Tuple{Any, Any, Vararg{Any}}, subidxs::Tuple{Any}) = +# Skip dropped scalars, so simply peel them off the parent indices and continue +reindex(V, idxs::Tuple{DroppedScalar, Vararg{Any}}, subidxs::Tuple{Vararg{Any}}) = + (@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...)) + +# Colons simply pass their subindexes straight through +reindex(V, idxs::Tuple{Colon}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (subidxs[1],)) +reindex(V, idxs::Tuple{Colon, Vararg{Any}}, subidxs::Tuple{Any, Vararg{Any}}) = + (@_propagate_inbounds_meta; (subidxs[1], reindex(V, tail(idxs), tail(subidxs))...)) +reindex(V, idxs::Tuple{Colon, Vararg{Any}}, subidxs::Tuple{Any}) = (@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),)) # As an optimization, we don't need to merge indices if all trailing indices are dropped scalars -reindex(V, idxs::Tuple{Any, 
DroppedScalar, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) = - (@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...)) -reindex(V, idxs::Tuple{Any, Any, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) = +reindex(V, idxs::Tuple{Colon, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (subidxs[1], tail(idxs)...)) + +# Re-index into parent vectors with one subindex +reindex(V, idxs::Tuple{AbstractVector}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1]],)) +reindex(V, idxs::Tuple{AbstractVector, Vararg{Any}}, subidxs::Tuple{Any, Vararg{Any}}) = (@_propagate_inbounds_meta; (idxs[1][subidxs[1]], reindex(V, tail(idxs), tail(subidxs))...)) +reindex(V, idxs::Tuple{AbstractVector, Vararg{Any}}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),)) +reindex(V, idxs::Tuple{AbstractVector, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...)) -reindex(V, idxs::Tuple{DroppedScalar}, subidxs::Tuple{Any}) = idxs -reindex(V, idxs::Tuple{DroppedScalar}, subidxs::Tuple{Any, Any, Vararg{Any}}) = idxs -reindex(V, idxs::Tuple{DroppedScalar, Any, Vararg{Any}}, subidxs::Tuple{Any}) = - (@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...)) -reindex(V, idxs::Tuple{DroppedScalar, Any, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) = - (@_propagate_inbounds_meta; (idxs[1], reindex(V, tail(idxs), subidxs)...)) +# Parent matrices are re-indexed with two sub-indices +reindex(V, idxs::Tuple{AbstractMatrix}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1]],)) +reindex(V, idxs::Tuple{AbstractMatrix}, subidxs::Tuple{Any, Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]],)) +reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any, Any, Vararg{Any}}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]], reindex(V, tail(idxs), 
tail(tail(subidxs)))...)) +reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs[1]),)) +reindex(V, idxs::Tuple{AbstractMatrix, Vararg{Any}}, subidxs::Tuple{Any, Any}) = + (@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs),)) +reindex(V, idxs::Tuple{AbstractMatrix, Vararg{DroppedScalar}}, subidxs::Tuple{Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1]], tail(idxs)...)) +reindex(V, idxs::Tuple{AbstractMatrix, Vararg{DroppedScalar}}, subidxs::Tuple{Any, Any}) = + (@_propagate_inbounds_meta; (idxs[1][subidxs[1], subidxs[2]], tail(idxs)...)) + +# In general, we index N-dimensional parent arrays with N indices +@generated function reindex{T,N}(V, idxs::Tuple{AbstractArray{T,N}, Vararg{Any}}, subidxs::Tuple{Vararg{Any}}) + if length(subidxs.parameters) >= N + subs = [:(subidxs[$d]) for d in 1:N] + tail = [:(subidxs[$d]) for d in N+1:length(subidxs.parameters)] + :(@_propagate_inbounds_meta; (idxs[1][$(subs...)], reindex(V, tail(idxs), ($(tail...),))...)) + elseif length(idxs.parameters) == 1 + :(@_propagate_inbounds_meta; (idxs[1][subidxs...],)) + elseif all(T->T<:DroppedScalar, idxs.parameters[2:end]) + :(@_propagate_inbounds_meta; (idxs[1][subidxs...], tail(idxs)...)) + else + :(@_propagate_inbounds_meta; (merge_indexes(V, idxs, subidxs),)) + end +end # In general, we simply re-index the parent indices by the provided ones getindex(V::SubArray) = (@_propagate_inbounds_meta; getindex(V, 1)) diff --git a/doc/manual/arrays.rst b/doc/manual/arrays.rst index 8acd638b6ecf9..f7e80b75f5e4c 100644 --- a/doc/manual/arrays.rst +++ b/doc/manual/arrays.rst @@ -263,29 +263,40 @@ where each ``I_k`` may be: 1. A scalar integer 2. A ``Range`` of the form ``a:b``, or ``a:b:c`` 3. A ``:`` or ``Colon()`` to select entire dimensions -4. An arbitrary integer vector, including the empty vector ``[]`` -5. A boolean vector +4. An arbitrary integer array, including the empty array ``[]`` +5. 
A boolean array to select elements at its ``true`` indices -The result ``X`` generally has dimensions +If all the indices are scalars, then the result ``X`` is a single element from +the array ``A``. Otherwise, ``X`` is an array with the same number of +dimensions as the sum of the dimensionalities of all the indices. + +If all indices are vectors, for example, then the shape of ``X`` would be ``(length(I_1), length(I_2), ..., length(I_n))``, with location ``(i_1, i_2, ..., i_n)`` of ``X`` containing the value -``A[I_1[i_1], I_2[i_2], ..., I_n[i_n]]``. All dimensions indexed with scalars are -dropped. For example, the result of ``A[2, I, 3]`` will be a vector with size -``(length(I),)``. Boolean vectors are first transformed with ``find``; the size of -a dimension indexed by a boolean vector will be the number of true values in the vector. -As a special part of this syntax, the ``end`` keyword may be used to represent the last -index of each dimension within the indexing brackets, as determined by the size of the -innermost array being indexed. - -Alternatively, single elements of a multidimensional array can be indexed as -:: - - x = A[I] - -where ``I`` is a ``CartesianIndex``, effectively an ``n``-tuple of integers. -See :ref:`man-array-iteration` below. - -Indexing syntax is equivalent to a call to ``getindex``:: +``A[I_1[i_1], I_2[i_2], ..., I_n[i_n]]``. If ``I_1`` is changed to a +two-dimensional matrix, then ``X`` becomes an ``n+1``-dimensional array of +shape ``(size(I_1, 1), size(I_1, 2), length(I_2), ..., length(I_n))``. The +matrix adds a dimension. The location ``(i_1, i_2, i_3, ..., i_{n+1})`` contains +the value at ``A[I_1[i_1, i_2], I_2[i_3], ..., I_n[i_{n+1}]]``. All dimensions +indexed with scalars are dropped. For example, the result of ``A[2, I, 3]`` is +an array with size ``size(I)``. Its ``i``\ th element is populated by +``A[2, I[i], 3]``. + +Boolean arrays must be the same length as the dimension they are indexing into. 
+Indexing by a boolean array ``B`` is the same as indexing by the vector that is +returned by ``find(B)``; the size of a dimension indexed by a boolean array will +be the number of true values in the vector. It is generally more efficient to +use boolean arrays as indices directly instead of first calling ``find``. + +Additionally, single elements of a multidimensional array can be indexed as +``x = A[I]``, where ``I`` is a ``CartesianIndex``. It effectively behaves like +an ``n``-tuple of integers spanning multiple dimensions of ``A``. See +:ref:`man-array-iteration` below. + +As a special part of this syntax, the ``end`` keyword may be used to represent +the last index of each dimension within the indexing brackets, as determined by +the size of the innermost array being indexed. Indexing syntax without the +``end`` keyword is equivalent to a call to ``getindex``:: X = getindex(A, I_1, I_2, ..., I_n) @@ -305,6 +316,20 @@ Example: 6 10 7 11 + julia> x[map(isprime, x)] + 6-element Array{Int64,1}: + 2 + 3 + 5 + 7 + 11 + 13 + + julia> x[1, [2 3; 4 1]] + 2x2 Array{Int64,2}: + 5 9 + 13 1 + Empty ranges of the form ``n:n-1`` are sometimes used to indicate the inter-index location between ``n-1`` and ``n``. For example, the :func:`searchsorted` function uses this convention to indicate the insertion point of a value not found in a sorted @@ -329,16 +354,18 @@ where each ``I_k`` may be: 1. A scalar integer 2. A ``Range`` of the form ``a:b``, or ``a:b:c`` 3. A ``:`` or ``Colon()`` to select entire dimensions -4. An arbitrary integer vector, including the empty vector ``[]`` -5. A boolean vector - -If ``X`` is an array, its size must be ``(length(I_1), length(I_2), ..., length(I_n))``, -and the value in location ``i_1, i_2, ..., i_n`` of ``A`` is overwritten with -the value ``X[I_1[i_1], I_2[i_2], ..., I_n[i_n]]``. If ``X`` is not an array, its -value is written to all referenced locations of ``A``. 
- -A boolean vector used as an index behaves as in :func:`getindex` (it is first transformed -with :func:`find`). +4. An arbitrary integer array, including the empty array ``[]`` +5. A boolean array to select elements at its ``true`` indices + +If ``X`` is an array, it must have the same number of elements as the product +of the lengths of the indices: +``prod(length(I_1), length(I_2), ..., length(I_n))``. The value in location +``i_1, i_2, ..., i_n`` of ``A`` is overwritten with the value +``X[I_1[i_1], I_2[i_2], ..., I_n[i_n]]``. If ``X`` is not an array, its value +is written to all referenced locations of ``A``. + +A boolean array used as an index behaves as in :func:`getindex`, behaving as +though it is first transformed with :func:`find`. Index assignment syntax is equivalent to a call to :func:`setindex!`:: diff --git a/test/subarray.jl b/test/subarray.jl index c3ce9f25c6e67..6b278a466a80f 100644 --- a/test/subarray.jl +++ b/test/subarray.jl @@ -245,7 +245,8 @@ function runtests(A::ANY, I...) 
end # indexN is a cartesian index, indexNN is a linear index for 2 dimensions, and indexNNN is a linear index for 3 dimensions -function runviews{T}(SB::AbstractArray{T,3}, indexN, indexNN, indexNNN) +function runviews(SB::AbstractArray, indexN, indexNN, indexNNN) + @assert ndims(SB) > 2 for i3 in indexN, i2 in indexN, i1 in indexN runtests(SB, i1, i2, i3) end @@ -279,9 +280,9 @@ runviews{T}(SB::AbstractArray{T,0}, indexN, indexNN, indexNNN) = nothing testfull = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) ### Views from Arrays ### -index5 = (1, 2, :, 2:5, 1:2:5, [1], [4,1,5], sub(1:5,[2,1,5])) # all work with at least size 5 -index25 = (3, 8, :, 2:11, 12:3:22, [4,1,5,9], sub(1:25,[13,22,24])) -index125 = (113, :, 85:121, 2:15:92, [99,14,103], sub(1:125,[66,18,59])) +index5 = (1, :, 2:5, 1:2:5, [4,1,5], reshape([2]), sub(1:5,[2,1,5]), [2 3 4 1]) # all work with at least size 5 +index25 = (3, :, 2:11, 12:3:22, [4,1,5,9], reshape([10]), sub(1:25,[13,22,24]), [19 15; 4 24]) +index125 = (113, :, 85:121, 2:15:92, [99,14,103], reshape([72]), sub(1:125,[66,18,59]), reshape([25,4,102,67], 1, 2, 2)) if testfull let A = copy(reshape(1:5*7*11, 11, 7, 5)) @@ -293,7 +294,7 @@ end # "outer" indexes create snips that have at least size 5 along each dimension, # with the exception of Int-slicing -oindex = (:, 6, 3:7, 13:-2:1, [8,4,6,12,5,7]) +oindex = (:, 6, 3:7, reshape([12]), [8,4,6,12,5,7], [3:7 1:5 2:6 4:8 5:9]) if testfull let B = copy(reshape(1:13^3, 13, 13, 13)) @@ -321,7 +322,9 @@ if !testfull ([8,4,6,12,5,7],:,3:7), (6,6,[8,4,6,12,5,7]), (1,:,sub(1:13,[9,12,4,13,1])), - (sub(1:13,[9,12,4,13,1]),2:6,4)) + (sub(1:13,[9,12,4,13,1]),2:6,4), + ([1:5 2:6 3:7 4:8 5:9], :, 3), + (:, [46:-1:42 88:-1:84 22:-1:18 49:-1:45 8:-1:4])) runtests(B, oind...) sliceB = slice(B, oind...) 
runviews(sliceB, index5, index25, index125) From 7c97596fa73a078e84f606f970271fff1bd62b69 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 10 Mar 2016 18:06:31 -0500 Subject: [PATCH 2/4] Speed up SubArray tests * Just do spot-checks on throwing bounds errors * Be more judicious in the index types tested * Run the subarray test first --- test/choosetests.jl | 8 ++++---- test/subarray.jl | 17 ++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test/choosetests.jl b/test/choosetests.jl index 0864267316921..2c6b4eb99a225 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -15,11 +15,11 @@ Upon return, `tests` is a vector of fully-expanded test names, and """ -> function choosetests(choices = []) testnames = [ - "linalg", "core", "inference", "keywordargs", "numbers", "printf", - "char", "string", "triplequote", "unicode", + "linalg", "subarray", "core", "inference", "keywordargs", "numbers", + "printf", "char", "string", "triplequote", "unicode", "dates", "dict", "hashing", "remote", "iobuffer", "staged", - "arrayops", "tuple", "subarray", "reduce", "reducedim", "random", - "abstractarray", "intfuncs", "simdloop", "vecelement", "blas", "sparse", + "arrayops", "tuple", "reduce", "reducedim", "random", "abstractarray", + "intfuncs", "simdloop", "vecelement", "blas", "sparse", "bitarray", "copy", "math", "fastmath", "functional", "operators", "path", "ccall", "parse", "loading", "bigint", "bigfloat", "sorting", "statistics", "spawn", "backtrace", diff --git a/test/subarray.jl b/test/subarray.jl index 6b278a466a80f..b354fa627d521 100644 --- a/test/subarray.jl +++ b/test/subarray.jl @@ -181,13 +181,11 @@ function runtests(A::Array, I...) test_linear(S, C) test_cartesian(S, C) test_mixed(S, C) - test_bounds(S) # slice S = slice(A, I...) test_linear(S, C) test_cartesian(S, C) test_mixed(S, C) - test_bounds(S) end function runtests(A::ANY, I...) @@ -228,7 +226,6 @@ function runtests(A::ANY, I...) 
test_linear(S, C) test_cartesian(S, C) test_mixed(S, C) - test_bounds(S) # slice try S = slice(A, I...) @@ -241,7 +238,6 @@ function runtests(A::ANY, I...) test_linear(S, C) test_cartesian(S, C) test_mixed(S, C) - test_bounds(S) end # indexN is a cartesian index, indexNN is a linear index for 2 dimensions, and indexNNN is a linear index for 3 dimensions @@ -280,9 +276,9 @@ runviews{T}(SB::AbstractArray{T,0}, indexN, indexNN, indexNNN) = nothing testfull = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) ### Views from Arrays ### -index5 = (1, :, 2:5, 1:2:5, [4,1,5], reshape([2]), sub(1:5,[2,1,5]), [2 3 4 1]) # all work with at least size 5 -index25 = (3, :, 2:11, 12:3:22, [4,1,5,9], reshape([10]), sub(1:25,[13,22,24]), [19 15; 4 24]) -index125 = (113, :, 85:121, 2:15:92, [99,14,103], reshape([72]), sub(1:125,[66,18,59]), reshape([25,4,102,67], 1, 2, 2)) +index5 = (1, :, 2:5, [4,1,5], reshape([2]), sub(1:5,[2 3 4 1])) # all work with at least size 5 +index25 = (3, :, 2:11, [19,9,7], reshape([10]), sub(1:25,[19 15; 4 24])) +index125 = (113, :, 85:121, [99,14,103], reshape([72]), sub(1:125,reshape([25,4,102,67], 1, 2, 2))) if testfull let A = copy(reshape(1:5*7*11, 11, 7, 5)) @@ -357,6 +353,7 @@ sA[2:5:end] = -1 @test strides(sA) == (1,3,15) @test stride(sA,3) == 15 @test stride(sA,4) == 120 +test_bounds(sA) sA = sub(A, 1:3, 1:5, 5) @test Base.parentdims(sA) == [1:2;] sA[1:3,1:5] = -2 @@ -364,11 +361,13 @@ sA[1:3,1:5] = -2 sA[:] = -3 @test all(A[:,:,5] .== -3) @test strides(sA) == (1,3) +test_bounds(sA) sA = sub(A, 1:3, 3, 2:5) @test Base.parentdims(sA) == [1:3;] @test size(sA) == (3,1,4) @test sA == A[1:3,3:3,2:5] @test sA[:] == A[1:3,3,2:5][:] +test_bounds(sA) sA = sub(A, 1:2:3, 1:3:5, 1:2:8) @test Base.parentdims(sA) == [1:3;] @test strides(sA) == (2,9,30) @@ -377,6 +376,7 @@ sA = sub(A, 1:2:3, 1:3:5, 1:2:8) @test sub(sub([1:5;], 1:5), 1:5) == [1:5;] # Test with mixed types @test sA[:, Int16[1,2], big(2)] == [31 40; 33 42] +test_bounds(sA) # sub logical indexing 
#4763 A = sub([1:10;], 5:8) @@ -401,15 +401,18 @@ sA = slice(A, 2, :, 1:8) sA[2:5:end] = -1 @test all(sA[2:5:end] .== -1) @test all(A[5:15:120] .== -1) +test_bounds(sA) sA = slice(A, 1:3, 1:5, 5) @test Base.parentdims(sA) == [1:2;] @test size(sA) == (3,5) @test strides(sA) == (1,3) +test_bounds(sA) sA = slice(A, 1:2:3, 3, 1:2:8) @test Base.parentdims(sA) == [1,3] @test size(sA) == (2,4) @test strides(sA) == (2,30) @test sA[:] == A[sA.indexes...][:] +test_bounds(sA) a = [5:8;] @test parent(a) == a From ddedbdb7a2736b6b89b216171dd9c53102541116 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 28 Apr 2016 19:29:52 -0400 Subject: [PATCH 3/4] Only allow logical indexing with vectors or... a single index that matches the size of the array it indexes into --- base/abstractarray.jl | 2 +- base/multidimensional.jl | 2 +- doc/manual/arrays.rst | 15 +++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index a47260ec498df..5f4567073ba91 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -104,7 +104,7 @@ function checkbounds(::Type{Bool}, sz::Integer, r::Range) @_propagate_inbounds_meta isempty(r) | (checkbounds(Bool, sz, first(r)) & checkbounds(Bool, sz, last(r))) end -checkbounds(::Type{Bool}, sz::Integer, I::AbstractArray{Bool}) = length(I) == sz +checkbounds{N}(::Type{Bool}, sz::Integer, I::AbstractArray{Bool,N}) = N == 1 && length(I) == sz function checkbounds(::Type{Bool}, sz::Integer, I::AbstractArray) @_inline_meta b = true diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 9f526b7b6566b..a0f0359bb87a3 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -192,7 +192,7 @@ using .IteratorsMD # improvement over the general definitions in abstractarray.jl for N = 1:5 args = [:($(symbol(:I, d))) for d = 1:N] - targs = [:($(symbol(:I, d))::Union{Colon,Number,AbstractVector}) for d = 1:N] # prevent co-opting the CartesianIndex version + targs = 
[:($(symbol(:I, d))::Union{Colon,Number,AbstractArray}) for d = 1:N] # prevent co-opting the CartesianIndex version exs = [:(checkbounds(Bool, size(A, $d), $(args[d]))) for d = 1:N] cbexpr = exs[1] for d = 2:N diff --git a/doc/manual/arrays.rst b/doc/manual/arrays.rst index f7e80b75f5e4c..8cae33f78f30e 100644 --- a/doc/manual/arrays.rst +++ b/doc/manual/arrays.rst @@ -264,7 +264,7 @@ where each ``I_k`` may be: 2. A ``Range`` of the form ``a:b``, or ``a:b:c`` 3. A ``:`` or ``Colon()`` to select entire dimensions 4. An arbitrary integer array, including the empty array ``[]`` -5. A boolean array to select elements at its ``true`` indices +5. A boolean array to select a vector of elements at its ``true`` indices If all the indices are scalars, then the result ``X`` is a single element from the array ``A``. Otherwise, ``X`` is an array with the same number of @@ -282,11 +282,14 @@ indexed with scalars are dropped. For example, the result of ``A[2, I, 3]`` is an array with size ``size(I)``. Its ``i``\ th element is populated by ``A[2, I[i], 3]``. -Boolean arrays must be the same length as the dimension they are indexing into. -Indexing by a boolean array ``B`` is the same as indexing by the vector that is -returned by ``find(B)``; the size of a dimension indexed by a boolean array will -be the number of true values in the vector. It is generally more efficient to -use boolean arrays as indices directly instead of first calling ``find``. +Indexing by a boolean array ``B`` is effectively the same as indexing by the +vector that is returned by :func:`find(B) <find>`. Often referred to as logical +indexing, this selects elements at the indices where the values are ``true``, +akin to a mask. A logical index must be a vector of the same length as the +dimension it indexes into, or it must be the only index provided and match the +size and dimensionality of the array it indexes into. 
It is generally more +efficient to use boolean arrays as indices directly instead of first calling +:func:`find`. Additionally, single elements of a multidimensional array can be indexed as ``x = A[I]``, where ``I`` is a ``CartesianIndex``. It effectively behaves like From 393fd7263c04e1b1e0673fdf962c0668cfbc39a6 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 28 Apr 2016 20:00:19 -0400 Subject: [PATCH 4/4] Add abstractarray tests for APL indexing --- test/abstractarray.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 0c1726c40c37e..946bc03e9bcc1 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -198,6 +198,26 @@ function test_vector_indexing{T}(::Type{T}, shape, ::Type{TestAbstractArray}) # Test with containers that aren't Int[] @test B[[]] == A[[]] == [] @test B[convert(Array{Any}, idxs)] == A[convert(Array{Any}, idxs)] == idxs + + # Test adding dimensions with matrices + idx1 = rand(1:size(A, 1), 3) + idx2 = rand(1:Base.trailingsize(A, 2), 4, 5) + @test B[idx1, idx2] == A[idx1, idx2] == reshape(A[idx1, vec(idx2)], 3, 4, 5) == reshape(B[idx1, vec(idx2)], 3, 4, 5) + @test B[1, idx2] == A[1, idx2] == reshape(A[1, vec(idx2)], 4, 5) == reshape(B[1, vec(idx2)], 4, 5) + + # test removing dimensions with 0-d arrays + idx0 = reshape([rand(1:size(A, 1))]) + @test B[idx0, idx2] == A[idx0, idx2] == reshape(A[idx0[], vec(idx2)], 4, 5) == reshape(B[idx0[], vec(idx2)], 4, 5) + @test B[reshape([end]), reshape([end])] == A[reshape([end]), reshape([end])] == reshape([A[end,end]]) == reshape([B[end,end]]) + + # test logical indexing + mask = bitrand(shape) + @test B[mask] == A[mask] == B[find(mask)] == A[find(mask)] == find(mask) + @test B[vec(mask)] == A[vec(mask)] == find(mask) + mask1 = bitrand(size(A, 1)) + mask2 = bitrand(Base.trailingsize(A, 2)) + @test B[mask1, mask2] == A[mask1, mask2] == B[find(mask1), find(mask2)] + @test B[mask1, 1] == A[mask1, 1] == find(mask1) end 
function test_primitives{T}(::Type{T}, shape, ::Type{TestAbstractArray})