diff --git a/base/exports.jl b/base/exports.jl
index b1cba3ce80895..58c5376ae5e6c 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -531,8 +531,11 @@ export
     findprev,
     findnz,
     first,
+    firstinds,
     flipdim,
     gradient,
+    groupinds,
+    groupslices,
     hcat,
     hvcat,
     ind2sub,
@@ -546,6 +549,7 @@ export
     isperm,
     issorted,
     last,
+    lastinds,
     linspace,
     logspace,
     mapslices,
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index f050701da89ff..2752630c67d44 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -940,3 +940,171 @@ If `dim` is specified, returns unique regions of the array `itr` along `dim`.
         @nref $N A d->d == dim ? sort!(uniquerows) : (1:size(A, d))
     end
 end
+
+"""
+    groupslices(A, dim)
+
+Returns a vector of integers with one element per slice of `A` along dimension
+`dim`. Each element is the index of the first slice along `dim` that is equal to
+the corresponding slice, so equal slices share the same group number. `A` can be
+a multidimensional array.
+
+Throws an `ArgumentError` if `dim` is not in the range `1:ndims(A)`.
+
+Example usage:
+
+If `ic = groupslices(A, dim)` and `ndims(A) == 3`, then:
+
+    if dim == 1
+        all(A .== A[ic,:,:])
+    elseif dim == 2
+        all(A .== A[:,ic,:])
+    elseif dim == 3
+        all(A .== A[:,:,ic])
+    end
+
+The distinct values of `ic` correspond to the slices returned by `unique(A, dim)`.
+"""
+@generated function groupslices{T,N}(A::AbstractArray{T,N}, dim::Int)
+    quote
+        if !(1 <= dim <= $N)
+            throw(ArgumentError("Input argument dim must be 1 <= dim <= $N, but is currently $dim"))
+        end
+        hashes = zeros(UInt, size(A, dim))
+
+        # Compute hash for each row
+        k = 0
+        @nloops $N i A d->(if d == dim; k = i_d; end) begin
+            @inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i)))
+        end
+
+        # Collect index of first row for each hash
+        uniquerow = Array(Int, size(A, dim))
+        firstrow = Dict{Prehashed,Int}()
+        for k = 1:size(A, dim)
+            uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k)
+        end
+
+        # Check for collisions
+        collided = falses(size(A, dim))
+        @inbounds begin
+            @nloops $N i A d->(if d == dim
+                k = i_d
+                j_d = uniquerow[k]
+            else
+                j_d = i_d
+            end) begin
+                if (@nref $N A j) != (@nref $N A i)
+                    collided[k] = true
+                end
+            end
+        end
+
+        if any(collided)
+            nowcollided = BitArray(size(A, dim))
+            while any(collided)
+                # Collect index of first row for each collided hash
+                empty!(firstrow)
+                for j = 1:size(A, dim)
+                    collided[j] || continue
+                    uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j)
+                end
+
+                # Check for collisions
+                fill!(nowcollided, false)
+                @nloops $N i A d->begin
+                    if d == dim
+                        k = i_d
+                        j_d = uniquerow[k]
+                        (!collided[k] || j_d == k) && continue
+                    else
+                        j_d = i_d
+                    end
+                end begin
+                    if (@nref $N A j) != (@nref $N A i)
+                        nowcollided[k] = true
+                    end
+                end
+                (collided, nowcollided) = (nowcollided, collided)
+            end
+        end
+        return uniquerow
+    end
+end
+
+"""
+    groupinds(ic)
+
+Returns a vector of vectors of integers wherein the vector of group slice
+index integers as returned from `groupslices(A, dim)` is converted into a
+grouped vector of vectors. Each vector entry in the returned vector of
+vectors contains all of the positional indices of slices in the original
+input array `A` that correspond to the unique slices along dimension `dim`
+that are present in the array `C` as returned from `unique(A, dim)`.
+"""
+function groupinds(ic::Vector{Int})
+    d = Dict{Int, Int}()
+    ia = unique(ic)
+    n = length(ia)
+    for i = 1:n
+        d[ia[i]] = i
+    end
+
+    ib = Array(Vector{Int}, n)
+    for k = 1:n
+        ib[k] = Int[]
+    end
+
+    for h = 1:length(ic)
+        push!(ib[d[ic[h]]], h)
+    end
+    return ib
+end
+
+"""
+    firstinds(ic::Vector{Int})
+    firstinds(ib::Vector{Vector{Int}})
+
+Returns a vector of integers containing the first index position of each unique
+value in the input integer vector `ic`, or the first index position of each
+entry in the input vector of integer vectors `ib`.
+
+When applied to the output of `groupslices(A, dim)`, or to the output of
+`groupinds` applied to it, the returned vector of integers corresponds to the
+positions of the first of each unique slice present in the original input
+multidimensional array `A` along dimension `dim`.
+
+The implementation of `firstinds` accepting a vector of integers operates on the
+output returned from `groupslices(A, dim)`.
+
+The implementation of `firstinds` accepting a vector of vector of integers
+operates on the output returned from `groupinds(ic::Vector{Int})`.
+"""
+function firstinds(ic::Vector{Int})
+    id = unique(ic)
+    n = length(id)
+    ia = Array(Int, n)
+    for i = 1:n
+        ia[i] = findfirst(ic, id[i])
+    end
+    return ia
+end
+
+function firstinds(ib::Vector{Vector{Int}})
+    return map(first, ib)
+end
+
+"""
+    lastinds(ib::Vector{Vector{Int}})
+
+Returns a vector of integers containing the last index position of each entry in
+the input vector of integer vectors `ib`.
+
+When operating on the output returned from `groupinds(groupslices(A, dim))`, the
+returned vector of integers correspond to the positions of the last of each
+unique slice present in the original input multidimensional array `A` along
+dimension `dim`.
+"""
+function lastinds(ib::Vector{Vector{Int}})
+    return map(last, ib)
+end
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 7b0ec83d48b90..014b6fb546f88 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -33,7 +33,7 @@ function choosetests(choices = [])
         "markdown", "base64", "serialize", "misc", "threads",
         "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int",
         "checked", "intset", "floatfuncs", "compile", "parallel", "inline",
-        "boundscheck", "error", "ambiguous"
+        "boundscheck", "error", "ambiguous", "multidimensional"
     ]
 
     if Base.USE_GPL_LIBS
diff --git a/test/multidimensional.jl b/test/multidimensional.jl
new file mode 100644
index 0000000000000..4f32c9193a851
--- /dev/null
+++ b/test/multidimensional.jl
@@ -0,0 +1,54 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+using Base.Test
+
+# Tests for groupslices, groupinds, firstinds, lastinds
+A = [1 2 3 ; 4 5 6 ; 7 8 9]
+B = [11 12 13 ; 14 15 16 ; 17 18 19]
+C = [21 22 23 ; 24 25 26 ; 27 28 29]
+D = cat(3, A, B, C, C, B, C, A)
+
+ic = [1;2;3;3;2;3;1]
+ib = Vector{Int}[]
+push!(ib,[1;7])
+push!(ib,[2;5])
+push!(ib,[3;4;6])
+ia = [1;2;3]
+ia1 = [1;2;3]
+ia2 = [7;5;6]
+
+ic_test = groupslices(D,3)
+ib_test = groupinds(ic_test)
+ia_test = firstinds(ic_test)
+ia1_test = firstinds(ib_test)
+ia2_test = lastinds(ib_test)
+
+@test isequal(ic, ic_test)
+@test isequal(ib, ib_test)
+@test isequal(ia, ia_test)
+@test isequal(ia1, ia1_test)
+@test isequal(ia2, ia2_test)
+
+E = [1.0 1.0 2.0 2.0 1.0 1.0 1.0 2.0 2.0 2.0
+     1.0 1.0 2.0 2.0 2.0 1.0 2.0 2.0 2.0 1.0
+     1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 2.0]
+
+ib = Vector{Int}[[1,2,6],[3,4,8,9],[5,7],[10]]
+ia = [1; 3; 5; 10]
+ic = [1; 1; 3; 3; 5; 1; 5; 3; 3; 10]
+
+ic_test = groupslices(E,2)
+ib_test = groupinds(ic_test)
+ia_test = firstinds(ic_test)
+ia1_test = firstinds(ib_test)
+ia2_test = lastinds(ib_test)
+
+@test isequal(ic, ic_test)
+@test isequal(ib, ib_test)
+@test isequal(ia, ia_test)
+@test isequal(ia1, ia1_test)
+@test isequal(ia2, ia2_test)
+
+# An out-of-range dimension must be rejected
+@test_throws ArgumentError groupslices(D, 0)
+@test_throws ArgumentError groupslices(D, 4)