From a44a6fcea948376ab58e88973f21eff705d60d01 Mon Sep 17 00:00:00 2001 From: AndyGreenwell Date: Fri, 11 Mar 2016 13:20:44 -0500 Subject: [PATCH] uniquerow is ic, and seems to handle hash collisions already This commit alters the current groupslices function to return the vector uniquerow that was originally calculated within the existing unique function. The values contained within uniquerow for cases where there are no hash collisions are actually equal to what I was calculating in array ic. As @simonster pointed out in comment https://github.com/JuliaLang/julia/pull/14142#issuecomment-193089681 the previous commit was not taking into account hash collisions for the values in ic. As uniquerow within unique was already calculating the values in ic, taking into account hash collisions, and updating its values accordingly, we can just return uniquerow from groupslices. For continuity with the conversation in #14142, I have currently assigned ic as an alias for uniquerow, but that can certainly be removed. 
--- base/multidimensional.jl | 58 +++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 6d1bf301e44f4..0297e7069792a 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -867,21 +867,61 @@ end @inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i))) end - ic = Array(Int, size(A, dim)) + # Collect index of first row for each hash + uniquerow = Array(Int, size(A, dim)) firstrow = Dict{Prehashed,Int}() - icdict = Dict{Int,Int}() - h = 0 for k = 1:size(A, dim) - tmp = get!(firstrow, Prehashed(hashes[k]), k) - if !haskey(icdict,tmp) - h += 1 - icdict[tmp] = h - ic[k] = h + uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k) + end + uniquerows = collect(values(firstrow)) + + # Check for collisions + collided = falses(size(A, dim)) + @inbounds begin + @nloops $N i A d->(if d == dim + k = i_d + j_d = uniquerow[k] else - ic[k] = icdict[tmp] + j_d = i_d + end) begin + if (@nref $N A j) != (@nref $N A i) + collided[k] = true + end end end + if any(collided) + nowcollided = BitArray(size(A, dim)) + while any(collided) + # Collect index of first row for each collided hash + empty!(firstrow) + for j = 1:size(A, dim) + collided[j] || continue + uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j) + end + for v in values(firstrow) + push!(uniquerows, v) + end + + # Check for collisions + fill!(nowcollided, false) + @nloops $N i A d->begin + if d == dim + k = i_d + j_d = uniquerow[k] + (!collided[k] || j_d == k) && continue + else + j_d = i_d + end + end begin + if (@nref $N A j) != (@nref $N A i) + nowcollided[k] = true + end + end + (collided, nowcollided) = (nowcollided, collided) + end + end + ic = uniquerow return ic end end