Skip to content

Commit

Permalink
uniquerow is ic, and seems to handle hash collisions already
Browse files Browse the repository at this point in the history
This commit alters the current groupslices function to return the vector uniquerow that was originally calculated within the existing unique function. When there are no hash collisions, the values contained in uniquerow are equal to what I was calculating in the array ic. As @simonster pointed out in comment JuliaLang#14142 (comment), the previous commit did not take hash collisions into account for the values in ic. Because uniquerow within unique already calculates the values in ic, takes hash collisions into account, and updates its values accordingly, we can just return uniquerow from groupslices. For continuity with the conversation in JuliaLang#14142, I have currently assigned ic as an alias for uniquerow, but that can certainly be removed.
  • Loading branch information
AndyGreenwell committed Mar 11, 2016
1 parent 51f0321 commit a44a6fc
Showing 1 changed file with 49 additions and 9 deletions.
58 changes: 49 additions & 9 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
Expand Up @@ -867,21 +867,61 @@ end
@inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i)))
end

ic = Array(Int, size(A, dim))
# Collect index of first row for each hash
uniquerow = Array(Int, size(A, dim))
firstrow = Dict{Prehashed,Int}()
icdict = Dict{Int,Int}()
h = 0
for k = 1:size(A, dim)
tmp = get!(firstrow, Prehashed(hashes[k]), k)
if !haskey(icdict,tmp)
h += 1
icdict[tmp] = h
ic[k] = h
uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k)
end
uniquerows = collect(values(firstrow))

# Check for collisions
collided = falses(size(A, dim))
@inbounds begin
@nloops $N i A d->(if d == dim
k = i_d
j_d = uniquerow[k]
else
ic[k] = icdict[tmp]
j_d = i_d
end) begin
if (@nref $N A j) != (@nref $N A i)
collided[k] = true
end
end
end

if any(collided)
nowcollided = BitArray(size(A, dim))
while any(collided)
# Collect index of first row for each collided hash
empty!(firstrow)
for j = 1:size(A, dim)
collided[j] || continue
uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j)
end
for v in values(firstrow)
push!(uniquerows, v)
end

# Check for collisions
fill!(nowcollided, false)
@nloops $N i A d->begin
if d == dim
k = i_d
j_d = uniquerow[k]
(!collided[k] || j_d == k) && continue
else
j_d = i_d
end
end begin
if (@nref $N A j) != (@nref $N A i)
nowcollided[k] = true
end
end
(collided, nowcollided) = (nowcollided, collided)
end
end
ic = uniquerow
return ic
end
end
Expand Down

0 comments on commit a44a6fc

Please sign in to comment.