Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Swiss tables design for Dict #44513

Merged
merged 27 commits into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ae2aad5
Improve performance of 'Dict'
petvana Feb 24, 2022
0acaa9a
Fix Set
petvana Feb 24, 2022
8306858
Fix most of the tests
petvana Feb 24, 2022
9a1a660
Use explicite types for pairs
petvana Feb 24, 2022
82e7a9d
Mark broken test of precompilation
petvana Feb 25, 2022
263dc86
Merge branch 'master' into pv-dict
petvana Feb 25, 2022
96d80a5
Reenable broken test
petvana Feb 25, 2022
f37cc2d
Use simplified OldDict for testing precompilation
petvana Feb 25, 2022
25d9bec
Fix precompile test
petvana Feb 25, 2022
8d39aef
Fix white space
petvana Feb 25, 2022
51a6c15
Merge branch 'master' into pv-dict
petvana Feb 25, 2022
08d7d88
Apply suggestions from code review
petvana Feb 25, 2022
afd7975
Apply suggestions from code review
petvana Feb 25, 2022
e69826f
Fix performance of Set constructor if both {T,V} are bitstypes
petvana Feb 25, 2022
6600b03
Apply suggestions from code review
petvana Feb 26, 2022
d789afc
Add inbounds
petvana Feb 26, 2022
ae9483c
Swiss tables based hashing
petvana Mar 7, 2022
cafda6e
Fix whitespaces
petvana Mar 8, 2022
4e03425
Fix Set for Swiss tables
petvana Mar 8, 2022
2cdb1aa
Update comments + rename _shorthash7 function
petvana Mar 10, 2022
99e24d7
Apply suggestions from code review
petvana Mar 11, 2022
67694ba
Remove unreachable code
petvana Mar 11, 2022
e0d59ac
Focus PR purely on Swiss table design
petvana Mar 12, 2022
aca0272
Make empty slot 0x00
petvana Mar 12, 2022
8fd9617
Improve backward compatibility
petvana Mar 12, 2022
90d5f75
Merge branch 'master' into pv-dict-swiss
petvana Mar 13, 2022
380c160
Minor change (revert fill() to zeros())
petvana Mar 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 62 additions & 45 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ Dict{String, Int64} with 2 entries:
```
"""
mutable struct Dict{K,V} <: AbstractDict{K,V}
slots::Array{UInt8,1}
# Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
slots::Vector{UInt8}
keys::Array{K,1}
vals::Array{V,1}
ndel::Int
Expand Down Expand Up @@ -166,11 +167,22 @@ end

empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}()

hashindex(key, sz) = (((hash(key)::UInt % Int) & (sz-1)) + 1)::Int
# Gets 7 most significant bits from the hash (hsh), first bit is 1
_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80
_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80

@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x0
@propagate_inbounds isslotfilled(h::Dict, i::Int) = h.slots[i] == 0x1
@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x2
# hashindex (key, sz) - computes optimal position and shorthash7
# idx - optimal position in the hash table
# sh::UInt8 - short hash (7 highest hash bits)
function hashindex(key, sz)
hsh = hash(key)::UInt
idx = (((hsh % Int) & (sz-1)) + 1)::Int
return idx, _shorthash7(hsh)
end

@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x00
@propagate_inbounds isslotfilled(h::Dict, i::Int) = (h.slots[i] & 0x80) != 0
@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x7f

@constprop :none function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
olds = h.slots
Expand All @@ -182,7 +194,7 @@ hashindex(key, sz) = (((hash(key)::UInt % Int) & (sz-1)) + 1)::Int
h.idxfloor = 1
if h.count == 0
resize!(h.slots, newsz)
fill!(h.slots, 0)
fill!(h.slots, 0x0)
resize!(h.keys, newsz)
resize!(h.vals, newsz)
h.ndel = 0
Expand All @@ -197,16 +209,17 @@ hashindex(key, sz) = (((hash(key)::UInt % Int) & (sz-1)) + 1)::Int
maxprobe = 0

for i = 1:sz
@inbounds if olds[i] == 0x1
@inbounds if (olds[i] & 0x80) != 0
k = oldk[i]
v = oldv[i]
index0 = index = hashindex(k, newsz)
index, sh = hashindex(k, newsz)
index0 = index
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
probe = (index - index0) & (newsz-1)
probe > maxprobe && (maxprobe = probe)
slots[index] = 0x1
slots[index] = olds[i]
keys[index] = k
vals[index] = v
count += 1
Expand Down Expand Up @@ -282,41 +295,40 @@ function ht_keyindex(h::Dict{K,V}, key) where V where K
sz = length(h.keys)
iter = 0
maxprobe = h.maxprobe
index = hashindex(key, sz)
index, sh = hashindex(key, sz)
keys = h.keys

@inbounds while true
if isslotempty(h,index)
break
end
if !isslotmissing(h,index) && (key === keys[index] || isequal(key,keys[index]))
return index
isslotempty(h,index) && return -1
if h.slots[index] == sh
k = keys[index]
if (key === k || isequal(key, k))
return index
end
end

index = (index & (sz-1)) + 1
iter += 1
iter > maxprobe && break
(iter += 1) > maxprobe && return -1
end
return -1
# This line is unreachable
end

# get the index where a key is stored, or -pos if not present
# and the key would be inserted at pos
# get (index, sh) for the key
# index - where a key is stored, or -pos if not present
# and the key would be inserted at pos
# sh::UInt8 - short hash (7 highest hash bits)
# This version is for use by setindex! and get!
function ht_keyindex2!(h::Dict{K,V}, key) where V where K
function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
sz = length(h.keys)
iter = 0
maxprobe = h.maxprobe
index = hashindex(key, sz)
index, sh = hashindex(key, sz)
avail = 0
keys = h.keys

@inbounds while true
if isslotempty(h,index)
if avail < 0
return avail
end
return -index
return (avail < 0 ? avail : -index), sh
end

if isslotmissing(h,index)
Expand All @@ -325,35 +337,41 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K
# in case "key" already exists in a later collided slot.
avail = -index
end
elseif key === keys[index] || isequal(key, keys[index])
return index
elseif h.slots[index] == sh
k = keys[index]
if key === k || isequal(key, k)
return index, sh
end
end

index = (index & (sz-1)) + 1
iter += 1
iter > maxprobe && break
end

avail < 0 && return avail
avail < 0 && return avail, sh

maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
# Check if key is not present, may need to keep searching to find slot
@inbounds while iter < maxallowed
if !isslotfilled(h,index)
h.maxprobe = iter
return -index
return -index, sh
end
index = (index & (sz-1)) + 1
iter += 1
end

rehash!(h, h.count > 64000 ? sz*2 : sz*4)

return ht_keyindex2!(h, key)
return ht_keyindex2_shorthash!(h, key)
end

@propagate_inbounds function _setindex!(h::Dict, v, key, index)
h.slots[index] = 0x1
# Only for better backward compatibility. It can be removed in the future.
ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1]

@propagate_inbounds function _setindex!(h::Dict, v, key, index, sh = _shorthash7(hash(key)))
h.slots[index] = sh
h.keys[index] = key
h.vals[index] = v
h.count += 1
Expand Down Expand Up @@ -381,29 +399,29 @@ end

function setindex!(h::Dict{K,V}, v0, key::K) where V where K
v = convert(V, v0)
index = ht_keyindex2!(h, key)
index, sh = ht_keyindex2_shorthash!(h, key)

if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
else
@inbounds _setindex!(h, v, key, -index)
@inbounds _setindex!(h, v, key, -index, sh)
end

return h
end

function setindex!(h::Dict{K,Any}, v, key::K) where K
@nospecialize v
index = ht_keyindex2!(h, key)
index, sh = ht_keyindex2_shorthash!(h, key)

if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
else
@inbounds _setindex!(h, v, key, -index)
@inbounds _setindex!(h, v, key, -index, sh)
end

return h
Expand Down Expand Up @@ -474,26 +492,25 @@ function get!(default::Callable, h::Dict{K,V}, key0) where V where K
end

function get!(default::Callable, h::Dict{K,V}, key::K) where V where K
index = ht_keyindex2!(h, key)
index, sh = ht_keyindex2_shorthash!(h, key)

index > 0 && return h.vals[index]

age0 = h.age
v = convert(V, default())
if h.age != age0
index = ht_keyindex2!(h, key)
index, sh = ht_keyindex2_shorthash!(h, key)
end
if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
else
@inbounds _setindex!(h, v, key, -index)
@inbounds _setindex!(h, v, key, -index, sh)
end
return v
end


function getindex(h::Dict{K,V}, key) where V where K
index = ht_keyindex(h, key)
@inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V
Expand Down Expand Up @@ -644,7 +661,7 @@ function pop!(h::Dict)
end

function _delete!(h::Dict{K,V}, index) where {K,V}
@inbounds h.slots[index] = 0x2
@inbounds h.slots[index] = 0x7f
@inbounds _unsetindex!(h.keys, index)
@inbounds _unsetindex!(h.vals, index)
h.ndel += 1
Expand Down Expand Up @@ -721,7 +738,7 @@ end
function filter!(pred, h::Dict{K,V}) where {K,V}
h.count == 0 && return h
@inbounds for i=1:length(h.slots)
if h.slots[i] == 0x01 && !pred(Pair{K,V}(h.keys[i], h.vals[i]))
if ((h.slots[i] & 0x80) != 0) && !pred(Pair{K,V}(h.keys[i], h.vals[i]))
_delete!(h, i)
end
end
Expand All @@ -748,14 +765,14 @@ end

function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
for (k, v) in d2
i = ht_keyindex2!(d1, k)
i, sh = ht_keyindex2_shorthash!(d1, k)
if i > 0
d1.vals[i] = combine(d1.vals[i], v)
else
if !isequal(k, convert(K, k))
throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
end
@inbounds _setindex!(d1, convert(V, v), k, -i)
@inbounds _setindex!(d1, convert(V, v), k, -i, sh)
end
end
return d1
Expand Down
8 changes: 4 additions & 4 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -406,18 +406,18 @@ function allunique(C)
if haslength(C) && length(C) > 1000
for i in OneTo(1000)
v, s = x
idx = ht_keyindex2!(seen, v)
idx, sh = ht_keyindex2_shorthash!(seen, v)
idx > 0 && return false
_setindex!(seen, nothing, v, -idx)
_setindex!(seen, nothing, v, -idx, sh)
x = iterate(C, s)
end
sizehint!(seen, length(C))
end
while x !== nothing
v, s = x
idx = ht_keyindex2!(seen, v)
idx, sh = ht_keyindex2_shorthash!(seen, v)
idx > 0 && return false
_setindex!(seen, nothing, v, -idx)
_setindex!(seen, nothing, v, -idx, sh)
x = iterate(C, s)
end
return true
Expand Down