diff --git a/base/dict.jl b/base/dict.jl index 8093042d496b4..996c965ee210f 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -315,15 +315,13 @@ copy(o::ObjectIdDict) = ObjectIdDict(o) # dict type Dict{K,V} <: Associative{K,V} - slots::Array{UInt8,1} + slots::Array{Int32,1} keys::Array{K,1} vals::Array{V,1} ndel::Int - count::Int function Dict() - n = 16 - new(zeros(UInt8,n), Array(K,n), Array(V,n), 0, 0) + new(zeros(Int32,16), Array(K,0), Array(V,0), 0) end function Dict(kv) h = Dict{K,V}() @@ -346,7 +344,7 @@ type Dict{K,V} <: Associative{K,V} rehash!(d) end @assert d.ndel == 0 - new(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.count) + new(copy(d.slots), copy(d.keys), copy(d.vals), 0) end end Dict() = Dict{Any,Any}() @@ -380,6 +378,9 @@ dict_with_eltype(kv, t) = Dict{Any,Any}(kv) similar{K,V}(d::Dict{K,V}) = Dict{K,V}() +length(d::Dict) = length(d.keys) - d.ndel +isempty(d::Dict) = (length(d)==0) + # conversion between Dict types function convert{K,V}(::Type{Dict{K,V}},d::Associative) h = Dict{K,V}() @@ -417,99 +418,124 @@ end hashindex(key, sz) = ((hash(key)%Int) & (sz-1)) + 1 -isslotempty(h::Dict, i::Int) = h.slots[i] == 0x0 -isslotfilled(h::Dict, i::Int) = h.slots[i] == 0x1 -isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x2 +function isdeleted{K}(keys::Vector{K}, ki, slots) + if isbits(K) + sz = length(slots) + iter = 0 + maxprobe = max(16, sz>>6) + index = hashindex(keys[ki], sz) + while iter <= maxprobe + si = slots[index] + (si == 0 || si == ki) && return false + si == -ki && return true + index = (index & (sz-1)) + 1 + iter += 1 + end + return false + else + !isdefined(keys, ki) + end +end -function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.keys)) +function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.slots)) olds = h.slots - oldk = h.keys - oldv = h.vals + keys = h.keys + vals = h.vals sz = length(olds) newsz = _tablesz(newsz) - if h.count == 0 + count0 = length(h) + if count0 == 0 resize!(h.slots, newsz) fill!(h.slots, 0) - resize!(h.keys, newsz) - resize!(h.vals, newsz) + resize!(h.keys, 0) + resize!(h.vals, 0) h.ndel = 0 return h end - slots = zeros(UInt8,newsz) - keys = Array(K, newsz) - vals = Array(V, newsz) - count0 = h.count - count = 0 - - for i = 1:sz - if olds[i] == 0x1 - k = oldk[i] - v = oldv[i] + slots = zeros(Int32,newsz) + + if h.ndel > 0 + to = 1 + @inbounds for from = 1:length(keys) + if !isdeleted(keys, from, olds) + # TODO avoid computing hash twice for isbits + k = keys[from] + index = hashindex(k, newsz) + while slots[index] != 0 + index = (index & (newsz-1)) + 1 + end + slots[index] = to + keys[to] = k + vals[to] = vals[from] + to += 1 + end + end + resize!(keys, to-1) + resize!(vals, to-1) + else + @inbounds for i = 1:count0 + k = keys[i] index = hashindex(k, newsz) while slots[index] != 0 index = (index & (newsz-1)) + 1 end - slots[index] = 0x1 - keys[index] = k - vals[index] = v - count += 1 + slots[index] = i + end + end + # TODO restore this piece of logic: +#= if h.count != count0 # if items are removed by finalizers, retry return rehash!(h, newsz) end - end - end - +=# h.slots = slots - h.keys = keys - h.vals = vals - h.count = count h.ndel = 0 + est = div(newsz*2, 3) + sizehint!(h.keys, est) + sizehint!(h.vals, est) return h end function sizehint!(d::Dict, newsz) + slotsz = (newsz*3)>>1 oldsz = length(d.slots) - if newsz <= oldsz + if slotsz <= oldsz # todo: shrink # be careful: rehash!() assumes everything fits. it was only designed # for growing. return d end # grow at least 25% - newsz = max(newsz, (oldsz*5)>>2) - rehash!(d, newsz) + slotsz = max(slotsz, (oldsz*5)>>2) + rehash!(d, slotsz) end function empty!{K,V}(h::Dict{K,V}) - fill!(h.slots, 0x0) - sz = length(h.slots) + fill!(h.slots, 0) empty!(h.keys) empty!(h.vals) - resize!(h.keys, sz) - resize!(h.vals, sz) h.ndel = 0 - h.count = 0 return h end # get the index where a key is stored, or -1 if not present -function ht_keyindex{K,V}(h::Dict{K,V}, key) - sz = length(h.keys) +function ht_keyindex{K,V}(h::Dict{K,V}, key, direct) + slots = h.slots + sz = length(slots) iter = 0 maxprobe = max(16, sz>>6) index = hashindex(key, sz) keys = h.keys - while true - if isslotempty(h,index) - break - end - if !isslotmissing(h,index) && isequal(key,keys[index]) - return index + @inbounds while true + si = slots[index] + si == 0 && break + if si > 0 && isequal(key, keys[si]) + return ifelse(direct, oftype(index, si), index) end index = (index & (sz-1)) + 1 @@ -524,27 +550,19 @@ end # and the key would be inserted at pos # This version is for use by setindex! and get! function ht_keyindex2{K,V}(h::Dict{K,V}, key) - sz = length(h.keys) + slots = h.slots + sz = length(slots) iter = 0 maxprobe = max(16, sz>>6) index = hashindex(key, sz) - avail = 0 keys = h.keys - while true - if isslotempty(h,index) - avail < 0 && return avail + @inbounds while true + si = slots[index] + if si == 0 return -index - end - - if isslotmissing(h,index) - if avail == 0 - # found an available slot, but need to keep scanning - # in case "key" already exists in a later collided slot. - avail = -index - end - elseif isequal(key, keys[index]) - return index + elseif si > 0 && isequal(key, keys[si]) + return oftype(index, si) end index = (index & (sz-1)) + 1 @@ -552,24 +570,28 @@ function ht_keyindex2{K,V}(h::Dict{K,V}, key) iter > maxprobe && break end - avail < 0 && return avail - - rehash!(h, h.count > 64000 ? sz*2 : sz*4) + rehash!(h, length(h) > 64000 ? sz*2 : sz*4) return ht_keyindex2(h, key) end function _setindex!(h::Dict, v, key, index) - h.slots[index] = 0x1 - h.keys[index] = key - h.vals[index] = v - h.count += 1 + hk, hv = h.keys, h.vals + #push!(h.keys, key) + ccall(:jl_array_grow_end, Void, (Any, UInt), hk, 1) + nk = length(hk) + @inbounds hk[nk] = key + #push!(h.vals, v) + ccall(:jl_array_grow_end, Void, (Any, UInt), hv, 1) + @inbounds hv[nk] = v + @inbounds h.slots[index] = nk - sz = length(h.keys) + sz = length(h.slots) + cnt = nk - h.ndel # Rehash now if necessary - if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2 + if h.ndel >= ((3*nk)>>2) || cnt*3 > sz*2 # > 3/4 deleted or > 2/3 full - rehash!(h, h.count > 64000 ? h.count*2 : h.count*4) + rehash!(h, cnt > 64000 ? sz*2 : sz*4) end end @@ -633,9 +655,8 @@ macro get!(h, key0, default) end idx = ht_keyindex2($(esc(h)), key) if idx < 0 - idx = -idx v = convert(V, $(esc(default))) - _setindex!($(esc(h)), v, key, idx) + _setindex!($(esc(h)), v, key, -idx) else @inbounds v = $(esc(h)).vals[idx] end @@ -645,76 +666,68 @@ end function getindex{K,V}(h::Dict{K,V}, key) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, true) return (index<0) ? throw(KeyError(key)) : h.vals[index]::V end function get{K,V}(h::Dict{K,V}, key, default) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, true) return (index<0) ? default : h.vals[index]::V end function get{K,V}(default::Callable, h::Dict{K,V}, key) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, true) return (index<0) ? default() : h.vals[index]::V end -haskey(h::Dict, key) = (ht_keyindex(h, key) >= 0) -in{T<:Dict}(key, v::KeyIterator{T}) = (ht_keyindex(v.dict, key) >= 0) +haskey(h::Dict, key) = (ht_keyindex(h, key, true) >= 0) +in{T<:Dict}(key, v::KeyIterator{T}) = (ht_keyindex(v.dict, key, true) >= 0) function getkey{K,V}(h::Dict{K,V}, key, default) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, true) return (index<0) ? default : h.keys[index]::K end function _pop!(h::Dict, index) - val = h.vals[index] + val = h.vals[h.slots[index]] _delete!(h, index) return val end function pop!(h::Dict, key) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, false) index > 0 ? _pop!(h, index) : throw(KeyError(key)) end function pop!(h::Dict, key, default) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, false) index > 0 ? _pop!(h, index) : default end function _delete!(h::Dict, index) - h.slots[index] = 0x2 - ccall(:jl_arrayunset, Void, (Any, UInt), h.keys, index-1) - ccall(:jl_arrayunset, Void, (Any, UInt), h.vals, index-1) + ki = h.slots[index] + h.slots[index] = -ki + ccall(:jl_arrayunset, Void, (Any, UInt), h.keys, ki-1) + ccall(:jl_arrayunset, Void, (Any, UInt), h.vals, ki-1) h.ndel += 1 - h.count -= 1 h end function delete!(h::Dict, key) - index = ht_keyindex(h, key) + index = ht_keyindex(h, key, false) if index > 0; _delete!(h, index); end h end -function skip_deleted(h::Dict, i) - L = length(h.slots) - while i<=L && !isslotfilled(h,i) - i += 1 - end - return i +function start(t::Dict) + t.ndel > 0 && rehash!(t) + 1 end +done(t::Dict, i) = done(t.keys, i) +next(t::Dict, i) = ((t.keys[i],t.vals[i]), i+1) -start(t::Dict) = skip_deleted(t, 1) -done(t::Dict, i) = done(t.vals, i) -next(t::Dict, i) = ((t.keys[i],t.vals[i]), skip_deleted(t,i+1)) - -isempty(t::Dict) = (t.count == 0) -length(t::Dict) = t.count - -next{T<:Dict}(v::KeyIterator{T}, i) = (v.dict.keys[i], skip_deleted(v.dict,i+1)) -next{T<:Dict}(v::ValueIterator{T}, i) = (v.dict.vals[i], skip_deleted(v.dict,i+1)) +next{T<:Dict}(v::KeyIterator{T}, i) = (v.dict.keys[i], i+1) +next{T<:Dict}(v::ValueIterator{T}, i) = (v.dict.vals[i], i+1) # weak key dictionaries diff --git a/base/precompile.jl b/base/precompile.jl index 045f184dd8ea5..cfa314f3edb72 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -281,7 +281,6 @@ precompile(Base.isequal, (VersionNumber, VersionNumber)) precompile(Base.isequal, (Void, Void)) precompile(Base.isfile, (ASCIIString,)) precompile(Base.ismatch, (Regex, ASCIIString)) -precompile(Base.isslotempty, (Dict{Any,Any}, Int)) precompile(Base.istaskdone, (Task,)) precompile(Base.joinpath, (ASCIIString, ASCIIString)) precompile(Base.joinpath, (ASCIIString, ASCIIString, ASCIIString)) diff --git a/base/profile.jl b/base/profile.jl index 1c5e800f4ec77..778c3ce63d1a3 100644 --- a/base/profile.jl +++ b/base/profile.jl @@ -336,7 +336,7 @@ function tree{T<:Unsigned}(io::IO, bt::Vector{Vector{T}}, counts::Vector{Int}, l for i = 1:length(bt) ip = bt[i][level+1] key = lidict[ip] - indx = Base.ht_keyindex(d, key) + indx = Base.ht_keyindex(d, key, true) if indx == -1 d[key] = [i] else @@ -360,7 +360,7 @@ function tree{T<:Unsigned}(io::IO, bt::Vector{Vector{T}}, counts::Vector{Int}, l d = Dict{T,Vector{Int}}() for i = 1:length(bt) key = bt[i][level+1] - indx = Base.ht_keyindex(d, key) + indx = Base.ht_keyindex(d, key, true) if indx == -1 d[key] = [i] else diff --git a/base/set.jl b/base/set.jl index bfd41c5992929..9d91e864f6de4 100644 --- a/base/set.jl +++ b/base/set.jl @@ -39,7 +39,7 @@ rehash!(s::Set) = (rehash!(s.dict); s) start(s::Set) = start(s.dict) done(s::Set, state) = done(s.dict, state) # NOTE: manually optimized to take advantage of Dict representation -next(s::Set, i) = (s.dict.keys[i], skip_deleted(s.dict,i+1)) +next(s::Set, i) = (s.dict.keys[i], i+1) union() = Set() union(s::Set) = copy(s)