Skip to content

Commit

Permalink
make fallback hash a constant instead of based on object id
Browse files Browse the repository at this point in the history
fixes #12198
  • Loading branch information
JeffBezanson committed Jan 18, 2018
1 parent a4cd91a commit d4b8f16
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 3 deletions.
2 changes: 2 additions & 0 deletions base/Enums.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Base.cconvert(::Type{T}, x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(x)
Base.write(io::IO, x::Enum{T}) where {T<:Integer} = write(io, T(x))
Base.read(io::IO, ::Type{T}) where {T<:Enum} = T(read(io, Enums.basetype(T)))

# Enum values opt in to identity-based hashing (seeded with `h`) instead of relying on
# the generic fallback `hash` method.
function Base.hash(x::Enum, h::UInt)
    return Base.hash_by_id(x, h)
end

# generate code to test whether expr is in the given set of values
function membershiptest(expr, values)
lo, hi = extrema(values)
Expand Down
11 changes: 9 additions & 2 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,8 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K
sz = length(h.keys)
iter = 0
maxprobe = h.maxprobe
index = hashindex(key, sz)
hashval = hash(key)
index = (((hashval%Int) & (sz-1)) + 1)::Int
avail = 0
keys = h.keys

Expand Down Expand Up @@ -361,7 +362,13 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K

avail < 0 && return avail

maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
if hashval == 0
# assume a hash value of 0 means we're using the fallback hash function,
# so gracefully decay to linear lookup.
maxallowed = sz - 1
else
maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
end
# Check if key is not present, may need to keep searching to find slot
@inbounds while iter < maxallowed
if !isslotfilled(h,index)
Expand Down
17 changes: 16 additions & 1 deletion base/hashing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,28 @@ Typically, any type that implements `hash` should also implement its own `==` (h
`isequal`) to guarantee the property mentioned above. Types supporting subtraction
(operator `-`) should also implement [`widen`](@ref), which is required to hash
values inside heterogeneous arrays.
The default hash function simply returns `h`, in order to be compatible with any
possible user-defined equality function. Therefore hash-based collections like `Dict`
work correctly but very slowly for user-defined types without `hash` methods.
"""
hash(x::Any) = hash(x, zero(UInt))
hash(w::WeakRef, h::UInt) = hash(w.value, h)

## hashing general objects ##

# Fallback for objects without a dedicated `hash` method: do no hashing at all and return
# the seed `h` unchanged. A constant result is consistent with any user-defined equality
# method, because two objects that compare equal can never end up with different hashes.
# (The former id-based definition with this same signature was overwritten by this one
# and has been dropped.)
hash(@nospecialize(x), h::UInt) = h

# Identity-based hash: combine the seed `h` with the object id of `x`, then scramble the
# result with `hash_uint`.
function hash_by_id(@nospecialize(x), h::UInt)
    seeded = 3h - object_id(x)
    return hash_uint(seeded)
end

# For the types listed here `==` and `===` coincide, so hashing by object identity is
# consistent with equality.
function hash(x::Union{Symbol, Task, TypeName, Method, Module, Nothing, Missing,
                       Core.IntrinsicFunction, GlobalRef, LineNumberNode, LabelNode,
                       GotoNode, Slot, SSAValue, NewvarNode, TypeVar},
              h::UInt)
    return hash_by_id(x, h)
end

# Hashing types by identity is stricter than necessary, but it is the best we can do.
function hash(x::Type, h::UInt)
    return hash_by_id(x, h)
end

## core data hashing functions ##

Expand Down
3 changes: 3 additions & 0 deletions base/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ Collections should generally implement `==` by calling `==` recursively on all c
New numeric types should implement this function for two arguments of the new type, and
handle comparison to other types via promotion rules where possible.
If a type that implements `==` will be used in sets or as dictionary keys, it should also
implement [`hash`](@ref).
"""
==(x, y) = x === y

Expand Down
13 changes: 13 additions & 0 deletions doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -1509,3 +1509,16 @@ The following examples may help you interpret expressions marked as containing n
field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
* Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
of `ArrayContainer`
## Define `hash` for custom types used as dictionary keys
The default `hash(x, h)` method does no hashing and simply returns its seed `h`, so `hash(x)` is
always `0` for types without their own `hash` method. This causes `Dict`s and `Set`s to perform
linear-time (instead of near-constant-time) lookup for such keys. It allows these collections to work
correctly by default for arbitrary new definitions of `==`, but can cause performance problems.
If you encounter this problem, simply add an appropriate method for `Base.hash(x::MyType, h::UInt)`.
If the built-in equality function `===` already has the desired behavior for your type, this
definition can be used:
```julia
Base.hash(x::MyType, h::UInt) = Base.hash_by_id(x, h)
```
17 changes: 17 additions & 0 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,23 @@ Base.show(io::IO, ::Alpha) = print(io,"α")
@test endswith(str, "α => 1")
end

# Regression fixture for issue #12198: a type with a custom `==` but no `hash` method of
# its own, so hash-based collections have to cope with the fallback hash.
mutable struct Foo12198
    x
end
function Base.:(==)(lhs::Foo12198, rhs::Foo12198)
    return lhs.x == rhs.x
end
@testset "issue #12198" begin
    # elements that are `==` must collapse to a single unique entry
    same = [Foo12198(1), Foo12198(1)]
    @test length(unique(same)) == 1
    distinct = [Foo12198(1), Foo12198(2)]
    @test length(unique(distinct)) == 2
    # separately constructed but `==` keys must address the same dictionary entry
    table = Dict()
    table[Foo12198(1)] = 1
    @test haskey(table, Foo12198(1))
    table[Foo12198(1)] = 2
    @test length(table) == 1
    @test table[Foo12198(1)] == 2
end

@testset "issue #2540" begin
d = Dict{Any,Any}(Dict(x => 1 for x in ['a', 'b', 'c']))
@test d == Dict('a'=>1, 'b'=>1, 'c'=> 1)
Expand Down

0 comments on commit d4b8f16

Please sign in to comment.