Skip to content

Commit

Permalink
Merge pull request #6624 from JuliaLang/sk/hashing
Browse files Browse the repository at this point in the history
RFC: new approach to efficiently hashing 1, 1.0, big(1), the same.
  • Loading branch information
JeffBezanson committed May 7, 2014
2 parents ea70e4d + 607558a commit 5b20688
Show file tree
Hide file tree
Showing 41 changed files with 415 additions and 304 deletions.
7 changes: 3 additions & 4 deletions base/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,9 @@ type Colon
end
const (:) = Colon()

# Hash a weak reference by hashing the value it refers to.
hash(w::WeakRef) = hash(w.value)
# `isequal` unwraps the `value` field of every WeakRef operand before comparing.
isequal(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
isequal(w::WeakRef, v) = isequal(w.value, v)
isequal(w, v::WeakRef) = isequal(w, v.value)
# `==` unwraps WeakRef operands the same way; note that it delegates to
# `isequal` on the unwrapped values rather than to `==`.
==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
==(w::WeakRef, v) = isequal(w.value, v)
==(w, v::WeakRef) = isequal(w, v.value)

function finalizer(o::ANY, f::Union(Function,Ptr))
if isimmutable(o)
Expand Down
6 changes: 0 additions & 6 deletions base/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1720,9 +1720,3 @@ function cat(catdim::Integer, X::Union(BitArray, Integer)...)
end

# hvcat -> use fallbacks in abstractarray.jl

# Two BitArrays are `isequal` exactly when they are `==`.
isequal(A::BitArray, B::BitArray) = (A == B)

# Hashing

# Hash the tuple (size, chunks), so both the dimensions and the chunk storage
# contribute to the result.
hash(B::BitArray) = hash((size(B), B.chunks))
2 changes: 1 addition & 1 deletion base/bool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ typemax(::Type{Bool}) = true
# OR of two Bools, computed with the `or_int` intrinsic on the unboxed values.
(|)(x::Bool, y::Bool) = box(Bool,or_int(unbox(Bool,x),unbox(Bool,y)))
# `$` on two Bools is true exactly when the operands differ.
($)(x::Bool, y::Bool) = (x!=y)

# NOTE(review): `signbit(x::Bool)` is defined twice with the same signature
# (returning `0`, then `false`); presumably only the second one is meant to
# remain -- confirm.
signbit(x::Bool) = 0
signbit(x::Bool) = false
# `sign`, `abs` and `abs2` return a Bool argument unchanged.
sign(x::Bool) = x
abs(x::Bool) = x
abs2(x::Bool) = x
Expand Down
Empty file added base/comparison.jl
Empty file.
2 changes: 0 additions & 2 deletions base/complex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ end

# Complex numbers are `isequal` when both the real and the imaginary parts are.
# `&` (not `&&`) is used, so both component comparisons are always evaluated.
isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w))

# Combine the hashes of the real and imaginary parts with `bitmix`.
hash(z::Complex) = bitmix(hash(real(z)),hash(imag(z)))

# Conjugate: same real part, negated imaginary part.
conj(z::Complex) = Complex(real(z),-imag(z))
# Magnitude, computed with `hypot` on the two components.
abs(z::Complex) = hypot(real(z), imag(z))
# Squared magnitude: real^2 + imag^2.
abs2(z::Complex) = real(z)*real(z) + imag(z)*imag(z)
Expand Down
5 changes: 5 additions & 0 deletions base/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
# Conversions of a MathConst to Complex{T} / Rational{T} go through its Float64 value.
convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, float64(x))
convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))

# MathConsts sharing the same type parameter `s` compare equal; any other
# pair of MathConsts does not.
=={s}(::MathConst{s}, ::MathConst{s}) = true
==(::MathConst, ::MathConst) = false

# Hash a MathConst through its `object_id`, combined with the seed `h`.
hash(x::MathConst, h::Uint) = hash(object_id(x), h)

# Negating a MathConst yields the negated Float64 value.
-(x::MathConst) = -float64(x)
for op in {:+, :-, :*, :/, :^}
@eval $op(x::MathConst, y::MathConst) = $op(float64(x),float64(y))
Expand Down
99 changes: 1 addition & 98 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,6 @@ filter(f::Function, d::Associative) = filter!(f,copy(d))

# Element type of an Associative{K,V}, reported as the tuple (K,V).
eltype{K,V}(a::Associative{K,V}) = (K,V)

# Hash an associative collection. Every (key, value) pair is mixed with
# `bitmix(hash(key), ~hash(value))` and the per-pair results are folded
# together with xor (`$`), starting from zero.
function hash(d::Associative)
    acc::Uint = 0
    for (key, val) in d
        pairhash = bitmix(hash(key), ~hash(val))
        acc = acc $ pairhash
    end
    return acc
end

function isequal(l::Associative, r::Associative)
if isa(l,ObjectIdDict) != isa(r,ObjectIdDict)
return false
Expand Down Expand Up @@ -201,96 +193,6 @@ function length(d::ObjectIdDict)
n
end

# hashing

# Mix a 32-bit value through six add/xor/shift rounds with fixed constants.
# The typed local converts every intermediate result back to Uint32.
function int32hash(n::Uint32)
    local x::Uint32 = n
    x += 0x7ed55d16 + (x << 12)
    x $= 0xc761c23c $ (x >> 19)
    x += 0x165667b1 + (x << 5)
    x = (x + 0xd3a2646c) $ (x << 9)
    x += 0xfd7046c5 + (x << 3)
    x $= 0xb55a4f09 $ (x >> 16)
    return x
end

# Mix a 64-bit value into a 64-bit hash through a fixed sequence of
# complement/shift/add/xor rounds.
function int64hash(n::Uint64)
    local x::Uint64 = n
    x = (x << 21) + ~x
    x $= (x >> 24)
    x += (x << 3) + (x << 8)
    x $= (x >> 14)
    x += (x << 2) + (x << 4)
    x $= (x >> 28)
    x += (x << 31)
    return x
end

# Mix a 64-bit value and reduce it to 32 bits: the mixing rounds run on a
# Uint64 local and the final value is converted with `uint32`.
function int64to32hash(n::Uint64)
    local k::Uint64 = n
    k = (k << 18) + ~k
    k $= (k >> 31)
    k *= 21
    k $= (k >> 11)
    k += (k << 6)
    k $= (k >> 22)
    return uint32(k)
end

# `bitmix` combines two integers of the same width into one mixed value.
# 32-bit operands: `uint64(a)` shifted left by 32 is or-ed with `uint64(b)`,
# and the 64-bit result is reduced with `int64to32hash`.
bitmix(a::Union(Int32,Uint32), b::Union(Int32,Uint32)) = int64to32hash((uint64(a)<<32)|uint64(b))
# 64-bit operands: `a` is xor-ed with `b` rotated by 32 bits (halves swapped
# via `<<` and the logical shift `>>>`), then mixed with `int64hash`.
bitmix(a::Union(Int64,Uint64), b::Union(Int64, Uint64)) = int64hash(uint64(a$((b<<32)|(b>>>32))))

# `hash64` hashes the raw 64 bits of a Float64, Int64 or Uint64.
# The mixer is chosen once from WORD_SIZE: `int64hash` when it is 64,
# `int64to32hash` otherwise.
if WORD_SIZE != 64
    hash64(x::Float64) = int64to32hash(reinterpret(Uint64,x))
    hash64(x::Union(Int64,Uint64)) = int64to32hash(reinterpret(Uint64,x))
else
    hash64(x::Float64) = int64hash(reinterpret(Uint64,x))
    hash64(x::Union(Int64,Uint64)) = int64hash(reinterpret(Uint64,x))
end

# Bool, Char and the built-in integer types up to 64 bits are hashed by
# converting to Uint64 and calling `hash64`.
hash(x::Union(Bool,Char,Int8,Uint8,Int16,Uint16,Int32,Uint32,Int64,Uint64)) =
hash64(uint64(x))

# Hash an arbitrary Integer 64 bits at a time.
# The low 64 bits are hashed first. When `x` lies within
# [typemin(Int64), typemax(Uint64)] that hash is the result; otherwise `x` is
# shifted right (`>>>`) by 64 repeatedly and each further 64-bit chunk is
# folded in with `bitmix` until the remainder is 0 or -1.
function hash(x::Integer)
    h::Uint = hash(uint64(x&0xffffffffffffffff))
    fits_in_word = typemin(Int64) <= x && x <= typemax(Uint64)
    if !fits_in_word
        x >>>= 64
        while x != 0 && x != -1
            chunk = uint64(x&0xffffffffffffffff)
            h = bitmix(h, hash(chunk))
            x >>>= 64
        end
    end
    return h
end

# Floats are hashed by their bit pattern. Any NaN is first replaced by the
# NaN32 / NaN constant, so every NaN of a given width hashes alike.
hash(x::Float32) = hash(reinterpret(Uint32, ifelse(isnan(x), NaN32, x)))
hash(x::Float64) = hash(reinterpret(Uint64, ifelse(isnan(x), NaN, x)))

# Hash a tuple by folding its elements, in order, into an accumulator that
# starts at zero: each element hash is offset by 42 and mixed in with `bitmix`.
function hash(t::Tuple)
    acc::Uint = 0
    for elt in t
        acc = bitmix(acc, int(hash(elt))+42)
    end
    return acc
end

# Hash an array: seed the accumulator with hash(size(a))+1, then fold in the
# hash of every element, visited by linear index 1:length(a), with `bitmix`.
function hash(a::AbstractArray)
    acc::Uint = hash(size(a))+1
    for idx in 1:length(a)
        elhash = int(hash(a[idx]))
        acc = bitmix(acc, elhash)
    end
    return acc
end

# make sure Array{Bool} and BitArray can be equivalent
# (a Bool array is packed with `bitpack` and the packed result is hashed)
hash(a::AbstractArray{Bool}) = hash(bitpack(a))

# Fallback for values without a more specific method: use `object_id`.
hash(x::ANY) = object_id(x)

# An Expr mixes the hash of its head with the hash of its argument list offset by 43.
hash(x::Expr) = bitmix(hash(x.head),hash(x.args)+43)


# dict

type Dict{K,V} <: Associative{K,V}
Expand Down Expand Up @@ -538,6 +440,7 @@ function setindex!{K,V}(h::Dict{K,V}, v0, key0)
index = ht_keyindex2(h, key)

if index > 0
h.keys[index] = key
h.vals[index] = v
else
_setindex!(h, v, key, -index)
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ export
atanh,
big,
binomial,
bitmix,
bool,
bswap,
cbrt,
Expand Down
4 changes: 2 additions & 2 deletions base/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ astcopy(x::Union(SymbolNode,GetfieldNode,Expr)) = copy(x)
# `astcopy` maps itself over the elements of an Any-vector and returns any
# other value unchanged.
astcopy(x::Array{Any,1}) = map(astcopy, x)
astcopy(x) = x

# `isequal` on Expr: heads must be identical (`is`) and argument lists `isequal`.
isequal(x::Expr, y::Expr) = (is(x.head,y.head) && isequal(x.args,y.args))
# `isequal` on QuoteNode: compare the wrapped values.
isequal(x::QuoteNode, y::QuoteNode) = isequal(x.value, y.value)
# `==` counterparts: heads compared with `===`, arguments / values with `==`.
==(x::Expr, y::Expr) = x.head === y.head && x.args == y.args
==(x::QuoteNode, y::QuoteNode) = x.value == y.value

function show(io::IO, tv::TypeVar)
if !is(tv.lb, None)
Expand Down
23 changes: 6 additions & 17 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,11 @@ mod{T<:FloatingPoint}(x::T, y::T) = rem(y+rem(x,y),y)
# `<=` for Float32/Float64 maps directly onto the `le_float` intrinsic.
<=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y))
<=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y))

# Generic floating-point `isequal`: true when the operands are `==` and have
# the same sign bit, or when both are NaN.
isequal{T<:FloatingPoint}(x::T, y::T) =
((x==y) & (signbit(x)==signbit(y))) | (isnan(x)&isnan(y))

# Float32/Float64 `isequal` / `isless` use the `fpiseq` / `fpislt` intrinsics.
# NOTE(review): presumably these follow the same NaN and signed-zero rules as
# the generic definitions in this section -- confirm against the intrinsics.
isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y))
isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y))
isless (x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y))
isless (x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y))

# Generic `isless`: true when `a<b`, or when `a` is not NaN and either `b` is
# NaN or `a`'s sign bit is greater than `b`'s.
isless(a::FloatingPoint, b::FloatingPoint) =
(a<b) | (!isnan(a) & (isnan(b) | (signbit(a)>signbit(b))))
# Mixed Real / FloatingPoint: try `<` directly, else compare after converting
# the Real operand with `float`.
isless(a::Real, b::FloatingPoint) = (a<b) | isless(float(a),b)
isless(a::FloatingPoint, b::Real) = (a<b) | isless(a,float(b))

function cmp(x::FloatingPoint, y::FloatingPoint)
(isnan(x) || isnan(y)) && throw(DomainError())
ifelse(x<y, -1, ifelse(x>y, 1, 0))
Expand Down Expand Up @@ -220,18 +212,15 @@ end
# Absolute value of Float64/Float32 through the `abs_float` intrinsic.
abs(x::Float64) = box(Float64,abs_float(unbox(Float64,x)))
abs(x::Float32) = box(Float32,abs_float(unbox(Float32,x)))

# NOTE(review): several signatures in this section (`isnan(::FloatingPoint)`,
# `isnan(::Real)`, `isinf(::Real)`, `isfinite(::FloatingPoint)`,
# `isfinite(::Real)`) are defined twice with different bodies; presumably only
# the later definition of each is meant to remain -- confirm.

# A floating-point value is NaN exactly when it is not equal to itself.
isnan(x::FloatingPoint) = (x != x)
isnan(x::Real) = isnan(float(x))
isnan(x::Integer) = false
isnan(x::FloatingPoint) = x != x
isnan(x::Real) = false

# Infinite means the absolute value equals Inf; integers are never infinite.
isinf(x::FloatingPoint) = (abs(x) == Inf)
isinf(x::Real) = isinf(float(x))
isinf(x::Integer) = false

# `x - x` is zero only for finite floating-point values (it is NaN for
# infinities and NaN).
isfinite(x::FloatingPoint) = (x-x == 0)
isfinite(x::Real) = isfinite(float(x))
isfinite(x::FloatingPoint) = x - x == 0
# Generic reals: finite when the third component of `decompose(x)` is nonzero
# (presumably a denominator -- TODO confirm against `decompose`).
isfinite(x::Real) = decompose(x)[3] != 0
isfinite(x::Integer) = true

# Generic reals are infinite when they are neither NaN nor finite.
isinf(x::Real) = !isnan(x) & !isfinite(x)

## floating point traits ##

# Float16 infinity constant, built directly from the raw bit pattern 0x7c00.
const Inf16 = box(Float16,unbox(Uint16,0x7c00))
Expand Down
2 changes: 0 additions & 2 deletions base/float16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,5 +139,3 @@ hypot(a::Float16, b::Float16) = float16(hypot(float32(a), float32(b)))
# Float16 `ldexp` and `exponent` are computed on the Float32 conversion of the
# argument; `ldexp` converts its result back with `float16`.
ldexp(a::Float16, b::Integer) = float16(ldexp(float32(a), b))
exponent(x::Float16) = exponent(float32(x))
# Integer powers convert the exponent to Float16 and reuse the Float16 `^`.
^(x::Float16, y::Integer) = x^float16(y)

# Hash a Float16 by its 16-bit pattern, replacing any NaN by NaN16 first.
hash(x::Float16) = hash(reinterpret(Uint16, isnan(x) ? NaN16 : x))
1 change: 0 additions & 1 deletion base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ end
# A negative `k` throws DomainError; otherwise forward with `k` converted by `uint`.
binomial(n::BigInt, k::Integer) = k < 0 ? throw(DomainError()) : binomial(n, uint(k))

# All BigInt comparisons are expressed through the three-way `cmp`.
==(x::BigInt, y::BigInt) = cmp(x,y) == 0
isequal(x::BigInt, y::BigInt) = cmp(x,y) == 0
<=(x::BigInt, y::BigInt) = cmp(x,y) <= 0
>=(x::BigInt, y::BigInt) = cmp(x,y) >= 0
<(x::BigInt, y::BigInt) = cmp(x,y) < 0
Expand Down
81 changes: 81 additions & 0 deletions base/hashing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
## hashing a single value ##

# One-argument `hash` forwards to the two-argument form with a zero seed.
hash(x::Any) = hash(x, zero(Uint))
# A weak reference hashes like the value it wraps, using the same seed.
hash(w::WeakRef, h::Uint) = hash(w.value, h)

## core data hashing functions ##

# 64-bit -> 64-bit mixer: runs `n` through a fixed sequence of
# complement/shift/add/xor rounds and returns the mixed Uint64.
function hash_64_64(n::Uint64)
    local x::Uint64 = n
    x = (x << 21) + ~x
    x $= (x >> 24)
    x += (x << 3) + (x << 8)
    x $= (x >> 14)
    x += (x << 2) + (x << 4)
    x $= (x >> 28)
    x += (x << 31)
    return x
end

# 64-bit -> 32-bit mixer: the rounds run on a Uint64 local and the final
# value is converted with `uint32`.
function hash_64_32(n::Uint64)
    local x::Uint64 = n
    x = (x << 18) + ~x
    x $= (x >> 31)
    x *= 21
    x $= (x >> 11)
    x += (x << 6)
    x $= (x >> 22)
    return uint32(x)
end

# 32-bit -> 32-bit mixer: six add/xor/shift rounds with fixed constants.
# The typed local converts every intermediate result back to Uint32.
function hash_32_32(n::Uint32)
    local x::Uint32 = n
    x += 0x7ed55d16 + (x << 12)
    x $= 0xc761c23c $ (x >> 19)
    x += 0x165667b1 + (x << 5)
    x = (x + 0xd3a2646c) $ (x << 9)
    x += 0xfd7046c5 + (x << 3)
    x $= 0xb55a4f09 $ (x >> 16)
    return x
end

# Select the mixers once, depending on whether the native `Uint` is Uint64:
#   hash_uint64 -- hashes a Uint64 value
#   hash_uint   -- hashes a value of the native Uint type
if Uint != Uint64
    hash_uint64(x::Uint64) = hash_64_32(x)
    hash_uint(x::Uint)     = hash_32_32(x)
else
    hash_uint64(x::Uint64) = hash_64_64(x)
    hash_uint(x::Uint)     = hash_64_64(x)
end

## hashing small, built-in numeric types ##

# Core mixer for built-in numbers. `a` is an integer image of the value and
# `b` its Float64 image: they are combined as 3a + bits(b), the seed `h` is
# subtracted, and the result is passed through `hash_uint64`.
hx(a::Uint64, b::Float64, h::Uint) = hash_uint64((3a + reinterpret(Uint64,b)) - h)
# Precomputed value used for every NaN (zero integer image, zero seed).
const hx_NaN = hx(uint64(0), NaN, uint(0 ))

# 64-bit integers pass their own bits as `a` and their Float64 conversion as `b`.
hash(x::Uint64, h::Uint) = hx(x, float64(x), h)
hash(x::Int64, h::Uint) = hx(reinterpret(Uint64,x), float64(x), h)
# Float64 passes the `fptosi` integer conversion of `x` as `a`; any NaN maps to
# `hx_NaN $ h` instead.
# NOTE(review): presumably this is what lets an integer and the equal-valued
# Float64 hash alike -- confirm the behaviour of `fptosi` for values outside
# the Int64 range.
hash(x::Float64, h::Uint) = isnan(x) ? (hx_NaN $ h) : hx(box(Uint64,fptosi(unbox(Float64,x))), x, h)

# Narrower integers and Float32 widen to Int64 / Float64 and reuse the methods above.
hash(x::Union(Int8,Uint8,Int16,Uint16,Int32,Uint32), h::Uint) = hash(int64(x), h)
hash(x::Float32, h::Uint) = hash(float64(x), h)

## hashing complex numbers ##

# Seed used for the imaginary part, and the hash of an integer zero under
# that seed.
const h_imag = uint(0x32a7a07f3e7cd1f9)
const hash_0_imag = hash(0, h_imag)

# Hash a complex number. The imaginary part is hashed with the `h_imag` seed
# and xor-ed with `hash_0_imag`, so an imaginary part that hashes like zero
# contributes nothing and the result reduces to `hash(real(z), h)`.
function hash(z::Complex, h::Uint)
    # TODO: with default argument specialization, this would be better:
    # hash(real(z), h $ hash(imag(z), h $ h_imag) $ hash(0, h $ h_imag))
    imag_mix = hash(imag(z), h_imag) $ hash_0_imag
    return hash(real(z), h $ imag_mix)
end

## special hashing for booleans and characters ##

# Bool and Char are hashed as their integer value, with a type-specific
# constant added to the seed (presumably so that they do not hash like the
# plain integer -- confirm against the intended `isequal` semantics).
hash(x::Bool, h::Uint) = hash(int(x), h + uint(0x4cd135a1755139a5))
hash(x::Char, h::Uint) = hash(int(x), h + uint(0x10f989ff0f886f11))

## symbol & expression hashing ##

# A Symbol is hashed through its `object_id`.
hash(x::Symbol, h::Uint) = hash(object_id(x), h)
# An Expr hashes its head with an offset seed, then uses that result as the
# seed for hashing its argument list.
hash(x::Expr, h::Uint) = hash(x.args, hash(x.head, h + uint(0x83c7900696d26dc6)))
Loading

0 comments on commit 5b20688

Please sign in to comment.