Skip to content

Commit

Permalink
faster hashing by avoiding UB
Browse files Browse the repository at this point in the history
In LLVM (inherited from C), fptosi has undefined behavior if the result
does not fit the integer size after rounding down. But by using the same
strategy as generic hashing of Real values, we actually can end up with
a situation that is faster for the CPU to deal with and avoids the UB.

Refs #6624 (3696968)
Fixes #37800
  • Loading branch information
vtjnash committed Oct 15, 2020
1 parent 86bbe09 commit f08d525
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 17 deletions.
32 changes: 26 additions & 6 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -551,12 +551,32 @@ isinf(x::Real) = !isnan(x) & !isfinite(x)

## hashing small, built-in numeric types ##

# Combine an unsigned integer `a`, the raw bit pattern of the float `b`, and the
# seed `h` into a single word, then scramble it with `hash_uint64`.
function hx(a::UInt64, b::Float64, h::UInt)
    float_bits = reinterpret(UInt64, b)
    return hash_uint64((3a + float_bits) - h)
end

# `hx` evaluated once for NaN with a zero integer part and a zero seed.
const hx_NaN = hx(UInt64(0), NaN, UInt(0))

# Hashing of 64-bit numbers through `hx`: every method passes an unsigned
# integer derived from `x` together with `x` converted to Float64.
hash(x::UInt64, h::UInt) = hx(x, Float64(x), h)
hash(x::Int64, h::UInt) = hx(reinterpret(UInt64, abs(x)), Float64(x), h)
# NaN is hashed from the precomputed `hx_NaN`, xor-ed with the seed. Every other
# value goes through `fptoui(UInt64, abs(x))` without a range check, so the
# conversion is also applied to magnitudes that do not fit in a UInt64.
hash(x::Float64, h::UInt) = isnan(x) ? (hx_NaN ⊻ h) : hx(fptoui(UInt64, abs(x)), x, h)
# Hash an Int64 by scrambling its raw 64-bit pattern and then folding in the
# seed as `3h`.
function hash(x::Int64, h::UInt)
    raw_bits = bitcast(UInt64, x)
    return hash_uint64(raw_bits) - 3h
end

# switch UInt64 from two's complement to sign-magnitude format and mix up some
# of the bits so that typemax(UInt64) isn't near -1 (since we have a test for this)
#
# Values whose top bit is clear are hashed from their own bit pattern. Values
# whose top bit is set (negative when viewed as Int64) are complemented and
# xor-ed with a constant mask before being scrambled.
hash(x::UInt64, h::UInt) = hash_uint64(bitcast(Int64, x) < 0 ? 0xc653_0000_0000_0000 ⊻ ~x : x) - 3h

# Scrambled bit pattern of the canonical NaN; every NaN input hashes from this
# constant, whatever its own payload bits are.
const hx_NaN = hash_uint64(reinterpret(UInt64, NaN))
let Tf = Float64, Tu = UInt64, Ti = Int64
    # The range bounds below are converted to Tf once, when the method is
    # generated, and spliced into the body as literals.
    @eval function hash(x::$Tf, h::UInt)
        # see comments on trunc and hash(Real, UInt)
        if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
            # The range check above guarantees the converted value fits in Ti.
            xi = fptosi($Ti, x)
            # Integer-valued floats reuse the integer hash.
            # NOTE(review): `==` also accepts -0.0 here, so -0.0 hashes like 0.
            if xi == x
                return hash(xi, h)
            end
        elseif $(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu)))
            # Only reached for values at or above Tf(typemax(Ti)) that still
            # pass the Tu range check.
            xu = fptoui($Tu, x)
            if xu == x
                return hash(xu, h)
            end
        elseif isnan(x)
            return hx_NaN ⊻ h # NaN does not have a stable bit pattern
        end
        # Everything else (non-integral values in range, infinities, out-of-range
        # magnitudes) is hashed from a multiple of the raw bit pattern.
        return hash_uint64(5bitcast(UInt64, x)) - 3h
    end
end

# Narrow integer types (and Bool) are widened to Int64 and hashed by the Int64
# method.
function hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt)
    return hash(Int64(x), h)
end

# Float32 is widened to Float64 and hashed by the Float64 method.
function hash(x::Float32, h::UInt)
    return hash(Float64(x), h)
end
Expand Down
14 changes: 3 additions & 11 deletions test/hashing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,12 @@ for T = types[2:end],
@test hash(a,one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
end

for T = types,
S = types,
x = vals,
a = coerce(T, x),
for T = types, S = types, x = vals
a = coerce(T, x)
b = coerce(S, x)
#println("$(typeof(a)) $a")
#println("$(typeof(b)) $b")
@test isequal(a,b) == (hash(a)==hash(b))
# for y=vals
# println("T=$T; S=$S; x=$x; y=$y")
# c = convert(T,x//y)
# d = convert(S,x//y)
# @test !isequal(a,b) || hash(a)==hash(b)
# end
@test isequal(a, b) == (hash(a) == hash(b))
end

# issue #8619
Expand Down

0 comments on commit f08d525

Please sign in to comment.