Skip to content

Commit

Permalink
Merge pull request #31 from milankl/mk/intarith
Browse files Browse the repository at this point in the history
Stochastic round to NaN?
  • Loading branch information
milankl authored Nov 4, 2020
2 parents df02585 + d6e0408 commit 763d9fe
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 175 deletions.
2 changes: 1 addition & 1 deletion src/StochasticRounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module StochasticRounding

# faster random number generator
using RandomNumbers.Xorshifts
const Xor128 = Ref{Xoroshiro128Plus}()
const Xor128 = Ref{Xoroshiro128Plus}(Xoroshiro128Plus())

"""Reseed the PRNG randomly by recalling."""
function __init__()
Expand Down
66 changes: 9 additions & 57 deletions src/bfloat16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,8 @@ UInt32(x::BFloat16sr) = UInt32(Float32(x))
UInt16(x::BFloat16sr) = UInt16(Float32(x))
UInt8(x::BFloat16sr) = UInt8(Float32(x))

const epsBF16 = 0.0078125f0 # machine epsilon of BFloat16 as Float32
const epsBF16_half = epsBF16/2 # half the machine epsilon
const eps_quarter = 0x0000_4000 # a quarter of eps as Float32 sig bits
const F32_one = reinterpret(UInt32,one(Float32)) # Float32 one as UInt32

# The smallest non-subnormal exponent of BFloat16 as Float32 reinterpreted as UInt32
# floatmin(Float32) = floatmin(BFloat16)
const min_expBF16 = reinterpret(UInt32,floatmin(Float32))

"""Convert to BFloat16sr from Float32 via round-to-nearest
and tie to even. Identical to BFloat16(::Float32)."""
function BFloat16sr(x::Float32)
Expand All @@ -72,64 +65,23 @@ end
"""Convert to BFloat16sr from Float32 with stochastic rounding.
Binary arithmetic version."""
function BFloat16_stochastic_round(x::Float32)
iszero(x) && return zero(BFloat16sr)
ix = reinterpret(Int32,x)
# if deterministically round to 0 return 0
# to avoid a stochastic rounding to NaN
# push to the left to get rid of sign
# push to the right to get rid of the insignificant bits
((ix << 1) >> 16) == 0x0000_0000 && return zero(BFloat16sr)
# r are random bits for the last 15
# >> either introduces 0s for the first 17 bits
# or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2)
# or 1s. Interpreted as Int32 this corresponds to [-ulp/2,ulp/2)
# which is added with binary arithmetic subsequently
# this is the stochastic perturbation.
# Then deterministic round to nearest to either round up or round down.
r = rand(Xor128[],Int32) >> 16
ui = reinterpret(Int32,x) + r
return BFloat16sr(reinterpret(Float32,ui))
xr = reinterpret(Float32,ix + r)
return BFloat16sr(xr)
end

# """Convert to BFloat16sr from Float32 with stochastic rounding."""
# function BFloat16_stochastic_round(x::Float32)
# isnan(x) && return NaNB16sr
#
# ui = reinterpret(UInt32, x)
#
# # e is the base 2 exponent of x (with signficand is set to zero)
# # e.g. e is 2 for pi, e is -2 for -pi, e is 0.25 for 0.3
# # e is at least min_exp for stochastic rounding for subnormals
# e = (ui & sign_mask(Float32)) | max(min_expBF16,ui & exponent_mask(Float32))
# e = reinterpret(Float32,e)
#
# # sig is the signficand (exponents & sign is masked out)
# sig = ui & significand_mask(Float32)
#
# # STOCHASTIC ROUNDING
# # In most cases, perturb any x between x0 and x1 with a random number
# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1.
# # ulp = e*eps, with e the next base 2 exponent to zero from x.
#
# # However, there is a special case (aka the "quarter-case") for rounding
# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip)
# # due to doubling of ulp towards x1.
# quartercase = sig < eps_quarter # true for special case false otherwise
#
# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5)
#
# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction
# # of the distance where x is in between x0 and x1
# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2)
# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2)
# # and so the chance of a round-up is indeed 1/8
# # Illustration, let x be at 1/8, then perturb such that x can be in (--)
# # 1 -- x --1/4-- --1/2-- -- -- -- 2
# # 1 (-x-----------------) 2
# # i.e. starting from 1/16 up to 1/2+1/16
# frac = quartercase ? reinterpret(Float32,F32_one | (sig << 7)) - 1f0 : 0.5f0
# eps = quartercase ? epsBF16_half : epsBF16 # in this case use eps/2
#
# # stochastically perturb x before rounding (equiv to stochastic rounding)
# x += e*eps*(rand(Xor128[],Float32) - frac)
#
# # Round to nearest after stochastic perturbation
# return BFloat16sr(x)
# end

"""Chance that x::Float32 is round up when converted to BFloat16sr."""
function BFloat16_chance_roundup(x::Float32)
isnan(x) && return NaNB16sr
Expand Down
69 changes: 11 additions & 58 deletions src/float16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ const NaN16sr = reinterpret(Float16sr, NaN16)

# basic operations
abs(x::Float16sr) = reinterpret(Float16sr, reinterpret(UInt16, x) & 0x7fff)
isnan(x::Float16sr) = reinterpret(UInt16,x) & 0x7fff > 0x7c00
isfinite(x::Float16sr) = reinterpret(UInt16,x) & 0x7c00 != 0x7c00
isnan(x::Float16sr) = isnan(Float16(x))
isfinite(x::Float16sr) = isfinite(Float16(x))

nextfloat(x::Float16sr) = Float16sr(nextfloat(Float16(x)))
prevfloat(x::Float16sr) = Float16sr(prevfloat(Float16(x)))
Expand All @@ -49,74 +49,27 @@ Float64(x::Float16sr) = Float64(Float16(x))
Float16sr(x::Integer) = Float16sr(Float32(x))
(::Type{T})(x::Float16sr) where {T<:Integer} = T(Float32(x))

# constants needed for stochastic perturbation
const epsF16 = Float32(eps(Float16)) # machine epsilon of Float16 as Float32
const epsF16_half = epsF16/2 # machine epsilon half

# The smallest non-subnormal exponent of Float16 as Float32 reinterpreted as UInt32
const min_expF16 = reinterpret(UInt32,Float32(floatmin(Float16)))

"""Convert to Float16sr from Float32 with stochastic rounding.
Binary arithmetic version."""
function Float16_stochastic_round(x::Float32)
iszero(x) && return zero(Float16sr)
ix = reinterpret(Int32,x)
# if deterministically round to 0 return 0
# to avoid a stochastic rounding to NaN
# push to the left to get rid of sign
# push to the right to get rid of the insignificant bits
((ix << 1) >> 13) == 0x0000_0000 && return zero(BFloat16sr)

# r are random bits for the last 15
# >> either introduces 0s for the first 17 bits
# or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2)
# which is added with binary arithmetic subsequently
# this is the stochastic perturbation.
# Then deterministic round to nearest to either round up or round down.
r = rand(Xor128[],Int32) >> 19 # = 16sbits+3expbits difference between f16,f32
ui = reinterpret(Int32,x) + r
return Float16sr(reinterpret(Float32,ui))
xr = reinterpret(Float32,ix + r)
return Float16sr(xr)
end

# """Convert to BFloat16sr from Float32 with stochastic rounding."""
# function Float16_stochastic_round(x::Float32)
# isnan(x) && return NaN16sr
#
# ui = reinterpret(UInt32, x)
#
# # e is the base 2 exponent of x (with signficand is set to zero)
# # e.g. e is 2 for pi, e is -2 for -pi, e is 0.25 for 0.3
# # e is at least min_exp for stsochastic rounding for subnormals
# e = (ui & sign_mask(Float32)) | max(min_expF16,ui & exponent_mask(Float32))
# e = reinterpret(Float32,e)
#
# # sig is the signficand (exponents & sign is masked out)
# sig = ui & significand_mask(Float32)
#
# # STOCHASTIC ROUNDING
# # In most cases, perturb any x between x0 and x1 with a random number
# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1.
# # ulp = e*eps, with e the next base 2 exponent to zero from x.
#
# # However, there is a special case (aka the "quarter-case") for rounding
# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip)
# # due to doubling of ulp towards x1.
# quartercase = sig < eps_quarter # true for special case false otherwise
#
# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5)
#
# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction
# # of the distance where x is in between x0 and x1
# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2)
# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2)
# # and so the chance of a round-up is indeed 1/8
# # Illustration, let x be at 1/8, then perturb such that x can be in (--)
# # 1 -- x --1/4-- --1/2-- -- -- -- 2
# # 1 (-x-----------------) 2
# # i.e. starting from 1/16 up to 1/2+1/16
# frac = quartercase ? reinterpret(Float32,F32_one | (sig << 10)) - 1f0 : 0.5f0
# eps = quartercase ? epsF16_half : epsF16
#
# # stochastically perturb x before rounding (equiv to stochastic rounding)
# x += e*eps*(rand(Xor128[],Float32) - frac)
#
# # Round to nearest after stochastic perturbation
# return Float16sr(x)
# end

"""Chance that x::Float32 is round up when converted to Float16sr."""
function Float16_chance_roundup(x::Float32)
isnan(x) && return NaN16sr
Expand Down
67 changes: 10 additions & 57 deletions src/float32sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ const Inf32sr = reinterpret(Float32sr, Inf32)
const NaN32sr = reinterpret(Float32sr, NaN32)

# basic operations
abs(x::Float32sr) = reinterpret(Float32sr, reinterpret(UInt32, x) & 0x7fff_ffff)
isnan(x::Float32sr) = reinterpret(UInt32,x) & 0x7fff_ffff > 0x7f80_0000
isfinite(x::Float32sr) = reinterpret(UInt32,x) & 0x7fff_ffff != 0x7f80_0000
abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32)))
isnan(x::Float32sr) = isnan(reinterpret(Float32,x))
isfinite(x::Float32sr) = isfinite(reinterpret(Float32,x))

nextfloat(x::Float32sr) = Float32sr(nextfloat(Float32(x)))
prevfloat(x::Float32sr) = Float32sr(prevfloat(Float32(x)))
Expand All @@ -52,72 +52,25 @@ Float64(x::Float32sr) = Float64(Float32(x))
Float32sr(x::Integer) = Float32sr(Float32(x))
(::Type{T})(x::Float32sr) where {T<:Integer} = T(Float32(x))

const epsF32 = Float64(eps(Float32))
const epsF32_half = epsF32/2
const eps64_quarter = 0x0000_0000_4000_0000 # a quarter of eps as Float64 sig bits
const F64_one = reinterpret(UInt64,one(Float64))

# The smallest non-subnormal exponent of Float32 as Float64 reinterpreted as UInt64
const min_expF32 = reinterpret(UInt64,Float64(floatmin(Float32)))

"""Convert to Float32sr from Float64 with stochastic rounding.
Binary arithmetic version."""
function Float32_stochastic_round(x::Float64)
iszero(x) && return zero(Float32sr)
# check whether round to nearest maps to zero(Float32)
# to avoid stochastic rounding to NaN
iszero(Float32(x)) && return zero(Float32sr)

# r are random bits for the last 31
# >> either introduces 0s for the first 33 bits
# or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2)
# which is added with binary arithmetic subsequently
# this is the stochastic perturbation.
# Then deterministic round to nearest to either round up or round down.
r = rand(Xor128[],Int64) >> 35 # = 32sbits+3expbits difference between f32,f64
ui = reinterpret(Int64,x) + r
return Float32sr(reinterpret(Float64,ui))
xr = reinterpret(Float64,reinterpret(Int64,x) + r)
return Float32sr(xr) # round to nearest
end

# """Convert to Float32sr from Float64 with stochastic rounding."""
# function Float32_stochastic_round(x::Float64)
#
# ui = reinterpret(UInt64, x)
#
# # stochastic rounding
# # e is the base 2 exponent of x (signficand set to zero)
# # e is at least min_exp for stochastic rounding for subnormals
# e = (ui & sign_mask(Float64)) | max(min_expF32,ui & exponent_mask(Float64))
# e = reinterpret(Float64,e)
#
# # sig is the signficand (exponents & sign is masked out)
# sig = ui & significand_mask(Float64)
#
# # STOCHASTIC ROUNDING
# # In most cases, perturb any x between x0 and x1 with a random number
# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1.
# # ulp = e*eps, with e the next base 2 exponent to zero from x.
#
# # However, there is a special case (aka the "quarter-case") for rounding
# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip)
# # due to doubling of ulp towards x1.
# quartercase = sig < eps64_quarter # true for special case false otherwise
#
# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5)
# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction
# # of the distance where x is in between x0 and x1
# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2)
# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2)
# # and so the chance of a round-up is indeed 1/8
# # Illustration, let x be at 1/8, then perturb such that x can be in (--)
# # 1 -- x --1/4-- --1/2-- -- -- -- 2
# # 1 (-x-----------------) 2
# # i.e. starting from 1/16 up to 1/2+1/16
# frac = quartercase ? reinterpret(Float64,F64_one | (sig << 23)) - 1.0 : 0.5
# eps = quartercase ? epsF32_half : epsF32
#
# # stochastically perturb x before rounding (equiv to stochastic rounding)
# x += e*eps*(rand(Xor128[],Float64) - frac)
#
# # Round to nearest after stochastic perturbation
# return Float32sr(x)
# end
const F64_one = reinterpret(UInt64,one(Float64))

"""Chance that x::Float64 is round up when converted to Float32sr."""
function Float32_chance_roundup(x::Float64)
Expand Down
15 changes: 15 additions & 0 deletions test/bfloat16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@ end
@test 0f == BFloat16sr(0)
end

@testset "NaN and Inf" begin
@test isnan(NaNB16sr)
@test ~isfinite(NaNB16sr)
@test ~isfinite(InfB16sr)
end

@testset "No stochastic round to NaN" begin
f1 = nextfloat(0f0)
f2 = prevfloat(0f0)
for i in 1:100
@test isfinite(BFloat16_stochastic_round(f1))
@test isfinite(BFloat16_stochastic_round(f2))
end
end

@testset "Rounding" begin
@test 1 == Int(round(BFloat16sr(1.2)))
@test 1 == Int(floor(BFloat16sr(1.2)))
Expand Down
17 changes: 15 additions & 2 deletions test/float16sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
@test zero(Float16sr) == -(zero(Float16sr))
end

@testset "NaN and Inf" begin
@test isnan(NaN16sr)
@test ~isfinite(NaN16sr)
@test ~isfinite(Inf16sr)
end

@testset "No stochastic round to NaN" begin
f1 = nextfloat(0f0)
f2 = prevfloat(0f0)
for i in 1:100
@test isfinite(Float16_stochastic_round(f1))
@test isfinite(Float16_stochastic_round(f2))
end
end

@testset "Integer promotion" begin
f = Float16sr(1)
@test 2f == Float16sr(2)
Expand Down Expand Up @@ -271,8 +286,6 @@ end
# for some reason 0x0200 fails...?
for hex in vcat(0x0001:0x01ff,0x0201:0x03ff) # test for all subnormals of Float16

println(hex)

# add ulp/2 to have stochastic rounding that is 50/50 up/down.
x = Float32(reinterpret(Float16,hex)) + ulp_half

Expand Down
24 changes: 24 additions & 0 deletions test/float32sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,27 @@ end
@test p1/N < 0.55
end
end

@testset "NaN and Inf" begin
@test isnan(NaN32sr)
@test ~isfinite(NaN32sr)
@test ~isfinite(Inf32sr)
end

@testset "No stochastic round to NaN" begin
f1 = nextfloat(0.0)
f2 = prevfloat(0.0)
for i in 1:1000
@test isfinite(Float32_stochastic_round(f1))
@test isfinite(Float32_stochastic_round(f2))
end
end

@testset "No stochastic round to NaN" begin
f1 = Float64(floatmax(Float32)) # larger can map to Inf though!
f2 = -f1
for i in 1:1000
@test isfinite(Float32_stochastic_round(f1))
@test isfinite(Float32_stochastic_round(f2))
end
end

0 comments on commit 763d9fe

Please sign in to comment.