diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl index 6234217..00a8cde 100644 --- a/src/StochasticRounding.jl +++ b/src/StochasticRounding.jl @@ -22,7 +22,7 @@ module StochasticRounding # faster random number generator using RandomNumbers.Xorshifts - const Xor128 = Ref{Xoroshiro128Plus}() + const Xor128 = Ref{Xoroshiro128Plus}(Xoroshiro128Plus()) """Reseed the PRNG randomly by recalling.""" function __init__() diff --git a/src/bfloat16sr.jl b/src/bfloat16sr.jl index 10686bc..00165ef 100644 --- a/src/bfloat16sr.jl +++ b/src/bfloat16sr.jl @@ -51,15 +51,8 @@ UInt32(x::BFloat16sr) = UInt32(Float32(x)) UInt16(x::BFloat16sr) = UInt16(Float32(x)) UInt8(x::BFloat16sr) = UInt8(Float32(x)) -const epsBF16 = 0.0078125f0 # machine epsilon of BFloat16 as Float32 -const epsBF16_half = epsBF16/2 # half the machine epsilon -const eps_quarter = 0x0000_4000 # a quarter of eps as Float32 sig bits const F32_one = reinterpret(UInt32,one(Float32)) # Float32 one as UInt32 -# The smallest non-subnormal exponent of BFloat16 as Float32 reinterpreted as UInt32 -# floatmin(Float32) = floatmin(BFloat16) -const min_expBF16 = reinterpret(UInt32,floatmin(Float32)) - """Convert to BFloat16sr from Float32 via round-to-nearest and tie to even. Identical to BFloat16(::Float32).""" function BFloat16sr(x::Float32) @@ -72,64 +65,23 @@ end """Convert to BFloat16sr from Float32 with stochastic rounding. Binary arithmetic version.""" function BFloat16_stochastic_round(x::Float32) - iszero(x) && return zero(BFloat16sr) + ix = reinterpret(Int32,x) + # if deterministically round to 0 return 0 + # to avoid a stochastic rounding to NaN + # push to the left to get rid of sign + # push to the right to get rid of the insignificant bits + ((ix << 1) >> 16) == 0x0000_0000 && return zero(BFloat16sr) # r are random bits for the last 15 # >> either introduces 0s for the first 17 bits - # or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2) + # or 1s. Interpreted as Int32 this corresponds to [-ulp/2,ulp/2) # which is added with binary arithmetic subsequently # this is the stochastic perturbation. # Then deterministic round to nearest to either round up or round down. r = rand(Xor128[],Int32) >> 16 - ui = reinterpret(Int32,x) + r - return BFloat16sr(reinterpret(Float32,ui)) + xr = reinterpret(Float32,ix + r) + return BFloat16sr(xr) end -# """Convert to BFloat16sr from Float32 with stochastic rounding.""" -# function BFloat16_stochastic_round(x::Float32) -# isnan(x) && return NaNB16sr -# -# ui = reinterpret(UInt32, x) -# -# # e is the base 2 exponent of x (with signficand is set to zero) -# # e.g. e is 2 for pi, e is -2 for -pi, e is 0.25 for 0.3 -# # e is at least min_exp for stochastic rounding for subnormals -# e = (ui & sign_mask(Float32)) | max(min_expBF16,ui & exponent_mask(Float32)) -# e = reinterpret(Float32,e) -# -# # sig is the signficand (exponents & sign is masked out) -# sig = ui & significand_mask(Float32) -# -# # STOCHASTIC ROUNDING -# # In most cases, perturb any x between x0 and x1 with a random number -# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1. -# # ulp = e*eps, with e the next base 2 exponent to zero from x. -# -# # However, there is a special case (aka the "quarter-case") for rounding -# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip) -# # due to doubling of ulp towards x1. -# quartercase = sig < eps_quarter # true for special case false otherwise -# -# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5) -# -# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction -# # of the distance where x is in between x0 and x1 -# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2) -# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2) -# # and so the chance of a round-up is indeed 1/8 -# # Illustration, let x be at 1/8, then perturb such that x can be in (--) -# # 1 -- x --1/4-- --1/2-- -- -- -- 2 -# # 1 (-x-----------------) 2 -# # i.e. starting from 1/16 up to 1/2+1/16 -# frac = quartercase ? reinterpret(Float32,F32_one | (sig << 7)) - 1f0 : 0.5f0 -# eps = quartercase ? epsBF16_half : epsBF16 # in this case use eps/2 -# -# # stochastically perturb x before rounding (equiv to stochastic rounding) -# x += e*eps*(rand(Xor128[],Float32) - frac) -# -# # Round to nearest after stochastic perturbation -# return BFloat16sr(x) -# end - """Chance that x::Float32 is round up when converted to BFloat16sr.""" function BFloat16_chance_roundup(x::Float32) isnan(x) && return NaNB16sr diff --git a/src/float16sr.jl b/src/float16sr.jl index 5d5cae1..ba8d7cc 100644 --- a/src/float16sr.jl +++ b/src/float16sr.jl @@ -30,8 +30,8 @@ const NaN16sr = reinterpret(Float16sr, NaN16) # basic operations abs(x::Float16sr) = reinterpret(Float16sr, reinterpret(UInt16, x) & 0x7fff) -isnan(x::Float16sr) = reinterpret(UInt16,x) & 0x7fff > 0x7c00 -isfinite(x::Float16sr) = reinterpret(UInt16,x) & 0x7c00 != 0x7c00 +isnan(x::Float16sr) = isnan(Float16(x)) +isfinite(x::Float16sr) = isfinite(Float16(x)) nextfloat(x::Float16sr) = Float16sr(nextfloat(Float16(x))) prevfloat(x::Float16sr) = Float16sr(prevfloat(Float16(x))) @@ -49,17 +49,16 @@ Float64(x::Float16sr) = Float64(Float16(x)) Float16sr(x::Integer) = Float16sr(Float32(x)) (::Type{T})(x::Float16sr) where {T<:Integer} = T(Float32(x)) -# constants needed for stochastic perturbation -const epsF16 = Float32(eps(Float16)) # machine epsilon of Float16 as Float32 -const epsF16_half = epsF16/2 # machine epsilon half - -# The smallest non-subnormal exponent of Float16 as Float32 reinterpreted as UInt32 -const min_expF16 = reinterpret(UInt32,Float32(floatmin(Float16))) - """Convert to Float16sr from Float32 with stochastic rounding. Binary arithmetic version.""" function Float16_stochastic_round(x::Float32) - iszero(x) && return zero(Float16sr) + ix = reinterpret(Int32,x) + # if deterministically round to 0 return 0 + # to avoid a stochastic rounding to NaN + # push to the left to get rid of sign + # push to the right to get rid of the insignificant bits + ((ix << 1) >> 13) == 0x0000_0000 && return zero(BFloat16sr) + # r are random bits for the last 15 # >> either introduces 0s for the first 17 bits # or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2) @@ -67,56 +66,10 @@ function Float16_stochastic_round(x::Float32) # this is the stochastic perturbation. # Then deterministic round to nearest to either round up or round down. r = rand(Xor128[],Int32) >> 19 # = 16sbits+3expbits difference between f16,f32 - ui = reinterpret(Int32,x) + r - return Float16sr(reinterpret(Float32,ui)) + xr = reinterpret(Float32,ix + r) + return Float16sr(xr) end -# """Convert to BFloat16sr from Float32 with stochastic rounding.""" -# function Float16_stochastic_round(x::Float32) -# isnan(x) && return NaN16sr -# -# ui = reinterpret(UInt32, x) -# -# # e is the base 2 exponent of x (with signficand is set to zero) -# # e.g. e is 2 for pi, e is -2 for -pi, e is 0.25 for 0.3 -# # e is at least min_exp for stsochastic rounding for subnormals -# e = (ui & sign_mask(Float32)) | max(min_expF16,ui & exponent_mask(Float32)) -# e = reinterpret(Float32,e) -# -# # sig is the signficand (exponents & sign is masked out) -# sig = ui & significand_mask(Float32) -# -# # STOCHASTIC ROUNDING -# # In most cases, perturb any x between x0 and x1 with a random number -# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1. -# # ulp = e*eps, with e the next base 2 exponent to zero from x. -# -# # However, there is a special case (aka the "quarter-case") for rounding -# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip) -# # due to doubling of ulp towards x1. -# quartercase = sig < eps_quarter # true for special case false otherwise -# -# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5) -# -# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction -# # of the distance where x is in between x0 and x1 -# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2) -# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2) -# # and so the chance of a round-up is indeed 1/8 -# # Illustration, let x be at 1/8, then perturb such that x can be in (--) -# # 1 -- x --1/4-- --1/2-- -- -- -- 2 -# # 1 (-x-----------------) 2 -# # i.e. starting from 1/16 up to 1/2+1/16 -# frac = quartercase ? reinterpret(Float32,F32_one | (sig << 10)) - 1f0 : 0.5f0 -# eps = quartercase ? epsF16_half : epsF16 -# -# # stochastically perturb x before rounding (equiv to stochastic rounding) -# x += e*eps*(rand(Xor128[],Float32) - frac) -# -# # Round to nearest after stochastic perturbation -# return Float16sr(x) -# end - """Chance that x::Float32 is round up when converted to Float16sr.""" function Float16_chance_roundup(x::Float32) isnan(x) && return NaN16sr diff --git a/src/float32sr.jl b/src/float32sr.jl index 5449990..a5981a7 100644 --- a/src/float32sr.jl +++ b/src/float32sr.jl @@ -32,9 +32,9 @@ const Inf32sr = reinterpret(Float32sr, Inf32) const NaN32sr = reinterpret(Float32sr, NaN32) # basic operations -abs(x::Float32sr) = reinterpret(Float32sr, reinterpret(UInt32, x) & 0x7fff_ffff) -isnan(x::Float32sr) = reinterpret(UInt32,x) & 0x7fff_ffff > 0x7f80_0000 -isfinite(x::Float32sr) = reinterpret(UInt32,x) & 0x7fff_ffff != 0x7f80_0000 +abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32))) +isnan(x::Float32sr) = isnan(reinterpret(Float32,x)) +isfinite(x::Float32sr) = isfinite(reinterpret(Float32,x)) nextfloat(x::Float32sr) = Float32sr(nextfloat(Float32(x))) prevfloat(x::Float32sr) = Float32sr(prevfloat(Float32(x))) @@ -52,18 +52,13 @@ Float64(x::Float32sr) = Float64(Float32(x)) Float32sr(x::Integer) = Float32sr(Float32(x)) (::Type{T})(x::Float32sr) where {T<:Integer} = T(Float32(x)) -const epsF32 = Float64(eps(Float32)) -const epsF32_half = epsF32/2 -const eps64_quarter = 0x0000_0000_4000_0000 # a quarter of eps as Float64 sig bits -const F64_one = reinterpret(UInt64,one(Float64)) - -# The smallest non-subnormal exponent of Float32 as Float64 reinterpreted as UInt64 -const min_expF32 = reinterpret(UInt64,Float64(floatmin(Float32))) - """Convert to Float32sr from Float64 with stochastic rounding. Binary arithmetic version.""" function Float32_stochastic_round(x::Float64) - iszero(x) && return zero(Float32sr) + # check whether round to nearest maps to zero(Float32) + # to avoid stochastic rounding to NaN + iszero(Float32(x)) && return zero(Float32sr) + # r are random bits for the last 31 # >> either introduces 0s for the first 33 bits # or 1s. Interpreted as Int64 this corresponds to [-ulp/2,ulp/2) @@ -71,53 +66,11 @@ function Float32_stochastic_round(x::Float64) # this is the stochastic perturbation. # Then deterministic round to nearest to either round up or round down. r = rand(Xor128[],Int64) >> 35 # = 32sbits+3expbits difference between f32,f64 - ui = reinterpret(Int64,x) + r - return Float32sr(reinterpret(Float64,ui)) + xr = reinterpret(Float64,reinterpret(Int64,x) + r) + return Float32sr(xr) # round to nearest end -# """Convert to Float32sr from Float64 with stochastic rounding.""" -# function Float32_stochastic_round(x::Float64) -# -# ui = reinterpret(UInt64, x) -# -# # stochastic rounding -# # e is the base 2 exponent of x (signficand set to zero) -# # e is at least min_exp for stochastic rounding for subnormals -# e = (ui & sign_mask(Float64)) | max(min_expF32,ui & exponent_mask(Float64)) -# e = reinterpret(Float64,e) -# -# # sig is the signficand (exponents & sign is masked out) -# sig = ui & significand_mask(Float64) -# -# # STOCHASTIC ROUNDING -# # In most cases, perturb any x between x0 and x1 with a random number -# # that is in (-ulp/2,ulp/2) where ulp is the distance between x0 and x1. -# # ulp = e*eps, with e the next base 2 exponent to zero from x. -# -# # However, there is a special case (aka the "quarter-case") for rounding -# # below ulp/4 when x0 is 2^n for any n (i.e. above an exponent bit flip) -# # due to doubling of ulp towards x1. -# quartercase = sig < eps64_quarter # true for special case false otherwise -# -# # frac is in most cases 0.5 to shift the randum number [0,1) to [-0.5,0.5) -# # However, in the special case frac is (x-x0)/(x1-x0), that means the fraction -# # of the distance where x is in between x0 and x1 -# # Then shift the random number [0,1) to be [-frac/2,-frac/2+ulp/2) -# # such that e.g. x = x0 + ulp/8 gets perturbed to be in [x0+ulp/16,x0+ulp/16+ulp/2) -# # and so the chance of a round-up is indeed 1/8 -# # Illustration, let x be at 1/8, then perturb such that x can be in (--) -# # 1 -- x --1/4-- --1/2-- -- -- -- 2 -# # 1 (-x-----------------) 2 -# # i.e. starting from 1/16 up to 1/2+1/16 -# frac = quartercase ? reinterpret(Float64,F64_one | (sig << 23)) - 1.0 : 0.5 -# eps = quartercase ? epsF32_half : epsF32 -# -# # stochastically perturb x before rounding (equiv to stochastic rounding) -# x += e*eps*(rand(Xor128[],Float64) - frac) -# -# # Round to nearest after stochastic perturbation -# return Float32sr(x) -# end +const F64_one = reinterpret(UInt64,one(Float64)) """Chance that x::Float64 is round up when converted to Float32sr.""" function Float32_chance_roundup(x::Float64) diff --git a/test/bfloat16sr.jl b/test/bfloat16sr.jl index 3bd1008..d1d0e4d 100644 --- a/test/bfloat16sr.jl +++ b/test/bfloat16sr.jl @@ -9,6 +9,21 @@ end @test 0f == BFloat16sr(0) end +@testset "NaN and Inf" begin + @test isnan(NaNB16sr) + @test ~isfinite(NaNB16sr) + @test ~isfinite(InfB16sr) +end + +@testset "No stochastic round to NaN" begin + f1 = nextfloat(0f0) + f2 = prevfloat(0f0) + for i in 1:100 + @test isfinite(BFloat16_stochastic_round(f1)) + @test isfinite(BFloat16_stochastic_round(f2)) + end +end + @testset "Rounding" begin @test 1 == Int(round(BFloat16sr(1.2))) @test 1 == Int(floor(BFloat16sr(1.2))) diff --git a/test/float16sr.jl b/test/float16sr.jl index 76359d2..67735f3 100644 --- a/test/float16sr.jl +++ b/test/float16sr.jl @@ -3,6 +3,21 @@ @test zero(Float16sr) == -(zero(Float16sr)) end +@testset "NaN and Inf" begin + @test isnan(NaN16sr) + @test ~isfinite(NaN16sr) + @test ~isfinite(Inf16sr) +end + +@testset "No stochastic round to NaN" begin + f1 = nextfloat(0f0) + f2 = prevfloat(0f0) + for i in 1:100 + @test isfinite(Float16_stochastic_round(f1)) + @test isfinite(Float16_stochastic_round(f2)) + end +end + @testset "Integer promotion" begin f = Float16sr(1) @test 2f == Float16sr(2) @@ -271,8 +286,6 @@ end # for some reason 0x0200 fails...? for hex in vcat(0x0001:0x01ff,0x0201:0x03ff) # test for all subnormals of Float16 - println(hex) - # add ulp/2 to have stochastic rounding that is 50/50 up/down. x = Float32(reinterpret(Float16,hex)) + ulp_half diff --git a/test/float32sr.jl b/test/float32sr.jl index fb26cbd..11f13a6 100644 --- a/test/float32sr.jl +++ b/test/float32sr.jl @@ -270,3 +270,27 @@ end @test p1/N < 0.55 end end + +@testset "NaN and Inf" begin + @test isnan(NaN32sr) + @test ~isfinite(NaN32sr) + @test ~isfinite(Inf32sr) +end + +@testset "No stochastic round to NaN" begin + f1 = nextfloat(0.0) + f2 = prevfloat(0.0) + for i in 1:1000 + @test isfinite(Float32_stochastic_round(f1)) + @test isfinite(Float32_stochastic_round(f2)) + end +end + +@testset "No stochastic round to NaN" begin + f1 = Float64(floatmax(Float32)) # larger can map to Inf though! + f2 = -f1 + for i in 1:1000 + @test isfinite(Float32_stochastic_round(f1)) + @test isfinite(Float32_stochastic_round(f2)) + end +end