diff --git a/base/gmp.jl b/base/gmp.jl index ddc1256c504ca..dbc46f75a3617 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -374,26 +374,72 @@ function (::Type{T})(n::BigInt, ::RoundingMode{:Up}) where T<:CdoubleMax x < n ? nextfloat(x) : x end -function (::Type{T})(n::BigInt, ::RoundingMode{:Nearest}) where T<:CdoubleMax - x = T(n,RoundToZero) - if maxintfloat(T) <= abs(x) < T(Inf) - r = n-BigInt(x) - h = eps(x)/2 - if iseven(reinterpret(Unsigned,x)) # check if last bit is odd/even - if r < -h - return prevfloat(x) - elseif r > h - return nextfloat(x) - end +function Float64(x::BigInt, ::RoundingMode{:Nearest}) + x == 0 && return 0.0 + xsize = abs(x.size) + if xsize*BITS_PER_LIMB > 1024 + z = Inf64 + elseif xsize == 1 + z = Float64(unsafe_load(x.d)) + elseif Limb == UInt32 && xsize == 2 + z = Float64((unsafe_load(x.d, 2) % UInt64) << BITS_PER_LIMB + unsafe_load(x.d)) + else + y1 = unsafe_load(x.d, xsize) % UInt64 + n = 64 - leading_zeros(y1) + # load first 54(1 + 52 bits of fraction + 1 for rounding) + y = y1 >> (n - (precision(Float64)+1)) + if Limb == UInt64 + y += n > precision(Float64) ? 0 : (unsafe_load(x.d, xsize-1) >> (10+n)) else - if r <= -h - return prevfloat(x) - elseif r >= h - return nextfloat(x) - end + y += (unsafe_load(x.d, xsize-1) % UInt64) >> (n-22) + y += n > (precision(Float64) - 32) ? 0 : (unsafe_load(x.d, xsize-2) >> (10+n)) end + y = (y + 1) >> 1 # round, ties up + y &= ~UInt64(trailing_zeros(x) == (n-54 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even + d = ((n+1021) % UInt64) << 52 + z = reinterpret(Float64, d+y) + z = ldexp(z, (xsize-1)*BITS_PER_LIMB) + end + return flipsign(z, x.size) +end + +function Float32(x::BigInt, ::RoundingMode{:Nearest}) + x == 0 && return 0f0 + xsize = abs(x.size) + if xsize*BITS_PER_LIMB > 128 + z = Inf32 + elseif xsize == 1 + z = Float32(unsafe_load(x.d)) + else + y1 = unsafe_load(x.d, xsize) + n = BITS_PER_LIMB - leading_zeros(y1) + # load first 25(1 + 23 bits of fraction + 1 for rounding) + y = (y1 >> (n - (precision(Float32)+1))) % UInt32 + y += (n > precision(Float32) ? 0 : unsafe_load(x.d, xsize-1) >> (BITS_PER_LIMB - (25-n))) % UInt32 + y = (y + one(UInt32)) >> 1 # round, ties up + y &= ~UInt32(trailing_zeros(x) == (n-25 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even + d = ((n+125) % UInt32) << 23 + z = reinterpret(Float32, d+y) + z = ldexp(z, (xsize-1)*BITS_PER_LIMB) + end + return flipsign(z, x.size) +end + +function Float16(x::BigInt, ::RoundingMode{:Nearest}) + x == 0 && return Float16(0.0) + y1 = unsafe_load(x.d) + n = BITS_PER_LIMB - leading_zeros(y1) + if n > 16 || abs(x.size) > 1 + z = Inf16 + else + # load first 12(1 + 10 bits for fraction + 1 for rounding) + y = (y1 >> (n - (precision(Float16)+1))) % UInt16 + y = (y + one(UInt16)) >> 1 # round, ties up + y &= ~UInt16(trailing_zeros(x) == (n-12)) # fix last bit to round to even + d = ((n+13) % UInt16) << 10 + z = reinterpret(Float16, d+y) end - x + return flipsign(z, x.size) end Float64(n::BigInt) = Float64(n, RoundNearest) diff --git a/test/bigint.jl b/test/bigint.jl index e2427dea09738..9f6e5f39f59b7 100644 --- a/test/bigint.jl +++ b/test/bigint.jl @@ -440,3 +440,33 @@ end @test big(Int64(-9223372036854775808)) == big"-9223372036854775808" @test big(Int128(-170141183460469231731687303715884105728)) == big"-170141183460469231731687303715884105728" end + +@testset "conversion to Float" begin + x = big"2"^256 + big"2"^(256-53) + 1 + @test Float64(x) == reinterpret(Float64, 0x4ff0000000000001) + @test Float64(-x) == -Float64(x) + x = (x >> 1) + 1 + @test Float64(x) == reinterpret(Float64, 0x4fe0000000000001) + @test Float64(-x) == -Float64(x) + + x = big"2"^64 + big"2"^(64-24) + 1 + @test Float32(x) == reinterpret(Float32, 0x5f800001) + @test Float32(-x) == -Float32(x) + x = (x >> 1) + 1 + @test Float32(x) == reinterpret(Float32, 0x5f000001) + @test Float32(-x) == -Float32(x) + + x = big"2"^15 + big"2"^(15-11) + 1 + @test Float16(x) == reinterpret(Float16, 0x7801) + @test Float16(-x) == -Float16(x) + x = (x >> 1) + 1 + @test Float16(x) == reinterpret(Float16, 0x7401) + @test Float16(-x) == -Float16(x) + + for T in (Float16, Float32, Float64) + n = exponent(floatmax(T)) + @test T(big"2"^(n+1)) === T(Inf) + @test T(big"2"^(n+1) - big"2"^(n-precision(T))) === T(Inf) + @test T(big"2"^(n+1) - big"2"^(n-precision(T)) - 1) === floatmax(T) + end +end