diff --git a/base/float.jl b/base/float.jl
index fad7146655ade..4fe7bf48a7975 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -148,6 +148,91 @@ signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the r
 uabs(x::Integer) = abs(x)
 uabs(x::BitSigned) = unsigned(abs(x))
 
+# Assemble the bit pattern of an `F` out of the sign bit, the raw (biased)
+# exponent field and the trailing significand field. The fields are just
+# shifted into place and combined with `|`, without masking.
+function float_representation(
+    ::Type{F},
+    sb::Bool, exponent_field::Integer, mantissa_field::Integer,
+) where {F<:IEEEFloat}
+    T = uinttype(F)
+    sign_and_exp = (T(sb) << exponent_bits(F)) | T(exponent_field)
+    ret = (sign_and_exp << significand_bits(F)) | T(mantissa_field)
+    ret::T
+end
+
+# Exponent field `exponent_raw_max(F)`, zero significand field: infinity
+float_representation_of_infinity(::Type{F}, sb::Bool) where {F<:IEEEFloat} =
+    float_representation(F, sb, exponent_raw_max(F), false)
+
+# All bits clear, except possibly the sign bit: zero
+float_representation_of_zero(::Type{F}, sb::Bool) where {F<:IEEEFloat} =
+    float_representation(F, sb, false, false)
+
+# Bit pattern of the number `sign * 2^exp * mantissa` rounded to `F`.
+#
+# Only the sign bit of `sign` is used, except that a zero `sign` gives a
+# zero result. The leading set bit of `mantissa` is expected to be the
+# bit with index `significand_bits(F)`.
+function float_representation_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:IEEEFloat}
+    T = uinttype(F)
+    sb = signbit(sign)
+
+    iszero(sign) && return float_representation_of_zero(F, sb)
+
+    # the exponent of the leading set bit of `mantissa`
+    normalized_exp = exp + significand_bits(F)
+
+    if exponent_max(F) < normalized_exp
+        # overflow (infinity)
+        float_representation_of_infinity(F, sb)
+    elseif normalized_exp < true - exponent_bias(F)
+        # underflow (subnormal or zero): the `ed` lowest bits of the
+        # mantissa don't fit into the result. Round them to nearest, ties
+        # to even, instead of truncating, so the result is the value of
+        # type `F` that's nearest to the input.
+        ed = true - exponent_bias(F) - normalized_exp
+        kept = mantissa >> ed
+        dropped = mantissa - (kept << ed)
+        guard = dropped >> (ed - true) # the highest dropped bit
+        rest = dropped - (guard << (ed - true)) # the other dropped bits
+        round_up = !iszero(guard) & (!iszero(rest) | isodd(kept))
+        # a carry out of the mantissa field ends up in the exponent
+        # field, giving the least normal number
+        float_representation(F, sb, false, T(kept) + T(round_up))
+    else
+        # normal: `true - exponent_bias(F) ≤ normalized_exp ≤ exponent_max(F)`
+        mantissa_field = T(mantissa) & significand_mask(F) # clear the leading set bit
+        e = normalized_exp + exponent_bias(F)
+        float_representation(F, sb, e, mantissa_field)
+    end
+end
+
+# The input parameters represent the number `sign * 2^exp * mantissa`,
+# let's call it `n`. The sign is expected to be an integer between `-1`
+# and `1`, and the mantissa is expected to be nonnegative, with as many
+# bits after its leading set bit as there are in the mantissa of the
+# floating-point type `F`.
+#
+# Returns `n` rounded to the type `F`.
+#
+# For the IEEE types the bits of the result are assembled directly.
+float_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:IEEEFloat} =
+    reinterpret(F, float_representation_from_components(F, sign, exp, mantissa))
+
+# Generic fallback for the other floating-point types, relying on `ldexp`
+float_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:AbstractFloat} =
+    ldexp(sign * F(mantissa), exp)
+
 ## conversions to floating-point ##
 
 # TODO: deprecate in 2.0
diff --git a/base/rational.jl b/base/rational.jl
index e77e8a31aef03..7ffbd46e95402 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -360,11 +360,10 @@ function rational_to_float_impl(
                 T,
                 RoundNearest,
             )
-            mantissa = to_float(components.mantissa)
-
-            # TODO: `ldexp` could be replaced with a mere bit of bit twiddling
-            # in the case of `Float16`, `Float32`, `Float64`
-            ret = ldexp(s * mantissa, components.exponent)
+            ret = float_from_components(
+                typeof(to_float(false)),
+                s, components.exponent, components.mantissa,
+            )
 
             # TODO: faster?
             if iszero(ret) | issubnormal(ret)
@@ -377,11 +376,10 @@ function rational_to_float_impl(
                     T,
                     RoundToZero,
                 )
-                mantissa = to_float(components.mantissa | !components.is_exact)
-
-                # TODO: `ldexp` could be replaced with a mere bit of bit
-                # twiddling in the case of `Float16`, `Float32`, `Float64`
-                ret = ldexp(s * mantissa, components.exponent)
+                ret = float_from_components(
+                    typeof(to_float(false)),
+                    s, components.exponent, components.mantissa | !components.is_exact,
+                )
             end
 
             ret