Skip to content

Commit

Permalink
WIP: use bit twiddling instead of ldexp for IEEE floats
Browse files Browse the repository at this point in the history
  • Loading branch information
nsajko committed May 12, 2023
1 parent c517246 commit 50d5aa8
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 10 deletions.
61 changes: 61 additions & 0 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,67 @@ signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the r
uabs(x::Integer) = abs(x)
uabs(x::BitSigned) = unsigned(abs(x))

function float_representation(
::Type{F},
signbit::Bool, exponent_field::Integer, mantissa_field::Integer,
) where {F<:IEEEFloat}
T = uinttype(F)
sign_and_exp = (T(signbit) << exponent_bits(F)) | T(exponent_field)
ret = (sign_and_exp << significand_bits(F)) | T(mantissa_field)
ret::T
end

float_representation_of_infinity(::Type{F}, signbit::Bool) where {F<:IEEEFloat} =
float_representation(F, signbit, exponent_raw_max(F), false)

float_representation_of_zero(::Type{F}, signbit::Bool) where {F<:IEEEFloat} =
float_representation(F, signbit, false, false)

function float_representation_from_components(
::Type{F},
sign::Real, exp::Integer, mantissa::Integer,
) where {F<:IEEEFloat}
T = uinttype(F)
sb = signbit(sign)

iszero(sign) && return float_representation_of_zero(F, sb)

normalized_exp = exp + significand_bits(F)

if exponent_max(F) < normalized_exp
# overflow (infinity)
float_representation_of_infinity(F, sb)
elseif normalized_exp < true - exponent_bias(F)
# underflow (subnormal or zero)
ed = true - exponent_bias(F) - normalized_exp
float_representation(F, sb, false, mantissa >> ed)
else
# normal: `true - exponent_bias(F) ≤ normalized_exp ≤ exponent_max(F)`
mantissa_field = T(mantissa) & significand_mask(F) # clear the leading set bit
e = normalized_exp + exponent_bias(F)
float_representation(F, sb, e, mantissa_field)
end
end

float_from_components(
::Type{F},
sign::Real, exp::Integer, mantissa::Integer,
) where {F<:IEEEFloat} =
reinterpret(F, float_representation_from_components(F, sign, exp, mantissa))

# The input parameters represent the number `sign * 2^exp * mantissa`,
# let's call it `n`. The sign is expected to be an integer between `-1`
# and `1`, and the mantissa is expected to be as wide as the mantissa
# of the floating-point type `F`, not counting the leading bit. Let's
# call the number `x`.
#
# Returns the same value as `ldexp(sign * F(mantissa), exp)`
float_from_components(
::Type{F},
sign::Real, exp::Integer, mantissa::Integer,
) where {F<:AbstractFloat} =
ldexp(sign * F(mantissa), exp)

## conversions to floating-point ##

# TODO: deprecate in 2.0
Expand Down
18 changes: 8 additions & 10 deletions base/rational.jl
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,10 @@ function rational_to_float_impl(
T,
RoundNearest,
)
mantissa = to_float(components.mantissa)

# TODO: `ldexp` could be replaced with a mere bit of bit twiddling
# in the case of `Float16`, `Float32`, `Float64`
ret = ldexp(s * mantissa, components.exponent)
ret = float_from_components(
typeof(to_float(false)),
s, components.exponent, components.mantissa,
)

# TODO: faster?
if iszero(ret) | issubnormal(ret)
Expand All @@ -377,11 +376,10 @@ function rational_to_float_impl(
T,
RoundToZero,
)
mantissa = to_float(components.mantissa | !components.is_exact)

# TODO: `ldexp` could be replaced with a mere bit of bit
# twiddling in the case of `Float16`, `Float32`, `Float64`
ret = ldexp(s * mantissa, components.exponent)
ret = float_from_components(
typeof(to_float(false)),
s, components.exponent, components.mantissa | !components.is_exact,
)
end

ret
Expand Down

0 comments on commit 50d5aa8

Please sign in to comment.