From 5639ac657538aeb7791ae749d16b2c7c984d3a32 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Sun, 7 May 2023 15:53:01 +0200 Subject: [PATCH] Base: correctly rounded floats constructed from rationals Constructing a floating-point number from a `Rational` should now be correctly rounded. Implementation approach: 1. Convert the (numerator, denominator) pair to a (sign bit, integral significand, exponent) triplet using integer arithmetic. The integer type in question must be wide enough. 2. Convert the above triplet into an instance of the chosen FP type. There is special support for IEEE 754 floating-point and for `BigFloat`, otherwise a fallback using `ldexp` is used. As a bonus, constructing a `BigFloat` from a `Rational` should now be thread-safe when the rounding mode and precision are provided to the constructor, because there is no access to the global precision or rounding mode settings. Updates #45213 Updates #50940 Updates #52507 Fixes #52394 Closes #52395 Fixes #52859 --- base/Base.jl | 1 + base/docs/basedocs.jl | 19 +++ base/mpfr.jl | 52 +++++- base/rational.jl | 26 +-- base/rational_to_float.jl | 209 ++++++++++++++++++++++++ test/choosetests.jl | 2 +- test/rational.jl | 333 ++++++++++++++++++++++++++++++++++++++ test/rational_to_float.jl | 160 ++++++++++++++++++ 8 files changed, 782 insertions(+), 20 deletions(-) create mode 100644 base/rational_to_float.jl create mode 100644 test/rational_to_float.jl diff --git a/base/Base.jl b/base/Base.jl index 3b95b44a33f0b..0c1d8629c71f7 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -284,6 +284,7 @@ include("rounding.jl") include("div.jl") include("rawbigints.jl") include("float.jl") +include("rational_to_float.jl") include("twiceprecision.jl") include("complex.jl") include("rational.jl") diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index cc2d32f6b3deb..c30dc1f413bfa 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -3049,6 +3049,25 @@ julia> 4/2 julia> 4.5/2 2.25 ``` + +This function may convert integer arguments to a floating-point number type +([`AbstractFloat`](@ref)), potentially resulting in a loss of accuracy. To avoid this, +instead construct a [`Rational`](@ref) from the arguments, then convert the resulting +rational number to a specific floating-point type of your choice: + +```jldoctest +julia> n = 100000000000000000 +100000000000000000 + +julia> m = n + 6 +100000000000000006 + +julia> n/m +1.0 + +julia> Float64(n//m) # `//` constructs a `Rational` +0.9999999999999999 +``` """ /(x, y) diff --git a/base/mpfr.jl b/base/mpfr.jl index e61f115b35093..10c4ea0bbb1fe 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -19,7 +19,8 @@ import isone, big, _string_n, decompose, minmax, _precision_with_base_2, sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand, uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask, - RawBigIntRoundingIncrementHelper, truncated, RawBigInt + RawBigIntRoundingIncrementHelper, truncated, RawBigInt, RationalToFloat, + rational_to_floating_point using .Base.Libc @@ -317,12 +318,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloa BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) = BigFloat(Float64(x), r; precision=precision) -function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) - setprecision(BigFloat, precision) do - setrounding_raw(BigFloat, r) do - BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat +function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode) + ccall( + (:mpfr_set_z_2exp, libmpfr), + Int32, + (Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode), + z, n, exp, rm, + ) + nothing +end + +function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec) + num_is_zero = iszero(num) + den_is_zero = iszero(den) + s = Int8(sign(num)) + sb = signbit(s) + is_zero = num_is_zero & !den_is_zero + is_inf = !num_is_zero & den_is_zero + is_regular = !num_is_zero & !den_is_zero + + if is_regular + let rtfc = RationalToFloat.to_float_components + c = rtfc(BigInt, num, den, prec, nothing, romo, sb) + ret = BigFloat(precision = prec) + mpfr_romo = convert(MPFRRoundingMode, romo) + set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo) + ret end - end + else + if is_zero + BigFloat(false, MPFRRoundToZero, precision = prec) + elseif is_inf + BigFloat(s * Inf, MPFRRoundToZero, precision = prec) + else + BigFloat(precision = prec) + end + end::BigFloat +end + +function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[]) + rational_to_floating_point(BigFloat, x, r, precision) +end + +function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[]; + precision::Integer = DEFAULT_PRECISION[]) + rational_to_floating_point(BigFloat, x, r, precision) end function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat)) diff --git a/base/rational.jl b/base/rational.jl index 00a630c396c8c..5b38550c485bb 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -151,19 +151,19 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true : (::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T : throw(InexactError(nameof(T), T, x))) -AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat -function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S - P = promote_type(T,S) - convert(T, convert(P,x.num)/convert(P,x.den))::T -end - # avoid spurious overflow (#52394). (Needed for UInt16 or larger; - # we also include Int16 for consistency of accuracy.) -Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) = - Float16(Float32(x)) -Float16(x::Rational{<:Union{Int128,UInt128}}) = - Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency -Float32(x::Rational{<:Union{Int128,UInt128}}) = - Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency +function rational_to_floating_point(::Type{F}, x, rm, prec) where {F} + nd = (numerator(x), denominator(x)) + RationalToFloat.to_floating_point(F, nd..., rm, prec)::F +end + +function (::Type{F})(x::Rational, rm::RoundingMode = RoundNearest) where {F<:AbstractFloat} + rational_to_floating_point(F, x, rm, precision(F))::F +end + +function AbstractFloat(x::Q) where {Q<:Rational} + T = float(Q) + T(x)::T::AbstractFloat +end function Rational{T}(x::AbstractFloat) where T<:Integer r = rationalize(T, x, tol=0) diff --git a/base/rational_to_float.jl b/base/rational_to_float.jl new file mode 100644 index 0000000000000..973c329838355 --- /dev/null +++ b/base/rational_to_float.jl @@ -0,0 +1,209 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module RationalToFloat + +# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed +# to compile to a single instruction, because the semantics correspond +# to what hardware usually provides. +function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger} + @inline begin + mask = 8*sizeof(T) - 1 + c = b & mask + shift(a, c) + end +end + +machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported") + +# Fallback for `BigInt` etc. +machine_shift(shift::S, a, b) where {S} = shift(a, b) + +# Arguments are positive integers. +function div_significand_with_remainder(num, den, minimum_significand_size) + clamped = x -> max(zero(x), x)::typeof(x) + bw = Base.top_set_bit # bit width + shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2) + t = machine_shift(<<, num, shift) + (divrem(t, den, RoundToZero)..., shift) +end + +# `divrem(n, 1<>>, n, k) + tmp = machine_shift(<<, quo, k) + rem = n - tmp + (quo, rem) +end + +function to_float_components_impl(num, den, precision, max_subnormal_exp) + # `+1` because we need an extra, "round", bit for some rounding modes. + # + # TODO: as a performance optimization, only do this when required + # by the rounding mode + prec_p_1 = precision + true + + (quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1) + width = Base.top_set_bit(quo0) + excess_width = width - prec_p_1 + exp = width - shift - true + + exp_underflow = if isnothing(max_subnormal_exp) + zero(exp) + else + let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T + (signbit(d) ? z : d + true)::T + end + end + + (quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width) + integral_significand = quo1 >>> true + round_bit = quo1 % Bool + sticky_bit = !iszero(rem1) | !iszero(rem0) + + (; integral_significand, exponent = exp, round_bit, sticky_bit) +end + +struct RoundingIncrementHelper + final_bit::Bool + round_bit::Bool + sticky_bit::Bool +end + +(h::RoundingIncrementHelper)(::Base.Rounding.FinalBit) = h.final_bit +(h::RoundingIncrementHelper)(::Base.Rounding.RoundBit) = h.round_bit +(h::RoundingIncrementHelper)(::Base.Rounding.StickyBit) = h.sticky_bit + +function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit) + overflows = (x, p) -> x == machine_shift(<<, one(x), p) + t = to_float_components_impl(num, den, precision, max_subnormal_exp) + raw_significand = t.integral_significand + rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit) + incr = Base.Rounding.correct_rounding_requires_increment(rh, romo, sign_bit) + rounded = raw_significand + incr + (integral_significand, exponent) = let exp = t.exponent + if overflows(rounded, precision) + (rounded >>> true, exp + true) + else + (rounded, exp) + end + end + (; integral_significand, exponent) +end + +function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T} + to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb) +end + +function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} + num_is_zero = iszero(num) + den_is_zero = iszero(den) + sb = signbit(num) + is_zero = num_is_zero & !den_is_zero + is_inf = !num_is_zero & den_is_zero + is_regular = !num_is_zero & !den_is_zero + if is_regular + let + c = to_float_components(S, num, den, prec, nothing, rm, sb) + signif = T(c.integral_significand)::T + x = ldexp(signif, c.exponent - prec + true)::T + sb ? -x : x + end::T + else + if is_zero + zero(T)::T + elseif is_inf + T(Inf)::T + else + T(NaN)::T + end + end::T +end + +function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} + to_floating_point_fallback(T, S, num, den, rm, prec) +end + +function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S} + num_is_zero = iszero(num) + den_is_zero = iszero(den) + sb = signbit(num) + is_zero = num_is_zero & !den_is_zero + is_inf = !num_is_zero & den_is_zero + is_regular = !num_is_zero & !den_is_zero + Rnd = Base.Rounding + (rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm) + (false, false) + else + let from = Rnd.rounds_away_from_zero(rm, sb) + (!from, from) + end + end::NTuple{2,Bool} + exp_max = Base.exponent_max(T) + exp_min = Base.exponent_min(T) + ieee_repr = Base.ieee754_representation + repr_zero = ieee_repr(T, sb, Val(:zero)) + repr_inf = ieee_repr(T, sb, Val(:inf)) + repr_nan = ieee_repr(T, sb, Val(:nan)) + U = typeof(repr_zero) + repr_zero::U + repr_inf::U + repr_nan::U + + ret_u = if is_regular + let + c = to_float_components(S, num, den, prec, exp_min - 1, rm, sb) + exp = c.exponent + exp_diff = exp - exp_min + is_normal = 0 ≤ exp_diff + exp_is_huge_p = exp_max < exp + exp_is_huge_n = signbit(exp_diff + prec) + rounds_to_inf = exp_is_huge_p & !rm_is_to_zero + rounds_to_zero = exp_is_huge_n & !rm_is_from_zero + + if !rounds_to_zero & !exp_is_huge_p + let signif = (c.integral_significand % U) & Base.significand_mask(T) + exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U + ieee_repr(T, sb, exp_field, signif)::U + end + elseif rounds_to_zero + repr_zero + elseif rounds_to_inf + repr_inf + else + ieee_repr(T, sb, Val(:omega)) + end + end + else + if is_zero + repr_zero + elseif is_inf + repr_inf + else + repr_nan + end + end::U + + reinterpret(T, ret_u)::T +end + +# `BigInt` is a safe default. +to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt + +const BitIntegerOrBool = Union{Bool,Base.BitInteger} + +# As an optimization, use an integer type narrower than `BigInt` when possible. +function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool} + Max = if sizeof(F) ≤ sizeof(S) + S + else + (S <: Signed) ? Base.inttype(F) : Base.uinttype(F) + end + widen(Max) +end + +function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T} + S = to_float_promote_type(F, T) + to_floating_point_impl(F, S, num, den, rm, prec) +end + +end diff --git a/test/choosetests.jl b/test/choosetests.jl index 96d230d185c71..6d47ebc560ee1 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -11,7 +11,7 @@ const TESTNAMES = [ "char", "strings", "triplequote", "unicode", "intrinsics", "dict", "hashing", "iobuffer", "staged", "offsetarray", "arrayops", "tuple", "reduce", "reducedim", "abstractarray", - "intfuncs", "simdloop", "vecelement", "rational", + "intfuncs", "simdloop", "vecelement", "rational", "rational_to_float", "bitarray", "copy", "math", "fastmath", "functional", "iterators", "operators", "ordering", "path", "ccall", "parse", "loading", "gmp", "sorting", "spawn", "backtrace", "exceptions", diff --git a/test/rational.jl b/test/rational.jl index c6f81372de0b9..68dd59922a5ef 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -801,3 +801,336 @@ end @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) end + +function naive_rational_to_bigfloat(x) + f = let x = x + () -> BigFloat(numerator(x))/denominator(x) + end + setprecision(f, BigFloat, 1024) +end + +function test_rational_to_float_systematic_with_naive_impl(::Type{F}, q, rm) where {F} + no_alloc_rat = let + f(::Rational{Bool}) = true + function f(x) + if (sizeof(Int) < 8) && (8 < sizeof(x)) + # `div(::Int128, ::Int128)` is implemented via `BigInt`on 32-bit Julia + false + else + isbits(widen(x)) + end + end + f + end + want_no_alloc = no_alloc_rat(q) && isbitstype(widen(F)) + want_no_alloc && @test iszero(@allocated F(q, rm)) + @test F(naive_rational_to_bigfloat(q), rm) == @inferred F(q, rm) +end + +function test_rational_to_float_systematic_with_naive_impl(::Type{BigFloat}, q, rm, prec) + reference = BigFloat(naive_rational_to_bigfloat(q), rm, precision = prec) + @test precision(reference) == prec + t = let reference = reference, prec = prec + function(res) + @test res == reference + @test precision(res) == prec + end + end + + t(BigFloat(q, rm, precision = prec)) + + let + res = setprecision(BigFloat, prec) do + BigFloat(q, rm) + end + t(res) + end + + let + if rm isa RoundingMode + res = setrounding(BigFloat, rm) do + BigFloat(q, precision = prec) + end + t(res) + end + end +end + +function test_rational_to_float_systematic_with_naive_impl(::Type{BigFloat}, q, rm) + for prec ∈ vcat(Base.OneTo(5), 256, 512) + test_rational_to_float_systematic_with_naive_impl(BigFloat, q, rm, prec) + end +end + +function test_rational_to_float_systematic_with_naive_impl(::Type{F}, q) where {F} + M = Base.MPFR + mpfr_rms = ( + M.MPFRRoundDown, M.MPFRRoundUp, M.MPFRRoundFromZero, + M.MPFRRoundToZero, M.MPFRRoundNearest + ) + rms = ( + RoundDown, RoundFromZero, RoundNearest, RoundToZero, RoundUp, + # XXX: these could be tested, but would require more effort to + # test, as MPFR doesn't support them: + # + # ```julia + # RoundNearestTiesAway, RoundNearestTiesUp, + # ``` + ((F <: BigFloat) ? mpfr_rms : ())... + ) + for rm ∈ rms + test_rational_to_float_systematic_with_naive_impl(F, q, rm) + end +end + +function min_no_promotion_impl(isless::F, a::T, b) where {F, T} + if isless(b, a) + T(b)::T + else + a + end +end +min_no_promotion(a::T, b) where {T} = min_no_promotion_impl(isless, a, b) +max_no_promotion(a::T, b) where {T} = min_no_promotion_impl(((l, r) -> isless(r, l)), a, b) + +safe_typemin(::Type{T}) where {T} = typemin(T)::T +safe_typemax(::Type{T}) where {T} = typemax(T)::T + +safe_typemin(::Type{BigInt}) = -safe_typemax(BigInt) - 1 +safe_typemax(::Type{BigInt}) = (one(BigInt) << 511) - 1 + +function test_rational_to_float_systematic_with_naive(::Type{F}, ::Type{T}) where {F,T} + range_typemin = function(x) + l = safe_typemin(T) + u = safe_typemax(T) + b = min_no_promotion(u, l + x) + l:b + end + + range_typemax = function(x) + l = safe_typemin(T) + u = safe_typemax(T) + a = max_no_promotion(l, u - x) + a:u + end + + range_zero = function(x) + l = safe_typemin(T) + u = safe_typemax(T) + a = max_no_promotion(l, -x) + b = min_no_promotion(u, x) + a:b + end + + k = 20 + nums = Vector{T}() + dens = Vector{T}() + + append!(nums, range_typemin(k)) + + for x ∈ range_zero(k) + push!(nums, x) + (0 < x) && push!(dens, x) + end + + for x ∈ range_typemax(k) + push!(nums, x) + (0 < x) && push!(dens, x) + end + + sort!(nums) + sort!(dens) + unique!(nums) + unique!(dens) + + for num ∈ nums + for den ∈ dens + # XXX: shift `n` and `d` to vary the FP exponent more + test_rational_to_float_systematic_with_naive_impl(F, num//den) + end + end +end + +function test_rational_to_float_ldexp_fallback(::Type{F}, ::Type{T}) where {F,T} + l = max_no_promotion(safe_typemin(T), -5) + u = min_no_promotion(safe_typemax(T), 5) + prec = precision(F) + rm = RoundNearest + rtfp = Base.RationalToFloat.to_floating_point_fallback + for num ∈ l:u + for den ∈ true:u + @test F(num//den) == rtfp(F, BigInt, num, den, rm, prec) + end + end +end + +function test_rational_to_float_correctly_rounded_up_down(x, f, ::Val{:down}) + @test f ≤ x < nextfloat(f) +end + +function test_rational_to_float_correctly_rounded_up_down(x, f, ::Val{:up}) + @test prevfloat(f) < x ≤ f +end + +function test_rational_to_float_simplest_rms_up_down(::Type{F}, ::Type{T}, rm, sb, ::V) where {F,T,V<:Val} + range_typemax = function(x) + l = one(T)::T + u = safe_typemax(T) + a = max_no_promotion(l, u - x) + a:u + end + + range_zero = function(x) + u = safe_typemax(T) + b = min_no_promotion(u, x) + true:b + end + + k = 20 + nds = Vector{T}() + append!(nds, range_zero(k)) + append!(nds, range_typemax(k)) + sort!(nds) + unique!(nds) + + for num ∈ nds + for den ∈ nds + x_pos = num // den + x = sb ? -x_pos : x_pos + f = F(x, rm) + test_rational_to_float_correctly_rounded_up_down(x, f, V()) + end + end +end + +function test_rational_to_float_simplest_rms(::Type{F}, ::Type{T}) where {F,T} + M = Base.MPFR + rms_up = (RoundUp, ((F <: BigFloat) ? (M.MPFRRoundUp,) : ())...) + rms_down = (RoundDown, ((F <: BigFloat) ? (M.MPFRRoundDown,) : ())...) + rms_fz = (RoundFromZero, ((F <: BigFloat) ? (M.MPFRRoundFromZero,) : ())...) + rms_tz = (RoundToZero, ((F <: BigFloat) ? (M.MPFRRoundToZero,) : ())...) + + if T <: Signed + @testset "negatives" begin + @testset "round down" begin + for rm ∈ (rms_fz..., rms_down...) + test_rational_to_float_simplest_rms_up_down(F, T, rm, true, Val(:down)) + end + end + @testset "round up" begin + for rm ∈ (rms_tz..., rms_up...) + test_rational_to_float_simplest_rms_up_down(F, T, rm, true, Val(:up)) + end + end + end + end + + @testset "positives" begin + @testset "round down" begin + for rm ∈ (rms_tz..., rms_down...) + test_rational_to_float_simplest_rms_up_down(F, T, rm, false, Val(:down)) + end + end + @testset "round up" begin + for rm ∈ (rms_fz..., rms_up...) + test_rational_to_float_simplest_rms_up_down(F, T, rm, false, Val(:up)) + end + end + end +end + +struct AbstractFloatFallback <: AbstractFloat + x::Float32 + AbstractFloatFallback(x::Float32) = new(x) +end +AbstractFloatFallback(n::Integer) = AbstractFloatFallback(Float32(n)) +Base.precision(::Type{AbstractFloatFallback}) = 10 +Base.ldexp(x::AbstractFloatFallback, n::Integer) = AbstractFloatFallback(ldexp(x.x, n)) +Base.:-(x::AbstractFloatFallback) = AbstractFloatFallback(-x.x) + +struct IntegerFallback <: Integer + n::Int + IntegerFallback(n::Int) = new(n) +end +Base.signbit(n::IntegerFallback) = signbit(n.n) +Base.BigInt(n::IntegerFallback) = BigInt(n.n) +Base.oneunit(n::IntegerFallback) = IntegerFallback(oneunit(n.n)) +Base.:+(l::IntegerFallback, r::IntegerFallback) = IntegerFallback(l.n + r.n) +Base.log2(n::IntegerFallback) = log2(n.n) + +@testset "rational number to floating-point number" begin + @testset "defined for unknown types" begin + Floats = (Float16, AbstractFloatFallback) + rationals = (3//1, Base.unsafe_rational(IntegerFallback(3), IntegerFallback(1))) + @testset "F: $F" for F ∈ Floats + @testset "q: $q" for q ∈ rationals + if F <: AbstractFloatFallback + @test F(q).x == 3 + else + @test F(q) == 3 + end + end + end + end + + Us = (UInt8, UInt16, UInt32, UInt64, UInt128) + Ss = (Int8, Int16, Int32, Int64, Int128) + Ts = (Bool, Us..., Ss..., BigInt) + Fs = (Float16, Float32, Float64, BigFloat) + + @testset "`Rational` parameterized by `Union`" begin + @testset "F: $F" for F ∈ Fs + @testset "T: $T" for T ∈ Ts + Q = Rational{Union{Bool,T}} + o = one(T) + @test isone(F(Q(o, o))) + @test isone(F(Q(o, true))) + @test isone(F(Q(true, o))) + @test isone(F(Q(true, true))) + end + end + end + + @testset "defined method with default rounding mode" begin + @testset "F: $F" for F ∈ Fs + @testset "T: $T" for T ∈ Ts + @test isone(F(one(Rational{T}))) + end + end + end + + @testset "the simplest rounding modes; up, down" begin + @testset "F: $F" for F ∈ Fs + @testset "T: $T" for T ∈ Ts + test_rational_to_float_simplest_rms(F, T) + end + end + end + + @testset "with naive conversion via `BigFloat`" begin + @testset "F: $F" for F ∈ Fs + @testset "T: $T" for T ∈ Ts + test_rational_to_float_systematic_with_naive(F, T) + end + end + end + + @testset "`ldexp`-based fallback" begin + @testset "F: $F" for F ∈ Fs + @testset "T: $T" for T ∈ Ts + test_rational_to_float_ldexp_fallback(F, T) + end + end + end + + @testset "special" begin + @test Float16(9//70000) === Float16(0.0001286) + @test Float32(16777216//16777217) < 1 + @test float(-9223372036854775808//9223372036854775807) == -1 + + # issue #52394 + @test Float16(10^8 // (10^9 + 1)) === convert(Float16, 10^8 // (10^9 + 1)) === Float16(0.1) + @test Float16((typemax(UInt128)-0x01) // typemax(UInt128)) === Float16(1.0) + @test Float32((typemax(UInt128)-0x01) // typemax(UInt128)) === Float32(1.0) + end +end diff --git a/test/rational_to_float.jl b/test/rational_to_float.jl new file mode 100644 index 0000000000000..c8fcda8abfd2c --- /dev/null +++ b/test/rational_to_float.jl @@ -0,0 +1,160 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test + +function div_significand_with_remainder_naive(num, den, minimum_significand_size) + dr = let den = den + n -> divrem(n, den, RoundToZero) + end + + shift = 0 + (quo, rem) = dr(num << shift) + + # bit width + bw = Base.top_set_bit + + while bw(quo) < minimum_significand_size + shift += true + (quo, rem) = dr(num << shift) + end + + # suboptimal but allowed results + (q1, r1) = dr(num << (shift + 1)) + (q2, r2) = dr(num << (shift + 2)) + (q3, r3) = dr(num << (shift + 3)) + + ( + (quo, rem, shift), # optimal result + (q1, r1, shift + 1), + (q2, r2, shift + 2), + (q3, r3, shift + 3) + ) +end + +function test_div_significand_with_remainder(n, d, mss) + (0 < n) || error("unsupported") + (0 < d) || error("unsupported") + (0 < mss) || error("unsupported") + bw = Base.top_set_bit + wid = x -> widen(widen(x)) + t = let n = wid(n), d = wid(d) + function(q_narrow, r_narrow, sh) + q = wid(q_narrow) + r = wid(r_narrow) + @test q + r/d ≈ n*(2.0^sh)/d + @test floor(Int, log2(abs(q))) == exponent(q) == bw(q) - 1 == sh + exponent(n/d) + end + end + dswr = Base.RationalToFloat.div_significand_with_remainder + a = @inferred dswr(n, d, mss) + (a0, a1, a2, a3) = div_significand_with_remainder_naive(n, d, mss) + @test a ∈ (a0, a1, a2, a3) + t(a...) + t(a0...) + t(a1...) + t(a2...) + t(a3...) + nothing +end + +function test_div_significand_with_remainder_iters(::Type{T}, nds, msss) where {T} + @testset "mss: $mss" for mss ∈ msss + @testset "num: $num" for num ∈ nds + @testset "den: $den" for den ∈ nds + # XXX: shift `n` and `d` to vary the FP exponent more + n = T(num)::T + d = T(den)::T + test_div_significand_with_remainder(widen(n), d, mss) + end + end + end +end + +max_mss_for_type(::Type{T}) where {T<:Unsigned} = sizeof(T)*8 - 3 +max_mss_for_type(::Type{T}) where {T<:Signed} = sizeof(T)*8 - 4 +max_mss_for_type(::Type{BigInt}) = 200 # just a big value + +max_nd_for_type(::Type{T}) where {T} = typemax(T) +max_nd_for_type(::Type{BigInt}) = BigInt(100000) # just a big value + +function min_no_promotion_impl(isless::F, a::T, b) where {F, T} + if isless(b, a) + T(b)::T + else + a + end +end +min_no_promotion(a::T, b) where {T} = min_no_promotion_impl(isless, a, b) +max_no_promotion(a::T, b) where {T} = min_no_promotion_impl(((l, r) -> isless(r, l)), a, b) + +function test_div_significand_with_remainder_iters(::Type{T}) where {T} + range_typemax = function(x) + l = one(T)::T + u = max_nd_for_type(T)::T + a = max_no_promotion(l, u - x) + a:u + end + + range_zero = function(x) + u = max_nd_for_type(T)::T + b = min_no_promotion(u, x) + Base.OneTo(b) + end + + k = 30 + nds = Vector{T}() + append!(nds, range_zero(k)) + append!(nds, range_typemax(k)) + sort!(nds) + unique!(nds) + + msss = Vector{Int}() + max_mss = max_mss_for_type(T) + append!(msss, intersect(Base.OneTo(6), Base.OneTo(max_mss))) + push!(msss, max_mss) + sort!(msss) + unique!(msss) + + test_div_significand_with_remainder_iters(T, nds, msss) +end + +function test_divrem_2(::Type{T}) where {T} + dr = Base.RationalToFloat.divrem_2 + @testset "n: $n" for n ∈ false:T(typemax(Int8)) + @testset "k: $k" for k ∈ 0:6 + @test dr(n, k) == divrem(n, 1 << k, RoundToZero) + end + end +end + +function test_machine_shift(shift::S, ::Type{T}) where {S,T} + @testset "k: $k" for k ∈ 0:(sizeof(T)*8 - 1) + ms = Base.RationalToFloat.machine_shift + o = one(T) + @test ms(shift, o, k) == shift(o, k) + end +end + +const Us = (UInt8, UInt16, UInt32, UInt64, UInt128) +const Ss = (Int8, Int16, Int32, Int64, Int128) +const Ts = (Us..., Ss..., BigInt) + +@testset "machine_shift" begin + @testset "T: $T" for T ∈ Ts + @testset "shift: $shift" for shift ∈ (<<, >>>) + test_machine_shift(shift, T) + end + end +end + +@testset "divrem_2" begin + @testset "T: $T" for T ∈ Ts + test_divrem_2(T) + end +end + +@testset "div_significand_with_remainder" begin + @testset "T: $T" for T ∈ Ts + test_div_significand_with_remainder_iters(T) + end +end