From 02f442413c9b4158cc0d1c431f5aeab5445d3b71 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Thu, 28 Oct 2021 14:02:18 -0400 Subject: [PATCH 1/7] fixed Float16 from Float64 and BigFloat --- base/mpfr.jl | 7 +++++-- src/runtime_intrinsics.c | 9 ++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/base/mpfr.jl b/base/mpfr.jl index 99c9b09416898..cbb88cb49b2f0 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -338,8 +338,11 @@ Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) -# TODO: avoid double rounding -Float16(x::BigFloat) = Float16(Float64(x)) +function Float16(x::BigFloat) :: Float16 + res = Float32(x) + reinterpret(UInt32, res) & 0x1fff != 0x1000 && return res + return nextfloat(res, cmp(x, res)) +end promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 657c750ee76d6..feeb223785ee1 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -205,7 +205,14 @@ JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param) JL_DLLEXPORT uint16_t __truncdfhf2(double param) { - return float_to_half((float)param); + float res = (float)param; + uint32_t resi; + memcpy(&resi, &res, sizeof(res)); + if ((resi & 0x1fff) == 0x1000) { + resi += (res < param) - (param < res); + memcpy(&res, &resi, sizeof(res)); + } + return float_to_half(res); } #endif From 847b086cb9ff88db4ff207853dc75bc1fd97b8e6 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Fri, 29 Oct 2021 17:31:08 -0400 Subject: [PATCH 2/7] fix impl, add tests --- base/mpfr.jl | 11 +++++++++-- src/runtime_intrinsics.c | 11 +++++++++-- test/float16.jl | 23 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/base/mpfr.jl b/base/mpfr.jl index cbb88cb49b2f0..2d70fd00c82dc 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -340,8 +340,15 @@ Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) function Float16(x::BigFloat) :: Float16 res = Float32(x) - reinterpret(UInt32, res) & 0x1fff != 0x1000 && return res - return nextfloat(res, cmp(x, res)) + resi = reinterpret(UInt32, res) + if (resi&0x7fffffff) < 0x38800000 + shift = 113-((resi & 0x7f800000)>>23) + shift<23 && (resi >>= shift) + end + if (resi & 0x1fff == 0x1000) + res = nextfloat(res, cmp(x, res)) + end + return res end promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index feeb223785ee1..38ac964094475 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -11,6 +11,7 @@ #include "julia.h" #include "julia_internal.h" #include "APInt-C.h" +#include "stdio.h" const unsigned int host_char_bit = 8; @@ -208,8 +209,14 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param) float res = (float)param; uint32_t resi; memcpy(&resi, &res, sizeof(res)); - if ((resi & 0x1fff) == 0x1000) { - resi += (res < param) - (param < res); + if ((resi&0x7fffffffu) < 0x38800000u){ + uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); + if (shift<23u) + resi >>= shift; + } + if ((resi & 0x1fffu) == 0x1000u) { + memcpy(&resi, &res, sizeof(res)); + resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res)); memcpy(&res, &resi, sizeof(res)); } return float_to_half(res); diff --git a/test/float16.jl b/test/float16.jl index 804aba9ef741b..68d56828dae54 100644 --- a/test/float16.jl +++ b/test/float16.jl @@ -202,3 +202,26 @@ const minsubf16_32 = Float32(minsubf16) # issues #33076 @test Float16(1f5) == Inf16 + +@testset "conversion to Float16 from" begin + for T in (Float32, Float64, BigFloat) + @testset "conversion from $T" begin + for i in 1:2^16 + f = reinterpret(Float16, UInt16(i-1)) + isfinite(f) || continue + abs(f) Date: Fri, 29 Oct 2021 17:33:01 -0400 Subject: [PATCH 3/7] fix impl, add tests --- test/float16.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/float16.jl b/test/float16.jl index 68d56828dae54..2fb6cf95da6cc 100644 --- a/test/float16.jl +++ b/test/float16.jl @@ -209,7 +209,7 @@ const minsubf16_32 = Float32(minsubf16) for i in 1:2^16 f = reinterpret(Float16, UInt16(i-1)) isfinite(f) || continue - abs(f) Date: Sat, 30 Oct 2021 02:52:41 -0400 Subject: [PATCH 4/7] comment subnormal mask --- base/mpfr.jl | 7 +++++-- src/runtime_intrinsics.c | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/base/mpfr.jl b/base/mpfr.jl index 2d70fd00c82dc..86f1a892e8505 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -341,11 +341,14 @@ Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) function Float16(x::BigFloat) :: Float16 res = Float32(x) resi = reinterpret(UInt32, res) - if (resi&0x7fffffff) < 0x38800000 + if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal + #shift so that the mantissa lines up where it would for normal Float16 shift = 113-((resi & 0x7f800000)>>23) shift<23 && (resi >>= shift) end - if (resi & 0x1fff == 0x1000) + if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values + memcpy(&resi, &res, sizeof(res)); + # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer res = nextfloat(res, cmp(x, res)) end return res diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 38ac964094475..a9e38a9c40c35 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -209,13 +209,15 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param) float res = (float)param; uint32_t resi; memcpy(&resi, &res, sizeof(res)); - if ((resi&0x7fffffffu) < 0x38800000u){ + if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal + // shift so that the mantissa lines up where it would for normal Float16 uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); if (shift<23u) resi >>= shift; } - if ((resi & 0x1fffu) == 0x1000u) { + if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values memcpy(&resi, &res, sizeof(res)); + // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res)); memcpy(&res, &resi, sizeof(res)); } From b821022e0a8316d0edfc921ab08b007e39013528 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Sun, 31 Oct 2021 23:15:48 -0400 Subject: [PATCH 5/7] Julia isn't C --- base/mpfr.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/mpfr.jl b/base/mpfr.jl index 86f1a892e8505..92a139636c2a6 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -347,7 +347,6 @@ function Float16(x::BigFloat) :: Float16 shift<23 && (resi >>= shift) end if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values - memcpy(&resi, &res, sizeof(res)); # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer res = nextfloat(res, cmp(x, res)) end From 352aef77d04fa38a6933caa01ac074077db41298 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Mon, 1 Nov 2021 11:19:20 -0400 Subject: [PATCH 6/7] remove stdio --- src/runtime_intrinsics.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index a9e38a9c40c35..5004c1ffb9c61 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -11,7 +11,6 @@ #include "julia.h" #include "julia_internal.h" #include "APInt-C.h" -#include "stdio.h" const unsigned int host_char_bit = 8; From 41533ef17cde7e618327399446d5cb90d2b3acad Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Mon, 1 Nov 2021 17:13:09 -0400 Subject: [PATCH 7/7] fix nextfloat 0.0 --- base/mpfr.jl | 5 ++++- src/runtime_intrinsics.c | 4 +++- test/float16.jl | 5 ++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/base/mpfr.jl b/base/mpfr.jl index 92a139636c2a6..e85f281619ac0 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -344,7 +344,10 @@ function Float16(x::BigFloat) :: Float16 if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal #shift so that the mantissa lines up where it would for normal Float16 shift = 113-((resi & 0x7f800000)>>23) - shift<23 && (resi >>= shift) + if shift<23 + resi |= 0x0080_0000 # set implicit bit + resi >>= shift + end end if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 5004c1ffb9c61..c7a7f21ab3589 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -211,8 +211,10 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param) if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal // shift so that the mantissa lines up where it would for normal Float16 uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); - if (shift<23u) + if (shift<23u) { + resi |= 0x00800000; // set implicit bit resi >>= shift; + } } if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values memcpy(&resi, &res, sizeof(res)); diff --git a/test/float16.jl b/test/float16.jl index 2fb6cf95da6cc..75f9b55b6d51c 100644 --- a/test/float16.jl +++ b/test/float16.jl @@ -209,7 +209,6 @@ const minsubf16_32 = Float32(minsubf16) for i in 1:2^16 f = reinterpret(Float16, UInt16(i-1)) isfinite(f) || continue - abs(f)<=eps(f) && continue if f < 0 epsdown = T(eps(f))/2 epsup = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2 @@ -217,8 +216,8 @@ const minsubf16_32 = Float32(minsubf16) epsup = T(eps(f))/2 epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2 end - @test isequal(f, Float16(nextfloat(T(f) - epsdown))) - @test isequal(f, Float16(prevfloat(T(f) + epsup))) + @test isequal(f*(-1)^(f === Float16(0)), Float16(nextfloat(T(f) - epsdown))) + @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup))) @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown))) @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup))) end