From 02f442413c9b4158cc0d1c431f5aeab5445d3b71 Mon Sep 17 00:00:00 2001
From: oscarddssmith <oscar.smith@juliacomputing.com>
Date: Thu, 28 Oct 2021 14:02:18 -0400
Subject: [PATCH 1/7] fixed Float16 from Float64 and BigFloat

---
 base/mpfr.jl             | 7 +++++--
 src/runtime_intrinsics.c | 9 ++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/base/mpfr.jl b/base/mpfr.jl
index 99c9b09416898..cbb88cb49b2f0 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -338,8 +338,11 @@ Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
     _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
 Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 
-# TODO: avoid double rounding
-Float16(x::BigFloat) = Float16(Float64(x))
+function Float16(x::BigFloat) :: Float16
+    res = Float32(x)
+    reinterpret(UInt32, res) & 0x1fff != 0x1000 && return res
+    return nextfloat(res, cmp(x, res))
+end
 
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
 promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 657c750ee76d6..feeb223785ee1 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -205,7 +205,14 @@ JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
 
 JL_DLLEXPORT uint16_t __truncdfhf2(double param)
 {
-    return float_to_half((float)param);
+    float res = (float)param;
+    uint32_t resi;
+    memcpy(&resi, &res, sizeof(res));
+    if ((resi & 0x1fff) == 0x1000) {
+        resi += (res < param) - (param < res);
+        memcpy(&res, &resi, sizeof(res));
+    }
+    return float_to_half(res);
 }
 
 #endif

From 847b086cb9ff88db4ff207853dc75bc1fd97b8e6 Mon Sep 17 00:00:00 2001
From: oscarddssmith <oscar.smith@juliacomputing.com>
Date: Fri, 29 Oct 2021 17:31:08 -0400
Subject: [PATCH 2/7] fix impl, add tests

---
 base/mpfr.jl             | 11 +++++++++--
 src/runtime_intrinsics.c | 11 +++++++++--
 test/float16.jl          | 23 +++++++++++++++++++++++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/base/mpfr.jl b/base/mpfr.jl
index cbb88cb49b2f0..2d70fd00c82dc 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -340,8 +340,15 @@ Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 
 function Float16(x::BigFloat) :: Float16
     res = Float32(x)
-    reinterpret(UInt32, res) & 0x1fff != 0x1000 && return res
-    return nextfloat(res, cmp(x, res))
+    resi = reinterpret(UInt32, res)
+    if (resi&0x7fffffff) < 0x38800000
+        shift = 113-((resi & 0x7f800000)>>23)
+        shift<23 && (resi >>= shift)
+    end
+    if (resi & 0x1fff == 0x1000)
+        res = nextfloat(res, cmp(x, res))
+    end
+    return res
 end
 
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index feeb223785ee1..38ac964094475 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -11,6 +11,7 @@
 #include "julia.h"
 #include "julia_internal.h"
 #include "APInt-C.h"
+#include "stdio.h"
 
 const unsigned int host_char_bit = 8;
 
@@ -208,8 +209,14 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
     float res = (float)param;
     uint32_t resi;
     memcpy(&resi, &res, sizeof(res));
-    if ((resi & 0x1fff) == 0x1000) {
-        resi += (res < param) - (param < res);
+    if ((resi&0x7fffffffu) < 0x38800000u){
+        uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
+        if (shift<23u)
+            resi >>= shift;
+    }
+    if ((resi & 0x1fffu) == 0x1000u) {
+        memcpy(&resi, &res, sizeof(res));
+        resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
         memcpy(&res, &resi, sizeof(res));
     }
     return float_to_half(res);
diff --git a/test/float16.jl b/test/float16.jl
index 804aba9ef741b..68d56828dae54 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -202,3 +202,26 @@ const minsubf16_32 = Float32(minsubf16)
 
 # issues #33076
 @test Float16(1f5) == Inf16
+
+@testset "conversion to Float16 from" begin
+    for T in (Float32, Float64, BigFloat)
+        @testset "conversion from $T" begin
+            for i in 1:2^16
+                f = reinterpret(Float16, UInt16(i-1))
+                isfinite(f) || continue
+                abs(f)<eps(f) && continue
+                if f < 0
+                    epsdown = T(eps(f))/2
+                    epsup   = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
+                else
+                    epsup   = T(eps(f))/2
+                    epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
+                end
+                @test isequal(f,  Float16(nextfloat(T(f) - epsdown)))
+                @test isequal(f, Float16(prevfloat(T(f) + epsup)))
+                @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
+                @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
+            end
+        end
+    end
+end

From 055bf60204a8eebccd8689c2de208e19ce7872f8 Mon Sep 17 00:00:00 2001
From: oscarddssmith <oscar.smith@juliacomputing.com>
Date: Fri, 29 Oct 2021 17:33:01 -0400
Subject: [PATCH 3/7] fix impl, add tests

---
 test/float16.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/float16.jl b/test/float16.jl
index 68d56828dae54..2fb6cf95da6cc 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -209,7 +209,7 @@ const minsubf16_32 = Float32(minsubf16)
             for i in 1:2^16
                 f = reinterpret(Float16, UInt16(i-1))
                 isfinite(f) || continue
-                abs(f)<eps(f) && continue
+                abs(f)<=eps(f) && continue
                 if f < 0
                     epsdown = T(eps(f))/2
                     epsup   = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2

From abfb719cc7bedca788c24faf7c25b31dce05ee8e Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith@gmail.com>
Date: Sat, 30 Oct 2021 02:52:41 -0400
Subject: [PATCH 4/7] comment subnormal mask

---
 base/mpfr.jl             | 7 +++++--
 src/runtime_intrinsics.c | 6 ++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/base/mpfr.jl b/base/mpfr.jl
index 2d70fd00c82dc..86f1a892e8505 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -341,11 +341,14 @@ Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 function Float16(x::BigFloat) :: Float16
     res = Float32(x)
     resi = reinterpret(UInt32, res)
-    if (resi&0x7fffffff) < 0x38800000
+    if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
+        #shift so that the mantissa lines up where it would for normal Float16
         shift = 113-((resi & 0x7f800000)>>23)
         shift<23 && (resi >>= shift)
     end
-    if (resi & 0x1fff == 0x1000)
+    if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
+        memcpy(&resi, &res, sizeof(res));
+        # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
         res = nextfloat(res, cmp(x, res))
     end
     return res
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 38ac964094475..a9e38a9c40c35 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -209,13 +209,15 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
     float res = (float)param;
     uint32_t resi;
     memcpy(&resi, &res, sizeof(res));
-    if ((resi&0x7fffffffu) < 0x38800000u){
+    if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
+        // shift so that the mantissa lines up where it would for normal Float16
         uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
         if (shift<23u)
             resi >>= shift;
     }
-    if ((resi & 0x1fffu) == 0x1000u) {
+    if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
         memcpy(&resi, &res, sizeof(res));
+        // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
         resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
         memcpy(&res, &resi, sizeof(res));
     }

From b821022e0a8316d0edfc921ab08b007e39013528 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith@gmail.com>
Date: Sun, 31 Oct 2021 23:15:48 -0400
Subject: [PATCH 5/7] Julia isn't C

---
 base/mpfr.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/base/mpfr.jl b/base/mpfr.jl
index 86f1a892e8505..92a139636c2a6 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -347,7 +347,6 @@ function Float16(x::BigFloat) :: Float16
         shift<23 && (resi >>= shift)
     end
     if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
-        memcpy(&resi, &res, sizeof(res));
         # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
         res = nextfloat(res, cmp(x, res))
     end

From 352aef77d04fa38a6933caa01ac074077db41298 Mon Sep 17 00:00:00 2001
From: oscarddssmith <oscar.smith@juliacomputing.com>
Date: Mon, 1 Nov 2021 11:19:20 -0400
Subject: [PATCH 6/7] remove stdio

---
 src/runtime_intrinsics.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index a9e38a9c40c35..5004c1ffb9c61 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -11,7 +11,6 @@
 #include "julia.h"
 #include "julia_internal.h"
 #include "APInt-C.h"
-#include "stdio.h"
 
 const unsigned int host_char_bit = 8;
 

From 41533ef17cde7e618327399446d5cb90d2b3acad Mon Sep 17 00:00:00 2001
From: oscarddssmith <oscar.smith@juliacomputing.com>
Date: Mon, 1 Nov 2021 17:13:09 -0400
Subject: [PATCH 7/7] fix nextfloat 0.0

---
 base/mpfr.jl             | 5 ++++-
 src/runtime_intrinsics.c | 4 +++-
 test/float16.jl          | 5 ++---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/base/mpfr.jl b/base/mpfr.jl
index 92a139636c2a6..e85f281619ac0 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -344,7 +344,10 @@ function Float16(x::BigFloat) :: Float16
     if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
         #shift so that the mantissa lines up where it would for normal Float16
         shift = 113-((resi & 0x7f800000)>>23)
-        shift<23 && (resi >>= shift)
+        if shift<23
+            resi |= 0x0080_0000 # set implicit bit
+            resi >>= shift
+        end
     end
     if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
         # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 5004c1ffb9c61..c7a7f21ab3589 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -211,8 +211,10 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
     if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
         // shift so that the mantissa lines up where it would for normal Float16
         uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
-        if (shift<23u)
+        if (shift<23u) {
+            resi |= 0x00800000; // set implicit bit
             resi >>= shift;
+        }
     }
     if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
         memcpy(&resi, &res, sizeof(res));
diff --git a/test/float16.jl b/test/float16.jl
index 2fb6cf95da6cc..75f9b55b6d51c 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -209,7 +209,6 @@ const minsubf16_32 = Float32(minsubf16)
             for i in 1:2^16
                 f = reinterpret(Float16, UInt16(i-1))
                 isfinite(f) || continue
-                abs(f)<=eps(f) && continue
                 if f < 0
                     epsdown = T(eps(f))/2
                     epsup   = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
@@ -217,8 +216,8 @@ const minsubf16_32 = Float32(minsubf16)
                     epsup   = T(eps(f))/2
                     epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
                 end
-                @test isequal(f,  Float16(nextfloat(T(f) - epsdown)))
-                @test isequal(f, Float16(prevfloat(T(f) + epsup)))
+                @test isequal(f*(-1)^(f === Float16(0)),  Float16(nextfloat(T(f) - epsdown)))
+                @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup)))
                 @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
                 @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
             end