From 8b028f5af4f5eb0a0527bb822e60dfa75a62ea53 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Sun, 28 Apr 2013 01:24:34 -0400
Subject: [PATCH] clean up implementation of fpsiround, fpuiround

---
 base/boot.jl       |   4 +-
 base/float.jl      |  26 +++++------
 src/intrinsics.cpp | 111 ++++++++++++++++++++-------------------------
 3 files changed, 64 insertions(+), 77 deletions(-)

diff --git a/base/boot.jl b/base/boot.jl
index e9f7d77bc8996..1ab90a4d9d2de 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -151,8 +151,8 @@ export
     #nan_dom_err, copysign_float, ctlz_int, ctpop_int, cttz_int,
     #div_float, eq_float, eq_int, eqfsi64, eqfui64, flipsign_int,
     #fpext64, fpiseq32, fpiseq64, fpislt32, fpislt64,
-    #fpsiround32, fpsiround64, fptosi32, fptosi64, fptoui32, fptoui64,
-    #fptrunc32, fpuiround32, fpuiround64, le_float, lefsi64, lefui64, lesif64,
+    #fpsiround, fpuiround, fptosi32, fptosi64, fptoui32, fptoui64,
+    #fptrunc32, le_float, lefsi64, lefui64, lesif64,
     #leuif64, lshr_int, lt_float, ltfsi64, ltfui64, ltsif64, ltuif64, mul_float,
     #mul_int, ne_float, ne_int, neg_float, neg_int, not_int, or_int, rem_float,
     #sdiv_int, shl_int, sitofp32, sitofp64, sle_int, slt_int, smod_int,
diff --git a/base/float.jl b/base/float.jl
index 535ceae7fc71d..e5fa8850c002a 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -50,30 +50,30 @@ float(x)   = convert(FloatingPoint,   x)
 if WORD_SIZE == 64
     iround(x::Float32) = iround(float64(x))
     itrunc(x::Float32) = itrunc(float64(x))
-    iround(x::Float64) = box(Int64,fpsiround64(unbox(Float64,x)))
+    iround(x::Float64) = box(Int64,fpsiround(unbox(Float64,x)))
     itrunc(x::Float64) = box(Int64,fptosi64(unbox(Float64,x)))
 else
-    iround(x::Float32) = box(Int32,fpsiround32(unbox(Float32,x)))
+    iround(x::Float32) = box(Int32,fpsiround(unbox(Float32,x)))
     itrunc(x::Float32) = box(Int32,fptosi32(unbox(Float32,x)))
-    iround(x::Float64) = int32(box(Int64,fpsiround64(unbox(Float64,x))))
+    iround(x::Float64) = int32(box(Int64,fpsiround(unbox(Float64,x))))
     itrunc(x::Float64) = int32(box(Int64,fptosi64(unbox(Float64,x))))
 end
 
 for to in (Int8, Uint8, Int16, Uint16)
     @eval begin
-        iround(::Type{$to}, x::Float32) = box($to,trunc_int($to,fpsiround32(unbox(Float32,x))))
-        iround(::Type{$to}, x::Float64) = box($to,trunc_int($to,fpsiround64(unbox(Float64,x))))
+        iround(::Type{$to}, x::Float32) = box($to,trunc_int($to,fpsiround(unbox(Float32,x))))
+        iround(::Type{$to}, x::Float64) = box($to,trunc_int($to,fpsiround(unbox(Float64,x))))
     end
 end
 
-iround(::Type{Int32}, x::Float32) = box(Int32,fpsiround32(unbox(Float32,x)))
-iround(::Type{Int32}, x::Float64) = box(Int32,trunc_int(Int32,fpsiround64(unbox(Float64,x))))
-iround(::Type{Uint32}, x::Float32) = box(Uint32,fpuiround32(unbox(Float32,x)))
-iround(::Type{Uint32}, x::Float64) = box(Uint32,trunc_int(Uint32,fpuiround64(unbox(Float64,x))))
-iround(::Type{Int64}, x::Float32) = box(Int64,fpsiround64(fpext64(unbox(Float32,x))))
-iround(::Type{Int64}, x::Float64) = box(Int64,fpsiround64(unbox(Float64,x)))
-iround(::Type{Uint64}, x::Float32) = box(Uint64,fpuiround64(fpext64(unbox(Float32,x))))
-iround(::Type{Uint64}, x::Float64) = box(Uint64,fpuiround64(unbox(Float64,x)))
+iround(::Type{Int32}, x::Float32) = box(Int32,fpsiround(unbox(Float32,x)))
+iround(::Type{Int32}, x::Float64) = box(Int32,trunc_int(Int32,fpsiround(unbox(Float64,x))))
+iround(::Type{Uint32}, x::Float32) = box(Uint32,fpuiround(unbox(Float32,x)))
+iround(::Type{Uint32}, x::Float64) = box(Uint32,trunc_int(Uint32,fpuiround(unbox(Float64,x))))
+iround(::Type{Int64}, x::Float32) = box(Int64,fpsiround(fpext64(unbox(Float32,x))))
+iround(::Type{Int64}, x::Float64) = box(Int64,fpsiround(unbox(Float64,x)))
+iround(::Type{Uint64}, x::Float32) = box(Uint64,fpuiround(fpext64(unbox(Float32,x))))
+iround(::Type{Uint64}, x::Float64) = box(Uint64,fpuiround(unbox(Float64,x)))
 
 iround(::Type{Int128}, x::Float32) = convert(Int128,round(x))
 iround(::Type{Int128}, x::Float64) = convert(Int128,round(x))
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 40e1221254720..ca0ab0423038e 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -26,7 +26,7 @@ namespace JL_I {
         // conversion
         sext_int, zext_int, trunc_int,
         fptoui32, fptosi32, fptoui64, fptosi64,
-        fpsiround32, fpsiround64, fpuiround32, fpuiround64,
+        fpsiround, fpuiround,
         uitofp32, sitofp32, uitofp64, sitofp64,
         fptrunc32, fpext64,
         // functions
@@ -438,6 +438,50 @@ static Value *emit_checked_fptoui(jl_value_t *targ, Value *x, jl_codectx_t *ctx)
     return emit_checked_fptoui(staticeval_bitstype(targ, "checked_fptoui", ctx), x, ctx);
 }
 
+static Value *emit_iround(Value *x, bool issigned, jl_codectx_t *ctx)
+{
+    int nmantissa, expoffs, expbits;
+    int64_t topbit;
+    Type *intt, *floatt;
+    Value *bits = JL_INT(x);  // x as an integer; used below as the float's raw bit pattern
+
+    if (bits->getType()->getPrimitiveSizeInBits() == 32) {  // 32-bit input: Float32 layout
+        nmantissa = 23;   // shift that brings the exponent field down to bit 0
+        expoffs = 127;    // exponent offset (bias) of the format
+        expbits = 0xff;   // mask selecting the exponent field after the shift
+        topbit = BIT31;   // mask for the sign bit
+        intt = T_int32; floatt = T_float32;
+    }
+    else {  // every other width falls back to the Float64 layout
+        nmantissa = 52;
+        expoffs = 1023;
+        expbits = 0x7ff;
+        topbit = BIT63;
+        intt = T_int64; floatt = T_float64;
+    }
+    // Emits a float -> integer rounding conversion; the result has type intt.
+    // Method: itrunc(x + copysign(0.5,x)), converted signed or unsigned per issigned.
+    // Values whose biased exponent is >= expoffs+nmantissa are already integers,
+    // and this rounding method doesn't always give the right answer there.
+    Value *expo = builder.CreateAShr(bits, ConstantInt::get(intt,nmantissa));
+    expo = builder.CreateAnd(expo, ConstantInt::get(intt,expbits));  // mask drops the sign bits AShr shifted in
+    Value *isint = builder.CreateICmpSGE(expo,
+                                         ConstantInt::get(intt,expoffs+nmantissa));  // true: skip the +-0.5 step
+    Value *half = builder.CreateBitCast(ConstantFP::get(floatt, 0.5), intt);  // bit pattern of 0.5
+    Value *signedhalf =
+        builder.CreateOr(half,
+                         builder.CreateAnd(bits,
+                                           ConstantInt::get(intt,topbit)));  // copysign(0.5,x) via x's sign bit
+    Value *sum = builder.CreateFAdd(FP(x),
+                                    builder.CreateBitCast(signedhalf, floatt));  // x + copysign(0.5,x)
+
+    Value *src = builder.CreateSelect(isint, FP(x), sum);  // pick the float first, then convert once
+    if (issigned)
+        return builder.CreateFPToSI(src, intt);  // signed conversion
+    else
+        return builder.CreateFPToUI(src, intt);  // unsigned conversion
+}
+
 static Value *emit_pointerref(jl_value_t *e, jl_value_t *i, jl_codectx_t *ctx)
 {
     jl_value_t *aty = expr_type(e, ctx);
@@ -884,66 +928,10 @@ static Value *emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs,
     HANDLE(fptosi32,1) return builder.CreateFPToSI(FP(x), T_int32);
     HANDLE(fptoui64,1) return builder.CreateFPToUI(FP(x), T_int64);
     HANDLE(fptosi64,1) return builder.CreateFPToSI(FP(x), T_int64);
-    HANDLE(fpsiround32,1)
-    HANDLE(fpuiround32,1)
+    HANDLE(fpsiround,1)
+    HANDLE(fpuiround,1)
     {
-        // itrunc(x + copysign(0.5,x))
-        Value *bits = JL_INT(x);
-        // values with exponent >= nbits are already integers, and this
-        // rounding method doesn't always give the right answer there.
-        Value *expo = builder.CreateAShr(bits, ConstantInt::get(T_int32,23));
-        expo = builder.CreateAnd(expo, ConstantInt::get(T_int32,0xff));
-        Value *isint = builder.CreateICmpSGE(expo,
-                                             ConstantInt::get(T_int32,127+23));
-        Value *half = builder.CreateBitCast(ConstantFP::get(T_float32, 0.5),
-                                            T_int32);
-        Value *signedhalf =
-            builder.CreateOr(half,
-                             builder.CreateAnd(bits,
-                                               ConstantInt::get(T_int32,
-                                                                BIT31)));
-        Value *sum = builder.CreateFAdd(FP(x),
-                                        builder.CreateBitCast(signedhalf,
-                                                              T_float32));
-        if (f == fpuiround32) {
-            return builder.CreateSelect(isint,
-                                        builder.CreateFPToUI(FP(x), T_int32),
-                                        builder.CreateFPToUI(sum, T_int32));
-        }
-        else {
-            return builder.CreateSelect(isint,
-                                        builder.CreateFPToSI(FP(x), T_int32),
-                                        builder.CreateFPToSI(sum, T_int32));
-        }
-    }
-    HANDLE(fpsiround64,1)
-    HANDLE(fpuiround64,1)
-    {
-        Value *bits = JL_INT(x);
-        Value *expo = builder.CreateAShr(bits, ConstantInt::get(T_int64,52));
-        expo = builder.CreateAnd(expo, ConstantInt::get(T_int64,0x7ff));
-        Value *isint = builder.CreateICmpSGE(expo,
-                                             ConstantInt::get(T_int64,1023+52));
-        Value *half = builder.CreateBitCast(ConstantFP::get(T_float64, 0.5),
-                                            T_int64);
-        Value *signedhalf =
-            builder.CreateOr(half,
-                             builder.CreateAnd(bits,
-                                               ConstantInt::get(T_int64,
-                                                                BIT63)));
-        Value *sum = builder.CreateFAdd(FP(x),
-                                        builder.CreateBitCast(signedhalf,
-                                                              T_float64));
-        if (f == fpuiround64) {
-            return builder.CreateSelect(isint,
-                                        builder.CreateFPToUI(FP(x), T_int64),
-                                        builder.CreateFPToUI(sum, T_int64));
-        }
-        else {
-            return builder.CreateSelect(isint,
-                                        builder.CreateFPToSI(FP(x), T_int64),
-                                        builder.CreateFPToSI(sum, T_int64));
-        }
+        return emit_iround(x, f == fpsiround, ctx);
     }
     HANDLE(uitofp32,1)  return builder.CreateUIToFP(JL_INT(x), T_float32);
     HANDLE(sitofp32,1)  return builder.CreateSIToFP(JL_INT(x), T_float32);
@@ -1096,8 +1084,7 @@ extern "C" void jl_init_intrinsic_functions(void)
     ADD_I(ctpop_int); ADD_I(ctlz_int); ADD_I(cttz_int);
     ADD_I(sext_int); ADD_I(zext_int); ADD_I(trunc_int);
     ADD_I(fptoui32); ADD_I(fptosi32); ADD_I(fptoui64); ADD_I(fptosi64);
-    ADD_I(fpsiround32); ADD_I(fpsiround64);
-    ADD_I(fpuiround32); ADD_I(fpuiround64);
+    ADD_I(fpsiround); ADD_I(fpuiround);
     ADD_I(uitofp32); ADD_I(sitofp32); ADD_I(uitofp64); ADD_I(sitofp64);
     ADD_I(fptrunc32); ADD_I(fpext64);
     ADD_I(abs_float); ADD_I(copysign_float);