From ef21892f52a73c7956f312089f84ccb41a48bb3d Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Mon, 12 Mar 2018 23:10:34 +0100
Subject: [PATCH 01/12] Implement std.math.atan() for single- and
 double-precision

And make the x87 `real` version CTFE-able.
---
 std/math.d | 275 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 197 insertions(+), 78 deletions(-)

diff --git a/std/math.d b/std/math.d
index 5f1ac25a642..e6c736e3d7c 100644
--- a/std/math.d
+++ b/std/math.d
@@ -1260,113 +1260,232 @@ real atan(real x) @safe pure nothrow @nogc
 {
     version(InlineAsm_X86_Any)
     {
-        return atan2(x, 1.0L);
+        if (!__ctfe)
+            return atan2(x, 1.0L);
+    }
+    return atanImpl(x);
+}
+
+/// ditto
+double atan(double x) @safe pure nothrow @nogc { return atanImpl(x); }
+
+/// ditto
+float atan(float x) @safe pure nothrow @nogc { return atanImpl(x); }
+
+///
+@safe unittest
+{
+    assert(isIdentical(atan(0.0), 0.0));
+    assert(atan(sqrt(3.0)).approxEqual(PI / 3));
+}
+
+private T atanImpl(T)(T x) @safe pure nothrow @nogc
+{
+    // Coefficients for atan(x)
+    enum realFormat = floatTraits!T.realFormat;
+    static if (realFormat == RealFormat.ieeeQuadruple)
+    {
+        static immutable T[9] P = [
+            -6.880597774405940432145577545328795037141E2L,
+            -2.514829758941713674909996882101723647996E3L,
+            -3.696264445691821235400930243493001671932E3L,
+            -2.792272753241044941703278827346430350236E3L,
+            -1.148164399808514330375280133523543970854E3L,
+            -2.497759878476618348858065206895055957104E2L,
+            -2.548067867495502632615671450650071218995E1L,
+            -8.768423468036849091777415076702113400070E-1L,
+            -6.635810778635296712545011270011752799963E-4L
+        ];
+        static immutable T[9] Q = [
+            2.064179332321782129643673263598686441900E3L,
+            8.782996876218210302516194604424986107121E3L,
+            1.547394317752562611786521896296215170819E4L,
+            1.458510242529987155225086911411015961174E4L,
+            7.928572347062145288093560392463784743935E3L,
+            2.494680540950601626662048893678584497900E3L,
+            4.308348370818927353321556740027020068897E2L,
+            3.566239794444800849656497338030115886153E1L,
+            1.0
+        ];
+    }
+    else static if (realFormat == RealFormat.ieeeExtended)
+    {
+        static immutable T[5] P = [
+           -5.0894116899623603312185E1L,
+           -9.9988763777265819915721E1L,
+           -6.3976888655834347413154E1L,
+           -1.4683508633175792446076E1L,
+           -8.6863818178092187535440E-1L,
+        ];
+        static immutable T[6] Q = [
+            1.5268235069887081006606E2L,
+            3.9157570175111990631099E2L,
+            3.6144079386152023162701E2L,
+            1.4399096122250781605352E2L,
+            2.2981886733594175366172E1L,
+            1.0000000000000000000000E0L,
+        ];
+    }
+    else static if (realFormat == RealFormat.ieeeDouble)
+    {
+        static immutable T[5] P = [
+            -6.485021904942025371773E1L,
+            -1.228866684490136173410E2L,
+            -7.500855792314704667340E1L,
+            -1.615753718733365076637E1L,
+            -8.750608600031904122785E-1L,
+        ];
+        static immutable T[6] Q = [
+             1.945506571482613964425E2L,
+             4.853903996359136964868E2L,
+             4.328810604912902668951E2L,
+             1.650270098316988542046E2L,
+             2.485846490142306297962E1L,
+             1.000000000000000000000E0L,
+        ];
+
+        enum T MOREBITS = 6.123233995736765886130E-17L;
+    }
+    else static if (realFormat == RealFormat.ieeeSingle)
+    {
+        // inlined below
     }
     else
+        static assert(0, "no coefficients for atan()");
+
+    // tan(PI/8)
+    enum T TAN_PI_8 = 0.414213562373095048801688724209698078569672L;
+    // tan(3 * PI/8)
+    enum T TAN3_PI_8 = 2.414213562373095048801688724209698078569672L;
+
+    // Special cases.
+    if (x == cast(T) 0.0)
+        return x;
+    if (isInfinity(x))
+        return copysign(cast(T) PI_2, x);
+
+    // Make argument positive but save the sign.
+    bool sign = false;
+    if (signbit(x))
     {
-        // Coefficients for atan(x)
-        static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple)
+        sign = true;
+        x = -x;
+    }
+
+    static if (realFormat == RealFormat.ieeeDouble) // special case for double precision
+    {
+        short flag = 0;
+        T y;
+        if (x > TAN3_PI_8)
         {
-            static immutable real[9] P = [
-                -6.880597774405940432145577545328795037141E2L,
-                -2.514829758941713674909996882101723647996E3L,
-                -3.696264445691821235400930243493001671932E3L,
-                -2.792272753241044941703278827346430350236E3L,
-                -1.148164399808514330375280133523543970854E3L,
-                -2.497759878476618348858065206895055957104E2L,
-                -2.548067867495502632615671450650071218995E1L,
-                -8.768423468036849091777415076702113400070E-1L,
-                -6.635810778635296712545011270011752799963E-4L
-            ];
-            static immutable real[9] Q = [
-                2.064179332321782129643673263598686441900E3L,
-                8.782996876218210302516194604424986107121E3L,
-                1.547394317752562611786521896296215170819E4L,
-                1.458510242529987155225086911411015961174E4L,
-                7.928572347062145288093560392463784743935E3L,
-                2.494680540950601626662048893678584497900E3L,
-                4.308348370818927353321556740027020068897E2L,
-                3.566239794444800849656497338030115886153E1L,
-                1.0
-            ];
+            y = PI_2;
+            flag = 1;
+            x = -(1.0 / x);
         }
-        else
+        else if (x <= 0.66)
         {
-            static immutable real[5] P = [
-               -5.0894116899623603312185E1L,
-               -9.9988763777265819915721E1L,
-               -6.3976888655834347413154E1L,
-               -1.4683508633175792446076E1L,
-               -8.6863818178092187535440E-1L,
-            ];
-            static immutable real[6] Q = [
-                1.5268235069887081006606E2L,
-                3.9157570175111990631099E2L,
-                3.6144079386152023162701E2L,
-                1.4399096122250781605352E2L,
-                2.2981886733594175366172E1L,
-                1.0000000000000000000000E0L,
-            ];
+            y = 0.0;
         }
-
-        // tan(PI/8)
-        enum real TAN_PI_8 = 0.414213562373095048801688724209698078569672L;
-        // tan(3 * PI/8)
-        enum real TAN3_PI_8 = 2.414213562373095048801688724209698078569672L;
-
-        // Special cases.
-        if (x == 0.0)
-            return x;
-        if (isInfinity(x))
-            return copysign(PI_2, x);
-
-        // Make argument positive but save the sign.
-        bool sign = false;
-        if (signbit(x))
+        else
         {
-            sign = true;
-            x = -x;
+            y = PI_4;
+            flag = 2;
+            x = (x - 1.0)/(x + 1.0);
         }
 
+        T z = x * x;
+        z = z * poly(z, P) / poly(z, Q);
+        z = x * z + x;
+        if (flag == 2)
+            z += 0.5 * MOREBITS;
+        else if (flag == 1)
+            z += MOREBITS;
+        y = y + z;
+    }
+    else
+    {
         // Range reduction.
-        real y;
+        T y;
         if (x > TAN3_PI_8)
         {
             y = PI_2;
-            x = -(1.0 / x);
+            x = -((cast(T) 1.0) / x);
         }
         else if (x > TAN_PI_8)
         {
             y = PI_4;
-            x = (x - 1.0)/(x + 1.0);
+            x = (x - cast(T) 1.0)/(x + cast(T) 1.0);
         }
         else
             y = 0.0;
 
         // Rational form in x^^2.
-        const real z = x * x;
-        y = y + (poly(z, P) / poly(z, Q)) * z * x + x;
-
-        return (sign) ? -y : y;
+        const T z = x * x;
+        static if (realFormat == RealFormat.ieeeSingle)
+        {
+            y += ((( 8.05374449538e-2f * z
+                   - 1.38776856032E-1f) * z
+                   + 1.99777106478E-1f) * z
+                   - 3.33329491539E-1f) * z * x
+                 + x;
+        }
+        else
+        {
+            y = y + (poly(z, P) / poly(z, Q)) * z * x + x;
+        }
     }
-}
-
-/// ditto
-double atan(double x) @safe pure nothrow @nogc { return atan(cast(real) x); }
 
-/// ditto
-float atan(float x)  @safe pure nothrow @nogc { return atan(cast(real) x); }
-
-///
-@safe unittest
-{
-    assert(isIdentical(atan(0.0), 0.0));
-    assert(atan(sqrt(3.0)).approxEqual(PI / 3));
+    return (sign) ? -y : y;
 }
 
 @safe @nogc nothrow unittest
 {
-    assert(equalsDigit(atan(std.math.sqrt(3.0)), PI / 3, useDigits));
+    static void testAtan(T)()
+    {
+        // ±0
+        const T zero = 0.0;
+        assert(isIdentical(atan(zero), zero));
+        assert(isIdentical(atan(-zero), -zero));
+        // ±∞
+        const T inf = T.infinity;
+        assert(approxEqual(atan(inf), cast(T) PI_2));
+        assert(approxEqual(atan(-inf), cast(T) -PI_2));
+        // NaN
+        const T specialNaN = NaN(0x0123L);
+        assert(isIdentical(atan(specialNaN), specialNaN));
+
+        static immutable T[2][] vals =
+        [
+            // x, atan(x)
+            [ 0.25, 0.2449786631 ],
+            [ 0.5,  0.4636476090 ],
+            [ 1,    PI_4         ],
+            [ 1.5,  0.9827937232 ],
+            [ 10,   1.4711276743 ],
+        ];
+
+        foreach (ref val; vals)
+        {
+            T x = val[0];
+            T r = val[1];
+            T a = atan(x);
+
+            //printf("atan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) a, cast(real) r);
+            assert(approxEqual(r, a));
+
+            x = -x;
+            r = -r;
+            a = atan(x);
+            //printf("atan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) a, cast(real) r);
+            assert(approxEqual(r, a));
+        }
+    }
+
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double, float))
+        testAtan!T();
+
+    assert(equalsDigit(atan(std.math.sqrt(3.0L)), PI / 3, useDigits));
 }
 
 /***************

From 83e0a95468618559c0885332ed660d0f58a520c9 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Tue, 13 Mar 2018 22:22:19 +0100
Subject: [PATCH 02/12] Implement std.math.atan2() for single- and
 double-precision

And make the x87 `real` version CTFE-able.
---
 std/math.d | 223 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 151 insertions(+), 72 deletions(-)

diff --git a/std/math.d b/std/math.d
index e6c736e3d7c..25707e6c142 100644
--- a/std/math.d
+++ b/std/math.d
@@ -1261,7 +1261,7 @@ real atan(real x) @safe pure nothrow @nogc
     version(InlineAsm_X86_Any)
     {
         if (!__ctfe)
-            return atan2(x, 1.0L);
+            return atan2Asm(x, 1.0L);
     }
     return atanImpl(x);
 }
@@ -1509,91 +1509,26 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc
  *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD -$(INFIN))    $(TD $(PLUSMN)3$(PI)/4))
  *      )
  */
-real atan2(real y, real x) @trusted pure nothrow @nogc
+real atan2(real y, real x) @trusted pure nothrow @nogc // TODO: @safe
 {
     version(InlineAsm_X86_Any)
     {
-        version (Win64)
-        {
-            asm pure nothrow @nogc {
-                naked;
-                fld real ptr [RDX]; // y
-                fld real ptr [RCX]; // x
-                fpatan;
-                ret;
-            }
-        }
-        else
-        {
-            asm pure nothrow @nogc {
-                fld y;
-                fld x;
-                fpatan;
-            }
-        }
-    }
-    else
-    {
-        // Special cases.
-        if (isNaN(x) || isNaN(y))
-            return real.nan;
-        if (y == 0.0)
-        {
-            if (x >= 0 && !signbit(x))
-                return copysign(0, y);
-            else
-                return copysign(PI, y);
-        }
-        if (x == 0.0)
-            return copysign(PI_2, y);
-        if (isInfinity(x))
-        {
-            if (signbit(x))
-            {
-                if (isInfinity(y))
-                    return copysign(3*PI_4, y);
-                else
-                    return copysign(PI, y);
-            }
-            else
-            {
-                if (isInfinity(y))
-                    return copysign(PI_4, y);
-                else
-                    return copysign(0.0, y);
-            }
-        }
-        if (isInfinity(y))
-            return copysign(PI_2, y);
-
-        // Call atan and determine the quadrant.
-        real z = atan(y / x);
-
-        if (signbit(x))
-        {
-            if (signbit(y))
-                z = z - PI;
-            else
-                z = z + PI;
-        }
-
-        if (z == 0.0)
-            return copysign(z, y);
-
-        return z;
+        if (!__ctfe)
+            return atan2Asm(y, x);
     }
+    return atan2Impl(y, x);
 }
 
 /// ditto
 double atan2(double y, double x) @safe pure nothrow @nogc
 {
-    return atan2(cast(real) y, cast(real) x);
+    return atan2Impl(y, x);
 }
 
 /// ditto
 float atan2(float y, float x) @safe pure nothrow @nogc
 {
-    return atan2(cast(real) y, cast(real) x);
+    return atan2Impl(y, x);
 }
 
 ///
@@ -1602,8 +1537,152 @@ float atan2(float y, float x) @safe pure nothrow @nogc
     assert(atan2(1.0, sqrt(3.0)).approxEqual(PI / 6));
 }
 
+version(InlineAsm_X86_Any)
+private real atan2Asm(real y, real x) @trusted pure nothrow @nogc
+{
+    version (Win64)
+    {
+        asm pure nothrow @nogc {
+            naked;
+            fld real ptr [RDX]; // y
+            fld real ptr [RCX]; // x
+            fpatan;
+            ret;
+        }
+    }
+    else
+    {
+        asm pure nothrow @nogc {
+            fld y;
+            fld x;
+            fpatan;
+        }
+    }
+}
+
+private T atan2Impl(T)(T y, T x) @safe pure nothrow @nogc
+{
+    // Special cases.
+    if (isNaN(x) || isNaN(y))
+        return T.nan;
+    if (y == cast(T) 0.0)
+    {
+        if (x >= 0 && !signbit(x))
+            return copysign(0, y);
+        else
+            return copysign(cast(T) PI, y);
+    }
+    if (x == cast(T) 0.0)
+        return copysign(cast(T) PI_2, y);
+    if (isInfinity(x))
+    {
+        if (signbit(x))
+        {
+            if (isInfinity(y))
+                return copysign(3 * cast(T) PI_4, y);
+            else
+                return copysign(cast(T) PI, y);
+        }
+        else
+        {
+            if (isInfinity(y))
+                return copysign(cast(T) PI_4, y);
+            else
+                return copysign(cast(T) 0.0, y);
+        }
+    }
+    if (isInfinity(y))
+        return copysign(cast(T) PI_2, y);
+
+    // Call atan and determine the quadrant.
+    T z = atan(y / x);
+
+    if (signbit(x))
+    {
+        if (signbit(y))
+            z = z - cast(T) PI;
+        else
+            z = z + cast(T) PI;
+    }
+
+    if (z == cast(T) 0.0)
+        return copysign(z, y);
+
+    return z;
+}
+
 @safe @nogc nothrow unittest
 {
+    static void testAtan2(T)()
+    {
+        // NaN
+        const T nan = T.nan;
+        assert(isNaN(atan2(nan, cast(T) 1)));
+        assert(isNaN(atan2(cast(T) 1, nan)));
+
+        const T inf = T.infinity;
+        static immutable T[3][] vals =
+        [
+            // y, x, atan2(y, x)
+
+            // ±0
+            [  0.0,  1.0,  0.0 ],
+            [ -0.0,  1.0, -0.0 ],
+            [  0.0,  0.0,  0.0 ],
+            [ -0.0,  0.0, -0.0 ],
+            [  0.0, -1.0,  PI ],
+            [ -0.0, -1.0, -PI ],
+            [  0.0, -0.0,  PI ],
+            [ -0.0, -0.0, -PI ],
+            [  1.0,  0.0,  PI_2 ],
+            [  1.0, -0.0,  PI_2 ],
+            [ -1.0,  0.0, -PI_2 ],
+            [ -1.0, -0.0, -PI_2 ],
+
+            // ±∞
+            [  1.0,  inf,  0.0 ],
+            [ -1.0,  inf, -0.0 ],
+            [  1.0, -inf,  PI ],
+            [ -1.0, -inf, -PI ],
+            [  inf,  1.0,  PI_2 ],
+            [  inf, -1.0,  PI_2 ],
+            [ -inf,  1.0, -PI_2 ],
+            [ -inf, -1.0, -PI_2 ],
+            [  inf,  inf,  PI_4 ],
+            [ -inf,  inf, -PI_4 ],
+            [  inf, -inf,  3 * PI_4 ],
+            [ -inf, -inf, -3 * PI_4 ],
+
+            [  1.0,  1.0,  PI_4 ],
+            [ -2.0,  2.0, -PI_4 ],
+            [  3.0, -3.0,  3 * PI_4 ],
+            [ -4.0, -4.0, -3 * PI_4 ],
+
+            [  0.75,  0.25,   1.249045772398 ],
+            [ -0.5,   0.375, -0.927295218002 ],
+            [  0.5,  -0.125,  1.815774989922 ],
+            [ -0.75, -0.5,   -2.158798930342 ],
+        ];
+
+        foreach (ref val; vals)
+        {
+            const T y = val[0];
+            const T x = val[1];
+            const T r = val[2];
+            const T a = atan2(y, x);
+
+            //printf("atan2(%Lg, %Lg) = %Lg, should be %Lg\n", cast(real) y, cast(real) x, cast(real) a, cast(real) r);
+            if (r == 0)
+                assert(isIdentical(r, a)); // check sign
+            else
+                assert(approxEqual(r, a));
+        }
+    }
+
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double, float))
+        testAtan2!T();
+
     assert(equalsDigit(atan2(1.0L, std.math.sqrt(3.0L)), PI / 6, useDigits));
 }
 

From d3f5005568ea5fb7c19ed07c1914b55dd3b11b31 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Tue, 13 Mar 2018 23:06:08 +0100
Subject: [PATCH 03/12] Implement std.math.tan() for single- and
 double-precision

And make the x87 `real` version CTFE-able.
---
 std/math.d | 333 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 198 insertions(+), 135 deletions(-)

diff --git a/std/math.d b/std/math.d
index 25707e6c142..0c562080dd9 100644
--- a/std/math.d
+++ b/std/math.d
@@ -918,8 +918,32 @@ deprecated
  *      $(TR $(TD $(PLUSMNINF))  $(TD $(NAN))       $(TD yes))
  *      )
  */
+real tan(real x) @trusted pure nothrow @nogc // TODO: @safe
+{
+    version(InlineAsm_X86_Any)
+    {
+        if (!__ctfe)
+            return tanAsm(x);
+    }
+    return tanImpl(x);
+}
+
+/// ditto
+double tan(double x) @safe pure nothrow @nogc { return tanImpl(x); }
+
+/// ditto
+float tan(float x) @safe pure nothrow @nogc { return tanImpl(x); }
+
+///
+@safe unittest
+{
+    assert(isIdentical(tan(0.0), 0.0));
+    assert(tan(PI).approxEqual(0));
+    assert(tan(PI / 3).approxEqual(sqrt(3.0)));
+}
 
-real tan(real x) @trusted pure nothrow @nogc
+version(InlineAsm_X86_Any)
+private real tanAsm(real x) @trusted pure nothrow @nogc
 {
     version(D_InlineAsm_X86)
     {
@@ -1010,170 +1034,209 @@ Clear1: asm pure nothrow @nogc{
 Lret: {}
     }
     else
-    {
-        enum realFormat = floatTraits!real.realFormat;
+        static assert(0);
+}
 
-        // Coefficients for tan(x) and PI/4 split into three parts.
-        static if (realFormat == RealFormat.ieeeQuadruple)
-        {
-            static immutable real[6] P = [
-                2.883414728874239697964612246732416606301E10L,
-                -2.307030822693734879744223131873392503321E9L,
-                5.160188250214037865511600561074819366815E7L,
-                -4.249691853501233575668486667664718192660E5L,
-                1.272297782199996882828849455156962260810E3L,
-                -9.889929415807650724957118893791829849557E-1L
-            ];
-            static immutable real[7] Q = [
-                8.650244186622719093893836740197250197602E10L
-                -4.152206921457208101480801635640958361612E10L,
-                2.758476078803232151774723646710890525496E9L,
-                -5.733709132766856723608447733926138506824E7L,
-                4.529422062441341616231663543669583527923E5L,
-                -1.317243702830553658702531997959756728291E3L,
-                1.0
-            ];
+private T tanImpl(T)(T x) @safe pure nothrow @nogc
+{
+    // Coefficients for tan(x) and PI/4 split into three parts.
+    enum realFormat = floatTraits!T.realFormat;
+    static if (realFormat == RealFormat.ieeeQuadruple)
+    {
+        static immutable T[6] P = [
+            2.883414728874239697964612246732416606301E10L,
+            -2.307030822693734879744223131873392503321E9L,
+            5.160188250214037865511600561074819366815E7L,
+            -4.249691853501233575668486667664718192660E5L,
+            1.272297782199996882828849455156962260810E3L,
+            -9.889929415807650724957118893791829849557E-1L
+        ];
+        static immutable T[7] Q = [
+            8.650244186622719093893836740197250197602E10L
+            -4.152206921457208101480801635640958361612E10L,
+            2.758476078803232151774723646710890525496E9L,
+            -5.733709132766856723608447733926138506824E7L,
+            4.529422062441341616231663543669583527923E5L,
+            -1.317243702830553658702531997959756728291E3L,
+            1.0
+        ];
 
-            enum real P1 =
-                7.853981633974483067550664827649598009884357452392578125E-1L;
-            enum real P2 =
-                2.8605943630549158983813312792950660807511260829685741796657E-18L;
-            enum real P3 =
-                2.1679525325309452561992610065108379921905808E-35L;
-        }
-        else
-        {
-            static immutable real[3] P = [
-               -1.7956525197648487798769E7L,
-                1.1535166483858741613983E6L,
-               -1.3093693918138377764608E4L,
-            ];
-            static immutable real[5] Q = [
-               -5.3869575592945462988123E7L,
-                2.5008380182335791583922E7L,
-               -1.3208923444021096744731E6L,
-                1.3681296347069295467845E4L,
-                1.0000000000000000000000E0L,
-            ];
+        enum T P1 =
+            7.853981633974483067550664827649598009884357452392578125E-1L;
+        enum T P2 =
+            2.8605943630549158983813312792950660807511260829685741796657E-18L;
+        enum T P3 =
+            2.1679525325309452561992610065108379921905808E-35L;
+    }
+    else static if (realFormat == RealFormat.ieeeExtended ||
+                    realFormat == RealFormat.ieeeDouble)
+    {
+        static immutable T[3] P = [
+           -1.7956525197648487798769E7L,
+            1.1535166483858741613983E6L,
+           -1.3093693918138377764608E4L,
+        ];
+        static immutable T[5] Q = [
+           -5.3869575592945462988123E7L,
+            2.5008380182335791583922E7L,
+           -1.3208923444021096744731E6L,
+            1.3681296347069295467845E4L,
+            1.0000000000000000000000E0L,
+        ];
 
-            enum real P1 = 7.853981554508209228515625E-1L;
-            enum real P2 = 7.946627356147928367136046290398E-9L;
-            enum real P3 = 3.061616997868382943065164830688E-17L;
-        }
+        enum T P1 = 7.853981554508209228515625E-1L;
+        enum T P2 = 7.946627356147928367136046290398E-9L;
+        enum T P3 = 3.061616997868382943065164830688E-17L;
+    }
+    else static if (realFormat == RealFormat.ieeeSingle)
+    {
+        enum T P1 = 0.78515625;
+        enum T P2 = 2.4187564849853515625e-4;
+        enum T P3 = 3.77489497744594108e-8;
+    }
+    else
+        static assert(0, "no coefficients for tan()");
 
-        // Special cases.
-        if (x == 0.0 || isNaN(x))
-            return x;
-        if (isInfinity(x))
-            return real.nan;
+    // Special cases.
+    if (x == cast(T) 0.0 || isNaN(x))
+        return x;
+    if (isInfinity(x))
+        return T.nan;
 
-        // Make argument positive but save the sign.
-        bool sign = false;
-        if (signbit(x))
-        {
-            sign = true;
-            x = -x;
-        }
+    // Make argument positive but save the sign.
+    bool sign = false;
+    if (signbit(x))
+    {
+        sign = true;
+        x = -x;
+    }
 
-        // Compute x mod PI/4.
-        real y = floor(x / PI_4);
+    // Compute x mod PI/4.
+    static if (realFormat == RealFormat.ieeeSingle)
+    {
+        enum T FOPI = 4 / PI;
+        int j = cast(int) (FOPI * x);
+        T y = j;
+        T z;
+    }
+    else
+    {
+        T y = floor(x / cast(T) PI_4);
         // Strip high bits of integer part.
         enum numHighBits = (realFormat == RealFormat.ieeeDouble ? 3 : 4);
-        real z = ldexp(y, -numHighBits);
+        T z = ldexp(y, -numHighBits);
         // Compute y - 2^numHighBits * (y / 2^numHighBits).
         z = y - ldexp(floor(z), numHighBits);
 
         // Integer and fraction part modulo one octant.
         int j = cast(int)(z);
+    }
 
-        // Map zeros and singularities to origin.
-        if (j & 1)
-        {
-            j += 1;
-            y += 1.0;
-        }
+    // Map zeros and singularities to origin.
+    if (j & 1)
+    {
+        j += 1;
+        y += cast(T) 1.0;
+    }
 
-        z = ((x - y * P1) - y * P2) - y * P3;
-        const real zz = z * z;
+    z = ((x - y * P1) - y * P2) - y * P3;
+    const T zz = z * z;
 
-        enum zzThreshold = (realFormat == RealFormat.ieeeDouble ? 1.0e-14L : 1.0e-20L);
-        if (zz > zzThreshold)
-            y = z + z * (zz * poly(zz, P) / poly(zz, Q));
+    enum T zzThreshold = (realFormat == RealFormat.ieeeSingle ? 1.0e-4L :
+                          realFormat == RealFormat.ieeeDouble ? 1.0e-14L : 1.0e-20L);
+    if (zz > zzThreshold)
+    {
+        static if (realFormat == RealFormat.ieeeSingle)
+        {
+            y = ((((( 9.38540185543E-3f  * zz
+                    + 3.11992232697E-3f) * zz
+                    + 2.44301354525E-2f) * zz
+                    + 5.34112807005E-2f) * zz
+                    + 1.33387994085E-1f) * zz
+                    + 3.33331568548E-1f) * zz * z
+                + z;
+        }
         else
-            y = z;
+        {
+            y = z + z * (zz * poly(zz, P) / poly(zz, Q));
+        }
+    }
+    else
+        y = z;
 
-        if (j & 2)
-            y = -1.0 / y;
+    if (j & 2)
+        y = (cast(T) -1.0) / y;
 
-        return (sign) ? -y : y;
-    }
+    return (sign) ? -y : y;
 }
 
-@safe nothrow @nogc unittest
+@safe @nogc nothrow unittest
 {
-    static real[2][] vals =     // angle,tan
+    static void testTan(T)()
+    {
+        // ±0
+        const T zero = 0.0;
+        assert(isIdentical(tan(zero), zero));
+        assert(isIdentical(tan(-zero), -zero));
+        // ±∞
+        const T inf = T.infinity;
+        assert(isNaN(tan(inf)));
+        assert(isNaN(tan(-inf)));
+        // NaN
+        const T specialNaN = NaN(0x0123L);
+        assert(isIdentical(tan(specialNaN), specialNaN));
+
+        static immutable T[2][] vals =
         [
-         [   0,   0],
-         [   .5,  .5463024898],
-         [   1,   1.557407725],
-         [   1.5, 14.10141995],
-         [   2,  -2.185039863],
-         [   2.5,-.7470222972],
-         [   3,  -.1425465431],
-         [   3.5, .3745856402],
-         [   4,   1.157821282],
-         [   4.5, 4.637332055],
-         [   5,  -3.380515006],
-         [   5.5,-.9955840522],
-         [   6,  -.2910061914],
-         [   6.5, .2202772003],
-         [   10,  .6483608275],
-
-         // special angles
-         [   PI_4,   1],
-         //[   PI_2,   real.infinity], // PI_2 is not _exactly_ pi/2.
-         [   3*PI_4, -1],
-         [   PI,     0],
-         [   5*PI_4, 1],
-         //[   3*PI_2, -real.infinity],
-         [   7*PI_4, -1],
-         [   2*PI,   0],
+            // angle, tan
+            [   .5,  .5463024898],
+            [   1,   1.557407725],
+            [   1.5, 14.10141995],
+            [   2,  -2.185039863],
+            [   2.5,-.7470222972],
+            [   3,  -.1425465431],
+            [   3.5, .3745856402],
+            [   4,   1.157821282],
+            [   4.5, 4.637332055],
+            [   5,  -3.380515006],
+            [   5.5,-.9955840522],
+            [   6,  -.2910061914],
+            [   6.5, .2202772003],
+            [   10,  .6483608275],
+
+            // special angles
+            [   PI_4,   1],
+            //[   PI_2,   T.infinity], // PI_2 is not _exactly_ pi/2.
+            [   3*PI_4, -1],
+            [   PI,     0],
+            [   5*PI_4, 1],
+            //[   3*PI_2, -T.infinity],
+            [   7*PI_4, -1],
+            [   2*PI,   0],
          ];
-    int i;
 
-    for (i = 0; i < vals.length; i++)
-    {
-        real x = vals[i][0];
-        real r = vals[i][1];
-        real t = tan(x);
+        foreach (ref val; vals)
+        {
+            T x = val[0];
+            T r = val[1];
+            T t = tan(x);
 
-        //printf("tan(%Lg) = %Lg, should be %Lg\n", x, t, r);
-        if (!isIdentical(r, t)) assert(fabs(r-t) <= .0000001);
+            //printf("tan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) t, cast(real) r);
+            assert(approxEqual(r, t));
 
-        x = -x;
-        r = -r;
-        t = tan(x);
-        //printf("tan(%Lg) = %Lg, should be %Lg\n", x, t, r);
-        if (!isIdentical(r, t) && !(r != r && t != t)) assert(fabs(r-t) <= .0000001);
+            x = -x;
+            r = -r;
+            t = tan(x);
+            //printf("tan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) t, cast(real) r);
+            assert(approxEqual(r, t));
+        }
     }
-    // overflow
-    assert(isNaN(tan(real.infinity)));
-    assert(isNaN(tan(-real.infinity)));
-    // NaN propagation
-    assert(isIdentical( tan(NaN(0x0123L)), NaN(0x0123L) ));
-}
 
-///
-@safe unittest
-{
-    assert(isIdentical(tan(0.0), 0.0));
-    assert(tan(PI).approxEqual(0));
-    assert(tan(PI / 3).approxEqual(sqrt(3.0)));
-}
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double, float))
+        testTan!T();
 
-@safe @nogc nothrow unittest
-{
-    assert(equalsDigit(tan(PI / 3), std.math.sqrt(3.0), useDigits));
+    assert(equalsDigit(tan(PI / 3), std.math.sqrt(3.0L), useDigits));
 }
 
 /***************

From d17cda4c770840843e463bdc520bc7818d6886a7 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Wed, 14 Mar 2018 22:34:56 +0100
Subject: [PATCH 04/12] Implement std.math.exp() for single- and
 double-precision

---
 std/math.d | 470 +++++++++++++++++++++++++++++------------------------
 1 file changed, 260 insertions(+), 210 deletions(-)

diff --git a/std/math.d b/std/math.d
index 0c562080dd9..5eb2f08a96f 100644
--- a/std/math.d
+++ b/std/math.d
@@ -2187,145 +2187,299 @@ auto sqrt(creal z) @nogc @safe pure nothrow
  *    $(TR $(TD $(NAN))        $(TD $(NAN))    )
  *  )
  */
-real exp(real x) @trusted pure nothrow @nogc
+real exp(real x) @trusted pure nothrow @nogc // TODO: @safe
 {
     version(D_InlineAsm_X86)
     {
         //  e^^x = 2^^(LOG2E*x)
         // (This is valid because the overflow & underflow limits for exp
         // and exp2 are so similar).
-        return exp2(LOG2E*x);
+        if (!__ctfe)
+            return exp2(LOG2E*x);
     }
     else version(D_InlineAsm_X86_64)
     {
         //  e^^x = 2^^(LOG2E*x)
         // (This is valid because the overflow & underflow limits for exp
         // and exp2 are so similar).
-        return exp2(LOG2E*x);
+        if (!__ctfe)
+            return exp2(LOG2E*x);
     }
-    else
+    return expImpl(x);
+}
+
+/// ditto
+double exp(double x) @safe pure nothrow @nogc { return expImpl(x); }
+
+/// ditto
+float exp(float x) @safe pure nothrow @nogc { return expImpl(x); }
+
+///
+@safe unittest
+{
+    assert(exp(0.0) == 1.0);
+    assert(exp(3.0).feqrel(E * E * E) > 16);
+}
+
+private T expImpl(T)(T x) @safe pure nothrow @nogc
+{
+    alias F = floatTraits!T;
+    static if (F.realFormat == RealFormat.ieeeSingle)
     {
-        alias F = floatTraits!real;
-        static if (F.realFormat == RealFormat.ieeeDouble)
-        {
-            // Coefficients for exp(x)
-            static immutable real[3] P = [
-                9.99999999999999999910E-1L,
-                3.02994407707441961300E-2L,
-                1.26177193074810590878E-4L,
-            ];
-            static immutable real[4] Q = [
-                2.00000000000000000009E0L,
-                2.27265548208155028766E-1L,
-                2.52448340349684104192E-3L,
-                3.00198505138664455042E-6L,
-            ];
+        enum T C1 = 0.693359375;
+        enum T C2 = -2.12194440e-4;
 
-            // C1 + C2 = LN2.
-            enum real C1 = 6.93145751953125E-1;
-            enum real C2 = 1.42860682030941723212E-6;
+        // Overflow and Underflow limits.
+        enum T OF = 88.72283905206835;
+        enum T UF = -103.278929903431851103; // ln(2^-149)
+    }
+    else static if (F.realFormat == RealFormat.ieeeDouble)
+    {
+        // Coefficients for exp(x)
+        static immutable T[3] P = [
+            9.99999999999999999910E-1L,
+            3.02994407707441961300E-2L,
+            1.26177193074810590878E-4L,
+        ];
+        static immutable T[4] Q = [
+            2.00000000000000000009E0L,
+            2.27265548208155028766E-1L,
+            2.52448340349684104192E-3L,
+            3.00198505138664455042E-6L,
+        ];
 
-            // Overflow and Underflow limits.
-            enum real OF =  7.09782712893383996732E2;  // ln((1-2^-53) * 2^1024)
-            enum real UF = -7.451332191019412076235E2; // ln(2^-1075)
-        }
-        else static if (F.realFormat == RealFormat.ieeeExtended)
-        {
-            // Coefficients for exp(x)
-            static immutable real[3] P = [
-                9.9999999999999999991025E-1L,
-                3.0299440770744196129956E-2L,
-                1.2617719307481059087798E-4L,
-            ];
-            static immutable real[4] Q = [
-                2.0000000000000000000897E0L,
-                2.2726554820815502876593E-1L,
-                2.5244834034968410419224E-3L,
-                3.0019850513866445504159E-6L,
-            ];
+        // C1 + C2 = LN2.
+        enum T C1 = 6.93145751953125E-1;
+        enum T C2 = 1.42860682030941723212E-6;
+
+        // Overflow and Underflow limits.
+        enum T OF =  7.09782712893383996732E2;  // ln((1-2^-53) * 2^1024)
+        enum T UF = -7.451332191019412076235E2; // ln(2^-1075)
+    }
+    else static if (F.realFormat == RealFormat.ieeeExtended)
+    {
+        // Coefficients for exp(x)
+        static immutable T[3] P = [
+            9.9999999999999999991025E-1L,
+            3.0299440770744196129956E-2L,
+            1.2617719307481059087798E-4L,
+        ];
+        static immutable T[4] Q = [
+            2.0000000000000000000897E0L,
+            2.2726554820815502876593E-1L,
+            2.5244834034968410419224E-3L,
+            3.0019850513866445504159E-6L,
+        ];
 
-            // C1 + C2 = LN2.
-            enum real C1 = 6.9314575195312500000000E-1L;
-            enum real C2 = 1.4286068203094172321215E-6L;
+        // C1 + C2 = LN2.
+        enum T C1 = 6.9314575195312500000000E-1L;
+        enum T C2 = 1.4286068203094172321215E-6L;
 
-            // Overflow and Underflow limits.
-            enum real OF =  1.1356523406294143949492E4L;  // ln((1-2^-64) * 2^16384)
-            enum real UF = -1.13994985314888605586758E4L; // ln(2^-16446)
-        }
-        else static if (F.realFormat == RealFormat.ieeeQuadruple)
-        {
-            // Coefficients for exp(x) - 1
-            static immutable real[5] P = [
-                9.999999999999999999999999999999999998502E-1L,
-                3.508710990737834361215404761139478627390E-2L,
-                2.708775201978218837374512615596512792224E-4L,
-                6.141506007208645008909088812338454698548E-7L,
-                3.279723985560247033712687707263393506266E-10L
-            ];
-            static immutable real[6] Q = [
-                2.000000000000000000000000000000000000150E0,
-                2.368408864814233538909747618894558968880E-1L,
-                3.611828913847589925056132680618007270344E-3L,
-                1.504792651814944826817779302637284053660E-5L,
-                1.771372078166251484503904874657985291164E-8L,
-                2.980756652081995192255342779918052538681E-12L
-            ];
+        // Overflow and Underflow limits.
+        enum T OF =  1.1356523406294143949492E4L;  // ln((1-2^-64) * 2^16384)
+        enum T UF = -1.13994985314888605586758E4L; // ln(2^-16446)
+    }
+    else static if (F.realFormat == RealFormat.ieeeQuadruple)
+    {
+        // Coefficients for exp(x) - 1
+        static immutable T[5] P = [
+            9.999999999999999999999999999999999998502E-1L,
+            3.508710990737834361215404761139478627390E-2L,
+            2.708775201978218837374512615596512792224E-4L,
+            6.141506007208645008909088812338454698548E-7L,
+            3.279723985560247033712687707263393506266E-10L
+        ];
+        static immutable T[6] Q = [
+            2.000000000000000000000000000000000000150E0,
+            2.368408864814233538909747618894558968880E-1L,
+            3.611828913847589925056132680618007270344E-3L,
+            1.504792651814944826817779302637284053660E-5L,
+            1.771372078166251484503904874657985291164E-8L,
+            2.980756652081995192255342779918052538681E-12L
+        ];
 
-            // C1 + C2 = LN2.
-            enum real C1 = 6.93145751953125E-1L;
-            enum real C2 = 1.428606820309417232121458176568075500134E-6L;
+        // C1 + C2 = LN2.
+        enum T C1 = 6.93145751953125E-1L;
+        enum T C2 = 1.428606820309417232121458176568075500134E-6L;
 
-            // Overflow and Underflow limits.
-            enum real OF =  1.135583025911358400418251384584930671458833e4L;
-            enum real UF = -1.143276959615573793352782661133116431383730e4L;
-        }
-        else
-            static assert(0, "Not implemented for this architecture");
+        // Overflow and Underflow limits.
+        enum T OF =  1.135583025911358400418251384584930671458833e4L;
+        enum T UF = -1.143276959615573793352782661133116431383730e4L;
+    }
+    else
+        static assert(0, "Not implemented for this architecture");
 
-        // Special cases.
-        if (isNaN(x))
-            return x;
-        if (x > OF)
-            return real.infinity;
-        if (x < UF)
-            return 0.0;
+    // Special cases.
+    if (isNaN(x))
+        return x;
+    if (x > OF)
+        return real.infinity;
+    if (x < UF)
+        return 0.0;
 
-        // Express: e^^x = e^^g * 2^^n
-        //   = e^^g * e^^(n * LOG2E)
-        //   = e^^(g + n * LOG2E)
-        int n = cast(int) floor(LOG2E * x + 0.5);
-        x -= n * C1;
-        x -= n * C2;
+    // Express: e^^x = e^^g * 2^^n
+    //   = e^^g * e^^(n * LOG2E)
+    //   = e^^(g + n * LOG2E)
+    T xx = floor((cast(T) LOG2E) * x + cast(T) 0.5);
+    const int n = cast(int) xx;
+    x -= xx * C1;
+    x -= xx * C2;
 
+    static if (F.realFormat == RealFormat.ieeeSingle)
+    {
+        xx = x * x;
+        x = ((((( 1.9875691500E-4f  * x
+                + 1.3981999507E-3f) * x
+                + 8.3334519073E-3f) * x
+                + 4.1665795894E-2f) * x
+                + 1.6666665459E-1f) * x
+                + 5.0000001201E-1f) * xx
+                + x
+                + 1.0f;
+    }
+    else
+    {
         // Rational approximation for exponential of the fractional part:
         //  e^^x = 1 + 2x P(x^^2) / (Q(x^^2) - P(x^^2))
-        const real xx = x * x;
-        const real px = x * poly(xx, P);
+        xx = x * x;
+        const T px = x * poly(xx, P);
         x = px / (poly(xx, Q) - px);
-        x = 1.0 + ldexp(x, 1);
-
-        // Scale by power of 2.
-        x = ldexp(x, n);
-
-        return x;
+        x = (cast(T) 1.0) + ldexp(x, 1);
     }
-}
 
-/// ditto
-double exp(double x) @safe pure nothrow @nogc  { return exp(cast(real) x); }
-
-/// ditto
-float exp(float x)  @safe pure nothrow @nogc   { return exp(cast(real) x); }
+    // Scale by power of 2.
+    x = ldexp(x, n);
 
-///
-@safe unittest
-{
-    assert(exp(0.0) == 1.0);
-    assert(exp(3.0).feqrel(E * E * E) > 16);
+    return x;
 }
 
 @safe @nogc nothrow unittest
 {
+    FloatingPointControl ctrl;
+    if (FloatingPointControl.hasExceptionTraps)
+        ctrl.disableExceptions(FloatingPointControl.allExceptions);
+    ctrl.rounding = FloatingPointControl.roundToNearest;
+
+    static void testExp(T)()
+    {
+        enum realFormat = floatTraits!T.realFormat;
+        static if (realFormat == RealFormat.ieeeQuadruple)
+        {
+            static immutable T[2][] exptestpoints =
+            [ //  x               exp(x)
+                [ 1.0L,           E                                        ],
+                [ 0.5L,           0x1.a61298e1e069bc972dfefab6df34p+0L     ],
+                [ 3.0L,           E*E*E                                    ],
+                [ 0x1.6p+13L,     0x1.6e509d45728655cdb4840542acb5p+16250L ], // near overflow
+                [ 0x1.7p+13L,     T.infinity                               ], // close overflow
+                [ 0x1p+80L,       T.infinity                               ], // far overflow
+                [ T.infinity,     T.infinity                               ],
+                [-0x1.18p+13L,    0x1.5e4bf54b4807034ea97fef0059a6p-12927L ], // near underflow
+                [-0x1.625p+13L,   0x1.a6bd68a39d11fec3a250cd97f524p-16358L ], // ditto
+                [-0x1.62dafp+13L, 0x0.cb629e9813b80ed4d639e875be6cp-16382L ], // near underflow - subnormal
+                [-0x1.6549p+13L,  0x0.0000000000000000000000000001p-16382L ], // ditto
+                [-0x1.655p+13L,   0                                        ], // close underflow
+                [-0x1p+30L,       0                                        ], // far underflow
+            ];
+        }
+        else static if (realFormat == RealFormat.ieeeExtended)
+        {
+            static immutable T[2][] exptestpoints =
+            [ //  x               exp(x)
+                [ 1.0L,           E                            ],
+                [ 0.5L,           0x1.a61298e1e069bc97p+0L     ],
+                [ 3.0L,           E*E*E                        ],
+                [ 0x1.1p+13L,     0x1.29aeffefc8ec645p+12557L  ], // near overflow
+                [ 0x1.7p+13L,     T.infinity                   ], // close overflow
+                [ 0x1p+80L,       T.infinity                   ], // far overflow
+                [ T.infinity,     T.infinity                   ],
+                [-0x1.18p+13L,    0x1.5e4bf54b4806db9p-12927L  ], // near underflow
+                [-0x1.625p+13L,   0x1.a6bd68a39d11f35cp-16358L ], // ditto
+                [-0x1.62dafp+13L, 0x1.96c53d30277021dp-16383L  ], // near underflow - subnormal
+                [-0x1.643p+13L,   0x1p-16444L                  ], // ditto
+                [-0x1.645p+13L,   0                            ], // close underflow
+                [-0x1p+30L,       0                            ], // far underflow
+            ];
+        }
+        else static if (realFormat == RealFormat.ieeeDouble)
+        {
+            static immutable T[2][] exptestpoints =
+            [ //  x,             exp(x)
+                [ 1.0L,          E                        ],
+                [ 0.5L,          0x1.a61298e1e069cp+0L    ],
+                [ 3.0L,          E*E*E                    ],
+                [ 0x1.6p+9L,     0x1.93bf4ec282efbp+1015L ], // near overflow
+                [ 0x1.7p+9L,     T.infinity               ], // close overflow
+                [ 0x1p+80L,      T.infinity               ], // far overflow
+                [ T.infinity,    T.infinity               ],
+                [-0x1.6p+9L,     0x1.44a3824e5285fp-1016L ], // near underflow
+                [-0x1.64p+9L,    0x0.06f84920bb2d4p-1022L ], // near underflow - subnormal
+                [-0x1.743p+9L,   0x0.0000000000001p-1022L ], // ditto
+                [-0x1.8p+9L,     0                        ], // close underflow
+                [-0x1p+30L,      0                        ], // far underflow
+            ];
+        }
+        else static if (realFormat == RealFormat.ieeeSingle)
+        {
+            static immutable T[2][] exptestpoints =
+            [ //  x,             exp(x)
+                [ 1.0L,          E                ],
+                [ 0.5L,          0x1.a61299p+0L   ],
+                [ 3.0L,          E*E*E            ],
+                [ 0x1.62p+6L,    0x1.99b988p+127L ], // near overflow
+                [ 0x1.7p+6L,     T.infinity       ], // close overflow
+                [ 0x1p+80L,      T.infinity       ], // far overflow
+                [ T.infinity,    T.infinity       ],
+                [-0x1.5cp+6L,    0x1.666d0ep-126L ], // near underflow
+                [-0x1.7p+6L,     0x0.026a42p-126L ], // near underflow - subnormal
+                [-0x1.9cp+6L,    0x0.000002p-126L ], // ditto
+                [-0x1.ap+6L,     0                ], // close underflow
+                [-0x1p+30L,      0                ], // far underflow
+            ];
+        }
+        else
+            static assert(0, "No exp() tests for real type!");
+
+        const minEqualMantissaBits = T.mant_dig - 2;
+        T x;
+        IeeeFlags f;
+        foreach (ref pair; exptestpoints)
+        {
+            resetIeeeFlags();
+            x = exp(pair[0]);
+            //printf("exp(%La) = %La, should be %La\n", cast(real) pair[0], cast(real) x, cast(real) pair[1]);
+            assert(feqrel(x, pair[1]) >= minEqualMantissaBits);
+        }
+
+        // Ideally, exp(0) would not set the inexact flag.
+        // Unfortunately, fldl2e sets it!
+        // So it's not realistic to avoid setting it.
+        assert(exp(cast(T) 0.0) == 1.0);
+
+        // NaN propagation. Doesn't set flags, bcos was already NaN.
+        resetIeeeFlags();
+        x = exp(T.nan);
+        f = ieeeFlags;
+        assert(isIdentical(abs(x), T.nan));
+        assert(f.flags == 0);
+
+        resetIeeeFlags();
+        x = exp(-T.nan);
+        f = ieeeFlags;
+        assert(isIdentical(abs(x), T.nan));
+        assert(f.flags == 0);
+
+        x = exp(NaN(0x123));
+        assert(isIdentical(x, NaN(0x123)));
+    }
+
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double, float))
+        testExp!T();
+
+    // High resolution test (verified against GNU MPFR/Mathematica).
+    assert(exp(0.5L) == 0x1.A612_98E1_E069_BC97_2DFE_FAB6_DF34p+0L);
+
     assert(equalsDigit(exp(3.0L), E * E * E, useDigits));
 }
 
@@ -2914,110 +3068,6 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow
     }
 }
 
-@system unittest
-{
-    FloatingPointControl ctrl;
-    if (FloatingPointControl.hasExceptionTraps)
-        ctrl.disableExceptions(FloatingPointControl.allExceptions);
-    ctrl.rounding = FloatingPointControl.roundToNearest;
-
-    alias F = floatTraits!real;
-
-    static if (F.realFormat == RealFormat.ieeeQuadruple)
-    {
-        static immutable real[2][] exptestpoints =
-        [ //  x               exp(x)
-            [ 1.0L,           E                                        ],
-            [ 0.5L,           0x1.a61298e1e069bc972dfefab6df34p+0L     ],
-            [ 3.0L,           E*E*E                                    ],
-            [ 0x1.6p+13L,     0x1.6e509d45728655cdb4840542acb5p+16250L ], // near overflow
-            [ 0x1.7p+13L,     real.infinity                            ], // close overflow
-            [ 0x1p+80L,       real.infinity                            ], // far overflow
-            [ real.infinity,  real.infinity                            ],
-            [-0x1.18p+13L,    0x1.5e4bf54b4807034ea97fef0059a6p-12927L ], // near underflow
-            [-0x1.625p+13L,   0x1.a6bd68a39d11fec3a250cd97f524p-16358L ], // ditto
-            [-0x1.62dafp+13L, 0x0.cb629e9813b80ed4d639e875be6cp-16382L ], // near underflow - subnormal
-            [-0x1.6549p+13L,  0x0.0000000000000000000000000001p-16382L ], // ditto
-            [-0x1.655p+13L,   0                                        ], // close underflow
-            [-0x1p+30L,       0                                        ], // far underflow
-        ];
-    }
-    else static if (F.realFormat == RealFormat.ieeeExtended)
-    {
-        static immutable real[2][] exptestpoints =
-        [ //  x               exp(x)
-            [ 1.0L,           E                            ],
-            [ 0.5L,           0x1.a61298e1e069bc97p+0L     ],
-            [ 3.0L,           E*E*E                        ],
-            [ 0x1.1p+13L,     0x1.29aeffefc8ec645p+12557L  ], // near overflow
-            [ 0x1.7p+13L,     real.infinity                ], // close overflow
-            [ 0x1p+80L,       real.infinity                ], // far overflow
-            [ real.infinity,  real.infinity                ],
-            [-0x1.18p+13L,    0x1.5e4bf54b4806db9p-12927L  ], // near underflow
-            [-0x1.625p+13L,   0x1.a6bd68a39d11f35cp-16358L ], // ditto
-            [-0x1.62dafp+13L, 0x1.96c53d30277021dp-16383L  ], // near underflow - subnormal
-            [-0x1.643p+13L,   0x1p-16444L                  ], // ditto
-            [-0x1.645p+13L,   0                            ], // close underflow
-            [-0x1p+30L,       0                            ], // far underflow
-        ];
-    }
-    else static if (F.realFormat == RealFormat.ieeeDouble)
-    {
-        static immutable real[2][] exptestpoints =
-        [ //  x,             exp(x)
-            [ 1.0L,          E                        ],
-            [ 0.5L,          0x1.a61298e1e069cp+0L    ],
-            [ 3.0L,          E*E*E                    ],
-            [ 0x1.6p+9L,     0x1.93bf4ec282efbp+1015L ], // near overflow
-            [ 0x1.7p+9L,     real.infinity            ], // close overflow
-            [ 0x1p+80L,      real.infinity            ], // far overflow
-            [ real.infinity, real.infinity            ],
-            [-0x1.6p+9L,     0x1.44a3824e5285fp-1016L ], // near underflow
-            [-0x1.64p+9L,    0x0.06f84920bb2d4p-1022L ], // near underflow - subnormal
-            [-0x1.743p+9L,   0x0.0000000000001p-1022L ], // ditto
-            [-0x1.8p+9L,     0                        ], // close underflow
-            [-0x1p30L,       0                        ], // far underflow
-        ];
-    }
-    else
-        static assert(0, "No exp() tests for real type!");
-
-    const minEqualMantissaBits = real.mant_dig - 2;
-    real x;
-    IeeeFlags f;
-    foreach (ref pair; exptestpoints)
-    {
-        resetIeeeFlags();
-        x = exp(pair[0]);
-        assert(feqrel(x, pair[1]) >= minEqualMantissaBits);
-    }
-
-    // Ideally, exp(0) would not set the inexact flag.
-    // Unfortunately, fldl2e sets it!
-    // So it's not realistic to avoid setting it.
-    assert(exp(0.0L) == 1.0);
-
-    // NaN propagation. Doesn't set flags, bcos was already NaN.
-    resetIeeeFlags();
-    x = exp(real.nan);
-    f = ieeeFlags;
-    assert(isIdentical(abs(x), real.nan));
-    assert(f.flags == 0);
-
-    resetIeeeFlags();
-    x = exp(-real.nan);
-    f = ieeeFlags;
-    assert(isIdentical(abs(x), real.nan));
-    assert(f.flags == 0);
-
-    x = exp(NaN(0x123));
-    assert(isIdentical(x, NaN(0x123)));
-
-    // High resolution test (verified against GNU MPFR/Mathematica).
-    assert(exp(0.5L) == 0x1.A612_98E1_E069_BC97_2DFE_FAB6_DF34p+0L);
-}
-
-
 /*
  * Calculate cos(y) + i sin(y).
  *

From a08c986a4299a90bbf1b1982e1af573459d03814 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Thu, 15 Mar 2018 22:07:40 +0100
Subject: [PATCH 05/12] Implement std.math.expm1() for double-precision

And make the x87 `real` version CTFE-able.
I couldn't find the single-precision version in Cephes.
---
 std/math.d | 197 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 134 insertions(+), 63 deletions(-)

diff --git a/std/math.d b/std/math.d
index 5eb2f08a96f..3e9383775a4 100644
--- a/std/math.d
+++ b/std/math.d
@@ -2498,7 +2498,29 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc
  *    $(TR $(TD $(NAN))        $(TD $(NAN))       )
  *  )
  */
-real expm1(real x) @trusted pure nothrow @nogc
+real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe
+{
+    version(InlineAsm_X86_Any)
+    {
+        if (!__ctfe)
+            return expm1Asm(x);
+    }
+    return expm1Impl(x);
+}
+
+/// ditto
+double expm1(double x) @safe pure nothrow @nogc { return expm1Impl(x); }
+
+///
+@safe unittest
+{
+    assert(isIdentical(expm1(0.0), 0.0));
+    assert(expm1(1.0).feqrel(1.71828) > 16);
+    assert(expm1(2.0).feqrel(6.3890) > 16);
+}
+
+version(InlineAsm_X86_Any)
+private real expm1Asm(real x) @trusted pure nothrow @nogc
 {
     version(D_InlineAsm_X86)
     {
@@ -2662,98 +2684,147 @@ L_largenegative:
         }
     }
     else
+        static assert(0);
+}
+
+private T expm1Impl(T)(T x) @safe pure nothrow @nogc
+{
+    // Coefficients for exp(x) - 1 and overflow/underflow limits.
+    enum realFormat = floatTraits!T.realFormat;
+    static if (realFormat == RealFormat.ieeeQuadruple)
     {
-        // Coefficients for exp(x) - 1 and overflow/underflow limits.
-        static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple)
-        {
-            static immutable real[8] P = [
-                2.943520915569954073888921213330863757240E8L,
-                -5.722847283900608941516165725053359168840E7L,
-                8.944630806357575461578107295909719817253E6L,
-                -7.212432713558031519943281748462837065308E5L,
-                4.578962475841642634225390068461943438441E4L,
-                -1.716772506388927649032068540558788106762E3L,
-                4.401308817383362136048032038528753151144E1L,
-                -4.888737542888633647784737721812546636240E-1L
-            ];
+        static immutable T[8] P = [
+            2.943520915569954073888921213330863757240E8L,
+            -5.722847283900608941516165725053359168840E7L,
+            8.944630806357575461578107295909719817253E6L,
+            -7.212432713558031519943281748462837065308E5L,
+            4.578962475841642634225390068461943438441E4L,
+            -1.716772506388927649032068540558788106762E3L,
+            4.401308817383362136048032038528753151144E1L,
+            -4.888737542888633647784737721812546636240E-1L
+        ];
 
-            static immutable real[9] Q = [
-                1.766112549341972444333352727998584753865E9L,
-                -7.848989743695296475743081255027098295771E8L,
-                1.615869009634292424463780387327037251069E8L,
-                -2.019684072836541751428967854947019415698E7L,
-                1.682912729190313538934190635536631941751E6L,
-                -9.615511549171441430850103489315371768998E4L,
-                3.697714952261803935521187272204485251835E3L,
-                -8.802340681794263968892934703309274564037E1L,
-                1.0
-            ];
+        static immutable T[9] Q = [
+            1.766112549341972444333352727998584753865E9L,
+            -7.848989743695296475743081255027098295771E8L,
+            1.615869009634292424463780387327037251069E8L,
+            -2.019684072836541751428967854947019415698E7L,
+            1.682912729190313538934190635536631941751E6L,
+            -9.615511549171441430850103489315371768998E4L,
+            3.697714952261803935521187272204485251835E3L,
+            -8.802340681794263968892934703309274564037E1L,
+            1.0
+        ];
 
-            enum real OF = 1.1356523406294143949491931077970764891253E4L;
-            enum real UF = -1.143276959615573793352782661133116431383730e4L;
-        }
-        else
-        {
-            static immutable real[5] P = [
-               -1.586135578666346600772998894928250240826E4L,
-                2.642771505685952966904660652518429479531E3L,
-               -3.423199068835684263987132888286791620673E2L,
-                1.800826371455042224581246202420972737840E1L,
-               -5.238523121205561042771939008061958820811E-1L,
-            ];
-            static immutable real[6] Q = [
-               -9.516813471998079611319047060563358064497E4L,
-                3.964866271411091674556850458227710004570E4L,
-               -7.207678383830091850230366618190187434796E3L,
-                7.206038318724600171970199625081491823079E2L,
-               -4.002027679107076077238836622982900945173E1L,
-                1.0
-            ];
+        enum T OF = 1.1356523406294143949491931077970764891253E4L;
+        enum T UF = -1.143276959615573793352782661133116431383730e4L;
+    }
+    else static if (realFormat == RealFormat.ieeeExtended)
+    {
+        static immutable T[5] P = [
+           -1.586135578666346600772998894928250240826E4L,
+            2.642771505685952966904660652518429479531E3L,
+           -3.423199068835684263987132888286791620673E2L,
+            1.800826371455042224581246202420972737840E1L,
+           -5.238523121205561042771939008061958820811E-1L,
+        ];
+        static immutable T[6] Q = [
+           -9.516813471998079611319047060563358064497E4L,
+            3.964866271411091674556850458227710004570E4L,
+           -7.207678383830091850230366618190187434796E3L,
+            7.206038318724600171970199625081491823079E2L,
+           -4.002027679107076077238836622982900945173E1L,
+            1.0
+        ];
 
-            enum real OF =  1.1356523406294143949492E4L;
-            enum real UF = -4.5054566736396445112120088E1L;
-        }
+        enum T OF =  1.1356523406294143949492E4L;
+        enum T UF = -4.5054566736396445112120088E1L;
+    }
+    else static if (realFormat == RealFormat.ieeeDouble)
+    {
+        static immutable T[3] P = [
+            9.9999999999999999991025E-1,
+            3.0299440770744196129956E-2,
+            1.2617719307481059087798E-4,
+        ];
+        static immutable T[4] Q = [
+            2.0000000000000000000897E0,
+            2.2726554820815502876593E-1,
+            2.5244834034968410419224E-3,
+            3.0019850513866445504159E-6,
+        ];
+    }
+    else
+        static assert(0, "no coefficients for expm1()");
 
+    static if (realFormat == RealFormat.ieeeDouble) // special case for double precision
+    {
+        if (x < -0.5 || x > 0.5)
+            return exp(x) - 1.0;
+        if (x == 0.0)
+            return x;
 
+        const T xx = x * x;
+        x = x * poly(xx, P);
+        x = x / (poly(xx, Q) - x);
+        return x + x;
+    }
+    else
+    {
         // C1 + C2 = LN2.
-        enum real C1 = 6.9314575195312500000000E-1L;
-        enum real C2 = 1.428606820309417232121458176568075500134E-6L;
+        enum T C1 = 6.9314575195312500000000E-1L;
+        enum T C2 = 1.428606820309417232121458176568075500134E-6L;
 
         // Special cases.
         if (x > OF)
             return real.infinity;
-        if (x == 0.0)
+        if (x == cast(T) 0.0)
             return x;
         if (x < UF)
             return -1.0;
 
         // Express x = LN2 (n + remainder), remainder not exceeding 1/2.
-        int n = cast(int) floor(0.5 + x / LN2);
+        int n = cast(int) floor((cast(T) 0.5) + x / cast(T) LN2);
         x -= n * C1;
         x -= n * C2;
 
         // Rational approximation:
         //  exp(x) - 1 = x + 0.5 x^^2 + x^^3 P(x) / Q(x)
-        real px = x * poly(x, P);
-        real qx = poly(x, Q);
-        const real xx = x * x;
-        qx = x + (0.5 * xx + xx * px / qx);
+        T px = x * poly(x, P);
+        T qx = poly(x, Q);
+        const T xx = x * x;
+        qx = x + ((cast(T) 0.5) * xx + xx * px / qx);
 
         // We have qx = exp(remainder LN2) - 1, so:
         //  exp(x) - 1 = 2^^n (qx + 1) - 1 = 2^^n qx + 2^^n - 1.
-        px = ldexp(1.0, n);
-        x = px * qx + (px - 1.0);
+        px = ldexp(cast(T) 1.0, n);
+        x = px * qx + (px - cast(T) 1.0);
 
         return x;
     }
 }
 
-///
-@safe unittest
+@safe @nogc nothrow unittest
 {
-    assert(isIdentical(expm1(0.0), 0.0));
-    assert(expm1(1.0).feqrel(1.71828) > 16);
-    assert(expm1(2.0).feqrel(6.3890) > 16);
+    static void testExpm1(T)()
+    {
+        // NaN
+        assert(isNaN(expm1(cast(T) T.nan)));
+
+        static immutable T[] xs = [ -2, -0.75, -0.3, 0.0, 0.1, 0.2, 0.5, 1.0 ];
+        foreach (x; xs)
+        {
+            const T e = expm1(x);
+            const T r = exp(x) - 1;
+
+            //printf("expm1(%Lg) = %Lg, should approximately be %Lg\n", cast(real) x, cast(real) e, cast(real) r);
+            assert(approxEqual(r, e));
+        }
+    }
+
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double))
+        testExpm1!T();
 }
 
 /**

From adc30e77569a0e20a52228e8f1d0c663c3469036 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Thu, 15 Mar 2018 23:38:37 +0100
Subject: [PATCH 06/12] Implement std.math.exp2() for single- and
 double-precision

---
 std/math.d | 161 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 124 insertions(+), 37 deletions(-)

diff --git a/std/math.d b/std/math.d
index 3e9383775a4..1d57b19d6f7 100644
--- a/std/math.d
+++ b/std/math.d
@@ -2195,7 +2195,7 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe
         // (This is valid because the overflow & underflow limits for exp
         // and exp2 are so similar).
         if (!__ctfe)
-            return exp2(LOG2E*x);
+            return exp2Asm(LOG2E*x);
     }
     else version(D_InlineAsm_X86_64)
     {
@@ -2203,7 +2203,7 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe
         // (This is valid because the overflow & underflow limits for exp
         // and exp2 are so similar).
         if (!__ctfe)
-            return exp2(LOG2E*x);
+            return exp2Asm(LOG2E*x);
     }
     return expImpl(x);
 }
@@ -2837,22 +2837,22 @@ private T expm1Impl(T)(T x) @safe pure nothrow @nogc
  *    $(TR $(TD $(NAN))        $(TD $(NAN))    )
  *  )
  */
-pragma(inline, true)
-real exp2(real x) @nogc @trusted pure nothrow
+real exp2(real x) @nogc @trusted pure nothrow // TODO: @safe
 {
     version(InlineAsm_X86_Any)
     {
         if (!__ctfe)
             return exp2Asm(x);
-        else
-            return exp2Impl(x);
-    }
-    else
-    {
-        return exp2Impl(x);
     }
+    return exp2Impl(x);
 }
 
+/// ditto
+double exp2(double x) @nogc @safe pure nothrow { return exp2Impl(x); }
+
+/// ditto
+float exp2(float x) @nogc @safe pure nothrow { return exp2Impl(x); }
+
 ///
 @safe unittest
 {
@@ -2861,6 +2861,16 @@ real exp2(real x) @nogc @trusted pure nothrow
     assert(exp2(8.0).feqrel(256.0) > 16);
 }
 
+@safe unittest
+{
+    version(CRuntime_Microsoft) {} else // aexp2/exp2f/exp2l not implemented
+    {
+        assert( core.stdc.math.exp2f(0.0f) == 1 );
+        assert( core.stdc.math.exp2 (0.0)  == 1 );
+        assert( core.stdc.math.exp2l(0.0L) == 1 );
+    }
+}
+
 version(InlineAsm_X86_Any)
 private real exp2Asm(real x) @nogc @trusted pure nothrow
 {
@@ -3055,12 +3065,13 @@ L_was_nan:
         static assert(0);
 }
 
-private real exp2Impl(real x) @nogc @trusted pure nothrow
+private T exp2Impl(T)(T x) @nogc @safe pure nothrow
 {
     // Coefficients for exp2(x)
-    static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple)
+    enum realFormat = floatTraits!T.realFormat;
+    static if (realFormat == RealFormat.ieeeQuadruple)
     {
-        static immutable real[5] P = [
+        static immutable T[5] P = [
             9.079594442980146270952372234833529694788E12L,
             1.530625323728429161131811299626419117557E11L,
             5.677513871931844661829755443994214173883E8L,
@@ -3068,7 +3079,7 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow
             1.587171580015525194694938306936721666031E2L
         ];
 
-        static immutable real[6] Q = [
+        static immutable T[6] Q = [
             2.619817175234089411411070339065679229869E13L,
             1.490560994263653042761789432690793026977E12L,
             1.092141473886177435056423606755843616331E10L,
@@ -3077,24 +3088,50 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow
             1.0
         ];
     }
-    else
+    else static if (realFormat == RealFormat.ieeeExtended)
     {
-        static immutable real[3] P = [
+        static immutable T[3] P = [
             2.0803843631901852422887E6L,
             3.0286971917562792508623E4L,
             6.0614853552242266094567E1L,
         ];
-        static immutable real[4] Q = [
+        static immutable T[4] Q = [
             6.0027204078348487957118E6L,
             3.2772515434906797273099E5L,
             1.7492876999891839021063E3L,
             1.0000000000000000000000E0L,
         ];
     }
+    else static if (realFormat == RealFormat.ieeeDouble)
+    {
+        static immutable T[3] P = [
+            1.51390680115615096133E3L,
+            2.02020656693165307700E1L,
+            2.30933477057345225087E-2L,
+        ];
+        static immutable T[3] Q = [
+            4.36821166879210612817E3L,
+            2.33184211722314911771E2L,
+            1.00000000000000000000E0L,
+        ];
+    }
+    else static if (realFormat == RealFormat.ieeeSingle)
+    {
+        static immutable T[6] P = [
+            6.931472028550421E-001L,
+            2.402264791363012E-001L,
+            5.550332471162809E-002L,
+            9.618437357674640E-003L,
+            1.339887440266574E-003L,
+            1.535336188319500E-004L,
+        ];
+    }
+    else
+        static assert(0, "no coefficients for exp2()");
 
     // Overflow and Underflow limits.
-    enum real OF =  16_384.0L;
-    enum real UF = -16_382.0L;
+    enum T OF = T.max_exp;
+    enum T UF = T.min_exp - 1;
 
     // Special cases.
     if (isNaN(x))
@@ -3104,16 +3141,40 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow
     if (x < UF)
         return 0.0;
 
-    // Separate into integer and fractional parts.
-    int n = cast(int) floor(x + 0.5);
-    x -= n;
+    static if (realFormat == RealFormat.ieeeSingle) // special case for single precision
+    {
+        // The following is necessary because range reduction blows up.
+        if (x == 0.0f)
+            return 1.0f;
+
+        // Separate into integer and fractional parts.
+        const T i = floor(x);
+        int n = cast(int) i;
+        x -= i;
+        if (x > 0.5f)
+        {
+            n += 1;
+            x -= 1.0f;
+        }
+
+        // Rational approximation:
+        //  exp2(x) = 1.0 + x P(x)
+        x = 1.0f + x * poly(x, P);
+    }
+    else
+    {
+        // Separate into integer and fractional parts.
+        const T i = floor(x + cast(T) 0.5);
+        int n = cast(int) i;
+        x -= i;
 
-    // Rational approximation:
-    //  exp2(x) = 1.0 + 2x P(x^^2) / (Q(x^^2) - P(x^^2))
-    const real xx = x * x;
-    const real px = x * poly(xx, P);
-    x = px / (poly(xx, Q) - px);
-    x = 1.0 + ldexp(x, 1);
+        // Rational approximation:
+        //  exp2(x) = 1.0 + 2x P(x^^2) / (Q(x^^2) - P(x^^2))
+        const T xx = x * x;
+        const T px = x * poly(xx, P);
+        x = px / (poly(xx, Q) - px);
+        x = (cast(T) 1.0) + ldexp(x, 1);
+    }
 
     // Scale by power of 2.
     x = ldexp(x, n);
@@ -3121,22 +3182,48 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow
     return x;
 }
 
-///
-@safe unittest
+@safe @nogc nothrow unittest
 {
     assert(feqrel(exp2(0.5L), SQRT2) >= real.mant_dig -1);
     assert(exp2(8.0L) == 256.0);
     assert(exp2(-9.0L)== 1.0L/512.0);
-}
 
-@safe unittest
-{
-    version(CRuntime_Microsoft) {} else // aexp2/exp2f/exp2l not implemented
+    static void testExp2(T)()
     {
-        assert( core.stdc.math.exp2f(0.0f) == 1 );
-        assert( core.stdc.math.exp2 (0.0)  == 1 );
-        assert( core.stdc.math.exp2l(0.0L) == 1 );
+        // NaN
+        const T specialNaN = NaN(0x0123L);
+        assert(isIdentical(exp2(specialNaN), specialNaN));
+
+        // over-/underflow
+        enum T OF = T.max_exp;
+        enum T UF = T.min_exp - T.mant_dig;
+        assert(isIdentical(exp2(OF + 1), cast(T) T.infinity));
+        assert(isIdentical(exp2(UF - 1), cast(T) 0.0));
+
+        static immutable T[2][] vals =
+        [
+            // x, exp2(x)
+            [  0.0, 1.0 ],
+            [ -0.0, 1.0 ],
+            [  0.5, SQRT2 ],
+            [  8.0, 256.0 ],
+            [ -9.0, 1.0 / 512 ],
+        ];
+
+        foreach (ref val; vals)
+        {
+            const T x = val[0];
+            const T r = val[1];
+            const T e = exp2(x);
+
+            //printf("exp2(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) e, cast(real) r);
+            assert(approxEqual(r, e));
+        }
     }
+
+    import std.meta : AliasSeq;
+    foreach (T; AliasSeq!(real, double, float))
+        testExp2!T();
 }
 
 /*

From e1d987dda6e61596fbdf67ede4f0e76a12badbec Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Sat, 17 Mar 2018 14:48:24 +0100
Subject: [PATCH 07/12] Improve performance of std.math.poly()

Add a statically unrolled version for 1..10 coefficients.

Results on Linux x86_64 and with an Intel i5-3550 for:

  static immutable T[6] coeffs = [ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6 ];
  std.math.poly(cast(T) 0.43685, coeffs);

=> real:   ~1.2x faster
=> double: ~3.2x faster
=> float:  ~3.0x faster
---
 std/math.d | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/std/math.d b/std/math.d
index 1d57b19d6f7..a7a5ce3b39e 100644
--- a/std/math.d
+++ b/std/math.d
@@ -8438,6 +8438,20 @@ do
     }
 }
 
+/// ditto
+Unqual!(CommonType!(T1, T2)) poly(T1, T2, int N)(T1 x, ref const T2[N] A) @safe pure nothrow @nogc
+if (isFloatingPoint!T1 && isFloatingPoint!T2 && N > 0 && N <= 10)
+{
+    // statically unrolled version for up to 10 coefficients
+    typeof(return) r = A[N - 1];
+    static foreach (i; 1 .. N)
+    {
+        r *= x;
+        r += A[N - 1 - i];
+    }
+    return r;
+}
+
 ///
 @safe nothrow @nogc unittest
 {

From 63254627ca88986de01b7441b730a2dbe3ea5fc5 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Sat, 17 Mar 2018 20:55:00 +0100
Subject: [PATCH 08/12] Avoid ldexp() for static exponents

Prefer an arithmetic multiply by the according power-of-two constant, as
that's much faster. E.g., this makes tan(double) run 2.05x faster on an
Intel i5-3550.
---
 std/math.d | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/std/math.d b/std/math.d
index a7a5ce3b39e..245d12fcb05 100644
--- a/std/math.d
+++ b/std/math.d
@@ -1123,10 +1123,11 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc
     {
         T y = floor(x / cast(T) PI_4);
         // Strip high bits of integer part.
-        enum numHighBits = (realFormat == RealFormat.ieeeDouble ? 3 : 4);
-        T z = ldexp(y, -numHighBits);
+        enum T highBitsFactor = (realFormat == RealFormat.ieeeDouble ? 0x1p3 : 0x1p4);
+        enum T highBitsInv = 1.0 / highBitsFactor;
+        T z = y * highBitsInv;
         // Compute y - 2^numHighBits * (y / 2^numHighBits).
-        z = y - ldexp(floor(z), numHighBits);
+        z = y - highBitsFactor * floor(z);
 
         // Integer and fraction part modulo one octant.
         int j = cast(int)(z);
@@ -2344,7 +2345,7 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc
         xx = x * x;
         const T px = x * poly(xx, P);
         x = px / (poly(xx, Q) - px);
-        x = (cast(T) 1.0) + ldexp(x, 1);
+        x = (cast(T) 1.0) + (cast(T) 2.0) * x;
     }
 
     // Scale by power of 2.
@@ -3173,7 +3174,7 @@ private T exp2Impl(T)(T x) @nogc @safe pure nothrow
         const T xx = x * x;
         const T px = x * poly(xx, P);
         x = px / (poly(xx, Q) - px);
-        x = (cast(T) 1.0) + ldexp(x, 1);
+        x = (cast(T) 1.0) + (cast(T) 2.0) * x;
     }
 
     // Scale by power of 2.

From 02883860014a86de6c68e2e69a8fd5faa5cb56a0 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Thu, 22 Mar 2018 20:02:23 +0100
Subject: [PATCH 09/12] Streamline Cephes implementations for single precision

I.e., use poly() as for the other precisions (unlike the C source).
With enabled compiler optimizations, it should be inlined anyway.
---
 std/math.d | 82 ++++++++++++++++++++++++++----------------------------
 1 file changed, 40 insertions(+), 42 deletions(-)

diff --git a/std/math.d b/std/math.d
index 245d12fcb05..da8315b000e 100644
--- a/std/math.d
+++ b/std/math.d
@@ -1090,9 +1090,18 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc
     }
     else static if (realFormat == RealFormat.ieeeSingle)
     {
+        static immutable T[6] P = [
+            3.33331568548E-1,
+            1.33387994085E-1,
+            5.34112807005E-2,
+            2.44301354525E-2,
+            3.11992232697E-3,
+            9.38540185543E-3,
+        ];
+
         enum T P1 = 0.78515625;
-        enum T P2 = 2.4187564849853515625e-4;
-        enum T P3 = 3.77489497744594108e-8;
+        enum T P2 = 2.4187564849853515625E-4;
+        enum T P3 = 3.77489497744594108E-8;
     }
     else
         static assert(0, "no coefficients for tan()");
@@ -1148,19 +1157,9 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc
     if (zz > zzThreshold)
     {
         static if (realFormat == RealFormat.ieeeSingle)
-        {
-            y = ((((( 9.38540185543E-3f  * zz
-                    + 3.11992232697E-3f) * zz
-                    + 2.44301354525E-2f) * zz
-                    + 5.34112807005E-2f) * zz
-                    + 1.33387994085E-1f) * zz
-                    + 3.33331568548E-1f) * zz * z
-                + z;
-        }
+            y = z + z * (zz * poly(zz, P));
         else
-        {
             y = z + z * (zz * poly(zz, P) / poly(zz, Q));
-        }
     }
     else
         y = z;
@@ -1393,26 +1392,31 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc
     else static if (realFormat == RealFormat.ieeeDouble)
     {
         static immutable T[5] P = [
-            -6.485021904942025371773E1L,
-            -1.228866684490136173410E2L,
-            -7.500855792314704667340E1L,
-            -1.615753718733365076637E1L,
-            -8.750608600031904122785E-1L,
+           -6.485021904942025371773E1L,
+           -1.228866684490136173410E2L,
+           -7.500855792314704667340E1L,
+           -1.615753718733365076637E1L,
+           -8.750608600031904122785E-1L,
         ];
         static immutable T[6] Q = [
-             1.945506571482613964425E2L,
-             4.853903996359136964868E2L,
-             4.328810604912902668951E2L,
-             1.650270098316988542046E2L,
-             2.485846490142306297962E1L,
-             1.000000000000000000000E0L,
+            1.945506571482613964425E2L,
+            4.853903996359136964868E2L,
+            4.328810604912902668951E2L,
+            1.650270098316988542046E2L,
+            2.485846490142306297962E1L,
+            1.000000000000000000000E0L,
         ];
 
         enum T MOREBITS = 6.123233995736765886130E-17L;
     }
     else static if (realFormat == RealFormat.ieeeSingle)
     {
-        // inlined below
+        static immutable T[4] P = [
+           -3.33329491539E-1,
+            1.99777106478E-1,
+           -1.38776856032E-1,
+            8.05374449538E-2,
+        ];
     }
     else
         static assert(0, "no coefficients for atan()");
@@ -1486,17 +1490,9 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc
         // Rational form in x^^2.
         const T z = x * x;
         static if (realFormat == RealFormat.ieeeSingle)
-        {
-            y += ((( 8.05374449538e-2f * z
-                   - 1.38776856032E-1f) * z
-                   + 1.99777106478E-1f) * z
-                   - 3.33329491539E-1f) * z * x
-                 + x;
-        }
+            y += poly(z, P) * z * x + x;
         else
-        {
             y = y + (poly(z, P) / poly(z, Q)) * z * x + x;
-        }
     }
 
     return (sign) ? -y : y;
@@ -2227,6 +2223,15 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc
     alias F = floatTraits!T;
     static if (F.realFormat == RealFormat.ieeeSingle)
     {
+        static immutable T[6] P = [
+            5.0000001201E-1,
+            1.6666665459E-1,
+            4.1665795894E-2,
+            8.3334519073E-3,
+            1.3981999507E-3,
+            1.9875691500E-4,
+        ];
+
         enum T C1 = 0.693359375;
         enum T C2 = -2.12194440e-4;
 
@@ -2329,14 +2334,7 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc
     static if (F.realFormat == RealFormat.ieeeSingle)
     {
         xx = x * x;
-        x = ((((( 1.9875691500E-4f  * x
-                + 1.3981999507E-3f) * x
-                + 8.3334519073E-3f) * x
-                + 4.1665795894E-2f) * x
-                + 1.6666665459E-1f) * x
-                + 5.0000001201E-1f) * xx
-                + x
-                + 1.0f;
+        x = poly(x, P) * xx + x + 1.0f;
     }
     else
     {

From 21468e788b9d9c2de21793e1998197e3491b9403 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Wed, 2 May 2018 21:35:12 +0200
Subject: [PATCH 10/12] Forward to (builtin) real version for CTFE in
 single/double precision overloads

---
 std/math.d | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/std/math.d b/std/math.d
index da8315b000e..65de35ba921 100644
--- a/std/math.d
+++ b/std/math.d
@@ -929,10 +929,10 @@ real tan(real x) @trusted pure nothrow @nogc // TODO: @safe
 }
 
 /// ditto
-double tan(double x) @safe pure nothrow @nogc { return tanImpl(x); }
+double tan(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) tan(cast(real) x) : tanImpl(x); }
 
 /// ditto
-float tan(float x) @safe pure nothrow @nogc { return tanImpl(x); }
+float tan(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) tan(cast(real) x) : tanImpl(x); }
 
 ///
 @safe unittest
@@ -1330,10 +1330,10 @@ real atan(real x) @safe pure nothrow @nogc
 }
 
 /// ditto
-double atan(double x) @safe pure nothrow @nogc { return atanImpl(x); }
+double atan(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) atan(cast(real) x) : atanImpl(x); }
 
 /// ditto
-float atan(float x) @safe pure nothrow @nogc { return atanImpl(x); }
+float atan(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) atan(cast(real) x) : atanImpl(x); }
 
 ///
 @safe unittest
@@ -1582,13 +1582,13 @@ real atan2(real y, real x) @trusted pure nothrow @nogc // TODO: @safe
 /// ditto
 double atan2(double y, double x) @safe pure nothrow @nogc
 {
-    return atan2Impl(y, x);
+    return __ctfe ? cast(double) atan2(cast(real) y, cast(real) x) : atan2Impl(y, x);
 }
 
 /// ditto
 float atan2(float y, float x) @safe pure nothrow @nogc
 {
-    return atan2Impl(y, x);
+    return __ctfe ? cast(float) atan2(cast(real) y, cast(real) x) : atan2Impl(y, x);
 }
 
 ///
@@ -2206,10 +2206,10 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe
 }
 
 /// ditto
-double exp(double x) @safe pure nothrow @nogc { return expImpl(x); }
+double exp(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) exp(cast(real) x) : expImpl(x); }
 
 /// ditto
-float exp(float x) @safe pure nothrow @nogc { return expImpl(x); }
+float exp(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) exp(cast(real) x) : expImpl(x); }
 
 ///
 @safe unittest
@@ -2508,7 +2508,7 @@ real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe
 }
 
 /// ditto
-double expm1(double x) @safe pure nothrow @nogc { return expm1Impl(x); }
+double expm1(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x); }
 
 ///
 @safe unittest
@@ -2847,10 +2847,10 @@ real exp2(real x) @nogc @trusted pure nothrow // TODO: @safe
 }
 
 /// ditto
-double exp2(double x) @nogc @safe pure nothrow { return exp2Impl(x); }
+double exp2(double x) @nogc @safe pure nothrow { return __ctfe ? cast(double) exp2(cast(real) x) : exp2Impl(x); }
 
 /// ditto
-float exp2(float x) @nogc @safe pure nothrow { return exp2Impl(x); }
+float exp2(float x) @nogc @safe pure nothrow { return __ctfe ? cast(float) exp2(cast(real) x) : exp2Impl(x); }
 
 ///
 @safe unittest

From 77376fcf5653daa8751436b7ead1934352560764 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Wed, 2 May 2018 21:39:24 +0200
Subject: [PATCH 11/12] Add std.math.expm1(float) overload (using double
 precision)

---
 std/math.d | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/std/math.d b/std/math.d
index 65de35ba921..edbd7e6ae1a 100644
--- a/std/math.d
+++ b/std/math.d
@@ -2508,7 +2508,17 @@ real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe
 }
 
 /// ditto
-double expm1(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x); }
+double expm1(double x) @safe pure nothrow @nogc
+{
+    return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x);
+}
+
+/// ditto
+float expm1(float x) @safe pure nothrow @nogc
+{
+    // no single-precision version in Cephes => use double precision
+    return __ctfe ? cast(float) expm1(cast(real) x) : cast(float) expm1Impl(cast(double) x);
+}
 
 ///
 @safe unittest

From 97a491b7c28d521d0337acb0b0afacc861559825 Mon Sep 17 00:00:00 2001
From: Martin <noone@nowhere.com>
Date: Wed, 16 May 2018 02:32:42 +0200
Subject: [PATCH 12/12] Add changelog entry

---
 changelog/math_float_double_implementations.dd | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 changelog/math_float_double_implementations.dd

diff --git a/changelog/math_float_double_implementations.dd b/changelog/math_float_double_implementations.dd
new file mode 100644
index 00000000000..81c3690afe9
--- /dev/null
+++ b/changelog/math_float_double_implementations.dd
@@ -0,0 +1,15 @@
+Single- and double-precision implementations for (a)tan and exp function families
+
+The `float` and `double` overloads of
+$(REF atan, std, math), $(REF atan2, std, math), $(REF tan, std, math),
+$(REF exp, std, math), $(REF expm1, std, math) and $(REF exp2, std, math)
+previously forwarded to the `real` implementation (inline assembly) and
+now got proper 'software' implementations in the corresponding precision.
+
+While this may result in a slowdown in some cases for DMD (especially for
+`exp()` and `exp2()`), the overall speed-up factor for LDC is > 3, for
+both `double` and `float`.
+
+This also implies less precise results, especially in single-precision,
+so if your code depended on more accurate results via 80-bit intermediate
+precision, you'll have to cast the argument(s) explicitly now.