From ef21892f52a73c7956f312089f84ccb41a48bb3d Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 12 Mar 2018 23:10:34 +0100 Subject: [PATCH 01/12] Implement std.math.atan() for single- and double-precision And make the x87 `real` version CTFE-able. --- std/math.d | 275 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 197 insertions(+), 78 deletions(-) diff --git a/std/math.d b/std/math.d index 5f1ac25a642..e6c736e3d7c 100644 --- a/std/math.d +++ b/std/math.d @@ -1260,113 +1260,232 @@ real atan(real x) @safe pure nothrow @nogc { version(InlineAsm_X86_Any) { - return atan2(x, 1.0L); + if (!__ctfe) + return atan2(x, 1.0L); + } + return atanImpl(x); +} + +/// ditto +double atan(double x) @safe pure nothrow @nogc { return atanImpl(x); } + +/// ditto +float atan(float x) @safe pure nothrow @nogc { return atanImpl(x); } + +/// +@safe unittest +{ + assert(isIdentical(atan(0.0), 0.0)); + assert(atan(sqrt(3.0)).approxEqual(PI / 3)); +} + +private T atanImpl(T)(T x) @safe pure nothrow @nogc +{ + // Coefficients for atan(x) + enum realFormat = floatTraits!T.realFormat; + static if (realFormat == RealFormat.ieeeQuadruple) + { + static immutable T[9] P = [ + -6.880597774405940432145577545328795037141E2L, + -2.514829758941713674909996882101723647996E3L, + -3.696264445691821235400930243493001671932E3L, + -2.792272753241044941703278827346430350236E3L, + -1.148164399808514330375280133523543970854E3L, + -2.497759878476618348858065206895055957104E2L, + -2.548067867495502632615671450650071218995E1L, + -8.768423468036849091777415076702113400070E-1L, + -6.635810778635296712545011270011752799963E-4L + ]; + static immutable T[9] Q = [ + 2.064179332321782129643673263598686441900E3L, + 8.782996876218210302516194604424986107121E3L, + 1.547394317752562611786521896296215170819E4L, + 1.458510242529987155225086911411015961174E4L, + 7.928572347062145288093560392463784743935E3L, + 2.494680540950601626662048893678584497900E3L, + 4.308348370818927353321556740027020068897E2L, + 3.566239794444800849656497338030115886153E1L, + 1.0 + ]; + } + else static if (realFormat == RealFormat.ieeeExtended) + { + static immutable T[5] P = [ + -5.0894116899623603312185E1L, + -9.9988763777265819915721E1L, + -6.3976888655834347413154E1L, + -1.4683508633175792446076E1L, + -8.6863818178092187535440E-1L, + ]; + static immutable T[6] Q = [ + 1.5268235069887081006606E2L, + 3.9157570175111990631099E2L, + 3.6144079386152023162701E2L, + 1.4399096122250781605352E2L, + 2.2981886733594175366172E1L, + 1.0000000000000000000000E0L, + ]; + } + else static if (realFormat == RealFormat.ieeeDouble) + { + static immutable T[5] P = [ + -6.485021904942025371773E1L, + -1.228866684490136173410E2L, + -7.500855792314704667340E1L, + -1.615753718733365076637E1L, + -8.750608600031904122785E-1L, + ]; + static immutable T[6] Q = [ + 1.945506571482613964425E2L, + 4.853903996359136964868E2L, + 4.328810604912902668951E2L, + 1.650270098316988542046E2L, + 2.485846490142306297962E1L, + 1.000000000000000000000E0L, + ]; + + enum T MOREBITS = 6.123233995736765886130E-17L; + } + else static if (realFormat == RealFormat.ieeeSingle) + { + // inlined below } else + static assert(0, "no coefficients for atan()"); + + // tan(PI/8) + enum T TAN_PI_8 = 0.414213562373095048801688724209698078569672L; + // tan(3 * PI/8) + enum T TAN3_PI_8 = 2.414213562373095048801688724209698078569672L; + + // Special cases. + if (x == cast(T) 0.0) + return x; + if (isInfinity(x)) + return copysign(cast(T) PI_2, x); + + // Make argument positive but save the sign. + bool sign = false; + if (signbit(x)) { - // Coefficients for atan(x) - static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple) + sign = true; + x = -x; + } + + static if (realFormat == RealFormat.ieeeDouble) // special case for double precision + { + short flag = 0; + T y; + if (x > TAN3_PI_8) { - static immutable real[9] P = [ - -6.880597774405940432145577545328795037141E2L, - -2.514829758941713674909996882101723647996E3L, - -3.696264445691821235400930243493001671932E3L, - -2.792272753241044941703278827346430350236E3L, - -1.148164399808514330375280133523543970854E3L, - -2.497759878476618348858065206895055957104E2L, - -2.548067867495502632615671450650071218995E1L, - -8.768423468036849091777415076702113400070E-1L, - -6.635810778635296712545011270011752799963E-4L - ]; - static immutable real[9] Q = [ - 2.064179332321782129643673263598686441900E3L, - 8.782996876218210302516194604424986107121E3L, - 1.547394317752562611786521896296215170819E4L, - 1.458510242529987155225086911411015961174E4L, - 7.928572347062145288093560392463784743935E3L, - 2.494680540950601626662048893678584497900E3L, - 4.308348370818927353321556740027020068897E2L, - 3.566239794444800849656497338030115886153E1L, - 1.0 - ]; + y = PI_2; + flag = 1; + x = -(1.0 / x); } - else + else if (x <= 0.66) { - static immutable real[5] P = [ - -5.0894116899623603312185E1L, - -9.9988763777265819915721E1L, - -6.3976888655834347413154E1L, - -1.4683508633175792446076E1L, - -8.6863818178092187535440E-1L, - ]; - static immutable real[6] Q = [ - 1.5268235069887081006606E2L, - 3.9157570175111990631099E2L, - 3.6144079386152023162701E2L, - 1.4399096122250781605352E2L, - 2.2981886733594175366172E1L, - 1.0000000000000000000000E0L, - ]; + y = 0.0; } - - // tan(PI/8) - enum real TAN_PI_8 = 0.414213562373095048801688724209698078569672L; - // tan(3 * PI/8) - enum real TAN3_PI_8 = 2.414213562373095048801688724209698078569672L; - - // Special cases. - if (x == 0.0) - return x; - if (isInfinity(x)) - return copysign(PI_2, x); - - // Make argument positive but save the sign. - bool sign = false; - if (signbit(x)) + else { - sign = true; - x = -x; + y = PI_4; + flag = 2; + x = (x - 1.0)/(x + 1.0); } + T z = x * x; + z = z * poly(z, P) / poly(z, Q); + z = x * z + x; + if (flag == 2) + z += 0.5 * MOREBITS; + else if (flag == 1) + z += MOREBITS; + y = y + z; + } + else + { // Range reduction. - real y; + T y; if (x > TAN3_PI_8) { y = PI_2; - x = -(1.0 / x); + x = -((cast(T) 1.0) / x); } else if (x > TAN_PI_8) { y = PI_4; - x = (x - 1.0)/(x + 1.0); + x = (x - cast(T) 1.0)/(x + cast(T) 1.0); } else y = 0.0; // Rational form in x^^2. - const real z = x * x; - y = y + (poly(z, P) / poly(z, Q)) * z * x + x; - - return (sign) ? -y : y; + const T z = x * x; + static if (realFormat == RealFormat.ieeeSingle) + { + y += ((( 8.05374449538e-2f * z + - 1.38776856032E-1f) * z + + 1.99777106478E-1f) * z + - 3.33329491539E-1f) * z * x + + x; + } + else + { + y = y + (poly(z, P) / poly(z, Q)) * z * x + x; + } } -} - -/// ditto -double atan(double x) @safe pure nothrow @nogc { return atan(cast(real) x); } -/// ditto -float atan(float x) @safe pure nothrow @nogc { return atan(cast(real) x); } - -/// -@safe unittest -{ - assert(isIdentical(atan(0.0), 0.0)); - assert(atan(sqrt(3.0)).approxEqual(PI / 3)); + return (sign) ? -y : y; } @safe @nogc nothrow unittest { - assert(equalsDigit(atan(std.math.sqrt(3.0)), PI / 3, useDigits)); + static void testAtan(T)() + { + // ±0 + const T zero = 0.0; + assert(isIdentical(atan(zero), zero)); + assert(isIdentical(atan(-zero), -zero)); + // ±∞ + const T inf = T.infinity; + assert(approxEqual(atan(inf), cast(T) PI_2)); + assert(approxEqual(atan(-inf), cast(T) -PI_2)); + // NaN + const T specialNaN = NaN(0x0123L); + assert(isIdentical(atan(specialNaN), specialNaN)); + + static immutable T[2][] vals = + [ + // x, atan(x) + [ 0.25, 0.2449786631 ], + [ 0.5, 0.4636476090 ], + [ 1, PI_4 ], + [ 1.5, 0.9827937232 ], + [ 10, 1.4711276743 ], + ]; + + foreach (ref val; vals) + { + T x = val[0]; + T r = val[1]; + T a = atan(x); + + //printf("atan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) a, cast(real) r); + assert(approxEqual(r, a)); + + x = -x; + r = -r; + a = atan(x); + //printf("atan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) a, cast(real) r); + assert(approxEqual(r, a)); + } + } + + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double, float)) + testAtan!T(); + + assert(equalsDigit(atan(std.math.sqrt(3.0L)), PI / 3, useDigits)); } /*************** From 83e0a95468618559c0885332ed660d0f58a520c9 Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 13 Mar 2018 22:22:19 +0100 Subject: [PATCH 02/12] Implement std.math.atan2() for single- and double-precision And make the x87 `real` version CTFE-able. --- std/math.d | 223 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 151 insertions(+), 72 deletions(-) diff --git a/std/math.d b/std/math.d index e6c736e3d7c..25707e6c142 100644 --- a/std/math.d +++ b/std/math.d @@ -1261,7 +1261,7 @@ real atan(real x) @safe pure nothrow @nogc version(InlineAsm_X86_Any) { if (!__ctfe) - return atan2(x, 1.0L); + return atan2Asm(x, 1.0L); } return atanImpl(x); } @@ -1509,91 +1509,26 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD -$(INFIN)) $(TD $(PLUSMN)3$(PI)/4)) * ) */ -real atan2(real y, real x) @trusted pure nothrow @nogc +real atan2(real y, real x) @trusted pure nothrow @nogc // TODO: @safe { version(InlineAsm_X86_Any) { - version (Win64) - { - asm pure nothrow @nogc { - naked; - fld real ptr [RDX]; // y - fld real ptr [RCX]; // x - fpatan; - ret; - } - } - else - { - asm pure nothrow @nogc { - fld y; - fld x; - fpatan; - } - } - } - else - { - // Special cases. - if (isNaN(x) || isNaN(y)) - return real.nan; - if (y == 0.0) - { - if (x >= 0 && !signbit(x)) - return copysign(0, y); - else - return copysign(PI, y); - } - if (x == 0.0) - return copysign(PI_2, y); - if (isInfinity(x)) - { - if (signbit(x)) - { - if (isInfinity(y)) - return copysign(3*PI_4, y); - else - return copysign(PI, y); - } - else - { - if (isInfinity(y)) - return copysign(PI_4, y); - else - return copysign(0.0, y); - } - } - if (isInfinity(y)) - return copysign(PI_2, y); - - // Call atan and determine the quadrant. - real z = atan(y / x); - - if (signbit(x)) - { - if (signbit(y)) - z = z - PI; - else - z = z + PI; - } - - if (z == 0.0) - return copysign(z, y); - - return z; + if (!__ctfe) + return atan2Asm(y, x); } + return atan2Impl(y, x); } /// ditto double atan2(double y, double x) @safe pure nothrow @nogc { - return atan2(cast(real) y, cast(real) x); + return atan2Impl(y, x); } /// ditto float atan2(float y, float x) @safe pure nothrow @nogc { - return atan2(cast(real) y, cast(real) x); + return atan2Impl(y, x); } /// @@ -1602,8 +1537,152 @@ float atan2(float y, float x) @safe pure nothrow @nogc assert(atan2(1.0, sqrt(3.0)).approxEqual(PI / 6)); } +version(InlineAsm_X86_Any) +private real atan2Asm(real y, real x) @trusted pure nothrow @nogc +{ + version (Win64) + { + asm pure nothrow @nogc { + naked; + fld real ptr [RDX]; // y + fld real ptr [RCX]; // x + fpatan; + ret; + } + } + else + { + asm pure nothrow @nogc { + fld y; + fld x; + fpatan; + } + } +} + +private T atan2Impl(T)(T y, T x) @safe pure nothrow @nogc +{ + // Special cases. + if (isNaN(x) || isNaN(y)) + return T.nan; + if (y == cast(T) 0.0) + { + if (x >= 0 && !signbit(x)) + return copysign(0, y); + else + return copysign(cast(T) PI, y); + } + if (x == cast(T) 0.0) + return copysign(cast(T) PI_2, y); + if (isInfinity(x)) + { + if (signbit(x)) + { + if (isInfinity(y)) + return copysign(3 * cast(T) PI_4, y); + else + return copysign(cast(T) PI, y); + } + else + { + if (isInfinity(y)) + return copysign(cast(T) PI_4, y); + else + return copysign(cast(T) 0.0, y); + } + } + if (isInfinity(y)) + return copysign(cast(T) PI_2, y); + + // Call atan and determine the quadrant. + T z = atan(y / x); + + if (signbit(x)) + { + if (signbit(y)) + z = z - cast(T) PI; + else + z = z + cast(T) PI; + } + + if (z == cast(T) 0.0) + return copysign(z, y); + + return z; +} + @safe @nogc nothrow unittest { + static void testAtan2(T)() + { + // NaN + const T nan = T.nan; + assert(isNaN(atan2(nan, cast(T) 1))); + assert(isNaN(atan2(cast(T) 1, nan))); + + const T inf = T.infinity; + static immutable T[3][] vals = + [ + // y, x, atan2(y, x) + + // ±0 + [ 0.0, 1.0, 0.0 ], + [ -0.0, 1.0, -0.0 ], + [ 0.0, 0.0, 0.0 ], + [ -0.0, 0.0, -0.0 ], + [ 0.0, -1.0, PI ], + [ -0.0, -1.0, -PI ], + [ 0.0, -0.0, PI ], + [ -0.0, -0.0, -PI ], + [ 1.0, 0.0, PI_2 ], + [ 1.0, -0.0, PI_2 ], + [ -1.0, 0.0, -PI_2 ], + [ -1.0, -0.0, -PI_2 ], + + // ±∞ + [ 1.0, inf, 0.0 ], + [ -1.0, inf, -0.0 ], + [ 1.0, -inf, PI ], + [ -1.0, -inf, -PI ], + [ inf, 1.0, PI_2 ], + [ inf, -1.0, PI_2 ], + [ -inf, 1.0, -PI_2 ], + [ -inf, -1.0, -PI_2 ], + [ inf, inf, PI_4 ], + [ -inf, inf, -PI_4 ], + [ inf, -inf, 3 * PI_4 ], + [ -inf, -inf, -3 * PI_4 ], + + [ 1.0, 1.0, PI_4 ], + [ -2.0, 2.0, -PI_4 ], + [ 3.0, -3.0, 3 * PI_4 ], + [ -4.0, -4.0, -3 * PI_4 ], + + [ 0.75, 0.25, 1.249045772398 ], + [ -0.5, 0.375, -0.927295218002 ], + [ 0.5, -0.125, 1.815774989922 ], + [ -0.75, -0.5, -2.158798930342 ], + ]; + + foreach (ref val; vals) + { + const T y = val[0]; + const T x = val[1]; + const T r = val[2]; + const T a = atan2(y, x); + + //printf("atan2(%Lg, %Lg) = %Lg, should be %Lg\n", cast(real) y, cast(real) x, cast(real) a, cast(real) r); + if (r == 0) + assert(isIdentical(r, a)); // check sign + else + assert(approxEqual(r, a)); + } + } + + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double, float)) + testAtan2!T(); + assert(equalsDigit(atan2(1.0L, std.math.sqrt(3.0L)), PI / 6, useDigits)); } From d3f5005568ea5fb7c19ed07c1914b55dd3b11b31 Mon Sep 17 00:00:00 2001 From: Martin Date: Tue, 13 Mar 2018 23:06:08 +0100 Subject: [PATCH 03/12] Implement std.math.tan() for single- and double-precision And make the x87 `real` version CTFE-able. --- std/math.d | 333 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 198 insertions(+), 135 deletions(-) diff --git a/std/math.d b/std/math.d index 25707e6c142..0c562080dd9 100644 --- a/std/math.d +++ b/std/math.d @@ -918,8 +918,32 @@ deprecated * $(TR $(TD $(PLUSMNINF)) $(TD $(NAN)) $(TD yes)) * ) */ +real tan(real x) @trusted pure nothrow @nogc // TODO: @safe +{ + version(InlineAsm_X86_Any) + { + if (!__ctfe) + return tanAsm(x); + } + return tanImpl(x); +} + +/// ditto +double tan(double x) @safe pure nothrow @nogc { return tanImpl(x); } + +/// ditto +float tan(float x) @safe pure nothrow @nogc { return tanImpl(x); } + +/// +@safe unittest +{ + assert(isIdentical(tan(0.0), 0.0)); + assert(tan(PI).approxEqual(0)); + assert(tan(PI / 3).approxEqual(sqrt(3.0))); +} -real tan(real x) @trusted pure nothrow @nogc +version(InlineAsm_X86_Any) +private real tanAsm(real x) @trusted pure nothrow @nogc { version(D_InlineAsm_X86) { @@ -1010,170 +1034,209 @@ Clear1: asm pure nothrow @nogc{ Lret: {} } else - { - enum realFormat = floatTraits!real.realFormat; + static assert(0); +} - // Coefficients for tan(x) and PI/4 split into three parts. - static if (realFormat == RealFormat.ieeeQuadruple) - { - static immutable real[6] P = [ - 2.883414728874239697964612246732416606301E10L, - -2.307030822693734879744223131873392503321E9L, - 5.160188250214037865511600561074819366815E7L, - -4.249691853501233575668486667664718192660E5L, - 1.272297782199996882828849455156962260810E3L, - -9.889929415807650724957118893791829849557E-1L - ]; - static immutable real[7] Q = [ - 8.650244186622719093893836740197250197602E10L - -4.152206921457208101480801635640958361612E10L, - 2.758476078803232151774723646710890525496E9L, - -5.733709132766856723608447733926138506824E7L, - 4.529422062441341616231663543669583527923E5L, - -1.317243702830553658702531997959756728291E3L, - 1.0 - ]; +private T tanImpl(T)(T x) @safe pure nothrow @nogc +{ + // Coefficients for tan(x) and PI/4 split into three parts. + enum realFormat = floatTraits!T.realFormat; + static if (realFormat == RealFormat.ieeeQuadruple) + { + static immutable T[6] P = [ + 2.883414728874239697964612246732416606301E10L, + -2.307030822693734879744223131873392503321E9L, + 5.160188250214037865511600561074819366815E7L, + -4.249691853501233575668486667664718192660E5L, + 1.272297782199996882828849455156962260810E3L, + -9.889929415807650724957118893791829849557E-1L + ]; + static immutable T[7] Q = [ + 8.650244186622719093893836740197250197602E10L + -4.152206921457208101480801635640958361612E10L, + 2.758476078803232151774723646710890525496E9L, + -5.733709132766856723608447733926138506824E7L, + 4.529422062441341616231663543669583527923E5L, + -1.317243702830553658702531997959756728291E3L, + 1.0 + ]; - enum real P1 = - 7.853981633974483067550664827649598009884357452392578125E-1L; - enum real P2 = - 2.8605943630549158983813312792950660807511260829685741796657E-18L; - enum real P3 = - 2.1679525325309452561992610065108379921905808E-35L; - } - else - { - static immutable real[3] P = [ - -1.7956525197648487798769E7L, - 1.1535166483858741613983E6L, - -1.3093693918138377764608E4L, - ]; - static immutable real[5] Q = [ - -5.3869575592945462988123E7L, - 2.5008380182335791583922E7L, - -1.3208923444021096744731E6L, - 1.3681296347069295467845E4L, - 1.0000000000000000000000E0L, - ]; + enum T P1 = + 7.853981633974483067550664827649598009884357452392578125E-1L; + enum T P2 = + 2.8605943630549158983813312792950660807511260829685741796657E-18L; + enum T P3 = + 2.1679525325309452561992610065108379921905808E-35L; + } + else static if (realFormat == RealFormat.ieeeExtended || + realFormat == RealFormat.ieeeDouble) + { + static immutable T[3] P = [ + -1.7956525197648487798769E7L, + 1.1535166483858741613983E6L, + -1.3093693918138377764608E4L, + ]; + static immutable T[5] Q = [ + -5.3869575592945462988123E7L, + 2.5008380182335791583922E7L, + -1.3208923444021096744731E6L, + 1.3681296347069295467845E4L, + 1.0000000000000000000000E0L, + ]; - enum real P1 = 7.853981554508209228515625E-1L; - enum real P2 = 7.946627356147928367136046290398E-9L; - enum real P3 = 3.061616997868382943065164830688E-17L; - } + enum T P1 = 7.853981554508209228515625E-1L; + enum T P2 = 7.946627356147928367136046290398E-9L; + enum T P3 = 3.061616997868382943065164830688E-17L; + } + else static if (realFormat == RealFormat.ieeeSingle) + { + enum T P1 = 0.78515625; + enum T P2 = 2.4187564849853515625e-4; + enum T P3 = 3.77489497744594108e-8; + } + else + static assert(0, "no coefficients for tan()"); - // Special cases. - if (x == 0.0 || isNaN(x)) - return x; - if (isInfinity(x)) - return real.nan; + // Special cases. + if (x == cast(T) 0.0 || isNaN(x)) + return x; + if (isInfinity(x)) + return T.nan; - // Make argument positive but save the sign. - bool sign = false; - if (signbit(x)) - { - sign = true; - x = -x; - } + // Make argument positive but save the sign. + bool sign = false; + if (signbit(x)) + { + sign = true; + x = -x; + } - // Compute x mod PI/4. - real y = floor(x / PI_4); + // Compute x mod PI/4. + static if (realFormat == RealFormat.ieeeSingle) + { + enum T FOPI = 4 / PI; + int j = cast(int) (FOPI * x); + T y = j; + T z; + } + else + { + T y = floor(x / cast(T) PI_4); // Strip high bits of integer part. enum numHighBits = (realFormat == RealFormat.ieeeDouble ? 3 : 4); - real z = ldexp(y, -numHighBits); + T z = ldexp(y, -numHighBits); // Compute y - 2^numHighBits * (y / 2^numHighBits). z = y - ldexp(floor(z), numHighBits); // Integer and fraction part modulo one octant. int j = cast(int)(z); + } - // Map zeros and singularities to origin. - if (j & 1) - { - j += 1; - y += 1.0; - } + // Map zeros and singularities to origin. + if (j & 1) + { + j += 1; + y += cast(T) 1.0; + } - z = ((x - y * P1) - y * P2) - y * P3; - const real zz = z * z; + z = ((x - y * P1) - y * P2) - y * P3; + const T zz = z * z; - enum zzThreshold = (realFormat == RealFormat.ieeeDouble ? 1.0e-14L : 1.0e-20L); - if (zz > zzThreshold) - y = z + z * (zz * poly(zz, P) / poly(zz, Q)); + enum T zzThreshold = (realFormat == RealFormat.ieeeSingle ? 1.0e-4L : + realFormat == RealFormat.ieeeDouble ? 1.0e-14L : 1.0e-20L); + if (zz > zzThreshold) + { + static if (realFormat == RealFormat.ieeeSingle) + { + y = ((((( 9.38540185543E-3f * zz + + 3.11992232697E-3f) * zz + + 2.44301354525E-2f) * zz + + 5.34112807005E-2f) * zz + + 1.33387994085E-1f) * zz + + 3.33331568548E-1f) * zz * z + + z; + } else - y = z; + { + y = z + z * (zz * poly(zz, P) / poly(zz, Q)); + } + } + else + y = z; - if (j & 2) - y = -1.0 / y; + if (j & 2) + y = (cast(T) -1.0) / y; - return (sign) ? -y : y; - } + return (sign) ? -y : y; } -@safe nothrow @nogc unittest +@safe @nogc nothrow unittest { - static real[2][] vals = // angle,tan + static void testTan(T)() + { + // ±0 + const T zero = 0.0; + assert(isIdentical(tan(zero), zero)); + assert(isIdentical(tan(-zero), -zero)); + // ±∞ + const T inf = T.infinity; + assert(isNaN(tan(inf))); + assert(isNaN(tan(-inf))); + // NaN + const T specialNaN = NaN(0x0123L); + assert(isIdentical(tan(specialNaN), specialNaN)); + + static immutable T[2][] vals = [ - [ 0, 0], - [ .5, .5463024898], - [ 1, 1.557407725], - [ 1.5, 14.10141995], - [ 2, -2.185039863], - [ 2.5,-.7470222972], - [ 3, -.1425465431], - [ 3.5, .3745856402], - [ 4, 1.157821282], - [ 4.5, 4.637332055], - [ 5, -3.380515006], - [ 5.5,-.9955840522], - [ 6, -.2910061914], - [ 6.5, .2202772003], - [ 10, .6483608275], - - // special angles - [ PI_4, 1], - //[ PI_2, real.infinity], // PI_2 is not _exactly_ pi/2. - [ 3*PI_4, -1], - [ PI, 0], - [ 5*PI_4, 1], - //[ 3*PI_2, -real.infinity], - [ 7*PI_4, -1], - [ 2*PI, 0], + // angle, tan + [ .5, .5463024898], + [ 1, 1.557407725], + [ 1.5, 14.10141995], + [ 2, -2.185039863], + [ 2.5,-.7470222972], + [ 3, -.1425465431], + [ 3.5, .3745856402], + [ 4, 1.157821282], + [ 4.5, 4.637332055], + [ 5, -3.380515006], + [ 5.5,-.9955840522], + [ 6, -.2910061914], + [ 6.5, .2202772003], + [ 10, .6483608275], + + // special angles + [ PI_4, 1], + //[ PI_2, T.infinity], // PI_2 is not _exactly_ pi/2. + [ 3*PI_4, -1], + [ PI, 0], + [ 5*PI_4, 1], + //[ 3*PI_2, -T.infinity], + [ 7*PI_4, -1], + [ 2*PI, 0], ]; - int i; - for (i = 0; i < vals.length; i++) - { - real x = vals[i][0]; - real r = vals[i][1]; - real t = tan(x); + foreach (ref val; vals) + { + T x = val[0]; + T r = val[1]; + T t = tan(x); - //printf("tan(%Lg) = %Lg, should be %Lg\n", x, t, r); - if (!isIdentical(r, t)) assert(fabs(r-t) <= .0000001); + //printf("tan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) t, cast(real) r); + assert(approxEqual(r, t)); - x = -x; - r = -r; - t = tan(x); - //printf("tan(%Lg) = %Lg, should be %Lg\n", x, t, r); - if (!isIdentical(r, t) && !(r != r && t != t)) assert(fabs(r-t) <= .0000001); + x = -x; + r = -r; + t = tan(x); + //printf("tan(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) t, cast(real) r); + assert(approxEqual(r, t)); + } } - // overflow - assert(isNaN(tan(real.infinity))); - assert(isNaN(tan(-real.infinity))); - // NaN propagation - assert(isIdentical( tan(NaN(0x0123L)), NaN(0x0123L) )); -} -/// -@safe unittest -{ - assert(isIdentical(tan(0.0), 0.0)); - assert(tan(PI).approxEqual(0)); - assert(tan(PI / 3).approxEqual(sqrt(3.0))); -} + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double, float)) + testTan!T(); -@safe @nogc nothrow unittest -{ - assert(equalsDigit(tan(PI / 3), std.math.sqrt(3.0), useDigits)); + assert(equalsDigit(tan(PI / 3), std.math.sqrt(3.0L), useDigits)); } /*************** From d17cda4c770840843e463bdc520bc7818d6886a7 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 14 Mar 2018 22:34:56 +0100 Subject: [PATCH 04/12] Implement std.math.exp() for single- and double-precision --- std/math.d | 470 +++++++++++++++++++++++++++++------------------------ 1 file changed, 260 insertions(+), 210 deletions(-) diff --git a/std/math.d b/std/math.d index 0c562080dd9..5eb2f08a96f 100644 --- a/std/math.d +++ b/std/math.d @@ -2187,145 +2187,299 @@ auto sqrt(creal z) @nogc @safe pure nothrow * $(TR $(TD $(NAN)) $(TD $(NAN)) ) * ) */ -real exp(real x) @trusted pure nothrow @nogc +real exp(real x) @trusted pure nothrow @nogc // TODO: @safe { version(D_InlineAsm_X86) { // e^^x = 2^^(LOG2E*x) // (This is valid because the overflow & underflow limits for exp // and exp2 are so similar). - return exp2(LOG2E*x); + if (!__ctfe) + return exp2(LOG2E*x); } else version(D_InlineAsm_X86_64) { // e^^x = 2^^(LOG2E*x) // (This is valid because the overflow & underflow limits for exp // and exp2 are so similar). - return exp2(LOG2E*x); + if (!__ctfe) + return exp2(LOG2E*x); } - else + return expImpl(x); +} + +/// ditto +double exp(double x) @safe pure nothrow @nogc { return expImpl(x); } + +/// ditto +float exp(float x) @safe pure nothrow @nogc { return expImpl(x); } + +/// +@safe unittest +{ + assert(exp(0.0) == 1.0); + assert(exp(3.0).feqrel(E * E * E) > 16); +} + +private T expImpl(T)(T x) @safe pure nothrow @nogc +{ + alias F = floatTraits!T; + static if (F.realFormat == RealFormat.ieeeSingle) { - alias F = floatTraits!real; - static if (F.realFormat == RealFormat.ieeeDouble) - { - // Coefficients for exp(x) - static immutable real[3] P = [ - 9.99999999999999999910E-1L, - 3.02994407707441961300E-2L, - 1.26177193074810590878E-4L, - ]; - static immutable real[4] Q = [ - 2.00000000000000000009E0L, - 2.27265548208155028766E-1L, - 2.52448340349684104192E-3L, - 3.00198505138664455042E-6L, - ]; + enum T C1 = 0.693359375; + enum T C2 = -2.12194440e-4; - // C1 + C2 = LN2. - enum real C1 = 6.93145751953125E-1; - enum real C2 = 1.42860682030941723212E-6; + // Overflow and Underflow limits. + enum T OF = 88.72283905206835; + enum T UF = -103.278929903431851103; // ln(2^-149) + } + else static if (F.realFormat == RealFormat.ieeeDouble) + { + // Coefficients for exp(x) + static immutable T[3] P = [ + 9.99999999999999999910E-1L, + 3.02994407707441961300E-2L, + 1.26177193074810590878E-4L, + ]; + static immutable T[4] Q = [ + 2.00000000000000000009E0L, + 2.27265548208155028766E-1L, + 2.52448340349684104192E-3L, + 3.00198505138664455042E-6L, + ]; - // Overflow and Underflow limits. - enum real OF = 7.09782712893383996732E2; // ln((1-2^-53) * 2^1024) - enum real UF = -7.451332191019412076235E2; // ln(2^-1075) - } - else static if (F.realFormat == RealFormat.ieeeExtended) - { - // Coefficients for exp(x) - static immutable real[3] P = [ - 9.9999999999999999991025E-1L, - 3.0299440770744196129956E-2L, - 1.2617719307481059087798E-4L, - ]; - static immutable real[4] Q = [ - 2.0000000000000000000897E0L, - 2.2726554820815502876593E-1L, - 2.5244834034968410419224E-3L, - 3.0019850513866445504159E-6L, - ]; + // C1 + C2 = LN2. + enum T C1 = 6.93145751953125E-1; + enum T C2 = 1.42860682030941723212E-6; + + // Overflow and Underflow limits. + enum T OF = 7.09782712893383996732E2; // ln((1-2^-53) * 2^1024) + enum T UF = -7.451332191019412076235E2; // ln(2^-1075) + } + else static if (F.realFormat == RealFormat.ieeeExtended) + { + // Coefficients for exp(x) + static immutable T[3] P = [ + 9.9999999999999999991025E-1L, + 3.0299440770744196129956E-2L, + 1.2617719307481059087798E-4L, + ]; + static immutable T[4] Q = [ + 2.0000000000000000000897E0L, + 2.2726554820815502876593E-1L, + 2.5244834034968410419224E-3L, + 3.0019850513866445504159E-6L, + ]; - // C1 + C2 = LN2. - enum real C1 = 6.9314575195312500000000E-1L; - enum real C2 = 1.4286068203094172321215E-6L; + // C1 + C2 = LN2. + enum T C1 = 6.9314575195312500000000E-1L; + enum T C2 = 1.4286068203094172321215E-6L; - // Overflow and Underflow limits. - enum real OF = 1.1356523406294143949492E4L; // ln((1-2^-64) * 2^16384) - enum real UF = -1.13994985314888605586758E4L; // ln(2^-16446) - } - else static if (F.realFormat == RealFormat.ieeeQuadruple) - { - // Coefficients for exp(x) - 1 - static immutable real[5] P = [ - 9.999999999999999999999999999999999998502E-1L, - 3.508710990737834361215404761139478627390E-2L, - 2.708775201978218837374512615596512792224E-4L, - 6.141506007208645008909088812338454698548E-7L, - 3.279723985560247033712687707263393506266E-10L - ]; - static immutable real[6] Q = [ - 2.000000000000000000000000000000000000150E0, - 2.368408864814233538909747618894558968880E-1L, - 3.611828913847589925056132680618007270344E-3L, - 1.504792651814944826817779302637284053660E-5L, - 1.771372078166251484503904874657985291164E-8L, - 2.980756652081995192255342779918052538681E-12L - ]; + // Overflow and Underflow limits. + enum T OF = 1.1356523406294143949492E4L; // ln((1-2^-64) * 2^16384) + enum T UF = -1.13994985314888605586758E4L; // ln(2^-16446) + } + else static if (F.realFormat == RealFormat.ieeeQuadruple) + { + // Coefficients for exp(x) - 1 + static immutable T[5] P = [ + 9.999999999999999999999999999999999998502E-1L, + 3.508710990737834361215404761139478627390E-2L, + 2.708775201978218837374512615596512792224E-4L, + 6.141506007208645008909088812338454698548E-7L, + 3.279723985560247033712687707263393506266E-10L + ]; + static immutable T[6] Q = [ + 2.000000000000000000000000000000000000150E0, + 2.368408864814233538909747618894558968880E-1L, + 3.611828913847589925056132680618007270344E-3L, + 1.504792651814944826817779302637284053660E-5L, + 1.771372078166251484503904874657985291164E-8L, + 2.980756652081995192255342779918052538681E-12L + ]; - // C1 + C2 = LN2. - enum real C1 = 6.93145751953125E-1L; - enum real C2 = 1.428606820309417232121458176568075500134E-6L; + // C1 + C2 = LN2. + enum T C1 = 6.93145751953125E-1L; + enum T C2 = 1.428606820309417232121458176568075500134E-6L; - // Overflow and Underflow limits. - enum real OF = 1.135583025911358400418251384584930671458833e4L; - enum real UF = -1.143276959615573793352782661133116431383730e4L; - } - else - static assert(0, "Not implemented for this architecture"); + // Overflow and Underflow limits. + enum T OF = 1.135583025911358400418251384584930671458833e4L; + enum T UF = -1.143276959615573793352782661133116431383730e4L; + } + else + static assert(0, "Not implemented for this architecture"); - // Special cases. - if (isNaN(x)) - return x; - if (x > OF) - return real.infinity; - if (x < UF) - return 0.0; + // Special cases. + if (isNaN(x)) + return x; + if (x > OF) + return real.infinity; + if (x < UF) + return 0.0; - // Express: e^^x = e^^g * 2^^n - // = e^^g * e^^(n * LOG2E) - // = e^^(g + n * LOG2E) - int n = cast(int) floor(LOG2E * x + 0.5); - x -= n * C1; - x -= n * C2; + // Express: e^^x = e^^g * 2^^n + // = e^^g * e^^(n * LOG2E) + // = e^^(g + n * LOG2E) + T xx = floor((cast(T) LOG2E) * x + cast(T) 0.5); + const int n = cast(int) xx; + x -= xx * C1; + x -= xx * C2; + static if (F.realFormat == RealFormat.ieeeSingle) + { + xx = x * x; + x = ((((( 1.9875691500E-4f * x + + 1.3981999507E-3f) * x + + 8.3334519073E-3f) * x + + 4.1665795894E-2f) * x + + 1.6666665459E-1f) * x + + 5.0000001201E-1f) * xx + + x + + 1.0f; + } + else + { // Rational approximation for exponential of the fractional part: // e^^x = 1 + 2x P(x^^2) / (Q(x^^2) - P(x^^2)) - const real xx = x * x; - const real px = x * poly(xx, P); + xx = x * x; + const T px = x * poly(xx, P); x = px / (poly(xx, Q) - px); - x = 1.0 + ldexp(x, 1); - - // Scale by power of 2. - x = ldexp(x, n); - - return x; + x = (cast(T) 1.0) + ldexp(x, 1); } -} -/// ditto -double exp(double x) @safe pure nothrow @nogc { return exp(cast(real) x); } - -/// ditto -float exp(float x) @safe pure nothrow @nogc { return exp(cast(real) x); } + // Scale by power of 2. + x = ldexp(x, n); -/// -@safe unittest -{ - assert(exp(0.0) == 1.0); - assert(exp(3.0).feqrel(E * E * E) > 16); + return x; } @safe @nogc nothrow unittest { + FloatingPointControl ctrl; + if (FloatingPointControl.hasExceptionTraps) + ctrl.disableExceptions(FloatingPointControl.allExceptions); + ctrl.rounding = FloatingPointControl.roundToNearest; + + static void testExp(T)() + { + enum realFormat = floatTraits!T.realFormat; + static if (realFormat == RealFormat.ieeeQuadruple) + { + static immutable T[2][] exptestpoints = + [ // x exp(x) + [ 1.0L, E ], + [ 0.5L, 0x1.a61298e1e069bc972dfefab6df34p+0L ], + [ 3.0L, E*E*E ], + [ 0x1.6p+13L, 0x1.6e509d45728655cdb4840542acb5p+16250L ], // near overflow + [ 0x1.7p+13L, T.infinity ], // close overflow + [ 0x1p+80L, T.infinity ], // far overflow + [ T.infinity, T.infinity ], + [-0x1.18p+13L, 0x1.5e4bf54b4807034ea97fef0059a6p-12927L ], // near underflow + [-0x1.625p+13L, 0x1.a6bd68a39d11fec3a250cd97f524p-16358L ], // ditto + [-0x1.62dafp+13L, 0x0.cb629e9813b80ed4d639e875be6cp-16382L ], // near underflow - subnormal + [-0x1.6549p+13L, 0x0.0000000000000000000000000001p-16382L ], // ditto + [-0x1.655p+13L, 0 ], // close underflow + [-0x1p+30L, 0 ], // far underflow + ]; + } + else static if (realFormat == RealFormat.ieeeExtended) + { + static immutable T[2][] exptestpoints = + [ // x exp(x) + [ 1.0L, E ], + [ 0.5L, 0x1.a61298e1e069bc97p+0L ], + [ 3.0L, E*E*E ], + [ 0x1.1p+13L, 0x1.29aeffefc8ec645p+12557L ], // near overflow + [ 0x1.7p+13L, T.infinity ], // close overflow + [ 0x1p+80L, T.infinity ], // far overflow + [ T.infinity, T.infinity ], + [-0x1.18p+13L, 0x1.5e4bf54b4806db9p-12927L ], // near underflow + [-0x1.625p+13L, 0x1.a6bd68a39d11f35cp-16358L ], // ditto + [-0x1.62dafp+13L, 0x1.96c53d30277021dp-16383L ], // near underflow - subnormal + [-0x1.643p+13L, 0x1p-16444L ], // ditto + [-0x1.645p+13L, 0 ], // close underflow + [-0x1p+30L, 0 ], // far underflow + ]; + } + else static if (realFormat == RealFormat.ieeeDouble) + { + static immutable T[2][] exptestpoints = + [ // x, exp(x) + [ 1.0L, E ], + [ 0.5L, 0x1.a61298e1e069cp+0L ], + [ 3.0L, E*E*E ], + [ 0x1.6p+9L, 0x1.93bf4ec282efbp+1015L ], // near overflow + [ 0x1.7p+9L, T.infinity ], // close overflow + [ 0x1p+80L, T.infinity ], // far overflow + [ T.infinity, T.infinity ], + [-0x1.6p+9L, 0x1.44a3824e5285fp-1016L ], // near underflow + [-0x1.64p+9L, 0x0.06f84920bb2d4p-1022L ], // near underflow - subnormal + [-0x1.743p+9L, 0x0.0000000000001p-1022L ], // ditto + [-0x1.8p+9L, 0 ], // close underflow + [-0x1p+30L, 0 ], // far underflow + ]; + } + else static if (realFormat == RealFormat.ieeeSingle) + { + static immutable T[2][] exptestpoints = + [ // x, exp(x) + [ 1.0L, E ], + [ 0.5L, 0x1.a61299p+0L ], + [ 3.0L, E*E*E ], + [ 0x1.62p+6L, 0x1.99b988p+127L ], // near overflow + [ 0x1.7p+6L, T.infinity ], // close overflow + [ 0x1p+80L, T.infinity ], // far overflow + [ T.infinity, T.infinity ], + [-0x1.5cp+6L, 0x1.666d0ep-126L ], // near underflow + [-0x1.7p+6L, 0x0.026a42p-126L ], // near underflow - subnormal + [-0x1.9cp+6L, 0x0.000002p-126L ], // ditto + [-0x1.ap+6L, 0 ], // close underflow + [-0x1p+30L, 0 ], // far underflow + ]; + } + else + static assert(0, "No exp() tests for real type!"); + + const minEqualMantissaBits = T.mant_dig - 2; + T x; + IeeeFlags f; + foreach (ref pair; exptestpoints) + { + resetIeeeFlags(); + x = exp(pair[0]); + //printf("exp(%La) = %La, should be %La\n", cast(real) pair[0], cast(real) x, cast(real) pair[1]); + assert(feqrel(x, pair[1]) >= minEqualMantissaBits); + } + + // Ideally, exp(0) would not set the inexact flag. + // Unfortunately, fldl2e sets it! + // So it's not realistic to avoid setting it. + assert(exp(cast(T) 0.0) == 1.0); + + // NaN propagation. Doesn't set flags, bcos was already NaN. + resetIeeeFlags(); + x = exp(T.nan); + f = ieeeFlags; + assert(isIdentical(abs(x), T.nan)); + assert(f.flags == 0); + + resetIeeeFlags(); + x = exp(-T.nan); + f = ieeeFlags; + assert(isIdentical(abs(x), T.nan)); + assert(f.flags == 0); + + x = exp(NaN(0x123)); + assert(isIdentical(x, NaN(0x123))); + } + + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double, float)) + testExp!T(); + + // High resolution test (verified against GNU MPFR/Mathematica). + assert(exp(0.5L) == 0x1.A612_98E1_E069_BC97_2DFE_FAB6_DF34p+0L); + assert(equalsDigit(exp(3.0L), E * E * E, useDigits)); } @@ -2914,110 +3068,6 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow } } -@system unittest -{ - FloatingPointControl ctrl; - if (FloatingPointControl.hasExceptionTraps) - ctrl.disableExceptions(FloatingPointControl.allExceptions); - ctrl.rounding = FloatingPointControl.roundToNearest; - - alias F = floatTraits!real; - - static if (F.realFormat == RealFormat.ieeeQuadruple) - { - static immutable real[2][] exptestpoints = - [ // x exp(x) - [ 1.0L, E ], - [ 0.5L, 0x1.a61298e1e069bc972dfefab6df34p+0L ], - [ 3.0L, E*E*E ], - [ 0x1.6p+13L, 0x1.6e509d45728655cdb4840542acb5p+16250L ], // near overflow - [ 0x1.7p+13L, real.infinity ], // close overflow - [ 0x1p+80L, real.infinity ], // far overflow - [ real.infinity, real.infinity ], - [-0x1.18p+13L, 0x1.5e4bf54b4807034ea97fef0059a6p-12927L ], // near underflow - [-0x1.625p+13L, 0x1.a6bd68a39d11fec3a250cd97f524p-16358L ], // ditto - [-0x1.62dafp+13L, 0x0.cb629e9813b80ed4d639e875be6cp-16382L ], // near underflow - subnormal - [-0x1.6549p+13L, 0x0.0000000000000000000000000001p-16382L ], // ditto - [-0x1.655p+13L, 0 ], // close underflow - [-0x1p+30L, 0 ], // far underflow - ]; - } - else static if (F.realFormat == RealFormat.ieeeExtended) - { - static immutable real[2][] exptestpoints = - [ // x exp(x) - [ 1.0L, E ], - [ 0.5L, 0x1.a61298e1e069bc97p+0L ], - [ 3.0L, E*E*E ], - [ 0x1.1p+13L, 0x1.29aeffefc8ec645p+12557L ], // near overflow - [ 0x1.7p+13L, real.infinity ], // close overflow - [ 0x1p+80L, real.infinity ], // far overflow - [ real.infinity, real.infinity ], - [-0x1.18p+13L, 0x1.5e4bf54b4806db9p-12927L ], // near underflow - [-0x1.625p+13L, 0x1.a6bd68a39d11f35cp-16358L ], // ditto - [-0x1.62dafp+13L, 0x1.96c53d30277021dp-16383L ], // near underflow - subnormal - [-0x1.643p+13L, 0x1p-16444L ], // ditto - [-0x1.645p+13L, 0 ], // close underflow - [-0x1p+30L, 0 ], // far underflow - ]; - } - else static if (F.realFormat == RealFormat.ieeeDouble) - { - static immutable real[2][] exptestpoints = - [ // x, exp(x) - [ 1.0L, E ], - [ 0.5L, 0x1.a61298e1e069cp+0L ], - [ 3.0L, E*E*E ], - [ 0x1.6p+9L, 0x1.93bf4ec282efbp+1015L ], // near overflow - [ 0x1.7p+9L, real.infinity ], // close overflow - [ 0x1p+80L, real.infinity ], // far overflow - [ real.infinity, real.infinity ], - [-0x1.6p+9L, 0x1.44a3824e5285fp-1016L ], // near underflow - [-0x1.64p+9L, 0x0.06f84920bb2d4p-1022L ], // near underflow - subnormal - [-0x1.743p+9L, 0x0.0000000000001p-1022L ], // ditto - [-0x1.8p+9L, 0 ], // close underflow - [-0x1p30L, 0 ], // far underflow - ]; - } - else - static assert(0, "No exp() tests for real type!"); - - const minEqualMantissaBits = real.mant_dig - 2; - real x; - IeeeFlags f; - foreach (ref pair; exptestpoints) - { - resetIeeeFlags(); - x = exp(pair[0]); - assert(feqrel(x, pair[1]) >= minEqualMantissaBits); - } - - // Ideally, exp(0) would not set the inexact flag. - // Unfortunately, fldl2e sets it! - // So it's not realistic to avoid setting it. - assert(exp(0.0L) == 1.0); - - // NaN propagation. Doesn't set flags, bcos was already NaN. - resetIeeeFlags(); - x = exp(real.nan); - f = ieeeFlags; - assert(isIdentical(abs(x), real.nan)); - assert(f.flags == 0); - - resetIeeeFlags(); - x = exp(-real.nan); - f = ieeeFlags; - assert(isIdentical(abs(x), real.nan)); - assert(f.flags == 0); - - x = exp(NaN(0x123)); - assert(isIdentical(x, NaN(0x123))); - - // High resolution test (verified against GNU MPFR/Mathematica). - assert(exp(0.5L) == 0x1.A612_98E1_E069_BC97_2DFE_FAB6_DF34p+0L); -} - - /* * Calculate cos(y) + i sin(y). * From a08c986a4299a90bbf1b1982e1af573459d03814 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 15 Mar 2018 22:07:40 +0100 Subject: [PATCH 05/12] Implement std.math.expm1() for double-precision And make the x87 `real` version CTFE-able. I couldn't find the single-precision version in Cephes. --- std/math.d | 197 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 134 insertions(+), 63 deletions(-) diff --git a/std/math.d b/std/math.d index 5eb2f08a96f..3e9383775a4 100644 --- a/std/math.d +++ b/std/math.d @@ -2498,7 +2498,29 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc * $(TR $(TD $(NAN)) $(TD $(NAN)) ) * ) */ -real expm1(real x) @trusted pure nothrow @nogc +real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe +{ + version(InlineAsm_X86_Any) + { + if (!__ctfe) + return expm1Asm(x); + } + return expm1Impl(x); +} + +/// ditto +double expm1(double x) @safe pure nothrow @nogc { return expm1Impl(x); } + +/// +@safe unittest +{ + assert(isIdentical(expm1(0.0), 0.0)); + assert(expm1(1.0).feqrel(1.71828) > 16); + assert(expm1(2.0).feqrel(6.3890) > 16); +} + +version(InlineAsm_X86_Any) +private real expm1Asm(real x) @trusted pure nothrow @nogc { version(D_InlineAsm_X86) { @@ -2662,98 +2684,147 @@ L_largenegative: } } else + static assert(0); +} + +private T expm1Impl(T)(T x) @safe pure nothrow @nogc +{ + // Coefficients for exp(x) - 1 and overflow/underflow limits. + enum realFormat = floatTraits!T.realFormat; + static if (realFormat == RealFormat.ieeeQuadruple) { - // Coefficients for exp(x) - 1 and overflow/underflow limits. - static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple) - { - static immutable real[8] P = [ - 2.943520915569954073888921213330863757240E8L, - -5.722847283900608941516165725053359168840E7L, - 8.944630806357575461578107295909719817253E6L, - -7.212432713558031519943281748462837065308E5L, - 4.578962475841642634225390068461943438441E4L, - -1.716772506388927649032068540558788106762E3L, - 4.401308817383362136048032038528753151144E1L, - -4.888737542888633647784737721812546636240E-1L - ]; + static immutable T[8] P = [ + 2.943520915569954073888921213330863757240E8L, + -5.722847283900608941516165725053359168840E7L, + 8.944630806357575461578107295909719817253E6L, + -7.212432713558031519943281748462837065308E5L, + 4.578962475841642634225390068461943438441E4L, + -1.716772506388927649032068540558788106762E3L, + 4.401308817383362136048032038528753151144E1L, + -4.888737542888633647784737721812546636240E-1L + ]; - static immutable real[9] Q = [ - 1.766112549341972444333352727998584753865E9L, - -7.848989743695296475743081255027098295771E8L, - 1.615869009634292424463780387327037251069E8L, - -2.019684072836541751428967854947019415698E7L, - 1.682912729190313538934190635536631941751E6L, - -9.615511549171441430850103489315371768998E4L, - 3.697714952261803935521187272204485251835E3L, - -8.802340681794263968892934703309274564037E1L, - 1.0 - ]; + static immutable T[9] Q = [ + 1.766112549341972444333352727998584753865E9L, + -7.848989743695296475743081255027098295771E8L, + 1.615869009634292424463780387327037251069E8L, + -2.019684072836541751428967854947019415698E7L, + 1.682912729190313538934190635536631941751E6L, + -9.615511549171441430850103489315371768998E4L, + 3.697714952261803935521187272204485251835E3L, + -8.802340681794263968892934703309274564037E1L, + 1.0 + ]; - enum real OF = 1.1356523406294143949491931077970764891253E4L; - enum real UF = -1.143276959615573793352782661133116431383730e4L; - } - else - { - static immutable real[5] P = [ - -1.586135578666346600772998894928250240826E4L, - 2.642771505685952966904660652518429479531E3L, - -3.423199068835684263987132888286791620673E2L, - 1.800826371455042224581246202420972737840E1L, - -5.238523121205561042771939008061958820811E-1L, - ]; - static immutable real[6] Q = [ - -9.516813471998079611319047060563358064497E4L, - 3.964866271411091674556850458227710004570E4L, - -7.207678383830091850230366618190187434796E3L, - 7.206038318724600171970199625081491823079E2L, - -4.002027679107076077238836622982900945173E1L, - 1.0 - ]; + enum T OF = 1.1356523406294143949491931077970764891253E4L; + enum T UF = -1.143276959615573793352782661133116431383730e4L; + } + else static if (realFormat == RealFormat.ieeeExtended) + { + static immutable T[5] P = [ + -1.586135578666346600772998894928250240826E4L, + 2.642771505685952966904660652518429479531E3L, + -3.423199068835684263987132888286791620673E2L, + 1.800826371455042224581246202420972737840E1L, + -5.238523121205561042771939008061958820811E-1L, + ]; + static immutable T[6] Q = [ + -9.516813471998079611319047060563358064497E4L, + 3.964866271411091674556850458227710004570E4L, + -7.207678383830091850230366618190187434796E3L, + 7.206038318724600171970199625081491823079E2L, + -4.002027679107076077238836622982900945173E1L, + 1.0 + ]; - enum real OF = 1.1356523406294143949492E4L; - enum real UF = -4.5054566736396445112120088E1L; - } + enum T OF = 1.1356523406294143949492E4L; + enum T UF = -4.5054566736396445112120088E1L; + } + else static if (realFormat == RealFormat.ieeeDouble) + { + static immutable T[3] P = [ + 9.9999999999999999991025E-1, + 3.0299440770744196129956E-2, + 1.2617719307481059087798E-4, + ]; + static immutable T[4] Q = [ + 2.0000000000000000000897E0, + 2.2726554820815502876593E-1, + 2.5244834034968410419224E-3, + 3.0019850513866445504159E-6, + ]; + } + else + static assert(0, "no coefficients for expm1()"); + static if (realFormat == RealFormat.ieeeDouble) // special case for double precision + { + if (x < -0.5 || x > 0.5) + return exp(x) - 1.0; + if (x == 0.0) + return x; + const T xx = x * x; + x = x * poly(xx, P); + x = x / (poly(xx, Q) - x); + return x + x; + } + else + { // C1 + C2 = LN2. - enum real C1 = 6.9314575195312500000000E-1L; - enum real C2 = 1.428606820309417232121458176568075500134E-6L; + enum T C1 = 6.9314575195312500000000E-1L; + enum T C2 = 1.428606820309417232121458176568075500134E-6L; // Special cases. if (x > OF) return real.infinity; - if (x == 0.0) + if (x == cast(T) 0.0) return x; if (x < UF) return -1.0; // Express x = LN2 (n + remainder), remainder not exceeding 1/2. - int n = cast(int) floor(0.5 + x / LN2); + int n = cast(int) floor((cast(T) 0.5) + x / cast(T) LN2); x -= n * C1; x -= n * C2; // Rational approximation: // exp(x) - 1 = x + 0.5 x^^2 + x^^3 P(x) / Q(x) - real px = x * poly(x, P); - real qx = poly(x, Q); - const real xx = x * x; - qx = x + (0.5 * xx + xx * px / qx); + T px = x * poly(x, P); + T qx = poly(x, Q); + const T xx = x * x; + qx = x + ((cast(T) 0.5) * xx + xx * px / qx); // We have qx = exp(remainder LN2) - 1, so: // exp(x) - 1 = 2^^n (qx + 1) - 1 = 2^^n qx + 2^^n - 1. - px = ldexp(1.0, n); - x = px * qx + (px - 1.0); + px = ldexp(cast(T) 1.0, n); + x = px * qx + (px - cast(T) 1.0); return x; } } -/// -@safe unittest +@safe @nogc nothrow unittest { - assert(isIdentical(expm1(0.0), 0.0)); - assert(expm1(1.0).feqrel(1.71828) > 16); - assert(expm1(2.0).feqrel(6.3890) > 16); + static void testExpm1(T)() + { + // NaN + assert(isNaN(expm1(cast(T) T.nan))); + + static immutable T[] xs = [ -2, -0.75, -0.3, 0.0, 0.1, 0.2, 0.5, 1.0 ]; + foreach (x; xs) + { + const T e = expm1(x); + const T r = exp(x) - 1; + + //printf("expm1(%Lg) = %Lg, should approximately be %Lg\n", cast(real) x, cast(real) e, cast(real) r); + assert(approxEqual(r, e)); + } + } + + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double)) + testExpm1!T(); } /** From adc30e77569a0e20a52228e8f1d0c663c3469036 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 15 Mar 2018 23:38:37 +0100 Subject: [PATCH 06/12] Implement std.math.exp2() for single- and double-precision --- std/math.d | 161 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 124 insertions(+), 37 deletions(-) diff --git a/std/math.d b/std/math.d index 3e9383775a4..1d57b19d6f7 100644 --- a/std/math.d +++ b/std/math.d @@ -2195,7 +2195,7 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe // (This is valid because the overflow & underflow limits for exp // and exp2 are so similar). if (!__ctfe) - return exp2(LOG2E*x); + return exp2Asm(LOG2E*x); } else version(D_InlineAsm_X86_64) { @@ -2203,7 +2203,7 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe // (This is valid because the overflow & underflow limits for exp // and exp2 are so similar). if (!__ctfe) - return exp2(LOG2E*x); + return exp2Asm(LOG2E*x); } return expImpl(x); } @@ -2837,22 +2837,22 @@ private T expm1Impl(T)(T x) @safe pure nothrow @nogc * $(TR $(TD $(NAN)) $(TD $(NAN)) ) * ) */ -pragma(inline, true) -real exp2(real x) @nogc @trusted pure nothrow +real exp2(real x) @nogc @trusted pure nothrow // TODO: @safe { version(InlineAsm_X86_Any) { if (!__ctfe) return exp2Asm(x); - else - return exp2Impl(x); - } - else - { - return exp2Impl(x); } + return exp2Impl(x); } +/// ditto +double exp2(double x) @nogc @safe pure nothrow { return exp2Impl(x); } + +/// ditto +float exp2(float x) @nogc @safe pure nothrow { return exp2Impl(x); } + /// @safe unittest { @@ -2861,6 +2861,16 @@ real exp2(real x) @nogc @trusted pure nothrow assert(exp2(8.0).feqrel(256.0) > 16); } +@safe unittest +{ + version(CRuntime_Microsoft) {} else // aexp2/exp2f/exp2l not implemented + { + assert( core.stdc.math.exp2f(0.0f) == 1 ); + assert( core.stdc.math.exp2 (0.0) == 1 ); + assert( core.stdc.math.exp2l(0.0L) == 1 ); + } +} + version(InlineAsm_X86_Any) private real exp2Asm(real x) @nogc @trusted pure nothrow { @@ -3055,12 +3065,13 @@ L_was_nan: static assert(0); } -private real exp2Impl(real x) @nogc @trusted pure nothrow +private T exp2Impl(T)(T x) @nogc @safe pure nothrow { // Coefficients for exp2(x) - static if (floatTraits!real.realFormat == RealFormat.ieeeQuadruple) + enum realFormat = floatTraits!T.realFormat; + static if (realFormat == RealFormat.ieeeQuadruple) { - static immutable real[5] P = [ + static immutable T[5] P = [ 9.079594442980146270952372234833529694788E12L, 1.530625323728429161131811299626419117557E11L, 5.677513871931844661829755443994214173883E8L, @@ -3068,7 +3079,7 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow 1.587171580015525194694938306936721666031E2L ]; - static immutable real[6] Q = [ + static immutable T[6] Q = [ 2.619817175234089411411070339065679229869E13L, 1.490560994263653042761789432690793026977E12L, 1.092141473886177435056423606755843616331E10L, @@ -3077,24 +3088,50 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow 1.0 ]; } - else + else static if (realFormat == RealFormat.ieeeExtended) { - static immutable real[3] P = [ + static immutable T[3] P = [ 2.0803843631901852422887E6L, 3.0286971917562792508623E4L, 6.0614853552242266094567E1L, ]; - static immutable real[4] Q = [ + static immutable T[4] Q = [ 6.0027204078348487957118E6L, 3.2772515434906797273099E5L, 1.7492876999891839021063E3L, 1.0000000000000000000000E0L, ]; } + else static if (realFormat == RealFormat.ieeeDouble) + { + static immutable T[3] P = [ + 1.51390680115615096133E3L, + 2.02020656693165307700E1L, + 2.30933477057345225087E-2L, + ]; + static immutable T[3] Q = [ + 4.36821166879210612817E3L, + 2.33184211722314911771E2L, + 1.00000000000000000000E0L, + ]; + } + else static if (realFormat == RealFormat.ieeeSingle) + { + static immutable T[6] P = [ + 6.931472028550421E-001L, + 2.402264791363012E-001L, + 5.550332471162809E-002L, + 9.618437357674640E-003L, + 1.339887440266574E-003L, + 1.535336188319500E-004L, + ]; + } + else + static assert(0, "no coefficients for exp2()"); // Overflow and Underflow limits. - enum real OF = 16_384.0L; - enum real UF = -16_382.0L; + enum T OF = T.max_exp; + enum T UF = T.min_exp - 1; // Special cases. if (isNaN(x)) @@ -3104,16 +3141,40 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow if (x < UF) return 0.0; - // Separate into integer and fractional parts. - int n = cast(int) floor(x + 0.5); - x -= n; + static if (realFormat == RealFormat.ieeeSingle) // special case for single precision + { + // The following is necessary because range reduction blows up. + if (x == 0.0f) + return 1.0f; + + // Separate into integer and fractional parts. + const T i = floor(x); + int n = cast(int) i; + x -= i; + if (x > 0.5f) + { + n += 1; + x -= 1.0f; + } + + // Rational approximation: + // exp2(x) = 1.0 + x P(x) + x = 1.0f + x * poly(x, P); + } + else + { + // Separate into integer and fractional parts. + const T i = floor(x + cast(T) 0.5); + int n = cast(int) i; + x -= i; - // Rational approximation: - // exp2(x) = 1.0 + 2x P(x^^2) / (Q(x^^2) - P(x^^2)) - const real xx = x * x; - const real px = x * poly(xx, P); - x = px / (poly(xx, Q) - px); - x = 1.0 + ldexp(x, 1); + // Rational approximation: + // exp2(x) = 1.0 + 2x P(x^^2) / (Q(x^^2) - P(x^^2)) + const T xx = x * x; + const T px = x * poly(xx, P); + x = px / (poly(xx, Q) - px); + x = (cast(T) 1.0) + ldexp(x, 1); + } // Scale by power of 2. x = ldexp(x, n); @@ -3121,22 +3182,48 @@ private real exp2Impl(real x) @nogc @trusted pure nothrow return x; } -/// -@safe unittest +@safe @nogc nothrow unittest { assert(feqrel(exp2(0.5L), SQRT2) >= real.mant_dig -1); assert(exp2(8.0L) == 256.0); assert(exp2(-9.0L)== 1.0L/512.0); -} -@safe unittest -{ - version(CRuntime_Microsoft) {} else // aexp2/exp2f/exp2l not implemented + static void testExp2(T)() { - assert( core.stdc.math.exp2f(0.0f) == 1 ); - assert( core.stdc.math.exp2 (0.0) == 1 ); - assert( core.stdc.math.exp2l(0.0L) == 1 ); + // NaN + const T specialNaN = NaN(0x0123L); + assert(isIdentical(exp2(specialNaN), specialNaN)); + + // over-/underflow + enum T OF = T.max_exp; + enum T UF = T.min_exp - T.mant_dig; + assert(isIdentical(exp2(OF + 1), cast(T) T.infinity)); + assert(isIdentical(exp2(UF - 1), cast(T) 0.0)); + + static immutable T[2][] vals = + [ + // x, exp2(x) + [ 0.0, 1.0 ], + [ -0.0, 1.0 ], + [ 0.5, SQRT2 ], + [ 8.0, 256.0 ], + [ -9.0, 1.0 / 512 ], + ]; + + foreach (ref val; vals) + { + const T x = val[0]; + const T r = val[1]; + const T e = exp2(x); + + //printf("exp2(%Lg) = %Lg, should be %Lg\n", cast(real) x, cast(real) e, cast(real) r); + assert(approxEqual(r, e)); + } } + + import std.meta : AliasSeq; + foreach (T; AliasSeq!(real, double, float)) + testExp2!T(); } /* From e1d987dda6e61596fbdf67ede4f0e76a12badbec Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 17 Mar 2018 14:48:24 +0100 Subject: [PATCH 07/12] Improve performance of std.math.poly() Add a statically unrolled version for 1..10 coefficients. Results on Linux x86_64 and with an Intel i5-3550 for: static immutable T[6] coeffs = [ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6 ]; std.math.poly(cast(T) 0.43685, coeffs); => real: ~1.2x faster => double: ~3.2x faster => float: ~3.0x faster --- std/math.d | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/std/math.d b/std/math.d index 1d57b19d6f7..a7a5ce3b39e 100644 --- a/std/math.d +++ b/std/math.d @@ -8438,6 +8438,20 @@ do } } +/// ditto +Unqual!(CommonType!(T1, T2)) poly(T1, T2, int N)(T1 x, ref const T2[N] A) @safe pure nothrow @nogc +if (isFloatingPoint!T1 && isFloatingPoint!T2 && N > 0 && N <= 10) +{ + // statically unrolled version for up to 10 coefficients + typeof(return) r = A[N - 1]; + static foreach (i; 1 .. N) + { + r *= x; + r += A[N - 1 - i]; + } + return r; +} + /// @safe nothrow @nogc unittest { From 63254627ca88986de01b7441b730a2dbe3ea5fc5 Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 17 Mar 2018 20:55:00 +0100 Subject: [PATCH 08/12] Avoid ldexp() for static exponents Prefer an arithmetic multiply by the according power-of-two constant, as that's much faster. E.g., this makes tan(double) run 2.05x faster on an Intel i5-3550. --- std/math.d | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/std/math.d b/std/math.d index a7a5ce3b39e..245d12fcb05 100644 --- a/std/math.d +++ b/std/math.d @@ -1123,10 +1123,11 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc { T y = floor(x / cast(T) PI_4); // Strip high bits of integer part. - enum numHighBits = (realFormat == RealFormat.ieeeDouble ? 3 : 4); - T z = ldexp(y, -numHighBits); + enum T highBitsFactor = (realFormat == RealFormat.ieeeDouble ? 0x1p3 : 0x1p4); + enum T highBitsInv = 1.0 / highBitsFactor; + T z = y * highBitsInv; // Compute y - 2^numHighBits * (y / 2^numHighBits). - z = y - ldexp(floor(z), numHighBits); + z = y - highBitsFactor * floor(z); // Integer and fraction part modulo one octant. int j = cast(int)(z); @@ -2344,7 +2345,7 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc xx = x * x; const T px = x * poly(xx, P); x = px / (poly(xx, Q) - px); - x = (cast(T) 1.0) + ldexp(x, 1); + x = (cast(T) 1.0) + (cast(T) 2.0) * x; } // Scale by power of 2. @@ -3173,7 +3174,7 @@ private T exp2Impl(T)(T x) @nogc @safe pure nothrow const T xx = x * x; const T px = x * poly(xx, P); x = px / (poly(xx, Q) - px); - x = (cast(T) 1.0) + ldexp(x, 1); + x = (cast(T) 1.0) + (cast(T) 2.0) * x; } // Scale by power of 2. From 02883860014a86de6c68e2e69a8fd5faa5cb56a0 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 22 Mar 2018 20:02:23 +0100 Subject: [PATCH 09/12] Streamline Cephes implementations for single precision I.e., use poly() as for the other precisions (unlike the C source). With enabled compiler optimizations, it should be inlined anyway. --- std/math.d | 82 ++++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/std/math.d b/std/math.d index 245d12fcb05..da8315b000e 100644 --- a/std/math.d +++ b/std/math.d @@ -1090,9 +1090,18 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc } else static if (realFormat == RealFormat.ieeeSingle) { + static immutable T[6] P = [ + 3.33331568548E-1, + 1.33387994085E-1, + 5.34112807005E-2, + 2.44301354525E-2, + 3.11992232697E-3, + 9.38540185543E-3, + ]; + enum T P1 = 0.78515625; - enum T P2 = 2.4187564849853515625e-4; - enum T P3 = 3.77489497744594108e-8; + enum T P2 = 2.4187564849853515625E-4; + enum T P3 = 3.77489497744594108E-8; } else static assert(0, "no coefficients for tan()"); @@ -1148,19 +1157,9 @@ private T tanImpl(T)(T x) @safe pure nothrow @nogc if (zz > zzThreshold) { static if (realFormat == RealFormat.ieeeSingle) - { - y = ((((( 9.38540185543E-3f * zz - + 3.11992232697E-3f) * zz - + 2.44301354525E-2f) * zz - + 5.34112807005E-2f) * zz - + 1.33387994085E-1f) * zz - + 3.33331568548E-1f) * zz * z - + z; - } + y = z + z * (zz * poly(zz, P)); else - { y = z + z * (zz * poly(zz, P) / poly(zz, Q)); - } } else y = z; @@ -1393,26 +1392,31 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc else static if (realFormat == RealFormat.ieeeDouble) { static immutable T[5] P = [ - -6.485021904942025371773E1L, - -1.228866684490136173410E2L, - -7.500855792314704667340E1L, - -1.615753718733365076637E1L, - -8.750608600031904122785E-1L, + -6.485021904942025371773E1L, + -1.228866684490136173410E2L, + -7.500855792314704667340E1L, + -1.615753718733365076637E1L, + -8.750608600031904122785E-1L, ]; static immutable T[6] Q = [ - 1.945506571482613964425E2L, - 4.853903996359136964868E2L, - 4.328810604912902668951E2L, - 1.650270098316988542046E2L, - 2.485846490142306297962E1L, - 1.000000000000000000000E0L, + 1.945506571482613964425E2L, + 4.853903996359136964868E2L, + 4.328810604912902668951E2L, + 1.650270098316988542046E2L, + 2.485846490142306297962E1L, + 1.000000000000000000000E0L, ]; enum T MOREBITS = 6.123233995736765886130E-17L; } else static if (realFormat == RealFormat.ieeeSingle) { - // inlined below + static immutable T[4] P = [ + -3.33329491539E-1, + 1.99777106478E-1, + -1.38776856032E-1, + 8.05374449538E-2, + ]; } else static assert(0, "no coefficients for atan()"); @@ -1486,17 +1490,9 @@ private T atanImpl(T)(T x) @safe pure nothrow @nogc // Rational form in x^^2. const T z = x * x; static if (realFormat == RealFormat.ieeeSingle) - { - y += ((( 8.05374449538e-2f * z - - 1.38776856032E-1f) * z - + 1.99777106478E-1f) * z - - 3.33329491539E-1f) * z * x - + x; - } + y += poly(z, P) * z * x + x; else - { y = y + (poly(z, P) / poly(z, Q)) * z * x + x; - } } return (sign) ? -y : y; @@ -2227,6 +2223,15 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc alias F = floatTraits!T; static if (F.realFormat == RealFormat.ieeeSingle) { + static immutable T[6] P = [ + 5.0000001201E-1, + 1.6666665459E-1, + 4.1665795894E-2, + 8.3334519073E-3, + 1.3981999507E-3, + 1.9875691500E-4, + ]; + enum T C1 = 0.693359375; enum T C2 = -2.12194440e-4; @@ -2329,14 +2334,7 @@ private T expImpl(T)(T x) @safe pure nothrow @nogc static if (F.realFormat == RealFormat.ieeeSingle) { xx = x * x; - x = ((((( 1.9875691500E-4f * x - + 1.3981999507E-3f) * x - + 8.3334519073E-3f) * x - + 4.1665795894E-2f) * x - + 1.6666665459E-1f) * x - + 5.0000001201E-1f) * xx - + x - + 1.0f; + x = poly(x, P) * xx + x + 1.0f; } else { From 21468e788b9d9c2de21793e1998197e3491b9403 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 2 May 2018 21:35:12 +0200 Subject: [PATCH 10/12] Forward to (builtin) real version for CTFE in single/double precision overloads --- std/math.d | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/std/math.d b/std/math.d index da8315b000e..65de35ba921 100644 --- a/std/math.d +++ b/std/math.d @@ -929,10 +929,10 @@ real tan(real x) @trusted pure nothrow @nogc // TODO: @safe } /// ditto -double tan(double x) @safe pure nothrow @nogc { return tanImpl(x); } +double tan(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) tan(cast(real) x) : tanImpl(x); } /// ditto -float tan(float x) @safe pure nothrow @nogc { return tanImpl(x); } +float tan(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) tan(cast(real) x) : tanImpl(x); } /// @safe unittest @@ -1330,10 +1330,10 @@ real atan(real x) @safe pure nothrow @nogc } /// ditto -double atan(double x) @safe pure nothrow @nogc { return atanImpl(x); } +double atan(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) atan(cast(real) x) : atanImpl(x); } /// ditto -float atan(float x) @safe pure nothrow @nogc { return atanImpl(x); } +float atan(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) atan(cast(real) x) : atanImpl(x); } /// @safe unittest @@ -1582,13 +1582,13 @@ real atan2(real y, real x) @trusted pure nothrow @nogc // TODO: @safe /// ditto double atan2(double y, double x) @safe pure nothrow @nogc { - return atan2Impl(y, x); + return __ctfe ? cast(double) atan2(cast(real) y, cast(real) x) : atan2Impl(y, x); } /// ditto float atan2(float y, float x) @safe pure nothrow @nogc { - return atan2Impl(y, x); + return __ctfe ? cast(float) atan2(cast(real) y, cast(real) x) : atan2Impl(y, x); } /// @@ -2206,10 +2206,10 @@ real exp(real x) @trusted pure nothrow @nogc // TODO: @safe } /// ditto -double exp(double x) @safe pure nothrow @nogc { return expImpl(x); } +double exp(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) exp(cast(real) x) : expImpl(x); } /// ditto -float exp(float x) @safe pure nothrow @nogc { return expImpl(x); } +float exp(float x) @safe pure nothrow @nogc { return __ctfe ? cast(float) exp(cast(real) x) : expImpl(x); } /// @safe unittest @@ -2508,7 +2508,7 @@ real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe } /// ditto -double expm1(double x) @safe pure nothrow @nogc { return expm1Impl(x); } +double expm1(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x); } /// @safe unittest @@ -2847,10 +2847,10 @@ real exp2(real x) @nogc @trusted pure nothrow // TODO: @safe } /// ditto -double exp2(double x) @nogc @safe pure nothrow { return exp2Impl(x); } +double exp2(double x) @nogc @safe pure nothrow { return __ctfe ? cast(double) exp2(cast(real) x) : exp2Impl(x); } /// ditto -float exp2(float x) @nogc @safe pure nothrow { return exp2Impl(x); } +float exp2(float x) @nogc @safe pure nothrow { return __ctfe ? cast(float) exp2(cast(real) x) : exp2Impl(x); } /// @safe unittest From 77376fcf5653daa8751436b7ead1934352560764 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 2 May 2018 21:39:24 +0200 Subject: [PATCH 11/12] Add std.math.expm1(float) overload (using double precision) --- std/math.d | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/std/math.d b/std/math.d index 65de35ba921..edbd7e6ae1a 100644 --- a/std/math.d +++ b/std/math.d @@ -2508,7 +2508,17 @@ real expm1(real x) @trusted pure nothrow @nogc // TODO: @safe } /// ditto -double expm1(double x) @safe pure nothrow @nogc { return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x); } +double expm1(double x) @safe pure nothrow @nogc +{ + return __ctfe ? cast(double) expm1(cast(real) x) : expm1Impl(x); +} + +/// ditto +float expm1(float x) @safe pure nothrow @nogc +{ + // no single-precision version in Cephes => use double precision + return __ctfe ? cast(float) expm1(cast(real) x) : cast(float) expm1Impl(cast(double) x); +} /// @safe unittest From 97a491b7c28d521d0337acb0b0afacc861559825 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 16 May 2018 02:32:42 +0200 Subject: [PATCH 12/12] Add changelog entry --- changelog/math_float_double_implementations.dd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 changelog/math_float_double_implementations.dd diff --git a/changelog/math_float_double_implementations.dd b/changelog/math_float_double_implementations.dd new file mode 100644 index 00000000000..81c3690afe9 --- /dev/null +++ b/changelog/math_float_double_implementations.dd @@ -0,0 +1,15 @@ +Single- and double-precision implementations for (a)tan and exp function families + +The `float` and `double` overloads of +$(REF atan, std, math), $(REF atan2, std, math), $(REF tan, std, math), +$(REF exp, std, math), $(REF expm1, std, math) and $(REF exp2, std, math) +previously forwarded to the `real` implementation (inline assembly) and +now got proper 'software' implementations in the corresponding precision. + +While this may result in a slowdown in some cases for DMD (especially for +`exp()` and `exp2()`), the overall speed-up factor for LDC is > 3, for +both `double` and `float`. + +This also implies less precise results, especially in single-precision, +so if your code depended on more accurate results via 80-bit intermediate +precision, you'll have to cast the argument(s) explicitly now.