Bugs in miloyip/nativejson-benchmark: floating-point parsing #186

nlohmann · 2016-01-22T19:13:19Z

I checked the library with the latest https://github.com/miloyip/nativejson-benchmark benchmark suite. There are several errors parsing floating-point numbers:

Parse Double,Nlohmann (C++11),double02,false
Parse Double,Nlohmann (C++11),double37,false
Parse Double,Nlohmann (C++11),double40,false
Parse Double,Nlohmann (C++11),double44,false
Parse Double,Nlohmann (C++11),double48,false
Parse Double,Nlohmann (C++11),double53,false
Parse Double,Nlohmann (C++11),double58,false
Parse Double,Nlohmann (C++11),double63,false

The respective code is this:

// Parse Double
{
    using rapidjson::internal::Double;
    int i = 1;
    #define TEST_DOUBLE(json, expect)\
    {\
        bool result = false;\
        double actual = 0.0;\
        if (test.ParseDouble(json, &actual)) \
            result = Double(expect).Uint64Value() == Double(actual).Uint64Value();\
        printf("double%02d: %s\n", i, result ? "true" : "false");\
        if (!result)\
            printf("JSON: %s\nExpect: %17g\nActual: %17g\n\n", json, expect, actual);\
        fprintf(fp, "2. Parse Double,%s,double%02d,%s\n", test.GetName(), i, result ? "true" : "false");\
        i++;\
    }
    TEST_DOUBLE("[0.0]", 0.0);
    TEST_DOUBLE("[-0.0]", -0.0);
    TEST_DOUBLE("[1.0]", 1.0);
    TEST_DOUBLE("[-1.0]", -1.0);
    TEST_DOUBLE("[1.5]", 1.5);
    TEST_DOUBLE("[-1.5]", -1.5);
    TEST_DOUBLE("[3.1416]", 3.1416);
    TEST_DOUBLE("[1E10]", 1E10);
    TEST_DOUBLE("[1e10]", 1e10);
    TEST_DOUBLE("[1E+10]", 1E+10);
    TEST_DOUBLE("[1E-10]", 1E-10);
    TEST_DOUBLE("[-1E10]", -1E10);
    TEST_DOUBLE("[-1e10]", -1e10);
    TEST_DOUBLE("[-1E+10]", -1E+10);
    TEST_DOUBLE("[-1E-10]", -1E-10);
    TEST_DOUBLE("[1.234E+10]", 1.234E+10);
    TEST_DOUBLE("[1.234E-10]", 1.234E-10);
    TEST_DOUBLE("[1.79769e+308]", 1.79769e+308);
    TEST_DOUBLE("[2.22507e-308]", 2.22507e-308);
    TEST_DOUBLE("[-1.79769e+308]", -1.79769e+308);
    TEST_DOUBLE("[-2.22507e-308]", -2.22507e-308);
    TEST_DOUBLE("[4.9406564584124654e-324]", 4.9406564584124654e-324); // minimum denormal
    TEST_DOUBLE("[2.2250738585072009e-308]", 2.2250738585072009e-308); // Max subnormal double
    TEST_DOUBLE("[2.2250738585072014e-308]", 2.2250738585072014e-308); // Min normal positive double
    TEST_DOUBLE("[1.7976931348623157e+308]", 1.7976931348623157e+308); // Max double
    TEST_DOUBLE("[1e-10000]", 0.0);                                   // must underflow
    TEST_DOUBLE("[18446744073709551616]", 18446744073709551616.0);    // 2^64 (max of uint64_t + 1, force to use double)
    TEST_DOUBLE("[-9223372036854775809]", -9223372036854775809.0);    // -2^63 - 1(min of int64_t + 1, force to use double)
    TEST_DOUBLE("[0.9868011474609375]", 0.9868011474609375);          // https://github.com/miloyip/rapidjson/issues/120
    TEST_DOUBLE("[123e34]", 123e34);                                  // Fast Path Cases In Disguise
    TEST_DOUBLE("[45913141877270640000.0]", 45913141877270640000.0);
    TEST_DOUBLE("[2.2250738585072011e-308]", 2.2250738585072011e-308); // http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
    //TEST_DOUBLE("[1e-00011111111111]", 0.0);
    //TEST_DOUBLE("[-1e-00011111111111]", -0.0);
    TEST_DOUBLE("[1e-214748363]", 0.0);
    TEST_DOUBLE("[1e-214748364]", 0.0);
    //TEST_DOUBLE("[1e-21474836311]", 0.0);
    TEST_DOUBLE("[0.017976931348623157e+310]", 1.7976931348623157e+308); // Max double in another form

    // Since
    // abs((2^-1022 - 2^-1074) - 2.2250738585072012e-308) = 3.109754131239141401123495768877590405345064751974375599... ¡Á 10^-324
    // abs((2^-1022) - 2.2250738585072012e-308) = 1.830902327173324040642192159804623318305533274168872044... ¡Á 10 ^ -324
    // So 2.2250738585072012e-308 should round to 2^-1022 = 2.2250738585072014e-308
    TEST_DOUBLE("[2.2250738585072012e-308]", 2.2250738585072014e-308); // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/

    // More closer to normal/subnormal boundary
    // boundary = 2^-1022 - 2^-1075 = 2.225073858507201136057409796709131975934819546351645648... ¡Á 10^-308
    TEST_DOUBLE("[2.22507385850720113605740979670913197593481954635164564e-308]", 2.2250738585072009e-308);
    TEST_DOUBLE("[2.22507385850720113605740979670913197593481954635164565e-308]", 2.2250738585072014e-308);

    // 1.0 is in (1.0 - 2^-54, 1.0 + 2^-53)
    // 1.0 - 2^-54 = 0.999999999999999944488848768742172978818416595458984375
    TEST_DOUBLE("[0.999999999999999944488848768742172978818416595458984375]", 1.0); // round to even
    TEST_DOUBLE("[0.999999999999999944488848768742172978818416595458984374]", 0.99999999999999989); // previous double
    TEST_DOUBLE("[0.999999999999999944488848768742172978818416595458984376]", 1.0); // next double
    // 1.0 + 2^-53 = 1.00000000000000011102230246251565404236316680908203125
    TEST_DOUBLE("[1.00000000000000011102230246251565404236316680908203125]", 1.0); // round to even
    TEST_DOUBLE("[1.00000000000000011102230246251565404236316680908203124]", 1.0); // previous double
    TEST_DOUBLE("[1.00000000000000011102230246251565404236316680908203126]", 1.00000000000000022); // next double

    // Numbers from https://github.com/floitsch/double-conversion/blob/master/test/cctest/test-strtod.cc

    TEST_DOUBLE("[72057594037927928.0]", 72057594037927928.0);
    TEST_DOUBLE("[72057594037927936.0]", 72057594037927936.0);
    TEST_DOUBLE("[72057594037927932.0]", 72057594037927936.0);
    TEST_DOUBLE("[7205759403792793199999e-5]", 72057594037927928.0);
    TEST_DOUBLE("[7205759403792793200001e-5]", 72057594037927936.0);

    TEST_DOUBLE("[9223372036854774784.0]", 9223372036854774784.0);
    TEST_DOUBLE("[9223372036854775808.0]", 9223372036854775808.0);
    TEST_DOUBLE("[9223372036854775296.0]", 9223372036854775808.0);
    TEST_DOUBLE("[922337203685477529599999e-5]", 9223372036854774784.0);
    TEST_DOUBLE("[922337203685477529600001e-5]", 9223372036854775808.0);

    TEST_DOUBLE("[10141204801825834086073718800384]", 10141204801825834086073718800384.0);
    TEST_DOUBLE("[10141204801825835211973625643008]", 10141204801825835211973625643008.0);
    TEST_DOUBLE("[10141204801825834649023672221696]", 10141204801825835211973625643008.0);
    TEST_DOUBLE("[1014120480182583464902367222169599999e-5]", 10141204801825834086073718800384.0);
    TEST_DOUBLE("[1014120480182583464902367222169600001e-5]", 10141204801825835211973625643008.0);

    TEST_DOUBLE("[5708990770823838890407843763683279797179383808]", 5708990770823838890407843763683279797179383808.0);
    TEST_DOUBLE("[5708990770823839524233143877797980545530986496]", 5708990770823839524233143877797980545530986496.0);
    TEST_DOUBLE("[5708990770823839207320493820740630171355185152]", 5708990770823839524233143877797980545530986496.0);
    TEST_DOUBLE("[5708990770823839207320493820740630171355185151999e-3]", 5708990770823838890407843763683279797179383808.0);
    TEST_DOUBLE("[5708990770823839207320493820740630171355185152001e-3]", 5708990770823839524233143877797980545530986496.0);

    {
        char n1e308[312];   // '1' followed by 308 '0'
        n1e308[0] = '[';
        n1e308[1] = '1';
        for (int j = 2; j < 310; j++)
            n1e308[j] = '0';
        n1e308[310] = ']';
        n1e308[311] = '\0';
        TEST_DOUBLE(n1e308, 1E308);
    }

    // Cover trimming
    TEST_DOUBLE(
        "[2.22507385850720113605740979670913197593481954635164564802342610972482222202107694551652952390813508"
        "7914149158913039621106870086438694594645527657207407820621743379988141063267329253552286881372149012"
        "9811224514518898490572223072852551331557550159143974763979834118019993239625482890171070818506906306"
        "6665599493827577257201576306269066333264756530000924588831643303777979186961204949739037782970490505"
        "1080609940730262937128958950003583799967207254304360284078895771796150945516748243471030702609144621"
        "5722898802581825451803257070188608721131280795122334262883686223215037756666225039825343359745688844"
        "2390026549819838548794829220689472168983109969836584681402285424333066033985088644580400103493397042"
        "7567186443383770486037861622771738545623065874679014086723327636718751234567890123456789012345678901"
        "e-308]", 
        2.2250738585072014e-308);
}

I will have a deeper look at this later. I just wanted to paste the results here before I forget.

nlohmann · 2016-01-22T19:45:21Z

These are the failing test cases mentioned above:

TEST_DOUBLE("[-0.0]", -0.0);
TEST_DOUBLE("[2.22507385850720113605740979670913197593481954635164564e-308]", 2.2250738585072009e-308);
TEST_DOUBLE("[0.999999999999999944488848768742172978818416595458984374]", 0.99999999999999989); // previous double
TEST_DOUBLE("[1.00000000000000011102230246251565404236316680908203126]", 1.00000000000000022); // next double
TEST_DOUBLE("[7205759403792793199999e-5]", 72057594037927928.0);
TEST_DOUBLE("[922337203685477529599999e-5]", 9223372036854774784.0);
TEST_DOUBLE("[1014120480182583464902367222169599999e-5]", 10141204801825834086073718800384.0);
TEST_DOUBLE("[5708990770823839207320493820740630171355185151999e-3]", 5708990770823838890407843763683279797179383808.0);

nlohmann · 2016-01-22T19:56:15Z

With the following program, I can reproduce all but the first error:

#include <json.hpp>

using nlohmann::json;

int main()
{
    json j02 = json::parse("-0.0");
    json j37 = json::parse("2.22507385850720113605740979670913197593481954635164564e-308");
    json j40 = json::parse("0.999999999999999944488848768742172978818416595458984374");
    json j44 = json::parse("1.00000000000000011102230246251565404236316680908203126");
    json j48 = json::parse("7205759403792793199999e-5");
    json j53 = json::parse("922337203685477529599999e-5");
    json j58 = json::parse("1014120480182583464902367222169599999e-5");
    json j63 = json::parse("5708990770823839207320493820740630171355185151999e-3");

    std::cout << std::boolalpha
        << (j02.get<double>() == -0.0) << '\n'
        << (j37.get<double>() == 2.2250738585072009e-308) << '\n'
        << (j40.get<double>() == 0.99999999999999989) << '\n'
        << (j44.get<double>() == 1.00000000000000022) << '\n'
        << (j48.get<double>() == 72057594037927928.0) << '\n'
        << (j53.get<double>() == 9223372036854774784.0) << '\n'
        << (j58.get<double>() == 10141204801825834086073718800384.0) << '\n'
        << (j63.get<double>() == 5708990770823838890407843763683279797179383808.0) << '\n';
}

Output:

true
false
false
false
false
false
false
false

nlohmann · 2016-01-22T20:23:30Z

Interestingly, the exact same benchmark is already part of the unit tests (test case "compliance tests from nativejson-benchmark", section "doubles"). There the following comparison code is used:

auto TEST_DOUBLE = [](const std::string & json_string, const double expected)
{
    CAPTURE(json_string);
    CAPTURE(expected);
    CHECK(json::parse(json_string)[0].get<double>() == Approx(expected));
};

nlohmann · 2016-01-22T20:24:30Z

I am confused :-)

twelsby · 2016-01-23T15:23:54Z

The tests all pass on VS2015 with the current code but fail on GCC. The reason for this is that VS2015 treats long double as a double precision floating point number (64 bits) unless you set certain command line options while GCC treats it as an extended double precision floating point number (80 bits). The problem is that get_number() in version 1.0.1 uses strtold() and then casts to a double. Therefore on GCC it is parsed as a double and must undergoes some rounding, then is cast and undergoes more rounding. It is the interaction of these two roundings that produces the error.

The obvious solution is to parse under GCC using strtod(). This works with the above examples however it will break if the user decides to use a long double as the floating point type and either compiles under GCC or VS2015 with the appropriate option set. This issue will also affect the 2.0.0 get_number() method although there I am already using strtod() as I neglected to consider the long double case.

The solution that works in all cases is to wrap strtof(), strtod() and strtold() in overloads and let the compiler select the correct one. Its a little messy but I can't see another way. On the plus side it should work for both 1.0.1 and 2.0.0.

gregmarr · 2016-01-23T16:03:24Z

Where does get_number() in 1.0.1 cast to a double? Do you mean that the test casts to a double by calling get<double>() instead of get<long double>()?

    long double get_number() const
    {
        const auto float_val = std::strtold(...);
        return (...) ? float_val : NAN;
    }

twelsby · 2016-01-23T16:50:16Z

Nope, later in the precision check in parse_internal():

...
// we would lose precision -> return float
result.m_type = value_t::number_float;
result.m_value = static_cast<number_float_t>(float_val);
...

In the default basic_json, number_float_t is a double. The tests that break are run using this basic_json.

The way I put it was probably unclear. What I mean was that get_number() parses as a long double and then later it is cast to a double.

twelsby · 2016-01-24T01:41:56Z

@nlohmann, the reason why j02.get<double>() == -0.0 yields true in your test but under the benchmark the equivalent yields false is that 0.0 == -0.0. When tested as doubles you are not simply testing the bit pattern for equality but are performing the mathematical test for equality which ignores the sign for zero.

The benchmark compares them as if they are uint64_t via a union, so it is looking for equality of the bit pattern.

An example that illustrates the difference is:

union double_union {
    double _double;
    uint64_t _uint64_t;
};

int main()
{
    double_union pos_zero = { 0.0 };
    double_union neg_zero = { -0.0 };

    if(pos_zero._double == neg_zero._double)
        std::cout << "0.0 == -0.0 when tested as a double" << std::endl;

    if(pos_zero._uint64_t == neg_zero._uint64_t)
        std::cout << "0.0 == -0.0 when tested as a uint64_t" << std::endl;  // Never reached

    return 0;
}

Which produces the output:

0.0 == -0.0 when tested as a double

As I mention in #187 the only fix for this is to keep -0.0 as a floating point number. I believe we should be doing this because although 0.0 == -0.0 there are differences between the two in some contexts and it can be mathematically convenient to have a negative zero (which is why it is in IEEE 754 to begin with).

twelsby · 2016-01-24T03:05:00Z

Note that to make the round trip for -0.0 work a modification has to be made to dump() to add the trailing .0 for floating point numbers that are integers.

Fixed Issue #186 - add strto(f|d|ld) overload wrappers, "-0.0" special case and FP trailing zero

nlohmann · 2016-01-24T16:38:13Z

Update: The example program above now outputs true for all examples.

nlohmann · 2016-01-24T16:57:07Z

I reran the benchmark: all double tests succeed now.

nlohmann added the kind: bug label Jan 22, 2016

nlohmann mentioned this issue Jan 22, 2016

JSON performance benchmark comparision #177

Closed

nlohmann added the confirmed label Jan 22, 2016

nlohmann mentioned this issue Jan 22, 2016

Extending support to full uint64_t/int64_t range - issue #178 (Integer conversion to unsigned) #183

Closed

twelsby mentioned this issue Jan 23, 2016

Bugs in miloyip/nativejson-benchmark: roundtrips #187

Closed

twelsby pushed a commit to twelsby/json that referenced this issue Jan 24, 2016

Fix Issue nlohmann#186 - add overload wrappers for strto(f|d|ld)

a1c6f16

twelsby mentioned this issue Jan 24, 2016

Fixed Issue #186 - add strto(f|d|ld) overload wrappers, "-0.0" special case and FP trailing zero #191

Merged

nlohmann added a commit that referenced this issue Jan 24, 2016

Merge pull request #191 from twelsby/issue186

ad5d1da

Fixed Issue #186 - add strto(f|d|ld) overload wrappers, "-0.0" special case and FP trailing zero

nlohmann added this to the Release 1.0.1 milestone Jan 24, 2016

nlohmann closed this as completed Jan 24, 2016

twelsby mentioned this issue Jan 25, 2016

GCC/clang floating point parsing bug in strtod() #195

Closed

TurpentineDistillery mentioned this issue Nov 19, 2016

replace strtold with non locale dependent version #337

Closed

bignmllc mentioned this issue Dec 22, 2017

Regression Tests Failure using "ctest" #887

Closed

Tridacnid mentioned this issue Sep 11, 2019

cmake_* tests don't use the build system's compiler #1747

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Bugs in miloyip/nativejson-benchmark: floating-point parsing #186

Bugs in miloyip/nativejson-benchmark: floating-point parsing #186

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

twelsby commented Jan 23, 2016

gregmarr commented Jan 23, 2016

twelsby commented Jan 23, 2016

twelsby commented Jan 24, 2016

twelsby commented Jan 24, 2016

nlohmann commented Jan 24, 2016

nlohmann commented Jan 24, 2016

Bugs in miloyip/nativejson-benchmark: floating-point parsing #186

Bugs in miloyip/nativejson-benchmark: floating-point parsing #186

Comments

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

nlohmann commented Jan 22, 2016

twelsby commented Jan 23, 2016

gregmarr commented Jan 23, 2016

twelsby commented Jan 23, 2016

twelsby commented Jan 24, 2016

twelsby commented Jan 24, 2016

nlohmann commented Jan 24, 2016

nlohmann commented Jan 24, 2016