diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 75a20ed7b8..703e6c0f65 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -1938,6 +1938,7 @@ class binary_reader { std::pair size_and_type; size_t dimlen = 0; + bool is_ndarray = false; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -1952,7 +1953,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second))) { return false; } @@ -1964,7 +1965,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray))) { return false; } @@ -1976,7 +1977,7 @@ class binary_reader { while (current != ']') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current))) { return false; } @@ -1991,8 +1992,9 @@ class binary_reader @param[out] result determined size @return whether size determination completed */ - bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0) + bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0) { + is_ndarray = false; if (prefix == 0) { prefix = get_ignore_noop(); @@ -2132,7 +2134,7 @@ class binary_reader return false; } } - result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray + is_ndarray = true; return sax->end_array(); } result = 0; @@ -2168,6 +2170,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result) { + bool is_ndarray = false; result.first = string_t::npos; // size result.second = 0; // type @@ -2185,7 +2188,7 @@ class binary_reader exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))) + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type"))) { return false; } @@ -2202,12 +2205,22 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } - return get_ubjson_size_value(result.first); + bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return is_error; } if (current == '#') { - return get_ubjson_size_value(result.first); + bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return is_error; } return true; @@ -2408,17 +2421,26 @@ class binary_reader return false; } - // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): + // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) { std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} }; + size_and_type.second &= ~(static_cast(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker + string_t key = "_ArrayType_"; - if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) + if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); + } + + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) { return false; } @@ -2428,7 +2450,6 @@ class binary_reader size_and_type.second = 'U'; } - size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1)); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -2508,9 +2529,12 @@ class binary_reader return false; } - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + // do not accept ND-array size in objects in BJData + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) { - return false; + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr)); } string_t key; @@ -2584,7 +2608,8 @@ class binary_reader { // get size of following number string std::size_t size{}; - auto res = get_ubjson_size_value(size); + bool is_ndarray = false; + auto res = get_ubjson_size_value(size, is_ndarray); if (JSON_HEDLEY_UNLIKELY(!res)) { return res; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index bbd84d2be1..a29f7bce96 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -10412,6 +10412,7 @@ class binary_reader { std::pair size_and_type; size_t dimlen = 0; + bool is_ndarray = false; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -10426,7 +10427,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, size_and_type.second))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second))) { return false; } @@ -10438,7 +10439,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray))) { return false; } @@ -10450,7 +10451,7 @@ class binary_reader { while (current != ']') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, current))) + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current))) { return false; } @@ -10465,8 +10466,9 @@ class binary_reader @param[out] result determined size @return whether size determination completed */ - bool get_ubjson_size_value(std::size_t& result, char_int_type prefix = 0) + bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0) { + is_ndarray = false; if (prefix == 0) { prefix = get_ignore_noop(); @@ -10606,7 +10608,7 @@ class binary_reader return false; } } - result |= (1ull << (sizeof(result) * 8 - 1)); // low 63 bit of result stores the total element count, sign-bit indicates ndarray + is_ndarray = true; return sax->end_array(); } result = 0; @@ -10642,6 +10644,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result) { + bool is_ndarray = false; result.first = string_t::npos; // size result.second = 0; // type @@ -10659,7 +10662,7 @@ class binary_reader exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type") || (input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))) + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type"))) { return false; } @@ -10676,12 +10679,22 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } - return get_ubjson_size_value(result.first); + bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return is_error; } if (current == '#') { - return get_ubjson_size_value(result.first); + bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return is_error; } return true; @@ -10882,17 +10895,26 @@ class binary_reader return false; } - // detect and encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): + // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) { std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} }; + size_and_type.second &= ~(static_cast(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker + string_t key = "_ArrayType_"; - if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0 || !sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) + if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); + } + + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) { return false; } @@ -10902,7 +10924,6 @@ class binary_reader size_and_type.second = 'U'; } - size_and_type.first &= ~(1ull << (sizeof(std::size_t) * 8 - 1)); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -10982,9 +11003,12 @@ class binary_reader return false; } - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && size_and_type.first >= (1ull << (sizeof(std::size_t) * 8 - 1))) + // do not accept ND-array size in objects in BJData + if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) { - return false; + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr)); } string_t key; @@ -11058,7 +11082,8 @@ class binary_reader { // get size of following number string std::size_t size{}; - auto res = get_ubjson_size_value(size); + bool is_ndarray = false; + auto res = get_ubjson_size_value(size, is_ndarray); if (JSON_HEDLEY_UNLIKELY(!res)) { return res; diff --git a/tests/src/unit-bjdata.cpp b/tests/src/unit-bjdata.cpp index dc2c631229..d2fa9a9b28 100644 --- a/tests/src/unit-bjdata.cpp +++ b/tests/src/unit-bjdata.cpp @@ -1130,7 +1130,7 @@ TEST_CASE("BJData") { json j = json::from_bjdata(std::vector({'h', 0x00, 0x7c})); json::number_float_t d{j}; - CHECK(!std::isfinite(d)); + CHECK_FALSE(std::isfinite(d)); CHECK(j.dump() == "null"); } @@ -2035,77 +2035,98 @@ TEST_CASE("BJData") { std::vector v = {'[', 'T', 'F', ']'}; SaxCountdown scp(0); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("start_object()") { std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; SaxCountdown scp(0); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("key() in object") { std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; SaxCountdown scp(1); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("start_array(len)") { std::vector v = {'[', '#', 'i', '2', 'T', 'F'}; SaxCountdown scp(0); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("start_object(len)") { std::vector v = {'{', '#', 'i', '1', 3, 'f', 'o', 'o', 'F'}; SaxCountdown scp(0); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("key() in object with length") { std::vector v = {'{', 'i', 3, 'f', 'o', 'o', 'F', '}'}; SaxCountdown scp(1); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("start_array() in ndarray _ArraySize_") { std::vector v = {'[', '$', 'i', '#', '[', '$', 'i', '#', 'i', 2, 2, 1, 1, 2}; SaxCountdown scp(2); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("number_integer() in ndarray _ArraySize_") { std::vector v = {'[', '$', 'U', '#', '[', '$', 'i', '#', 'i', 2, 2, 1, 1, 2}; SaxCountdown scp(3); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("key() in ndarray _ArrayType_") + { + std::vector v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4}; + SaxCountdown scp(6); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); + } + + SECTION("string() in ndarray _ArrayType_") + { + std::vector v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4}; + SaxCountdown scp(7); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); + } + + SECTION("key() in ndarray _ArrayData_") { std::vector v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4}; SaxCountdown scp(8); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); + } + + SECTION("string() in ndarray _ArrayData_") + { + std::vector v = {'[', '$', 'U', '#', '[', '$', 'U', '#', 'i', 2, 2, 2, 1, 2, 3, 4}; + SaxCountdown scp(9); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("string() in ndarray _ArrayType_") { std::vector v = {'[', '$', 'U', '#', '[', '$', 'i', '#', 'i', 2, 3, 2, 6, 5, 4, 3, 2, 1}; SaxCountdown scp(11); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } SECTION("start_array() in ndarray _ArrayData_") { std::vector v = {'[', '$', 'U', '#', '[', 'i', 2, 'i', 3, ']', 6, 5, 4, 3, 2, 1}; SaxCountdown scp(13); - CHECK(!json::sax_parse(v, &scp, json::input_format_t::bjdata)); + CHECK_FALSE(json::sax_parse(v, &scp, json::input_format_t::bjdata)); } } @@ -2488,6 +2509,37 @@ TEST_CASE("BJData") CHECK_THROWS_WITH(_ = json::from_bjdata(v), "[json.exception.parse_error.112] parse error at byte 4: syntax error while parsing BJData size: expected '#' after type information; last byte: 0x02"); } + SECTION("optimized array: negative size") + { + std::vector v1 = {'[', '#', 'i', 0xF1}; + std::vector v2 = {'[', '$', 'I', '#', 'i', 0xF2}; + std::vector v3 = {'[', '$', 'I', '#', '[', 'i', 0xF4, 'i', 0x02, ']'}; + std::vector v4 = {'[', '$', 0xF6, '#', 'i', 0xF7}; + std::vector v5 = {'[', '$', 'I', '#', '[', 'i', 0xF5, 'i', 0xF1, ']'}; + std::vector v6 = {'[', '#', '[', 'i', 0xF3, 'i', 0x02, ']'}; + + json _; + static bool is_64bit = (sizeof(size_t) == 8); + + if (is_64bit) + { + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 18446744073709551601", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 18446744073709551602", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 18446744073709551592", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 18446744073709551607", json::out_of_range&); + } + else + { + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v1), "[json.exception.out_of_range.408] excessive array size: 4294967281", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v2), "[json.exception.out_of_range.408] excessive array size: 4294967282", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v3), "[json.exception.out_of_range.408] excessive array size: 4294967272", json::out_of_range&); + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v4), "[json.exception.out_of_range.408] excessive array size: 4294967287", json::out_of_range&); + } + CHECK_THROWS_WITH_AS(_ = json::from_bjdata(v5), "[json.exception.parse_error.110] parse error at byte 11: syntax error while parsing BJData number: unexpected end of input", json::parse_error&); + + CHECK(json::from_bjdata(v6, true, false).is_discarded()); + } + SECTION("do not accept NTFZ markers in ndarray optimized type") { json _;