From 58f5f259686d4e716ee9e63b3d30178dbba10908 Mon Sep 17 00:00:00 2001 From: Sushrut Shringarputale Date: Wed, 18 Dec 2024 13:46:14 -0800 Subject: [PATCH] json start/end position implementation (#4517) * Add implementation to retrieve start and end positions of json during parse * Add more unit tests and add start/stop parsing for arrays * Add raw value for all types * Add more tests and fix compiler warning * Amalgamate * Fix CLang GCC warnings * Fix error in build * Style using astyle 3.1 * Fix whitespace changes * revert * more whitespace reverts * Address PR comments * Fix failing issues * More whitespace reverts * Address remaining PR comments * Address comments * Switch to using custom base class instead of default basic_json * Adding a basic using for a json using the new base class. Also address PR comments and fix CI failures * Address decltype comments * Diagnostic positions macro (#4) Co-authored-by: Sush Shringarputale * Fix missed include deletion * Add docs and address other PR comments (#5) * Add docs and address other PR comments --------- Co-authored-by: Sush Shringarputale * Address new PR comments and fix CI tests for documentation * Update documentation based on feedback (#6) --------- Co-authored-by: Sush Shringarputale * Address std::size_t and other comments * Fix new CI issues * Fix lcov * Improve lcov case with update to handle_diagnostic_positions call for discarded values * Fix indentation of LCOV_EXCL_STOP comments * fix amalgamation astyle issue --------- Co-authored-by: Sush Shringarputale --- CMakeLists.txt | 6 + docs/examples/diagnostic_positions.cpp | 51 + docs/examples/diagnostic_positions.output | 50 + .../api/macros/json_diagnostic_positions.md | 61 + docs/mkdocs/docs/integration/cmake.md | 3 + include/nlohmann/detail/abi_macros.hpp | 19 +- .../nlohmann/detail/input/binary_reader.hpp | 2 +- .../nlohmann/detail/input/input_adapters.hpp | 3 + include/nlohmann/detail/input/json_sax.hpp | 282 +- include/nlohmann/detail/input/parser.hpp | 4 +- include/nlohmann/json.hpp | 73 +- single_include/nlohmann/json.hpp | 4262 +++++++++-------- single_include/nlohmann/json_fwd.hpp | 19 +- tests/src/unit-cbor.cpp | 2 +- tests/src/unit-class_parser.cpp | 2 +- ...unit-class_parser_diagnostic_positions.cpp | 1957 ++++++++ tests/src/unit-deserialization.cpp | 2 +- tests/src/unit-disabled_exceptions.cpp | 4 +- tests/src/unit-regression2.cpp | 4 +- tests/src/unit-ubjson.cpp | 4 +- 20 files changed, 4784 insertions(+), 2026 deletions(-) create mode 100644 docs/examples/diagnostic_positions.cpp create mode 100644 docs/examples/diagnostic_positions.output create mode 100644 docs/mkdocs/docs/api/macros/json_diagnostic_positions.md create mode 100644 tests/src/unit-class_parser_diagnostic_positions.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bcfa907293..da5799cba5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ endif() option(JSON_BuildTests "Build the unit tests when BUILD_TESTING is enabled." ${JSON_BuildTests_INIT}) option(JSON_CI "Enable CI build targets." OFF) option(JSON_Diagnostics "Use extended diagnostic messages." OFF) +option(JSON_Diagnostic_Positions "Enable diagnostic positions." OFF) option(JSON_GlobalUDLs "Place user-defined string literals in the global namespace." ON) option(JSON_ImplicitConversions "Enable implicit conversions." ON) option(JSON_DisableEnumSerialization "Disable default integer enum serialization." OFF) @@ -96,6 +97,10 @@ if (JSON_Diagnostics) message(STATUS "Diagnostics enabled (JSON_DIAGNOSTICS=1)") endif() +if (JSON_Diagnostic_Positions) + message(STATUS "Diagnostic positions enabled") +endif() + if (NOT JSON_GlobalUDLs) message(STATUS "User-defined string literals are not put in the global namespace (JSON_USE_GLOBAL_UDLS=0)") endif() @@ -123,6 +128,7 @@ target_compile_definitions( $<$>:JSON_USE_IMPLICIT_CONVERSIONS=0> $<$:JSON_DISABLE_ENUM_SERIALIZATION=1> $<$:JSON_DIAGNOSTICS=1> + $<$:JSON_DIAGNOSTIC_POSITIONS=1> $<$:JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON=1> ) diff --git a/docs/examples/diagnostic_positions.cpp b/docs/examples/diagnostic_positions.cpp new file mode 100644 index 0000000000..259344cd05 --- /dev/null +++ b/docs/examples/diagnostic_positions.cpp @@ -0,0 +1,51 @@ +#include + +#define JSON_DIAGNOSTIC_POSITIONS 1 +#include + +using json = nlohmann::json; + +int main() +{ + std::string json_string = R"( + { + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } + )"; + json j = json::parse(json_string); + + std::cout << "Root diagnostic positions: \n"; + std::cout << "\tstart_pos: " << j.start_pos() << '\n'; + std::cout << "\tend_pos:" << j.end_pos() << "\n"; + std::cout << "Original string: \n"; + std::cout << "{\n \"address\": {\n \"street\": \"Fake Street\",\n \"housenumber\": 1\n }\n }" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j.start_pos(), j.end_pos() - j.start_pos()) << "\n\n"; + + std::cout << "address diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "{ \"street\": \"Fake Street\",\n \"housenumber\": 1\n }" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"].start_pos(), j["address"].end_pos() - j["address"].start_pos()) << "\n\n"; + + std::cout << "street diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"]["street"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"]["street"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "\"Fake Street\"" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"]["street"].start_pos(), j["address"]["street"].end_pos() - j["address"]["street"].start_pos()) << "\n\n"; + + std::cout << "housenumber diagnostic positions: \n"; + std::cout << "\tstart_pos:" << j["address"]["housenumber"].start_pos() << '\n'; + std::cout << "\tend_pos:" << j["address"]["housenumber"].end_pos() << "\n\n"; + std::cout << "Original string: \n"; + std::cout << "1" << "\n"; + std::cout << "Parsed string: \n"; + std::cout << json_string.substr(j["address"]["housenumber"].start_pos(), j["address"]["housenumber"].end_pos() - j["address"]["housenumber"].start_pos()) << "\n\n"; +} diff --git a/docs/examples/diagnostic_positions.output b/docs/examples/diagnostic_positions.output new file mode 100644 index 0000000000..a6becc0e6d --- /dev/null +++ b/docs/examples/diagnostic_positions.output @@ -0,0 +1,50 @@ +Root diagnostic positions: + start_pos: 5 + end_pos:109 +Original string: +{ + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } +Parsed string: +{ + "address": { + "street": "Fake Street", + "housenumber": 1 + } + } + +address diagnostic positions: + start_pos:26 + end_pos:103 + +Original string: +{ "street": "Fake Street", + "housenumber": 1 + } +Parsed string: +{ + "street": "Fake Street", + "housenumber": 1 + } + +street diagnostic positions: + start_pos:50 + end_pos:63 + +Original string: +"Fake Street" +Parsed string: +"Fake Street" + +housenumber diagnostic positions: + start_pos:92 + end_pos:93 + +Original string: +1 +Parsed string: +1 + diff --git a/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md b/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md new file mode 100644 index 0000000000..d7d4ddd438 --- /dev/null +++ b/docs/mkdocs/docs/api/macros/json_diagnostic_positions.md @@ -0,0 +1,61 @@ +# JSON_DIAGNOSTIC_POSITIONS + +```cpp +#define JSON_DIAGNOSTIC_POSITIONS /* value */ +``` + +This macro enables position diagnostics for generated JSON objects. + +When enabled, two new properties: `start_pos()` and `end_pos()` are added to `nlohmann::basic_json` objects and fields. `start_pos()` returns the start +position of that JSON object/field in the original string the object was parsed from. Likewise, `end_pos()` returns the end position of that JSON +object/field in the original string the object was parsed from. + +`start_pos()` returns the first character of a given element in the original JSON string, while `end_pos()` returns the character following the last +character. For objects and arrays, the first and last characters correspond to the opening or closing braces/brackets, respectively. For fields, the first +and last character represent the opening and closing quotes or the first and last character of the field's numerical or predefined value +(true/false/null), respectively. + +Given the above, `end_pos() - start_pos()` for an object or field provides the length of the string representation for that object or field, including the +opening or closing braces, brackets, or quotes. + +`start_pos()` and `end_pos()` are only set if the JSON object was parsed using `parse()`. For all other cases, `std::string::npos` will be returned. + +Note that enabling this macro increases the size of every JSON value by two `std::size_t` fields and adds +slight runtime overhead. + +## Default definition + +The default value is `0` (position diagnostics are switched off). + +```cpp +#define JSON_DIAGNOSTIC_POSITIONS 0 +``` + +When the macro is not defined, the library will define it to its default value. + +## Notes + +!!! hint "CMake option" + + Diagnostic messages can also be controlled with the CMake option + [`JSON_Diagnostic_Positions`](../../integration/cmake.md#json_diagnostic_positions) (`OFF` by default) + which defines `JSON_DIAGNOSTIC_POSITIONS` accordingly. + +## Examples + +??? example "Example 1: retrieving positions" + + ```cpp + --8<-- "examples/diagnostic_positions.cpp" + ``` + + Output: + + ``` + --8<-- "examples/diagnostic_positions.output" + ``` + + The output shows the start/end positions of all the objects and fields in the JSON string. + +## Version history + diff --git a/docs/mkdocs/docs/integration/cmake.md b/docs/mkdocs/docs/integration/cmake.md index 2f68f09ceb..df0203192b 100644 --- a/docs/mkdocs/docs/integration/cmake.md +++ b/docs/mkdocs/docs/integration/cmake.md @@ -135,6 +135,9 @@ Enable CI build targets. The exact targets are used during the several CI steps Enable [extended diagnostic messages](../home/exceptions.md#extended-diagnostic-messages) by defining macro [`JSON_DIAGNOSTICS`](../api/macros/json_diagnostics.md). This option is `OFF` by default. +### `JSON_Diagnostic_Positions` +Enable position diagnostics by defining macro [`JSON_DIAGNOSTIC_POSITIONS`](../api/macros/json_diagnostic_positions.md). This option is off by default. + ### `JSON_DisableEnumSerialization` Disable default `enum` serialization by defining the macro diff --git a/include/nlohmann/detail/abi_macros.hpp b/include/nlohmann/detail/abi_macros.hpp index ea7ce64210..baa2762da2 100644 --- a/include/nlohmann/detail/abi_macros.hpp +++ b/include/nlohmann/detail/abi_macros.hpp @@ -26,6 +26,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -36,6 +40,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -47,14 +57,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f8c5190643..f498b0de96 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -65,7 +65,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index f0ef1f44cc..1fb132d376 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -462,6 +462,9 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval())); + #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index b549ec2faa..f081e4aa40 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -10,13 +10,14 @@ #include #include // string +#include // enable_if_t #include // move #include // vector #include +#include #include #include - NLOHMANN_JSON_NAMESPACE_BEGIN /*! @@ -157,7 +158,7 @@ constructor contains the parsed value. @tparam BasicJsonType the JSON type */ -template +template class json_sax_dom_parser { public: @@ -166,14 +167,15 @@ class json_sax_dom_parser using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; /*! @param[in,out] r reference to a JSON value that is manipulated while parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) {} // make class move-only @@ -229,6 +231,17 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); @@ -252,6 +265,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -261,6 +282,15 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); @@ -274,6 +304,14 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_array()); +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -298,6 +336,75 @@ class json_sax_dom_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -311,6 +418,11 @@ class json_sax_dom_parser if (ref_stack.empty()) { root = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif + return &root; } @@ -319,12 +431,22 @@ class json_sax_dom_parser if (ref_stack.back()->is_array()) { ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif + return &(ref_stack.back()->m_data.m_value.array->back()); } JSON_ASSERT(ref_stack.back()->is_object()); JSON_ASSERT(object_element); *object_element = BasicJsonType(std::forward(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif + return object_element; } @@ -338,9 +460,11 @@ class json_sax_dom_parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; -template +template class json_sax_dom_callback_parser { public: @@ -351,11 +475,13 @@ class json_sax_dom_callback_parser using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; json_sax_dom_callback_parser(BasicJsonType& r, parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_) + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) { keep_stack.push_back(true); } @@ -418,12 +544,26 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + } return true; } @@ -452,9 +592,23 @@ class json_sax_dom_callback_parser { // discard object *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } else { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } } @@ -488,10 +642,25 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + if (ref_stack.back()) { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } } return true; @@ -506,12 +675,26 @@ class json_sax_dom_callback_parser keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); if (keep) { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + ref_stack.back()->set_parents(); } else { // discard array *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } } @@ -548,6 +731,71 @@ class json_sax_dom_callback_parser } private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + /*! @param[in] v value to add to the JSON value we build during parsing @param[in] skip_callback whether we should skip calling the callback @@ -578,6 +826,10 @@ class json_sax_dom_callback_parser // create value auto value = BasicJsonType(std::forward(v)); +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif + // check callback const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); @@ -645,6 +897,8 @@ class json_sax_dom_callback_parser const bool allow_exceptions = true; /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; }; template diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index e46b0ef2a8..997a26ff3f 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -94,7 +94,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -122,7 +122,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index dda9b623f7..7234177a03 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -115,9 +115,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; @@ -848,6 +848,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) +#if JSON_DIAGNOSTIC_POSITIONS + : start_position(val.start_position), + end_position(val.end_position) +#endif { using other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -894,6 +898,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); + set_parents(); assert_invariant(); } @@ -1145,6 +1150,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& other) : json_base_class_t(other) +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) + , end_position(other.end_position) +#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -1215,6 +1224,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check + , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check +#endif { // check that passed value is valid other.assert_invariant(false); // cppcheck-suppress[accessForwarded] @@ -1223,6 +1236,11 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec other.m_data.m_type = value_t::null; other.m_data.m_value = {}; +#if JSON_DIAGNOSTIC_POSITIONS + other.start_position = std::string::npos; + other.end_position = std::string::npos; +#endif + set_parents(); assert_invariant(); } @@ -1243,6 +1261,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + +#if JSON_DIAGNOSTIC_POSITIONS + swap(start_position, other.start_position); + swap(end_position, other.end_position); +#endif + json_base_class_t::operator=(std::move(other)); set_parents(); @@ -4226,6 +4250,23 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif +#if JSON_DIAGNOSTIC_POSITIONS + /// the start position of the value + std::size_t start_position = std::string::npos; + /// the end position of the value + std::size_t end_position = std::string::npos; + public: + constexpr std::size_t start_pos() const noexcept + { + return start_position; + } + + constexpr std::size_t end_pos() const noexcept + { + return end_position; + } +#endif + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -4367,8 +4408,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4383,8 +4424,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4408,8 +4449,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4424,8 +4465,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4439,8 +4480,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4462,8 +4503,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4478,8 +4519,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4493,8 +4534,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4516,8 +4557,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -4532,8 +4573,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4547,8 +4588,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4562,8 +4603,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4577,8 +4618,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -4600,8 +4641,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 24c19df100..06fbe64c06 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -73,6 +73,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -83,6 +87,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -94,14 +104,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ @@ -6775,6 +6786,9 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval())); + #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) @@ -6871,2374 +6885,2629 @@ NLOHMANN_JSON_NAMESPACE_END #include #include // string +#include // enable_if_t #include // move #include // vector // #include +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013 - 2024 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // localeconv +#include // size_t +#include // snprintf +#include // strtof, strtod, strtold, strtoll, strtoull +#include // initializer_list +#include // char_traits, string +#include // move +#include // vector + +// #include + +// #include + // #include -// #include +// #include NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////// +// lexer // +/////////// + +template +class lexer_base +{ + public: + /// token types for the parser + enum class token_type + { + uninitialized, ///< indicating the scanner is uninitialized + literal_true, ///< the `true` literal + literal_false, ///< the `false` literal + literal_null, ///< the `null` literal + value_string, ///< a string -- use get_string() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value + begin_array, ///< the character for array begin `[` + begin_object, ///< the character for object begin `{` + end_array, ///< the character for array end `]` + end_object, ///< the character for object end `}` + name_separator, ///< the name separator `:` + value_separator, ///< the value separator `,` + parse_error, ///< indicating a parse error + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) + }; + /// return name of values of type token_type (only used for errors) + JSON_HEDLEY_RETURNS_NON_NULL + JSON_HEDLEY_CONST + static const char* token_type_name(const token_type t) noexcept + { + switch (t) + { + case token_type::uninitialized: + return ""; + case token_type::literal_true: + return "true literal"; + case token_type::literal_false: + return "false literal"; + case token_type::literal_null: + return "null literal"; + case token_type::value_string: + return "string literal"; + case token_type::value_unsigned: + case token_type::value_integer: + case token_type::value_float: + return "number literal"; + case token_type::begin_array: + return "'['"; + case token_type::begin_object: + return "'{'"; + case token_type::end_array: + return "']'"; + case token_type::end_object: + return "'}'"; + case token_type::name_separator: + return "':'"; + case token_type::value_separator: + return "','"; + case token_type::parse_error: + return ""; + case token_type::end_of_input: + return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; + // LCOV_EXCL_START + default: // catch non-enum values + return "unknown token"; + // LCOV_EXCL_STOP + } + } +}; /*! -@brief SAX interface +@brief lexical analysis -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. +This class organizes the lexical analysis during JSON deserialization. */ -template -struct json_sax +template +class lexer : public lexer_base { using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; + using char_type = typename InputAdapterType::char_type; + using char_int_type = typename char_traits::int_type; - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; + public: + using token_type = typename lexer_base::token_type; - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; + explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept + : ia(std::move(adapter)) + , ignore_comments(ignore_comments_) + , decimal_point_char(static_cast(get_decimal_point())) + {} - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; + // delete because of pointer members + lexer(const lexer&) = delete; + lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + lexer& operator=(lexer&) = delete; + lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~lexer() = default; - /*! - @brief a floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; + private: + ///////////////////// + // locales + ///////////////////// - /*! - @brief a string value was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string value. - */ - virtual bool string(string_t& val) = 0; + /// return the locale-dependent decimal point + JSON_HEDLEY_PURE + static char get_decimal_point() noexcept + { + const auto* loc = localeconv(); + JSON_ASSERT(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); + } - /*! - @brief a binary value was read - @param[in] val binary value - @return whether parsing should proceed - @note It is safe to move the passed binary value. - */ - virtual bool binary(binary_t& val) = 0; + ///////////////////// + // scan functions + ///////////////////// /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; + @brief get codepoint from 4 hex characters following `\u` - /*! - @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; + For input "\u c1 c2 c3 c4" the codepoint is: + (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 + = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; + Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' + must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The + conversion is done by subtracting the offset (0x30, 0x37, and 0x57) + between the ASCII value of the character and the desired integer value. - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements + @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or + non-hex character) */ - virtual bool start_array(std::size_t elements) = 0; + int get_codepoint() + { + // this function only makes sense after reading `\u` + JSON_ASSERT(current == 'u'); + int codepoint = 0; - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; + const auto factors = { 12u, 8u, 4u, 0u }; + for (const auto factor : factors) + { + get(); - /*! - @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; + if (current >= '0' && current <= '9') + { + codepoint += static_cast((static_cast(current) - 0x30u) << factor); + } + else if (current >= 'A' && current <= 'F') + { + codepoint += static_cast((static_cast(current) - 0x37u) << factor); + } + else if (current >= 'a' && current <= 'f') + { + codepoint += static_cast((static_cast(current) - 0x57u) << factor); + } + else + { + return -1; + } + } - json_sax() = default; - json_sax(const json_sax&) = default; - json_sax(json_sax&&) noexcept = default; - json_sax& operator=(const json_sax&) = default; - json_sax& operator=(json_sax&&) noexcept = default; - virtual ~json_sax() = default; -}; + JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); + return codepoint; + } -namespace detail -{ -/*! -@brief SAX implementation to create a JSON value from SAX events + /*! + @brief check if the next byte(s) are inside a given range -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. + Adds the current byte and, for each passed range, reads a new byte and + checks if it is inside the range. If a violation was detected, set up an + error message and return false. Otherwise, return true. -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. + @param[in] ranges list of integers; interpreted as list of pairs of + inclusive lower and upper bound, respectively -@tparam BasicJsonType the JSON type -*/ -template -class json_sax_dom_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; + @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, + 1, 2, or 3 pairs. This precondition is enforced by an assertion. - /*! - @param[in,out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions + @return true if and only if no range violation was detected */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) - {} - - // make class move-only - json_sax_dom_parser(const json_sax_dom_parser&) = delete; - json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; - json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_parser() = default; - - bool null() + bool next_byte_in_range(std::initializer_list ranges) { - handle_value(nullptr); - return true; - } + JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); + add(current); - bool boolean(bool val) - { - handle_value(val); - return true; - } + for (auto range = ranges.begin(); range != ranges.end(); ++range) + { + get(); + if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions) + { + add(current); + } + else + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return false; + } + } - bool number_integer(number_integer_t val) - { - handle_value(val); return true; } - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } + /*! + @brief scan a string literal - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } + This function scans a string according to Sect. 7 of RFC 8259. While + scanning, bytes are escaped and copied into buffer token_buffer. Then the + function returns successfully, token_buffer is *not* null-terminated (as it + may contain \0 bytes), and token_buffer.size() is the number of bytes in the + string. - bool string(string_t& val) - { - handle_value(val); - return true; - } + @return token_type::value_string if string could be successfully scanned, + token_type::parse_error otherwise - bool binary(binary_t& val) + @note In case of errors, variable error_message contains a textual + description. + */ + token_type scan_string() { - handle_value(std::move(val)); - return true; - } + // reset token_buffer (ignore opening quote) + reset(); - bool start_object(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + // we entered the function by reading an open quote + JSON_ASSERT(current == '\"'); - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + while (true) { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); - - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); - return true; - } - - bool end_object() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); + // get next character + switch (get()) + { + // end of file while parsing string + case char_traits::eof(): + { + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } + // closing quote + case '\"': + { + return token_type::value_string; + } - bool start_array(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + // escapes + case '\\': + { + switch (get()) + { + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } + // unicode escapes + case 'u': + { + const int codepoint1 = get_codepoint(); + int codepoint = codepoint1; // start with codepoint1 - return true; - } + if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } - bool end_array() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_array()); + // check if code point is a high surrogate + if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) + { + const int codepoint2 = get_codepoint(); - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } + if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } + // check if codepoint2 is a low surrogate + if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) + { + // overwrite codepoint + codepoint = static_cast( + // high surrogate occupies the most significant 22 bits + (static_cast(codepoint1) << 10u) + // low surrogate occupies the least significant 15 bits + + static_cast(codepoint2) + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result, so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00u); + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + } - constexpr bool is_errored() const - { - return errored; - } + // result of the above calculation yields a proper codepoint + JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); - private: - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - template - JSON_HEDLEY_RETURNS_NON_NULL - BasicJsonType* handle_value(Value&& v) - { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward(v)); - return &root; - } + // translate codepoint into bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(static_cast(codepoint)); + } + else if (codepoint <= 0x7FF) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(static_cast(0xC0u | (static_cast(codepoint) >> 6u))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } + else if (codepoint <= 0xFFFF) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(static_cast(0xE0u | (static_cast(codepoint) >> 12u))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(static_cast(0xF0u | (static_cast(codepoint) >> 18u))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 12u) & 0x3Fu))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + break; + } - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_data.m_value.array->back()); - } + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; + } - JSON_ASSERT(ref_stack.back()->is_object()); - JSON_ASSERT(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } + break; + } - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; + // invalid control characters + case 0x00: + { + error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; + return token_type::parse_error; + } -template -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; + case 0x01: + { + error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; + return token_type::parse_error; + } - json_sax_dom_callback_parser(BasicJsonType& r, - parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_) - { - keep_stack.push_back(true); - } + case 0x02: + { + error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; + return token_type::parse_error; + } - // make class move-only - json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_callback_parser() = default; + case 0x03: + { + error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; + return token_type::parse_error; + } - bool null() - { - handle_value(nullptr); - return true; - } + case 0x04: + { + error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; + return token_type::parse_error; + } - bool boolean(bool val) - { - handle_value(val); - return true; - } + case 0x05: + { + error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; + return token_type::parse_error; + } - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } + case 0x06: + { + error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; + return token_type::parse_error; + } - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } + case 0x07: + { + error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; + return token_type::parse_error; + } - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } + case 0x08: + { + error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; + return token_type::parse_error; + } - bool string(string_t& val) - { - handle_value(val); - return true; - } + case 0x09: + { + error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; + return token_type::parse_error; + } - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } + case 0x0A: + { + error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; + return token_type::parse_error; + } - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); + case 0x0B: + { + error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; + return token_type::parse_error; + } - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); + case 0x0C: + { + error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; + return token_type::parse_error; + } - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); - - // check callback for key - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); - - // add discarded value at given key and store the reference for later - if (keep && ref_stack.back()) - { - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); - } - - return true; - } - - bool end_object() - { - if (ref_stack.back()) - { - if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - } - else - { - ref_stack.back()->set_parents(); - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) - { - // remove discarded value - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) + case 0x0D: { - ref_stack.back()->erase(it); - break; + error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; + return token_type::parse_error; } - } - } - - return true; - } - - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); - - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } - return true; - } + case 0x0E: + { + error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; + return token_type::parse_error; + } - bool end_array() - { - bool keep = true; + case 0x0F: + { + error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; + return token_type::parse_error; + } - if (ref_stack.back()) - { - keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (keep) - { - ref_stack.back()->set_parents(); - } - else - { - // discard array - *ref_stack.back() = discarded; - } - } + case 0x10: + { + error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; + return token_type::parse_error; + } - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); + case 0x11: + { + error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; + return token_type::parse_error; + } - // remove discarded value - if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->pop_back(); - } + case 0x12: + { + error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; + return token_type::parse_error; + } - return true; - } + case 0x13: + { + error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; + return token_type::parse_error; + } - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } + case 0x14: + { + error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; + return token_type::parse_error; + } - constexpr bool is_errored() const - { - return errored; - } + case 0x15: + { + error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; + return token_type::parse_error; + } - private: - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. + case 0x16: + { + error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; + return token_type::parse_error; + } - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements + case 0x17: + { + error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; + return token_type::parse_error; + } - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template - std::pair handle_value(Value&& v, const bool skip_callback = false) - { - JSON_ASSERT(!keep_stack.empty()); + case 0x18: + { + error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; + return token_type::parse_error; + } - // do not handle this value if we know it would be added to a discarded - // container - if (!keep_stack.back()) - { - return {false, nullptr}; - } + case 0x19: + { + error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; + return token_type::parse_error; + } - // create value - auto value = BasicJsonType(std::forward(v)); + case 0x1A: + { + error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; + return token_type::parse_error; + } - // check callback - const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); + case 0x1B: + { + error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; + return token_type::parse_error; + } - // do not handle this value if we just learnt it shall be discarded - if (!keep) - { - return {false, nullptr}; - } + case 0x1C: + { + error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; + return token_type::parse_error; + } - if (ref_stack.empty()) - { - root = std::move(value); - return {true, & root}; - } + case 0x1D: + { + error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; + return token_type::parse_error; + } - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (!ref_stack.back()) - { - return {false, nullptr}; - } + case 0x1E: + { + error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; + return token_type::parse_error; + } - // we now only expect arrays and objects - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + case 0x1F: + { + error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; + return token_type::parse_error; + } - // array - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); - return {true, & (ref_stack.back()->m_data.m_value.array->back())}; + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + { + if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) + { + return token_type::parse_error; + } + break; + } + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xE0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xEE: + case 0xEF: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xED: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xF0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xF1: + case 0xF2: + case 0xF3: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xF4: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // remaining bytes (80..C1 and F5..FF) are ill-formed + default: + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + } + } + } + + /*! + * @brief scan a comment + * @return whether comment could be scanned successfully + */ + bool scan_comment() + { + switch (get()) + { + // single-line comments skip input until a newline or EOF is read + case '/': + { + while (true) + { + switch (get()) + { + case '\n': + case '\r': + case char_traits::eof(): + case '\0': + return true; + + default: + break; + } + } + } + + // multi-line comments skip input until */ is read + case '*': + { + while (true) + { + switch (get()) + { + case char_traits::eof(): + case '\0': + { + error_message = "invalid comment; missing closing '*/'"; + return false; + } + + case '*': + { + switch (get()) + { + case '/': + return true; + + default: + { + unget(); + continue; + } + } + } + + default: + continue; + } + } + } + + // unexpected character after reading '/' + default: + { + error_message = "invalid comment; expecting '/' or '*' after '/'"; + return false; + } + } + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + + /*! + @brief scan a number literal + + This function scans a string according to Sect. 6 of RFC 8259. + + The function is realized with a deterministic finite state machine derived + from the grammar described in RFC 8259. Starting in state "init", the + input is read and used to determined the next state. Only state "done" + accepts the number. State "error" is a trap state to model errors. In the + table below, "anything" means any character but the ones listed before. + + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in token_buffer. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. + */ + token_type scan_number() // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated. + { + // reset token_buffer to store the number's bytes + reset(); + + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; + + // state (init): we just found out we need to scan a number + switch (current) + { + case '-': + { + add(current); + goto scan_number_minus; + } + + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + // all other characters are rejected outside scan_number() + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + } + +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } + +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; } - // object - JSON_ASSERT(ref_stack.back()->is_object()); - // check if we should store an element for the current key - JSON_ASSERT(!key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } - if (!store_element) + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) { - return {false, nullptr}; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; } - JSON_ASSERT(object_element); - *object_element = std::move(value); - return {true, object_element}; - } +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// stack to manage which values to keep - std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) - /// stack to manage which object keys to keep - std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; -}; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } -template -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; + default: + { + error_message = + "invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } - bool null() - { - return true; - } +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } - bool boolean(bool /*unused*/) - { - return true; - } + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } - bool number_integer(number_integer_t /*unused*/) - { - return true; - } +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } + default: + goto scan_number_done; + } - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } +scan_number_done: + // unget the character after the number (we only read it to know that + // we are done scanning a number) + unget(); - bool string(string_t& /*unused*/) - { - return true; - } + char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + errno = 0; - bool binary(binary_t& /*unused*/) - { - return true; - } + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) + { + const auto x = std::strtoull(token_buffer.data(), &endptr, 10); - bool start_object(std::size_t /*unused*/ = static_cast(-1)) - { - return true; - } + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + + if (errno != ERANGE) + { + value_unsigned = static_cast(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + const auto x = std::strtoll(token_buffer.data(), &endptr, 10); + + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + + if (errno != ERANGE) + { + value_integer = static_cast(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } + + // this code is reached if we parse a floating-point number or if an + // integer conversion above failed + strtof(value_float, token_buffer.data(), &endptr); - bool key(string_t& /*unused*/) - { - return true; - } + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - bool end_object() - { - return true; + return token_type::value_float; } - bool start_array(std::size_t /*unused*/ = static_cast(-1)) + /*! + @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success + */ + JSON_HEDLEY_NON_NULL(2) + token_type scan_literal(const char_type* literal_text, const std::size_t length, + token_type return_type) { - return true; + JSON_ASSERT(char_traits::to_char_type(current) == literal_text[0]); + for (std::size_t i = 1; i < length; ++i) + { + if (JSON_HEDLEY_UNLIKELY(char_traits::to_char_type(get()) != literal_text[i])) + { + error_message = "invalid literal"; + return token_type::parse_error; + } + } + return return_type; } - bool end_array() - { - return true; - } + ///////////////////// + // input management + ///////////////////// - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + /// reset token_buffer; current character is beginning of token + void reset() noexcept { - return false; + token_buffer.clear(); + token_string.clear(); + decimal_point_position = std::string::npos; + token_string.push_back(char_traits::to_char_type(current)); } -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013 - 2024 Niels Lohmann -// SPDX-License-Identifier: MIT + /* + @brief get next character from the input + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a + `char_traits::eof()` in that case. Stores the scanned characters + for use in error messages. -#include // array -#include // localeconv -#include // size_t -#include // snprintf -#include // strtof, strtod, strtold, strtoll, strtoull -#include // initializer_list -#include // char_traits, string -#include // move -#include // vector + @return character read from the input + */ + char_int_type get() + { + ++position.chars_read_total; + ++position.chars_read_current_line; -// #include + if (next_unget) + { + // just reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia.get_character(); + } -// #include + if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) + { + token_string.push_back(char_traits::to_char_type(current)); + } -// #include + if (current == '\n') + { + ++position.lines_read; + position.chars_read_current_line = 0; + } -// #include + return current; + } + /*! + @brief unget current character (read it again on next get) -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read_total, + chars_read_current_line, and token_string. The next call to get() will + behave as if the unget character is read again. + */ + void unget() + { + next_unget = true; -/////////// -// lexer // -/////////// + --position.chars_read_total; -template -class lexer_base -{ - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value - value_integer, ///< a signed integer -- use get_number_integer() for actual value - value_float, ///< an floating point number -- use get_number_float() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input, ///< indicating the end of the input buffer - literal_or_value ///< a literal or the begin of a value (only for diagnostics) - }; + // in case we "unget" a newline, we have to also decrement the lines_read + if (position.chars_read_current_line == 0) + { + if (position.lines_read > 0) + { + --position.lines_read; + } + } + else + { + --position.chars_read_current_line; + } - /// return name of values of type token_type (only used for errors) - JSON_HEDLEY_RETURNS_NON_NULL - JSON_HEDLEY_CONST - static const char* token_type_name(const token_type t) noexcept - { - switch (t) + if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) { - case token_type::uninitialized: - return ""; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case token_type::value_unsigned: - case token_type::value_integer: - case token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case token_type::parse_error: - return ""; - case token_type::end_of_input: - return "end of input"; - case token_type::literal_or_value: - return "'[', '{', or a literal"; - // LCOV_EXCL_START - default: // catch non-enum values - return "unknown token"; - // LCOV_EXCL_STOP + JSON_ASSERT(!token_string.empty()); + token_string.pop_back(); } } -}; -/*! -@brief lexical analysis -This class organizes the lexical analysis during JSON deserialization. -*/ -template -class lexer : public lexer_base -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using char_type = typename InputAdapterType::char_type; - using char_int_type = typename char_traits::int_type; + /// add a character to token_buffer + void add(char_int_type c) + { + token_buffer.push_back(static_cast(c)); + } public: - using token_type = typename lexer_base::token_type; + ///////////////////// + // value getters + ///////////////////// - explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept - : ia(std::move(adapter)) - , ignore_comments(ignore_comments_) - , decimal_point_char(static_cast(get_decimal_point())) - {} + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } - // delete because of pointer members - lexer(const lexer&) = delete; - lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - lexer& operator=(lexer&) = delete; - lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~lexer() = default; + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + + /// return current string value (implicitly resets the token; useful only once) + string_t& get_string() + { + // translate decimal points from locale back to '.' (#4084) + if (decimal_point_char != '.' && decimal_point_position != std::string::npos) + { + token_buffer[decimal_point_position] = '.'; + } + return token_buffer; + } - private: ///////////////////// - // locales + // diagnostics ///////////////////// - /// return the locale-dependent decimal point - JSON_HEDLEY_PURE - static char get_decimal_point() noexcept + /// return position of last read token + constexpr position_t get_position() const noexcept { - const auto* loc = localeconv(); - JSON_ASSERT(loc != nullptr); - return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); + return position; } - ///////////////////// - // scan functions - ///////////////////// + /// return the last read token (for errors only). Will never contain EOF + /// (an arbitrary value that is not a valid char value, often -1), because + /// 255 may legitimately occur. May contain NUL, which should be escaped. + std::string get_token_string() const + { + // escape control characters + std::string result; + for (const auto c : token_string) + { + if (static_cast(c) <= '\x1F') + { + // escape control characters + std::array cs{{}}; + static_cast((std::snprintf)(cs.data(), cs.size(), "", static_cast(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + result += cs.data(); + } + else + { + // add character as is + result.push_back(static_cast(c)); + } + } - /*! - @brief get codepoint from 4 hex characters following `\u` + return result; + } - For input "\u c1 c2 c3 c4" the codepoint is: - (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 - = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) + /// return syntax error message + JSON_HEDLEY_RETURNS_NON_NULL + constexpr const char* get_error_message() const noexcept + { + return error_message; + } - Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' - must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The - conversion is done by subtracting the offset (0x30, 0x37, and 0x57) - between the ASCII value of the character and the desired integer value. + ///////////////////// + // actual scanner + ///////////////////// - @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or - non-hex character) + /*! + @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped */ - int get_codepoint() + bool skip_bom() { - // this function only makes sense after reading `\u` - JSON_ASSERT(current == 'u'); - int codepoint = 0; + if (get() == 0xEF) + { + // check if we completely parse the BOM + return get() == 0xBB && get() == 0xBF; + } - const auto factors = { 12u, 8u, 4u, 0u }; - for (const auto factor : factors) + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } + + void skip_whitespace() + { + do { get(); + } + while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); + } - if (current >= '0' && current <= '9') + token_type scan() + { + // initially, skip the BOM + if (position.chars_read_total == 0 && !skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } + + // read next character and ignore whitespace + skip_whitespace(); + + // ignore comments + while (ignore_comments && current == '/') + { + if (!scan_comment()) { - codepoint += static_cast((static_cast(current) - 0x30u) << factor); + return token_type::parse_error; } - else if (current >= 'A' && current <= 'F') + + // skip following whitespace + skip_whitespace(); + } + + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + + // literals + case 't': { - codepoint += static_cast((static_cast(current) - 0x37u) << factor); + std::array true_literal = {{static_cast('t'), static_cast('r'), static_cast('u'), static_cast('e')}}; + return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); } - else if (current >= 'a' && current <= 'f') + case 'f': { - codepoint += static_cast((static_cast(current) - 0x57u) << factor); + std::array false_literal = {{static_cast('f'), static_cast('a'), static_cast('l'), static_cast('s'), static_cast('e')}}; + return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); } - else + case 'n': { - return -1; + std::array null_literal = {{static_cast('n'), static_cast('u'), static_cast('l'), static_cast('l')}}; + return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); } - } - JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); - return codepoint; + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case char_traits::eof(): + return token_type::end_of_input; + + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } } - /*! - @brief check if the next byte(s) are inside a given range + private: + /// input adapter + InputAdapterType ia; - Adds the current byte and, for each passed range, reads a new byte and - checks if it is inside the range. If a violation was detected, set up an - error message and return false. Otherwise, return true. + /// whether comments should be ignored (true) or signaled as errors (false) + const bool ignore_comments = false; - @param[in] ranges list of integers; interpreted as list of pairs of - inclusive lower and upper bound, respectively + /// the current character + char_int_type current = char_traits::eof(); - @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, - 1, 2, or 3 pairs. This precondition is enforced by an assertion. + /// whether the next get() call should just return current + bool next_unget = false; - @return true if and only if no range violation was detected - */ - bool next_byte_in_range(std::initializer_list ranges) - { - JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); - add(current); + /// the start position of the current token + position_t position {}; - for (auto range = ranges.begin(); range != ranges.end(); ++range) - { - get(); - if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions) - { - add(current); - } - else - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return false; - } - } + /// raw input token string (for error messages) + std::vector token_string {}; - return true; - } + /// buffer for variable-length tokens (numbers, strings) + string_t token_buffer {}; - /*! - @brief scan a string literal + /// a description of occurred lexer errors + const char* error_message = ""; - This function scans a string according to Sect. 7 of RFC 8259. While - scanning, bytes are escaped and copied into buffer token_buffer. Then the - function returns successfully, token_buffer is *not* null-terminated (as it - may contain \0 bytes), and token_buffer.size() is the number of bytes in the - string. + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; - @return token_type::value_string if string could be successfully scanned, - token_type::parse_error otherwise + /// the decimal point + const char_int_type decimal_point_char = '.'; + /// the position of the decimal point in the input + std::size_t decimal_point_position = std::string::npos; +}; - @note In case of errors, variable error_message contains a textual - description. - */ - token_type scan_string() - { - // reset token_buffer (ignore opening quote) - reset(); +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include - // we entered the function by reading an open quote - JSON_ASSERT(current == '\"'); +// #include - while (true) - { - // get next character - switch (get()) - { - // end of file while parsing string - case char_traits::eof(): - { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } +NLOHMANN_JSON_NAMESPACE_BEGIN - // closing quote - case '\"': - { - return token_type::value_string; - } +/*! +@brief SAX interface - // escapes - case '\\': - { - switch (get()) - { - // quotation mark - case '\"': - add('\"'); - break; - // reverse solidus - case '\\': - add('\\'); - break; - // solidus - case '/': - add('/'); - break; - // backspace - case 'b': - add('\b'); - break; - // form feed - case 'f': - add('\f'); - break; - // line feed - case 'n': - add('\n'); - break; - // carriage return - case 'r': - add('\r'); - break; - // tab - case 't': - add('\t'); - break; +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template +struct json_sax +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; - // unicode escapes - case 'u': - { - const int codepoint1 = get_codepoint(); - int codepoint = codepoint1; // start with codepoint1 + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; - if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; - // check if code point is a high surrogate - if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) - { - // expect next \uxxxx entry - if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) - { - const int codepoint2 = get_codepoint(); + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; - if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; - // check if codepoint2 is a low surrogate - if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) - { - // overwrite codepoint - codepoint = static_cast( - // high surrogate occupies the most significant 22 bits - (static_cast(codepoint1) << 10u) - // low surrogate occupies the least significant 15 bits - + static_cast(codepoint2) - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result, so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00u); - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; - } - } + /*! + @brief a floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; - // result of the above calculation yields a proper codepoint - JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); + /*! + @brief a string value was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string value. + */ + virtual bool string(string_t& val) = 0; - // translate codepoint into bytes - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - add(static_cast(codepoint)); - } - else if (codepoint <= 0x7FF) - { - // 2-byte characters: 110xxxxx 10xxxxxx - add(static_cast(0xC0u | (static_cast(codepoint) >> 6u))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else if (codepoint <= 0xFFFF) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - add(static_cast(0xE0u | (static_cast(codepoint) >> 12u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - add(static_cast(0xF0u | (static_cast(codepoint) >> 18u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 12u) & 0x3Fu))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } + /*! + @brief a binary value was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary value. + */ + virtual bool binary(binary_t& val) = 0; - break; - } + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; - // other characters after escape - default: - error_message = "invalid string: forbidden character after backslash"; - return token_type::parse_error; - } + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; - break; - } + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; - // invalid control characters - case 0x00: - { - error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; - return token_type::parse_error; - } + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; - case 0x01: - { - error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; - return token_type::parse_error; - } + json_sax() = default; + json_sax(const json_sax&) = default; + json_sax(json_sax&&) noexcept = default; + json_sax& operator=(const json_sax&) = default; + json_sax& operator=(json_sax&&) noexcept = default; + virtual ~json_sax() = default; +}; - case 0x02: - { - error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; - return token_type::parse_error; - } +namespace detail +{ +/*! +@brief SAX implementation to create a JSON value from SAX events - case 0x03: - { - error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; - return token_type::parse_error; - } +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. - case 0x04: - { - error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; - return token_type::parse_error; - } +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. - case 0x05: - { - error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; - return token_type::parse_error; - } +@tparam BasicJsonType the JSON type +*/ +template +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; - case 0x06: - { - error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; - return token_type::parse_error; - } + /*! + @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + {} - case 0x07: - { - error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; - return token_type::parse_error; - } + // make class move-only + json_sax_dom_parser(const json_sax_dom_parser&) = delete; + json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; + json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_parser() = default; - case 0x08: - { - error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; - return token_type::parse_error; - } + bool null() + { + handle_value(nullptr); + return true; + } - case 0x09: - { - error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; - return token_type::parse_error; - } + bool boolean(bool val) + { + handle_value(val); + return true; + } - case 0x0A: - { - error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; - return token_type::parse_error; - } + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } - case 0x0B: - { - error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; - return token_type::parse_error; - } + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } - case 0x0C: - { - error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; - return token_type::parse_error; - } + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } - case 0x0D: - { - error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; - return token_type::parse_error; - } + bool string(string_t& val) + { + handle_value(val); + return true; + } - case 0x0E: - { - error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; - return token_type::parse_error; - } + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } - case 0x0F: - { - error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; - return token_type::parse_error; - } + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - case 0x10: - { - error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; - return token_type::parse_error; - } +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif - case 0x11: - { - error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; - return token_type::parse_error; - } + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } - case 0x12: - { - error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; - return token_type::parse_error; - } + return true; + } - case 0x13: - { - error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; - return token_type::parse_error; - } + bool key(string_t& val) + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); - case 0x14: - { - error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; - return token_type::parse_error; - } + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); + return true; + } - case 0x15: - { - error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; - return token_type::parse_error; - } + bool end_object() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); - case 0x16: - { - error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; - return token_type::parse_error; - } +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif - case 0x17: - { - error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; - return token_type::parse_error; - } + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } - case 0x18: - { - error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; - return token_type::parse_error; - } + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - case 0x19: - { - error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; - return token_type::parse_error; - } +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif - case 0x1A: - { - error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; - return token_type::parse_error; - } + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } - case 0x1B: - { - error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; - return token_type::parse_error; - } + return true; + } - case 0x1C: - { - error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; - return token_type::parse_error; - } + bool end_array() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_array()); - case 0x1D: - { - error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; - return token_type::parse_error; - } +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif - case 0x1E: - { - error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; - return token_type::parse_error; - } + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } - case 0x1F: - { - error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; - return token_type::parse_error; - } + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: - case 0x21: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - { - add(current); - break; - } + constexpr bool is_errored() const + { + return errored; + } - // U+0080..U+07FF: bytes C2..DF 80..BF - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - { - if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) - { - return token_type::parse_error; - } - break; - } + private: - // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case 0xE0: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); - // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF - // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xEE: - case 0xEF: + switch (v.type()) + { + case value_t::boolean: { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); break; } - // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case 0xED: + case value_t::null: { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) - { - return token_type::parse_error; - } + // 4 is the string length of "null" + v.start_position = v.end_position - 4; break; } - // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case 0xF0: + case value_t::string: { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; break; } - // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case 0xF1: - case 0xF2: - case 0xF3: + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } + v.end_position = std::string::npos; + v.start_position = v.end_position; break; } - - // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case 0xF4: + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } + v.start_position = v.end_position - m_lexer_ref->get_string().size(); break; } - - // remaining bytes (80..C1 and F5..FF) are ill-formed - default: + case value_t::object: + case value_t::array: { - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE } } } +#endif /*! - * @brief scan a comment - * @return whether comment could be scanned successfully - */ - bool scan_comment() + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_value(Value&& v) { - switch (get()) + if (ref_stack.empty()) { - // single-line comments skip input until a newline or EOF is read - case '/': - { - while (true) - { - switch (get()) - { - case '\n': - case '\r': - case char_traits::eof(): - case '\0': - return true; + root = BasicJsonType(std::forward(v)); - default: - break; - } - } - } +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif - // multi-line comments skip input until */ is read - case '*': - { - while (true) - { - switch (get()) - { - case char_traits::eof(): - case '\0': - { - error_message = "invalid comment; missing closing '*/'"; - return false; - } + return &root; + } - case '*': - { - switch (get()) - { - case '/': - return true; + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - default: - { - unget(); - continue; - } - } - } + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); - default: - continue; - } - } - } +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif - // unexpected character after reading '/' - default: - { - error_message = "invalid comment; expecting '/' or '*' after '/'"; - return false; - } + return &(ref_stack.back()->m_data.m_value.array->back()); } - } - JSON_HEDLEY_NON_NULL(2) - static void strtof(float& f, const char* str, char** endptr) noexcept - { - f = std::strtof(str, endptr); - } + JSON_ASSERT(ref_stack.back()->is_object()); + JSON_ASSERT(object_element); + *object_element = BasicJsonType(std::forward(v)); - JSON_HEDLEY_NON_NULL(2) - static void strtof(double& f, const char* str, char** endptr) noexcept - { - f = std::strtod(str, endptr); - } +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif - JSON_HEDLEY_NON_NULL(2) - static void strtof(long double& f, const char* str, char** endptr) noexcept - { - f = std::strtold(str, endptr); + return object_element; } - /*! - @brief scan a number literal - - This function scans a string according to Sect. 6 of RFC 8259. + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; - The function is realized with a deterministic finite state machine derived - from the grammar described in RFC 8259. Starting in state "init", the - input is read and used to determined the next state. Only state "done" - accepts the number. State "error" is a trap state to model errors. In the - table below, "anything" means any character but the ones listed before. +template +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done + json_sax_dom_callback_parser(BasicJsonType& r, + parser_callback_t cb, + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + { + keep_stack.push_back(true); + } - The state machine is realized with one label per state (prefixed with - "scan_number_") and `goto` statements between them. The state machine - contains cycles, but any cycle can be left when EOF is read. Therefore, - the function is guaranteed to terminate. + // make class move-only + json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_callback_parser() = default; - During scanning, the read bytes are stored in token_buffer. This string is - then converted to a signed integer, an unsigned integer, or a - floating-point number. + bool null() + { + handle_value(nullptr); + return true; + } - @return token_type::value_unsigned, token_type::value_integer, or - token_type::value_float if number could be successfully scanned, - token_type::parse_error otherwise + bool boolean(bool val) + { + handle_value(val); + return true; + } - @note The scanner is independent of the current locale. Internally, the - locale's decimal point is used instead of `.` to work with the - locale-dependent converters. - */ - token_type scan_number() // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated. + bool number_integer(number_integer_t val) { - // reset token_buffer to store the number's bytes - reset(); + handle_value(val); + return true; + } - // the type of the parsed number; initially set to unsigned; will be - // changed if minus sign, decimal point or exponent is read - token_type number_type = token_type::value_unsigned; + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } - // state (init): we just found out we need to scan a number - switch (current) - { - case '-': - { - add(current); - goto scan_number_minus; - } + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } - case '0': - { - add(current); - goto scan_number_zero; - } + bool string(string_t& val) + { + handle_value(val); + return true; + } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } - // all other characters are rejected outside scan_number() - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); -scan_number_minus: - // state: we just parsed a leading minus sign - number_type = token_type::value_integer; - switch (get()) + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) { - case '0': - { - add(current); - goto scan_number_zero; - } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) { - add(current); - goto scan_number_any1; + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; } +#endif - default: + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { - error_message = "invalid number; expected digit after '-'"; - return token_type::parse_error; + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); } } + return true; + } -scan_number_zero: - // state: we just parse a zero (maybe with a leading minus sign) - switch (get()) - { - case '.': - { - add(decimal_point_char); - decimal_point_position = token_buffer.size() - 1; - goto scan_number_decimal1; - } + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); - default: - goto scan_number_done; + // add discarded value at given key and store the reference for later + if (keep && ref_stack.back()) + { + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); } -scan_number_any1: - // state: we just parsed a number 0-9 (maybe with a leading minus sign) - switch (get()) + return true; + } + + bool end_object() + { + if (ref_stack.back()) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) { - add(current); - goto scan_number_any1; - } + // discard object + *ref_stack.back() = discarded; - case '.': - { - add(decimal_point_char); - decimal_point_position = token_buffer.size() - 1; - goto scan_number_decimal1; +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif } - - case 'e': - case 'E': + else { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } -scan_number_decimal1: - // state: we just parsed a decimal point - number_type = token_type::value_float; - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif - default: - { - error_message = "invalid number; expected digit after '.'"; - return token_type::parse_error; + ref_stack.back()->set_parents(); } } -scan_number_decimal2: - // we just parsed at least one number after a decimal point - switch (get()) + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + // remove discarded value + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) { - add(current); - goto scan_number_decimal2; + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } } + } - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } + return true; + } - default: - goto scan_number_done; - } + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); -scan_number_exponent: - // we just parsed an exponent - number_type = token_type::value_float; - switch (get()) + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) { - case '+': - case '-': - { - add(current); - goto scan_number_sign; - } - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) { - add(current); - goto scan_number_any2; + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; } +#endif - default: + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { - error_message = - "invalid number; expected '+', '-', or digit after exponent"; - return token_type::parse_error; + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); } } -scan_number_sign: - // we just parsed an exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } + return true; + } - default: - { - error_message = "invalid number; expected digit after exponent sign"; - return token_type::parse_error; - } - } + bool end_array() + { + bool keep = true; -scan_number_any2: - // we just parsed a number after the exponent or exponent sign - switch (get()) + if (ref_stack.back()) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (keep) { - add(current); - goto scan_number_any2; + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); } + else + { + // discard array + *ref_stack.back() = discarded; - default: - goto scan_number_done; +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif + } } -scan_number_done: - // unget the character after the number (we only read it to know that - // we are done scanning a number) - unget(); + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); - char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - errno = 0; + // remove discarded value + if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->pop_back(); + } - // try to parse integers first and fall back to floats - if (number_type == token_type::value_unsigned) + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) { - const auto x = std::strtoull(token_buffer.data(), &endptr, 10); + JSON_THROW(ex); + } + return false; + } - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + constexpr bool is_errored() const + { + return errored; + } - if (errno != ERANGE) + private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) { - value_unsigned = static_cast(x); - if (value_unsigned == x) + case value_t::boolean: { - return token_type::value_unsigned; + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; } - } - } - else if (number_type == token_type::value_integer) - { - const auto x = std::strtoll(token_buffer.data(), &endptr, 10); - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } - if (errno != ERANGE) - { - value_integer = static_cast(x); - if (value_integer == x) + case value_t::string: { - return token_type::value_integer; + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; } - } - } - // this code is reached if we parse a floating-point number or if an - // integer conversion above failed - strtof(value_float, token_buffer.data(), &endptr); + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } - return token_type::value_float; + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } } +#endif /*! - @param[in] literal_text the literal text to expect - @param[in] length the length of the passed literal text - @param[in] return_type the token type to return on success + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) */ - JSON_HEDLEY_NON_NULL(2) - token_type scan_literal(const char_type* literal_text, const std::size_t length, - token_type return_type) + template + std::pair handle_value(Value&& v, const bool skip_callback = false) { - JSON_ASSERT(char_traits::to_char_type(current) == literal_text[0]); - for (std::size_t i = 1; i < length; ++i) + JSON_ASSERT(!keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (!keep_stack.back()) { - if (JSON_HEDLEY_UNLIKELY(char_traits::to_char_type(get()) != literal_text[i])) - { - error_message = "invalid literal"; - return token_type::parse_error; - } + return {false, nullptr}; } - return return_type; - } - - ///////////////////// - // input management - ///////////////////// - - /// reset token_buffer; current character is beginning of token - void reset() noexcept - { - token_buffer.clear(); - token_string.clear(); - decimal_point_position = std::string::npos; - token_string.push_back(char_traits::to_char_type(current)); - } - /* - @brief get next character from the input + // create value + auto value = BasicJsonType(std::forward(v)); - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a - `char_traits::eof()` in that case. Stores the scanned characters - for use in error messages. +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif - @return character read from the input - */ - char_int_type get() - { - ++position.chars_read_total; - ++position.chars_read_current_line; + // check callback + const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); - if (next_unget) + // do not handle this value if we just learnt it shall be discarded + if (!keep) { - // just reset the next_unget variable and work with current - next_unget = false; + return {false, nullptr}; } - else + + if (ref_stack.empty()) { - current = ia.get_character(); + root = std::move(value); + return {true, & root}; } - if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (!ref_stack.back()) { - token_string.push_back(char_traits::to_char_type(current)); + return {false, nullptr}; } - if (current == '\n') + // we now only expect arrays and objects + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + // array + if (ref_stack.back()->is_array()) { - ++position.lines_read; - position.chars_read_current_line = 0; + ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); + return {true, & (ref_stack.back()->m_data.m_value.array->back())}; } - return current; - } + // object + JSON_ASSERT(ref_stack.back()->is_object()); + // check if we should store an element for the current key + JSON_ASSERT(!key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); - /*! - @brief unget current character (read it again on next get) + if (!store_element) + { + return {false, nullptr}; + } - We implement unget by setting variable next_unget to true. The input is not - changed - we just simulate ungetting by modifying chars_read_total, - chars_read_current_line, and token_string. The next call to get() will - behave as if the unget character is read again. - */ - void unget() - { - next_unget = true; + JSON_ASSERT(object_element); + *object_element = std::move(value); + return {true, object_element}; + } - --position.chars_read_total; + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// stack to manage which values to keep + std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) + /// stack to manage which object keys to keep + std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; - // in case we "unget" a newline, we have to also decrement the lines_read - if (position.chars_read_current_line == 0) - { - if (position.lines_read > 0) - { - --position.lines_read; - } - } - else - { - --position.chars_read_current_line; - } +template +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; - if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) - { - JSON_ASSERT(!token_string.empty()); - token_string.pop_back(); - } + bool null() + { + return true; } - /// add a character to token_buffer - void add(char_int_type c) + bool boolean(bool /*unused*/) { - token_buffer.push_back(static_cast(c)); + return true; } - public: - ///////////////////// - // value getters - ///////////////////// - - /// return integer value - constexpr number_integer_t get_number_integer() const noexcept + bool number_integer(number_integer_t /*unused*/) { - return value_integer; + return true; } - /// return unsigned integer value - constexpr number_unsigned_t get_number_unsigned() const noexcept + bool number_unsigned(number_unsigned_t /*unused*/) { - return value_unsigned; + return true; } - /// return floating-point value - constexpr number_float_t get_number_float() const noexcept + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) { - return value_float; + return true; } - /// return current string value (implicitly resets the token; useful only once) - string_t& get_string() + bool string(string_t& /*unused*/) { - // translate decimal points from locale back to '.' (#4084) - if (decimal_point_char != '.' && decimal_point_position != std::string::npos) - { - token_buffer[decimal_point_position] = '.'; - } - return token_buffer; + return true; } - ///////////////////// - // diagnostics - ///////////////////// - - /// return position of last read token - constexpr position_t get_position() const noexcept + bool binary(binary_t& /*unused*/) { - return position; + return true; } - /// return the last read token (for errors only). Will never contain EOF - /// (an arbitrary value that is not a valid char value, often -1), because - /// 255 may legitimately occur. May contain NUL, which should be escaped. - std::string get_token_string() const + bool start_object(std::size_t /*unused*/ = static_cast(-1)) { - // escape control characters - std::string result; - for (const auto c : token_string) - { - if (static_cast(c) <= '\x1F') - { - // escape control characters - std::array cs{{}}; - static_cast((std::snprintf)(cs.data(), cs.size(), "", static_cast(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - result += cs.data(); - } - else - { - // add character as is - result.push_back(static_cast(c)); - } - } - - return result; + return true; } - /// return syntax error message - JSON_HEDLEY_RETURNS_NON_NULL - constexpr const char* get_error_message() const noexcept + bool key(string_t& /*unused*/) { - return error_message; + return true; } - ///////////////////// - // actual scanner - ///////////////////// - - /*! - @brief skip the UTF-8 byte order mark - @return true iff there is no BOM or the correct BOM has been skipped - */ - bool skip_bom() + bool end_object() { - if (get() == 0xEF) - { - // check if we completely parse the BOM - return get() == 0xBB && get() == 0xBF; - } - - // the first character is not the beginning of the BOM; unget it to - // process is later - unget(); return true; } - void skip_whitespace() + bool start_array(std::size_t /*unused*/ = static_cast(-1)) { - do - { - get(); - } - while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); + return true; } - token_type scan() + bool end_array() { - // initially, skip the BOM - if (position.chars_read_total == 0 && !skip_bom()) - { - error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; - } - - // read next character and ignore whitespace - skip_whitespace(); - - // ignore comments - while (ignore_comments && current == '/') - { - if (!scan_comment()) - { - return token_type::parse_error; - } - - // skip following whitespace - skip_whitespace(); - } - - switch (current) - { - // structural characters - case '[': - return token_type::begin_array; - case ']': - return token_type::end_array; - case '{': - return token_type::begin_object; - case '}': - return token_type::end_object; - case ':': - return token_type::name_separator; - case ',': - return token_type::value_separator; - - // literals - case 't': - { - std::array true_literal = {{static_cast('t'), static_cast('r'), static_cast('u'), static_cast('e')}}; - return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); - } - case 'f': - { - std::array false_literal = {{static_cast('f'), static_cast('a'), static_cast('l'), static_cast('s'), static_cast('e')}}; - return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); - } - case 'n': - { - std::array null_literal = {{static_cast('n'), static_cast('u'), static_cast('l'), static_cast('l')}}; - return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); - } - - // string - case '\"': - return scan_string(); - - // number - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scan_number(); - - // end of input (the null byte is needed when parsing from - // string literals) - case '\0': - case char_traits::eof(): - return token_type::end_of_input; - - // error - default: - error_message = "invalid literal"; - return token_type::parse_error; - } + return true; } - private: - /// input adapter - InputAdapterType ia; - - /// whether comments should be ignored (true) or signaled as errors (false) - const bool ignore_comments = false; - - /// the current character - char_int_type current = char_traits::eof(); - - /// whether the next get() call should just return current - bool next_unget = false; - - /// the start position of the current token - position_t position {}; - - /// raw input token string (for error messages) - std::vector token_string {}; - - /// buffer for variable-length tokens (numbers, strings) - string_t token_buffer {}; - - /// a description of occurred lexer errors - const char* error_message = ""; - - // number values - number_integer_t value_integer = 0; - number_unsigned_t value_unsigned = 0; - number_float_t value_float = 0; - - /// the decimal point - const char_int_type decimal_point_char = '.'; - /// the position of the decimal point in the input - std::size_t decimal_point_position = std::string::npos; + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } }; } // namespace detail NLOHMANN_JSON_NAMESPACE_END +// #include + // #include // #include @@ -9443,7 +9712,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -12530,7 +12799,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -12558,7 +12827,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -19695,9 +19964,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; @@ -20428,6 +20697,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) +#if JSON_DIAGNOSTIC_POSITIONS + : start_position(val.start_position), + end_position(val.end_position) +#endif { using other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -20474,6 +20747,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); + set_parents(); assert_invariant(); } @@ -20725,6 +20999,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& other) : json_base_class_t(other) +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) + , end_position(other.end_position) +#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -20795,6 +21073,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check +#if JSON_DIAGNOSTIC_POSITIONS + , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check + , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check +#endif { // check that passed value is valid other.assert_invariant(false); // cppcheck-suppress[accessForwarded] @@ -20803,6 +21085,11 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec other.m_data.m_type = value_t::null; other.m_data.m_value = {}; +#if JSON_DIAGNOSTIC_POSITIONS + other.start_position = std::string::npos; + other.end_position = std::string::npos; +#endif + set_parents(); assert_invariant(); } @@ -20823,6 +21110,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + +#if JSON_DIAGNOSTIC_POSITIONS + swap(start_position, other.start_position); + swap(end_position, other.end_position); +#endif + json_base_class_t::operator=(std::move(other)); set_parents(); @@ -23806,6 +24099,23 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif +#if JSON_DIAGNOSTIC_POSITIONS + /// the start position of the value + std::size_t start_position = std::string::npos; + /// the end position of the value + std::size_t end_position = std::string::npos; + public: + constexpr std::size_t start_pos() const noexcept + { + return start_position; + } + + constexpr std::size_t end_pos() const noexcept + { + return end_position; + } +#endif + ////////////////////////////////////////// // binary serialization/deserialization // ////////////////////////////////////////// @@ -23947,8 +24257,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -23963,8 +24273,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -23988,8 +24298,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24004,8 +24314,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24019,8 +24329,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24042,8 +24352,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24058,8 +24368,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24073,8 +24383,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24096,8 +24406,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); @@ -24112,8 +24422,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24127,8 +24437,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24142,8 +24452,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24157,8 +24467,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); } @@ -24180,8 +24490,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] return res ? result : basic_json(value_t::discarded); diff --git a/single_include/nlohmann/json_fwd.hpp b/single_include/nlohmann/json_fwd.hpp index f96b6af967..1df3928d71 100644 --- a/single_include/nlohmann/json_fwd.hpp +++ b/single_include/nlohmann/json_fwd.hpp @@ -44,6 +44,10 @@ #define JSON_DIAGNOSTICS 0 #endif +#ifndef JSON_DIAGNOSTIC_POSITIONS + #define JSON_DIAGNOSTIC_POSITIONS 0 +#endif + #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -54,6 +58,12 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif +#if JSON_DIAGNOSTIC_POSITIONS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS +#endif + #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -65,14 +75,15 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ diff --git a/tests/src/unit-cbor.cpp b/tests/src/unit-cbor.cpp index cfee5f5dcb..6c0c50ae5a 100644 --- a/tests/src/unit-cbor.cpp +++ b/tests/src/unit-cbor.cpp @@ -1631,7 +1631,7 @@ TEST_CASE("CBOR") }; json j; - auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); + auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); CHECK(json::sax_parse(input, &cbp, json::input_format_t::cbor)); CHECK(j.at("foo").is_binary()); CHECK(binary_seen); diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index 017a789b4b..36de35e55c 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -219,7 +219,7 @@ json parser_helper(const std::string& s) CHECK(j_nothrow == j); json j_sax; - nlohmann::detail::json_sax_dom_parser sdp(j_sax); + nlohmann::detail::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); CHECK(j_sax == j); diff --git a/tests/src/unit-class_parser_diagnostic_positions.cpp b/tests/src/unit-class_parser_diagnostic_positions.cpp new file mode 100644 index 0000000000..b4ef434d64 --- /dev/null +++ b/tests/src/unit-class_parser_diagnostic_positions.cpp @@ -0,0 +1,1957 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ (supporting code) +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +#include "doctest_compatibility.h" +#define JSON_TESTS_PRIVATE +#ifdef JSON_DIAGNOSTIC_POSITIONS + #undef JSON_DIAGNOSTIC_POSITIONS +#endif + +#define JSON_DIAGNOSTIC_POSITIONS 1 +#include +using nlohmann::json; + +#ifdef JSON_TEST_NO_GLOBAL_UDLS + using namespace nlohmann::literals; // NOLINT(google-build-using-namespace) +#endif + +#include + +namespace +{ +class SaxEventLogger +{ + public: + bool null() + { + events.emplace_back("null()"); + return true; + } + + bool boolean(bool val) + { + events.emplace_back(val ? "boolean(true)" : "boolean(false)"); + return true; + } + + bool number_integer(json::number_integer_t val) + { + events.push_back("number_integer(" + std::to_string(val) + ")"); + return true; + } + + bool number_unsigned(json::number_unsigned_t val) + { + events.push_back("number_unsigned(" + std::to_string(val) + ")"); + return true; + } + + bool number_float(json::number_float_t /*unused*/, const std::string& s) + { + events.push_back("number_float(" + s + ")"); + return true; + } + + bool string(std::string& val) + { + events.push_back("string(" + val + ")"); + return true; + } + + bool binary(json::binary_t& val) + { + std::string binary_contents = "binary("; + std::string comma_space; + for (auto b : val) + { + binary_contents.append(comma_space); + binary_contents.append(std::to_string(static_cast(b))); + comma_space = ", "; + } + binary_contents.append(")"); + events.push_back(binary_contents); + return true; + } + + bool start_object(std::size_t elements) + { + if (elements == static_cast(-1)) + { + events.emplace_back("start_object()"); + } + else + { + events.push_back("start_object(" + std::to_string(elements) + ")"); + } + return true; + } + + bool key(std::string& val) + { + events.push_back("key(" + val + ")"); + return true; + } + + bool end_object() + { + events.emplace_back("end_object()"); + return true; + } + + bool start_array(std::size_t elements) + { + if (elements == static_cast(-1)) + { + events.emplace_back("start_array()"); + } + else + { + events.push_back("start_array(" + std::to_string(elements) + ")"); + } + return true; + } + + bool end_array() + { + events.emplace_back("end_array()"); + return true; + } + + bool parse_error(std::size_t position, const std::string& /*unused*/, const json::exception& /*unused*/) + { + errored = true; + events.push_back("parse_error(" + std::to_string(position) + ")"); + return false; + } + + std::vector events {}; // NOLINT(readability-redundant-member-init) + bool errored = false; +}; + +class SaxCountdown : public nlohmann::json::json_sax_t +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() override + { + return events_left-- > 0; + } + + bool boolean(bool /*val*/) override + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t /*val*/) override + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t /*val*/) override + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t /*val*/, const std::string& /*s*/) override + { + return events_left-- > 0; + } + + bool string(std::string& /*val*/) override + { + return events_left-- > 0; + } + + bool binary(json::binary_t& /*val*/) override + { + return events_left-- > 0; + } + + bool start_object(std::size_t /*elements*/) override + { + return events_left-- > 0; + } + + bool key(std::string& /*val*/) override + { + return events_left-- > 0; + } + + bool end_object() override + { + return events_left-- > 0; + } + + bool start_array(std::size_t /*elements*/) override + { + return events_left-- > 0; + } + + bool end_array() override + { + return events_left-- > 0; + } + + bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& /*ex*/) override + { + return false; + } + + private: + int events_left = 0; +}; + +json parser_helper(const std::string& s); +bool accept_helper(const std::string& s); +void comments_helper(const std::string& s); + +json parser_helper(const std::string& s) +{ + json j; + json::parser(nlohmann::detail::input_adapter(s)).parse(true, j); + + // if this line was reached, no exception occurred + // -> check if result is the same without exceptions + json j_nothrow; + CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j_nothrow)); + CHECK(j_nothrow == j); + + json j_sax; + nlohmann::detail::json_sax_dom_parser sdp(j_sax); + json::sax_parse(s, &sdp); + CHECK(j_sax == j); + + comments_helper(s); + + return j; +} + +bool accept_helper(const std::string& s) +{ + CAPTURE(s) + + // 1. parse s without exceptions + json j; + CHECK_NOTHROW(json::parser(nlohmann::detail::input_adapter(s), nullptr, false).parse(true, j)); + const bool ok_noexcept = !j.is_discarded(); + + // 2. accept s + const bool ok_accept = json::parser(nlohmann::detail::input_adapter(s)).accept(true); + + // 3. check if both approaches come to the same result + CHECK(ok_noexcept == ok_accept); + + // 4. parse with SAX (compare with relaxed accept result) + SaxEventLogger el; + CHECK_NOTHROW(json::sax_parse(s, &el, json::input_format_t::json, false)); + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept(false) == !el.errored); + + // 5. parse with simple callback + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + json const j_cb = json::parse(s, cb, false); + const bool ok_noexcept_cb = !j_cb.is_discarded(); + + // 6. check if this approach came to the same result + CHECK(ok_noexcept == ok_noexcept_cb); + + // 7. check if comments are properly ignored + if (ok_accept) + { + comments_helper(s); + } + + // 8. return result + return ok_accept; +} + +void comments_helper(const std::string& s) +{ + json _; + + // parse/accept with default parser + CHECK_NOTHROW(_ = json::parse(s)); + CHECK(json::accept(s)); + + // parse/accept while skipping comments + CHECK_NOTHROW(_ = json::parse(s, nullptr, false, true)); + CHECK(json::accept(s, true)); + + std::vector json_with_comments; + + // start with a comment + json_with_comments.push_back(std::string("// this is a comment\n") + s); + json_with_comments.push_back(std::string("/* this is a comment */") + s); + // end with a comment + json_with_comments.push_back(s + "// this is a comment"); + json_with_comments.push_back(s + "/* this is a comment */"); + + // check all strings + for (const auto& json_with_comment : json_with_comments) + { + CAPTURE(json_with_comment) + CHECK_THROWS_AS(_ = json::parse(json_with_comment), json::parse_error); + CHECK(!json::accept(json_with_comment)); + + CHECK_NOTHROW(_ = json::parse(json_with_comment, nullptr, true, true)); + CHECK(json::accept(json_with_comment, true)); + } +} + +/** + * Validates that the generated JSON object is the same as expected + * Validates that the start position and end position match the start and end of the string + * + * This check assumes that there is no whitespace around the json object in the original string. + */ +void validate_generated_json_and_start_end_pos_helper(const std::string& original_string, const json& j, const json& check) +{ + CHECK(j == check); + CHECK(j.start_pos() == 0); + CHECK(j.end_pos() == original_string.size()); +} + +/** + * Parses the root object from the given root string and validates that the start and end positions for the nested object are correct. + * + * This checks that whitespace around the nested object is included in the start and end positions of the root object. + */ +void validate_start_end_pos_for_nested_obj_helper(const std::string& nested_type_json_str, const std::string& root_type_json_str, const json& expected_json, const json::parser_callback_t& cb = nullptr) +{ + json j; + + // 1. If callback is provided, use callback version of parse() + if (cb) + { + j = json::parse(root_type_json_str, cb); + } + else + { + j = json::parse(root_type_json_str); + } + + // 2. Check if the generated JSON is as expected + // Assumptions: The root_type_json_str does not have any whitespace around the json object + validate_generated_json_and_start_end_pos_helper(root_type_json_str, j, expected_json); + + // 3. Get the nested object + const auto& nested = j["nested"]; + // 4. Check if the start and end positions are generated correctly for nested objects and arrays + CHECK(nested_type_json_str == root_type_json_str.substr(nested.start_pos(), nested.end_pos() - nested.start_pos())); +} + +} // namespace + +TEST_CASE("parser class") +{ + SECTION("parse") + { + SECTION("null") + { + CHECK(parser_helper("null") == json(nullptr)); + } + + SECTION("true") + { + CHECK(parser_helper("true") == json(true)); + } + + SECTION("false") + { + CHECK(parser_helper("false") == json(false)); + } + + SECTION("array") + { + SECTION("empty array") + { + CHECK(parser_helper("[]") == json(json::value_t::array)); + CHECK(parser_helper("[ ]") == json(json::value_t::array)); + } + + SECTION("nonempty array") + { + CHECK(parser_helper("[true, false, null]") == json({true, false, nullptr})); + } + } + + SECTION("object") + { + SECTION("empty object") + { + CHECK(parser_helper("{}") == json(json::value_t::object)); + CHECK(parser_helper("{ }") == json(json::value_t::object)); + } + + SECTION("nonempty object") + { + CHECK(parser_helper("{\"\": true, \"one\": 1, \"two\": null}") == json({{"", true}, {"one", 1}, {"two", nullptr}})); + } + } + + SECTION("string") + { + // empty string + CHECK(parser_helper("\"\"") == json(json::value_t::string)); + + SECTION("errors") + { + // error: tab in string + CHECK_THROWS_WITH_AS(parser_helper("\"\t\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t; last read: '\"'", json::parse_error&); + // error: newline in string + CHECK_THROWS_WITH_AS(parser_helper("\"\n\""), "[json.exception.parse_error.101] parse error at line 2, column 0: syntax error while parsing value - invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\r\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r; last read: '\"'", json::parse_error&); + // error: backspace in string + CHECK_THROWS_WITH_AS(parser_helper("\"\b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b; last read: '\"'", json::parse_error&); + // improve code coverage + CHECK_THROWS_AS(parser_helper("\uFF01"), json::parse_error&); + CHECK_THROWS_AS(parser_helper("[-4:1,]"), json::parse_error&); + // unescaped control characters + CHECK_THROWS_WITH_AS(parser_helper("\"\x00\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: missing closing quote; last read: '\"'", json::parse_error&); // NOLINT(bugprone-string-literal-with-embedded-nul) + CHECK_THROWS_WITH_AS(parser_helper("\"\x01\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0001 (SOH) must be escaped to \\u0001; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x02\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0002 (STX) must be escaped to \\u0002; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x03\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0003 (ETX) must be escaped to \\u0003; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x04\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0004 (EOT) must be escaped to \\u0004; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x05\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0005 (ENQ) must be escaped to \\u0005; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x06\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0006 (ACK) must be escaped to \\u0006; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x07\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0007 (BEL) must be escaped to \\u0007; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x08\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x09\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0a\""), "[json.exception.parse_error.101] parse error at line 2, column 0: syntax error while parsing value - invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000B (VT) must be escaped to \\u000B; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0c\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0d\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0e\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000E (SO) must be escaped to \\u000E; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x0f\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+000F (SI) must be escaped to \\u000F; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x10\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0010 (DLE) must be escaped to \\u0010; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x11\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0011 (DC1) must be escaped to \\u0011; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x12\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0012 (DC2) must be escaped to \\u0012; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x13\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0013 (DC3) must be escaped to \\u0013; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x14\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0014 (DC4) must be escaped to \\u0014; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x15\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0015 (NAK) must be escaped to \\u0015; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x16\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0016 (SYN) must be escaped to \\u0016; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x17\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0017 (ETB) must be escaped to \\u0017; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x18\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0018 (CAN) must be escaped to \\u0018; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x19\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0019 (EM) must be escaped to \\u0019; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1a\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001A (SUB) must be escaped to \\u001A; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1b\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001B (ESC) must be escaped to \\u001B; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1c\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001C (FS) must be escaped to \\u001C; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1d\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001D (GS) must be escaped to \\u001D; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1e\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001E (RS) must be escaped to \\u001E; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\x1f\""), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+001F (US) must be escaped to \\u001F; last read: '\"'", json::parse_error&); + + SECTION("additional test for null byte") + { + // The test above for the null byte is wrong, because passing + // a string to the parser only reads int until it encounters + // a null byte. This test inserts the null byte later on and + // uses an iterator range. + std::string s = "\"1\""; + s[1] = '\0'; + json _; + CHECK_THROWS_WITH_AS(_ = json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: control character U+0000 (NUL) must be escaped to \\u0000; last read: '\"'", json::parse_error&); + } + } + + SECTION("escaped") + { + // quotation mark "\"" + auto r1 = R"("\"")"_json; + CHECK(parser_helper("\"\\\"\"") == r1); + // reverse solidus "\\" + auto r2 = R"("\\")"_json; + CHECK(parser_helper("\"\\\\\"") == r2); + // solidus + CHECK(parser_helper("\"\\/\"") == R"("/")"_json); + // backspace + CHECK(parser_helper("\"\\b\"") == json("\b")); + // formfeed + CHECK(parser_helper("\"\\f\"") == json("\f")); + // newline + CHECK(parser_helper("\"\\n\"") == json("\n")); + // carriage return + CHECK(parser_helper("\"\\r\"") == json("\r")); + // horizontal tab + CHECK(parser_helper("\"\\t\"") == json("\t")); + + CHECK(parser_helper("\"\\u0001\"").get() == "\x01"); + CHECK(parser_helper("\"\\u000a\"").get() == "\n"); + CHECK(parser_helper("\"\\u00b0\"").get() == "°"); + CHECK(parser_helper("\"\\u0c00\"").get() == "ఀ"); + CHECK(parser_helper("\"\\ud000\"").get() == "퀀"); + CHECK(parser_helper("\"\\u000E\"").get() == "\x0E"); + CHECK(parser_helper("\"\\u00F0\"").get() == "ð"); + CHECK(parser_helper("\"\\u0100\"").get() == "Ā"); + CHECK(parser_helper("\"\\u2000\"").get() == " "); + CHECK(parser_helper("\"\\uFFFF\"").get() == "￿"); + CHECK(parser_helper("\"\\u20AC\"").get() == "€"); + CHECK(parser_helper("\"€\"").get() == "€"); + CHECK(parser_helper("\"🎈\"").get() == "🎈"); + + CHECK(parser_helper("\"\\ud80c\\udc60\"").get() == "\xf0\x93\x81\xa0"); + CHECK(parser_helper("\"\\ud83c\\udf1e\"").get() == "🌞"); + } + } + + SECTION("number") + { + SECTION("integers") + { + SECTION("without exponent") + { + CHECK(parser_helper("-128") == json(-128)); + CHECK(parser_helper("-0") == json(-0)); + CHECK(parser_helper("0") == json(0)); + CHECK(parser_helper("128") == json(128)); + } + + SECTION("with exponent") + { + CHECK(parser_helper("0e1") == json(0e1)); + CHECK(parser_helper("0E1") == json(0e1)); + + CHECK(parser_helper("10000E-4") == json(10000e-4)); + CHECK(parser_helper("10000E-3") == json(10000e-3)); + CHECK(parser_helper("10000E-2") == json(10000e-2)); + CHECK(parser_helper("10000E-1") == json(10000e-1)); + CHECK(parser_helper("10000E0") == json(10000e0)); + CHECK(parser_helper("10000E1") == json(10000e1)); + CHECK(parser_helper("10000E2") == json(10000e2)); + CHECK(parser_helper("10000E3") == json(10000e3)); + CHECK(parser_helper("10000E4") == json(10000e4)); + + CHECK(parser_helper("10000e-4") == json(10000e-4)); + CHECK(parser_helper("10000e-3") == json(10000e-3)); + CHECK(parser_helper("10000e-2") == json(10000e-2)); + CHECK(parser_helper("10000e-1") == json(10000e-1)); + CHECK(parser_helper("10000e0") == json(10000e0)); + CHECK(parser_helper("10000e1") == json(10000e1)); + CHECK(parser_helper("10000e2") == json(10000e2)); + CHECK(parser_helper("10000e3") == json(10000e3)); + CHECK(parser_helper("10000e4") == json(10000e4)); + + CHECK(parser_helper("-0e1") == json(-0e1)); + CHECK(parser_helper("-0E1") == json(-0e1)); + CHECK(parser_helper("-0E123") == json(-0e123)); + + // numbers after exponent + CHECK(parser_helper("10E0") == json(10e0)); + CHECK(parser_helper("10E1") == json(10e1)); + CHECK(parser_helper("10E2") == json(10e2)); + CHECK(parser_helper("10E3") == json(10e3)); + CHECK(parser_helper("10E4") == json(10e4)); + CHECK(parser_helper("10E5") == json(10e5)); + CHECK(parser_helper("10E6") == json(10e6)); + CHECK(parser_helper("10E7") == json(10e7)); + CHECK(parser_helper("10E8") == json(10e8)); + CHECK(parser_helper("10E9") == json(10e9)); + CHECK(parser_helper("10E+0") == json(10e0)); + CHECK(parser_helper("10E+1") == json(10e1)); + CHECK(parser_helper("10E+2") == json(10e2)); + CHECK(parser_helper("10E+3") == json(10e3)); + CHECK(parser_helper("10E+4") == json(10e4)); + CHECK(parser_helper("10E+5") == json(10e5)); + CHECK(parser_helper("10E+6") == json(10e6)); + CHECK(parser_helper("10E+7") == json(10e7)); + CHECK(parser_helper("10E+8") == json(10e8)); + CHECK(parser_helper("10E+9") == json(10e9)); + CHECK(parser_helper("10E-1") == json(10e-1)); + CHECK(parser_helper("10E-2") == json(10e-2)); + CHECK(parser_helper("10E-3") == json(10e-3)); + CHECK(parser_helper("10E-4") == json(10e-4)); + CHECK(parser_helper("10E-5") == json(10e-5)); + CHECK(parser_helper("10E-6") == json(10e-6)); + CHECK(parser_helper("10E-7") == json(10e-7)); + CHECK(parser_helper("10E-8") == json(10e-8)); + CHECK(parser_helper("10E-9") == json(10e-9)); + } + + SECTION("edge cases") + { + // From RFC8259, Section 6: + // Note that when such software is used, numbers that are + // integers and are in the range [-(2**53)+1, (2**53)-1] + // are interoperable in the sense that implementations will + // agree exactly on their numeric values. + + // -(2**53)+1 + CHECK(parser_helper("-9007199254740991").get() == -9007199254740991); + // (2**53)-1 + CHECK(parser_helper("9007199254740991").get() == 9007199254740991); + } + + SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64-bit integers) + { + // While RFC8259, Section 6 specifies a preference for support + // for ranges in range of IEEE 754-2008 binary64 (double precision) + // this does not accommodate 64-bit integers without loss of accuracy. + // As 64-bit integers are now widely used in software, it is desirable + // to expand support to the full 64 bit (signed and unsigned) range + // i.e. -(2**63) -> (2**64)-1. + + // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1)) + CHECK(parser_helper("-9223372036854775808").get() == -9223372036854775807 - 1); + // (2**63)-1 + CHECK(parser_helper("9223372036854775807").get() == 9223372036854775807); + // (2**64)-1 + CHECK(parser_helper("18446744073709551615").get() == 18446744073709551615u); + } + } + + SECTION("floating-point") + { + SECTION("without exponent") + { + CHECK(parser_helper("-128.5") == json(-128.5)); + CHECK(parser_helper("0.999") == json(0.999)); + CHECK(parser_helper("128.5") == json(128.5)); + CHECK(parser_helper("-0.0") == json(-0.0)); + } + + SECTION("with exponent") + { + CHECK(parser_helper("-128.5E3") == json(-128.5E3)); + CHECK(parser_helper("-128.5E-3") == json(-128.5E-3)); + CHECK(parser_helper("-0.0e1") == json(-0.0e1)); + CHECK(parser_helper("-0.0E1") == json(-0.0e1)); + } + } + + SECTION("overflow") + { + // overflows during parsing yield an exception + CHECK_THROWS_WITH_AS(parser_helper("1.18973e+4932").empty(), "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'", json::out_of_range&); + } + + SECTION("invalid numbers") + { + // numbers must not begin with "+" + CHECK_THROWS_AS(parser_helper("+1"), json::parse_error&); + CHECK_THROWS_AS(parser_helper("+0"), json::parse_error&); + + CHECK_THROWS_WITH_AS(parser_helper("01"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - unexpected number literal; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-01"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - unexpected number literal; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("--1"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '--'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '1.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E-"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '1E-'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1.E1"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '1.E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-1E"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '-1E'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E#"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '-0E#'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E-#"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '-0E-#'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0#"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: '-0#'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0.0:"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - unexpected ':'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0.0Z"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: '-0.0Z'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0E123:"), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - unexpected ':'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0e0-:"), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-:'; expected end of input", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0e-:"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid number; expected digit after exponent sign; last read: '-0e-:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0f"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: '-0f'; expected end of input", json::parse_error&); + } + } + } + + SECTION("accept") + { + SECTION("null") + { + CHECK(accept_helper("null")); + } + + SECTION("true") + { + CHECK(accept_helper("true")); + } + + SECTION("false") + { + CHECK(accept_helper("false")); + } + + SECTION("array") + { + SECTION("empty array") + { + CHECK(accept_helper("[]")); + CHECK(accept_helper("[ ]")); + } + + SECTION("nonempty array") + { + CHECK(accept_helper("[true, false, null]")); + } + } + + SECTION("object") + { + SECTION("empty object") + { + CHECK(accept_helper("{}")); + CHECK(accept_helper("{ }")); + } + + SECTION("nonempty object") + { + CHECK(accept_helper("{\"\": true, \"one\": 1, \"two\": null}")); + } + } + + SECTION("string") + { + // empty string + CHECK(accept_helper("\"\"")); + + SECTION("errors") + { + // error: tab in string + CHECK(accept_helper("\"\t\"") == false); + // error: newline in string + CHECK(accept_helper("\"\n\"") == false); + CHECK(accept_helper("\"\r\"") == false); + // error: backspace in string + CHECK(accept_helper("\"\b\"") == false); + // improve code coverage + CHECK(accept_helper("\uFF01") == false); + CHECK(accept_helper("[-4:1,]") == false); + // unescaped control characters + CHECK(accept_helper("\"\x00\"") == false); // NOLINT(bugprone-string-literal-with-embedded-nul) + CHECK(accept_helper("\"\x01\"") == false); + CHECK(accept_helper("\"\x02\"") == false); + CHECK(accept_helper("\"\x03\"") == false); + CHECK(accept_helper("\"\x04\"") == false); + CHECK(accept_helper("\"\x05\"") == false); + CHECK(accept_helper("\"\x06\"") == false); + CHECK(accept_helper("\"\x07\"") == false); + CHECK(accept_helper("\"\x08\"") == false); + CHECK(accept_helper("\"\x09\"") == false); + CHECK(accept_helper("\"\x0a\"") == false); + CHECK(accept_helper("\"\x0b\"") == false); + CHECK(accept_helper("\"\x0c\"") == false); + CHECK(accept_helper("\"\x0d\"") == false); + CHECK(accept_helper("\"\x0e\"") == false); + CHECK(accept_helper("\"\x0f\"") == false); + CHECK(accept_helper("\"\x10\"") == false); + CHECK(accept_helper("\"\x11\"") == false); + CHECK(accept_helper("\"\x12\"") == false); + CHECK(accept_helper("\"\x13\"") == false); + CHECK(accept_helper("\"\x14\"") == false); + CHECK(accept_helper("\"\x15\"") == false); + CHECK(accept_helper("\"\x16\"") == false); + CHECK(accept_helper("\"\x17\"") == false); + CHECK(accept_helper("\"\x18\"") == false); + CHECK(accept_helper("\"\x19\"") == false); + CHECK(accept_helper("\"\x1a\"") == false); + CHECK(accept_helper("\"\x1b\"") == false); + CHECK(accept_helper("\"\x1c\"") == false); + CHECK(accept_helper("\"\x1d\"") == false); + CHECK(accept_helper("\"\x1e\"") == false); + CHECK(accept_helper("\"\x1f\"") == false); + } + + SECTION("escaped") + { + // quotation mark "\"" + auto r1 = R"("\"")"_json; + CHECK(accept_helper("\"\\\"\"")); + // reverse solidus "\\" + auto r2 = R"("\\")"_json; + CHECK(accept_helper("\"\\\\\"")); + // solidus + CHECK(accept_helper("\"\\/\"")); + // backspace + CHECK(accept_helper("\"\\b\"")); + // formfeed + CHECK(accept_helper("\"\\f\"")); + // newline + CHECK(accept_helper("\"\\n\"")); + // carriage return + CHECK(accept_helper("\"\\r\"")); + // horizontal tab + CHECK(accept_helper("\"\\t\"")); + + CHECK(accept_helper("\"\\u0001\"")); + CHECK(accept_helper("\"\\u000a\"")); + CHECK(accept_helper("\"\\u00b0\"")); + CHECK(accept_helper("\"\\u0c00\"")); + CHECK(accept_helper("\"\\ud000\"")); + CHECK(accept_helper("\"\\u000E\"")); + CHECK(accept_helper("\"\\u00F0\"")); + CHECK(accept_helper("\"\\u0100\"")); + CHECK(accept_helper("\"\\u2000\"")); + CHECK(accept_helper("\"\\uFFFF\"")); + CHECK(accept_helper("\"\\u20AC\"")); + CHECK(accept_helper("\"€\"")); + CHECK(accept_helper("\"🎈\"")); + + CHECK(accept_helper("\"\\ud80c\\udc60\"")); + CHECK(accept_helper("\"\\ud83c\\udf1e\"")); + } + } + + SECTION("number") + { + SECTION("integers") + { + SECTION("without exponent") + { + CHECK(accept_helper("-128")); + CHECK(accept_helper("-0")); + CHECK(accept_helper("0")); + CHECK(accept_helper("128")); + } + + SECTION("with exponent") + { + CHECK(accept_helper("0e1")); + CHECK(accept_helper("0E1")); + + CHECK(accept_helper("10000E-4")); + CHECK(accept_helper("10000E-3")); + CHECK(accept_helper("10000E-2")); + CHECK(accept_helper("10000E-1")); + CHECK(accept_helper("10000E0")); + CHECK(accept_helper("10000E1")); + CHECK(accept_helper("10000E2")); + CHECK(accept_helper("10000E3")); + CHECK(accept_helper("10000E4")); + + CHECK(accept_helper("10000e-4")); + CHECK(accept_helper("10000e-3")); + CHECK(accept_helper("10000e-2")); + CHECK(accept_helper("10000e-1")); + CHECK(accept_helper("10000e0")); + CHECK(accept_helper("10000e1")); + CHECK(accept_helper("10000e2")); + CHECK(accept_helper("10000e3")); + CHECK(accept_helper("10000e4")); + + CHECK(accept_helper("-0e1")); + CHECK(accept_helper("-0E1")); + CHECK(accept_helper("-0E123")); + } + + SECTION("edge cases") + { + // From RFC8259, Section 6: + // Note that when such software is used, numbers that are + // integers and are in the range [-(2**53)+1, (2**53)-1] + // are interoperable in the sense that implementations will + // agree exactly on their numeric values. + + // -(2**53)+1 + CHECK(accept_helper("-9007199254740991")); + // (2**53)-1 + CHECK(accept_helper("9007199254740991")); + } + + SECTION("over the edge cases") // issue #178 - Integer conversion to unsigned (incorrect handling of 64-bit integers) + { + // While RFC8259, Section 6 specifies a preference for support + // for ranges in range of IEEE 754-2008 binary64 (double precision) + // this does not accommodate 64 bit integers without loss of accuracy. + // As 64 bit integers are now widely used in software, it is desirable + // to expand support to the full 64 bit (signed and unsigned) range + // i.e. -(2**63) -> (2**64)-1. + + // -(2**63) ** Note: compilers see negative literals as negated positive numbers (hence the -1)) + CHECK(accept_helper("-9223372036854775808")); + // (2**63)-1 + CHECK(accept_helper("9223372036854775807")); + // (2**64)-1 + CHECK(accept_helper("18446744073709551615")); + } + } + + SECTION("floating-point") + { + SECTION("without exponent") + { + CHECK(accept_helper("-128.5")); + CHECK(accept_helper("0.999")); + CHECK(accept_helper("128.5")); + CHECK(accept_helper("-0.0")); + } + + SECTION("with exponent") + { + CHECK(accept_helper("-128.5E3")); + CHECK(accept_helper("-128.5E-3")); + CHECK(accept_helper("-0.0e1")); + CHECK(accept_helper("-0.0E1")); + } + } + + SECTION("overflow") + { + // overflows during parsing + CHECK(!accept_helper("1.18973e+4932")); + } + + SECTION("invalid numbers") + { + CHECK(accept_helper("01") == false); + CHECK(accept_helper("--1") == false); + CHECK(accept_helper("1.") == false); + CHECK(accept_helper("1E") == false); + CHECK(accept_helper("1E-") == false); + CHECK(accept_helper("1.E1") == false); + CHECK(accept_helper("-1E") == false); + CHECK(accept_helper("-0E#") == false); + CHECK(accept_helper("-0E-#") == false); + CHECK(accept_helper("-0#") == false); + CHECK(accept_helper("-0.0:") == false); + CHECK(accept_helper("-0.0Z") == false); + CHECK(accept_helper("-0E123:") == false); + CHECK(accept_helper("-0e0-:") == false); + CHECK(accept_helper("-0e-:") == false); + CHECK(accept_helper("-0f") == false); + + // numbers must not begin with "+" + CHECK(accept_helper("+1") == false); + CHECK(accept_helper("+0") == false); + } + } + } + + SECTION("parse errors") + { + // unexpected end of number + CHECK_THROWS_WITH_AS(parser_helper("0."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '0.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("--"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '--'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-0."), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid number; expected digit after '.'; last read: '-0.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-."), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("-:"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid number; expected digit after '-'; last read: '-:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("0.:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected digit after '.'; last read: '0.:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("e."), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - invalid literal; last read: 'e'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e/"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e/'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1e:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1e:'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E."), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E.'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E/"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E/'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("1E:"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid number; expected '+', '-', or digit after exponent; last read: '1E:'", json::parse_error&); + + // unexpected end of null + CHECK_THROWS_WITH_AS(parser_helper("n"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 'n'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nu"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'nu'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nul"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nul'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nulk"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nulk'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("nulm"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'nulm'", json::parse_error&); + + // unexpected end of true + CHECK_THROWS_WITH_AS(parser_helper("t"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 't'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("tr"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'tr'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("tru"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'tru'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("trud"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'trud'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("truf"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'truf'", json::parse_error&); + + // unexpected end of false + CHECK_THROWS_WITH_AS(parser_helper("f"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid literal; last read: 'f'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fa"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid literal; last read: 'fa'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fal"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: 'fal'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("fals"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'fals'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("falsd"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'falsd'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("falsf"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid literal; last read: 'falsf'", json::parse_error&); + + // missing/unexpected end of array + CHECK_THROWS_WITH_AS(parser_helper("["), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1"), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing array - unexpected end of input; expected ']'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1,"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("[1,]"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - unexpected ']'; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("]"), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - unexpected ']'; expected '[', '{', or a literal", json::parse_error&); + + // missing/unexpected end of object + CHECK_THROWS_WITH_AS(parser_helper("{"), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing object key - unexpected end of input; expected string literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\""), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing object separator - unexpected end of input; expected ':'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":"), + "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":}"), + "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - unexpected '}'; expected '[', '{', or a literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("{\"foo\":1,}"), + "[json.exception.parse_error.101] parse error at line 1, column 10: syntax error while parsing object key - unexpected '}'; expected string literal", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("}"), + "[json.exception.parse_error.101] parse error at line 1, column 1: syntax error while parsing value - unexpected '}'; expected '[', '{', or a literal", json::parse_error&); + + // missing/unexpected end of string + CHECK_THROWS_WITH_AS(parser_helper("\""), + "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid string: missing closing quote; last read: '\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\\""), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: missing closing quote; last read: '\"\\\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u\""), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u0\""), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u0\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u01\""), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u01\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u012\""), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u012\"'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u"), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u0"), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u0'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u01"), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u01'", json::parse_error&); + CHECK_THROWS_WITH_AS(parser_helper("\"\\u012"), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\u012'", json::parse_error&); + + // invalid escapes + for (int c = 1; c < 128; ++c) + { + auto s = std::string("\"\\") + std::string(1, static_cast(c)) + "\""; + + switch (c) + { + // valid escapes + case ('"'): + case ('\\'): + case ('/'): + case ('b'): + case ('f'): + case ('n'): + case ('r'): + case ('t'): + { + CHECK_NOTHROW(parser_helper(s)); + break; + } + + // \u must be followed with four numbers, so we skip it here + case ('u'): + { + break; + } + + // any other combination of backslash and character is invalid + default: + { + CHECK_THROWS_AS(parser_helper(s), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s), + "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid string: forbidden character after backslash; last read: '\"\\" + std::string(1, static_cast(c)) + "'"); + } + break; + } + } + } + + // invalid \uxxxx escapes + { + // check whether character is a valid hex character + const auto valid = [](int c) + { + switch (c) + { + case ('0'): + case ('1'): + case ('2'): + case ('3'): + case ('4'): + case ('5'): + case ('6'): + case ('7'): + case ('8'): + case ('9'): + case ('a'): + case ('b'): + case ('c'): + case ('d'): + case ('e'): + case ('f'): + case ('A'): + case ('B'): + case ('C'): + case ('D'): + case ('E'): + case ('F'): + { + return true; + } + + default: + { + return false; + } + } + }; + + for (int c = 1; c < 128; ++c) + { + std::string const s = "\"\\u"; + + // create a string with the iterated character at each position + auto s1 = s + "000" + std::string(1, static_cast(c)) + "\""; + auto s2 = s + "00" + std::string(1, static_cast(c)) + "0\""; + auto s3 = s + "0" + std::string(1, static_cast(c)) + "00\""; + auto s4 = s + std::string(1, static_cast(c)) + "000\""; + + if (valid(c)) + { + CAPTURE(s1) + CHECK_NOTHROW(parser_helper(s1)); + CAPTURE(s2) + CHECK_NOTHROW(parser_helper(s2)); + CAPTURE(s3) + CHECK_NOTHROW(parser_helper(s3)); + CAPTURE(s4) + CHECK_NOTHROW(parser_helper(s4)); + } + else + { + CAPTURE(s1) + CHECK_THROWS_AS(parser_helper(s1), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s1), + "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s1.substr(0, 7) + "'"); + } + + CAPTURE(s2) + CHECK_THROWS_AS(parser_helper(s2), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s2), + "[json.exception.parse_error.101] parse error at line 1, column 6: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s2.substr(0, 6) + "'"); + } + + CAPTURE(s3) + CHECK_THROWS_AS(parser_helper(s3), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s3), + "[json.exception.parse_error.101] parse error at line 1, column 5: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s3.substr(0, 5) + "'"); + } + + CAPTURE(s4) + CHECK_THROWS_AS(parser_helper(s4), json::parse_error&); + // only check error message if c is not a control character + if (c > 0x1f) + { + CHECK_THROWS_WITH_STD_STR(parser_helper(s4), + "[json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '" + s4.substr(0, 4) + "'"); + } + } + } + } + + json _; + + // missing part of a surrogate pair + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\""), "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\"'", json::parse_error&); + // invalid surrogate pair + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\uD80C\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\uD80C'", json::parse_error&); + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\u0000\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\u0000'", json::parse_error&); + CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD80C\\uFFFF\""), + "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD80C\\uFFFF'", json::parse_error&); + } + + SECTION("parse errors (accept)") + { + // unexpected end of number + CHECK(accept_helper("0.") == false); + CHECK(accept_helper("-") == false); + CHECK(accept_helper("--") == false); + CHECK(accept_helper("-0.") == false); + CHECK(accept_helper("-.") == false); + CHECK(accept_helper("-:") == false); + CHECK(accept_helper("0.:") == false); + CHECK(accept_helper("e.") == false); + CHECK(accept_helper("1e.") == false); + CHECK(accept_helper("1e/") == false); + CHECK(accept_helper("1e:") == false); + CHECK(accept_helper("1E.") == false); + CHECK(accept_helper("1E/") == false); + CHECK(accept_helper("1E:") == false); + + // unexpected end of null + CHECK(accept_helper("n") == false); + CHECK(accept_helper("nu") == false); + CHECK(accept_helper("nul") == false); + + // unexpected end of true + CHECK(accept_helper("t") == false); + CHECK(accept_helper("tr") == false); + CHECK(accept_helper("tru") == false); + + // unexpected end of false + CHECK(accept_helper("f") == false); + CHECK(accept_helper("fa") == false); + CHECK(accept_helper("fal") == false); + CHECK(accept_helper("fals") == false); + + // missing/unexpected end of array + CHECK(accept_helper("[") == false); + CHECK(accept_helper("[1") == false); + CHECK(accept_helper("[1,") == false); + CHECK(accept_helper("[1,]") == false); + CHECK(accept_helper("]") == false); + + // missing/unexpected end of object + CHECK(accept_helper("{") == false); + CHECK(accept_helper("{\"foo\"") == false); + CHECK(accept_helper("{\"foo\":") == false); + CHECK(accept_helper("{\"foo\":}") == false); + CHECK(accept_helper("{\"foo\":1,}") == false); + CHECK(accept_helper("}") == false); + + // missing/unexpected end of string + CHECK(accept_helper("\"") == false); + CHECK(accept_helper("\"\\\"") == false); + CHECK(accept_helper("\"\\u\"") == false); + CHECK(accept_helper("\"\\u0\"") == false); + CHECK(accept_helper("\"\\u01\"") == false); + CHECK(accept_helper("\"\\u012\"") == false); + CHECK(accept_helper("\"\\u") == false); + CHECK(accept_helper("\"\\u0") == false); + CHECK(accept_helper("\"\\u01") == false); + CHECK(accept_helper("\"\\u012") == false); + + // unget of newline + CHECK(parser_helper("\n123\n") == 123); + + // invalid escapes + for (int c = 1; c < 128; ++c) + { + auto s = std::string("\"\\") + std::string(1, static_cast(c)) + "\""; + + switch (c) + { + // valid escapes + case ('"'): + case ('\\'): + case ('/'): + case ('b'): + case ('f'): + case ('n'): + case ('r'): + case ('t'): + { + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept()); + break; + } + + // \u must be followed with four numbers, so we skip it here + case ('u'): + { + break; + } + + // any other combination of backslash and character is invalid + default: + { + CHECK(json::parser(nlohmann::detail::input_adapter(s)).accept() == false); + break; + } + } + } + + // invalid \uxxxx escapes + { + // check whether character is a valid hex character + const auto valid = [](int c) + { + switch (c) + { + case ('0'): + case ('1'): + case ('2'): + case ('3'): + case ('4'): + case ('5'): + case ('6'): + case ('7'): + case ('8'): + case ('9'): + case ('a'): + case ('b'): + case ('c'): + case ('d'): + case ('e'): + case ('f'): + case ('A'): + case ('B'): + case ('C'): + case ('D'): + case ('E'): + case ('F'): + { + return true; + } + + default: + { + return false; + } + } + }; + + for (int c = 1; c < 128; ++c) + { + std::string const s = "\"\\u"; + + // create a string with the iterated character at each position + const auto s1 = s + "000" + std::string(1, static_cast(c)) + "\""; + const auto s2 = s + "00" + std::string(1, static_cast(c)) + "0\""; + const auto s3 = s + "0" + std::string(1, static_cast(c)) + "00\""; + const auto s4 = s + std::string(1, static_cast(c)) + "000\""; + + if (valid(c)) + { + CAPTURE(s1) + CHECK(json::parser(nlohmann::detail::input_adapter(s1)).accept()); + CAPTURE(s2) + CHECK(json::parser(nlohmann::detail::input_adapter(s2)).accept()); + CAPTURE(s3) + CHECK(json::parser(nlohmann::detail::input_adapter(s3)).accept()); + CAPTURE(s4) + CHECK(json::parser(nlohmann::detail::input_adapter(s4)).accept()); + } + else + { + CAPTURE(s1) + CHECK(json::parser(nlohmann::detail::input_adapter(s1)).accept() == false); + + CAPTURE(s2) + CHECK(json::parser(nlohmann::detail::input_adapter(s2)).accept() == false); + + CAPTURE(s3) + CHECK(json::parser(nlohmann::detail::input_adapter(s3)).accept() == false); + + CAPTURE(s4) + CHECK(json::parser(nlohmann::detail::input_adapter(s4)).accept() == false); + } + } + } + + // missing part of a surrogate pair + CHECK(accept_helper("\"\\uD80C\"") == false); + // invalid surrogate pair + CHECK(accept_helper("\"\\uD80C\\uD80C\"") == false); + CHECK(accept_helper("\"\\uD80C\\u0000\"") == false); + CHECK(accept_helper("\"\\uD80C\\uFFFF\"") == false); + } + + SECTION("tests found by mutate++") + { + // test case to make sure no comma precedes the first key + CHECK_THROWS_WITH_AS(parser_helper("{,\"key\": false}"), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing object key - unexpected ','; expected string literal", json::parse_error&); + // test case to make sure an object is properly closed + CHECK_THROWS_WITH_AS(parser_helper("[{\"key\": false true]"), "[json.exception.parse_error.101] parse error at line 1, column 19: syntax error while parsing object - unexpected true literal; expected '}'", json::parse_error&); + + // test case to make sure the callback is properly evaluated after reading a key + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t event, json& /*unused*/) noexcept + { + return event != json::parse_event_t::key; + }; + + json x = json::parse("{\"key\": false}", cb); + CHECK(x == json::object()); + } + } + + SECTION("callback function") + { + const auto* s_object = R"( + { + "foo": 2, + "bar": { + "baz": 1 + } + } + )"; + + const auto* s_array = R"( + [1,2,[3,4,5],4,5] + )"; + + const auto* structured_array = R"( + [ + 1, + { + "foo": "bar" + }, + { + "qux": "baz" + } + ] + )"; + + SECTION("filter nothing") + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return true; + }); + + CHECK (j_object == json({{"foo", 2}, {"bar", {{"baz", 1}}}})); + + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return true; + }); + + CHECK (j_array == json({1, 2, {3, 4, 5}, 4, 5})); + } + + SECTION("filter everything") + { + json const j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return false; + }); + + // the top-level object will be discarded, leaving a null + CHECK (j_object.is_null()); + + json const j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + { + return false; + }); + + // the top-level array will be discarded, leaving a null + CHECK (j_array.is_null()); + } + + SECTION("filter specific element") + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t event, const json & j) noexcept + { + // filter all number(2) elements + return event != json::parse_event_t::value || j != json(2); + }); + + CHECK (j_object == json({{"bar", {{"baz", 1}}}})); + + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t event, const json & j) noexcept + { + return event != json::parse_event_t::value || j != json(2); + }); + + CHECK (j_array == json({1, {3, 4, 5}, 4, 5})); + } + + SECTION("filter object in array") + { + json j_filtered1 = json::parse(structured_array, [](int /*unused*/, json::parse_event_t e, const json & parsed) + { + return !(e == json::parse_event_t::object_end && parsed.contains("foo")); + }); + + // the specified object will be discarded, and removed. + CHECK (j_filtered1.size() == 2); + CHECK (j_filtered1 == json({1, {{"qux", "baz"}}})); + + json j_filtered2 = json::parse(structured_array, [](int /*unused*/, json::parse_event_t e, const json& /*parsed*/) noexcept + { + return e != json::parse_event_t::object_end; + }); + + // removed all objects in array. + CHECK (j_filtered2.size() == 1); + CHECK (j_filtered2 == json({1})); + } + + SECTION("filter specific events") + { + SECTION("first closing event") + { + { + json j_object = json::parse(s_object, [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + static bool first = true; + if (e == json::parse_event_t::object_end && first) + { + first = false; + return false; + } + + return true; + }); + + // the first completed object will be discarded + CHECK (j_object == json({{"foo", 2}})); + } + + { + json j_array = json::parse(s_array, [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + static bool first = true; + if (e == json::parse_event_t::array_end && first) + { + first = false; + return false; + } + + return true; + }); + + // the first completed array will be discarded + CHECK (j_array == json({1, 2, 4, 5})); + } + } + } + + SECTION("special cases") + { + // the following test cases cover the situation in which an empty + // object and array is discarded only after the closing character + // has been read + + json j_empty_object = json::parse("{}", [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + return e != json::parse_event_t::object_end; + }); + CHECK(j_empty_object == json()); + + json j_empty_array = json::parse("[]", [](int /*unused*/, json::parse_event_t e, const json& /*unused*/) noexcept + { + return e != json::parse_event_t::array_end; + }); + CHECK(j_empty_array == json()); + } + } + + SECTION("constructing from contiguous containers") + { + SECTION("from std::vector") + { + std::vector v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::array") + { + std::array v { {'t', 'r', 'u', 'e'} }; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from array") + { + uint8_t v[] = {'t', 'r', 'u', 'e'}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from char literal") + { + CHECK(parser_helper("true") == json(true)); + } + + SECTION("from std::string") + { + std::string v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::initializer_list") + { + std::initializer_list const v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + + SECTION("from std::valarray") + { + std::valarray v = {'t', 'r', 'u', 'e'}; + json j; + json::parser(nlohmann::detail::input_adapter(std::begin(v), std::end(v))).parse(true, j); + CHECK(j == json(true)); + } + } + + SECTION("improve test coverage") + { + SECTION("parser with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + + CHECK(json::parse("{\"foo\": true:", cb, false).is_discarded()); + + json _; + CHECK_THROWS_WITH_AS(_ = json::parse("{\"foo\": true:", cb), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing object - unexpected ':'; expected '}'", json::parse_error&); + + CHECK_THROWS_WITH_AS(_ = json::parse("1.18973e+4932", cb), "[json.exception.out_of_range.406] number overflow parsing '1.18973e+4932'", json::out_of_range&); + } + + SECTION("SAX parser") + { + SECTION("} without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("{}", &s) == false); + } + + SECTION("} with value") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true}", &s) == false); + } + + SECTION("second key") + { + SaxCountdown s(3); + CHECK(json::sax_parse("{\"k1\": true, \"k2\": false}", &s) == false); + } + + SECTION("] without value") + { + SaxCountdown s(1); + CHECK(json::sax_parse("[]", &s) == false); + } + + SECTION("] with value") + { + SaxCountdown s(2); + CHECK(json::sax_parse("[1]", &s) == false); + } + + SECTION("float") + { + SaxCountdown s(0); + CHECK(json::sax_parse("3.14", &s) == false); + } + + SECTION("false") + { + SaxCountdown s(0); + CHECK(json::sax_parse("false", &s) == false); + } + + SECTION("null") + { + SaxCountdown s(0); + CHECK(json::sax_parse("null", &s) == false); + } + + SECTION("true") + { + SaxCountdown s(0); + CHECK(json::sax_parse("true", &s) == false); + } + + SECTION("unsigned") + { + SaxCountdown s(0); + CHECK(json::sax_parse("12", &s) == false); + } + + SECTION("integer") + { + SaxCountdown s(0); + CHECK(json::sax_parse("-12", &s) == false); + } + + SECTION("string") + { + SaxCountdown s(0); + CHECK(json::sax_parse("\"foo\"", &s) == false); + } + } + } + + SECTION("error messages for comments") + { + json _; + CHECK_THROWS_WITH_AS(_ = json::parse("/a", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid comment; expecting '/' or '*' after '/'; last read: '/a'", json::parse_error); + CHECK_THROWS_WITH_AS(_ = json::parse("/*", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid comment; missing closing '*/'; last read: '/*'", json::parse_error); + } + + // Macro for all test cases for start_pos and end_pos +#define SETUP_TESTCASES() \ + SECTION("with callback") \ + { \ + SECTION("filter nothing") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept \ + { \ + return true; \ + }; \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, expected, cb); \ + } \ + SECTION("filter element") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t event, json& j) noexcept \ + { \ + return (event != json::parse_event_t::key && event != json::parse_event_t::value) || j != json("a"); \ + }; \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, filteredExpected, cb); \ + } \ + } \ + SECTION("without callback") \ + { \ + validate_start_end_pos_for_nested_obj_helper(nested_type_json_str, root_type_json_str, expected); \ + } + + SECTION("retrieve start position and end position") + { + SECTION("for object") + { + // Create an object with spaces to test the start and end positions. Spaces will not be included in the + // JSON object, however, the start and end positions should include the spaces from the input JSON string. + const std::string nested_type_json_str = R"({ "a": 1,"b" : "test1"})"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test2"})"; + auto expected = json({{"nested", {{"a", 1}, {"b", "test1"}}}, {"anotherValue", "test2"}}); + auto filteredExpected = expected; + filteredExpected["nested"].erase("a"); + + SETUP_TESTCASES() + } + + SECTION("for array") + { + const std::string nested_type_json_str = R"(["a", "test", 45])"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {"a", "test", 45}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"] = json({"test", 45}); + SETUP_TESTCASES() + } + + SECTION("for array with objects") + { + const std::string nested_type_json_str = R"([{"a": 1, "b": "test"}, {"c": 2, "d": "test2"}])"; + const std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {{{"a", 1}, {"b", "test"}}, {{"c", 2}, {"d", "test2"}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"][0].erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_array = j["nested"]; + const auto& nested_obj = nested_array[0]; + CHECK(nested_type_json_str.substr(1, 21) == root_type_json_str.substr(nested_obj.start_pos(), nested_obj.end_pos() - nested_obj.start_pos())); + CHECK(nested_type_json_str.substr(24, 22) == root_type_json_str.substr(nested_array[1].start_pos(), nested_array[1].end_pos() - nested_array[1].start_pos())); + } + + SECTION("for two levels of nesting objects") + { + const std::string nested_type_json_str = R"({"nested2": {"b": "test"}})"; + const std::string root_type_json_str = R"({ "a": 2, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"a", 2}, {"nested", {{"nested2", {{"b", "test"}}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_obj = j["nested"]["nested2"]; + CHECK(nested_type_json_str.substr(12, 13) == root_type_json_str.substr(nested_obj.start_pos(), nested_obj.end_pos() - nested_obj.start_pos())); + } + + SECTION("for simple types") + { + SECTION("no nested") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + + // 1. string type + std::string json_str = R"("test")"; + auto j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, "test"); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, true); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str, cb); + validate_generated_json_and_start_end_pos_helper(json_str, j, nullptr); + } + + SECTION("without callback") + { + // 1. string type + std::string json_str = R"("test")"; + auto j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, "test"); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1); + + json_str = R"(1.001239923)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1.001239923); + + json_str = R"(1.123812389000000)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, 1.123812389); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, true); + + json_str = R"(false)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, false); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str); + validate_generated_json_and_start_end_pos_helper(json_str, j, nullptr); + } + } + + SECTION("string type") + { + const std::string nested_type_json_str = R"("test")"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", "test"}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("number type") + { + const std::string nested_type_json_str = R"(2)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", 2}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("boolean type") + { + const std::string nested_type_json_str = R"(true)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", true}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + + SECTION("null type") + { + const std::string nested_type_json_str = R"(null)"; + const std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", nullptr}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + } + } + SECTION("with leading whitespace and newlines around root JSON") + { + const std::string initial_whitespace = R"( + + )"; + const std::string nested_type_json_str = R"({ + "a": 1, + "nested": { + "b": "test" + }, + "anotherValue": "test" + })"; + const std::string end_whitespace = R"( + + )"; + const std::string root_type_json_str = initial_whitespace + nested_type_json_str + end_whitespace; + + auto expected = json({{"a", 1}, {"nested", {{"b", "test"}}}, {"anotherValue", "test"}}); + + auto j = json::parse(root_type_json_str); + + // 2. Check if the generated JSON is as expected + CHECK(j == expected); + + // 3. Check if the start and end positions do not include the surrounding whitespace + CHECK(j.start_pos() == initial_whitespace.size()); + CHECK(j.end_pos() == root_type_json_str.size() - end_whitespace.size()); + } + } +} diff --git a/tests/src/unit-deserialization.cpp b/tests/src/unit-deserialization.cpp index 0635c5e8e8..602a3ee740 100644 --- a/tests/src/unit-deserialization.cpp +++ b/tests/src/unit-deserialization.cpp @@ -587,7 +587,7 @@ TEST_CASE("deserialization") auto first = str.begin(); auto last = str.end(); json j; - json_sax_dom_parser sax(j, true); + json_sax_dom_parser sax(j, true); CHECK(json::sax_parse(proxy(first), proxy(last), &sax, input_format_t::json, false)); diff --git a/tests/src/unit-disabled_exceptions.cpp b/tests/src/unit-disabled_exceptions.cpp index 5adf8c4cb0..ef64eeeeff 100644 --- a/tests/src/unit-disabled_exceptions.cpp +++ b/tests/src/unit-disabled_exceptions.cpp @@ -19,10 +19,10 @@ using json = nlohmann::json; // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: - explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} + explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) { diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 38d01059d1..88a64a7b48 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -162,11 +162,11 @@ struct adl_serializer // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: explicit sax_no_exception(json& j) - : nlohmann::detail::json_sax_dom_parser(j, false) + : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) diff --git a/tests/src/unit-ubjson.cpp b/tests/src/unit-ubjson.cpp index 673ae43eda..a9d931cd94 100644 --- a/tests/src/unit-ubjson.cpp +++ b/tests/src/unit-ubjson.cpp @@ -1617,7 +1617,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); @@ -1631,7 +1631,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; });