From 3cd6c591e141223848a7900579eb3c673dadd8d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 17 Jul 2024 12:24:36 +0100 Subject: [PATCH 1/5] sr/json: drop support for draft 5 This is needed because the next commit introduces jsoncons for validating json schemas against their draft's metaschema and jsoncons does not have support for draft 5. Other schema registries do not support draft 5 either, so it is unlikely that we would have customers that need to use draft 5 explicitly (and can't just use the largely equivalent draft 4). --- src/v/pandaproxy/schema_registry/json.cc | 15 --------------- .../schema_registry/test/test_json_schema.cc | 8 -------- 2 files changed, 23 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 7fe607dfe3ace..f3dd9d3734129 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -88,7 +88,6 @@ namespace { // from https://json-schema.org/draft-04/schema, this is used to meta-validate a // jsonschema. 
-// note: draft5 uses the same metaschema as draft4 constexpr std::string_view json_draft_4_metaschema = R"json( { "id": "http://json-schema.org/draft-04/schema#", @@ -656,7 +655,6 @@ ss::future<> check_references(sharded_store& store, canonical_schema schema) { // this is the list of supported dialects enum class json_schema_dialect { draft4, - draft5, draft6, draft7, }; @@ -668,8 +666,6 @@ to_uri(json_schema_dialect draft, bool strip = false) { switch (draft) { case draft4: return "http://json-schema.org/draft-04/schema#"; - case draft5: - return "http://json-schema.org/draft-05/schema#"; case draft6: return "http://json-schema.org/draft-06/schema#"; case draft7: @@ -689,7 +685,6 @@ constexpr std::optional from_uri(std::string_view uri) { using enum json_schema_dialect; return string_switch>{uri} .match_all(to_uri(draft4), to_uri(draft4, true), draft4) - .match_all(to_uri(draft5), to_uri(draft5, true), draft5) .match_all(to_uri(draft6), to_uri(draft6, true), draft6) .match_all(to_uri(draft7), to_uri(draft7, true), draft7) .default_match(std::nullopt); @@ -713,12 +708,6 @@ json::SchemaDocument const& get_metaschema() { switch (Dialect) { case json_schema_dialect::draft4: return json_draft_4_metaschema; - case json_schema_dialect::draft5: - // note1: draft5 uses the same metaschema as draft4. 
- // note2: this case is handled for completeness but - // get_metaschema() should not be instantiated because - // the codegen would be redundant - return json_draft_4_metaschema; case json_schema_dialect::draft6: return json_draft_6_metaschema; case json_schema_dialect::draft7: @@ -745,10 +734,6 @@ result validate_json_schema( switch (dialect) { case draft4: return get_metaschema(); - case draft5: - // NOTE: draft5 reuses the metaschema for draft4, so there is no - // need to instantiate get_metaschema - return get_metaschema(); case draft6: return get_metaschema(); case draft7: diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 4ca02886770d9..8f1bea322cb47 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -122,13 +122,6 @@ static constexpr auto valid_test_cases = std::to_array({ "type": "number", "minimum": 0, "exclusiveMinimum": false -})", - R"( -{ - "$schema": "http://json-schema.org/draft-05/schema#", - "type": "number", - "minimum": 0, - "exclusiveMinimum": false })", R"json( { @@ -162,7 +155,6 @@ static constexpr auto valid_test_cases = std::to_array({ )json", R"json({"$schema": "http://json-schema.org/draft-07/schema"})json", R"json({"$schema": "http://json-schema.org/draft-06/schema"})json", - R"json({"$schema": "http://json-schema.org/draft-05/schema"})json", R"json({"$schema": "http://json-schema.org/draft-04/schema"})json", }); SEASTAR_THREAD_TEST_CASE(test_make_valid_json_schema) { From 8ad6b26dc55332d241fff63e44ae39481aa59769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Mon, 22 Jul 2024 12:30:50 +0100 Subject: [PATCH 2/5] sr/json: add test for invalid metaschema * Add an error test case for a schema that is invalid for the specific draft's metaschema only. * Add an assertion that invalid schemas do raise an error. 
--- .../schema_registry/test/test_json_schema.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 8f1bea322cb47..5fb99f3960478 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -76,6 +76,18 @@ static const auto error_test_cases = std::to_array({ pps::error_info{ pps::error_code::schema_invalid, "Unsupported json schema dialect: '42'"}}, + // exclusiveMinimum is a bool in draft 4 but it is a double in draft 6 + error_test_case{ + R"( +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "type": "number", + "minimum": 0, + "exclusiveMinimum": false +})", + pps::error_info{ + pps::error_code::schema_invalid, + R"(Invalid json schema: '{"$schema":"http://json-schema.org/draft-06/schema#","exclusiveMinimum":false,"minimum":0,"type":"number"}'. Error: '/exclusiveMinimum: Expected number, found boolean')"}}, }); SEASTAR_THREAD_TEST_CASE(test_make_invalid_json_schema) { for (const auto& data : error_test_cases) { @@ -86,6 +98,8 @@ SEASTAR_THREAD_TEST_CASE(test_make_invalid_json_schema) { f.store, {pps::subject{"test"}, {data.def, pps::schema_type::json}}) .get(); + BOOST_CHECK_MESSAGE( + false, "terminated without an exception for invalid schema"); } catch (pps::exception const& e) { BOOST_CHECK_EQUAL(e.code(), data.err.code()); BOOST_WARN_MESSAGE( From ada73ff589b68adcc588456d1ac269cec55692c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Mon, 22 Jul 2024 14:31:55 +0100 Subject: [PATCH 3/5] cmake: import jsoncons To be used in the next commit. 
--- cmake/dependencies.cmake | 8 +++++++- licenses/third_party.md | 1 + src/v/pandaproxy/schema_registry/CMakeLists.txt | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 243c728007944..27d6c6b5f131e 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -59,6 +59,11 @@ fetch_dep(rapidjson TAG 14a5dd756e9bef26f9b53d3b4eb1b73c6a1794d5 SOURCE_SUBDIR redpanda_build) +FetchContent_Declare(jsoncons + URL https://github.com/danielaparker/jsoncons/archive/ffd2540bc9cfb54c16ef4d29d80622605d8dfbe8.tar.gz + URL_HASH MD5=8984d54668cdeb924fe1e37ea8dcc236 + OVERRIDE_FIND_PACKAGE) + fetch_dep(unordered_dense REPO https://github.com/redpanda-data/unordered_dense TAG 9338f301522a965309ecec58ce61f54a52fb5c22 @@ -142,7 +147,8 @@ FetchContent_MakeAvailable( wasmtime hdrhistogram ada - unordered_dense) + unordered_dense + jsoncons) add_library(Crc32c::crc32c ALIAS crc32c) add_library(aklomp::base64 ALIAS base64) diff --git a/licenses/third_party.md b/licenses/third_party.md index 7b9672fc419fe..b792ff0a92a8a 100644 --- a/licenses/third_party.md +++ b/licenses/third_party.md @@ -20,6 +20,7 @@ please keep this up to date with every new library use. 
| fmt | BSD | | HdrHistogram | BSD 2 | | hwloc | BSD | +| jsoncons | Boost Software License Version 1.0 | | krb5 | MIT | | libcxx | Apache License 2 | | libcxxabi | Apache License 2 | diff --git a/src/v/pandaproxy/schema_registry/CMakeLists.txt b/src/v/pandaproxy/schema_registry/CMakeLists.txt index 5b07df55d6030..631fab17db1d9 100644 --- a/src/v/pandaproxy/schema_registry/CMakeLists.txt +++ b/src/v/pandaproxy/schema_registry/CMakeLists.txt @@ -7,6 +7,7 @@ seastar_generate_swagger( find_package(Protobuf REQUIRED) find_package(Avro) +find_package(jsoncons REQUIRED) v_cc_library( NAME pandaproxy_schema_registry @@ -39,6 +40,7 @@ v_cc_library( Boost::iostreams protobuf::libprotobuf protobuf::libprotoc + jsoncons ) add_subdirectory(test) From adbe29dce996a9aa3f3a419492f97b093e0904aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 17 Jul 2024 12:19:46 +0100 Subject: [PATCH 4/5] sr/json: validate against metaschema with jsoncons rapidjson does not support validating schemas above draft 5, so previously we have been rewriting newer schemas in draft 4's metaschema in order to validate them. This is error prone and gets increasingly complicated for later drafts. Instead, this commit pulls in the jsoncons library which has support for validating schemas in drafts 4, 6, 7, 2019-09, 2020-12, which covers our expected use cases. 
--- src/v/pandaproxy/schema_registry/json.cc | 629 ++---------------- .../schema_registry/test/test_json_schema.cc | 3 +- 2 files changed, 38 insertions(+), 594 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index f3dd9d3734129..cc984a6fd1009 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -39,6 +39,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -86,559 +91,6 @@ ss::sstring json_schema_definition::name() const { return {_impl->name}; }; namespace { -// from https://json-schema.org/draft-04/schema, this is used to meta-validate a -// jsonschema. -constexpr std::string_view json_draft_4_metaschema = R"json( -{ - "id": "http://json-schema.org/draft-04/schema#", - "$schema": "http://json-schema.org/draft-04/schema#", - "description": "Core schema meta-schema", - "definitions": { - "schemaArray": { - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }, - "positiveInteger": { - "type": "integer", - "minimum": 0 - }, - "positiveIntegerDefault0": { - "allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ] - }, - "simpleTypes": { - "enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ] - }, - "stringArray": { - "type": "array", - "items": { "type": "string" }, - "minItems": 1, - "uniqueItems": true - } - }, - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "$schema": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "default": {}, - "multipleOf": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true - }, - "maximum": { - "type": "number" - }, - "exclusiveMaximum": { - "type": "boolean", - "default": false - }, - "minimum": { - "type": "number" - }, - "exclusiveMinimum": { - "type": "boolean", - "default": false - }, - "maxLength": { "$ref": 
"#/definitions/positiveInteger" }, - "minLength": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "pattern": { - "type": "string", - "format": "regex" - }, - "additionalItems": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "items": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/schemaArray" } - ], - "default": {} - }, - "maxItems": { "$ref": "#/definitions/positiveInteger" }, - "minItems": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "uniqueItems": { - "type": "boolean", - "default": false - }, - "maxProperties": { "$ref": "#/definitions/positiveInteger" }, - "minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" }, - "required": { "$ref": "#/definitions/stringArray" }, - "additionalProperties": { - "anyOf": [ - { "type": "boolean" }, - { "$ref": "#" } - ], - "default": {} - }, - "definitions": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "properties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "patternProperties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "dependencies": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/stringArray" } - ] - } - }, - "enum": { - "type": "array", - "minItems": 1, - "uniqueItems": true - }, - "type": { - "anyOf": [ - { "$ref": "#/definitions/simpleTypes" }, - { - "type": "array", - "items": { "$ref": "#/definitions/simpleTypes" }, - "minItems": 1, - "uniqueItems": true - } - ] - }, - "format": { "type": "string" }, - "allOf": { "$ref": "#/definitions/schemaArray" }, - "anyOf": { "$ref": "#/definitions/schemaArray" }, - "oneOf": { "$ref": "#/definitions/schemaArray" }, - "not": { "$ref": "#" } - }, - "dependencies": { - "exclusiveMaximum": [ "maximum" ], - "exclusiveMinimum": [ "minimum" ] - }, - "default": {} -} -)json"; - -/* - From 
https://json-schema.org/draft-06/schema, this is the draft6 metaschema - used to validate draft6 json schemas. - TODO It's implemented in the draft4 dialect, because the current version - of rapidjson only support draft4, change this when it's upgraded - For reference, this is the diff applied to the original metaschema: ---- draft6.json 2024-07-03 10:46:11.956695951 +0200 -+++ draft6.asdraft4.json 2024-07-05 15:55:10.472362720 +0200 -@@ -1,4 +1,4 @@ - { -- "$schema": "http://json-schema.org/draft-06/schema#", -- "$id": "http://json-schema.org/draft-06/schema#", -+ "$schema": "http://json-schema.org/draft-04/schema#", -+ "id": "http://json-schema.org/draft-06/schema#", - "title": "Core schema meta-schema", -@@ -65,3 +66,4 @@ - "type": "number", -- "exclusiveMinimum": 0 -+ "minimum": 0, -+ "exclusiveMinimum": true - }, -*/ -constexpr std::string_view json_draft_6_metaschema = R"json( -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "http://json-schema.org/draft-06/schema#", - "title": "Core schema meta-schema", - "definitions": { - "schemaArray": { - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }, - "nonNegativeInteger": { - "type": "integer", - "minimum": 0 - }, - "nonNegativeIntegerDefault0": { - "allOf": [ - { "$ref": "#/definitions/nonNegativeInteger" }, - { "default": 0 } - ] - }, - "simpleTypes": { - "enum": [ - "array", - "boolean", - "integer", - "null", - "number", - "object", - "string" - ] - }, - "stringArray": { - "type": "array", - "items": { "type": "string" }, - "uniqueItems": true, - "default": [] - } - }, - "type": ["object", "boolean"], - "properties": { - "$id": { - "type": "string", - "format": "uri-reference" - }, - "$schema": { - "type": "string", - "format": "uri" - }, - "$ref": { - "type": "string", - "format": "uri-reference" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "default": {}, - "examples": { - "type": "array", - "items": {} - }, - "multipleOf": { - "type": 
"number", - "minimum": 0, - "exclusiveMinimum": true - }, - "maximum": { - "type": "number" - }, - "exclusiveMaximum": { - "type": "number" - }, - "minimum": { - "type": "number" - }, - "exclusiveMinimum": { - "type": "number" - }, - "maxLength": { "$ref": "#/definitions/nonNegativeInteger" }, - "minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "pattern": { - "type": "string", - "format": "regex" - }, - "additionalItems": { "$ref": "#" }, - "items": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/schemaArray" } - ], - "default": {} - }, - "maxItems": { "$ref": "#/definitions/nonNegativeInteger" }, - "minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "uniqueItems": { - "type": "boolean", - "default": false - }, - "contains": { "$ref": "#" }, - "maxProperties": { "$ref": "#/definitions/nonNegativeInteger" }, - "minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "required": { "$ref": "#/definitions/stringArray" }, - "additionalProperties": { "$ref": "#" }, - "definitions": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "properties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "patternProperties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "propertyNames": { "format": "regex" }, - "default": {} - }, - "dependencies": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/stringArray" } - ] - } - }, - "propertyNames": { "$ref": "#" }, - "const": {}, - "enum": { - "type": "array", - "minItems": 1, - "uniqueItems": true - }, - "type": { - "anyOf": [ - { "$ref": "#/definitions/simpleTypes" }, - { - "type": "array", - "items": { "$ref": "#/definitions/simpleTypes" }, - "minItems": 1, - "uniqueItems": true - } - ] - }, - "format": { "type": "string" }, - "allOf": { "$ref": "#/definitions/schemaArray" }, - "anyOf": { "$ref": 
"#/definitions/schemaArray" }, - "oneOf": { "$ref": "#/definitions/schemaArray" }, - "not": { "$ref": "#" } - }, - "default": {} -} -)json"; - -/* - From https://json-schema.org/draft-07/schema, this is the draft7 metaschema - used to validate draft7 json schemas. - TODO It's implemented in the draft4 dialect, because the current version - of rapidjson only support draft4, change this when it's upgraded - For reference, this is the diff applied to the original metaschema: ---- draft7.json 2024-07-02 09:53:23.943963373 +0200 -+++ draft7.asdraft4.json 2024-07-05 15:55:21.160409278 +0200 -@@ -1,4 +1,4 @@ - { -- "$schema": "http://json-schema.org/draft-07/schema#", -- "$id": "http://json-schema.org/draft-07/schema#", -+ "$schema": "http://json-schema.org/draft-04/schema#", -+ "id": "http://json-schema.org/draft-07/schema#", - "title": "Core schema meta-schema", -@@ -61,3 +62,3 @@ - }, -- "default": true, -+ "default": {}, - "readOnly": { -@@ -72,3 +73,3 @@ - "type": "array", -- "items": true -+ "items": {} - }, -@@ -76,3 +77,4 @@ - "type": "number", -- "exclusiveMinimum": 0 -+ "minimum": 0, -+ "exclusiveMinimum": true - }, -@@ -141,6 +143,6 @@ - "propertyNames": { "$ref": "#" }, -- "const": true, -+ "const": {}, - "enum": { - "type": "array", -- "items": true, -+ "items": {}, - "minItems": 1, -*/ - -constexpr std::string_view json_draft_7_metaschema = R"json( -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "http://json-schema.org/draft-07/schema#", - "title": "Core schema meta-schema", - "definitions": { - "schemaArray": { - "type": "array", - "minItems": 1, - "items": { "$ref": "#" } - }, - "nonNegativeInteger": { - "type": "integer", - "minimum": 0 - }, - "nonNegativeIntegerDefault0": { - "allOf": [ - { "$ref": "#/definitions/nonNegativeInteger" }, - { "default": 0 } - ] - }, - "simpleTypes": { - "enum": [ - "array", - "boolean", - "integer", - "null", - "number", - "object", - "string" - ] - }, - "stringArray": { - "type": "array", - "items": { 
"type": "string" }, - "uniqueItems": true, - "default": [] - } - }, - "type": ["object", "boolean"], - "properties": { - "$id": { - "type": "string", - "format": "uri-reference" - }, - "$schema": { - "type": "string", - "format": "uri" - }, - "$ref": { - "type": "string", - "format": "uri-reference" - }, - "$comment": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "default": {}, - "readOnly": { - "type": "boolean", - "default": false - }, - "writeOnly": { - "type": "boolean", - "default": false - }, - "examples": { - "type": "array", - "items": {} - }, - "multipleOf": { - "type": "number", - "minimum": 0, - "exclusiveMinimum": true - }, - "maximum": { - "type": "number" - }, - "exclusiveMaximum": { - "type": "number" - }, - "minimum": { - "type": "number" - }, - "exclusiveMinimum": { - "type": "number" - }, - "maxLength": { "$ref": "#/definitions/nonNegativeInteger" }, - "minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "pattern": { - "type": "string", - "format": "regex" - }, - "additionalItems": { "$ref": "#" }, - "items": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/schemaArray" } - ], - "default": true - }, - "maxItems": { "$ref": "#/definitions/nonNegativeInteger" }, - "minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "uniqueItems": { - "type": "boolean", - "default": false - }, - "contains": { "$ref": "#" }, - "maxProperties": { "$ref": "#/definitions/nonNegativeInteger" }, - "minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, - "required": { "$ref": "#/definitions/stringArray" }, - "additionalProperties": { "$ref": "#" }, - "definitions": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "properties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - "default": {} - }, - "patternProperties": { - "type": "object", - "additionalProperties": { "$ref": "#" }, - 
"propertyNames": { "format": "regex" }, - "default": {} - }, - "dependencies": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { "$ref": "#" }, - { "$ref": "#/definitions/stringArray" } - ] - } - }, - "propertyNames": { "$ref": "#" }, - "const": {}, - "enum": { - "type": "array", - "items": {}, - "minItems": 1, - "uniqueItems": true - }, - "type": { - "anyOf": [ - { "$ref": "#/definitions/simpleTypes" }, - { - "type": "array", - "items": { "$ref": "#/definitions/simpleTypes" }, - "minItems": 1, - "uniqueItems": true - } - ] - }, - "format": { "type": "string" }, - "contentMediaType": { "type": "string" }, - "contentEncoding": { "type": "string" }, - "if": { "$ref": "#" }, - "then": { "$ref": "#" }, - "else": { "$ref": "#" }, - "allOf": { "$ref": "#/definitions/schemaArray" }, - "anyOf": { "$ref": "#/definitions/schemaArray" }, - "oneOf": { "$ref": "#/definitions/schemaArray" }, - "not": { "$ref": "#" } - }, - "default": true -} -)json"; - ss::future<> check_references(sharded_store& store, canonical_schema schema) { for (const auto& ref : schema.def().refs()) { co_await store.is_subject_version_deleted(ref.sub, ref.version) @@ -702,34 +154,34 @@ struct pj { }; template -json::SchemaDocument const& get_metaschema() { +jsoncons::jsonschema::json_schema const& get_metaschema() { static auto const meteschema_doc = [] { - auto metaschema_str = [] { + auto metaschema = [] { switch (Dialect) { case json_schema_dialect::draft4: - return json_draft_4_metaschema; + return jsoncons::jsonschema::draft4::schema_draft4< + jsoncons::json>::get_schema(); case json_schema_dialect::draft6: - return json_draft_6_metaschema; + return jsoncons::jsonschema::draft6::schema_draft6< + jsoncons::json>::get_schema(); case json_schema_dialect::draft7: - return json_draft_7_metaschema; + return jsoncons::jsonschema::draft7::schema_draft7< + jsoncons::json>::get_schema(); } }(); - auto metaschema_json = json::Document{}; - metaschema_json.Parse(metaschema_str.data(), 
metaschema_str.size()); - vassert( - !metaschema_json.HasParseError(), "Malformed metaschema document"); - - return json::SchemaDocument{metaschema_json}; + // Throws if the metaschema can't be parsed (which should never happen + // and if it does, it would be detected by unit tests) + return jsoncons::jsonschema::make_json_schema(metaschema); }(); return meteschema_doc; } result validate_json_schema( - json_schema_dialect dialect, json::Document const& schema) { + json_schema_dialect dialect, const jsoncons::json& schema) { // validation pre-step: get metaschema for json draft - auto const& metaschema_doc = [=]() -> json::SchemaDocument const& { + auto const& metaschema_doc = [=]() -> const auto& { using enum json_schema_dialect; switch (dialect) { case draft4: @@ -742,31 +194,16 @@ result validate_json_schema( }(); // validation of schema: validate it against metaschema - auto validator = json::SchemaValidator{metaschema_doc}; - - if (!schema.Accept(validator)) { - // schema it's not a valid json schema for Dialect, according to - // metaschema - - auto error_loc_metaschema = json::StringBuffer{}; - auto error_loc_schema = json::StringBuffer{}; - validator.GetInvalidSchemaPointer().StringifyUriFragment( - error_loc_metaschema); - validator.GetInvalidDocumentPointer().StringifyUriFragment( - error_loc_schema); - auto invalid_keyword = validator.GetInvalidSchemaKeyword(); - + try { + // Throws when the schema is invalid with details about the failure + metaschema_doc.validate(schema); + } catch (const std::exception& e) { return error_info{ error_code::schema_invalid, fmt::format( - "Invalid json schema: '{}', invalid metaschema: '{}', invalid " - "keyword: '{}'", - std::string_view{ - error_loc_schema.GetString(), error_loc_schema.GetLength()}, - std::string_view{ - error_loc_metaschema.GetString(), - error_loc_metaschema.GetLength()}, - invalid_keyword)}; + "Invalid json schema: '{}'. 
Error: '{}'", + schema.to_string(), + e.what())}; } // schema is a syntactically valid json schema, where $schema == Dialect. @@ -775,7 +212,7 @@ result validate_json_schema( return outcome::success(); } -result try_validate_json_schema(json::Document const& schema) { +result try_validate_json_schema(const jsoncons::json& schema) { using enum json_schema_dialect; // no explicit $schema: try to validate from newest to oldest draft @@ -800,7 +237,8 @@ result try_validate_json_schema(json::Document const& schema) { result parse_json(iobuf buf) { // parse string in json document, check it's a valid json - auto schema_stream = json::chunked_input_stream{std::move(buf)}; + auto schema_stream = json::chunked_input_stream{ + buf.share(0, buf.size_bytes())}; auto schema = json::Document{}; if (schema.ParseStream(schema_stream).HasParseError()) { // not a valid json document, return error @@ -831,12 +269,19 @@ result parse_json(iobuf buf) { } } + // We use jsoncons for validating the schema against the metaschema as + // currently rapidjson doesn't support validating schemas newer than + // draft 5. + iobuf_istream is{std::move(buf)}; + auto jsoncons_schema = jsoncons::json::parse(is.istream()); auto validation_res = dialect.has_value() - ? validate_json_schema(dialect.value(), schema) - : try_validate_json_schema(schema); + ? 
validate_json_schema( - dialect.value(), jsoncons_schema) + : try_validate_json_schema(jsoncons_schema); if (validation_res.has_error()) { return validation_res.as_failure(); } + return {std::move(schema)}; } diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 5fb99f3960478..ac5c01921718a 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -64,8 +64,7 @@ static const auto error_test_cases = std::to_array({ R"({"type": "thisisnotapropertype"})", pps::error_info{ pps::error_code::schema_invalid, - "Invalid json schema: '#/type', invalid metaschema: '#/properties/type', " - "invalid keyword: 'anyOf'"}}, + R"(Invalid json schema: '{"type":"thisisnotapropertype"}'. Error: '/type: Must be valid against at least one schema, but found no matching schemas')"}}, error_test_case{ R"({"$schema": "unsupported_dialect"})", pps::error_info{ From ff9305ba9c60e2181e6cdef6c7761bbbf4f97774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 17 Jul 2024 12:22:26 +0100 Subject: [PATCH 5/5] sr/json: add support for 201909 and 202012 drafts This adds support for validating schemas of the 2019-09 and 2020-12 drafts. The assertion keywords introduced in these drafts are not yet validated for compatibility across successive schema versions. Compatibility checks for these keywords will be implemented later. 
https://json-schema.org/draft/2019-09/release-notes https://json-schema.org/draft/2020-12/release-notes --- src/v/pandaproxy/schema_registry/json.cc | 34 +++++++++++++++++-- .../schema_registry/test/test_json_schema.cc | 2 ++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index cc984a6fd1009..73fba08a1b88e 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -109,6 +109,8 @@ enum class json_schema_dialect { draft4, draft6, draft7, + draft201909, + draft202012, }; constexpr std::string_view @@ -122,6 +124,10 @@ to_uri(json_schema_dialect draft, bool strip = false) { return "http://json-schema.org/draft-06/schema#"; case draft7: return "http://json-schema.org/draft-07/schema#"; + case draft201909: + return "https://json-schema.org/draft/2019-09/schema#"; + case draft202012: + return "https://json-schema.org/draft/2020-12/schema#"; } }(); @@ -139,6 +145,8 @@ constexpr std::optional from_uri(std::string_view uri) { .match_all(to_uri(draft4), to_uri(draft4, true), draft4) .match_all(to_uri(draft6), to_uri(draft6, true), draft6) .match_all(to_uri(draft7), to_uri(draft7, true), draft7) + .match_all(to_uri(draft201909), to_uri(draft201909, true), draft201909) + .match_all(to_uri(draft202012), to_uri(draft202012, true), draft202012) .default_match(std::nullopt); } @@ -167,6 +175,12 @@ jsoncons::jsonschema::json_schema const& get_metaschema() { case json_schema_dialect::draft7: return jsoncons::jsonschema::draft7::schema_draft7< jsoncons::json>::get_schema(); + case json_schema_dialect::draft201909: + return jsoncons::jsonschema::draft201909::schema_draft201909< + jsoncons::json>::get_schema(); + case json_schema_dialect::draft202012: + return jsoncons::jsonschema::draft202012::schema_draft202012< + jsoncons::json>::get_schema(); } }(); @@ -190,6 +204,10 @@ result validate_json_schema( return get_metaschema(); case draft7: 
return get_metaschema(); + case draft201909: + return get_metaschema(); + case draft202012: + return get_metaschema(); } }(); @@ -217,7 +235,7 @@ result try_validate_json_schema(const jsoncons::json& schema) { // no explicit $schema: try to validate from newest to oldest draft auto first_error = std::optional{}; - for (auto d : {draft7, draft6, draft4}) { + for (auto d : {draft202012, draft201909, draft7, draft6, draft4}) { auto res = validate_json_schema(d, schema); if (res.has_value()) { return outcome::success(); @@ -1255,7 +1273,19 @@ bool is_superset(json::Value const& older, json::Value const& newer) { "if", "then", "else", - // later drafts: + // draft 2019-09 unhandled keywords: + "$anchor", + "$recursiveRef", + "$recursiveAnchor", + "unevaluatedItems", + "unevaluatedProperties", + "dependentRequired", + "maxContains", + "minContains", + "deprecated", + // draft 2020-12 unhandled keywords: + "$dynamicRef", + "$dynamicAnchor", "prefixItems", }) { if ( diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index ac5c01921718a..bcc6f2b13f9be 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -169,6 +169,8 @@ static constexpr auto valid_test_cases = std::to_array({ R"json({"$schema": "http://json-schema.org/draft-07/schema"})json", R"json({"$schema": "http://json-schema.org/draft-06/schema"})json", R"json({"$schema": "http://json-schema.org/draft-04/schema"})json", + R"json({"$schema": "https://json-schema.org/draft/2019-09/schema"})json", + R"json({"$schema": "https://json-schema.org/draft/2020-12/schema"})json", }); SEASTAR_THREAD_TEST_CASE(test_make_valid_json_schema) { for (const auto& data : valid_test_cases) {