From 4cd282e397289382b39c4fc93e9440ccfa3277a3 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 23 Aug 2024 17:27:30 +0200 Subject: [PATCH 1/5] schema_registry/json: convert if to switch future proof if some future json schema dialect appears (cherry picked from commit 339d4fd6bfbd4ae63ebb2951fc3fe1ace6baaad3) --- src/v/pandaproxy/schema_registry/json.cc | 40 ++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index a6654f42ceea..673028ad00f3 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -678,8 +678,16 @@ bool is_additional_superset( case additional_field_for::object: return "additionalProperties"; case additional_field_for::array: - return d == json_schema_dialect::draft202012 ? "items" - : "additionalItems"; + using enum json_schema_dialect; + switch (d) { + case draft4: + case draft6: + case draft7: + case draft201909: + return "additionalItems"; + case draft202012: + return "items"; + } } }; @@ -923,10 +931,28 @@ bool is_array_superset( // in draft 2020, "prefixItems" is used to represent tuples instead of an // overloaded "items" constexpr static auto get_tuple_items_kw = [](json_schema_dialect d) { - if (d == json_schema_dialect::draft202012) { + using enum json_schema_dialect; + switch (d) { + case draft4: + case draft6: + case draft7: + case draft201909: + return "items"; + case draft202012: return "prefixItems"; } - return "items"; + }; + constexpr static auto get_additional_items_kw = [](json_schema_dialect d) { + using enum json_schema_dialect; + switch (d) { + case draft4: + case draft6: + case draft7: + case draft201909: + return "additionalItems"; + case draft202012: + return "items"; + } }; // check if the input is an array schema or a tuple schema @@ -997,11 +1023,7 @@ bool is_array_superset( // excess elements with older["additionalItems"] auto older_additional_schema = get_object_or_empty( - ctx.older, - older, - ctx.older.dialect() == json_schema_dialect::draft202012 - ? "items" - : "additionalItems"); + ctx.older, older, get_additional_items_kw(ctx.older.dialect())); // check that all excess schemas are compatible with // older["additionalItems"] From aa2fdf1e32698a67fe5be9b03c2e9b6de91e0686 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Tue, 27 Aug 2024 22:11:13 +0200 Subject: [PATCH 2/5] schema_registry/json: is_array_superset refactor only a name change. lambda is_array -> is_tuple, is_tuple is the opposite of is_array. less confusing in the context of is_array_superset (cherry picked from commit 348f0041cfa003a5291f5b0ecfc29a7e91ea3257) --- src/v/pandaproxy/schema_registry/json.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 673028ad00f3..d50a8a9ab09f 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -956,25 +956,24 @@ bool is_array_superset( }; // check if the input is an array schema or a tuple schema - auto is_array = [](json_schema_dialect d, json::Value const& v) -> bool { + auto is_tuple = [](json_schema_dialect d, json::Value const& v) -> bool { auto tuple_items_it = v.FindMember(get_tuple_items_kw(d)); // default for items is `{}` so it's not a tuple schema // v is a tuple schema if "items" is an array of schemas - auto is_tuple = tuple_items_it != v.MemberEnd() - && tuple_items_it->value.IsArray(); - return !is_tuple; + return tuple_items_it != v.MemberEnd() + && tuple_items_it->value.IsArray(); }; - auto older_is_array = is_array(ctx.older.dialect(), older); - auto newer_is_array = is_array(ctx.newer.dialect(), newer); + auto older_is_tuple = is_tuple(ctx.older.dialect(), older); + auto newer_is_tuple = is_tuple(ctx.newer.dialect(), newer); - if (older_is_array != newer_is_array) { + if (older_is_tuple != newer_is_tuple) { // one is a tuple and the other is not. not compatible return false; } // both are tuples or both are arrays - if (older_is_array) { + if (!older_is_tuple) { // both are array, only "items" is relevant and it's a schema // note that "additionalItems" can be defined, but it's // not used by validation because every element is validated against From 80de2d63c63ed79f81335682e691fbd091b8a010 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Tue, 27 Aug 2024 22:57:19 +0200 Subject: [PATCH 3/5] schema_registry/json: fix is_object_required_superset the function was wrong, if the property has a "default" field the correct formulation is: is superset if newer["required"] has all the elements of older["required"] or more or every x element in older["reuired"] NOT in newer["required"], has a default value in older["properties"][x] chenged the whole function to just check older["required"] for this property (cherry picked from commit f81507bf0043bd22a1e4b199ac692131dc430481) --- src/v/pandaproxy/schema_registry/json.cc | 51 +++++++------------ .../schema_registry/test/test_json_schema.cc | 34 ++++++++++--- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index d50a8a9ab09f..0c0d6757e6a0 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -1132,50 +1132,35 @@ bool is_object_pattern_properties_superset( bool is_object_required_superset( context const& ctx, json::Value const& older, json::Value const& newer) { // to pass the check, a required property from newer has to be present in - // older, or if new it needs to be without a default value note that: + // older, or if new it needs to have a default value. + // note that: // 1. we check only required properties that are in both newer["properties"] // and older["properties"] // 2. there is no explicit check that older has an open content model // there might be a property name outside of (1) that could be rejected - // by older, if older["additionalProperties"] is false + // by update, if update["additionalProperties"] is false auto older_req = get_array_or_empty(older, "required"); auto newer_req = get_array_or_empty(newer, "required"); auto older_props = get_object_or_empty(ctx.older, older, "properties"); auto newer_props = get_object_or_empty(ctx.newer, newer, "properties"); - // TODO O(n^2) lookup that can be a set_intersection - auto newer_props_in_older = older_props - | std::views::transform([&](auto& n_v) { - return newer_props.FindMember(n_v.name); - }) - | std::views::filter( - [end = newer_props.end()](auto it) { - return it != end; - }); - // intersections of older["properties"] and newer["properties"] - for (auto prop_it : newer_props_in_older) { - auto& [name, newer_schema] = *prop_it; - - auto older_is_required = std::ranges::find(older_req, name) - != older_req.end(); - auto newer_is_required = std::ranges::find(newer_req, name) - != newer_req.end(); - if (older_is_required && !newer_is_required) { - // required property not present in newer, not compatible - return false; - } - - if (!older_is_required && newer_is_required) { - if (newer_schema.HasMember("default")) { - // newer required property with a default makes newer - // incompatible - return false; - } - } - } + // TODO O(n^2) lookup that can be a set_intersection. + auto older_req_in_both_properties + = older_req | std::views::filter([&](json::Value const& o) { + return newer_props.HasMember(o) && older_props.HasMember(o); + }); - return true; + // for each element: + // in older.required? | in newer.required? | result + // yes | yes | yes + // yes | no | if it has "default" in older + // no | yes | yes + return std::ranges::all_of( + older_req_in_both_properties, [&](json::Value const& o) { + return std::ranges::find(newer_req, o) != newer_req.End() + || older_props[o].HasMember("default"); + }); } bool is_object_dependencies_superset( diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 8f68a6a8a3a7..366dc0df9134 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -466,14 +466,6 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "object", "patternProperties": {"^a": {"type": "number"}}})", .reader_is_compatible_with_writer = false, }, - // object checks: required needs to be compatible - { - .reader_schema - = R"({"type": "object", "properties": {"a": {"type": "integer"}}})", - .writer_schema - = R"({"type": "object", "properties": {"a": {"type": "integer", "default": 10}}, "required": ["a"]})", - .reader_is_compatible_with_writer = false, - }, // object checks: dependencies removed { .reader_schema = R"( @@ -798,6 +790,32 @@ static constexpr auto compatibility_test_cases = std::to_array< "additionalProperties": false, "required": ["aaaa", "cccc"], "dependencies": {"a": ["b", "c", "d"], "b": {"type": "integer"}, "d": ["a"]} +})", + .reader_is_compatible_with_writer = true, + }, + // object checks: a new required property is ok if it has a default value + { + .reader_schema = R"( +{ + "type": "object", + "properties": { + "a": { + "type": "integer" + } + } +})", + .writer_schema = R"( +{ + "type": "object", + "properties": { + "a": { + "type": "integer", + "default": 10 + } + }, + "required": [ + "a" + ] })", .reader_is_compatible_with_writer = true, }, From 28e75b7e4f82028566619f083d1dd9fe05806b65 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Wed, 28 Aug 2024 23:46:19 +0200 Subject: [PATCH 4/5] schema_registry/json: fix is_array_superset in the case of tuples with different size of elements, excess elements needs to be compatible with the other schema "additionalItems". previously, this property was cheked only for excess elements in newer (cherry picked from commit b6201a1df581beac4939013030707395b02af7a6) --- src/v/pandaproxy/schema_registry/json.cc | 33 ++++--- .../schema_registry/test/test_json_schema.cc | 92 ++++++++++++++++++- 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 0c0d6757e6a0..715163331bea 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -1015,21 +1015,32 @@ bool is_array_superset( return false; } - // no mismatching elements. two possible cases - // 1. older_tuple_schema.Size() >= newer_tuple_schema.Size() -> - // compatible (implies newer_it==end()) - // 2. older_tuple_schema.Size() < newer_tuple_schema.Size() -> check - // excess elements with older["additionalItems"] + // no mismatching elements, and they don't have the same size. + // To be compatible, excess elements needs to be compatible with the other + // "additionalItems" schema auto older_additional_schema = get_object_or_empty( ctx.older, older, get_additional_items_kw(ctx.older.dialect())); + if (!std::all_of( + newer_it, newer_tuple_schema.end(), [&](json::Value const& n) { + return is_superset(ctx, older_additional_schema, n); + })) { + // newer has excess elements that are not compatible with + // older["additionalItems"] + return false; + } - // check that all excess schemas are compatible with - // older["additionalItems"] - return std::all_of( - newer_it, newer_tuple_schema.end(), [&](json::Value const& n) { - return is_superset(ctx, older_additional_schema, n); - }); + auto newer_additional_schema = get_object_or_empty( + ctx.newer, newer, get_additional_items_kw(ctx.newer.dialect())); + if (!std::all_of( + older_it, older_tuple_schema.end(), [&](json::Value const& o) { + return is_superset(ctx, o, newer_additional_schema); + })) { + // older has excess elements that are not compatible with + // newer["additionalItems"] + return false; + } + return true; } bool is_object_properties_superset( diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 366dc0df9134..5ab1241e58b2 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -555,7 +555,14 @@ static constexpr auto compatibility_test_cases = std::to_array< .reader_schema = R"({"type": "array", "additionalItems": {"type": "integer"}, "items": [{"type":"boolean"}]})", .writer_schema - = R"({"type": "array", "additionalItems": {"type": "integer"}, "items": [{"type":"boolean"}, {"type": "boolean"}]})", + = R"({"type": "array", "additionalItems": false, "items": [{"type":"boolean"}, {"type": "number"}]})", + .reader_is_compatible_with_writer = false, + }, + { + .reader_schema + = R"({"type": "array", "additionalItems": {"type": "number"}, "items": [{"type":"boolean"}, {"type": "integer"}]})", + .writer_schema + = R"({"type": "array", "additionalItems": {"type": "number"}, "items": [{"type":"boolean"}]})", .reader_is_compatible_with_writer = false, }, // combinators: "not" is required on both schema @@ -865,6 +872,89 @@ static constexpr auto compatibility_test_cases = std::to_array< })", .reader_is_compatible_with_writer = true, }, + // array checks: excess elements are absorbed by additionalItems + { + .reader_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + } + ], + "additionalItems": { + "type": "number" + } +})", + .writer_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + }, + { + "type": "integer" + } + ], + "additionalItems": false +})", + .reader_is_compatible_with_writer = true, + }, + { + .reader_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + }, + { + "type": "number" + } + ], + "additionalItems": {"type": "number"} +})", + .writer_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + } + ], + "additionalItems": { + "type": "integer" + } +})", + .reader_is_compatible_with_writer = true, + }, + { + .reader_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + }, + { + "type": "number" + } + ], + "additionalItems": false +})", + .writer_schema = R"( +{ + "type": "array", + "items": [ + { + "type": "boolean" + } + ], + "additionalItems": false +})", + .reader_is_compatible_with_writer = true, + }, // array checks: prefixItems/items are compatible across drafts { .reader_schema = R"({ From f77749f224d2b0b138f4d2fef6c8f954a3f8ea34 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Wed, 28 Aug 2024 12:20:14 +0200 Subject: [PATCH 5/5] schema_registry/json: remove compat checks for "patternProperites" the legacy software does not directly check patternProperties. this seems likely an oversight, but for now this check is removed (cherry picked from commit d67bbcc176261427576720a794c34829f0eb063b) --- src/v/pandaproxy/schema_registry/json.cc | 41 ++++--------------- .../schema_registry/test/test_json_schema.cc | 36 ++++++++++++---- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/json.cc b/src/v/pandaproxy/schema_registry/json.cc index 715163331bea..adb7fb9118f9 100644 --- a/src/v/pandaproxy/schema_registry/json.cc +++ b/src/v/pandaproxy/schema_registry/json.cc @@ -1111,35 +1111,6 @@ bool is_object_properties_superset( return true; } -bool is_object_pattern_properties_superset( - context const& ctx, json::Value const& older, json::Value const& newer) { - // check that every pattern property in newer["patternProperties"] - // appears in older["patternProperties"] and is compatible with the schema - - // "patternProperties" is a map of - auto newer_pattern_properties = get_object_or_empty( - ctx.newer, newer, "patternProperties"); - auto older_pattern_properties = get_object_or_empty( - ctx.older, older, "patternProperties"); - - // TODO O(n^2) lookup - for (auto const& [pattern, schema] : newer_pattern_properties) { - // search for pattern in older_pattern_properties and check schemas - auto older_pp_it = older_pattern_properties.FindMember(pattern); - if (older_pp_it == older_pattern_properties.MemberEnd()) { - // pattern not in older["patternProperties"], not compatible - return false; - } - - if (!is_superset(ctx, older_pp_it->value, schema)) { - // not compatible - return false; - } - } - - return true; -} - bool is_object_required_superset( context const& ctx, json::Value const& older, json::Value const& newer) { // to pass the check, a required property from newer has to be present in @@ -1250,10 +1221,14 @@ bool is_object_superset( // newer) return false; } - if (!is_object_pattern_properties_superset(ctx, older, newer)) { - // pattern properties checks are not compatible - return false; - } + + // note: to match the behavior of the legacy software, + // ``` + // if(!is_object_pattern_properties_superset(ctx, older, newer)) + // return false; + // ``` + // is omitted + if (!is_object_required_superset(ctx, older, newer)) { // required properties are not compatible return false; diff --git a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc index 5ab1241e58b2..1e58d56d7eb6 100644 --- a/src/v/pandaproxy/schema_registry/test/test_json_schema.cc +++ b/src/v/pandaproxy/schema_registry/test/test_json_schema.cc @@ -458,14 +458,6 @@ static constexpr auto compatibility_test_cases = std::to_array< = R"({"type": "object", "properties": {"aaaa": {"type": "string"}}})", .reader_is_compatible_with_writer = false, }, - // object checks: patternProperties need to be compatible - { - .reader_schema - = R"({"type": "object", "patternProperties": {"^a": {"type": "integer"}}})", - .writer_schema - = R"({"type": "object", "patternProperties": {"^a": {"type": "number"}}})", - .reader_is_compatible_with_writer = false, - }, // object checks: dependencies removed { .reader_schema = R"( @@ -1220,6 +1212,34 @@ static constexpr auto compatibility_test_cases = std::to_array< .reader_is_compatible_with_writer = true, }, + // object: "patternProperties" is not involved in compat checks + { + .reader_schema = R"( +{ + "type": "object", + "patternProperties": { + "^a": { + "type": "boolean" + }, + "^b": { + "type": "integer" + } + } +})", + .writer_schema = R"( +{ + "type": "object", + "patternProperties": { + "^a": { + "type": "null" + }, + "^c": { + "type": "string" + } + } +})", + .reader_is_compatible_with_writer = true, + }, }); SEASTAR_THREAD_TEST_CASE(test_compatibility_check) { store_fixture f;