From 739649a9188d2a776dd152eb2ae9affb4ff2cc2a Mon Sep 17 00:00:00 2001 From: SeaRise Date: Wed, 13 Dec 2023 18:22:27 +0800 Subject: [PATCH] refine for const type and path --- dbms/src/Functions/FunctionsJson.h | 261 +++++++++++------- .../tests/gtest_json_contains_path.cpp | 84 +++--- dbms/src/TiDB/Decode/JsonBinary.cpp | 6 +- dbms/src/TiDB/Decode/JsonBinary.h | 4 +- 4 files changed, 218 insertions(+), 137 deletions(-) diff --git a/dbms/src/Functions/FunctionsJson.h b/dbms/src/Functions/FunctionsJson.h index e6ff838fc35..c0f89853f4b 100644 --- a/dbms/src/Functions/FunctionsJson.h +++ b/dbms/src/Functions/FunctionsJson.h @@ -1578,22 +1578,20 @@ class FunctionJsonContainsPath : public IFunction StringSources path_sources; path_sources.reserve(arguments.size() - 2); - bool paths_nullable = false; std::vector path_null_maps; path_null_maps.reserve(arguments.size() - 2); + bool is_all_path_const = true; for (size_t i = 2; i < arguments.size(); ++i) { const auto & path_col = block.getByPosition(arguments[i]).column; if (path_col->onlyNull()) { path_sources.push_back(nullptr); - paths_nullable = true; path_null_maps.push_back(nullptr); } else if (path_col->isColumnNullable()) { path_sources.push_back(createDynamicStringSource(*nested_block.getByPosition(arguments[i]).column)); - paths_nullable = true; const auto & path_column_nullable = static_cast(*path_col); path_null_maps.push_back(&path_column_nullable.getNullMapData()); } @@ -1602,6 +1600,7 @@ class FunctionJsonContainsPath : public IFunction path_sources.push_back(createDynamicStringSource(*nested_block.getByPosition(arguments[i]).column)); path_null_maps.push_back(nullptr); } + is_all_path_const = is_all_path_const && path_col->isColumnConst(); } if (json_col->isColumnNullable()) @@ -1610,53 +1609,40 @@ class FunctionJsonContainsPath : public IFunction if (type_col->isColumnNullable()) { const auto & type_column_nullable = static_cast(*type_col); - if (paths_nullable) - doExecute( - json_source, - json_column_nullable.getNullMapData(), - type_source, - type_column_nullable.getNullMapData(), - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); - else - doExecute( - json_source, - json_column_nullable.getNullMapData(), - type_source, - type_column_nullable.getNullMapData(), - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); + doExecuteCommon( + json_source, + json_column_nullable.getNullMapData(), + type_source, + type_column_nullable.getNullMapData(), + path_sources, + path_null_maps, + rows, + data_to, + vec_null_map); + } + else if (type_col->isColumnConst() && is_all_path_const) + { + doExecuteForTypeAndPathConst( + json_source, + json_column_nullable.getNullMapData(), + type_source, + path_sources, + rows, + data_to, + vec_null_map); } else { - if (paths_nullable) - doExecute( - json_source, - json_column_nullable.getNullMapData(), - type_source, - {}, - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); - else - doExecute( - json_source, - json_column_nullable.getNullMapData(), - type_source, - {}, - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); + doExecuteCommon( + json_source, + json_column_nullable.getNullMapData(), + type_source, + {}, + path_sources, + path_null_maps, + rows, + data_to, + vec_null_map); } } else @@ -1664,53 +1650,40 @@ class FunctionJsonContainsPath : public IFunction if (type_col->isColumnNullable()) { const auto & type_column_nullable = static_cast(*type_col); - if (paths_nullable) - doExecute( - json_source, - {}, - type_source, - type_column_nullable.getNullMapData(), - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); - else - doExecute( - json_source, - {}, - type_source, - type_column_nullable.getNullMapData(), - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); + doExecuteCommon( + json_source, + {}, + type_source, + type_column_nullable.getNullMapData(), + path_sources, + path_null_maps, + rows, + data_to, + vec_null_map); + } + else if (type_col->isColumnConst() && is_all_path_const) + { + doExecuteForTypeAndPathConst( + json_source, + {}, + type_source, + path_sources, + rows, + data_to, + vec_null_map); } else { - if (paths_nullable) - doExecute( - json_source, - {}, - type_source, - {}, - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); - else - doExecute( - json_source, - {}, - type_source, - {}, - path_sources, - path_null_maps, - rows, - data_to, - vec_null_map); + doExecuteCommon( + json_source, + {}, + type_source, + {}, + path_sources, + path_null_maps, + rows, + data_to, + vec_null_map); } } @@ -1722,8 +1695,8 @@ class FunctionJsonContainsPath : public IFunction } private: - template - void doExecute( + template + void doExecuteCommon( const std::unique_ptr & json_source, const NullMap & null_map_json, const std::unique_ptr & type_source, @@ -1777,13 +1750,10 @@ class FunctionJsonContainsPath : public IFunction auto & res = data_to[row]; // default 1. for (size_t i = 0; i < path_sources.size(); ++i) { - if constexpr (paths_nullable) + if (!path_sources[i] || (path_null_maps[i] && (*path_null_maps[i])[row])) { - if (!path_sources[i] || (path_null_maps[i] && (*path_null_maps[i])[row])) - { - null_map_to[row] = 1; - break; - } + null_map_to[row] = 1; + break; } assert(path_sources[i]); @@ -1817,6 +1787,99 @@ class FunctionJsonContainsPath : public IFunction FINISH_PER_ROW } +#undef SET_NULL_AND_CONTINUE + } + + template + void doExecuteForTypeAndPathConst( + const std::unique_ptr & json_source, + const NullMap & null_map_json, + const std::unique_ptr & type_source, + const StringSources & path_sources, + size_t rows, + ColumnUInt8::Container & data_to, + NullMap & null_map_to) const + { + // build contains_type for type const col first. + const auto & type_val = type_source->getWhole(); + std::string_view type{reinterpret_cast(type_val.data), type_val.size}; + bool is_contains_path_one = JsonBinary::isJSONContainsPathOne(type); + bool is_contains_path_all = JsonBinary::isJSONContainsPathAll(type); + if unlikely (!is_contains_path_one && !is_contains_path_all) + throw Exception( + fmt::format("The second argument can only be either 'one' or 'all' of function {}.", getName()), + ErrorCodes::ILLEGAL_COLUMN); + + // build path exprs for path const cols next. + std::vector> path_expr_containor_vecs; + path_expr_containor_vecs.reserve(path_sources.size()); + for (const auto & path_source : path_sources) + { + if (!path_source) // only null const + { + path_expr_containor_vecs.push_back({}); + } + else + { + const auto & path_val = path_source->getWhole(); + auto path_expr = JsonPathExpr::parseJsonPathExpr(StringRef{path_val.data, path_val.size}); + /// If path_expr failed to parse, throw exception + if unlikely (!path_expr) + throw Exception( + fmt::format("Illegal json path expression of function {}", getName()), + ErrorCodes::ILLEGAL_COLUMN); + auto path_expr_containor = std::make_unique(path_expr); + path_expr_containor_vecs.emplace_back(); + path_expr_containor_vecs.back().resize(1); + path_expr_containor_vecs.back()[0] = std::move(path_expr_containor); + } + } + assert(path_sources.size() == path_expr_containor_vecs.size()); + + for (size_t row = 0; row < rows; ++row) + { + if constexpr (is_json_nullable) + { + if (null_map_json[row]) + { + json_source->next(); + null_map_to[row] = 1; + continue; + } + } + + const auto & json_val = json_source->getWhole(); + JsonBinary json_binary{json_val.data[0], StringRef{&json_val.data[1], json_val.size - 1}}; + + auto & res = data_to[row]; // default 1. + for (const auto & path_expr_containor_vec : path_expr_containor_vecs) + { + if (path_expr_containor_vec.empty()) + { + null_map_to[row] = 1; + break; + } + + bool exists = !json_binary.extract(path_expr_containor_vec).empty(); + if (exists && is_contains_path_one) + { + res = 1; + break; + } + else if (!exists && is_contains_path_one) + { + res = 0; + } + else if (!exists && is_contains_path_all) + { + res = 0; + break; + } + } + + json_source->next(); + } + #undef SET_NULL_AND_CONTINUE } }; diff --git a/dbms/src/Functions/tests/gtest_json_contains_path.cpp b/dbms/src/Functions/tests/gtest_json_contains_path.cpp index 3fdc4416264..7894a437baf 100644 --- a/dbms/src/Functions/tests/gtest_json_contains_path.cpp +++ b/dbms/src/Functions/tests/gtest_json_contains_path.cpp @@ -39,21 +39,7 @@ TEST_F(TestJsonContainsPath, TestOnlyNull) try { size_t rows_count = 2; - ColumnWithTypeAndName json_column; - { - auto val = ColumnString::create(); - // clang-format off - const UInt8 empty_array[] = { - JsonBinary::TYPE_CODE_ARRAY, // array_type - 0x0, 0x0, 0x0, 0x0, // element_count - 0x8, 0x0, 0x0, 0x0}; // total_size - // clang-format on - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - ColumnUInt8::MutablePtr col_null_map = ColumnUInt8::create(rows_count, 0); - auto json_col = ColumnNullable::create(std::move(val), std::move(col_null_map)); - json_column = ColumnWithTypeAndName(std::move(json_col), makeNullable(std::make_shared())); - } + ColumnWithTypeAndName json_column = castStringToJson(createColumn>({"[]", "[]"})); auto type_column = createColumn>({"one", "one"}); ColumnWithTypeAndName path_column = createColumn>({"$", "$"}); ColumnWithTypeAndName path_column2 = createColumn>({"$.a", "$.a"}); @@ -88,30 +74,47 @@ try ASSERT_COLUMN_EQ( null_bool_const, executeFunction(func_name, json_column, type_column, null_string_const, path_column)); + + // type and path const. + ASSERT_COLUMN_EQ( + createColumn>({true, true}), + executeFunction( + func_name, + json_column, + createConstColumn(2, "one"), + createConstColumn(2, "$"), + null_string_const)); + ASSERT_COLUMN_EQ( + createColumn>({true, true}), + executeFunction( + func_name, + json_column, + createConstColumn(2, "one"), + createConstColumn(2, "$"), + only_null_const)); + ASSERT_COLUMN_EQ( + null_bool_const, + executeFunction( + func_name, + json_column, + createConstColumn(2, "one"), + null_string_const, + createConstColumn(2, "$"))); + ASSERT_COLUMN_EQ( + only_null_const, + executeFunction( + func_name, + json_column, + createConstColumn(2, "one"), + only_null_const, + createConstColumn(2, "$"))); } CATCH TEST_F(TestJsonContainsPath, TestNullable) try { - ColumnWithTypeAndName json_column; - { - auto val = ColumnString::create(); - // clang-format off - const UInt8 empty_array[] = { - JsonBinary::TYPE_CODE_ARRAY, // array_type - 0x0, 0x0, 0x0, 0x0, // element_count - 0x8, 0x0, 0x0, 0x0}; // total_size - // clang-format on - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - val->insertData(reinterpret_cast(empty_array), sizeof(empty_array) / sizeof(UInt8)); - ColumnUInt8::MutablePtr col_null_map = ColumnUInt8::create(4, 0); - col_null_map->getData()[0] = 1; - auto json_col = ColumnNullable::create(std::move(val), std::move(col_null_map)); - json_column = ColumnWithTypeAndName(std::move(json_col), makeNullable(std::make_shared())); - } + ColumnWithTypeAndName json_column = castStringToJson(createColumn>({{}, "[]", "[]", "[]"})); auto type_column = createColumn>({"one", {}, "one", "one"}); ColumnWithTypeAndName path_column = createColumn>({"$", "$", {}, "$"}); @@ -153,6 +156,13 @@ try {castStringToJson(createColumn({json, json})), createColumn({type, type}), createConstColumn(2, path)})); + ASSERT_COLUMN_EQ( + createColumn({expect, expect}), + executeFunction( + func_name, + {castStringToJson(createColumn({json, json})), + createConstColumn(2, type), + createConstColumn(2, path)})); }; auto exec_assert2 = [&](const String & json, const String & type, const String & path1, const String & path2, bool expect) { @@ -204,6 +214,14 @@ try createColumn({type, type}), createColumn({path1, path1}), createConstColumn(2, path2)})); + ASSERT_COLUMN_EQ( + createColumn({expect, expect}), + executeFunction( + func_name, + {castStringToJson(createColumn({json, json})), + createConstColumn(2, type), + createConstColumn(2, path1), + createConstColumn(2, path2)})); }; exec_assert1("{}", "one", "$", true); diff --git a/dbms/src/TiDB/Decode/JsonBinary.cpp b/dbms/src/TiDB/Decode/JsonBinary.cpp index f469f437ef2..1d288acbc36 100644 --- a/dbms/src/TiDB/Decode/JsonBinary.cpp +++ b/dbms/src/TiDB/Decode/JsonBinary.cpp @@ -795,10 +795,10 @@ String JsonBinary::unquoteString(const StringRef & ref) return ref.toString(); } -std::vector JsonBinary::extract(std::vector & path_expr_container_vec) +std::vector JsonBinary::extract(const std::vector & path_expr_container_vec) { std::vector extracted_json_binary_vec; - for (auto & path_expr_container : path_expr_container_vec) + for (const auto & path_expr_container : path_expr_container_vec) { DupCheckSet dup_check_set = std::make_unique>(); const auto * first_path_ref = path_expr_container->firstRef(); @@ -808,7 +808,7 @@ std::vector JsonBinary::extract(std::vector & path_expr_container_vec, + const std::vector & path_expr_container_vec, JsonBinaryWriteBuffer & write_buffer) { auto extracted_json_binary_vec = extract(path_expr_container_vec); diff --git a/dbms/src/TiDB/Decode/JsonBinary.h b/dbms/src/TiDB/Decode/JsonBinary.h index 3e91be72489..de944fc2adb 100644 --- a/dbms/src/TiDB/Decode/JsonBinary.h +++ b/dbms/src/TiDB/Decode/JsonBinary.h @@ -148,11 +148,11 @@ class JsonBinary String toString() const; /// For test usage, not efficient at all void toStringInBuffer(JsonBinaryWriteBuffer & write_buffer) const; - std::vector extract(std::vector & path_expr_container_vec); + std::vector extract(const std::vector & path_expr_container_vec); /// Extract receives several path expressions as arguments, matches them in bj, and returns true if any match: /// Serialize final results in 'write_buffer' bool extract( - std::vector & path_expr_container_vec, + const std::vector & path_expr_container_vec, JsonBinaryWriteBuffer & write_buffer); UInt64 getDepth() const;