diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index f14f0c62debddc..196d16dbe58b94 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -85,12 +85,6 @@ namespace doris::vectorized {
 namespace {
 DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable) {
-    if (type == ColumnObject::MOST_COMMON_TYPE_ID) {
-        // JSONB type MUST NOT wrapped in ARRAY column, it should be top level.
-        // So we ignored num_dimensions.
-        return is_nullable ? make_nullable(std::make_shared())
-                           : std::make_shared();
-    }
     DataTypePtr result = DataTypeFactory::instance().create_data_type(type, is_nullable);
     for (size_t i = 0; i < num_dimensions; ++i) {
         result = std::make_shared(result);
@@ -945,11 +939,12 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res) const {
         res = Null();
         return;
     }
-    if (is_finalized()) {
-        if (least_common_type.get_base_type_id() == TypeIndex::JSONB) {
-            // JsonbFiled is special case
-            res = JsonbField();
-        }
+
+    // JSONB is a special case: it is not a scalar type, so it needs dedicated handling here.
+    // 1. A plain JSONB field is read from a ColumnString, which carries no JSONB type info.
+    // 2. An Array of JSONB is read from a ColumnArray of ColumnString, so each String field has to be converted to a JSONB field.
+    if (is_finalized() && least_common_type.get_base_type_id() != TypeIndex::JSONB) {
+        // Common case: read the field value from the finalized column directly.
         get_finalized_column().get(n, res);
         return;
     }
@@ -965,11 +960,20 @@ void ColumnObject::Subcolumn::get(size_t n, Field& res) const {
         const auto& part = data[i];
         const auto& part_type = data_types[i];
         if (ind < part->size()) {
-            res = vectorized::remove_nullable(part_type)->get_default();
+            auto non_nullable_type = vectorized::remove_nullable(part_type);
+            bool is_nested_array_of_jsonb =
+                    non_nullable_type->equals(*NESTED_TYPE_AS_ARRAY_OF_JSONB);
+
+            res = non_nullable_type->get_default();
             part->get(ind, res);
-            Field new_field;
-            convert_field_to_type(res, *least_common_type.get(), &new_field);
-            res = new_field;
+
+            if (is_nested_array_of_jsonb) {
+                convert_array_string_to_array_jsonb(res);
+            } else {
+                Field new_field;
+                convert_field_to_type(res, *least_common_type.get(), &new_field);
+                res = new_field;
+            }
             return;
         }
@@ -1861,6 +1865,10 @@ const DataTypePtr ColumnObject::NESTED_TYPE = std::make_shared(
         std::make_shared(std::make_shared(
                 std::make_shared())));
 
+const DataTypePtr ColumnObject::NESTED_TYPE_AS_ARRAY_OF_JSONB =
+        std::make_shared<DataTypeArray>(std::make_shared<DataTypeNullable>(
+                std::make_shared<DataTypeJsonb>()));
+
 DataTypePtr ColumnObject::get_root_type() const {
     return subcolumns.get_root()->data.get_least_common_type();
 }
@@ -2055,4 +2063,21 @@ bool ColumnObject::try_insert_default_from_nested(const Subcolumns::NodePtr& ent
     return true;
 }
 
+void ColumnObject::Subcolumn::convert_array_string_to_array_jsonb(Field& array_field) {
+    if (array_field.is_null()) {
+        return;
+    }
+    if (array_field.get_type() != Field::Types::Array) {
+        return;
+    }
+    Field converted_res = Array();
+    for (auto& item : array_field.get<Array>()) {
+        DCHECK(item.get_type() == Field::Types::String);
+        auto& string_item = item.get<String>();
+        Field jsonb_item = JsonbField(string_item.c_str(), string_item.size());
+        converted_res.get<Array>().emplace_back(std::move(jsonb_item));
+    }
+    array_field = std::move(converted_res);
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 16fe3430313189..03e14054d2e7a9 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -97,6 +97,9 @@ class ColumnObject final : public COWHelper {
     constexpr static TypeIndex MOST_COMMON_TYPE_ID = TypeIndex::JSONB;
     // Nullable(Array(Nullable(Object)))
     const static DataTypePtr NESTED_TYPE;
+    // Array(Nullable(Jsonb))
+    const static DataTypePtr NESTED_TYPE_AS_ARRAY_OF_JSONB;
+
     // Finlize mode for subcolumns, write mode will estimate which subcolumns are sparse columns(too many null values inside column),
     // merge and encode them into a shared column in root column. Only affects in flush block to segments.
     // Otherwise read mode should be as default mode.
@@ -177,6 +180,9 @@ class ColumnObject final : public COWHelper {
 
     void add_new_column_part(DataTypePtr type);
 
+    /// Converts an Array of String fields into an Array of JsonbField for the special case above.
+    static void convert_array_string_to_array_jsonb(Field& array_field);
+
     friend class ColumnObject;
 
 private:
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp
index ecbce03ba6b10a..9141bb95862180 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -33,6 +33,7 @@
 #include "common/exception.h"
 #include "common/status.h"
 #include "util/bitmap_value.h"
+#include "util/jsonb_document.h"
 #include "util/jsonb_writer.h"
 #include "vec/common/field_visitors.h"
 #include "vec/common/typeid_cast.h"
@@ -111,6 +112,11 @@ class FieldVisitorToJsonb : public StaticVisitor {
         writer->writeString(x);
         writer->writeEndString();
     }
+    void operator()(const JsonbField& x, JsonbWriter* writer) const {
+        JsonbDocument* doc;
+        THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(), x.get_size(), &doc));
+        writer->writeValue(doc->getValue());
+    }
     void operator()(const Array& x, JsonbWriter* writer) const;
 
     void operator()(const Tuple& x, JsonbWriter* writer) const {
@@ -146,9 +152,6 @@ class FieldVisitorToJsonb : public StaticVisitor {
     void operator()(const Map& x, JsonbWriter* writer) const {
         throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
     }
-    void operator()(const JsonbField& x, JsonbWriter* writer) const {
-        throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
-    }
 };
 
 void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer) const {
@@ -316,4 +319,4 @@ void convert_field_to_type(const Field& from_value, const IDataType& to_type, Fi
     return convert_field_to_typeImpl(from_value, to_type, from_type_hint, to);
 }
 }
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 99f5f625971946..0e7db6e407b478 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -850,7 +850,7 @@ struct ConvertNothingToJsonb {
     }
 };
 
-template
+template
 struct ConvertImplFromJsonb {
     static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                           const size_t result, size_t input_rows_count) {
@@ -897,6 +897,18 @@ struct ConvertImplFromJsonb {
                     res[i] = 0;
                     continue;
                 }
+
+                // If the value is a JSONB string, convert it by parsing; otherwise the result is null when ToDataType is not a string type.
+                if (value->isString()) {
+                    const auto* blob = static_cast(value);
+                    const auto& data = blob->getBlob();
+                    size_t len = blob->getBlobLen();
+                    ReadBuffer rb((char*)(data), len);
+                    bool parsed = try_parse_impl(res[i], rb, context);
+                    null_map[i] = !parsed;
+                    continue;
+                }
+
                 if constexpr (type_index == TypeIndex::UInt8) {
                     // cast from json value to boolean type
                     if (value->isTrue()) {
@@ -1991,22 +2003,22 @@ class FunctionCast final : public IFunctionBase {
                                          bool jsonb_string_as_string) const {
         switch (to_type->get_type_id()) {
         case TypeIndex::UInt8:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
         case TypeIndex::Int8:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int16:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int32:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int64:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int128:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Float64:
-            return &ConvertImplFromJsonb::execute;
+            return &ConvertImplFromJsonb::execute;
        case TypeIndex::String:
            if (!jsonb_string_as_string) {
-                // Conversion from String through parsing.
                return &ConvertImplGenericToString::execute2;
            } else {
                return ConvertImplGenericFromJsonb::execute;
diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp
index f6e8a65cc08c71..e031f168820156 100644
--- a/be/src/vec/json/json_parser.cpp
+++ b/be/src/vec/json/json_parser.cpp
@@ -59,8 +59,14 @@ void JSONDataParser::traverse(const Element& element, ParseContext&
     if (element.isObject()) {
         traverseObject(element.getObject(), ctx);
     } else if (element.isArray()) {
+        if (ctx.has_nested_in_flatten) {
+            throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+                                   "Nesting of array in Nested array within variant subcolumns is "
+                                   "currently not supported.");
+        }
         has_nested = false;
         check_has_nested_object(element);
+        ctx.has_nested_in_flatten = has_nested && ctx.enable_flatten_nested;
         if (has_nested && !ctx.enable_flatten_nested) {
             // Parse nested arrays to JsonbField
             JsonbWriter writer;
@@ -71,6 +77,8 @@ void JSONDataParser::traverse(const Element& element, ParseContext&
         } else {
             traverseArray(element.getArray(), ctx);
         }
+        // Reset has_nested_in_flatten once this array has been traversed so that it does not carry over to the next array.
+        ctx.has_nested_in_flatten = false;
     } else {
         ctx.paths.push_back(ctx.builder.get_parts());
         ctx.values.push_back(getValueAsField(element));
@@ -137,6 +145,7 @@ template
 void JSONDataParser::traverseArray(const JSONArray& array, ParseContext& ctx) {
     /// Traverse elements of array and collect an array of fields by each path.
     ParseArrayContext array_ctx;
+    array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
     array_ctx.total_size = array.size();
     for (auto it = array.begin(); it != array.end(); ++it) {
         traverseArrayElement(*it, array_ctx);
@@ -162,8 +171,9 @@ template
 void JSONDataParser::traverseArrayElement(const Element& element,
                                           ParseArrayContext& ctx) {
     ParseContext element_ctx;
+    element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
     traverse(element, element_ctx);
-    auto& [_, paths, values, flatten_nested] = element_ctx;
+    auto& [_, paths, values, flatten_nested, has_nested] = element_ctx;
     size_t size = paths.size();
     size_t keys_to_update = ctx.arrays_by_path.size();
     for (size_t i = 0; i < size; ++i) {
diff --git a/be/src/vec/json/json_parser.h b/be/src/vec/json/json_parser.h
index c1815ae5be4d50..401f225d000e2d 100644
--- a/be/src/vec/json/json_parser.h
+++ b/be/src/vec/json/json_parser.h
@@ -148,6 +148,7 @@ class JSONDataParser {
         std::vector paths;
         std::vector values;
         bool enable_flatten_nested = false;
+        bool has_nested_in_flatten = false;
     };
     using PathPartsWithArray = std::pair;
     using PathToArray = phmap::flat_hash_map;
@@ -157,6 +158,7 @@ class JSONDataParser {
         size_t total_size = 0;
         PathToArray arrays_by_path;
         KeyToSizes nested_sizes_by_key;
+        bool has_nested_in_flatten = false;
     };
     void traverse(const Element& element, ParseContext& ctx);
     void traverseObject(const JSONObject& object, ParseContext& ctx);
diff --git a/be/test/vec/columns/column_object_test.cpp b/be/test/vec/columns/column_object_test.cpp
index a7498e82e87824..21c533e63d653f 100644
--- a/be/test/vec/columns/column_object_test.cpp
+++ b/be/test/vec/columns/column_object_test.cpp
@@ -165,4 +165,134 @@ TEST_F(ColumnObjectTest, test_pop_back_multiple_types) {
     EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nothing");
 }
 
+TEST_F(ColumnObjectTest, test_nested_array_of_jsonb_get) {
+    // Test case: Create a ColumnObject whose subcolumn holds an Array of String values
+
+    // Create a ColumnObject with subcolumns
+    auto variant_column = ColumnObject::create(true);
+
+    // Add subcolumn with path "nested.array"
+    variant_column->add_sub_column(PathInData("nested.array"), 0);
+
+    // Get the subcolumn; inserting the array below gives it an Array of String type
+    auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array"));
+    ASSERT_NE(subcolumn, nullptr);
+
+    // Create test data: Array of strings
+    Field array_of_strings = Array();
+
+    // Add string elements to the array
+    std::string test_data1 = R"("a")";
+    std::string test_data2 = R"(b)";
+
+    array_of_strings.get().emplace_back(test_data1);
+    array_of_strings.get().emplace_back(test_data2);
+
+    // Insert the array field into the subcolumn
+    subcolumn->insert(array_of_strings);
+
+    // Test 1: the column is already finalized; test the get method
+    {
+        EXPECT_TRUE(variant_column->is_finalized());
+        // check the subcolumn get method
+        Field result;
+        EXPECT_NO_THROW(subcolumn->get(0, result));
+
+        // Verify the result is still an array
+        EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array);
+
+        const auto& result_array = result.get();
+        EXPECT_EQ(result_array.size(), 2);
+
+        // The least common type is still an Array of String, so the elements remain String fields
+        for (const auto& item : result_array) {
+            EXPECT_EQ(item.get_type(), doris::vectorized::Field::Types::String);
+        }
+
+        // Verify string content is preserved
+        const auto& string1 = result_array[0].get();
+        const auto& string2 = result_array[1].get();
+
+        EXPECT_EQ(string1, R"("a")"); // "\"a\""
+        EXPECT_EQ(string2, R"(b)"); // "b"
+    }
+
+    // Test 2: Test with a row of different type of array to 
test the subcolumn get method + { + // Add another row with different int array + Field int_array = Array(); + int_array.get().push_back(1); + int_array.get().push_back(2); + int_array.get().push_back(3); + + // and we should add more data to the subcolumn column + subcolumn->insert(int_array); + + EXPECT_FALSE(variant_column->is_finalized()); + // check the subcolumn get method + Field result; + EXPECT_NO_THROW(subcolumn->get(1, result)); + EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array); + const auto& result_array = result.get(); + EXPECT_EQ(result_array.size(), 3); + EXPECT_EQ(result_array[0].get_type(), doris::vectorized::Field::Types::JSONB); + EXPECT_EQ(result_array[1].get_type(), doris::vectorized::Field::Types::JSONB); + EXPECT_EQ(result_array[2].get_type(), doris::vectorized::Field::Types::JSONB); + + // check the first row Field is a string + Field result_string; + EXPECT_NO_THROW(subcolumn->get(0, result_string)); + EXPECT_EQ(result_string.get_type(), doris::vectorized::Field::Types::Array); + const auto& result_string_array = result_string.get(); + EXPECT_EQ(result_string_array.size(), 2); + EXPECT_EQ(result_string_array[0].get_type(), doris::vectorized::Field::Types::JSONB); + EXPECT_EQ(result_string_array[1].get_type(), doris::vectorized::Field::Types::JSONB); + + // Finalize -> we should get the least common type of the subcolumn + variant_column->finalize(); + EXPECT_TRUE(variant_column->is_finalized()); + // we should get another subcolumn from the variant column + auto* subcolumn_finalized = variant_column->get_subcolumn(PathInData("nested.array")); + ASSERT_NE(subcolumn_finalized, nullptr); + // check the subcolumn_finalized get method + Field result1, result2; + EXPECT_NO_THROW(subcolumn_finalized->get(0, result1)); + EXPECT_NO_THROW(subcolumn_finalized->get(1, result2)); + + // Verify both results are arrays + EXPECT_EQ(result1.get_type(), doris::vectorized::Field::Types::Array); + EXPECT_EQ(result2.get_type(), doris::vectorized::Field::Types::Array); + + const auto& array1 = result1.get(); + const auto& array2 = result2.get(); + + EXPECT_EQ(array1.size(), 2); + EXPECT_EQ(array2.size(), 3); + + // Verify all elements are JSONB + for (const auto& item : array1) { + EXPECT_EQ(item.get_type(), doris::vectorized::Field::Types::JSONB); + } + for (const auto& item : array2) { + EXPECT_EQ(item.get_type(), doris::vectorized::Field::Types::JSONB); + } + } + + // Test 4: Test with empty array + { + auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array")); + ASSERT_NE(subcolumn, nullptr); + Field empty_array_field = Array(); + subcolumn->insert(empty_array_field); + + EXPECT_TRUE(variant_column->is_finalized()); + // check the subcolumn get method + Field result; + EXPECT_NO_THROW(subcolumn->get(2, result)); + EXPECT_EQ(result.get_type(), doris::vectorized::Field::Types::Array); + const auto& result_array = result.get(); + EXPECT_EQ(result_array.size(), 0); + } +} + } // namespace doris::vectorized diff --git a/be/test/vec/function/cast/function_variant_cast_test.cpp b/be/test/vec/function/cast/function_variant_cast_test.cpp new file mode 100644 index 00000000000000..49a7dc49e15899 --- /dev/null +++ b/be/test/vec/function/cast/function_variant_cast_test.cpp @@ -0,0 +1,466 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "common/status.h" +#include "gtest/gtest_pred_impl.h" +#include "olap/field.h" +#include "runtime/define_primitive_type.h" +#include "runtime/primitive_type.h" +#include "runtime/runtime_state.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_object.h" +#include "vec/core/field.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_object.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { +static doris::vectorized::Field construct_variant_map( + const std::vector>& key_and_values) { + doris::vectorized::Field res = VariantMap(); + auto& object = res.get(); + for (const auto& [k, v] : key_and_values) { + PathInData path(k); + object.try_emplace(path, v); + } + return res; +} + +static auto construct_basic_varint_column() { + // 1. create an empty variant column + auto variant = ColumnObject::create(5); + + std::vector> data; + + // 2. subcolumn path + data.emplace_back("v.a", 20); + data.emplace_back("v.b", "20"); + data.emplace_back("v.c", 20); + data.emplace_back("v.f", 20); + data.emplace_back("v.e", "50"); + for (int i = 0; i < 5; ++i) { + auto field = construct_variant_map(data); + variant->try_insert(field); + } + + return variant; +} + +TEST(FunctionVariantCast, CastToVariant) { + // Test casting from basic types to variant + { + // Test Int32 to variant + auto int32_type = std::make_shared(); + auto variant_type = std::make_shared(); + auto int32_col = ColumnInt32::create(); + int32_col->insert(42); + int32_col->insert(100); + int32_col->insert(-1); + + ColumnsWithTypeAndName arguments {{int32_col->get_ptr(), int32_type, "int32_col"}, + {nullptr, variant_type, "variant_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, variant_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, variant_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 3).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* variant_col = assert_cast(result_col.get()); + ASSERT_EQ(variant_col->size(), 3); + } + + // Test casting from string to variant + { + auto string_type = std::make_shared(); + auto variant_type = std::make_shared(); + auto string_col = ColumnString::create(); + string_col->insert_data("hello", 5); + string_col->insert_data("world", 5); + + ColumnsWithTypeAndName arguments {{string_col->get_ptr(), string_type, "string_col"}, + {nullptr, variant_type, "variant_type"}}; + + auto function = SimpleFunctionFactory::instance().get_function("CAST", 
arguments, + make_nullable(variant_type)); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, variant_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 2).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* variant_col = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(variant_col->size(), 2); + } + + // Test casting from array to variant + { + auto array_type = std::make_shared(std::make_shared()); + auto variant_type = std::make_shared(); + auto array_col = + ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()); + auto& data = assert_cast(array_col->get_data()); + auto& offsets = array_col->get_offsets(); + + data.insert(1); + data.insert(2); + data.insert(3); + offsets.push_back(3); + + ColumnsWithTypeAndName arguments {{array_col->get_ptr(), array_type, "array_col"}, + {nullptr, variant_type, "variant_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, variant_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, variant_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* variant_col = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(variant_col->size(), 1); + } +} + +TEST(FunctionVariantCast, CastFromVariant) { + // Test casting from variant to basic types + { + auto variant_type = std::make_shared(); + auto int32_type = std::make_shared(); + auto variant_col = ColumnObject::create(true); + + // Create a variant column with integer values + variant_col->create_root(int32_type, ColumnInt32::create()); + MutableColumnPtr data = variant_col->get_root(); + data->insert(42); + data->insert(100); + data->insert(-1); + + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, int32_type, "int32_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, int32_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, int32_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 3).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + // always nullable + const auto* int32_result = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(int32_result->size(), 3); + ASSERT_EQ(int32_result->get_element(0), 42); + ASSERT_EQ(int32_result->get_element(1), 100); + ASSERT_EQ(int32_result->get_element(2), -1); + } + + // Test casting from variant to string + { + auto variant_type = std::make_shared(); + auto string_type = std::make_shared(); + auto variant_col = ColumnObject::create(true); + + // Create a variant column with string values + variant_col->create_root(string_type, ColumnString::create()); + MutableColumnPtr data = variant_col->get_root(); + 
data->insert_data("hello", 5); + data->insert_data("world", 5); + + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, string_type, "string_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, string_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, string_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 2).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* string_result = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(string_result->size(), 2); + ASSERT_EQ(string_result->get_data_at(0).to_string(), "hello"); + ASSERT_EQ(string_result->get_data_at(1).to_string(), "world"); + } + + // Test casting from variant to array + { + auto variant_type = std::make_shared(); + auto array_type = std::make_shared(std::make_shared()); + auto variant_col = ColumnObject::create(true); + + // Create a variant column with array values + variant_col->create_root( + array_type, + ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create())); + MutableColumnPtr data = variant_col->get_root(); + + Field a = Array {1, 2, 3}; + + data->insert(a); + + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, array_type, "array_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, array_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, array_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* array_result = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(array_result->size(), 1); + const auto& result_data = assert_cast(array_result->get_data()); + ASSERT_EQ(result_data.size(), 3); + ASSERT_EQ(result_data.get_element(0), 1); + ASSERT_EQ(result_data.get_element(1), 2); + ASSERT_EQ(result_data.get_element(2), 3); + } +} + +TEST(FunctionVariantCast, CastVariantWithNull) { + auto variant_type = std::make_shared(); + auto int32_type = std::make_shared(); + auto nullable_int32_type = std::make_shared(int32_type); + + // Create a variant column with nullable integer values + auto variant_col = ColumnObject::create(true); + variant_col->create_root(nullable_int32_type, + ColumnNullable::create(ColumnInt32::create(), ColumnUInt8::create())); + MutableColumnPtr data = variant_col->get_root(); + + data->insert(42); + data->insert(Null()); + data->insert(100); + + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, nullable_int32_type, "nullable_int32_type"}}; + + variant_col->finalize(); + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, nullable_int32_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, nullable_int32_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + 
ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 3).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* nullable_result = assert_cast(result_col.get()); + ASSERT_EQ(nullable_result->size(), 3); + + const auto& result_data = assert_cast(nullable_result->get_nested_column()); + const auto& result_null_map = nullable_result->get_null_map_data(); + + ASSERT_EQ(result_data.get_element(0), 42); + ASSERT_EQ(result_null_map[0], 0); + ASSERT_EQ(result_null_map[1], 1); + ASSERT_EQ(result_data.get_element(2), 100); +} + +TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) { + // Test case 1: variant.empty() branch + { + auto variant_type = std::make_shared(); + auto int32_type = std::make_shared(); + MutableColumnPtr root = ColumnInt32::create(); + root->insert(42); + vectorized::ColumnObject::Subcolumns dynamic_subcolumns; + dynamic_subcolumns.add( + vectorized::PathInData(ColumnObject::COLUMN_NAME_DUMMY), + vectorized::ColumnObject::Subcolumn {root->get_ptr(), int32_type, true, true}); + auto variant_col = ColumnObject::create(std::move(dynamic_subcolumns), true); + + variant_col->finalize(); + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, int32_type, "int32_type"}}; + + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, int32_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, int32_type, "result"}); + + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + // always nullable + const auto* int32_result = + assert_cast(remove_nullable(result_col).get()); + ASSERT_EQ(int32_result->size(), 1); + // because of variant.empty() we insert_default with data_type_to + ASSERT_EQ(int32_result->get_element(0), 0); + } + + // Test case 2: !data_type_to->is_nullable() && !WhichDataType(data_type_to).is_string() branch + { + // object has sparse column + auto int32_type = std::make_shared(); + auto variant_col = construct_basic_varint_column(); + auto variant_type = std::make_shared(); + + ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, int32_type, "int32_type"}}; + + variant_col->finalize(); + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, int32_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, int32_type, "result"}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* nullable_result = assert_cast(result_col.get()); + ASSERT_EQ(nullable_result->size(), 1); + ASSERT_TRUE(nullable_result->is_null_at(0)); + } + + // Test case 3: WhichDataType(data_type_to).is_string() branch + { + // variant has sparse column + auto int32_type = std::make_shared(); + auto variant_col = construct_basic_varint_column(); + + auto string_type = std::make_shared(); + auto variant_type = std::make_shared(); + + ColumnsWithTypeAndName arguments 
{{variant_col->get_ptr(), variant_type, "variant_col"}, + {nullptr, string_type, "string_type"}}; + + variant_col->finalize(); + auto function = + SimpleFunctionFactory::instance().get_function("CAST", arguments, string_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, string_type, "result"}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* string_result = assert_cast(result_col.get()); + // just call ConvertImplGenericToString which will insert all source column data to ColumnString + ASSERT_EQ(string_result->size(), variant_col->size()); + ASSERT_EQ(string_result->get_data_at(0).to_string(), + "{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}"); + } + + // Test case 4: else branch (nullable type) + { + auto variant_col = construct_basic_varint_column(); + variant_col->finalize(); + auto nullable_variant_col = make_nullable(variant_col->get_ptr()); + + auto nullable_string_type = make_nullable(std::make_shared()); + auto variant_type = std::make_shared(); + auto nullable_variant_type = make_nullable(variant_type); + + ColumnsWithTypeAndName arguments { + {nullable_variant_col->get_ptr(), nullable_variant_type, "variant_col"}, + {nullptr, nullable_string_type, "nullable_string_type"}}; + + auto function = SimpleFunctionFactory::instance().get_function("CAST", arguments, + nullable_string_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, nullable_string_type, "result"}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* nullable_result = assert_cast(result_col.get()); + ASSERT_EQ(nullable_result->size(), 1); + ASSERT_TRUE(nullable_result->is_null_at(1)); + } +} + +} // namespace doris::vectorized diff --git a/be/test/vec/jsonb/convert_field_to_type_test.cpp b/be/test/vec/jsonb/convert_field_to_type_test.cpp new file mode 100644 index 00000000000000..065d86c039cc11 --- /dev/null +++ b/be/test/vec/jsonb/convert_field_to_type_test.cpp @@ -0,0 +1,521 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/data_types/convert_field_to_type.cpp" + +#include + +#include +#include + +#include "runtime/jsonb_value.h" +#include "util/jsonb_document.h" +#include "util/jsonb_writer.h" +#include "vec/core/field.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_nullable.h" + +namespace doris::vectorized { + +class ConvertFieldToTypeTest : public ::testing::Test { +protected: + void SetUp() override {} +}; + +// Test FieldVisitorToJsonb with different field types using the same pattern as convert_field_to_typeImpl +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Null) { + JsonbWriter writer; + + // Test null field using Field::dispatch pattern + Field null_field; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + null_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's a null value + ASSERT_TRUE(doc->getValue()->isNull()); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Int64) { + JsonbWriter writer; + + // Test Int64 field using Field::dispatch pattern + Int64 test_value = 12345; + Field int_field = test_value; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + int_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an integer value + ASSERT_TRUE(doc->getValue()->isInt64()); + ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_UInt64) { + JsonbWriter writer; + + // Test UInt64 field using Field::dispatch pattern + UInt64 test_value = 12345; + Field uint_field = test_value; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + uint_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an integer value + ASSERT_TRUE(doc->getValue()->isInt64()); + ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), static_cast(test_value)); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Float64) { + JsonbWriter writer; + + // Test Float64 field using Field::dispatch pattern + Float64 test_value = 123.456; + Field double_field = test_value; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + double_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // 
Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's a double value + ASSERT_TRUE(doc->getValue()->isDouble()); + ASSERT_DOUBLE_EQ(((const JsonbDoubleVal*)doc->getValue())->val(), test_value); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_String) { + JsonbWriter writer; + + // Test String field using Field::dispatch pattern + Field string_field = "hello world"; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + string_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's a string value + ASSERT_TRUE(doc->getValue()->isString()); + const auto* string_val = static_cast(doc->getValue()); + std::string real_string(string_val->getBlob(), string_val->getBlobLen()); + ASSERT_EQ(real_string, string_field.get()); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_JsonbField) { + JsonbWriter writer; + JsonBinaryValue jsonb_value; + std::string test_data = R"({"a": ["1", "2"]})"; + THROW_IF_ERROR(jsonb_value.from_json_string(test_data.data(), test_data.size())); + Field jsonb_field_obj = JsonbField(jsonb_value.value(), jsonb_value.size()); + + // Test JsonbField using Field::dispatch pattern + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + jsonb_field_obj); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an object value + ASSERT_TRUE(doc->getValue()->isObject()); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_Array) { + JsonbWriter writer; + + // Create an array with mixed types + Array array_field; + array_field.push_back(123); + array_field.push_back("hello"); + array_field.push_back(456.789); + + Field array_obj = array_field; + + // Test Array using Field::dispatch pattern + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + array_obj); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an array value + ASSERT_TRUE(doc->getValue()->isArray()); + const ArrayVal& array = static_cast(*doc->getValue()); + ASSERT_EQ(array.numElem(), 3); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_NestedArray) { + JsonbWriter writer; + + // Create a nested array + Array inner_array; + inner_array.push_back(1); + inner_array.push_back(2); + 
+ Array outer_array; + outer_array.push_back(inner_array); + outer_array.push_back("nested"); + + Field nested_array_obj = outer_array; + + // Test nested Array using Field::dispatch pattern + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + nested_array_obj); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an array value + ASSERT_TRUE(doc->getValue()->isArray()); + const ArrayVal& array = static_cast(*doc->getValue()); + ASSERT_EQ(array.numElem(), 2); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_LargeInt) { + JsonbWriter writer; + + // Test Int128 field using Field::dispatch pattern + Int128 test_value = 1234567890123456789; + Field largeint_field = test_value; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + largeint_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an int128 value + ASSERT_TRUE(doc->getValue()->isInt128()); + ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value); +} + +TEST_F(ConvertFieldToTypeTest, FieldVisitorToJsonb_UInt128) { + JsonbWriter writer; + + // Test UInt128 field using Field::dispatch pattern + UInt128 test_value = 1234567890123456789; + Field uint128_field = test_value; + Field::dispatch([&writer](const auto& value) { FieldVisitorToJsonb()(value, &writer); }, + uint128_field); + + auto* output = writer.getOutput(); + ASSERT_NE(output, nullptr); + ASSERT_GT(output->getSize(), 0); + + // Verify the output is valid JSONB + JsonbDocument* doc = nullptr; + auto status = + JsonbDocument::checkAndCreateDocument(output->getBuffer(), output->getSize(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + + // Verify it's an int128 value + ASSERT_TRUE(doc->getValue()->isInt128()); + ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), static_cast(test_value)); +} + +// Test convert_field_to_type function with JSONB type (similar to convert_field_to_typeImpl) +TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ToJsonb) { + DataTypeJsonb jsonb_type; + + // Test converting Int64 to JSONB + { + Int64 test_value = 12345; + Field int_field = test_value; + Field result; + + convert_field_to_type(int_field, jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::JSONB); + ASSERT_FALSE(result.is_null()); + + const JsonbField& jsonb_result = result.get(); + ASSERT_NE(jsonb_result.get_value(), nullptr); + ASSERT_GT(jsonb_result.get_size(), 0); + + // Verify the JSONB content + JsonbDocument* doc = nullptr; + auto status = JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(), + jsonb_result.get_size(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + 
ASSERT_TRUE(doc->getValue()->isInt64()); + ASSERT_EQ(((const JsonbIntVal*)doc->getValue())->val(), test_value); + } + + // Test converting String to JSONB + { + Field string_field = "hello world"; + Field result; + + convert_field_to_type(string_field, jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::JSONB); + ASSERT_FALSE(result.is_null()); + + const JsonbField& jsonb_result = result.get(); + ASSERT_NE(jsonb_result.get_value(), nullptr); + ASSERT_GT(jsonb_result.get_size(), 0); + + // Verify the JSONB content + JsonbDocument* doc = nullptr; + auto status = JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(), + jsonb_result.get_size(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + ASSERT_TRUE(doc->getValue()->isString()); + const auto* string_val = static_cast(doc->getValue()); + std::string real_string(string_val->getBlob(), string_val->getBlobLen()); + ASSERT_EQ(real_string, string_field.get()); + } + + // Test converting Array to JSONB + { + Array array_field; + array_field.push_back(1); + array_field.push_back("test"); + array_field.push_back(3.14); + + Field array_obj = array_field; + Field result; + + convert_field_to_type(array_obj, jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::JSONB); + ASSERT_FALSE(result.is_null()); + + const JsonbField& jsonb_result = result.get(); + ASSERT_NE(jsonb_result.get_value(), nullptr); + ASSERT_GT(jsonb_result.get_size(), 0); + + // Verify the JSONB content + JsonbDocument* doc = nullptr; + auto status = JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(), + jsonb_result.get_size(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + ASSERT_TRUE(doc->getValue()->isArray()); + const ArrayVal& array = static_cast(*doc->getValue()); + ASSERT_EQ(array.numElem(), 3); + } + + // Test converting JSONB to JSONB (should be no-op) + { + JsonbWriter test_writer; + test_writer.writeStartObject(); + test_writer.writeKey("key"); + test_writer.writeString("value"); + test_writer.writeEndObject(); + + auto* test_output = test_writer.getOutput(); + JsonbField original_jsonb(test_output->getBuffer(), test_output->getSize()); + Field jsonb_field = original_jsonb; + Field result; + + convert_field_to_type(jsonb_field, jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::JSONB); + ASSERT_FALSE(result.is_null()); + + const JsonbField& jsonb_result = result.get(); + ASSERT_NE(jsonb_result.get_value(), nullptr); + ASSERT_EQ(jsonb_result.get_size(), original_jsonb.get_size()); + ASSERT_EQ(memcmp(jsonb_result.get_value(), original_jsonb.get_value(), + original_jsonb.get_size()), + 0); + } +} + +// Test convert_field_to_type with nullable JSONB type +TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ToNullableJsonb) { + auto nullable_jsonb_type = + std::make_shared(std::make_shared()); + + // Test converting null field + { + Field null_field; + Field result; + + convert_field_to_type(null_field, *nullable_jsonb_type, &result); + + ASSERT_TRUE(result.is_null()); + } + + // Test converting non-null field + { + Field string_field = "test string"; + Field result; + + convert_field_to_type(string_field, *nullable_jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::JSONB); + ASSERT_FALSE(result.is_null()); + + const JsonbField& jsonb_result = result.get(); + ASSERT_NE(jsonb_result.get_value(), nullptr); + ASSERT_GT(jsonb_result.get_size(), 
0); + + // Verify the JSONB content + JsonbDocument* doc = nullptr; + auto status = JsonbDocument::checkAndCreateDocument(jsonb_result.get_value(), + jsonb_result.get_size(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument: " << status.to_string(); + ASSERT_NE(doc, nullptr); + ASSERT_TRUE(doc->getValue()->isString()); + const auto* string_val = static_cast(doc->getValue()); + std::string real_string(string_val->getBlob(), string_val->getBlobLen()); + ASSERT_EQ(real_string, string_field.get()); + } +} + +// Test convert_field_to_type with array of JSONB +TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ArrayToJsonb) { + auto array_jsonb_type = std::make_shared(std::make_shared()); + + // Create an array with mixed types that will be converted to JSONB + Array array_field; + array_field.push_back(123); + array_field.push_back("hello"); + array_field.push_back(456.789); + + Field array_obj = array_field; + Field result; + + convert_field_to_type(array_obj, *array_jsonb_type, &result); + + ASSERT_EQ(result.get_type(), Field::Types::Array); + ASSERT_FALSE(result.is_null()); + + const Array& result_array = result.get(); + ASSERT_EQ(result_array.size(), 3); + + // Verify each element is converted to JSONB + for (size_t i = 0; i < result_array.size(); ++i) { + ASSERT_EQ(result_array[i].get_type(), Field::Types::JSONB); + ASSERT_FALSE(result_array[i].is_null()); + + const auto& jsonb_element = result_array[i].get(); + ASSERT_NE(jsonb_element.get_value(), nullptr); + ASSERT_GT(jsonb_element.get_size(), 0); + + // Verify the JSONB content + JsonbDocument* doc = nullptr; + auto status = JsonbDocument::checkAndCreateDocument(jsonb_element.get_value(), + jsonb_element.get_size(), &doc); + ASSERT_TRUE(status.ok()) << "Failed to create JsonbDocument for element " << i << ": " + << status.to_string(); + ASSERT_NE(doc, nullptr); + } +} + +// Test error cases +TEST_F(ConvertFieldToTypeTest, ConvertFieldToType_ErrorCases) { + DataTypeJsonb jsonb_type; + + // Test with unsupported types (should throw exception) + { + Field tuple_field = Tuple(); + + EXPECT_THROW( + { + Field result; + convert_field_to_type(tuple_field, jsonb_type, &result); + }, + doris::Exception); + } +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/jsonb/json_parser_test.cpp b/be/test/vec/jsonb/json_parser_test.cpp new file mode 100644 index 00000000000000..924bd13197db7b --- /dev/null +++ b/be/test/vec/jsonb/json_parser_test.cpp @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/json/json_parser.h" + +#include + +#include + +#include "vec/core/field.h" +#include "vec/core/types.h" + +using doris::vectorized::JSONDataParser; +using doris::vectorized::SimdJSONParser; +using doris::vectorized::ParseConfig; + +TEST(JsonParserTest, ParseSimpleTypes) { + JSONDataParser parser; + ParseConfig config; + + // int + auto result = parser.parse("123", 3, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + + // double + result = parser.parse("1.23", 4, config); + ASSERT_TRUE(result.has_value()); + + // bool + result = parser.parse("true", 4, config); + ASSERT_TRUE(result.has_value()); + + // null + result = parser.parse("null", 4, config); + ASSERT_TRUE(result.has_value()); + + // string + result = parser.parse("\"abc\"", 5, config); + ASSERT_TRUE(result.has_value()); +} + +TEST(JsonParserTest, ParseObjectAndArray) { + JSONDataParser parser; + ParseConfig config; + + // Object + auto result = parser.parse(R"({"a":1,"b":2})", 13, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 2); + + // Array + result = parser.parse("[1,2,3]", 7, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); +} + +TEST(JsonParserTest, ParseMultiLevelNestedArray) { + JSONDataParser parser; + ParseConfig config; + + auto result = parser.parse("[[1,2],[3,4]]", 13, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::Array); + + result = parser.parse("[[[1],[2]],[[3],[4]]]", 21, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::Array); + + result = parser.parse("[[1,2],[3],[4,5,6]]", 19, config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + + // Test complex nested structure + config.enable_flatten_nested = false; + std::string json1 = R"({"a":[[1,2],[3],[4,5,6]]})"; + // multi level nested array in object + result = parser.parse(json1.c_str(), json1.size(), config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::Array); + + std::string json = R"({"nested": [{"a": [1,2,3]}]})"; + // result should be jsonbField + result = parser.parse(json.c_str(), json.size(), config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::JSONB); + + // multi level nested array in nested array object + std::string json2 = R"({"a":[{"b":[[1,2,3]]}]})"; + result = parser.parse(json2.c_str(), json2.size(), config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::JSONB); + + // test flatten nested + config.enable_flatten_nested = true; + EXPECT_ANY_THROW(parser.parse(json.c_str(), json.size(), config)); + // test flatten nested with multi level nested array + // no throw because it is not nested object array + result = parser.parse(json1.c_str(), json1.size(), config); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->values.size(), 1); + 
EXPECT_EQ(result->paths.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::vectorized::Field::Types::Array); + + EXPECT_ANY_THROW(parser.parse(json2.c_str(), json2.size(), config)); +} + +TEST(JsonParserTest, ParseNestedAndFlatten) { + JSONDataParser parser; + ParseConfig config; + config.enable_flatten_nested = true; + + std::string json = R"({"a":[{"b":1},{"b":2}]})"; + auto result = parser.parse(json.c_str(), json.size(), config); + ASSERT_TRUE(result.has_value()); + EXPECT_GT(result->values.size(), 0); + + config.enable_flatten_nested = false; + std::string json2 = R"({"a":[{"b":1},{"b":2}]})"; + result = parser.parse(json2.c_str(), json2.size(), config); + ASSERT_TRUE(result.has_value()); +} + +TEST(JsonParserTest, ParseInvalidJson) { + JSONDataParser parser; + ParseConfig config; + + auto result = parser.parse("{a:1}", 5, config); + ASSERT_FALSE(result.has_value()); + + result = parser.parse("", 0, config); + ASSERT_FALSE(result.has_value()); +} + +TEST(JsonParserTest, ParseCornerCases) { + JSONDataParser parser; + ParseConfig config; + + auto result = parser.parse("{}", 2, config); + ASSERT_TRUE(result.has_value()); + + result = parser.parse("[]", 2, config); + ASSERT_TRUE(result.has_value()); + + result = parser.parse(R"({"a":"\n\t"})", 12, config); + ASSERT_TRUE(result.has_value()); +} diff --git a/regression-test/data/variant_p0/desc.out b/regression-test/data/variant_p0/desc.out index 1eff52e44842f3..71f804cc25c5bb 100644 --- a/regression-test/data/variant_p0/desc.out +++ b/regression-test/data/variant_p0/desc.out @@ -16,7 +16,7 @@ v.b json Yes false \N NONE v.c.c smallint Yes false \N NONE v.c.e double Yes false \N NONE v.ddd.aaa tinyint Yes false \N NONE -v.ddd.mxmxm json Yes false \N NONE +v.ddd.mxmxm array Yes false \N NONE v.oooo.xxxx.xxx tinyint Yes false \N NONE v.xxxx text Yes false \N NONE @@ -37,7 +37,7 @@ v.b json Yes false \N NONE v.c.c smallint Yes false \N NONE v.c.e double Yes false \N NONE v.ddd.aaa tinyint Yes false \N NONE -v.ddd.mxmxm json Yes false \N NONE +v.ddd.mxmxm array Yes false \N NONE v.oooo.xxxx.xxx tinyint Yes false \N NONE v.xxxx text Yes false \N NONE @@ -66,7 +66,7 @@ v.b json Yes false \N NONE v.c.c smallint Yes false \N NONE v.c.e double Yes false \N NONE v.ddd.aaa tinyint Yes false \N NONE -v.ddd.mxmxm json Yes false \N NONE +v.ddd.mxmxm array Yes false \N NONE v.oooo.xxxx.xxx tinyint Yes false \N NONE v.xxxx text Yes false \N NONE diff --git a/regression-test/data/variant_p0/nested2.out b/regression-test/data/variant_p0/nested2.out new file mode 100644 index 00000000000000..c7790a107de3da --- /dev/null +++ b/regression-test/data/variant_p0/nested2.out @@ -0,0 +1,337 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql_desc_1 -- +k bigint Yes true \N +v variant Yes false \N NONE +v.nested.a array Yes false \N NONE +v.nested.b array Yes false \N NONE +v.nested.c array Yes false \N NONE + +-- !sql_0 -- +1 {"nested":[{"a":1,"c":1.1},{"b":"1"}]} + +-- !sql_1 -- +[1, null] + +-- !sql_2 -- +[null, "1"] + +-- !sql_3 -- +[1.1, null] + +-- !sql_4 -- +[{"a":1,"c":1.1}, {"b":"1"}] + +-- !sql_8 -- +[1, null] 2 + +-- !sql_9 -- +[null, 1] 2 + +-- !sql_10 -- +[1, null] 2 + +-- !sql_11 -- +["1", null] 2 + +-- !sql_12 -- +[null, "1"] 2 + +-- !sql_13 -- +["1.1", null] 2 + +-- !sql_14 -- +[1, null] 2 + +-- !sql_15 -- +[null, 1] 2 + +-- !sql_16 -- +[1.1, null] 2 + +-- !sql_desc_2 -- +k bigint Yes true \N +v variant Yes false \N NONE +v.nested.a array Yes false \N NONE +v.nested.b array Yes false \N NONE +v.nested.c array Yes false \N NONE + +-- !sql_0 -- +1 {"nested":[{"a":1.0,"c":1.1},{"b":"1"}]} +2 {"nested":[{"a":2.5,"b":123.1}]} + +-- !sql_1 -- +[1, null] +[2.5] + +-- !sql_2 -- +[null, ""1""] +["123.1"] + +-- !sql_3 -- +[1.1, null] +\N + +-- !sql_4 -- +[{"a":1.0,"c":1.1}, {"b":"1"}] +[{"a":2.5,"b":123.1}] + +-- !sql_8 -- +[1, null] 2 +[2] 1 + +-- !sql_9 -- +[null, 1] 2 +[123] 1 + +-- !sql_10 -- +[1, null] 2 +\N \N + +-- !sql_11 -- +["1", null] 2 +["2.5"] 1 + +-- !sql_12 -- +[null, "1"] 2 +["123.1"] 1 + +-- !sql_13 -- +["1.1", null] 2 +\N \N + +-- !sql_14 -- +[1, null] 2 +[2.5] 1 + +-- !sql_15 -- +[null, 1] 2 +[123.1] 1 + +-- !sql_16 -- +[1.1, null] 2 +\N \N + +-- !sql_0 -- +1 {"nested":[{"a":1.0,"c":1.1},{"b":"1"}]} +2 {"nested":[{"a":2.5,"b":123.1}]} + +-- !sql_1 -- +[1, null] +[2.5] + +-- !sql_2 -- +[null, ""1""] +["123.1"] + +-- !sql_3 -- +[1.1, null] +[null] + +-- !sql_4 -- +[{"a":1.0,"c":1.1}, {"b":"1"}] +[{"a":2.5,"b":123.1}] + +-- !sql_8 -- +[1, null] 2 +[2] 1 + +-- !sql_9 -- +[null, 1] 2 +[123] 1 + +-- !sql_10 -- +[1, null] 2 +[null] 1 + +-- !sql_11 -- +["1", null] 2 +["2.5"] 1 + +-- !sql_12 -- +[null, "1"] 2 +["123.1"] 1 + +-- !sql_13 -- +["1.1", null] 2 +[null] 1 + +-- !sql_14 -- +[1, null] 2 +[2.5] 1 + +-- !sql_15 -- +[null, 1] 2 +[123.1] 1 + +-- !sql_16 -- +[1.1, null] 2 +[null] 1 + +-- !sql_desc_4 -- +k bigint Yes true \N +v variant Yes false \N NONE +v.nested.a array Yes false \N NONE +v.nested.b array Yes false \N NONE +v.nested.c array Yes false \N NONE + +-- !sql_0 -- +1 {"nested":[{"a":1,"b":1.1},{"a":"1","b":"1","c":"1"}]} + +-- !sql_1 -- +["1", ""1""] + +-- !sql_2 -- +["1.1", ""1""] + +-- !sql_3 -- +[null, "1"] + +-- !sql_4 -- +[{"a":1,"b":1.1}, {"a":"1","b":"1","c":"1"}] + +-- !sql_8 -- +[1, 1] 2 + +-- !sql_9 -- +[1, 1] 2 + +-- !sql_10 -- +[null, 1] 2 + +-- !sql_11 -- +["1", "1"] 2 + +-- !sql_12 -- +["1.1", "1"] 2 + +-- !sql_13 -- +[null, "1"] 2 + +-- !sql_14 -- +[1, 1] 2 + +-- !sql_15 -- +[1.1, 1] 2 + +-- !sql_16 -- +[null, 1] 2 + +-- !sql_desc_5 -- +k bigint Yes true \N +v variant Yes false \N NONE +v.nested.a array Yes false \N NONE +v.nested.b array Yes false \N NONE +v.nested.c array Yes false \N NONE + +-- !sql_0 -- +1 {"nested":[{"a":1,"b":1.1},{"a":"1","b":"1","c":"1"}]} +2 {"nested":[{"a":1,"c":1.1}]} + +-- !sql_1 -- +["1", ""1""] +["1"] + +-- !sql_2 -- +["1.1", ""1""] +\N + +-- !sql_3 -- +[null, ""1""] +["1.1"] + +-- !sql_4 -- +[{"a":1,"b":1.1}, {"a":"1","b":"1","c":"1"}] +[{"a":1,"c":1.1}] + +-- !sql_8 -- +[1, 1] 2 +[1] 1 + +-- !sql_9 -- +[1, 1] 2 +\N \N + +-- !sql_10 -- +[null, 1] 2 +[1] 1 + +-- !sql_11 -- +["1", "1"] 2 +["1"] 1 + +-- !sql_12 -- +["1.1", "1"] 2 +\N \N + +-- !sql_13 -- +[null, "1"] 2 +["1.1"] 1 + +-- !sql_14 
-- +[1, 1] 2 +[1] 1 + +-- !sql_15 -- +[1.1, 1] 2 +\N \N + +-- !sql_16 -- +[null, 1] 2 +[1.1] 1 + +-- !sql_0 -- +1 {"nested":[{"a":1,"b":1.1},{"a":"1","b":"1","c":"1"}]} +2 {"nested":[{"a":1,"c":1.1}]} + +-- !sql_1 -- +["1", ""1""] +["1"] + +-- !sql_2 -- +["1.1", ""1""] +[null] + +-- !sql_3 -- +[null, ""1""] +["1.1"] + +-- !sql_4 -- +[{"a":1,"b":1.1}, {"a":"1","b":"1","c":"1"}] +[{"a":1,"c":1.1}] + +-- !sql_8 -- +[1, 1] 2 +[1] 1 + +-- !sql_9 -- +[1, 1] 2 +[null] 1 + +-- !sql_10 -- +[null, 1] 2 +[1] 1 + +-- !sql_11 -- +["1", "1"] 2 +["1"] 1 + +-- !sql_12 -- +["1.1", "1"] 2 +[null] 1 + +-- !sql_13 -- +[null, "1"] 2 +["1.1"] 1 + +-- !sql_14 -- +[1, 1] 2 +[1] 1 + +-- !sql_15 -- +[1.1, 1] 2 +[null] 1 + +-- !sql_16 -- +[null, 1] 2 +[1.1] 1 + diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 8ec837ff18bae2..8d87175d1d324c 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -14,6 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +import org.awaitility.Awaitility suite("regression_test_variant", "p0"){ diff --git a/regression-test/suites/variant_p0/nested2.groovy b/regression-test/suites/variant_p0/nested2.groovy new file mode 100644 index 00000000000000..8d48fcfce9b1c0 --- /dev/null +++ b/regression-test/suites/variant_p0/nested2.groovy @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+// this test exercises type conflicts between nested array subcolumns of a variant
+suite("variant_nested_type_conflict", "p0"){
+
+    try {
+
+        def table_name = "var_nested_type_conflict"
+        sql "DROP TABLE IF EXISTS ${table_name}"
+        sql """set describe_extend_variant_column = true"""
+
+        sql """
+            CREATE TABLE IF NOT EXISTS ${table_name} (
+                k bigint,
+                v variant
+            )
+            DUPLICATE KEY(`k`)
+            DISTRIBUTED BY HASH(k) BUCKETS 1 -- a single bucket so compaction really happens in the conflict case
+            properties("replication_num" = "1", "disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true");
+        """
+        def sql_select_batch = {
+            qt_sql_0 """select * from ${table_name} order by k"""
+
+            qt_sql_1 """select v['nested']['a'] from ${table_name} order by k"""
+            qt_sql_2 """select v['nested']['b'] from ${table_name} order by k"""
+            qt_sql_3 """select v['nested']['c'] from ${table_name} order by k"""
+
+            qt_sql_4 """select v['nested'] from ${table_name} order by k"""
+        }
+
+        def sql_test_cast_to_array = {
+            // test cast to array<int>
+            qt_sql_8 """select cast(v['nested']['a'] as array<int>), size(cast(v['nested']['a'] as array<int>)) from ${table_name} order by k"""
+            qt_sql_9 """select cast(v['nested']['b'] as array<int>), size(cast(v['nested']['b'] as array<int>)) from ${table_name} order by k"""
+            qt_sql_10 """select cast(v['nested']['c'] as array<int>), size(cast(v['nested']['c'] as array<int>)) from ${table_name} order by k"""
+
+            // test cast to array<text>
+            qt_sql_11 """select cast(v['nested']['a'] as array<text>), size(cast(v['nested']['a'] as array<text>)) from ${table_name} order by k"""
+            qt_sql_12 """select cast(v['nested']['b'] as array<text>), size(cast(v['nested']['b'] as array<text>)) from ${table_name} order by k"""
+            qt_sql_13 """select cast(v['nested']['c'] as array<text>), size(cast(v['nested']['c'] as array<text>)) from ${table_name} order by k"""
+
+            // test cast to array<double>
+            qt_sql_14 """select cast(v['nested']['a'] as array<double>), size(cast(v['nested']['a'] as array<double>)) from ${table_name} order by k"""
+            qt_sql_15 """select cast(v['nested']['b'] as array<double>), size(cast(v['nested']['b'] as array<double>)) from ${table_name} order by k"""
+            qt_sql_16 """select cast(v['nested']['c'] as array<double>), size(cast(v['nested']['c'] as array<double>)) from ${table_name} order by k"""
+
+        }
+        // inserting a nested array inside a nested array is not supported
+        test {
+            sql """
+                insert into ${table_name} values (1, '{"nested": [{"a": [1,2,3]}]}');
+            """
+            exception "Nesting of array in Nested array within variant subcolumns is currently not supported."
+        }
+        /// insert an array of objects covering a, b, c
+        // insert conflicting types across multiple rows
+        sql """
+            insert into ${table_name} values (1, '{"nested": [{"a": 1, "c": 1.1}, {"b": "1"}]}');
+        """
+
+        // in cloud mode, select first and then desc so the rowset is synced and the latest schema is visible
+        sql """
+            select * from ${table_name} order by k limit 1;
+        """
+        qt_sql_desc_1 """
+            desc ${table_name};
+        """
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+        /// insert a row where a and b change type to double
+        sql """
+            insert into ${table_name} values (2, '{"nested": [{"a": 2.5, "b": 123.1}]}');
+        """
+        // in cloud mode, select first and then desc so the rowset is synced and the latest schema is visible
+        sql """
+            select * from ${table_name} order by k limit 1;
+        """
+        qt_sql_desc_2 """
+            desc ${table_name};
+        """
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+
+        // trigger compaction and wait for it to finish
+        trigger_and_wait_compaction("${table_name}", "full")
+
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+
+        sql """ truncate table ${table_name} """
+
+
+        // insert conflicting types within a single row
+        sql """
+            insert into ${table_name} values (1, '{"nested": [{"a": 1, "b": 1.1}, {"a": "1", "b": "1", "c": "1"}]}');
+        """
+        // in cloud mode, select first and then desc so the rowset is synced and the latest schema is visible
+        sql """
+            select * from ${table_name} order by k limit 1;
+        """
+        qt_sql_desc_4 """
+            desc ${table_name};
+        """
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+
+        // insert a row where c changes type to double
+        sql """
+            insert into ${table_name} values (2, '{"nested": [{"a": 1, "c": 1.1}]}');
+        """
+        // in cloud mode, select first and then desc so the rowset is synced and the latest schema is visible
+        sql """
+            select * from ${table_name} order by k limit 1;
+        """
+        qt_sql_desc_5 """
+            desc ${table_name};
+        """
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+
+        // trigger compaction and wait for it to finish
+        trigger_and_wait_compaction("${table_name}", "full")
+
+        // now select a, b, c
+        sql_select_batch()
+        sql_test_cast_to_array()
+
+    } finally {
+    }
+
+}
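For readers unfamiliar with the parse API exercised by the new unit test, the sketch below shows how a caller might tell whether a JSON document was decomposed into typed subcolumn values or kept as a single JSONB value. It is a minimal illustration only, not part of the patch: it assumes the JSONDataParser<SimdJSONParser>/ParseConfig interface and the paths/values result members exactly as used in json_parser_test.cpp above, and describe_parse_result is a hypothetical helper name.

// Minimal sketch (not part of the patch): distinguish "decomposed into typed
// subcolumn values" from "kept as an opaque JSONB value" using only the calls
// seen in json_parser_test.cpp. Note that with enable_flatten_nested = true,
// nested object arrays make parse() throw, as the unit test asserts.
#include <cstddef>
#include <iostream>
#include <string>

#include "vec/core/field.h"
#include "vec/json/json_parser.h"

using doris::vectorized::Field;
using doris::vectorized::JSONDataParser;
using doris::vectorized::ParseConfig;
using doris::vectorized::SimdJSONParser;

void describe_parse_result(const std::string& json, bool flatten_nested) {
    JSONDataParser<SimdJSONParser> parser;
    ParseConfig config;
    config.enable_flatten_nested = flatten_nested;

    auto result = parser.parse(json.c_str(), json.size(), config);
    if (!result.has_value()) {
        std::cout << json << " -> invalid JSON" << std::endl;
        return;
    }
    // Each extracted value pairs with one path; a JSONB-typed value means the
    // subtree was kept as a single JSONB blob instead of being flattened.
    for (size_t i = 0; i < result->values.size(); ++i) {
        bool kept_as_jsonb = result->values[i].get_type() == Field::Types::JSONB;
        std::cout << json << " value #" << i << " -> "
                  << (kept_as_jsonb ? "kept as JSONB" : "decomposed into a typed value")
                  << std::endl;
    }
}

// Example usage mirroring the unit test inputs:
//   describe_parse_result(R"({"a":[[1,2],[3],[4,5,6]]})", false);   // typed Array value
//   describe_parse_result(R"({"nested": [{"a": [1,2,3]}]})", false); // kept as JSONB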