From 42cdcc665dc105fb857fea4ccc3f1ead40854e53 Mon Sep 17 00:00:00 2001 From: amory Date: Thu, 4 Sep 2025 14:31:49 +0800 Subject: [PATCH 1/7] [enhance](variant)enhance max_sparse_column_statistics_size for variant (#55124) delete be config for max_sparse_column_statistics_size and add it for variant properties --- be/src/common/config.cpp | 2 - be/src/common/config.h | 3 - be/src/common/consts.h | 1 + .../variant/variant_column_reader.cpp | 26 +- .../variant/variant_column_reader.h | 3 + .../segment_v2/variant_column_writer_impl.cpp | 2 +- .../segment_v2/variant_stats_calculator.cpp | 16 +- .../segment_v2/variant_stats_calculator.h | 4 +- be/src/olap/tablet_meta.cpp | 4 + be/src/olap/tablet_schema.cpp | 6 + be/src/olap/tablet_schema.h | 13 + be/src/vec/common/schema_util.cpp | 20 +- be/src/vec/common/schema_util.h | 4 +- .../rowset/segment_v2/mock/mock_segment.h | 3 + .../variant_column_writer_reader_test.cpp | 8 +- .../variant_stats_calculator_test.cpp | 41 ++- be/test/testutil/schema_utils.h | 1 + be/test/vec/common/schema_util_test.cpp | 7 +- .../org/apache/doris/catalog/ScalarType.java | 9 + .../org/apache/doris/catalog/VariantType.java | 21 +- .../java/org/apache/doris/catalog/Column.java | 8 +- .../doris/common/util/PropertyAnalyzer.java | 23 ++ .../nereids/parser/LogicalPlanBuilder.java | 10 +- .../apache/doris/nereids/types/DataType.java | 3 +- .../doris/nereids/types/VariantType.java | 30 +- .../org/apache/doris/qe/SessionVariable.java | 14 + .../doris/common/PropertyAnalyzerTest.java | 28 ++ .../functions/ComputeSignatureHelperTest.java | 308 ++++++++++++++++++ .../apache/doris/persist/ScalarTypeTest.java | 1 + gensrc/proto/olap_file.proto | 2 + gensrc/thrift/Descriptors.thrift | 1 + .../conf/regression-conf-custom.groovy | 1 + .../pipeline/p0/conf/regression-conf.groovy | 3 +- ...ariant_compaction_with_sparse_limit.groovy | 71 +++- ...ariant_compaction_with_sparse_limit.groovy | 21 +- 35 files changed, 639 insertions(+), 79 deletions(-) diff --git 
a/be/src/common/config.cpp b/be/src/common/config.cpp index 3d2492996076c1..096ada68cdf0ca 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1338,8 +1338,6 @@ DEFINE_Bool(enable_snapshot_action, "false"); DEFINE_mInt32(variant_max_merged_tablet_schema_size, "2048"); -DEFINE_mInt32(variant_max_sparse_column_statistics_size, "10000"); - DEFINE_mBool(enable_column_type_check, "true"); // 128 MB DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 39d087cc515d45..3538fc4b38bf57 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1410,9 +1410,6 @@ DECLARE_Bool(enable_snapshot_action); // The max columns size for a tablet schema DECLARE_mInt32(variant_max_merged_tablet_schema_size); -// The max sparse column statistics size for a variant column -DECLARE_mInt32(variant_max_sparse_column_statistics_size); - DECLARE_mInt64(local_exchange_buffer_mem_limit); DECLARE_mInt64(enable_debug_log_timeout_secs); diff --git a/be/src/common/consts.h b/be/src/common/consts.h index 2ec9ae126796eb..32b4b1e7fa413d 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -46,5 +46,6 @@ static constexpr int MAX_DECIMALV2_SCALE = 9; static constexpr int MAX_DECIMALV3_PRECISION = MAX_DECIMAL256_PRECISION; static constexpr int MAX_DECIMALV3_SCALE = MAX_DECIMALV3_PRECISION; +static constexpr int DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE = 10000; } // namespace BeConsts } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp index ec0079edc64635..f9a2b21958e670 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp @@ -74,9 +74,20 @@ bool VariantColumnReader::exist_in_sparse_column( } bool VariantColumnReader::is_exceeded_sparse_column_limit() const { - return 
!_statistics->sparse_column_non_null_size.empty() && - _statistics->sparse_column_non_null_size.size() >= - config::variant_max_sparse_column_statistics_size; + bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() && + _statistics->sparse_column_non_null_size.size() >= + _variant_sparse_column_statistics_size; + DBUG_EXECUTE_IF("exceeded_sparse_column_limit_must_be_false", { + if (exceeded_sparse_column_limit) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "exceeded_sparse_column_limit_must_be_false, sparse_column_non_null_size: {} : " + " _variant_sparse_column_statistics_size: {}", + _statistics->sparse_column_non_null_size.size(), + _variant_sparse_column_statistics_size); + } + }) + return exceeded_sparse_column_limit; } int64_t VariantColumnReader::get_metadata_size() const { @@ -308,9 +319,7 @@ Status VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator, // Otherwise the prefix is not exist and the sparse column size is reached limit // which means the path maybe exist in sparse_column - bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() && - _statistics->sparse_column_non_null_size.size() >= - config::variant_max_sparse_column_statistics_size; + bool exceeded_sparse_column_limit = is_exceeded_sparse_column_limit(); // If the variant column has extracted columns and is a compaction reader, then read flat leaves // Otherwise read hierarchical data, since the variant subcolumns are flattened in schema_util::get_compaction_schema @@ -391,6 +400,11 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF _statistics = std::make_unique(); const ColumnMetaPB& self_column_pb = footer.columns(column_id); const auto& parent_index = opts.tablet_schema->inverted_indexs(self_column_pb.unique_id()); + // record variant_sparse_column_statistics_size from parent column + _variant_sparse_column_statistics_size = + 
opts.tablet_schema->column_by_uid(self_column_pb.unique_id()) + .variant_max_sparse_column_statistics_size(); + for (int32_t ordinal = 0; ordinal < footer.columns_size(); ++ordinal) { const ColumnMetaPB& column_pb = footer.columns(ordinal); // Find all columns belonging to the current variant column diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h index 21edf5c50bd134..f22809eed52cd2 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h @@ -116,6 +116,9 @@ class VariantColumnReader : public ColumnReader { std::unique_ptr _statistics; // key: subcolumn path, value: subcolumn indexes std::unordered_map _variant_subcolumns_indexes; + // variant_sparse_column_statistics_size + size_t _variant_sparse_column_statistics_size = + BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE; }; class VariantRootColumnIterator : public ColumnIterator { diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp index 47890f75d04e04..6cb59d186da132 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp @@ -349,7 +349,7 @@ Status VariantColumnWriterImpl::_process_sparse_column( it != sparse_data_paths_statistics.end()) { ++it->second; } else if (sparse_data_paths_statistics.size() < - config::variant_max_sparse_column_statistics_size) { + _tablet_column->variant_max_sparse_column_statistics_size()) { sparse_data_paths_statistics.emplace(path, 1); } } diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp index aef71372666ed5..168efa547ec2a9 100644 --- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp +++ 
b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp @@ -17,6 +17,8 @@ #include "olap/rowset/segment_v2/variant_stats_calculator.h" +#include + #include "common/logging.h" #include "util/simd/bits.h" #include "vec/columns/column_nullable.h" @@ -67,7 +69,13 @@ Status VariantStatsCaculator::calculate_variant_stats(const vectorized::Block* b // Check if this is a sparse column or sub column if (column_path.ends_with("__DORIS_VARIANT_SPARSE__")) { // This is a sparse column from variant column - _calculate_sparse_column_stats(*column, column_meta, row_pos, num_rows); + // get variant_max_sparse_column_statistics_size from tablet_schema + size_t variant_max_sparse_column_statistics_size = + _tablet_schema->column_by_uid(tablet_column.parent_unique_id()) + .variant_max_sparse_column_statistics_size(); + _calculate_sparse_column_stats(*column, column_meta, + variant_max_sparse_column_statistics_size, row_pos, + num_rows); } else { // This is a sub column from variant column _calculate_sub_column_stats(*column, column_meta, row_pos, num_rows); @@ -79,12 +87,14 @@ Status VariantStatsCaculator::calculate_variant_stats(const vectorized::Block* b void VariantStatsCaculator::_calculate_sparse_column_stats(const vectorized::IColumn& column, ColumnMetaPB* column_meta, + size_t max_sparse_column_statistics_size, size_t row_pos, size_t num_rows) { // Get or create variant statistics VariantStatisticsPB* stats = column_meta->mutable_variant_statistics(); // Use the same logic as the original calculate_variant_stats function - vectorized::schema_util::calculate_variant_stats(column, stats, row_pos, num_rows); + vectorized::schema_util::calculate_variant_stats( + column, stats, max_sparse_column_statistics_size, row_pos, num_rows); VLOG_DEBUG << "Sparse column stats updated, non-null size count: " << stats->sparse_column_non_null_size_size(); @@ -108,4 +118,4 @@ void VariantStatsCaculator::_calculate_sub_column_stats(const vectorized::IColum << " (added " << 
current_non_null_count << " from current block)"; } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h index 6ffd74036cb5ff..221c45b781dce8 100644 --- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h +++ b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h @@ -45,7 +45,9 @@ class VariantStatsCaculator { // Helper method to calculate sparse column statistics void _calculate_sparse_column_stats(const vectorized::IColumn& column, - ColumnMetaPB* column_meta, size_t row_pos, size_t num_rows); + ColumnMetaPB* column_meta, + size_t max_sparse_column_statistics_size, size_t row_pos, + size_t num_rows); // Helper method to calculate sub column statistics void _calculate_sub_column_stats(const vectorized::IColumn& column, ColumnMetaPB* column_meta, diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 3352460fddbf02..0a246482788774 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -491,6 +491,10 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco column->set_variant_enable_typed_paths_to_sparse( tcolumn.variant_enable_typed_paths_to_sparse); } + if (tcolumn.__isset.variant_max_sparse_column_statistics_size) { + column->set_variant_max_sparse_column_statistics_size( + tcolumn.variant_max_sparse_column_statistics_size); + } } void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const Version& version) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index e20c0bbda0223b..6ddd37b2e0d783 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -613,6 +613,10 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { if (column.has_variant_max_subcolumns_count()) { _variant_max_subcolumns_count = column.variant_max_subcolumns_count(); } + 
if (column.has_variant_max_sparse_column_statistics_size()) { + _variant_max_sparse_column_statistics_size = + column.variant_max_sparse_column_statistics_size(); + } if (column.has_pattern_type()) { _pattern_type = column.pattern_type(); } @@ -704,6 +708,8 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { column->set_variant_max_subcolumns_count(_variant_max_subcolumns_count); column->set_pattern_type(_pattern_type); column->set_variant_enable_typed_paths_to_sparse(_variant_enable_typed_paths_to_sparse); + column->set_variant_max_sparse_column_statistics_size( + _variant_max_sparse_column_statistics_size); } void TabletColumn::add_sub_column(TabletColumn& sub_column) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index afcbaa0627f0b9..3c0c5e7c79e7b8 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -224,6 +224,12 @@ class TabletColumn : public MetadataAdder { void set_variant_max_subcolumns_count(int32_t variant_max_subcolumns_count) { _variant_max_subcolumns_count = variant_max_subcolumns_count; } + + void set_variant_max_sparse_column_statistics_size( + int32_t variant_max_sparse_column_statistics_size) { + _variant_max_sparse_column_statistics_size = variant_max_sparse_column_statistics_size; + } + int32_t variant_max_subcolumns_count() const { return _variant_max_subcolumns_count; } void set_variant_enable_typed_paths_to_sparse(bool variant_enable_typed_paths_to_sparse) { @@ -234,6 +240,10 @@ class TabletColumn : public MetadataAdder { return _variant_enable_typed_paths_to_sparse; } + int32_t variant_max_sparse_column_statistics_size() const { + return _variant_max_sparse_column_statistics_size; + } + private: int32_t _unique_id = -1; std::string _col_name; @@ -286,6 +296,9 @@ class TabletColumn : public MetadataAdder { int32_t _variant_max_subcolumns_count = 0; PatternTypePB _pattern_type = PatternTypePB::MATCH_NAME_GLOB; bool _variant_enable_typed_paths_to_sparse = false; + // set 
variant_max_sparse_column_statistics_size + int32_t _variant_max_sparse_column_statistics_size = + BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE; }; bool operator==(const TabletColumn& a, const TabletColumn& b); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 3dc6c4f69b6e67..176782dd86568c 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -885,7 +885,9 @@ Status check_path_stats(const std::vector& intputs, RowsetShare // In input rowsets, some rowsets may have statistics values exceeding the maximum limit, // which leads to inaccurate statistics - if (stats.size() > config::variant_max_sparse_column_statistics_size) { + if (stats.size() > output->tablet_schema() + ->column_by_uid(uid) + .variant_max_sparse_column_statistics_size()) { // When there is only one segment, we can ensure that the size of each path in output stats is accurate if (output->num_segments() == 1) { for (const auto& [path, size] : stats) { @@ -1010,7 +1012,8 @@ void get_compaction_subcolumns(TabletSchema::PathsSetInfo& paths_set_info, VLOG_DEBUG << "append typed column " << subpath; } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() || sparse_paths.find(std::string(subpath)) != sparse_paths.end() || - sparse_paths.size() >= config::variant_max_sparse_column_statistics_size) { + sparse_paths.size() >= + parent_column->variant_max_sparse_column_statistics_size()) { TabletColumn subcolumn; subcolumn.set_name(column_name); subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); @@ -1107,8 +1110,9 @@ Status get_compaction_schema(const std::vector& rowsets, // Calculate statistics about variant data paths from the encoded sparse column void calculate_variant_stats(const IColumn& encoded_sparse_column, - segment_v2::VariantStatisticsPB* stats, size_t row_pos, - size_t num_rows) { + segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, + size_t 
row_pos, size_t num_rows) { // Cast input column to ColumnMap type since sparse column is stored as a map const auto& map_column = assert_cast(encoded_sparse_column); @@ -1132,19 +1136,17 @@ void calculate_variant_stats(const IColumn& encoded_sparse_column, } // If path doesn't exist and we haven't hit the max statistics size limit, // add it with count 1 - else if (count_map.size() < config::variant_max_sparse_column_statistics_size) { + else if (count_map.size() < max_sparse_column_statistics_size) { count_map.emplace(sparse_path, 1); } } } - if (stats->sparse_column_non_null_size().size() > - config::variant_max_sparse_column_statistics_size) { + if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) { throw doris::Exception( ErrorCode::INTERNAL_ERROR, "Sparse column non null size: {} is greater than max statistics size: {}", - stats->sparse_column_non_null_size().size(), - config::variant_max_sparse_column_statistics_size); + stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size); } } diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index fc5698bf966382..75f912a6ca2fb0 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -172,7 +172,9 @@ Status check_path_stats(const std::vector& intputs, RowsetShare // Calculate statistics about variant data paths from the encoded sparse column void calculate_variant_stats(const IColumn& encoded_sparse_column, - segment_v2::VariantStatisticsPB* stats, size_t row_pos, + segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, + size_t row_pos, size_t num_rows); void get_field_info(const Field& field, FieldInfo* info); diff --git a/be/test/olap/rowset/segment_v2/mock/mock_segment.h b/be/test/olap/rowset/segment_v2/mock/mock_segment.h index 9cf443b2df0959..f4421c37f7d6f1 100644 --- a/be/test/olap/rowset/segment_v2/mock/mock_segment.h +++ 
b/be/test/olap/rowset/segment_v2/mock/mock_segment.h @@ -49,6 +49,9 @@ class MockSegment : public Segment { // Helper methods for test setup void add_column_uid_mapping(int32_t col_uid, int32_t footer_ordinal) { + _tablet_schema->_cols.push_back(std::make_shared()); + _tablet_schema->_cols.back()->set_unique_id(col_uid); + _tablet_schema->_field_uniqueid_to_index[col_uid] = footer_ordinal; _column_uid_to_footer_ordinal[col_uid] = footer_ordinal; } diff --git a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp index 8495e7c4e0bdc6..13ca9ebf4ab3e1 100644 --- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp +++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp @@ -483,15 +483,15 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) { // 13. check statistics size == limit auto& variant_stats = variant_column_reader->_statistics; EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() < - config::variant_max_sparse_column_statistics_size); - auto limit = config::variant_max_sparse_column_statistics_size - + variant_column_reader->_variant_sparse_column_statistics_size); + auto limit = variant_column_reader->_variant_sparse_column_statistics_size - variant_stats->sparse_column_non_null_size.size(); for (int i = 0; i < limit; ++i) { std::string key = parent_column.name_lower_case() + ".key10" + std::to_string(i); variant_stats->sparse_column_non_null_size[key] = 10000; } EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() == - config::variant_max_sparse_column_statistics_size); + variant_column_reader->_variant_sparse_column_statistics_size); EXPECT_TRUE(variant_column_reader->is_exceeded_sparse_column_limit()); ColumnIteratorUPtr it2; @@ -2500,4 +2500,4 @@ TEST_F(VariantColumnWriterReaderTest, test_read_with_checksum) { } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git 
a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp index edbda0548254c9..5fbb2ed514d8a2 100644 --- a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp +++ b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp @@ -71,8 +71,10 @@ class VariantStatsCalculatorTest : public ::testing::Test { } // Helper method to create a footer column with path info - void add_footer_column_with_path(int32_t parent_unique_id, const std::string& path) { + void add_footer_column_with_path(int32_t parent_unique_id, const std::string& path, + uint32_t column_id = 0) { auto* column_meta = _footer->add_columns(); + column_meta->set_column_id(column_id); column_meta->set_unique_id(100 + _footer->columns_size()); auto* path_info = column_meta->mutable_column_path_info(); @@ -202,19 +204,26 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSubColumn) { TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSparseColumn) { // Setup footer with sparse column - add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__"); + add_footer_column_with_path(-1, "sparse_col"); + add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__", 1); // Create variant sparse column + TabletColumn parent_column = create_variant_column(1, "variant_col", -1, "sparse_col"); TabletColumn sparse_column = create_variant_column(2, "variant_col.__DORIS_VARIANT_SPARSE__", 1, "sparse_col.__DORIS_VARIANT_SPARSE__"); + _tablet_schema->append_column(parent_column); _tablet_schema->append_column(sparse_column); - std::vector column_ids = {0}; + std::vector column_ids = {0, 1}; VariantStatsCaculator calculator(_footer.get(), _tablet_schema, column_ids); // Create block with map column (sparse column) vectorized::Block block; auto map_column = create_map_column(); + auto string_column = vectorized::ColumnString::create(); + // add parant column to block + 
block.insert({std::move(string_column), std::make_shared(), + "variant_column"}); block.insert({std::move(map_column), std::make_shared( std::make_shared(), @@ -225,7 +234,7 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSparseColumn) { EXPECT_TRUE(status.ok()); // Check that variant statistics were updated - auto& column_meta = _footer->columns(0); + auto& column_meta = _footer->columns(1); EXPECT_TRUE(column_meta.has_variant_statistics()); } @@ -275,10 +284,15 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMissingPathInFooter) } TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { + // parent column + add_footer_column_with_path(-1, "variant"); + TabletColumn parent_column = create_variant_column(1, "variant", -1, "variant"); + _tablet_schema->append_column(parent_column); + // Setup footer with multiple columns - add_footer_column_with_path(1, "sub1"); - add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__"); - add_footer_column_with_path(2, "another_sub"); + add_footer_column_with_path(1, "sub1", 1); + add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__", 2); + add_footer_column_with_path(2, "another_sub", 3); // Create multiple variant columns TabletColumn sub1 = create_variant_column(2, "variant.sub1", 1, "sub1"); @@ -290,12 +304,17 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { _tablet_schema->append_column(sparse); _tablet_schema->append_column(sub2); - std::vector column_ids = {0, 1, 2}; + std::vector column_ids = {0, 1, 2, 3}; VariantStatsCaculator calculator(_footer.get(), _tablet_schema, column_ids); // Create block with multiple columns vectorized::Block block; + // parent column + auto string_column = vectorized::ColumnString::create(); + string_column->insert_data("test", 4); + block.insert({std::move(string_column), std::make_shared(), + "variant_column"}); auto nullable_col1 = create_nullable_column({false, true, false}, {"a", "", "c"}); 
block.insert({std::move(nullable_col1), std::make_shared( @@ -320,9 +339,9 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { EXPECT_TRUE(status.ok()); // Check that statistics were updated for sub columns - EXPECT_EQ(_footer->columns(0).none_null_size(), 2); // sub1: 2 non-null - EXPECT_TRUE(_footer->columns(1).has_variant_statistics()); // sparse column - EXPECT_EQ(_footer->columns(2).none_null_size(), 1); // another_sub: 2 non-null + EXPECT_EQ(_footer->columns(1).none_null_size(), 2); // sub1: 2 non-null + EXPECT_TRUE(_footer->columns(2).has_variant_statistics()); // sparse column + EXPECT_EQ(_footer->columns(3).none_null_size(), 1); // another_sub: 2 non-null } TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithEmptyBlock) { diff --git a/be/test/testutil/schema_utils.h b/be/test/testutil/schema_utils.h index 400d3fcd652ce6..f294a86c462a9c 100644 --- a/be/test/testutil/schema_utils.h +++ b/be/test/testutil/schema_utils.h @@ -34,6 +34,7 @@ class SchemaUtils { column_pb->set_is_nullable(is_nullable); if (column_type == "VARIANT") { column_pb->set_variant_max_subcolumns_count(variant_max_subcolumns_count); + column_pb->set_variant_max_sparse_column_statistics_size(10000); } } diff --git a/be/test/vec/common/schema_util_test.cpp b/be/test/vec/common/schema_util_test.cpp index 3fa9037c72e0dd..b5b6c57ef1875d 100644 --- a/be/test/vec/common/schema_util_test.cpp +++ b/be/test/vec/common/schema_util_test.cpp @@ -1741,7 +1741,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) { variant.set_unique_id(30); variant.set_variant_max_subcolumns_count(3); variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE); - + variant.set_variant_max_sparse_column_statistics_size(10000); TabletSchemaSPtr schema = std::make_shared(); schema->append_column(variant); @@ -1798,7 +1798,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) { output_schema = std::make_shared(); sparse_paths.clear(); - for (int i = 0; i < 
config::variant_max_sparse_column_statistics_size + 1; ++i) { + for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 1; ++i) { sparse_paths.insert("dummy" + std::to_string(i)); } schema_util::get_compaction_subcolumns(paths_set_info, parent_column, schema, @@ -1815,6 +1815,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_advanced) { variant.set_variant_max_subcolumns_count(3); variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE); variant.set_variant_enable_typed_paths_to_sparse(true); + variant.set_variant_max_sparse_column_statistics_size(10000); TabletColumn subcolumn; subcolumn.set_name("c"); subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_DATEV2); @@ -1890,7 +1891,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_advanced) { output_schema = std::make_shared(); sparse_paths.clear(); - for (int i = 0; i < config::variant_max_sparse_column_statistics_size + 1; ++i) { + for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 1; ++i) { sparse_paths.insert("dummy" + std::to_string(i)); } schema_util::get_compaction_subcolumns(paths_set_info, parent_column, schema, diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java index 53790fbaa4f4a5..e04c3c99b1a52f 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -1235,4 +1235,13 @@ public boolean getVariantEnableTypedPathsToSparse() { } return false; } + + public int getVariantMaxSparseColumnStatisticsSize() { + // In the past, variant metadata used the ScalarType type. + // Now, we use VariantType, which inherits from ScalarType, as the new metadata storage. + if (this instanceof VariantType) { + return ((VariantType) this).getVariantMaxSparseColumnStatisticsSize(); + } + return 0; // The old variant type had a default value of 0. 
+ } } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 29342d73ca7c25..28b1177e398715 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -46,6 +46,9 @@ public class VariantType extends ScalarType { @SerializedName(value = "enableTypedPathsToSparse") private boolean enableTypedPathsToSparse = false; + @SerializedName(value = "variantMaxSparseColumnStatisticsSize") + private int variantMaxSparseColumnStatisticsSize = 0; + private Map properties = Maps.newHashMap(); public VariantType() { @@ -53,6 +56,7 @@ public VariantType() { this.predefinedFields = Lists.newArrayList(); this.variantMaxSubcolumnsCount = 0; this.enableTypedPathsToSparse = false; + this.variantMaxSparseColumnStatisticsSize = 0; } public VariantType(ArrayList fields) { @@ -81,7 +85,8 @@ public VariantType(ArrayList fields, Map propertie } public VariantType(ArrayList fields, int variantMaxSubcolumnsCount, - boolean enableTypedPathsToSparse) { + boolean enableTypedPathsToSparse, + int variantMaxSparseColumnStatisticsSize) { super(PrimitiveType.VARIANT); Preconditions.checkNotNull(fields); this.predefinedFields = fields; @@ -90,6 +95,7 @@ public VariantType(ArrayList fields, int variantMaxSubcolumnsCount } this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; this.enableTypedPathsToSparse = enableTypedPathsToSparse; + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; } @Override @@ -123,6 +129,11 @@ public String toSql(int depth) { sb.append("\"variant_enable_typed_paths_to_sparse\" = \"") .append(String.valueOf(enableTypedPathsToSparse)).append("\""); } + if (variantMaxSparseColumnStatisticsSize != 10000) { + sb.append(","); + sb.append("\"variant_max_sparse_column_statistics_size\" = \"") + 
.append(String.valueOf(variantMaxSparseColumnStatisticsSize)).append("\""); + } sb.append(")>"); return sb.toString(); } @@ -188,4 +199,12 @@ public Map getProperties() { public void setEnableTypedPathsToSparse(boolean enableTypedPathsToSparse) { this.enableTypedPathsToSparse = enableTypedPathsToSparse; } + + public int getVariantMaxSparseColumnStatisticsSize() { + return variantMaxSparseColumnStatisticsSize; + } + + public void setVariantMaxSparseColumnStatisticsSize(int variantMaxSparseColumnStatisticsSize) { + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index ab6b423d976ffa..2cb974b73fe889 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -683,6 +683,7 @@ public TColumn toThrift() { } tColumn.setClusterKeyId(this.clusterKeyId); tColumn.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse()); + tColumn.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize()); // ATTN: // Currently, this `toThrift()` method is only used from CreateReplicaTask. // And CreateReplicaTask does not need `defineExpr` field. 
@@ -899,6 +900,7 @@ public OlapFile.ColumnPB toPb(Set bfColumns, List indexes) throws } else if (this.type.isVariantType()) { builder.setVariantMaxSubcolumnsCount(this.getVariantMaxSubcolumnsCount()); builder.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse()); + builder.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize()); // variant may contain predefined structured fields addChildren(builder); } @@ -1289,7 +1291,11 @@ public void setDefaultValueInfo(Column refColumn) { this.defaultValueExprDef = refColumn.defaultValueExprDef; this.realDefaultValue = refColumn.realDefaultValue; } - + + public int getVariantMaxSparseColumnStatisticsSize() { + return type.isVariantType() ? ((ScalarType) type).getVariantMaxSparseColumnStatisticsSize() : -1; + } + public String getExtraInfo() { return extraInfo; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 7a9f8567988c4a..4aadcb6260856c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -258,6 +258,9 @@ public class PropertyAnalyzer { public static final String SM4 = "SM4"; public static final String PLAINTEXT = "PLAINTEXT"; + public static final String PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE = + "variant_max_sparse_column_statistics_size"; + public enum RewriteType { PUT, // always put property REPLACE, // replace if exists property @@ -1876,6 +1879,32 @@ public static boolean analyzeEnableTypedPathsToSparse(Map proper return enableTypedPathsToSparse; } + /** + * Parses the optional "variant_max_sparse_column_statistics_size" property and removes it from + * {@code properties}; returns {@code defaultValue} when the property is absent. + * + * @throws AnalysisException if the value is not an integer or is outside [0, 50000] + */ + public static int analyzeVariantMaxSparseColumnStatisticsSize(Map properties, int defaultValue) + throws AnalysisException { + int maxSparseColumnStatisticsSize = defaultValue; + if (properties != null && 
properties.containsKey(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE)) { + String maxSparseColumnStatisticsSizeStr = + properties.get(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); + try { + maxSparseColumnStatisticsSize = Integer.parseInt(maxSparseColumnStatisticsSizeStr); + if (maxSparseColumnStatisticsSize < 0 || maxSparseColumnStatisticsSize > 50000) { + throw new AnalysisException("variant_max_sparse_column_statistics_size must between 0 and 50000 "); + } + } catch (NumberFormatException e) { + throw new AnalysisException("variant_max_sparse_column_statistics_size format error:" + e.getMessage()); + } + + properties.remove(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); + } + return maxSparseColumnStatisticsSize; + } + public static TEncryptionAlgorithm analyzeTDEAlgorithm(Map properties) throws AnalysisException { String name; if (properties == null || !properties.containsKey(PROPERTIES_TDE_ALGORITHM)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 4b6dcf7d6521fb..d85b2dec8a5ea4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -3595,12 +3595,16 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) ConnectContext.get().getSessionVariable().getDefaultVariantMaxSubcolumnsCount(); boolean enableTypedPathsToSparse = ConnectContext.get() == null ? false : ConnectContext.get().getSessionVariable().getDefaultEnableTypedPathsToSparse(); + int variantMaxSparseColumnStatisticsSize = ConnectContext.get() == null ? 
0 : + ConnectContext.get().getSessionVariable().getDefaultVariantMaxSparseColumnStatisticsSize(); try { variantMaxSubcolumnsCount = PropertyAnalyzer .analyzeVariantMaxSubcolumnsCount(properties, variantMaxSubcolumnsCount); enableTypedPathsToSparse = PropertyAnalyzer .analyzeEnableTypedPathsToSparse(properties, enableTypedPathsToSparse); + variantMaxSparseColumnStatisticsSize = PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize( + properties, variantMaxSparseColumnStatisticsSize); } catch (org.apache.doris.common.AnalysisException e) { throw new NotSupportedException(e.getMessage()); } @@ -3608,7 +3612,8 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) if (!properties.isEmpty()) { throw new NotSupportedException("only support for " + PropertyAnalyzer.PROPERTIES_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE - + " and " + PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT); + + " and " + PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT + + " and " + PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); } if (variantMaxSubcolumnsCount == 0 && !fields.isEmpty()) { @@ -3616,7 +3621,8 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) + "when variant has fields, but got " + variantMaxSubcolumnsCount); } - return new VariantType(fields, variantMaxSubcolumnsCount, enableTypedPathsToSparse); + return new VariantType(fields, variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java index 0bc1fa127c584d..6d1f87340afe23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java @@ -400,7 +400,8 @@ public static DataType fromCatalogType(Type type) { 
.collect(ImmutableList.toImmutableList()); return new VariantType(variantFields, ((org.apache.doris.catalog.VariantType) type).getVariantMaxSubcolumnsCount(), - ((org.apache.doris.catalog.VariantType) type).getEnableTypedPathsToSparse()); + ((org.apache.doris.catalog.VariantType) type).getEnableTypedPathsToSparse(), + ((org.apache.doris.catalog.VariantType) type).getVariantMaxSparseColumnStatisticsSize()); } return new VariantType(0); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index f30a328b5db2d8..01c392a3f96bdb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -42,9 +42,11 @@ public class VariantType extends PrimitiveType { public static final int WIDTH = 24; - private int variantMaxSubcolumnsCount = 0; + private final int variantMaxSubcolumnsCount; - private boolean enableTypedPathsToSparse = false; + private final boolean enableTypedPathsToSparse; + + private final int variantMaxSparseColumnStatisticsSize; private final List predefinedFields; @@ -52,6 +54,7 @@ public class VariantType extends PrimitiveType { public VariantType(int variantMaxSubcolumnsCount) { this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; predefinedFields = Lists.newArrayList(); + this.variantMaxSparseColumnStatisticsSize = 0; } /** @@ -59,26 +62,30 @@ public VariantType(int variantMaxSubcolumnsCount) { */ public VariantType(List fields) { this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); + this.variantMaxSparseColumnStatisticsSize = 0; } - public VariantType(List fields, int variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse) { + public VariantType(List fields, int variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse, + int variantMaxSparseColumnStatisticsSize) { 
this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; this.enableTypedPathsToSparse = enableTypedPathsToSparse; + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; } @Override public DataType conversion() { return new VariantType(predefinedFields.stream().map(VariantField::conversion) - .collect(Collectors.toList()), variantMaxSubcolumnsCount, - enableTypedPathsToSparse); + .collect(Collectors.toList()), variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); } @Override public Type toCatalogDataType() { org.apache.doris.catalog.VariantType type = new org.apache.doris.catalog.VariantType(predefinedFields.stream() .map(VariantField::toCatalogDataType) - .collect(Collectors.toCollection(ArrayList::new)), variantMaxSubcolumnsCount, enableTypedPathsToSparse); + .collect(Collectors.toCollection(ArrayList::new)), variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); return type; } @@ -117,6 +124,12 @@ public String toSql() { sb.append("\"variant_enable_typed_paths_to_sparse\" = \"") .append(String.valueOf(enableTypedPathsToSparse)).append("\""); } + if (variantMaxSparseColumnStatisticsSize != 0) { + sb.append(","); + sb.append("\"variant_max_sparse_column_statistics_size\" = \"") + .append(String.valueOf(variantMaxSparseColumnStatisticsSize)) + .append("\""); + } sb.append(")>"); return sb.toString(); } @@ -132,6 +145,7 @@ public boolean equals(Object o) { VariantType other = (VariantType) o; return this.variantMaxSubcolumnsCount == other.variantMaxSubcolumnsCount && this.enableTypedPathsToSparse == other.enableTypedPathsToSparse + && this.variantMaxSparseColumnStatisticsSize == other.variantMaxSparseColumnStatisticsSize && Objects.equals(predefinedFields, other.predefinedFields); } @@ -157,4 +171,8 @@ public List getPredefinedFields() { public int 
getVariantMaxSubcolumnsCount() { return variantMaxSubcolumnsCount; } + + public int getVariantMaxSparseColumnStatisticsSize() { + return variantMaxSparseColumnStatisticsSize; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index c625573a4abc53..5b959c8b9814ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -748,6 +748,9 @@ public class SessionVariable implements Serializable, Writable { public static final String PREFER_UDF_OVER_BUILTIN = "prefer_udf_over_builtin"; public static final String ENABLE_ADD_INDEX_FOR_NEW_DATA = "enable_add_index_for_new_data"; + public static final String DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE = + "default_variant_max_sparse_column_statistics_size"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. */ @@ -2625,6 +2628,13 @@ public boolean isEnableSortSpill() { }) public boolean enableAddIndexForNewData = false; + @VariableMgr.VarAttr( + name = DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, + needForward = true, + fuzzy = true + ) + public int defaultVariantMaxSparseColumnStatisticsSize = 10000; + // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables, // not the default value set in the code. 
@SuppressWarnings("checkstyle:Indentation") @@ -4998,5 +5008,9 @@ public void setEnableAddIndexForNewData(boolean enableAddIndexForNewData) { public boolean getDefaultEnableTypedPathsToSparse() { return defaultEnableTypedPathsToSparse; } + + public int getDefaultVariantMaxSparseColumnStatisticsSize() { + return defaultVariantMaxSparseColumnStatisticsSize; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java index ab7291eaf161a0..ddd813df376ba4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java @@ -334,4 +334,32 @@ public void testAnalyzeInvertedIndexFileStorageFormat() throws AnalysisException e.getMessage()); } } + + @Test + public void testAnalyzeVariantMaxSparseColumnStatisticsSize() throws AnalysisException { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "-1"); + try { + PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + Assertions.assertNotNull(e.getMessage()); + } + properties.clear(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "50001"); + try { + PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + Assertions.assertNotNull(e.getMessage()); + } + properties.clear(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "invalid"); + try { + PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + 
Assertions.assertNotNull(e.getMessage()); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java index ff518fb9d1fa87..edaff0ced318c6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java @@ -404,6 +404,311 @@ void testMapDateTimeV2ComputePrecision() { signature.getArgType(2)); } + @Test + void testTimeV2PrecisionPromotion() { + FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE).args(TimeV2Type.INSTANCE, + TimeV2Type.INSTANCE, TimeV2Type.INSTANCE); + List arguments = Lists.newArrayList(new TimeV2Literal("12:34:56.12"), + new TimeV2Literal("12:34:56.123"), new TimeV2Literal("12:34:56.1")); + signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); + + // All arguments should be promoted to the highest precision (3) + Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(0)); + Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(1)); + Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(2)); + } + + @Test + void testMixedDateTimeV2AndTimeV2PrecisionPromotion() { + FunctionSignature signature = FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args( + DateTimeV2Type.SYSTEM_DEFAULT, TimeV2Type.INSTANCE, DateTimeV2Type.SYSTEM_DEFAULT); + List arguments = Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00.12"), + new TimeV2Literal("12:34:56.123"), new DateTimeV2Literal("2020-02-02 00:00:00.1")); + signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); + + // All arguments should be promoted to the highest precision (3) + 
Assertions.assertEquals(DateTimeV2Type.of(3), signature.getArgType(0)); + Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(1)); + Assertions.assertEquals(DateTimeV2Type.of(3), signature.getArgType(2)); + // Return type should also be promoted to precision 3 + Assertions.assertEquals(DateTimeV2Type.of(3), signature.returnType); + } + + @Test + void testNestedTimeV2PrecisionPromotion() { + FunctionSignature signature = FunctionSignature.ret(ArrayType.of(TimeV2Type.INSTANCE)).args( + ArrayType.of(TimeV2Type.INSTANCE), + MapType.of(IntegerType.INSTANCE, TimeV2Type.INSTANCE), TimeV2Type.INSTANCE); + List arguments = Lists.newArrayList( + new ArrayLiteral(Lists.newArrayList(new TimeV2Literal("12:34:56.12"))), + new MapLiteral(Lists.newArrayList(new IntegerLiteral(1)), + Lists.newArrayList(new TimeV2Literal("12:34:56.1234"))), + new TimeV2Literal("12:34:56.123")); + signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); + + // Check array argument (precision should be 4 from the map value) + Assertions.assertTrue(signature.getArgType(0) instanceof ArrayType); + Assertions.assertEquals(TimeV2Type.of(4), ((ArrayType) signature.getArgType(0)).getItemType()); + + // Check map argument + Assertions.assertTrue(signature.getArgType(1) instanceof MapType); + Assertions.assertEquals(IntegerType.INSTANCE, ((MapType) signature.getArgType(1)).getKeyType()); + Assertions.assertEquals(TimeV2Type.of(4), ((MapType) signature.getArgType(1)).getValueType()); + + // Check scalar argument + Assertions.assertEquals(TimeV2Type.of(4), signature.getArgType(2)); + + // Check return type + Assertions.assertTrue(signature.returnType instanceof ArrayType); + Assertions.assertEquals(TimeV2Type.of(4), ((ArrayType) signature.returnType).getItemType()); + } + + @Test + void testComplexNestedMixedTimePrecisionPromotion() { + // Create a complex nested structure with both DateTimeV2 and TimeV2 types + FunctionSignature signature = 
FunctionSignature + .ret(MapType.of(DateTimeV2Type.SYSTEM_DEFAULT, ArrayType.of(TimeV2Type.INSTANCE))) + .args(MapType.of(DateTimeV2Type.SYSTEM_DEFAULT, ArrayType.of(TimeV2Type.INSTANCE)), + ArrayType.of(MapType.of(TimeV2Type.INSTANCE, + DateTimeV2Type.SYSTEM_DEFAULT)), + DateTimeV2Type.SYSTEM_DEFAULT); + + // Create complex arguments with different precisions + List arguments = Lists.newArrayList( + // Map(DateTimeV2(2) -> Array(TimeV2(1))) + new MapLiteral(Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00.12")), + Lists.newArrayList(new ArrayLiteral( + Lists.newArrayList(new TimeV2Literal("12:34:56.1"))))), + // Array(Map(TimeV2(3) -> DateTimeV2(0))) + new ArrayLiteral(Lists.newArrayList(new MapLiteral( + Lists.newArrayList(new TimeV2Literal("12:34:56.123")), + Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00"))))), + // DateTimeV2(4) + new DateTimeV2Literal("2020-02-02 00:00:00.1234")); + + signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); + + // All time types should be promoted to precision 4 + + // Check first argument: Map(DateTimeV2 -> Array(TimeV2)) + Assertions.assertTrue(signature.getArgType(0) instanceof MapType); + Assertions.assertEquals(DateTimeV2Type.of(4), ((MapType) signature.getArgType(0)).getKeyType()); + Assertions.assertTrue(((MapType) signature.getArgType(0)).getValueType() instanceof ArrayType); + Assertions.assertEquals(TimeV2Type.of(4), + ((ArrayType) ((MapType) signature.getArgType(0)).getValueType()).getItemType()); + + // Check second argument: Array(Map(TimeV2 -> DateTimeV2)) + Assertions.assertTrue(signature.getArgType(1) instanceof ArrayType); + Assertions.assertTrue(((ArrayType) signature.getArgType(1)).getItemType() instanceof MapType); + Assertions.assertEquals(TimeV2Type.of(4), + ((MapType) ((ArrayType) signature.getArgType(1)).getItemType()).getKeyType()); + Assertions.assertEquals(DateTimeV2Type.of(4), + ((MapType) ((ArrayType) 
signature.getArgType(1)).getItemType()).getValueType()); + + // Check third argument: DateTimeV2 + Assertions.assertEquals(DateTimeV2Type.of(4), signature.getArgType(2)); + + // Check return type: Map(DateTimeV2 -> Array(TimeV2)) + Assertions.assertTrue(signature.returnType instanceof MapType); + Assertions.assertEquals(DateTimeV2Type.of(4), ((MapType) signature.returnType).getKeyType()); + Assertions.assertTrue(((MapType) signature.returnType).getValueType() instanceof ArrayType); + Assertions.assertEquals(TimeV2Type.of(4), + ((ArrayType) ((MapType) signature.returnType).getValueType()).getItemType()); + } + + @Test + void testNoDynamicComputeVariantArgs() { + FunctionSignature signature = FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE); + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, Collections.emptyList()); + Assertions.assertTrue(signature.returnType instanceof DoubleType); + } + + @Test + void testDynamicComputeVariantArgsSingleVariant() { + VariantType variantType = new VariantType(100); + FunctionSignature signature = FunctionSignature.ret(VariantType.INSTANCE) + .args(VariantType.INSTANCE, IntegerType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType), + new IntegerLiteral(42)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.returnType instanceof VariantType); + Assertions.assertEquals(100, ((VariantType) signature.returnType).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); + + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(100, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + + 
Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); + } + + @Test + void testDynamicComputeVariantArgsMultipleVariants() { + VariantType variantType1 = new VariantType(150); + VariantType variantType2 = new VariantType(250); + FunctionSignature signature = FunctionSignature.ret(IntegerType.INSTANCE) + .args(VariantType.INSTANCE, VariantType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType1), + new MockVariantExpression(variantType2)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(150, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + Assertions.assertTrue(signature.getArgType(1) instanceof VariantType); + Assertions.assertEquals(250, ((VariantType) signature.getArgType(1)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(1)).getVariantMaxSparseColumnStatisticsSize()); + Assertions.assertTrue(signature.returnType instanceof IntegerType); + } + + @Test + void testDynamicComputeVariantArgsMixedTypesWithSingleVariant() { + VariantType variantType = new VariantType(75); + FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE) + .args(VariantType.INSTANCE, IntegerType.INSTANCE, DoubleType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType), + new IntegerLiteral(10), + new DoubleLiteral(3.14)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(75, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) 
signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); + Assertions.assertTrue(signature.getArgType(2) instanceof DoubleType); + + Assertions.assertTrue(signature.returnType instanceof BooleanType); + } + + @Test + void testDynamicComputeVariantArgsWithNullLiteral() { + FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE) + .args(VariantType.INSTANCE, IntegerType.INSTANCE); + + List arguments = Lists.newArrayList( + new NullLiteral(), + new IntegerLiteral(10)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); + } + + @Test + void testDynamicComputeVariantArgsNoVariantReturnType() { + VariantType variantType = new VariantType(300); + FunctionSignature signature = FunctionSignature.ret(IntegerType.INSTANCE) + .args(VariantType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.returnType instanceof IntegerType); + + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(300, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + } + + @Test + void testDynamicComputeVariantArgsWithVarArgsThrowsException() { + VariantType variantType1 = new VariantType(150); + VariantType variantType2 = new VariantType(250); + FunctionSignature 
signature = FunctionSignature.ret(VariantType.INSTANCE) + .args(VariantType.INSTANCE, VariantType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType1), + new MockVariantExpression(variantType2)); + + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, () -> { + ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + }); + + Assertions.assertEquals("variant type is not supported in multiple arguments", exception.getMessage()); + } + + @Test + void testDynamicComputeVariantArgsWithComputeSignature() { + VariantType variantType = new VariantType(200); + FunctionSignature signature = FunctionSignature.ret(VariantType.INSTANCE) + .args(VariantType.INSTANCE); + + List arguments = Lists.newArrayList( + new MockVariantExpression(variantType)); + + signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); + + Assertions.assertTrue(signature.returnType instanceof VariantType); + Assertions.assertEquals(200, ((VariantType) signature.returnType).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); + Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); + Assertions.assertEquals(200, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); + } + + /** + * Mock Expression class for testing VariantType + */ + private static class MockVariantExpression extends Expression { + private final VariantType variantType; + + public MockVariantExpression(VariantType variantType) { + super(Collections.emptyList()); + this.variantType = variantType; + } + + @Override + public DataType getDataType() { + return variantType; + } + + @Override + public boolean nullable() { + return true; + } + + @Override + public Expression withChildren(List children) { 
+ return this; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visit(this, context); + } + + @Override + public int arity() { + return 0; + } + + @Override + public Expression child(int index) { + throw new IndexOutOfBoundsException("MockVariantExpression has no children"); + } + + @Override + public List children() { + return Collections.emptyList(); + } + } + private static class FakeComputeSignature implements ComputeSignature { @Override public List children() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java index b1f2039e356348..3fac71bfc33d2d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java @@ -36,5 +36,6 @@ public void testScalarType() { Assert.assertEquals(scalarType.getPrimitiveType(), scalarType2.getPrimitiveType()); Assert.assertEquals(scalarType.getVariantMaxSubcolumnsCount(), 0); Assert.assertEquals(scalarType.getVariantEnableTypedPathsToSparse(), false); + Assert.assertEquals(scalarType.getVariantMaxSparseColumnStatisticsSize(), 0); } } diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 8136500491c2d0..1e97d5ad476cb1 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -344,6 +344,8 @@ message ColumnPB { optional bool variant_enable_typed_paths_to_sparse = 27 [default = false]; // this field is only used during flexible partial update load optional bool is_on_update_current_timestamp = 28 [default = false]; + // variant_max_sparse_column_statistics_size + optional int32 variant_max_sparse_column_statistics_size = 29 [default = 10000]; } // Dictionary of Schema info, to reduce TabletSchemaCloudPB fdb kv size diff --git 
a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index f1ef06103efc90..f14a8db707ca67 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -51,6 +51,7 @@ struct TColumn { 21: optional TPatternType pattern_type 22: optional bool variant_enable_typed_paths_to_sparse = false; 23: optional bool is_on_update_current_timestamp = false + 24: optional i32 variant_max_sparse_column_statistics_size = 10000 } struct TSlotDescriptor { diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index 2b5b4007635d57..daeb83f3d206bb 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -64,6 +64,7 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line "ccr_mow_syncer_p0," + "hdfs_vault_p2," + "inject_hdfs_vault_p0," + + "variant_p0/nested," + "plsql_p0," + // plsql is not developped any more, add by sk. 
"zzz_the_end_sentinel_do_not_touch" // keep this line as the last line diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index 6531e7e6d3cc97..da25d3c0d9aa25 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -87,7 +87,8 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line "nereids_rules_p0/subquery," + "unique_with_mow_c_p0," + "workload_manager_p1," + - "plsql_p0," + // plsql is not developped any more + "plsql_p0," + // plsql is not developped any more, add by sk + "variant_p0/nested," + "zzz_the_end_sentinel_do_not_touch"// keep this line as the last line customConf1 = "test_custom_conf_value" diff --git a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy index 6e3d6a12ce8a43..67dd512e0ed97f 100644 --- a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy +++ b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy @@ -23,12 +23,6 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { def backendId_to_backendHttpPort = [:] getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); - def set_be_config = { key, value -> - for (String backend_id: backendId_to_backendIP.keySet()) { - def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) - logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) - } - } try { String backend_id = backendId_to_backendIP.keySet()[0] def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) @@ -45,16 +39,17 @@ 
suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { } } - set_be_config("variant_max_sparse_column_statistics_size", "2") - int max_subcolumns_count = Math.floor(Math.random() * 5) + + int max_subcolumns_count = Math.floor(Math.random() * 5) + int max_sparse_column_statistics_size = 2 if (max_subcolumns_count == 1) { max_subcolumns_count = 0 } def create_table = { tableName, buckets="auto", key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${tableName}" - def var_def = "variant " + def var_def = "variant " if (key_type == "AGGREGATE") { - var_def = "variant replace" + var_def = "variant replace" } sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( @@ -66,6 +61,25 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ } + // check the sparse column must not be read if max_subcolumns_count is 0 + def check_sparse_column_must_not_be_read = { tableName -> + if (max_subcolumns_count == 0) { + try { + GetDebugPoint().enableDebugPointForAllBEs("exist_in_sparse_column_must_be_false") + sql """ select v['a'], v['b'], v['c'], v['x'], v['y'], v['z'], v['m'], v['l'], v['g'], v['z'], v['sala'], v['dddd'] from ${tableName}""" + } finally { + GetDebugPoint().disableDebugPointForAllBEs("exist_in_sparse_column_must_be_false") + } + } else if (max_subcolumns_count > 1) { + // here will aways false + try { + GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + sql """ select v['mmm'] from ${tableName} where k = 30""" + } finally { + GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + } + } + } def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"] // def key_types = ["AGGREGATE"] for (int i = 0; i < key_types.size(); i++) { @@ -119,8 +133,41 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { qt_sql_55 "select cast(v['b'] as string), cast(v['b']['c'] as string) from ${tableName} 
where cast(v['b'] as string) != 'null' and cast(v['b'] as string) != '{}' order by k desc limit 10;" } + } catch (e) { + logger.info("catch exception: ${e}") } finally { - // set back to default - set_be_config("variant_max_sparse_column_statistics_size", "10000") + sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE" + sql "DROP TABLE IF EXISTS simple_variant_UNIQUE" + sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE" + } + + // test variant_max_sparse_column_statistics_size debug error case + sql "DROP TABLE IF EXISTS tn_simple_variant_DUPLICATE" + sql """ + CREATE TABLE IF NOT EXISTS tn_simple_variant_DUPLICATE ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + // here will always true + sql """insert into tn_simple_variant_DUPLICATE values (1, '{"a" : 1, "b" : 2}');""" + GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + test { + sql """ select v['a'] from tn_simple_variant_DUPLICATE where k = 1""" + exception null } + + // here will always false + sql """ truncate table tn_simple_variant_DUPLICATE --force ; """ + sql """insert into tn_simple_variant_DUPLICATE values (1, '{"d" : "ddd", "s" : "fff", "da": "ddd", "m": 111}');""" + test { + sql """ select v['m'] from tn_simple_variant_DUPLICATE""" + exception "exceeded_sparse_column_limit_must_be_false" + } + + GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + } diff --git a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy index 0ab363d5671c72..d47c486047e042 100644 --- a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy +++ b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy @@ -23,12 +23,6 @@ 
suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { def backendId_to_backendHttpPort = [:] getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); - def set_be_config = { key, value -> - for (String backend_id: backendId_to_backendIP.keySet()) { - def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) - logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) - } - } try { String backend_id = backendId_to_backendIP.keySet()[0] def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) @@ -45,13 +39,14 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { } } - set_be_config("variant_max_sparse_column_statistics_size", "2") + int max_sparse_column_statistics_size = 2 def create_table = { tableName, buckets="auto", key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${tableName}" - def var_def = "variant <'sala' : int, 'ddd' : double, 'z' : double>" + def var_def = "variant " if (key_type == "AGGREGATE") { - var_def = "variant <'sala' : int, 'ddd' : double, 'z' : double> replace" + var_def = "variant replace" } + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( k bigint, @@ -61,6 +56,9 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { DISTRIBUTED BY HASH(k) BUCKETS ${buckets} properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ + def create_tbl_res = sql """ show create table ${tableName} """ + logger.info("${create_tbl_res}") + assertTrue(create_tbl_res.toString().contains("variant_max_sparse_column_statistics_size")) } def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"] // def key_types = ["AGGREGATE"] @@ -132,7 +130,8 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { order_qt_select "select * from ${tableName} order by k, cast(v as 
string) limit 5;" } } finally { - // set back to default - set_be_config("variant_max_sparse_column_statistics_size", "10000") + sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE" + sql "DROP TABLE IF EXISTS simple_variant_UNIQUE" + sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE" } } From 0ca1c16e05bc20052f4f9a8202784960859c20ec Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 7 Sep 2025 17:37:38 +0800 Subject: [PATCH 2/7] fix code --- .../doris/nereids/types/VariantType.java | 10 +- .../functions/ComputeSignatureHelperTest.java | 308 ------------------ 2 files changed, 4 insertions(+), 314 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index 01c392a3f96bdb..2f52d161129219 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -42,11 +42,11 @@ public class VariantType extends PrimitiveType { public static final int WIDTH = 24; - private final int variantMaxSubcolumnsCount; + private int variantMaxSubcolumnsCount = 0; - private final boolean enableTypedPathsToSparse; + private boolean enableTypedPathsToSparse = false; - private final int variantMaxSparseColumnStatisticsSize; + private int variantMaxSparseColumnStatisticsSize = 10000; private final List predefinedFields; @@ -54,7 +54,6 @@ public class VariantType extends PrimitiveType { public VariantType(int variantMaxSubcolumnsCount) { this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; predefinedFields = Lists.newArrayList(); - this.variantMaxSparseColumnStatisticsSize = 0; } /** @@ -62,7 +61,6 @@ public VariantType(int variantMaxSubcolumnsCount) { */ public VariantType(List fields) { this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); - this.variantMaxSparseColumnStatisticsSize = 0; } public VariantType(List 
fields, int variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse, @@ -124,7 +122,7 @@ public String toSql() { sb.append("\"variant_enable_typed_paths_to_sparse\" = \"") .append(String.valueOf(enableTypedPathsToSparse)).append("\""); } - if (variantMaxSparseColumnStatisticsSize != 0) { + if (variantMaxSparseColumnStatisticsSize != 10000) { sb.append(","); sb.append("\"variant_max_sparse_column_statistics_size\" = \"") .append(String.valueOf(variantMaxSparseColumnStatisticsSize)) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java index edaff0ced318c6..ff518fb9d1fa87 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java @@ -404,314 +404,6 @@ void testMapDateTimeV2ComputePrecision() { signature.getArgType(2)); } -<<<<<<< HEAD -======= - @Test - void testTimeV2PrecisionPromotion() { - FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE).args(TimeV2Type.INSTANCE, - TimeV2Type.INSTANCE, TimeV2Type.INSTANCE); - List arguments = Lists.newArrayList(new TimeV2Literal("12:34:56.12"), - new TimeV2Literal("12:34:56.123"), new TimeV2Literal("12:34:56.1")); - signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); - - // All arguments should be promoted to the highest precision (3) - Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(0)); - Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(1)); - Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(2)); - } - - @Test - void testMixedDateTimeV2AndTimeV2PrecisionPromotion() { - FunctionSignature signature = 
FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args( - DateTimeV2Type.SYSTEM_DEFAULT, TimeV2Type.INSTANCE, DateTimeV2Type.SYSTEM_DEFAULT); - List arguments = Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00.12"), - new TimeV2Literal("12:34:56.123"), new DateTimeV2Literal("2020-02-02 00:00:00.1")); - signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); - - // All arguments should be promoted to the highest precision (3) - Assertions.assertEquals(DateTimeV2Type.of(3), signature.getArgType(0)); - Assertions.assertEquals(TimeV2Type.of(3), signature.getArgType(1)); - Assertions.assertEquals(DateTimeV2Type.of(3), signature.getArgType(2)); - // Return type should also be promoted to precision 3 - Assertions.assertEquals(DateTimeV2Type.of(3), signature.returnType); - } - - @Test - void testNestedTimeV2PrecisionPromotion() { - FunctionSignature signature = FunctionSignature.ret(ArrayType.of(TimeV2Type.INSTANCE)).args( - ArrayType.of(TimeV2Type.INSTANCE), - MapType.of(IntegerType.INSTANCE, TimeV2Type.INSTANCE), TimeV2Type.INSTANCE); - List arguments = Lists.newArrayList( - new ArrayLiteral(Lists.newArrayList(new TimeV2Literal("12:34:56.12"))), - new MapLiteral(Lists.newArrayList(new IntegerLiteral(1)), - Lists.newArrayList(new TimeV2Literal("12:34:56.1234"))), - new TimeV2Literal("12:34:56.123")); - signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); - - // Check array argument (precision should be 4 from the map value) - Assertions.assertTrue(signature.getArgType(0) instanceof ArrayType); - Assertions.assertEquals(TimeV2Type.of(4), ((ArrayType) signature.getArgType(0)).getItemType()); - - // Check map argument - Assertions.assertTrue(signature.getArgType(1) instanceof MapType); - Assertions.assertEquals(IntegerType.INSTANCE, ((MapType) signature.getArgType(1)).getKeyType()); - Assertions.assertEquals(TimeV2Type.of(4), ((MapType) 
signature.getArgType(1)).getValueType()); - - // Check scalar argument - Assertions.assertEquals(TimeV2Type.of(4), signature.getArgType(2)); - - // Check return type - Assertions.assertTrue(signature.returnType instanceof ArrayType); - Assertions.assertEquals(TimeV2Type.of(4), ((ArrayType) signature.returnType).getItemType()); - } - - @Test - void testComplexNestedMixedTimePrecisionPromotion() { - // Create a complex nested structure with both DateTimeV2 and TimeV2 types - FunctionSignature signature = FunctionSignature - .ret(MapType.of(DateTimeV2Type.SYSTEM_DEFAULT, ArrayType.of(TimeV2Type.INSTANCE))) - .args(MapType.of(DateTimeV2Type.SYSTEM_DEFAULT, ArrayType.of(TimeV2Type.INSTANCE)), - ArrayType.of(MapType.of(TimeV2Type.INSTANCE, - DateTimeV2Type.SYSTEM_DEFAULT)), - DateTimeV2Type.SYSTEM_DEFAULT); - - // Create complex arguments with different precisions - List arguments = Lists.newArrayList( - // Map(DateTimeV2(2) -> Array(TimeV2(1))) - new MapLiteral(Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00.12")), - Lists.newArrayList(new ArrayLiteral( - Lists.newArrayList(new TimeV2Literal("12:34:56.1"))))), - // Array(Map(TimeV2(3) -> DateTimeV2(0))) - new ArrayLiteral(Lists.newArrayList(new MapLiteral( - Lists.newArrayList(new TimeV2Literal("12:34:56.123")), - Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00"))))), - // DateTimeV2(4) - new DateTimeV2Literal("2020-02-02 00:00:00.1234")); - - signature = ComputeSignatureHelper.computePrecision(new FakeComputeSignature(), signature, arguments); - - // All time types should be promoted to precision 4 - - // Check first argument: Map(DateTimeV2 -> Array(TimeV2)) - Assertions.assertTrue(signature.getArgType(0) instanceof MapType); - Assertions.assertEquals(DateTimeV2Type.of(4), ((MapType) signature.getArgType(0)).getKeyType()); - Assertions.assertTrue(((MapType) signature.getArgType(0)).getValueType() instanceof ArrayType); - Assertions.assertEquals(TimeV2Type.of(4), - ((ArrayType) ((MapType) 
signature.getArgType(0)).getValueType()).getItemType()); - - // Check second argument: Array(Map(TimeV2 -> DateTimeV2)) - Assertions.assertTrue(signature.getArgType(1) instanceof ArrayType); - Assertions.assertTrue(((ArrayType) signature.getArgType(1)).getItemType() instanceof MapType); - Assertions.assertEquals(TimeV2Type.of(4), - ((MapType) ((ArrayType) signature.getArgType(1)).getItemType()).getKeyType()); - Assertions.assertEquals(DateTimeV2Type.of(4), - ((MapType) ((ArrayType) signature.getArgType(1)).getItemType()).getValueType()); - - // Check third argument: DateTimeV2 - Assertions.assertEquals(DateTimeV2Type.of(4), signature.getArgType(2)); - - // Check return type: Map(DateTimeV2 -> Array(TimeV2)) - Assertions.assertTrue(signature.returnType instanceof MapType); - Assertions.assertEquals(DateTimeV2Type.of(4), ((MapType) signature.returnType).getKeyType()); - Assertions.assertTrue(((MapType) signature.returnType).getValueType() instanceof ArrayType); - Assertions.assertEquals(TimeV2Type.of(4), - ((ArrayType) ((MapType) signature.returnType).getValueType()).getItemType()); - } - - @Test - void testNoDynamicComputeVariantArgs() { - FunctionSignature signature = FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE); - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, Collections.emptyList()); - Assertions.assertTrue(signature.returnType instanceof DoubleType); - } - - @Test - void testDynamicComputeVariantArgsSingleVariant() { - VariantType variantType = new VariantType(100); - FunctionSignature signature = FunctionSignature.ret(VariantType.INSTANCE) - .args(VariantType.INSTANCE, IntegerType.INSTANCE); - - List arguments = Lists.newArrayList( - new MockVariantExpression(variantType), - new IntegerLiteral(42)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.returnType instanceof VariantType); - Assertions.assertEquals(100, ((VariantType) 
signature.returnType).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); - - Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); - Assertions.assertEquals(100, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - - Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); - } - - @Test - void testDynamicComputeVariantArgsMultipleVariants() { - VariantType variantType1 = new VariantType(150); - VariantType variantType2 = new VariantType(250); - FunctionSignature signature = FunctionSignature.ret(IntegerType.INSTANCE) - .args(VariantType.INSTANCE, VariantType.INSTANCE); - - List arguments = Lists.newArrayList( - new MockVariantExpression(variantType1), - new MockVariantExpression(variantType2)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); - Assertions.assertEquals(150, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - Assertions.assertTrue(signature.getArgType(1) instanceof VariantType); - Assertions.assertEquals(250, ((VariantType) signature.getArgType(1)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(1)).getVariantMaxSparseColumnStatisticsSize()); - Assertions.assertTrue(signature.returnType instanceof IntegerType); - } - - @Test - void testDynamicComputeVariantArgsMixedTypesWithSingleVariant() { - VariantType variantType = new VariantType(75); - FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE) - .args(VariantType.INSTANCE, IntegerType.INSTANCE, DoubleType.INSTANCE); - - List 
arguments = Lists.newArrayList( - new MockVariantExpression(variantType), - new IntegerLiteral(10), - new DoubleLiteral(3.14)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); - Assertions.assertEquals(75, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); - Assertions.assertTrue(signature.getArgType(2) instanceof DoubleType); - - Assertions.assertTrue(signature.returnType instanceof BooleanType); - } - - @Test - void testDynamicComputeVariantArgsWithNullLiteral() { - FunctionSignature signature = FunctionSignature.ret(BooleanType.INSTANCE) - .args(VariantType.INSTANCE, IntegerType.INSTANCE); - - List arguments = Lists.newArrayList( - new NullLiteral(), - new IntegerLiteral(10)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); - } - - @Test - void testDynamicComputeVariantArgsNoVariantReturnType() { - VariantType variantType = new VariantType(300); - FunctionSignature signature = FunctionSignature.ret(IntegerType.INSTANCE) - .args(VariantType.INSTANCE); - - List arguments = Lists.newArrayList( - new MockVariantExpression(variantType)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.returnType instanceof IntegerType); - - Assertions.assertTrue(signature.getArgType(0) instanceof 
VariantType); - Assertions.assertEquals(300, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - } - - @Test - void testDynamicComputeVariantArgsWithVarArgsThrowsException() { - VariantType variantType1 = new VariantType(150); - VariantType variantType2 = new VariantType(250); - FunctionSignature signature = FunctionSignature.ret(VariantType.INSTANCE) - .args(VariantType.INSTANCE, VariantType.INSTANCE); - - List arguments = Lists.newArrayList( - new MockVariantExpression(variantType1), - new MockVariantExpression(variantType2)); - - AnalysisException exception = Assertions.assertThrows(AnalysisException.class, () -> { - ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - }); - - Assertions.assertEquals("variant type is not supported in multiple arguments", exception.getMessage()); - } - - @Test - void testDynamicComputeVariantArgsWithComputeSignature() { - VariantType variantType = new VariantType(200); - FunctionSignature signature = FunctionSignature.ret(VariantType.INSTANCE) - .args(VariantType.INSTANCE); - - List arguments = Lists.newArrayList( - new MockVariantExpression(variantType)); - - signature = ComputeSignatureHelper.dynamicComputeVariantArgs(signature, arguments); - - Assertions.assertTrue(signature.returnType instanceof VariantType); - Assertions.assertEquals(200, ((VariantType) signature.returnType).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); - Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); - Assertions.assertEquals(200, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); - } - - /** - * Mock Expression class for testing VariantType - */ - 
private static class MockVariantExpression extends Expression { - private final VariantType variantType; - - public MockVariantExpression(VariantType variantType) { - super(Collections.emptyList()); - this.variantType = variantType; - } - - @Override - public DataType getDataType() { - return variantType; - } - - @Override - public boolean nullable() { - return true; - } - - @Override - public Expression withChildren(List children) { - return this; - } - - @Override - public R accept(ExpressionVisitor visitor, C context) { - return visitor.visit(this, context); - } - - @Override - public int arity() { - return 0; - } - - @Override - public Expression child(int index) { - throw new IndexOutOfBoundsException("MockVariantExpression has no children"); - } - - @Override - public List children() { - return Collections.emptyList(); - } - } - ->>>>>>> c7f07263a3 ([enhance](variant)enhance max_sparse_column_statistics_size for variant (#55124)) private static class FakeComputeSignature implements ComputeSignature { @Override public List children() { From 285f3a8b9f1430ef52d24b26f2475943e8052bc7 Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 7 Sep 2025 17:39:33 +0800 Subject: [PATCH 3/7] fix be format --- be/src/vec/common/schema_util.cpp | 6 +++--- be/src/vec/common/schema_util.h | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 176782dd86568c..7519b4ed8fd0d3 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -1110,9 +1110,9 @@ Status get_compaction_schema(const std::vector& rowsets, // Calculate statistics about variant data paths from the encoded sparse column void calculate_variant_stats(const IColumn& encoded_sparse_column, - segment_v2::VariantStatisticsPB* stats, - size_t max_sparse_column_statistics_size, - size_t row_pos, size_t num_rows) { + segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, 
size_t row_pos, + size_t num_rows) { // Cast input column to ColumnMap type since sparse column is stored as a map const auto& map_column = assert_cast(encoded_sparse_column); diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index 75f912a6ca2fb0..ab7fcec2b15b2d 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -172,9 +172,8 @@ Status check_path_stats(const std::vector& intputs, RowsetShare // Calculate statistics about variant data paths from the encoded sparse column void calculate_variant_stats(const IColumn& encoded_sparse_column, - segment_v2::VariantStatisticsPB* stats, - size_t max_sparse_column_statistics_size, - size_t row_pos, + segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, size_t row_pos, size_t num_rows); void get_field_info(const Field& field, FieldInfo* info); From 7003d69950a1bc7d300e86d3c7b850f6d1bb3f2f Mon Sep 17 00:00:00 2001 From: wangqiannan Date: Mon, 8 Sep 2025 14:24:51 +0800 Subject: [PATCH 4/7] fix fe/be code format --- be/test/olap/rowset/segment_v2/mock/mock_segment.h | 2 +- fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/be/test/olap/rowset/segment_v2/mock/mock_segment.h b/be/test/olap/rowset/segment_v2/mock/mock_segment.h index f4421c37f7d6f1..7efe16e75867f0 100644 --- a/be/test/olap/rowset/segment_v2/mock/mock_segment.h +++ b/be/test/olap/rowset/segment_v2/mock/mock_segment.h @@ -51,7 +51,7 @@ class MockSegment : public Segment { void add_column_uid_mapping(int32_t col_uid, int32_t footer_ordinal) { _tablet_schema->_cols.push_back(std::make_shared()); _tablet_schema->_cols.back()->set_unique_id(col_uid); - _tablet_schema->_field_uniqueid_to_index[col_uid] = footer_ordinal; + _tablet_schema->_field_id_to_index[col_uid] = footer_ordinal; _column_uid_to_footer_ordinal[col_uid] = footer_ordinal; } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 2cb974b73fe889..01993b3714eefb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -1291,11 +1291,11 @@ public void setDefaultValueInfo(Column refColumn) { this.defaultValueExprDef = refColumn.defaultValueExprDef; this.realDefaultValue = refColumn.realDefaultValue; } - + public int getVariantMaxSparseColumnStatisticsSize() { return type.isVariantType() ? ((ScalarType) type).getVariantMaxSparseColumnStatisticsSize() : -1; } - + public String getExtraInfo() { return extraInfo; } From 4e362a8522b9aa04d5a1219371db6cdf20d01aca Mon Sep 17 00:00:00 2001 From: wangqiannan Date: Mon, 8 Sep 2025 16:07:13 +0800 Subject: [PATCH 5/7] fix beut --- be/test/vec/common/schema_util_test.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/be/test/vec/common/schema_util_test.cpp b/be/test/vec/common/schema_util_test.cpp index d8813b312de4e4..59702fed2a7cc9 100644 --- a/be/test/vec/common/schema_util_test.cpp +++ b/be/test/vec/common/schema_util_test.cpp @@ -343,7 +343,8 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { construct_column_map_with_random_values(column_map, 200, 100, "key_"); // calculate stats - schema_util::calculate_variant_stats(*column_map, &stats, 0, 200); + size_t max_sparse_column_statistics_size = 10000; + schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, 200); EXPECT_EQ(stats.sparse_column_non_null_size_size(), key_value_counts.size()); for (const auto& kv : key_value_counts) { @@ -356,7 +357,7 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { column_map->clear(); const auto& key_value_counts2 = construct_column_map_with_random_values(column_map, 3000, 100, "key_"); - schema_util::calculate_variant_stats(*column_map, &stats, 0, 3000); + 
schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, 3000); EXPECT_EQ(stats.sparse_column_non_null_size_size(), 3000); for (const auto& [path, size] : stats.sparse_column_non_null_size()) { @@ -372,10 +373,10 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { // test with max size column_map->clear(); const auto& key_value_counts3 = construct_column_map_with_random_values( - column_map, config::variant_max_sparse_column_statistics_size, 5, "key2_"); - schema_util::calculate_variant_stats(*column_map, &stats, 0, - config::variant_max_sparse_column_statistics_size); - EXPECT_EQ(config::variant_max_sparse_column_statistics_size, + column_map, max_sparse_column_statistics_size, 5, "key2_"); + schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, + max_sparse_column_statistics_size); + EXPECT_EQ(max_sparse_column_statistics_size, stats.sparse_column_non_null_size_size()); for (const auto& [path, size] : stats.sparse_column_non_null_size()) { From ffa42337ee95505aa988c724ea57731cb84eb066 Mon Sep 17 00:00:00 2001 From: wangqiannan Date: Mon, 8 Sep 2025 17:28:42 +0800 Subject: [PATCH 6/7] fix type --- be/test/vec/common/schema_util_test.cpp | 11 ++++++----- .../java/org/apache/doris/catalog/VariantType.java | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/be/test/vec/common/schema_util_test.cpp b/be/test/vec/common/schema_util_test.cpp index 59702fed2a7cc9..cb6e38cf007355 100644 --- a/be/test/vec/common/schema_util_test.cpp +++ b/be/test/vec/common/schema_util_test.cpp @@ -344,7 +344,8 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { // calculate stats size_t max_sparse_column_statistics_size = 10000; - schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, 200); + schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, + 200); EXPECT_EQ(stats.sparse_column_non_null_size_size(), 
key_value_counts.size()); for (const auto& kv : key_value_counts) { @@ -357,7 +358,8 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { column_map->clear(); const auto& key_value_counts2 = construct_column_map_with_random_values(column_map, 3000, 100, "key_"); - schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, 3000); + schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, + 3000); EXPECT_EQ(stats.sparse_column_non_null_size_size(), 3000); for (const auto& [path, size] : stats.sparse_column_non_null_size()) { @@ -374,10 +376,9 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) { column_map->clear(); const auto& key_value_counts3 = construct_column_map_with_random_values( column_map, max_sparse_column_statistics_size, 5, "key2_"); - schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, + schema_util::calculate_variant_stats(*column_map, &stats, max_sparse_column_statistics_size, 0, max_sparse_column_statistics_size); - EXPECT_EQ(max_sparse_column_statistics_size, - stats.sparse_column_non_null_size_size()); + EXPECT_EQ(max_sparse_column_statistics_size, stats.sparse_column_non_null_size_size()); for (const auto& [path, size] : stats.sparse_column_non_null_size()) { auto first_size = key_value_counts.find(path) == key_value_counts.end() diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 28b1177e398715..8daf37f689626f 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -47,7 +47,7 @@ public class VariantType extends ScalarType { private boolean enableTypedPathsToSparse = false; @SerializedName(value = "variantMaxSparseColumnStatisticsSize") - private int variantMaxSparseColumnStatisticsSize = 0; + private int 
variantMaxSparseColumnStatisticsSize = 10000; private Map properties = Maps.newHashMap(); @@ -56,7 +56,7 @@ public VariantType() { this.predefinedFields = Lists.newArrayList(); this.variantMaxSubcolumnsCount = 0; this.enableTypedPathsToSparse = false; - this.variantMaxSparseColumnStatisticsSize = 0; + this.variantMaxSparseColumnStatisticsSize = 10000; } public VariantType(ArrayList fields) { From 77388069296ab7eed1e3adc0c38322b80453bd7e Mon Sep 17 00:00:00 2001 From: wangqiannan Date: Wed, 10 Sep 2025 10:20:13 +0800 Subject: [PATCH 7/7] fix some toString --- .../src/main/java/org/apache/doris/catalog/VariantType.java | 3 ++- .../main/java/org/apache/doris/nereids/types/VariantType.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 8daf37f689626f..e2a3d76da7f42b 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -109,7 +109,8 @@ public String toSql(int depth) { if (!predefinedFields.isEmpty()) { sb.append(predefinedFields.stream() .map(variantField -> variantField.toSql(depth)).collect(Collectors.joining(","))); - if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse) { + if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse + && variantMaxSparseColumnStatisticsSize == 10000) { sb.append(">"); return sb.toString(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index 2f52d161129219..0d7ee104e2fbc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -102,7 +102,8 @@ public String toSql() { sb.append("<"); if (!predefinedFields.isEmpty()) { 
sb.append(predefinedFields.stream().map(VariantField::toSql).collect(Collectors.joining(","))); - if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse) { + if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse + && variantMaxSparseColumnStatisticsSize == 10000) { sb.append(">"); return sb.toString(); } else {