diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index fc53f1a9e98aab..80bab59ac9b868 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1352,8 +1352,6 @@ DEFINE_Bool(enable_snapshot_action, "false"); DEFINE_mInt32(variant_max_merged_tablet_schema_size, "2048"); -DEFINE_mInt32(variant_max_sparse_column_statistics_size, "10000"); - DEFINE_mBool(enable_column_type_check, "true"); // 128 MB DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 5323c9f6e2d279..1bff6ec69a0fea 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1414,9 +1414,6 @@ DECLARE_Bool(enable_snapshot_action); // The max columns size for a tablet schema DECLARE_mInt32(variant_max_merged_tablet_schema_size); -// The max sparse column statistics size for a variant column -DECLARE_mInt32(variant_max_sparse_column_statistics_size); - DECLARE_mInt64(local_exchange_buffer_mem_limit); DECLARE_mInt64(enable_debug_log_timeout_secs); diff --git a/be/src/common/consts.h b/be/src/common/consts.h index 1190ded8e46ad2..15b4c84a7b9898 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -49,5 +49,6 @@ static constexpr int MAX_DECIMALV2_SCALE = 9; static constexpr int MAX_DECIMALV3_PRECISION = MAX_DECIMAL256_PRECISION; static constexpr int MAX_DECIMALV3_SCALE = MAX_DECIMALV3_PRECISION; +static constexpr int DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE = 10000; } // namespace BeConsts } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp index 6552bb4bafad09..2702e8f18804d3 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp @@ -76,9 +76,20 @@ bool VariantColumnReader::exist_in_sparse_column( } bool VariantColumnReader::is_exceeded_sparse_column_limit() const { - return 
!_statistics->sparse_column_non_null_size.empty() && - _statistics->sparse_column_non_null_size.size() >= - config::variant_max_sparse_column_statistics_size; + bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() && + _statistics->sparse_column_non_null_size.size() >= + _variant_sparse_column_statistics_size; + DBUG_EXECUTE_IF("exceeded_sparse_column_limit_must_be_false", { + if (exceeded_sparse_column_limit) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "exceeded_sparse_column_limit_must_be_false, sparse_column_non_null_size: {} : " + " _variant_sparse_column_statistics_size: {}", + _statistics->sparse_column_non_null_size.size(), + _variant_sparse_column_statistics_size); + } + }) + return exceeded_sparse_column_limit; } int64_t VariantColumnReader::get_metadata_size() const { @@ -318,9 +329,7 @@ Status VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator, // Otherwise the prefix is not exist and the sparse column size is reached limit // which means the path maybe exist in sparse_column - bool exceeded_sparse_column_limit = !_statistics->sparse_column_non_null_size.empty() && - _statistics->sparse_column_non_null_size.size() >= - config::variant_max_sparse_column_statistics_size; + bool exceeded_sparse_column_limit = is_exceeded_sparse_column_limit(); // If the variant column has extracted columns and is a compaction reader, then read flat leaves // Otherwise read hierarchical data, since the variant subcolumns are flattened in schema_util::VariantCompactionUtil::get_extended_compaction_schema @@ -402,6 +411,11 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF _statistics = std::make_unique(); const ColumnMetaPB& self_column_pb = footer.columns(column_id); const auto& parent_index = opts.tablet_schema->inverted_indexs(self_column_pb.unique_id()); + // record variant_sparse_column_statistics_size from parent column + _variant_sparse_column_statistics_size = + 
opts.tablet_schema->column_by_uid(self_column_pb.unique_id()) + .variant_max_sparse_column_statistics_size(); + for (int32_t ordinal = 0; ordinal < footer.columns_size(); ++ordinal) { const ColumnMetaPB& column_pb = footer.columns(ordinal); // Find all columns belonging to the current variant column diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h index 551e4a9045f30a..1ba16881419e35 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h @@ -118,6 +118,9 @@ class VariantColumnReader : public ColumnReader { std::unique_ptr _statistics; // key: subcolumn path, value: subcolumn indexes std::unordered_map _variant_subcolumns_indexes; + // variant_sparse_column_statistics_size + size_t _variant_sparse_column_statistics_size = + BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE; }; class VariantRootColumnIterator : public ColumnIterator { diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp index 088d8ec0f5a161..354224f1d9a4ab 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp @@ -350,7 +350,7 @@ Status VariantColumnWriterImpl::_process_sparse_column( it != sparse_data_paths_statistics.end()) { ++it->second; } else if (sparse_data_paths_statistics.size() < - config::variant_max_sparse_column_statistics_size) { + _tablet_column->variant_max_sparse_column_statistics_size()) { sparse_data_paths_statistics.emplace(path, 1); } } diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp index 6964fef452ef8b..baa57a08c0444c 100644 --- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp +++ 
b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp @@ -17,6 +17,8 @@ #include "olap/rowset/segment_v2/variant_stats_calculator.h" +#include + #include "common/logging.h" #include "util/simd/bits.h" #include "vec/columns/column_nullable.h" @@ -63,7 +65,13 @@ Status VariantStatsCaculator::calculate_variant_stats(const vectorized::Block* b // Check if this is a sparse column or sub column if (column_path.ends_with("__DORIS_VARIANT_SPARSE__")) { // This is a sparse column from variant column - _calculate_sparse_column_stats(*column, column_meta, row_pos, num_rows); + // get variant_max_sparse_column_statistics_size from tablet_schema + size_t variant_max_sparse_column_statistics_size = + _tablet_schema->column_by_uid(tablet_column.parent_unique_id()) + .variant_max_sparse_column_statistics_size(); + _calculate_sparse_column_stats(*column, column_meta, + variant_max_sparse_column_statistics_size, row_pos, + num_rows); } else { // This is a sub column from variant column _calculate_sub_column_stats(*column, column_meta, row_pos, num_rows); @@ -75,13 +83,14 @@ Status VariantStatsCaculator::calculate_variant_stats(const vectorized::Block* b void VariantStatsCaculator::_calculate_sparse_column_stats(const vectorized::IColumn& column, ColumnMetaPB* column_meta, + size_t max_sparse_column_statistics_size, size_t row_pos, size_t num_rows) { // Get or create variant statistics VariantStatisticsPB* stats = column_meta->mutable_variant_statistics(); // Use the same logic as the original calculate_variant_stats function - vectorized::schema_util::VariantCompactionUtil::calculate_variant_stats(column, stats, row_pos, - num_rows); + vectorized::schema_util::VariantCompactionUtil::calculate_variant_stats( + column, stats, max_sparse_column_statistics_size, row_pos, num_rows); VLOG_DEBUG << "Sparse column stats updated, non-null size count: " << stats->sparse_column_non_null_size_size(); diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h 
b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h index 6ffd74036cb5ff..221c45b781dce8 100644 --- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h +++ b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h @@ -45,7 +45,9 @@ class VariantStatsCaculator { // Helper method to calculate sparse column statistics void _calculate_sparse_column_stats(const vectorized::IColumn& column, - ColumnMetaPB* column_meta, size_t row_pos, size_t num_rows); + ColumnMetaPB* column_meta, + size_t max_sparse_column_statistics_size, size_t row_pos, + size_t num_rows); // Helper method to calculate sub column statistics void _calculate_sub_column_stats(const vectorized::IColumn& column, ColumnMetaPB* column_meta, diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index e1aedf20642164..f87d5b4d66105f 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -495,6 +495,10 @@ void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tco column->set_variant_enable_typed_paths_to_sparse( tcolumn.variant_enable_typed_paths_to_sparse); } + if (tcolumn.__isset.variant_max_sparse_column_statistics_size) { + column->set_variant_max_sparse_column_statistics_size( + tcolumn.variant_max_sparse_column_statistics_size); + } } void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const Version& version) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 86d3857e639cfa..82b784b34832ea 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -682,6 +682,10 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { if (column.has_variant_enable_typed_paths_to_sparse()) { _variant_enable_typed_paths_to_sparse = column.variant_enable_typed_paths_to_sparse(); } + if (column.has_variant_max_sparse_column_statistics_size()) { + _variant_max_sparse_column_statistics_size = + column.variant_max_sparse_column_statistics_size(); + } if 
(column.has_pattern_type()) { _pattern_type = column.pattern_type(); } @@ -765,6 +769,8 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const { column->set_variant_max_subcolumns_count(_variant_max_subcolumns_count); column->set_pattern_type(_pattern_type); column->set_variant_enable_typed_paths_to_sparse(_variant_enable_typed_paths_to_sparse); + column->set_variant_max_sparse_column_statistics_size( + _variant_max_sparse_column_statistics_size); } void TabletColumn::add_sub_column(TabletColumn& sub_column) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 9d3d740cedb2ee..9ef2aa8557fefc 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -226,6 +226,11 @@ class TabletColumn : public MetadataAdder { _variant_enable_typed_paths_to_sparse = enable; } + void set_variant_max_sparse_column_statistics_size( + int32_t variant_max_sparse_column_statistics_size) { + _variant_max_sparse_column_statistics_size = variant_max_sparse_column_statistics_size; + } + int32_t variant_max_subcolumns_count() const { return _variant_max_subcolumns_count; } PatternTypePB pattern_type() const { return _pattern_type; } @@ -234,6 +239,10 @@ class TabletColumn : public MetadataAdder { return _variant_enable_typed_paths_to_sparse; } + int32_t variant_max_sparse_column_statistics_size() const { + return _variant_max_sparse_column_statistics_size; + } + bool is_decimal() const { return _is_decimal; } private: @@ -280,6 +289,9 @@ class TabletColumn : public MetadataAdder { int32_t _variant_max_subcolumns_count = 0; PatternTypePB _pattern_type = PatternTypePB::MATCH_NAME_GLOB; bool _variant_enable_typed_paths_to_sparse = false; + // set variant_max_sparse_column_statistics_size + int32_t _variant_max_sparse_column_statistics_size = + BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE; }; bool operator==(const TabletColumn& a, const TabletColumn& b); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp 
index e9e6810ce248e9..8cbb9331823918 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -809,7 +809,9 @@ Status VariantCompactionUtil::check_path_stats(const std::vector config::variant_max_sparse_column_statistics_size) { + if (stats.size() > output->tablet_schema() + ->column_by_uid(uid) + .variant_max_sparse_column_statistics_size()) { // When there is only one segment, we can ensure that the size of each path in output stats is accurate if (output->num_segments() == 1) { for (const auto& [path, size] : stats) { @@ -933,7 +935,8 @@ void VariantCompactionUtil::get_compaction_subcolumns( VLOG_DEBUG << "append typed column " << subpath; } else if (find_data_types == path_to_data_types.end() || find_data_types->second.empty() || sparse_paths.find(std::string(subpath)) != sparse_paths.end() || - sparse_paths.size() >= config::variant_max_sparse_column_statistics_size) { + sparse_paths.size() >= + parent_column->variant_max_sparse_column_statistics_size()) { TabletColumn subcolumn; subcolumn.set_name(column_name); subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); @@ -1031,6 +1034,7 @@ Status VariantCompactionUtil::get_extended_compaction_schema( // Calculate statistics about variant data paths from the encoded sparse column void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_sparse_column, segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, size_t row_pos, size_t num_rows) { // Cast input column to ColumnMap type since sparse column is stored as a map const auto& map_column = assert_cast(encoded_sparse_column); @@ -1055,19 +1059,17 @@ void VariantCompactionUtil::calculate_variant_stats(const IColumn& encoded_spars } // If path doesn't exist and we haven't hit the max statistics size limit, // add it with count 1 - else if (count_map.size() < config::variant_max_sparse_column_statistics_size) { + else if (count_map.size() < max_sparse_column_statistics_size) { 
count_map.emplace(sparse_path, 1); } } } - if (stats->sparse_column_non_null_size().size() > - config::variant_max_sparse_column_statistics_size) { + if (stats->sparse_column_non_null_size().size() > max_sparse_column_statistics_size) { throw doris::Exception( ErrorCode::INTERNAL_ERROR, "Sparse column non null size: {} is greater than max statistics size: {}", - stats->sparse_column_non_null_size().size(), - config::variant_max_sparse_column_statistics_size); + stats->sparse_column_non_null_size().size(), max_sparse_column_statistics_size); } } diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index e6d9d25b5b69b9..762787777ebfea 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -189,7 +189,8 @@ class VariantCompactionUtil { // Calculate statistics about variant data paths from the encoded sparse column static void calculate_variant_stats(const IColumn& encoded_sparse_column, - segment_v2::VariantStatisticsPB* stats, size_t row_pos, + segment_v2::VariantStatisticsPB* stats, + size_t max_sparse_column_statistics_size, size_t row_pos, size_t num_rows); static void get_compaction_subcolumns(TabletSchema::PathsSetInfo& paths_set_info, diff --git a/be/test/olap/rowset/segment_v2/mock/mock_segment.h b/be/test/olap/rowset/segment_v2/mock/mock_segment.h index 9cf443b2df0959..f4421c37f7d6f1 100644 --- a/be/test/olap/rowset/segment_v2/mock/mock_segment.h +++ b/be/test/olap/rowset/segment_v2/mock/mock_segment.h @@ -49,6 +49,9 @@ class MockSegment : public Segment { // Helper methods for test setup void add_column_uid_mapping(int32_t col_uid, int32_t footer_ordinal) { + _tablet_schema->_cols.push_back(std::make_shared()); + _tablet_schema->_cols.back()->set_unique_id(col_uid); + _tablet_schema->_field_uniqueid_to_index[col_uid] = footer_ordinal; _column_uid_to_footer_ordinal[col_uid] = footer_ordinal; } diff --git a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp index aa7ef29b122807..e75a85f3051150 100644 --- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp +++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp @@ -45,6 +45,7 @@ static void construct_column(ColumnPB* column_pb, int32_t col_unique_id, column_pb->set_is_nullable(is_nullable); if (column_type == "VARIANT") { column_pb->set_variant_max_subcolumns_count(variant_max_subcolumns_count); + column_pb->set_variant_max_sparse_column_statistics_size(10000); } } @@ -505,15 +506,15 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) { // 13. check statistics size == limit auto& variant_stats = variant_column_reader->_statistics; EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() < - config::variant_max_sparse_column_statistics_size); - auto limit = config::variant_max_sparse_column_statistics_size - + variant_column_reader->_variant_sparse_column_statistics_size); + auto limit = variant_column_reader->_variant_sparse_column_statistics_size - variant_stats->sparse_column_non_null_size.size(); for (int i = 0; i < limit; ++i) { std::string key = parent_column.name_lower_case() + ".key10" + std::to_string(i); variant_stats->sparse_column_non_null_size[key] = 10000; } EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() == - config::variant_max_sparse_column_statistics_size); + variant_column_reader->_variant_sparse_column_statistics_size); EXPECT_TRUE(variant_column_reader->is_exceeded_sparse_column_limit()); ColumnIteratorUPtr it2; diff --git a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp index 6591c799945c0e..31f694536ad68f 100644 --- a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp +++ b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp @@ -71,8 +71,10 @@ class VariantStatsCalculatorTest : public 
::testing::Test { } // Helper method to create a footer column with path info - void add_footer_column_with_path(int32_t parent_unique_id, const std::string& path) { + void add_footer_column_with_path(int32_t parent_unique_id, const std::string& path, + uint32_t column_id = 0) { auto* column_meta = _footer->add_columns(); + column_meta->set_column_id(column_id); column_meta->set_unique_id(100 + _footer->columns_size()); auto* path_info = column_meta->mutable_column_path_info(); @@ -202,19 +204,26 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSubColumn) { TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSparseColumn) { // Setup footer with sparse column - add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__"); + add_footer_column_with_path(-1, "sparse_col"); + add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__", 1); // Create variant sparse column + TabletColumn parent_column = create_variant_column(1, "variant_col", -1, "sparse_col"); TabletColumn sparse_column = create_variant_column(2, "variant_col.__DORIS_VARIANT_SPARSE__", 1, "sparse_col.__DORIS_VARIANT_SPARSE__"); + _tablet_schema->append_column(parent_column); _tablet_schema->append_column(sparse_column); - std::vector column_ids = {0}; + std::vector column_ids = {0, 1}; VariantStatsCaculator calculator(_footer.get(), _tablet_schema, column_ids); // Create block with map column (sparse column) vectorized::Block block; auto map_column = create_map_column(); + auto string_column = vectorized::ColumnString::create(); + // add parent column to block + block.insert({std::move(string_column), std::make_shared(), + "variant_column"}); block.insert({std::move(map_column), std::make_shared( std::make_shared(), @@ -225,7 +234,7 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSparseColumn) { EXPECT_TRUE(status.ok()); // Check that variant statistics were updated - auto& column_meta = _footer->columns(0); + auto& column_meta = _footer->columns(1); 
EXPECT_TRUE(column_meta.has_variant_statistics()); } @@ -275,10 +284,15 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMissingPathInFooter) } TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { + // parent column + add_footer_column_with_path(-1, "variant"); + TabletColumn parent_column = create_variant_column(1, "variant", -1, "variant"); + _tablet_schema->append_column(parent_column); + // Setup footer with multiple columns - add_footer_column_with_path(1, "sub1"); - add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__"); - add_footer_column_with_path(2, "another_sub"); + add_footer_column_with_path(1, "sub1", 1); + add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__", 2); + add_footer_column_with_path(2, "another_sub", 3); // Create multiple variant columns TabletColumn sub1 = create_variant_column(2, "variant.sub1", 1, "sub1"); @@ -290,12 +304,17 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { _tablet_schema->append_column(sparse); _tablet_schema->append_column(sub2); - std::vector column_ids = {0, 1, 2}; + std::vector column_ids = {0, 1, 2, 3}; VariantStatsCaculator calculator(_footer.get(), _tablet_schema, column_ids); // Create block with multiple columns vectorized::Block block; + // parent column + auto string_column = vectorized::ColumnString::create(); + string_column->insert_data("test", 4); + block.insert({std::move(string_column), std::make_shared(), + "variant_column"}); auto nullable_col1 = create_nullable_column({false, true, false}, {"a", "", "c"}); block.insert({std::move(nullable_col1), std::make_shared( @@ -320,9 +339,9 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) { EXPECT_TRUE(status.ok()); // Check that statistics were updated for sub columns - EXPECT_EQ(_footer->columns(0).none_null_size(), 2); // sub1: 2 non-null - EXPECT_TRUE(_footer->columns(1).has_variant_statistics()); // sparse column - 
EXPECT_EQ(_footer->columns(2).none_null_size(), 1); // another_sub: 2 non-null + EXPECT_EQ(_footer->columns(1).none_null_size(), 2); // sub1: 2 non-null + EXPECT_TRUE(_footer->columns(2).has_variant_statistics()); // sparse column + EXPECT_EQ(_footer->columns(3).none_null_size(), 1); // another_sub: 1 non-null } TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithEmptyBlock) { diff --git a/be/test/vec/common/schema_util_test.cpp b/be/test/vec/common/schema_util_test.cpp index 3988ed1bb9a624..ae85f819fabebb 100644 --- a/be/test/vec/common/schema_util_test.cpp +++ b/be/test/vec/common/schema_util_test.cpp @@ -1457,7 +1457,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) { variant.set_unique_id(30); variant.set_variant_max_subcolumns_count(3); variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE); - + variant.set_variant_max_sparse_column_statistics_size(10000); TabletSchemaSPtr schema = std::make_shared(); schema->append_column(variant); @@ -1514,7 +1514,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) { output_schema = std::make_shared(); sparse_paths.clear(); - for (int i = 0; i < config::variant_max_sparse_column_statistics_size + 1; ++i) { + for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 1; ++i) { sparse_paths.insert("dummy" + std::to_string(i)); } schema_util::VariantCompactionUtil::get_compaction_subcolumns( @@ -1531,6 +1531,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_advanced) { variant.set_variant_max_subcolumns_count(3); variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE); variant.set_variant_enable_typed_paths_to_sparse(true); + variant.set_variant_max_sparse_column_statistics_size(10000); TabletColumn subcolumn; subcolumn.set_name("c"); subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_DATEV2); @@ -1606,7 +1607,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_advanced) { output_schema = std::make_shared(); sparse_paths.clear(); - for (int 
i = 0; i < config::variant_max_sparse_column_statistics_size + 1; ++i) { + for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 1; ++i) { sparse_paths.insert("dummy" + std::to_string(i)); } schema_util::VariantCompactionUtil::get_compaction_subcolumns( diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java index 1ca0c121814234..ee92c57c1d0f63 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -1226,4 +1226,13 @@ public boolean getVariantEnableTypedPathsToSparse() { } return false; // The old variant type had a default value of false. } + + public int getVariantMaxSparseColumnStatisticsSize() { + // In the past, variant metadata used the ScalarType type. + // Now, we use VariantType, which inherits from ScalarType, as the new metadata storage. + if (this instanceof VariantType) { + return ((VariantType) this).getVariantMaxSparseColumnStatisticsSize(); + } + return 0; // The old variant type had a default value of 0. 
+ } } diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index 29342d73ca7c25..28b1177e398715 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -46,6 +46,9 @@ public class VariantType extends ScalarType { @SerializedName(value = "enableTypedPathsToSparse") private boolean enableTypedPathsToSparse = false; + @SerializedName(value = "variantMaxSparseColumnStatisticsSize") + private int variantMaxSparseColumnStatisticsSize = 0; + private Map properties = Maps.newHashMap(); public VariantType() { @@ -53,6 +56,7 @@ public VariantType() { this.predefinedFields = Lists.newArrayList(); this.variantMaxSubcolumnsCount = 0; this.enableTypedPathsToSparse = false; + this.variantMaxSparseColumnStatisticsSize = 0; } public VariantType(ArrayList fields) { @@ -81,7 +85,8 @@ public VariantType(ArrayList fields, Map propertie } public VariantType(ArrayList fields, int variantMaxSubcolumnsCount, - boolean enableTypedPathsToSparse) { + boolean enableTypedPathsToSparse, + int variantMaxSparseColumnStatisticsSize) { super(PrimitiveType.VARIANT); Preconditions.checkNotNull(fields); this.predefinedFields = fields; @@ -90,6 +95,7 @@ public VariantType(ArrayList fields, int variantMaxSubcolumnsCount } this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; this.enableTypedPathsToSparse = enableTypedPathsToSparse; + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; } @Override @@ -123,6 +129,11 @@ public String toSql(int depth) { sb.append("\"variant_enable_typed_paths_to_sparse\" = \"") .append(String.valueOf(enableTypedPathsToSparse)).append("\""); } + if (variantMaxSparseColumnStatisticsSize != 10000) { + sb.append(","); + sb.append("\"variant_max_sparse_column_statistics_size\" = \"") + 
.append(String.valueOf(variantMaxSparseColumnStatisticsSize)).append("\""); + } sb.append(")>"); return sb.toString(); } @@ -188,4 +199,12 @@ public Map getProperties() { public void setEnableTypedPathsToSparse(boolean enableTypedPathsToSparse) { this.enableTypedPathsToSparse = enableTypedPathsToSparse; } + + public int getVariantMaxSparseColumnStatisticsSize() { + return variantMaxSparseColumnStatisticsSize; + } + + public void setVariantMaxSparseColumnStatisticsSize(int variantMaxSparseColumnStatisticsSize) { + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 2b324d2ecd5534..bd4dfb43546f0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -656,6 +656,7 @@ public TColumn toThrift() { } tColumn.setClusterKeyId(this.clusterKeyId); tColumn.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse()); + tColumn.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize()); // ATTN: // Currently, this `toThrift()` method is only used from CreateReplicaTask. // And CreateReplicaTask does not need `defineExpr` field. 
@@ -883,6 +884,7 @@ public OlapFile.ColumnPB toPb(Set bfColumns, List indexes) throws } else if (this.type.isVariantType()) { builder.setVariantMaxSubcolumnsCount(this.getVariantMaxSubcolumnsCount()); builder.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse()); + builder.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize()); // variant may contain predefined structured fields addChildren(builder); } @@ -959,6 +961,9 @@ public void checkSchemaChangeAllowed(Column other) throws DdlException { if (this.getVariantEnableTypedPathsToSparse() != other.getVariantEnableTypedPathsToSparse()) { throw new DdlException("Can not change variant enable typed paths to sparse"); } + if (this.getVariantMaxSparseColumnStatisticsSize() != other.getVariantMaxSparseColumnStatisticsSize()) { + throw new DdlException("Can not change variant max sparse column statistics size"); + } if (!this.getChildren().isEmpty() || !other.getChildren().isEmpty()) { throw new DdlException("Can not change variant schema templates"); } @@ -1294,6 +1299,10 @@ public boolean getVariantEnableTypedPathsToSparse() { return type.isVariantType() ? ((ScalarType) type).getVariantEnableTypedPathsToSparse() : false; } + public int getVariantMaxSparseColumnStatisticsSize() { + return type.isVariantType() ? 
((ScalarType) type).getVariantMaxSparseColumnStatisticsSize() : -1; + } + public void setFieldPatternType(TPatternType type) { fieldPatternType = type; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 6ee9a62844efab..b1d4d2ac897ec7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -257,6 +257,9 @@ public class PropertyAnalyzer { public static final String SM4 = "SM4"; public static final String PLAINTEXT = "PLAINTEXT"; + public static final String PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE = + "variant_max_sparse_column_statistics_size"; + public enum RewriteType { PUT, // always put property REPLACE, // replace if exists property @@ -1875,6 +1878,27 @@ public static boolean analyzeEnableTypedPathsToSparse(Map proper return enableTypedPathsToSparse; } + public static int analyzeVariantMaxSparseColumnStatisticsSize(Map properties, int defaultValue) + throws AnalysisException { + int maxSparseColumnStatisticsSize = defaultValue; + if (properties != null && properties.containsKey(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE)) { + String maxSparseColumnStatisticsSizeStr = + properties.get(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); + try { + maxSparseColumnStatisticsSize = Integer.parseInt(maxSparseColumnStatisticsSizeStr); + if (maxSparseColumnStatisticsSize < 0 || maxSparseColumnStatisticsSize > 50000) { + throw new AnalysisException("variant_max_sparse_column_statistics_size must between 0 and 50000 "); + } + } catch (NumberFormatException e) { + // Only parse failures land here; the range error above propagates with its own message. + throw new AnalysisException("variant_max_sparse_column_statistics_size format error:" + e.getMessage()); + } + + properties.remove(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); + } + return maxSparseColumnStatisticsSize; + } + public static 
TEncryptionAlgorithm analyzeTDEAlgorithm(Map properties) throws AnalysisException { String name; if (properties == null || !properties.containsKey(PROPERTIES_TDE_ALGORITHM)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index e40b83bfe48adc..bfb7f9d0b1dd5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -4680,12 +4680,16 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) ConnectContext.get().getSessionVariable().getDefaultVariantMaxSubcolumnsCount(); boolean enableTypedPathsToSparse = ConnectContext.get() == null ? false : ConnectContext.get().getSessionVariable().getDefaultEnableTypedPathsToSparse(); + int variantMaxSparseColumnStatisticsSize = ConnectContext.get() == null ? 0 : + ConnectContext.get().getSessionVariable().getDefaultVariantMaxSparseColumnStatisticsSize(); try { variantMaxSubcolumnsCount = PropertyAnalyzer .analyzeVariantMaxSubcolumnsCount(properties, variantMaxSubcolumnsCount); enableTypedPathsToSparse = PropertyAnalyzer .analyzeEnableTypedPathsToSparse(properties, enableTypedPathsToSparse); + variantMaxSparseColumnStatisticsSize = PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize( + properties, variantMaxSparseColumnStatisticsSize); } catch (org.apache.doris.common.AnalysisException e) { throw new NotSupportedException(e.getMessage()); } @@ -4693,10 +4697,12 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) if (!properties.isEmpty()) { throw new NotSupportedException("only support for " + PropertyAnalyzer.PROPERTIES_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE - + " and " + PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT); + + " and " + PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT + + " and " + 
PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE); } - return new VariantType(fields, variantMaxSubcolumnsCount, enableTypedPathsToSparse); + return new VariantType(fields, variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java index 2100ec6424997d..8a12ec622255f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java @@ -451,7 +451,8 @@ public static DataType fromCatalogType(Type type) { .collect(ImmutableList.toImmutableList()); return new VariantType(variantFields, ((org.apache.doris.catalog.VariantType) type).getVariantMaxSubcolumnsCount(), - ((org.apache.doris.catalog.VariantType) type).getEnableTypedPathsToSparse()); + ((org.apache.doris.catalog.VariantType) type).getEnableTypedPathsToSparse(), + ((org.apache.doris.catalog.VariantType) type).getVariantMaxSparseColumnStatisticsSize()); } return VariantType.INSTANCE; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index aa17c3b4292506..13133887df07ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -46,6 +46,8 @@ public class VariantType extends PrimitiveType { private final boolean enableTypedPathsToSparse; + private final int variantMaxSparseColumnStatisticsSize; + private final List predefinedFields; // No predefined fields @@ -53,6 +55,7 @@ public VariantType(int variantMaxSubcolumnsCount) { this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; this.predefinedFields = Lists.newArrayList(); this.enableTypedPathsToSparse = false; + 
this.variantMaxSparseColumnStatisticsSize = 0; } /** @@ -62,25 +65,30 @@ public VariantType(List fields) { this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); this.variantMaxSubcolumnsCount = 0; this.enableTypedPathsToSparse = false; + this.variantMaxSparseColumnStatisticsSize = 0; } - public VariantType(List fields, int variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse) { + public VariantType(List fields, int variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse, + int variantMaxSparseColumnStatisticsSize) { this.predefinedFields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount; this.enableTypedPathsToSparse = enableTypedPathsToSparse; + this.variantMaxSparseColumnStatisticsSize = variantMaxSparseColumnStatisticsSize; } @Override public DataType conversion() { return new VariantType(predefinedFields.stream().map(VariantField::conversion) - .collect(Collectors.toList()), variantMaxSubcolumnsCount, enableTypedPathsToSparse); + .collect(Collectors.toList()), variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); } @Override public Type toCatalogDataType() { org.apache.doris.catalog.VariantType type = new org.apache.doris.catalog.VariantType(predefinedFields.stream() .map(VariantField::toCatalogDataType) - .collect(Collectors.toCollection(ArrayList::new)), variantMaxSubcolumnsCount, enableTypedPathsToSparse); + .collect(Collectors.toCollection(ArrayList::new)), variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize); return type; } @@ -119,6 +127,12 @@ public String toSql() { sb.append("\"variant_enable_typed_paths_to_sparse\" = \"") .append(String.valueOf(enableTypedPathsToSparse)).append("\""); } + if (variantMaxSparseColumnStatisticsSize != 0) { + sb.append(","); + sb.append("\"variant_max_sparse_column_statistics_size\" = \"") 
+ .append(String.valueOf(variantMaxSparseColumnStatisticsSize)) + .append("\""); + } sb.append(")>"); return sb.toString(); } @@ -134,12 +148,14 @@ public boolean equals(Object o) { VariantType other = (VariantType) o; return this.variantMaxSubcolumnsCount == other.variantMaxSubcolumnsCount && this.enableTypedPathsToSparse == other.enableTypedPathsToSparse + && this.variantMaxSparseColumnStatisticsSize == other.variantMaxSparseColumnStatisticsSize && Objects.equals(predefinedFields, other.predefinedFields); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), variantMaxSubcolumnsCount, enableTypedPathsToSparse, predefinedFields); + return Objects.hash(super.hashCode(), variantMaxSubcolumnsCount, enableTypedPathsToSparse, + variantMaxSparseColumnStatisticsSize, predefinedFields); } @Override @@ -159,4 +175,8 @@ public List getPredefinedFields() { public int getVariantMaxSubcolumnsCount() { return variantMaxSubcolumnsCount; } + + public int getVariantMaxSparseColumnStatisticsSize() { + return variantMaxSparseColumnStatisticsSize; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ea03ea64b9c3c8..a4b01bbdb9184d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -804,6 +804,9 @@ public static double getHotValueThreshold() { public static final String DEFAULT_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE = "default_variant_enable_typed_paths_to_sparse"; + public static final String DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE = + "default_variant_max_sparse_column_statistics_size"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. 
*/ @@ -2870,6 +2873,13 @@ public boolean isEnableESParallelScroll() { ) public boolean defaultEnableTypedPathsToSparse = false; + @VariableMgr.VarAttr( + name = DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, + needForward = true, + fuzzy = true + ) + public int defaultVariantMaxSparseColumnStatisticsSize = 10000; + // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables, // not the default value set in the code. @SuppressWarnings("checkstyle:Indentation") @@ -5252,6 +5262,10 @@ public int getDefaultVariantMaxSubcolumnsCount() { return defaultVariantMaxSubcolumnsCount; } + public int getDefaultVariantMaxSparseColumnStatisticsSize() { + return defaultVariantMaxSparseColumnStatisticsSize; + } + public static boolean isFeDebug() { if (ConnectContext.get() != null) { return ConnectContext.get().getSessionVariable().feDebug; diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java index ab7291eaf161a0..ddd813df376ba4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java @@ -334,4 +334,32 @@ public void testAnalyzeInvertedIndexFileStorageFormat() throws AnalysisException e.getMessage()); } } + + @Test + public void testAnalyzeVariantMaxSparseColumnStatisticsSize() throws AnalysisException { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "-1"); + try { + PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + Assertions.assertNotNull(e.getMessage()); + } + properties.clear(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "50001"); + try { + 
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + Assertions.assertNotNull(e.getMessage()); + } + properties.clear(); + properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE, "invalid"); + try { + PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0); + Assertions.fail("Expected AnalysisException was not thrown"); + } catch (AnalysisException e) { + Assertions.assertNotNull(e.getMessage()); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java index 17fcd1a21a4439..2dc2447c97d79e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java @@ -544,9 +544,11 @@ void testDynamicComputeVariantArgsSingleVariant() { Assertions.assertTrue(signature.returnType instanceof VariantType); Assertions.assertEquals(100, ((VariantType) signature.returnType).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(100, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); } @@ -566,8 +568,10 @@ void testDynamicComputeVariantArgsMultipleVariants() { Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(150, ((VariantType) 
signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(1) instanceof VariantType); Assertions.assertEquals(250, ((VariantType) signature.getArgType(1)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(1)).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.returnType instanceof IntegerType); } @@ -586,7 +590,7 @@ void testDynamicComputeVariantArgsMixedTypesWithSingleVariant() { Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(75, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); - + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); Assertions.assertTrue(signature.getArgType(2) instanceof DoubleType); @@ -606,6 +610,7 @@ void testDynamicComputeVariantArgsWithNullLiteral() { Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(1) instanceof IntegerType); } @@ -624,6 +629,7 @@ void testDynamicComputeVariantArgsNoVariantReturnType() { Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(300, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); } @Test @@ -657,8 +663,10 @@ void testDynamicComputeVariantArgsWithComputeSignature() { Assertions.assertTrue(signature.returnType instanceof VariantType); 
Assertions.assertEquals(200, ((VariantType) signature.returnType).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.returnType).getVariantMaxSparseColumnStatisticsSize()); Assertions.assertTrue(signature.getArgType(0) instanceof VariantType); Assertions.assertEquals(200, ((VariantType) signature.getArgType(0)).getVariantMaxSubcolumnsCount()); + Assertions.assertEquals(0, ((VariantType) signature.getArgType(0)).getVariantMaxSparseColumnStatisticsSize()); } /** diff --git a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java index b1f2039e356348..3fac71bfc33d2d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java @@ -36,5 +36,6 @@ public void testScalarType() { Assert.assertEquals(scalarType.getPrimitiveType(), scalarType2.getPrimitiveType()); Assert.assertEquals(scalarType.getVariantMaxSubcolumnsCount(), 0); Assert.assertEquals(scalarType.getVariantEnableTypedPathsToSparse(), false); + Assert.assertEquals(scalarType.getVariantMaxSparseColumnStatisticsSize(), 0); } } diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 07d357fdb1dce8..a1d1719c5ebe51 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -354,6 +354,8 @@ message ColumnPB { optional bool variant_enable_typed_paths_to_sparse = 27 [default = false]; // this field is only used during flexible partial update load optional bool is_on_update_current_timestamp = 28 [default = false]; + // variant_max_sparse_column_statistics_size + optional int32 variant_max_sparse_column_statistics_size = 29 [default = 10000]; } // Dictionary of Schema info, to reduce TabletSchemaCloudPB fdb kv size diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index f7577749152648..73e5cf3a8a7025 100644 --- 
a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -51,6 +51,7 @@ struct TColumn { 21: optional TPatternType pattern_type 22: optional bool variant_enable_typed_paths_to_sparse = false 23: optional bool is_on_update_current_timestamp = false + 24: optional i32 variant_max_sparse_column_statistics_size = 10000 } struct TSlotDescriptor { diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index 9f45ce32bb1fc0..357cdaab711454 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -82,6 +82,7 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line "ccr_mow_syncer_p0," + "hdfs_vault_p2," + "inject_hdfs_vault_p0," + + "variant_p0/nested," + "plsql_p0," + // plsql is not developped any more, add by sk. "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index a1ddefbc566b6b..2859f166dfe4bc 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -80,6 +80,7 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line "cloud_p0," + "workload_manager_p1," + "plsql_p0," + // plsql is not developped any more, add by sk + "variant_p0/nested," + "zzz_the_end_sentinel_do_not_touch"// keep this line as the last line customConf1 = "test_custom_conf_value" diff --git a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy index 5b32eb67a02c7e..88b228258306e6 100644 --- a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy 
+++ b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy @@ -23,12 +23,6 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { def backendId_to_backendHttpPort = [:] getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); - def set_be_config = { key, value -> - for (String backend_id: backendId_to_backendIP.keySet()) { - def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) - logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) - } - } try { String backend_id = backendId_to_backendIP.keySet()[0] def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) @@ -45,17 +39,17 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { } } - set_be_config("variant_max_sparse_column_statistics_size", "2") - int max_subcolumns_count = Math.floor(Math.random() * 5) + int max_subcolumns_count = Math.floor(Math.random() * 5) + int max_sparse_column_statistics_size = 2 if (max_subcolumns_count == 1) { max_subcolumns_count = 0 } def create_table = { tableName, buckets="auto", key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${tableName}" - def var_def = "variant " + def var_def = "variant " if (key_type == "AGGREGATE") { - var_def = "variant replace" + var_def = "variant replace" } sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( @@ -76,6 +70,14 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { } finally { GetDebugPoint().disableDebugPointForAllBEs("exist_in_sparse_column_must_be_false") } + } else if (max_subcolumns_count > 1) { + // here will always false + try { + GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + sql """ select v['mmm'] from ${tableName} where k = 30""" + } finally { + 
GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + } } } def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"] @@ -133,8 +135,41 @@ suite("test_compaction_variant_with_sparse_limit", "nonConcurrent") { qt_sql_55 "select cast(v['b'] as string), cast(v['b']['c'] as string) from ${tableName} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) != '{}' order by k desc limit 10;" } + } catch (e) { + logger.info("catch exception: ${e}") } finally { - // set back to default - set_be_config("variant_max_sparse_column_statistics_size", "10000") + sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE" + sql "DROP TABLE IF EXISTS simple_variant_UNIQUE" + sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE" } + + // test variant_max_sparse_column_statistics_size debug error case + sql "DROP TABLE IF EXISTS tn_simple_variant_DUPLICATE" + sql """ + CREATE TABLE IF NOT EXISTS tn_simple_variant_DUPLICATE ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + // here will always true + sql """insert into tn_simple_variant_DUPLICATE values (1, '{"a" : 1, "b" : 2}');""" + GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + test { + sql """ select v['a'] from tn_simple_variant_DUPLICATE where k = 1""" + exception null + } + + // here will always false + sql """ truncate table tn_simple_variant_DUPLICATE --force ; """ + sql """insert into tn_simple_variant_DUPLICATE values (1, '{"d" : "ddd", "s" : "fff", "da": "ddd", "m": 111}');""" + test { + sql """ select v['m'] from tn_simple_variant_DUPLICATE""" + exception "exceeded_sparse_column_limit_must_be_false" + } + + GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false") + } diff --git a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy 
b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy index 0ab363d5671c72..d47c486047e042 100644 --- a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy +++ b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy @@ -23,12 +23,6 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { def backendId_to_backendHttpPort = [:] getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); - def set_be_config = { key, value -> - for (String backend_id: backendId_to_backendIP.keySet()) { - def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) - logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) - } - } try { String backend_id = backendId_to_backendIP.keySet()[0] def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) @@ -45,13 +39,14 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { } } - set_be_config("variant_max_sparse_column_statistics_size", "2") + int max_sparse_column_statistics_size = 2 def create_table = { tableName, buckets="auto", key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${tableName}" - def var_def = "variant <'sala' : int, 'ddd' : double, 'z' : double>" + def var_def = "variant " if (key_type == "AGGREGATE") { - var_def = "variant <'sala' : int, 'ddd' : double, 'z' : double> replace" + var_def = "variant replace" } + sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( k bigint, @@ -61,6 +56,9 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { DISTRIBUTED BY HASH(k) BUCKETS ${buckets} properties("replication_num" = "1", "disable_auto_compaction" = "true"); """ + def create_tbl_res = sql """ show create table ${tableName} """ + 
logger.info("${create_tbl_res}") + assertTrue(create_tbl_res.toString().contains("variant_max_sparse_column_statistics_size")) } def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"] // def key_types = ["AGGREGATE"] @@ -132,7 +130,8 @@ suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") { order_qt_select "select * from ${tableName} order by k, cast(v as string) limit 5;" } } finally { - // set back to default - set_be_config("variant_max_sparse_column_statistics_size", "10000") + sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE" + sql "DROP TABLE IF EXISTS simple_variant_UNIQUE" + sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE" } }