From 975b622798151e9848643a28dfe070fd84668fef Mon Sep 17 00:00:00 2001 From: seawinde Date: Mon, 8 Dec 2025 11:23:22 +0800 Subject: [PATCH 1/4] [fix](mv) Fix stats unknown when calc sync mv plan statistics (#58426) Related PR: Problem Summary: Fix stats unknown when calc sync mv plan statistics For SQLs that are related to statistics, we should not collect or compute statistics. Previously this was determined by the `isInternal` flag, but `isInternal` is too broad: it covers not only statistics-related SQL but also SQL used to generate materialized view plans. Materialized view plan generation requires statistics, so we introduce a new flag `isPlanWithUnKnownColumnStats` to indicate connections that are used for statistics-only operations (treat column statistics as unknown). --- .../doris/nereids/stats/StatsCalculator.java | 4 +- .../doris/statistics/StatisticsCache.java | 6 +- .../nereids/stats/StatsCalculatorTest.java | 37 ++++++++ .../doris/statistics/StatisticsCacheTest.java | 90 +++++++++++++++++++ .../materialized_view_switch.groovy | 16 +++- 5 files changed, 145 insertions(+), 8 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 57f22c19d184af..c0ee2d33b5bcd4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -1187,7 +1187,7 @@ false, getTotalColumnStatisticMap(), false, private ColumnStatistic getColumnStatistic(TableIf table, String colName, long idxId) { ConnectContext connectContext = ConnectContext.get(); - if (connectContext != null && connectContext.getState().isInternal()) { + if (connectContext != null && connectContext.getState().isPlanWithUnKnownColumnStats()) { return ColumnStatistic.UNKNOWN; 
} long catalogId; @@ -1218,7 +1218,7 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i private ColumnStatistic getColumnStatistic(TableIf table, String colName, long idxId, List partitionNames) { ConnectContext connectContext = ConnectContext.get(); - if (connectContext != null && connectContext.getState().isInternal()) { + if (connectContext != null && connectContext.getState().isPlanWithUnKnownColumnStats()) { return ColumnStatistic.UNKNOWN; } long catalogId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 07a0c843f0ed41..eee60d1025a051 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -90,7 +90,7 @@ public class StatisticsCache { public ColumnStatistic getColumnStatistics(long catalogId, long dbId, long tblId, long idxId, String colName) { ConnectContext ctx = ConnectContext.get(); - if (ctx != null && ctx.getState().isInternal()) { + if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) { return ColumnStatistic.UNKNOWN; } // Need to change base index id to -1 for OlapTable. @@ -114,7 +114,7 @@ public ColumnStatistic getColumnStatistics(long catalogId, long dbId, long tblId public PartitionColumnStatistic getPartitionColumnStatistics(long catalogId, long dbId, long tblId, long idxId, String partName, String colName) { ConnectContext ctx = ConnectContext.get(); - if (ctx != null && ctx.getState().isInternal()) { + if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) { return PartitionColumnStatistic.UNKNOWN; } // Need to change base index id to -1 for OlapTable. 
@@ -157,7 +157,7 @@ public Histogram getHistogram(long ctlId, long dbId, long tblId, String colName) private Optional getHistogram(long ctlId, long dbId, long tblId, long idxId, String colName) { ConnectContext ctx = ConnectContext.get(); - if (ctx != null && ctx.getState().isInternal()) { + if (ctx != null && ctx.getState().isPlanWithUnKnownColumnStats()) { return Optional.empty(); } StatisticsCacheKey k = new StatisticsCacheKey(ctlId, dbId, tblId, idxId, colName); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java index 75cf3f2bea0d67..347c4beb171cd4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -41,6 +41,8 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Statistics; @@ -317,4 +319,39 @@ public void testTopN() { Assertions.assertEquals(1, slot1Stats.ndv, 0.1); Assertions.assertEquals(0, slot1Stats.numNulls, 0.1); } + + @Test + public void testOlapScanWithPlanWithUnknownColumnStats() { + boolean prevFlag = false; + if (ConnectContext.get() != null) { + prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true); + } + try { + long tableId1 = 100; + OlapTable table1 = PlanConstructor.newOlapTable(tableId1, "t_unknown", 0); + List qualifier = ImmutableList.of("test", "t"); + SlotReference slot1 = new SlotReference(new ExprId(0), "c1", IntegerType.INSTANCE, true, 
qualifier, + table1, new Column("c1", PrimitiveType.INT), + table1, new Column("c1", PrimitiveType.INT)); + + LogicalOlapScan logicalOlapScan1 = (LogicalOlapScan) new LogicalOlapScan( + StatementScopeIdGenerator.newRelationId(), table1, + Collections.emptyList()).withGroupExprLogicalPropChildren(Optional.empty(), + Optional.of(new LogicalProperties(() -> ImmutableList.of(slot1), () -> DataTrait.EMPTY_TRAIT)), ImmutableList.of()); + + GroupExpression groupExpression = new GroupExpression(logicalOlapScan1, ImmutableList.of()); + Group ownerGroup = new Group(null, groupExpression, null); + StatsCalculator.estimate(groupExpression, null); + Statistics stats = ownerGroup.getStatistics(); + Assertions.assertEquals(1, stats.columnStatistics().size()); + ColumnStatistic colStat = stats.columnStatistics().get(slot1); + Assertions.assertTrue(colStat.isUnKnown); + } finally { + if (ConnectContext.get() != null) { + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag); + } + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java new file mode 100644 index 00000000000000..cb1dffbe106faf --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.utframe.UtFrameUtils; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class StatisticsCacheTest { + + private ConnectContext ctx; + + @BeforeEach + public void setUp() throws Exception { + if (ConnectContext.get() == null) { + ctx = UtFrameUtils.createDefaultCtx(); + } else { + ctx = ConnectContext.get(); + } + } + + @Test + public void testGetColumnStatistics_withPlanWithUnknownColumnStats() { + Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); + + boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true); + try { + StatisticsCache cache = new StatisticsCache(); + ColumnStatistic stat = cache.getColumnStatistics( + 1L, 1L, 1L, -1L, "col", ConnectContext.get()); + Assertions.assertEquals(ColumnStatistic.UNKNOWN, stat, + "Expect UNKNOWN when plan has unknown column stats"); + } finally { + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag); + } + } + + @Test + public void testGetHistogram_withPlanWithUnknownColumnStats() { + Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); + + boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true); + 
try { + StatisticsCache cache = new StatisticsCache(); + // public getHistogram returns null when underlying optional is empty + Histogram hist = cache.getHistogram(1L, 1L, 1L, "col"); + Assertions.assertNull(hist, "Expect null histogram when plan has unknown column stats"); + } finally { + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag); + } + } + + @Test + public void testGetPartitionColumnStatistics_withPlanWithUnknownColumnStats() { + Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); + + boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(true); + try { + StatisticsCache cache = new StatisticsCache(); + PartitionColumnStatistic pstat = cache.getPartitionColumnStatistics( + 1L, 1L, 1L, -1L, "p", "col", ConnectContext.get()); + Assertions.assertEquals(PartitionColumnStatistic.UNKNOWN, pstat, + "Expect UNKNOWN partition col stat when plan has unknown column stats"); + } finally { + ConnectContext.get().getState().setPlanWithUnKnownColumnStats(prevFlag); + } + } +} diff --git a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy index 4af1d778c002b2..acfc699bbe3099 100644 --- a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy @@ -143,7 +143,10 @@ suite("materialized_view_switch") { where o_orderdate = '2023-12-10' order by 1, 2, 3, 4, 5; """ - async_mv_rewrite_success(db, mv_name, query, "mv_name_1") + async_mv_rewrite_success(db, mv_name, query, "mv_name_1", [NOT_IN_RBO]) + // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, + // row count would be bigger than before + 
async_mv_rewrite_success_without_check_chosen(db, mv_name, query, "mv_name_1", [TRY_IN_RBO, FORCE_IN_RBO]) sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_1""" sql "SET enable_materialized_view_rewrite=false" @@ -152,7 +155,10 @@ suite("materialized_view_switch") { sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_2""" sql "SET enable_materialized_view_rewrite=true" - async_mv_rewrite_success(db, mv_name, query, "mv_name_3") + async_mv_rewrite_success(db, mv_name, query, "mv_name_3", [NOT_IN_RBO]) + // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, + // row count would be bigger than before + async_mv_rewrite_success_without_check_chosen(db, mv_name, query, "mv_name_3", [TRY_IN_RBO, FORCE_IN_RBO]) sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_3""" // test when materialized_view_relation_mapping_max_count is 8 @@ -167,7 +173,11 @@ suite("materialized_view_switch") { inner join lineitem t2 on t1.L_ORDERKEY = t2.L_ORDERKEY; """ order_qt_query1_0_before "${query1_0}" - async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") + async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0", [NOT_IN_RBO]) + // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, + // row count would be bigger than before + async_mv_rewrite_success_without_check_chosen(db, mv1_0, query1_0, "mv1_0", [TRY_IN_RBO, FORCE_IN_RBO]) + order_qt_query1_0_after "${query1_0}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" From 5e36fab8b8331c381b61ea90f16a595ce210b582 Mon Sep 17 00:00:00 2001 From: seawinde Date: Wed, 10 Dec 2025 19:31:44 +0800 Subject: [PATCH 2/4] fix code --- .../java/org/apache/doris/nereids/stats/StatsCalculatorTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java index 
347c4beb171cd4..37fa061b48dc83 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -42,7 +42,6 @@ import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Statistics; From f8112f76586c6b0a626504807e78e33974b1b7ae Mon Sep 17 00:00:00 2001 From: seawinde Date: Wed, 10 Dec 2025 19:41:20 +0800 Subject: [PATCH 3/4] fix code --- .../apache/doris/statistics/StatisticsCacheTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java index cb1dffbe106faf..5919b548d00c8b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsCacheTest.java @@ -39,7 +39,7 @@ public void setUp() throws Exception { } @Test - public void testGetColumnStatistics_withPlanWithUnknownColumnStats() { + public void testGetColumnStatisticsWithPlanWithUnknownColumnStats() { Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); @@ -47,7 +47,7 @@ public void testGetColumnStatistics_withPlanWithUnknownColumnStats() { try { StatisticsCache cache = new StatisticsCache(); ColumnStatistic stat = cache.getColumnStatistics( - 1L, 1L, 1L, -1L, "col", ConnectContext.get()); + 1L, 1L, 1L, -1L, "col"); Assertions.assertEquals(ColumnStatistic.UNKNOWN, stat, "Expect UNKNOWN when plan has unknown column stats"); } 
finally { @@ -56,7 +56,7 @@ public void testGetColumnStatistics_withPlanWithUnknownColumnStats() { } @Test - public void testGetHistogram_withPlanWithUnknownColumnStats() { + public void testGetHistogramWithPlanWithUnknownColumnStats() { Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); @@ -72,7 +72,7 @@ public void testGetHistogram_withPlanWithUnknownColumnStats() { } @Test - public void testGetPartitionColumnStatistics_withPlanWithUnknownColumnStats() { + public void testGetPartitionColumnStatisticsWithPlanWithUnknownColumnStats() { Assumptions.assumeTrue(ConnectContext.get() != null, "ConnectContext not available"); boolean prevFlag = ConnectContext.get().getState().isPlanWithUnKnownColumnStats(); @@ -80,7 +80,7 @@ public void testGetPartitionColumnStatistics_withPlanWithUnknownColumnStats() { try { StatisticsCache cache = new StatisticsCache(); PartitionColumnStatistic pstat = cache.getPartitionColumnStatistics( - 1L, 1L, 1L, -1L, "p", "col", ConnectContext.get()); + 1L, 1L, 1L, -1L, "p", "col"); Assertions.assertEquals(PartitionColumnStatistic.UNKNOWN, pstat, "Expect UNKNOWN partition col stat when plan has unknown column stats"); } finally { From d43557fb3cf4a0d7d6f15075400326aa6bad0cad Mon Sep 17 00:00:00 2001 From: seawinde Date: Wed, 10 Dec 2025 19:47:06 +0800 Subject: [PATCH 4/4] fix code --- .../availability/materialized_view_switch.groovy | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy index acfc699bbe3099..245087cfb4f6e5 100644 --- a/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/availability/materialized_view_switch.groovy @@ -143,10 
+143,7 @@ suite("materialized_view_switch") { where o_orderdate = '2023-12-10' order by 1, 2, 3, 4, 5; """ - async_mv_rewrite_success(db, mv_name, query, "mv_name_1", [NOT_IN_RBO]) - // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, - // row count would be bigger than before - async_mv_rewrite_success_without_check_chosen(db, mv_name, query, "mv_name_1", [TRY_IN_RBO, FORCE_IN_RBO]) + async_mv_rewrite_success(db, mv_name, query, "mv_name_1") sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_1""" sql "SET enable_materialized_view_rewrite=false" @@ -155,10 +152,7 @@ suite("materialized_view_switch") { sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_2""" sql "SET enable_materialized_view_rewrite=true" - async_mv_rewrite_success(db, mv_name, query, "mv_name_3", [NOT_IN_RBO]) - // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, - // row count would be bigger than before - async_mv_rewrite_success_without_check_chosen(db, mv_name, query, "mv_name_3", [TRY_IN_RBO, FORCE_IN_RBO]) + async_mv_rewrite_success(db, mv_name, query, "mv_name_3") sql """ DROP MATERIALIZED VIEW IF EXISTS mv_name_3""" // test when materialized_view_relation_mapping_max_count is 8 @@ -173,11 +167,7 @@ suite("materialized_view_switch") { inner join lineitem t2 on t1.L_ORDERKEY = t2.L_ORDERKEY; """ order_qt_query1_0_before "${query1_0}" - async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0", [NOT_IN_RBO]) - // because compare total tree, mv fitler can not push down to scan base table in RBO mv rewrite as CBO mv prewrite, - // row count would be bigger than before - async_mv_rewrite_success_without_check_chosen(db, mv1_0, query1_0, "mv1_0", [TRY_IN_RBO, FORCE_IN_RBO]) - + async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") order_qt_query1_0_after "${query1_0}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0"""