From a1c269d033e70856dabf1033c86597070765961e Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 16 Apr 2024 09:35:20 +0800 Subject: [PATCH 1/4] non-analyzed-rows --- .../doris/nereids/stats/StatsCalculator.java | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 57a79037d801d3..36e5666a0ec8ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -123,6 +123,7 @@ import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.PlanUtils; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.AnalysisManager; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Histogram; @@ -130,6 +131,7 @@ import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; +import org.apache.doris.statistics.TableStatsMeta; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; @@ -762,8 +764,12 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } } Set slotSet = slotSetBuilder.build(); - Map columnStatisticMap = new HashMap<>(); + Map columnStatisticBuilderMap = new HashMap<>(); TableIf table = catalogRelation.getTable(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); + // rows newly updated after last analyze + long deltaRowCount = tableMeta.updatedRows.get(); double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; @@ -789,40 +795,44 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } else { cache = getColumnStatistic(table, colName, idxId); } + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); if (cache.avgSizeByte <= 0) { - cache = new ColumnStatisticBuilder(cache) - .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) - .build(); + colStatsBuilder.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()); } if (!cache.isUnKnown) { - rowCount = Math.max(rowCount, cache.count); + rowCount = Math.max(rowCount, cache.count + deltaRowCount); } else { hasUnknownCol = true; } if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { - columnStatisticMap.put(slotReference, cache); + if (deltaRowCount > 0) { + // clear min-max to avoid error estimation + // for example, after yesterday data loaded, user send query about yesterday immediately. + // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer + // estimates the filter result is zero + colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); + } + columnStatisticBuilderMap.put(slotReference, colStatsBuilder); } else { - columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN); + columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN)); hasUnknownCol = true; } } if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { ConnectContext.get().getStatementContext().setHasUnknownColStats(true); } - Statistics stats = new Statistics(rowCount, columnStatisticMap); - stats = normalizeCatalogRelationColumnStatsRowCount(stats); - return stats; - } - - private Statistics normalizeCatalogRelationColumnStatsRowCount(Statistics stats) { - for (Expression slot : stats.columnStatistics().keySet()) { - ColumnStatistic colStats = stats.findColumnStatistics(slot); - Preconditions.checkArgument(colStats != null, - "can not find col stats for %s in table", slot.toSql()); - stats.addColumnStats(slot, - new ColumnStatisticBuilder(colStats).setCount(stats.getRowCount()).build()); + return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap); + } + + private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount, + Map columnStatisticBuilderMap) { + Map columnStatisticMap = new HashMap<>(); + for (Expression slot : columnStatisticBuilderMap.keySet()) { + columnStatisticMap.put(slot, + columnStatisticBuilderMap.get(slot).setCount(rowCount).build()); } - return stats; + return new Statistics(rowCount, columnStatisticMap); } private Statistics computeTopN(TopN topN) { From bb067bd788f88da4ac0fc1326a17fa30ca35e927 Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 16 Apr 2024 09:41:41 +0800 Subject: [PATCH 2/4] log --- .../org/apache/doris/nereids/stats/StatsCalculator.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 36e5666a0ec8ac..60d1442dc6052e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -157,8 +157,6 @@ */ public class StatsCalculator extends DefaultPlanVisitor { public static double DEFAULT_AGGREGATE_RATIO = 0.5; - public static double DEFAULT_AGGREGATE_EXPAND_RATIO = 1.05; - public static double AGGREGATE_COLUMN_CORRELATION_COEFFICIENT = 0.75; public static double DEFAULT_COLUMN_NDV_RATIO = 0.5; @@ -779,6 +777,10 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { idxId = olapScan.getSelectedIndexId(); } } + if (deltaRowCount > 0) { + LOG.info(catalogRelation.getTable().getName() + + " is partially analyzed, clear min/max values in column stats"); + } for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent() ? slotReference.getColumn().get().getName() From f0d83563fe13c16e828b7f1f6133d5463303020c Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 16 Apr 2024 15:08:10 +0800 Subject: [PATCH 3/4] fix --- .../java/org/apache/doris/nereids/stats/StatsCalculator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 60d1442dc6052e..50cedb78d01e8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -767,7 +767,7 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); // rows newly updated after last analyze - long deltaRowCount = tableMeta.updatedRows.get(); + long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get(); double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; From 7636c2c2fc50fc854d8de35122e3c7eb59f702be Mon Sep 17 00:00:00 2001 From: minghong Date: Thu, 18 Apr 2024 09:19:26 +0800 Subject: [PATCH 4/4] comments --- .../org/apache/doris/nereids/stats/StatsCalculator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 50cedb78d01e8e..34248d5a55a87e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -777,9 +777,9 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { idxId = olapScan.getSelectedIndexId(); } } - if (deltaRowCount > 0) { - LOG.info(catalogRelation.getTable().getName() - + " is partially analyzed, clear min/max values in column stats"); + if (deltaRowCount > 0 && LOG.isDebugEnabled()) { + LOG.debug("{} is partially analyzed, clear min/max values in column stats", + catalogRelation.getTable().getName()); } for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent()