From 4bc05068cb2760a68939c07fdcc946334c3b0060 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 24 Apr 2025 16:56:48 +0800 Subject: [PATCH] [improvement](statistics)Agg table set preagg on when doing sample analyzing. (#49918) This PR includes three changes: 1. Nereids supports forcing ScanNode pre-aggregation on via a hint, like this: select * from table1 /*+PREAGGOPEN*/ 2. When sample analyzing an agg table or a MoR (merge-on-read) unique table, set pre-aggregation on to improve performance. 3. Skip value columns of agg tables and MoR unique tables when sample analyzing. --- .../doris/nereids/StatementContext.java | 9 + .../doris/nereids/jobs/executor/Analyzer.java | 4 +- .../nereids/parser/LogicalPlanBuilder.java | 138 ++++---- .../apache/doris/nereids/rules/RuleType.java | 1 + .../nereids/rules/analysis/BindRelation.java | 11 +- .../EliminateLogicalPreAggOnHint.java | 39 +++ .../doris/nereids/trees/plans/PlanType.java | 1 + .../plans/commands/info/BaseViewInfo.java | 4 +- .../plans/logical/LogicalPreAggOnHint.java | 88 ++++++ .../trees/plans/visitor/PlanVisitor.java | 5 + .../doris/statistics/AnalysisManager.java | 8 +- .../doris/statistics/BaseAnalysisTask.java | 6 +- .../statistics/ExternalAnalysisTask.java | 1 + .../doris/statistics/OlapAnalysisTask.java | 15 +- .../statistics/StatisticsAutoCollector.java | 23 +- .../doris/statistics/TableStatsMeta.java | 22 +- .../doris/statistics/util/StatisticsUtil.java | 20 ++ .../statistics/OlapAnalysisTaskTest.java | 62 +++- .../StatisticsAutoCollectorTest.java | 5 +- .../statistics/util/StatisticsUtilTest.java | 57 ++++ .../suites/statistics/analyze_stats.groovy | 3 +- .../suites/statistics/test_analyze_mv.groovy | 57 +--- .../test_skip_agg_table_value_column.groovy | 298 ++++++++++++++++++ 23 files changed, 730 insertions(+), 147 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java create mode 100644 regression-test/suites/statistics/test_skip_agg_table_value_column.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index a0a39c67af9662..c9e1e23d0b1212 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -156,6 +156,7 @@ public enum TableFrom { private final List joinFilters = new ArrayList<>(); private final List hints = new ArrayList<>(); + private boolean hintForcePreAggOn = false; // the columns in Plan.getExpressions(), such as columns in join condition or filter condition, group by expression private final Set keySlots = Sets.newHashSet(); @@ -246,6 +247,14 @@ public void setNeedLockTables(boolean needLockTables) { this.needLockTables = needLockTables; } + public void setHintForcePreAggOn(boolean preAggOn) { + this.hintForcePreAggOn = preAggOn; + } + + public boolean isHintForcePreAggOn() { + return hintForcePreAggOn; + } + /** * cache view info to avoid view's def and sql mode changed before lock it. 
* diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 0be72f205517bc..06e55bb2501b88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; import org.apache.doris.nereids.rules.analysis.HavingToFilter; @@ -93,7 +94,8 @@ private static List buildAnalyzerJobs() { return jobs( // we should eliminate hint before "Subquery unnesting". topDown(new AnalyzeCTE()), - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), bottomUp( new BindRelation(), new CheckPolicy() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index e247386ae71e98..6d21441c95f3a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -459,6 +459,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint; @@ -1460,12 +1461,15 @@ public LogicalPlan visitRegularQuerySpecification(RegularQuerySpecificationConte return selectPlan; } List selectHintContexts = Lists.newArrayList(); + List preAggOnHintContexts = Lists.newArrayList(); for (Integer key : selectHintMap.keySet()) { if (key > selectCtx.getStart().getStopIndex() && key < selectCtx.getStop().getStartIndex()) { selectHintContexts.add(selectHintMap.get(key)); + } else { + preAggOnHintContexts.add(selectHintMap.get(key)); } } - return withSelectHint(selectPlan, selectHintContexts); + return withHints(selectPlan, selectHintContexts, preAggOnHintContexts); }); } @@ -3274,73 +3278,93 @@ private LogicalPlan withJoinRelations(LogicalPlan input, RelationContext ctx) { return last; } - private LogicalPlan withSelectHint(LogicalPlan logicalPlan, List hintContexts) { - if (hintContexts.isEmpty()) { + private LogicalPlan withHints(LogicalPlan logicalPlan, List selectHintContexts, + List preAggOnHintContexts) { + if (selectHintContexts.isEmpty() && preAggOnHintContexts.isEmpty()) { return logicalPlan; } - Map hints = Maps.newLinkedHashMap(); - for (ParserRuleContext hintContext : hintContexts) { - SelectHintContext selectHintContext = (SelectHintContext) hintContext; - for (HintStatementContext hintStatement : selectHintContext.hintStatements) { - String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); - switch (hintName) { - case "set_var": - Map> 
parameters = Maps.newLinkedHashMap(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - Optional value = Optional.empty(); - if (kv.constantValue != null) { - Literal literal = (Literal) visit(kv.constantValue); - value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); - } else if (kv.identifierValue != null) { - // maybe we should throw exception when the identifierValue is quoted identifier - value = Optional.ofNullable(kv.identifierValue.getText()); + LogicalPlan newPlan = logicalPlan; + if (!selectHintContexts.isEmpty()) { + Map hints = Maps.newLinkedHashMap(); + for (ParserRuleContext hintContext : selectHintContexts) { + SelectHintContext selectHintContext = (SelectHintContext) hintContext; + for (HintStatementContext hintStatement : selectHintContext.hintStatements) { + String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); + switch (hintName) { + case "set_var": + Map> parameters = Maps.newLinkedHashMap(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + Optional value = Optional.empty(); + if (kv.constantValue != null) { + Literal literal = (Literal) visit(kv.constantValue); + value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); + } else if (kv.identifierValue != null) { + // maybe we should throw exception when the identifierValue is quoted identifier + value = Optional.ofNullable(kv.identifierValue.getText()); + } + parameters.put(parameterName, value); } - parameters.put(parameterName, value); } - } - hints.put(hintName, new SelectHintSetVar(hintName, parameters)); - break; - case "leading": - List leadingParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - leadingParameters.add(parameterName); + hints.put(hintName, new SelectHintSetVar(hintName, parameters)); + break; + case "leading": + List leadingParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + leadingParameters.add(parameterName); + } } - } - hints.put(hintName, new SelectHintLeading(hintName, leadingParameters)); - break; - case "ordered": - hints.put(hintName, new SelectHintOrdered(hintName)); - break; - case "use_cbo_rule": - List useRuleParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - useRuleParameters.add(parameterName); + hints.put(hintName, new SelectHintLeading(hintName, leadingParameters)); + break; + case "ordered": + hints.put(hintName, new SelectHintOrdered(hintName)); + break; + case "use_cbo_rule": + List useRuleParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + useRuleParameters.add(parameterName); + } } - } - hints.put(hintName, new SelectHintUseCboRule(hintName, useRuleParameters, false)); - break; - case "no_use_cbo_rule": - List noUseRuleParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - if (kv.key != null) { - noUseRuleParameters.add(parameterName); + 
hints.put(hintName, new SelectHintUseCboRule(hintName, useRuleParameters, false)); + break; + case "no_use_cbo_rule": + List noUseRuleParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + String parameterName = visitIdentifierOrText(kv.key); + if (kv.key != null) { + noUseRuleParameters.add(parameterName); + } } + hints.put(hintName, new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); + break; + default: + break; + } + } + } + newPlan = new LogicalSelectHint<>(hints, newPlan); + } + if (!preAggOnHintContexts.isEmpty()) { + for (ParserRuleContext hintContext : preAggOnHintContexts) { + if (hintContext instanceof SelectHintContext) { + SelectHintContext preAggOnHintContext = (SelectHintContext) hintContext; + if (preAggOnHintContext.hintStatement != null + && preAggOnHintContext.hintStatement.hintName != null) { + String text = preAggOnHintContext.hintStatement.hintName.getText(); + if (text.equalsIgnoreCase("PREAGGOPEN")) { + newPlan = new LogicalPreAggOnHint<>(newPlan); + break; } - hints.put(hintName, new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); - break; - default: - break; + } } } } - return new LogicalSelectHint<>(hints, logicalPlan); + return newPlan; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index ce42490361b37a..b7cbe5343d1129 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -122,6 +122,7 @@ public enum RuleType { ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_LOGICAL_SELECT_HINT(RuleTypeClass.REWRITE), + ELIMINATE_LOGICAL_PRE_AGG_ON_HINT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_SUBQUERY(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_VIEW(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 41a00b422d4186..c1d7a81e28a77d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -95,6 +95,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; @@ -104,6 +106,7 @@ * Rule to bind relations in query plan. 
*/ public class BindRelation extends OneAnalysisRuleFactory { + private static final Logger LOG = LogManager.getLogger(StatementContext.class); public BindRelation() {} @@ -178,7 +181,8 @@ private LogicalPlan bind(CascadesContext cascadesContext, UnboundRelation unboun return getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); } - private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List qualifier) { + private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List qualifier, + CascadesContext cascadesContext) { LogicalOlapScan scan; List partIds = getPartitionIds(table, unboundRelation, qualifier); List tabletIds = unboundRelation.getTabletIds(); @@ -214,6 +218,9 @@ private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, // This tabletIds is set manually, so need to set specifiedTabletIds scan = scan.withManuallySpecifiedTabletIds(tabletIds); } + if (cascadesContext.getStatementContext().isHintForcePreAggOn()) { + return scan.withPreAggStatus(PreAggStatus.on()); + } if (needGenerateLogicalAggForRandomDistAggTable(scan)) { // it's a random distribution agg table // add agg on olap scan @@ -380,7 +387,7 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio switch (table.getType()) { case OLAP: case MATERIALIZED_VIEW: - return makeOlapScan(table, unboundRelation, qualifierWithoutTableName); + return makeOlapScan(table, unboundRelation, qualifierWithoutTableName, cascadesContext); case VIEW: View view = (View) table; isView = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java new file mode 100644 index 00000000000000..9f40f8e97b9bfe --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; + +/** + * eliminate logical common hint and set them to cascade context + */ +public class EliminateLogicalPreAggOnHint extends OneRewriteRuleFactory { + + @Override + public Rule build() { + return logicalPreAggOnHint().thenApply(ctx -> { + LogicalPreAggOnHint preAggHintPlan = ctx.root; + ctx.statementContext.setHintForcePreAggOn(true); + return preAggHintPlan.child(); + }).toRule(RuleType.ELIMINATE_LOGICAL_PRE_AGG_ON_HINT); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 05ed40f661ce64..559d62c85fd58c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -62,6 +62,7 @@ public enum PlanType { LOGICAL_APPLY, LOGICAL_ASSERT_NUM_ROWS, LOGICAL_CHECK_POLICY, + LOGICAL_COMMON_HINT, LOGICAL_CTE, LOGICAL_CTE_ANCHOR, LOGICAL_CTE_PRODUCER, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java index ad653fbc182fd4..af2f2378612bf9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.rules.analysis.BindExpression; import org.apache.doris.nereids.rules.analysis.BindRelation; import org.apache.doris.nereids.rules.analysis.CheckPolicy; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -271,7 +272,8 @@ public List getJobs() { private static List buildAnalyzeViewJobsForStar() { return jobs( - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), topDown(new AnalyzeCTE()), bottomUp( new BindRelation(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java new file mode 100644 index 00000000000000..f139308948b116 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Optional; + +/** + * PreAggOn hint. + */ +public class LogicalPreAggOnHint extends LogicalUnary + implements BlockFuncDepsPropagation { + + public LogicalPreAggOnHint(CHILD_TYPE child) { + this(Optional.empty(), Optional.empty(), child); + } + + /** + * LogicalCommonHint's full parameter constructor. + * @param groupExpression groupExpression exists when this plan is copy out from memo. + * @param logicalProperties logicalProperties is use for compute output + * @param child child plan + */ + public LogicalPreAggOnHint(Optional groupExpression, + Optional logicalProperties, CHILD_TYPE child) { + super(PlanType.LOGICAL_COMMON_HINT, groupExpression, logicalProperties, child); + } + + @Override + public LogicalPreAggOnHint withChildren(List children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(children.get(0)); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitLogicalPreAggOnHint(this, context); + } + + @Override + public List getExpressions() { + return ImmutableList.of(); + } + + @Override + public Plan withGroupExpression(Optional groupExpression) { + return new LogicalPreAggOnHint<>(groupExpression, Optional.of(getLogicalProperties()), child()); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + Optional logicalProperties, List children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(groupExpression, logicalProperties, children.get(0)); + } + + @Override + public List computeOutput() { + return child().getOutput(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index 8db1407220cf08..b6d3e24ba9963f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -38,6 +38,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import 
org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; @@ -209,6 +210,10 @@ public R visitLogicalSelectHint(LogicalSelectHint hint, C contex return visit(hint, context); } + public R visitLogicalPreAggOnHint(LogicalPreAggOnHint hint, C context) { + return visit(hint, context); + } + public R visitLogicalSetOperation(LogicalSetOperation setOperation, C context) { return visit(setOperation, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 0db9e010c783ca..0bea8339dd55fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -369,7 +369,13 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) { long periodTimeInMs = stmt.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - Set> jobColumns = table.getColumnIndexPairs(columnNames); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + Set> jobColumns = table.getColumnIndexPairs(columnNames).stream() + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toSet()); infoBuilder.setJobColumns(jobColumns); StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); for (Pair pair : jobColumns) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 54c246a6d401eb..cf17499ea999f7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -96,7 +96,8 @@ public abstract class BaseAnalysisTask { + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() FROM ( " - + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as t"; + + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) " + + "as t ${preAggHint}"; protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT " + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, " @@ -117,7 +118,8 @@ public abstract class BaseAnalysisTask { + " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + " FROM " + " (SELECT ${subStringColName} AS `colValue`, LENGTH(`${colName}`) as `len` " - + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0` " + + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0`" + + " ${preAggHint}" + " GROUP BY `t0`.`colValue` " + ") as `t1` "; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 4ea3d0ebc2c7c7..72beb343956253 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -89,6 +89,7 @@ protected Map 
buildSqlParams() { params.put("limit", ""); params.put("scaleFactor", "1"); params.put("index", ""); + params.put("preAggHint", ""); if (col != null) { params.put("type", col.getType().toString()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index be441496394ede..29478fc8a717dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -136,13 +136,6 @@ protected void doSample() { } protected ResultRow collectMinMax() { - // Agg table value columns has no zone map. - // For these columns, skip collecting min and max value to avoid scan whole table. - if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) { - LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.", - tbl.getName(), col.getName()); - return null; - } long startTime = System.currentTimeMillis(); Map params = buildSqlParams(); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); @@ -264,6 +257,13 @@ protected void getSampleParams(Map params, long tableRowCount) { params.put("type", col.getType().toString()); params.put("limit", ""); + // For agg table and mor unique table, set PREAGGOPEN preAggHint. + if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) + || ((OlapTable) tbl).getKeysType().equals(KeysType.UNIQUE_KEYS) + && !((OlapTable) tbl).isUniqKeyMergeOnWrite()) { + params.put("preAggHint", "/*+PREAGGOPEN*/"); + } + // If table row count is less than the target sample row count, simple scan the full table. if (tableRowCount <= targetSampleRows) { scanFullTable = true; @@ -384,6 +384,7 @@ protected Map buildSqlParams() { params.put("colName", StatisticsUtil.escapeColumnName(String.valueOf(info.colName))); params.put("tblName", String.valueOf(tbl.getName())); params.put("index", getIndex()); + params.put("preAggHint", ""); return params; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index f4fdc68f55c220..8420e86fa8ecb2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -141,10 +141,20 @@ protected Optional>>> fetchJobFrom protected void processOneJob(TableIf table, Set> columns, JobPriority priority) throws DdlException { appendAllColumns(table, columns); - columns = columns.stream().filter( - c -> StatisticsUtil.needAnalyzeColumn(table, c) || StatisticsUtil.isLongTimeColumn(table, c)) + AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; + if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { + analysisMethod = AnalysisMethod.FULL; + } + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + OlapTable olapTable = table instanceof OlapTable ? 
(OlapTable) table : null; + columns = columns.stream() + .filter(c -> StatisticsUtil.needAnalyzeColumn(table, c) || StatisticsUtil.isLongTimeColumn(table, c)) + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) .collect(Collectors.toSet()); - AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority); + AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority, analysisMethod); if (analyzeJob == null) { return; } @@ -187,12 +197,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) { } protected AnalysisInfo createAnalyzeJobForTbl( - TableIf table, Set> jobColumns, JobPriority priority) { - AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; - if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { - analysisMethod = AnalysisMethod.FULL; - } + TableIf table, Set> jobColumns, JobPriority priority, AnalysisMethod analysisMethod) { AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 4ebdb019f27e36..0b77aba91e30d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -193,17 +193,23 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { jobType = analyzedJob.jobType; if (tableIf != null) { if (tableIf instanceof OlapTable) { + OlapTable olapTable = (OlapTable) tableIf; indexesRowCount.putAll(analyzedJob.indexesRowCount); - clearStaleIndexRowCount((OlapTable) tableIf); + clearStaleIndexRowCount(olapTable); + if (analyzedJob.jobColumns.containsAll( + olapTable.getColumnIndexPairs(olapTable.getSchemaAllIndexes(false) + .stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName).collect(Collectors.toSet())) + .stream() + .filter(c -> StatisticsUtil.canCollectColumn(olapTable.getIndexMetaByIndexId( + olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + olapTable, true, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toSet()))) { + partitionChanged.set(false); + } } rowCount = analyzedJob.rowCount; - if (analyzedJob.jobColumns.containsAll( - tableIf.getColumnIndexPairs( - tableIf.getSchemaAllIndexes(false).stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet())))) { - partitionChanged.set(false); - } // Set userInject back to false after manual analyze. 
if (JobType.MANUAL.equals(jobType) && !analyzedJob.userInject) { userInjected = false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index b482e5c225c648..7f8a31ddcd308e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -725,6 +726,25 @@ public static boolean isUnsupportedType(Type type) { || type instanceof AggStateType; } + public static boolean canCollectColumn(Column c, TableIf table, boolean isSampleAnalyze, long indexId) { + // Full analyze can collect all columns. + if (!isSampleAnalyze) { + return true; + } + // External table can collect all columns. + if (!(table instanceof OlapTable)) { + return true; + } + OlapTable olapTable = (OlapTable) table; + // Skip agg table value columns + KeysType keysType = olapTable.getIndexMetaByIndexId(indexId).getKeysType(); + if (KeysType.AGG_KEYS.equals(keysType) && !c.isKey()) { + return false; + } + // Skip mor unique table value columns + return !KeysType.UNIQUE_KEYS.equals(keysType) || olapTable.isUniqKeyMergeOnWrite() || c.isKey(); + } + public static void sleep(long millis) { try { Thread.sleep(millis); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 301734cd2e035d..87525e8b9b8c4b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DataProperty; import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -145,7 +146,7 @@ public void runQuery(String sql) { + "${ndvFunction} as `ndv`, ROUND(SUM(CASE WHEN `null` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, " + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() FROM " - + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t", sql); + + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t ", sql); return; } }; @@ -173,7 +174,8 @@ public void runQuery(String sql) { + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() " + "FROM ( SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + "FROM (SELECT ${subStringColName} AS `colValue`, LENGTH(`null`) as `len` " - + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql); + + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` " + + " GROUP BY `t0`.`colValue` ) as `t1` ", sql); return; } @@ -342,22 +344,70 @@ protected boolean useLinearAnalyzeTemplate() { return false; } }; - task.col = new Column("test", 
PrimitiveType.INT); + + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.DUP_KEYS; + } + }; + task.col = new Column("testColumn", Type.INT, true, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 10); Assertions.assertTrue(task.scanFullTable()); Assertions.assertEquals("1.0", params.get("scaleFactor")); Assertions.assertEquals("", params.get("sampleHints")); Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 10)", params.get("ndvFunction")); + Assertions.assertNull(params.get("preAggHint")); params.clear(); + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.AGG_KEYS; + } + }; task = new OlapAnalysisTask(); - task.col = new Column("test", PrimitiveType.INT); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(`t1`.`column_key`) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) + SUM(IF(`t1`.`count` = 1 and `t1`.`column_key` is not null, 1, 0)) * SUM(`t1`.`count`) / 1000)", params.get("ndvFunction")); Assertions.assertEquals("SUM(t1.count) * 4", params.get("dataSizeFunction")); Assertions.assertEquals("`${colName}`", params.get("subStringColName")); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp() { + @Mock + public KeysType getKeysType() { + return KeysType.UNIQUE_KEYS; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp() { + @Mock + public boolean isUniqKeyMergeOnWrite() { + return true; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertNull(params.get("preAggHint")); params.clear(); new MockUp() { @@ -374,6 +424,7 @@ protected boolean isSingleUniqueKey() { task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -388,6 +439,7 @@ protected boolean isSingleUniqueKey() { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -407,6 +459,7 @@ protected long getSampleRows() { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("20.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ 
-418,6 +471,7 @@ protected long getSampleRows() { task.col = new Column("test", Type.fromPrimitiveType(PrimitiveType.INT), true, null, null, null); task.setKeyColumnSampleTooManyRows(true); + task.setTable(new OlapTable()); task.getSampleParams(params, 2000000000); Assertions.assertEquals("2.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 1d9ea4bd4eb399..305eb201dc0882 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -29,6 +29,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.datasource.jdbc.JdbcExternalDatabase; import org.apache.doris.datasource.jdbc.JdbcExternalTable; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import mockit.Mock; import mockit.MockUp; @@ -163,13 +164,13 @@ public boolean isPartitionedTable() { return false; } }; - Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null)); + Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null, AnalysisMethod.SAMPLE)); new MockUp() { @Mock public long getRowCountForIndex(long indexId, boolean strict) { return 100; } }; - Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null)); + Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null, AnalysisMethod.SAMPLE)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 7514d96ddb5e1d..0467f9b89dd234 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -18,6 +18,8 @@ package org.apache.doris.statistics.util; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.TableProperty; @@ -43,6 +45,7 @@ import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.TableStatsMeta; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.Lists; import mockit.Mock; @@ -501,4 +504,58 @@ public long fetchRowCount() { }; Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); } + + @Test + void testCanCollectColumn() { + Column column = new Column("testColumn", Type.INT, true, null, null, ""); + List schema = new ArrayList<>(); + schema.add(column); + OlapTable table = new OlapTable(200, "testTable", schema, KeysType.AGG_KEYS, null, null); + + // Test full analyze always return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, false, 1)); + + // Test null table return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, null, true, 1)); + + // Test external table always return true; + HMSExternalCatalog externalCatalog = new HMSExternalCatalog(); + HMSExternalDatabase externalDatabase = new HMSExternalDatabase(externalCatalog, 1L, "dbName", "dbName"); 
+ HMSExternalTable hmsTable = new HMSExternalTable(1, "name", "name", externalCatalog, externalDatabase); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, hmsTable, true, 1)); + + // Test agg key return true; + MaterializedIndexMeta meta = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.AGG_KEYS, null); + new MockUp() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta; + } + }; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test agg value return false + column = new Column("testColumn", Type.INT, false, null, null, ""); + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor value column return false + MaterializedIndexMeta meta1 = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.UNIQUE_KEYS, null); + new MockUp() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta1; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor key column return true + column = new Column("testColumn", Type.INT, true, null, null, ""); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 4fbebed600280b..4c8708c17e50f1 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -265,8 +265,7 @@ suite("test_analyze") { sql """analyze table agg_table_test with sample rows 100 with sync""" def agg_result = sql """show column stats agg_table_test (name)""" logger.info("show column agg_table_test(name) stats: " + agg_result) - assertEquals(agg_result[0][7], "N/A") - assertEquals(agg_result[0][8], "N/A") + assertEquals(0, agg_result.size()) // Continue test partition load data for the first time. 
def reported = false; diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index d6de18fe5a1ea5..aa8be109203bc1 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -555,6 +555,7 @@ suite("test_analyze_mv") { assertEquals("4", result_row[0][3]) assertEquals("4", result_row[0][4]) + sql """drop stats mvTestDup""" sql """analyze table mvTestDup with sample rows 4000000""" wait_analyze_finish("mvTestDup") result_sample = sql """SHOW ANALYZE mvTestDup;""" @@ -563,7 +564,7 @@ suite("test_analyze_mv") { logger.info("Analyze job id is " + jobId) result_sample = sql """show column stats mvTestDup""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) result_sample = sql """show column stats mvTestDup(key1)""" logger.info("result " + result_sample) @@ -625,60 +626,18 @@ suite("test_analyze_mv") { result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_SUM__CAST(`value1` AS bigint)", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("6", result_sample[0][7]) - assertEquals("3001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MAX__`value2`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("4", result_sample[0][7]) - assertEquals("4001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MIN__`value3`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("5", result_sample[0][7]) - assertEquals("5001", result_sample[0][8]) - assertEquals("SAMPLE", 
result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show analyze task status ${jobId}""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) def verifyTaskStatus = { result, colName, indexName -> def found = false; for (int i = 0; i < result.size(); i++) { @@ -698,9 +657,6 @@ suite("test_analyze_mv") { verifyTaskStatus(result_sample, "mv_key1", "mv3") verifyTaskStatus(result_sample, "mv_key2", "mv2") verifyTaskStatus(result_sample, "mv_key2", "mv3") - verifyTaskStatus(result_sample, "mva_MAX__`value2`", "mv3") - verifyTaskStatus(result_sample, "mva_MIN__`value3`", "mv3") - verifyTaskStatus(result_sample, "mva_SUM__CAST(`value1` AS bigint)", "mv3") // * Test row count report and report for nereids sql """truncate table mvTestDup""" @@ -826,4 +782,3 @@ suite("test_analyze_mv") { sql """drop database if exists test_analyze_mv""" } - diff --git a/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy new file mode 100644 index 00000000000000..606de9cebbf1c9 --- /dev/null +++ b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy @@ -0,0 +1,298 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_skip_agg_table_value_column") { + + def wait_row_count_reported = { db, table, row, column, expected -> + def result = sql """show frontends;""" + logger.info("show frontends result origin: " + result) + def host + def port + for (int i = 0; i < result.size(); i++) { + if (result[i][8] == "true") { + host = result[i][1] + port = result[i][4] + } + } + def tokens = context.config.jdbcUrl.split('/') + def url=tokens[0] + "//" + host + ":" + port + logger.info("Master url is " + url) + connect(context.config.jdbcUser, context.config.jdbcPassword, url) { + sql """use ${db}""" + result = sql """show frontends;""" + logger.info("show frontends result master: " + result) + for (int i = 0; i < 120; i++) { + Thread.sleep(5000) + result = sql """SHOW DATA FROM ${table};""" + logger.info("result " + result) + if (result[row][column] == expected) { + return; + } + } + throw new Exception("Row count report timeout.") + } + + } + + sql """drop database if exists test_skip_agg_table_value_column""" + sql """create database test_skip_agg_table_value_column""" + sql """use test_skip_agg_table_value_column""" + sql """set global force_sample_analyze=false""" + sql """set global enable_auto_analyze=false""" + + // Test duplicate table + sql """CREATE TABLE dup ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ) + """ + createMV("create materialized view mv1 as select key1 from dup;") + createMV("create materialized view mv2 as select key2 from dup;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from dup group by key1, key2;") + sql """insert into dup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test aggregate table. 
+ sql """CREATE TABLE agg ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int SUM NOT NULL, + value2 int MAX NOT NULL, + value3 int MIN NOT NULL + )ENGINE=OLAP + AGGREGATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key2 from agg group by key2;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from agg group by key1, key2;") + createMV("create materialized view mv6 as select key1, sum(value1) from agg group by key1;") + sql """insert into agg values (1, 2, 3, 4, 5), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """CREATE TABLE agg_partition ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int SUM NOT NULL, + value2 int MAX NOT NULL, + value3 int MIN NOT NULL + )ENGINE=OLAP + AGGREGATE KEY(`key1`, `key2`) + COMMENT "OLAP" + PARTITION BY RANGE(`key1`) + (PARTITION p1 VALUES [("0"), ("100")), + PARTITION p2 VALUES [("100"), ("10000"))) + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key2 from agg_partition group by key2;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from agg_partition group by key1, key2;") + createMV("create materialized view mv6 as select key1, sum(value1) from agg_partition group by key1;") + sql """analyze table agg_partition(key1) with sync""" + sql """insert into agg_partition values (1, 2, 3, 4, 5), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test unique table + sql """ + CREATE TABLE uni_mor ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = false, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mor;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mor;") + sql """insert into uni_mor values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """ + CREATE TABLE uni_mor_partition ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + PARTITION BY RANGE(`key1`) + (PARTITION p1 VALUES [("0"), ("100")), + PARTITION p2 VALUES [("100"), ("10000"))) + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = false, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mor_partition;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mor_partition;") + sql """analyze table uni_mor_partition(key1) with sync""" + sql """insert into uni_mor_partition values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + sql """ + CREATE TABLE uni_mow ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT 
NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = true, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mow;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mow;") + sql """insert into uni_mow values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 0, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 1, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 2, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg_partition", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor_partition", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 2, 4, "5") + + sql """analyze table dup with sync""" + def result = sql """show column stats dup""" + assertEquals(12, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + sql """analyze table dup with sync with sample rows 400000""" + result = sql """show column stats dup""" + assertEquals(9, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + + sql """analyze table agg with sync""" + result = sql """show column stats agg""" + assertEquals(13, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + sql """analyze table agg with sync with sample rows 400000""" + result = sql """show column stats agg""" + assertEquals(6, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + + result = sql """show table stats agg_partition""" + assertEquals("true", result[0][6]) + sql """analyze table agg_partition with sync with sample rows 400000""" + result = sql """show column stats 
+    result = sql """show table stats agg_partition"""
+    assertEquals("true", result[0][6])
+    sql """analyze table agg_partition with sync with sample rows 400000"""
+    result = sql """show column stats agg_partition"""
+    assertEquals(6, result.size())
+    result = sql """show table stats agg_partition"""
+    assertEquals("false", result[0][6])
+
+    sql """analyze table uni_mor with sync"""
+    result = sql """show column stats uni_mor"""
+    assertEquals(11, result.size())
+    sql """drop stats uni_mor"""
+    result = sql """show column stats uni_mor"""
+    assertEquals(0, result.size())
+    sql """analyze table uni_mor with sync with sample rows 400000"""
+    result = sql """show column stats uni_mor"""
+    assertEquals(6, result.size())
+    sql """drop stats uni_mor"""
+    result = sql """show column stats uni_mor"""
+    assertEquals(0, result.size())
+
+    result = sql """show table stats uni_mor_partition"""
+    assertEquals("true", result[0][6])
+    sql """analyze table uni_mor_partition with sync with sample rows 400000"""
+    result = sql """show column stats uni_mor_partition"""
+    assertEquals(6, result.size())
+    result = sql """show table stats uni_mor_partition"""
+    assertEquals("false", result[0][6])
+
+    sql """analyze table uni_mow with sync"""
+    result = sql """show column stats uni_mow"""
+    assertEquals(11, result.size())
+    sql """drop stats uni_mow"""
+    result = sql """show column stats uni_mow"""
+    assertEquals(0, result.size())
+    sql """analyze table uni_mow with sync with sample rows 400000"""
+    result = sql """show column stats uni_mow"""
+    assertEquals(11, result.size())
+    sql """drop stats uni_mow"""
+    result = sql """show column stats uni_mow"""
+    assertEquals(0, result.size())
+
+    // Test PREAGGOPEN hint.
+    explain {
+        sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`uni_mor` index `mv1` ) as t /*+PREAGGOPEN*/")
+        contains "PREAGGREGATION: ON"
+        notContains "PREAGGREGATION: OFF"
+    }
+    explain {
+        sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`uni_mor` index `mv1` ) as t")
+        contains "PREAGGREGATION: OFF"
+        notContains "PREAGGREGATION: ON"
+    }
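+    // Same check against the agg table's mv3 index: with the PREAGGOPEN hint the
+    // scan should report PREAGGREGATION: ON, and without it PREAGGREGATION: OFF.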
+    explain {
+        sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`agg` index `mv3` ) as t /*+PREAGGOPEN*/")
+        contains "PREAGGREGATION: ON"
+        notContains "PREAGGREGATION: OFF"
+    }
+    explain {
+        sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_size`, NOW() FROM ( SELECT * FROM `internal`.`test_skip_agg_table_value_column`.`agg` index `mv3` ) as t")
+        contains "PREAGGREGATION: OFF"
+        notContains "PREAGGREGATION: ON"
+    }
+
+    sql """drop database if exists test_skip_agg_table_value_column"""
+}