From c54dddf371a44a163f6442f1fbfc178f3fbecdc4 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 23 Feb 2022 18:43:43 +0800 Subject: [PATCH 1/2] cherry pick #32370 to release-5.2 Signed-off-by: ti-srebot --- expression/util.go | 2 +- planner/core/rule_decorrelate.go | 16 ++++++++++++++++ planner/core/rule_partition_processor.go | 2 +- .../core/testdata/plan_suite_unexported_in.json | 3 ++- .../core/testdata/plan_suite_unexported_out.json | 3 ++- util/ranger/detacher.go | 2 +- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/expression/util.go b/expression/util.go index 56d526495638f..fa2fdb27d5383 100644 --- a/expression/util.go +++ b/expression/util.go @@ -165,7 +165,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool } // ExtractColumnSet extracts the different values of `UniqueId` for columns in expressions. -func ExtractColumnSet(exprs []Expression) *intsets.Sparse { +func ExtractColumnSet(exprs ...Expression) *intsets.Sparse { set := &intsets.Sparse{} for _, expr := range exprs { extractColumnSet(expr, set) diff --git a/planner/core/rule_decorrelate.go b/planner/core/rule_decorrelate.go index 1e97d6ff21454..b77aa7c57b76e 100644 --- a/planner/core/rule_decorrelate.go +++ b/planner/core/rule_decorrelate.go @@ -146,6 +146,21 @@ func (s *decorrelateSolver) optimize(ctx context.Context, p LogicalPlan) (Logica return s.optimize(ctx, p) } } else if proj, ok := innerPlan.(*LogicalProjection); ok { + allConst := true + for _, expr := range proj.Exprs { + if len(expression.ExtractCorColumns(expr)) > 0 || !expression.ExtractColumnSet(expr).IsEmpty() { + allConst = false + break + } + } + if allConst && apply.JoinType == LeftOuterJoin { + // If the projection only references constants, we cannot directly pull it up when the APPLY is an outer join. + // e.g. select (select 1 from t1 where t1.a=t2.a) from t2; when t1.a=t2.a is false, the join's output is NULL. + // But if we pull the projection above the APPLY, 
it will return 1 since the projection is evaluated after the join. + // We disable the decorrelation directly for now. + // TODO: Actually, it can be optimized. We need to first push the projection down to the selection, and then the APPLY can be decorrelated. + goto NoOptimize + } for i, expr := range proj.Exprs { proj.Exprs[i] = expr.Decorrelate(outerPlan.Schema()) } @@ -296,6 +311,7 @@ func (s *decorrelateSolver) optimize(ctx context.Context, p LogicalPlan) (Logica return s.optimize(ctx, p) } } +NoOptimize: newChildren := make([]LogicalPlan, 0, len(p.Children())) for _, child := range p.Children() { np, err := s.optimize(ctx, child) diff --git a/planner/core/rule_partition_processor.go b/planner/core/rule_partition_processor.go index d398a239bb408..fad73b504d2d3 100644 --- a/planner/core/rule_partition_processor.go +++ b/planner/core/rule_partition_processor.go @@ -920,7 +920,7 @@ func makePartitionByFnCol(sctx sessionctx.Context, columns []*expression.Column, monotonous = getMonotoneMode(raw.FuncName.L) // Check the partitionExpr is in the form: fn(col, ...) // There should be only one column argument, and it should be the first parameter. - if expression.ExtractColumnSet(args).Len() == 1 { + if expression.ExtractColumnSet(args...).Len() == 1 { if col1, ok := args[0].(*expression.Column); ok { col = col1 } diff --git a/planner/core/testdata/plan_suite_unexported_in.json b/planner/core/testdata/plan_suite_unexported_in.json index f32d4d4ab8123..7dd07d08df3a2 100644 --- a/planner/core/testdata/plan_suite_unexported_in.json +++ b/planner/core/testdata/plan_suite_unexported_in.json @@ -131,7 +131,8 @@ "select t1.b from t t1 where t1.b in (select t2.b from t t2 where t2.a = t1.a order by t2.a)", "select t1.b from t t1 where exists(select t2.b from t t2 where t2.a = t1.a order by t2.a)", // `Sort` will not be eliminated, if it is not the top level operator. 
- "select t1.b from t t1 where t1.b = (select t2.b from t t2 where t2.a = t1.a order by t2.a limit 1)" + "select t1.b from t t1 where t1.b = (select t2.b from t t2 where t2.a = t1.a order by t2.a limit 1)", + "select (select 1 from t t1 where t1.a = t2.a) from t t2" ] }, { diff --git a/planner/core/testdata/plan_suite_unexported_out.json b/planner/core/testdata/plan_suite_unexported_out.json index 6401df6aa9264..c228901827259 100644 --- a/planner/core/testdata/plan_suite_unexported_out.json +++ b/planner/core/testdata/plan_suite_unexported_out.json @@ -117,7 +117,8 @@ "Join{DataScan(t1)->DataScan(t2)->Aggr(max(test.t.a),firstrow(test.t.b))}(test.t.b,test.t.b)->Projection->Sel([eq(test.t.b, Column#25)])->Projection", "Join{DataScan(t1)->DataScan(t2)}(test.t.a,test.t.a)(test.t.b,test.t.b)->Projection", "Join{DataScan(t1)->DataScan(t2)}(test.t.a,test.t.a)->Projection", - "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection" + "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection", + "Apply{DataScan(t2)->DataScan(t1)->Sel([eq(test.t.a, test.t.a)])->Projection}->Projection" ] }, { diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 6a07c531275cf..6eb47682f3726 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -196,7 +196,7 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E offset := int(-1) for i, cond := range conds { tmpConds := []expression.Expression{cond} - colSets := expression.ExtractColumnSet(tmpConds) + colSets := expression.ExtractColumnSet(cond) if colSets.Len() == 0 { continue } From 4e6c3836df1969a53fa633d091b298c51c2cea54 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 15 Apr 2022 14:18:54 +0800 Subject: [PATCH 2/2] fix conflicts --- planner/core/find_best_task.go | 6 +++--- planner/core/stats.go | 4 
++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 807b668f91a30..b8b35e232dedc 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -542,15 +542,15 @@ func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.Physical func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { candidate := &candidatePath{path: path} candidate.isMatchProp = ds.isMatchProp(path, prop) - candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds...) return candidate } func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { candidate := &candidatePath{path: path} candidate.isMatchProp = ds.isMatchProp(path, prop) - candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds) - candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters) + candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds...) + candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters...) return candidate } diff --git a/planner/core/stats.go b/planner/core/stats.go index 3a21a6f14d904..1f73d1c406797 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -298,7 +298,7 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error { if selected == nil && len(uniqueIdxsWithDoubleScan) > 0 { uniqueIdxColumnSets := make([]*intsets.Sparse, 0, len(uniqueIdxsWithDoubleScan)) for _, uniqueIdx := range uniqueIdxsWithDoubleScan { - uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds)) + uniqueIdxColumnSets = append(uniqueIdxColumnSets, expression.ExtractColumnSet(uniqueIdx.AccessConds...)) // Find the unique index with the minimal number of ranges as `uniqueBest`. 
if uniqueBest == nil || len(uniqueIdx.Ranges) < len(uniqueBest.Ranges) { uniqueBest = uniqueIdx @@ -313,7 +313,7 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error { // Hence, for each index in `singleScanIdxs`, we check whether it is better than some index in `uniqueIdxsWithDoubleScan`. // If yes, the index is a refined one. We find the refined index with the minimal number of ranges as `refineBest`. for _, singleScanIdx := range singleScanIdxs { - columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds) + columnSet := expression.ExtractColumnSet(singleScanIdx.AccessConds...) for _, uniqueIdxColumnSet := range uniqueIdxColumnSets { setsResult, comparable := compareColumnSet(columnSet, uniqueIdxColumnSet) if comparable && setsResult == 1 {