From d31846794f84af69d7b6b171083eda75db30ea6c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 23 Feb 2022 18:43:43 +0800 Subject: [PATCH] cherry pick #32370 to release-4.0 Signed-off-by: ti-srebot --- expression/util.go | 2 +- planner/core/rule_decorrelate.go | 16 ++++ planner/core/rule_partition_processor.go | 10 +++ .../testdata/plan_suite_unexported_in.json | 3 +- .../testdata/plan_suite_unexported_out.json | 3 +- util/ranger/detacher.go | 79 +++++++++++++++++++ 6 files changed, 110 insertions(+), 3 deletions(-) diff --git a/expression/util.go b/expression/util.go index f23c518cd9427..f8fd93f6b727e 100644 --- a/expression/util.go +++ b/expression/util.go @@ -162,7 +162,7 @@ func extractColumns(result []*Column, expr Expression, filter func(*Column) bool } // ExtractColumnSet extracts the different values of `UniqueId` for columns in expressions. -func ExtractColumnSet(exprs []Expression) *intsets.Sparse { +func ExtractColumnSet(exprs ...Expression) *intsets.Sparse { set := &intsets.Sparse{} for _, expr := range exprs { extractColumnSet(expr, set) diff --git a/planner/core/rule_decorrelate.go b/planner/core/rule_decorrelate.go index db19ec99d55bd..ffc15918d0950 100644 --- a/planner/core/rule_decorrelate.go +++ b/planner/core/rule_decorrelate.go @@ -136,6 +136,21 @@ func (s *decorrelateSolver) optimize(ctx context.Context, p LogicalPlan) (Logica return s.optimize(ctx, p) } } else if proj, ok := innerPlan.(*LogicalProjection); ok { + allConst := true + for _, expr := range proj.Exprs { + if len(expression.ExtractCorColumns(expr)) > 0 || !expression.ExtractColumnSet(expr).IsEmpty() { + allConst = false + break + } + } + if allConst && apply.JoinType == LeftOuterJoin { + // If the projection just references some constant. We cannot directly pull it up when the APPLY is an outer join. + // e.g. select (select 1 from t1 where t1.a=t2.a) from t2; When the t1.a=t2.a is false the join's output is NULL. + // But if we pull the projection upon the APPLY. It will return 1 since the projection is evaluated after the join. + // We disable the decorrelation directly for now. + // TODO: Actually, it can be optimized. We need to first push the projection down to the selection. And then the APPLY can be decorrelated. + goto NoOptimize + } for i, expr := range proj.Exprs { proj.Exprs[i] = expr.Decorrelate(outerPlan.Schema()) } @@ -288,6 +303,7 @@ func (s *decorrelateSolver) optimize(ctx context.Context, p LogicalPlan) (Logica return s.optimize(ctx, p) } } +NoOptimize: newChildren := make([]LogicalPlan, 0, len(p.Children())) for _, child := range p.Children() { np, err := s.optimize(ctx, child) diff --git a/planner/core/rule_partition_processor.go b/planner/core/rule_partition_processor.go index ccb0e7fc76419..5f1a319018d88 100644 --- a/planner/core/rule_partition_processor.go +++ b/planner/core/rule_partition_processor.go @@ -395,6 +395,7 @@ func makePartitionByFnCol(sctx sessionctx.Context, columns []*expression.Column, } } +<<<<<<< HEAD if _, ok := monotoneIncFuncs[raw.FuncName.L]; ok { fn = raw args := fn.GetArgs() @@ -405,6 +406,15 @@ func makePartitionByFnCol(sctx sessionctx.Context, columns []*expression.Column, col = c } } +======= + fn = raw + monotonous = getMonotoneMode(raw.FuncName.L) + // Check the partitionExpr is in the form: fn(col, ...) + // There should be only one column argument, and it should be the first parameter. + if expression.ExtractColumnSet(args...).Len() == 1 { + if col1, ok := args[0].(*expression.Column); ok { + col = col1 +>>>>>>> 991132080... planner: don't decorrelate the APPLY when the inner's projection reference no column (#32370) } } case *expression.Column: diff --git a/planner/core/testdata/plan_suite_unexported_in.json b/planner/core/testdata/plan_suite_unexported_in.json index c9cce58b23ffe..f35d30c182ed5 100644 --- a/planner/core/testdata/plan_suite_unexported_in.json +++ b/planner/core/testdata/plan_suite_unexported_in.json @@ -131,7 +131,8 @@ "select t1.b from t t1 where t1.b in (select t2.b from t t2 where t2.a = t1.a order by t2.a)", "select t1.b from t t1 where exists(select t2.b from t t2 where t2.a = t1.a order by t2.a)", // `Sort` will not be eliminated, if it is not the top level operator. - "select t1.b from t t1 where t1.b = (select t2.b from t t2 where t2.a = t1.a order by t2.a limit 1)" + "select t1.b from t t1 where t1.b = (select t2.b from t t2 where t2.a = t1.a order by t2.a limit 1)", + "select (select 1 from t t1 where t1.a = t2.a) from t t2" ] }, { diff --git a/planner/core/testdata/plan_suite_unexported_out.json b/planner/core/testdata/plan_suite_unexported_out.json index c75a8921a66d7..a979d79fc4f6c 100644 --- a/planner/core/testdata/plan_suite_unexported_out.json +++ b/planner/core/testdata/plan_suite_unexported_out.json @@ -117,7 +117,8 @@ "Join{DataScan(t1)->DataScan(t2)->Aggr(max(test.t.a),firstrow(test.t.b))}(test.t.b,test.t.b)->Projection->Sel([eq(test.t.b, Column#25)])->Projection", "Join{DataScan(t1)->DataScan(t2)}(test.t.a,test.t.a)(test.t.b,test.t.b)->Projection", "Join{DataScan(t1)->DataScan(t2)}(test.t.a,test.t.a)->Projection", - "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection" + "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection", + "Apply{DataScan(t2)->DataScan(t1)->Sel([eq(test.t.a, test.t.a)])->Projection}->Projection" ] }, { diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 8b1f6f696fa3c..50e02ccbe1958 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -145,6 +145,85 @@ func getEqOrInColOffset(expr expression.Expression, cols []*expression.Column) i return -1 } +<<<<<<< HEAD +======= +// extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item +// is totally composed of point range filters. +// e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) +// ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. +func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, lengths []int) (*DetachRangeResult, int, []*valueInfo, error) { + if len(conds) < 2 { + return nil, -1, nil, nil + } + var r *DetachRangeResult + columnValues := make([]*valueInfo, len(cols)) + maxNumCols := int(0) + offset := int(-1) + for i, cond := range conds { + tmpConds := []expression.Expression{cond} + colSets := expression.ExtractColumnSet(cond) + if colSets.Len() == 0 { + continue + } + res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, cols, lengths) + if err != nil { + return nil, -1, nil, err + } + if len(res.Ranges) == 0 { + return &DetachRangeResult{}, -1, nil, nil + } + // take the union of the two columnValues + columnValues = unionColumnValues(columnValues, res.ColumnValues) + if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { + continue + } + sameLens, allPoints := true, true + numCols := int(0) + for j, ran := range res.Ranges { + if !ran.IsPoint(sctx) { + allPoints = false + break + } + if j == 0 { + numCols = len(ran.LowVal) + } else if numCols != len(ran.LowVal) { + sameLens = false + break + } + } + if !allPoints || !sameLens { + continue + } + if numCols > maxNumCols { + r = res + offset = i + maxNumCols = numCols + } + } + if r != nil { + r.IsDNFCond = false + } + return r, offset, columnValues, nil +} + +func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { + if lhs == nil { + return rhs + } + if rhs != nil { + for i, valInfo := range lhs { + if i >= len(rhs) { + break + } + if valInfo == nil && rhs[i] != nil { + lhs[i] = rhs[i] + } + } + } + return lhs +} + +>>>>>>> 991132080... planner: don't decorrelate the APPLY when the inner's projection reference no column (#32370) // detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and` // It will first find the point query column and then extract the range query column. // considerDNF is true means it will try to extract access conditions from the DNF expressions.