Skip to content

Commit

Permalink
planner: support trace for min/max eliminate (#30441)
Browse files Browse the repository at this point in the history
* support min/max trace

Signed-off-by: yisaer <disxiaofei@163.com>

* address the comment

Signed-off-by: yisaer <disxiaofei@163.com>

Co-authored-by: Ti Chi Robot <ti-community-prow-bot@tidb.io>
  • Loading branch information
Yisaer and ti-chi-bot authored Dec 10, 2021
1 parent ed384b2 commit 828c1dd
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 12 deletions.
30 changes: 30 additions & 0 deletions planner/core/logical_plan_trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,36 @@ func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) {
},
},
},
{
sql: "select max(a)-min(a) from t",
flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagMaxMinEliminate},
assertRuleName: "max_min_eliminate",
assertRuleSteps: []assertTraceStep{
{
assertAction: "add sort[8],add limit[9] during eliminating agg[4] max function",
assertReason: "agg[4] has only one function[max] without group by, the columns in agg[4] should be sorted",
},
{
assertAction: "add sort[10],add limit[11] during eliminating agg[6] min function",
assertReason: "agg[6] has only one function[min] without group by, the columns in agg[6] should be sorted",
},
{
assertAction: "agg[2] splited into aggs[4,6], and add joins[12] to connect them during eliminating agg[2] multi min/max functions",
assertReason: "each column is sorted and can benefit from index/primary key in agg[4,6] and none of them has group by clause",
},
},
},
{
sql: "select max(e) from t",
flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagMaxMinEliminate},
assertRuleName: "max_min_eliminate",
assertRuleSteps: []assertTraceStep{
{
assertAction: "add selection[4],add sort[5],add limit[6] during eliminating agg[2] max function",
assertReason: "agg[2] has only one function[max] without group by, the columns in agg[2] shouldn't be NULL and needs NULL to be filtered out, the columns in agg[2] should be sorted",
},
},
},
}

for i, tc := range tt {
Expand Down
90 changes: 78 additions & 12 deletions planner/core/rule_max_min_eliminate.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
package core

import (
"bytes"
"context"
"fmt"

"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
Expand All @@ -35,20 +37,23 @@ type maxMinEliminator struct {
}

// optimize is the rule's entry point: it runs max/min elimination on the
// logical plan p and returns the resulting plan. opt is forwarded to
// eliminateMaxMin so that trace steps can be appended while the plan is
// rewritten. ctx is not used here, and the returned error is always nil.
func (a *maxMinEliminator) optimize(ctx context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) {
return a.eliminateMaxMin(p), nil
return a.eliminateMaxMin(p, opt), nil
}

// composeAggsByInnerJoin composes the scalar aggregations by cartesianJoin.
// Starting from aggs[0], each following aggregation is attached as the right
// child of a new inner join whose left child is the plan built so far, so the
// result is a left-deep chain of joins. Every created join is collected and
// reported, together with originAgg and aggs, through
// appendEliminateMultiMinMaxTraceStep. The named result plan holds the top of
// the chain (aggs[0] itself when aggs has a single element).
func (a *maxMinEliminator) composeAggsByInnerJoin(aggs []*LogicalAggregation) (plan LogicalPlan) {
func (a *maxMinEliminator) composeAggsByInnerJoin(originAgg *LogicalAggregation, aggs []*LogicalAggregation, opt *logicalOptimizeOp) (plan LogicalPlan) {
plan = aggs[0]
sctx := plan.SCtx()
// Joins are remembered only so their IDs can be written into the trace step.
joins := make([]*LogicalJoin, 0)
for i := 1; i < len(aggs); i++ {
join := LogicalJoin{JoinType: InnerJoin}.Init(sctx, plan.SelectBlockOffset())
join.SetChildren(plan, aggs[i])
join.schema = buildLogicalJoinSchema(InnerJoin, join)
// No join condition is set; the join is flagged as a cartesian product.
join.cartesianJoin = true
plan = join
joins = append(joins, join)
}
appendEliminateMultiMinMaxTraceStep(originAgg, aggs, joins, opt)
return
}

Expand Down Expand Up @@ -132,7 +137,7 @@ func (a *maxMinEliminator) cloneSubPlans(plan LogicalPlan) LogicalPlan {
// `select max(a) from t` + `select min(a) from t` + `select max(b) from t`.
// Then we check whether `a` and `b` have indices. If any of the used column has no index, we cannot eliminate
// this aggregation.
func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation) (aggs []*LogicalAggregation, canEliminate bool) {
func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation, opt *logicalOptimizeOp) (aggs []*LogicalAggregation, canEliminate bool) {
for _, f := range agg.AggFuncs {
// We must make sure the args of max/min is a simple single column.
col, ok := f.Args[0].(*expression.Column)
Expand All @@ -158,16 +163,18 @@ func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation)
}

// eliminateSingleMaxMin tries to convert a single max/min to Limit+Sort operators.
func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *LogicalAggregation {
func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation, opt *logicalOptimizeOp) *LogicalAggregation {
f := agg.AggFuncs[0]
child := agg.Children()[0]
ctx := agg.SCtx()

var sel *LogicalSelection
var sort *LogicalSort
// If there's no column in f.GetArgs()[0], we still need limit and read data from real table because the result should be NULL if the input is empty.
if len(expression.ExtractColumns(f.Args[0])) > 0 {
// If it can be NULL, we need to filter NULL out first.
if !mysql.HasNotNullFlag(f.Args[0].GetType().Flag) {
sel := LogicalSelection{}.Init(ctx, agg.blockOffset)
sel = LogicalSelection{}.Init(ctx, agg.blockOffset)
isNullFunc := expression.NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), f.Args[0])
notNullFunc := expression.NewFunctionInternal(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), isNullFunc)
sel.Conditions = []expression.Expression{notNullFunc}
Expand All @@ -179,7 +186,7 @@ func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *Logic
// For max function, the sort order should be desc.
desc := f.Name == ast.AggFuncMax
// Compose Sort operator.
sort := LogicalSort{}.Init(ctx, agg.blockOffset)
sort = LogicalSort{}.Init(ctx, agg.blockOffset)
sort.ByItems = append(sort.ByItems, &util.ByItems{Expr: f.Args[0], Desc: desc})
sort.SetChildren(child)
child = sort
Expand All @@ -192,14 +199,15 @@ func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *Logic
// If no data in the child, we need to return NULL instead of empty. This cannot be done by sort and limit themselves.
// Since now there would be at most one row returned, the remained agg operator is not expensive anymore.
agg.SetChildren(li)
appendEliminateSingleMaxMinTrace(agg, sel, sort, li, opt)
return agg
}

// eliminateMaxMin tries to convert max/min to Limit+Sort operators.
func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan) LogicalPlan {
func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan, opt *logicalOptimizeOp) LogicalPlan {
newChildren := make([]LogicalPlan, 0, len(p.Children()))
for _, child := range p.Children() {
newChildren = append(newChildren, a.eliminateMaxMin(child))
newChildren = append(newChildren, a.eliminateMaxMin(child, opt))
}
p.SetChildren(newChildren...)
if agg, ok := p.(*LogicalAggregation); ok {
Expand All @@ -222,22 +230,80 @@ func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan) LogicalPlan {
if len(agg.AggFuncs) == 1 {
// If there is only one aggFunc, we don't need to guarantee that the child of it is a data
// source, or whether the sort can be eliminated. This transformation won't be worse than previous.
return a.eliminateSingleMaxMin(agg)
return a.eliminateSingleMaxMin(agg, opt)
}
// If we have more than one aggFunc, we can eliminate this agg only if all of the aggFuncs can benefit from
// their column's index.
aggs, canEliminate := a.splitAggFuncAndCheckIndices(agg)
aggs, canEliminate := a.splitAggFuncAndCheckIndices(agg, opt)
if !canEliminate {
return agg
}
for i := range aggs {
aggs[i] = a.eliminateSingleMaxMin(aggs[i])
aggs[i] = a.eliminateSingleMaxMin(aggs[i], opt)
}
return a.composeAggsByInnerJoin(aggs)
return a.composeAggsByInnerJoin(agg, aggs, opt)
}
return p
}

// name returns the identifier of this optimization rule.
func (*maxMinEliminator) name() string {
	const ruleName = "max_min_eliminate"
	return ruleName
}

// appendEliminateSingleMaxMinTrace appends one trace step to opt describing
// how a single max/min aggregation was rewritten.
//
// The action text lists the operators that were added (the selection and the
// sort are optional and mentioned only when non-nil; the limit is always
// mentioned), and the reason text explains why each optional operator was
// needed. agg must have at least one aggregate function and limit must be
// non-nil.
func appendEliminateSingleMaxMinTrace(agg *LogicalAggregation, sel *LogicalSelection, sort *LogicalSort, limit *LogicalLimit, opt *logicalOptimizeOp) {
	aggID := agg.ID()
	funcName := agg.AggFuncs[0].Name

	var actionBuf, reasonBuf bytes.Buffer
	fmt.Fprintf(&reasonBuf, "agg[%v] has only one function[%s] without group by", aggID, funcName)

	// The selection exists only when NULL values had to be filtered out.
	if sel != nil {
		fmt.Fprintf(&actionBuf, "add selection[%v],", sel.ID())
		fmt.Fprintf(&reasonBuf, ", the columns in agg[%v] shouldn't be NULL and needs NULL to be filtered out", aggID)
	}
	// The sort exists only when the aggregate argument references a column.
	if sort != nil {
		fmt.Fprintf(&actionBuf, "add sort[%v],", sort.ID())
		fmt.Fprintf(&reasonBuf, ", the columns in agg[%v] should be sorted", aggID)
	}
	fmt.Fprintf(&actionBuf, "add limit[%v] during eliminating agg[%v] %s function", limit.ID(), aggID, funcName)

	opt.appendStepToCurrent(aggID, agg.TP(), reasonBuf.String(), actionBuf.String())
}

// appendEliminateMultiMinMaxTraceStep appends one trace step to opt describing
// how an aggregation with several min/max functions (originAgg) was split into
// the single-function aggregations aggs and reconnected through joins.
//
// The IDs of aggs and joins are rendered as comma-separated lists; the agg ID
// list appears in both the action and the reason text.
func appendEliminateMultiMinMaxTraceStep(originAgg *LogicalAggregation, aggs []*LogicalAggregation, joins []*LogicalJoin, opt *logicalOptimizeOp) {
	originID := originAgg.ID()

	// Comma-separated IDs of the split aggregations.
	var aggIDs bytes.Buffer
	for idx := range aggs {
		if idx != 0 {
			aggIDs.WriteString(",")
		}
		fmt.Fprintf(&aggIDs, "%v", aggs[idx].ID())
	}

	// Comma-separated IDs of the joins that stitch the aggregations together.
	var joinIDs bytes.Buffer
	for idx := range joins {
		if idx != 0 {
			joinIDs.WriteString(",")
		}
		fmt.Fprintf(&joinIDs, "%v", joins[idx].ID())
	}

	action := fmt.Sprintf(
		"agg[%v] splited into aggs[%s], and add joins[%s] to connect them during eliminating agg[%v] multi min/max functions",
		originID, aggIDs.String(), joinIDs.String(), originID,
	)
	reason := fmt.Sprintf(
		"each column is sorted and can benefit from index/primary key in agg[%s] and none of them has group by clause",
		aggIDs.String(),
	)
	opt.appendStepToCurrent(originID, originAgg.TP(), reason, action)
}

0 comments on commit 828c1dd

Please sign in to comment.