From 3eb3464e3173b5ec2fec5a4c6393b9976577f440 Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 6 Dec 2021 18:08:43 +0800 Subject: [PATCH 1/2] support min/max trace Signed-off-by: yisaer --- planner/core/logical_plan_trace_test.go | 30 +++++++++ planner/core/rule_max_min_eliminate.go | 90 +++++++++++++++++++++---- 2 files changed, 108 insertions(+), 12 deletions(-) diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index a16f0111677d0..b45d27ee4c83d 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -142,6 +142,36 @@ func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) { }, }, }, + { + sql: "select max(a)-min(a) from t", + flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagMaxMinEliminate}, + assertRuleName: "max_min_eliminate", + assertRuleSteps: []assertTraceStep{ + { + assertAction: "add sort[8],add limit[9] during eliminate agg[4] max function", + assertReason: "agg[4] has only one function[max] without group by, the columns in agg[4] should be sorted", + }, + { + assertAction: "add sort[10],add limit[11] during eliminate agg[6] min function", + assertReason: "agg[6] has only one function[min] without group by, the columns in agg[6] should be sorted", + }, + { + assertAction: "agg[2] splited into aggs[4,6], and add joins[12] as their parent during eliminate agg[2] multi min/max functions", + assertReason: "each column is sorted and has index in agg[4,6] and none of them has group by statement", + }, + }, + }, + { + sql: "select max(f) from t", + flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagMaxMinEliminate}, + assertRuleName: "max_min_eliminate", + assertRuleSteps: []assertTraceStep{ + { + assertAction: "add sort[4],add limit[5] during eliminate agg[2] max function", + assertReason: "agg[2] has only one function[max] without group by, the columns in agg[2] should be sorted", + }, + }, + }, } for i, tc := range tt { diff --git a/planner/core/rule_max_min_eliminate.go b/planner/core/rule_max_min_eliminate.go index efad9c9296459..769cde438b044 100644 --- a/planner/core/rule_max_min_eliminate.go +++ b/planner/core/rule_max_min_eliminate.go @@ -15,7 +15,9 @@ package core import ( + "bytes" "context" + "fmt" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" @@ -35,20 +37,23 @@ type maxMinEliminator struct { } func (a *maxMinEliminator) optimize(ctx context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) { - return a.eliminateMaxMin(p), nil + return a.eliminateMaxMin(p, opt), nil } // composeAggsByInnerJoin composes the scalar aggregations by cartesianJoin. -func (a *maxMinEliminator) composeAggsByInnerJoin(aggs []*LogicalAggregation) (plan LogicalPlan) { +func (a *maxMinEliminator) composeAggsByInnerJoin(originAgg *LogicalAggregation, aggs []*LogicalAggregation, opt *logicalOptimizeOp) (plan LogicalPlan) { plan = aggs[0] sctx := plan.SCtx() + joins := make([]*LogicalJoin, 0) for i := 1; i < len(aggs); i++ { join := LogicalJoin{JoinType: InnerJoin}.Init(sctx, plan.SelectBlockOffset()) join.SetChildren(plan, aggs[i]) join.schema = buildLogicalJoinSchema(InnerJoin, join) join.cartesianJoin = true plan = join + joins = append(joins, join) } + appendEliminateMultiMinMaxTraceStep(originAgg, aggs, joins, opt) return } @@ -132,7 +137,7 @@ func (a *maxMinEliminator) cloneSubPlans(plan LogicalPlan) LogicalPlan { // `select max(a) from t` + `select min(a) from t` + `select max(b) from t`. // Then we check whether `a` and `b` have indices. If any of the used column has no index, we cannot eliminate // this aggregation. -func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation) (aggs []*LogicalAggregation, canEliminate bool) { +func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation, opt *logicalOptimizeOp) (aggs []*LogicalAggregation, canEliminate bool) { for _, f := range agg.AggFuncs { // We must make sure the args of max/min is a simple single column. col, ok := f.Args[0].(*expression.Column) @@ -158,16 +163,18 @@ func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation) } // eliminateSingleMaxMin tries to convert a single max/min to Limit+Sort operators. -func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *LogicalAggregation { +func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation, opt *logicalOptimizeOp) *LogicalAggregation { f := agg.AggFuncs[0] child := agg.Children()[0] ctx := agg.SCtx() + var sel *LogicalSelection + var sort *LogicalSort // If there's no column in f.GetArgs()[0], we still need limit and read data from real table because the result should be NULL if the input is empty. if len(expression.ExtractColumns(f.Args[0])) > 0 { // If it can be NULL, we need to filter NULL out first. if !mysql.HasNotNullFlag(f.Args[0].GetType().Flag) { - sel := LogicalSelection{}.Init(ctx, agg.blockOffset) + sel = LogicalSelection{}.Init(ctx, agg.blockOffset) isNullFunc := expression.NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), f.Args[0]) notNullFunc := expression.NewFunctionInternal(ctx, ast.UnaryNot, types.NewFieldType(mysql.TypeTiny), isNullFunc) sel.Conditions = []expression.Expression{notNullFunc} @@ -179,7 +186,7 @@ func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *Logic // For max function, the sort order should be desc. desc := f.Name == ast.AggFuncMax // Compose Sort operator. - sort := LogicalSort{}.Init(ctx, agg.blockOffset) + sort = LogicalSort{}.Init(ctx, agg.blockOffset) sort.ByItems = append(sort.ByItems, &util.ByItems{Expr: f.Args[0], Desc: desc}) sort.SetChildren(child) child = sort @@ -192,14 +199,15 @@ func (a *maxMinEliminator) eliminateSingleMaxMin(agg *LogicalAggregation) *Logic // If no data in the child, we need to return NULL instead of empty. This cannot be done by sort and limit themselves. // Since now there would be at most one row returned, the remained agg operator is not expensive anymore. agg.SetChildren(li) + appendEliminateSingleMaxMinTrace(agg, sel, sort, li, opt) return agg } // eliminateMaxMin tries to convert max/min to Limit+Sort operators. -func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan) LogicalPlan { +func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan, opt *logicalOptimizeOp) LogicalPlan { newChildren := make([]LogicalPlan, 0, len(p.Children())) for _, child := range p.Children() { - newChildren = append(newChildren, a.eliminateMaxMin(child)) + newChildren = append(newChildren, a.eliminateMaxMin(child, opt)) } p.SetChildren(newChildren...) if agg, ok := p.(*LogicalAggregation); ok { @@ -222,18 +230,18 @@ func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan) LogicalPlan { if len(agg.AggFuncs) == 1 { // If there is only one aggFunc, we don't need to guarantee that the child of it is a data // source, or whether the sort can be eliminated. This transformation won't be worse than previous. - return a.eliminateSingleMaxMin(agg) + return a.eliminateSingleMaxMin(agg, opt) } // If we have more than one aggFunc, we can eliminate this agg only if all of the aggFuncs can benefit from // their column's index. - aggs, canEliminate := a.splitAggFuncAndCheckIndices(agg) + aggs, canEliminate := a.splitAggFuncAndCheckIndices(agg, opt) if !canEliminate { return agg } for i := range aggs { - aggs[i] = a.eliminateSingleMaxMin(aggs[i]) + aggs[i] = a.eliminateSingleMaxMin(aggs[i], opt) } - return a.composeAggsByInnerJoin(aggs) + return a.composeAggsByInnerJoin(agg, aggs, opt) } return p } @@ -241,3 +249,61 @@ func (a *maxMinEliminator) eliminateMaxMin(p LogicalPlan) LogicalPlan { func (*maxMinEliminator) name() string { return "max_min_eliminate" } + +func appendEliminateSingleMaxMinTrace(agg *LogicalAggregation, sel *LogicalSelection, sort *LogicalSort, limit *LogicalLimit, opt *logicalOptimizeOp) { + action := func() string { + buffer := bytes.NewBufferString("") + if sel != nil { + buffer.WriteString(fmt.Sprintf("add selection[%v],", sel.ID())) + } + if sort != nil { + buffer.WriteString(fmt.Sprintf("add sort[%v],", sort.ID())) + } + buffer.WriteString(fmt.Sprintf("add limit[%v] during eliminate agg[%v] %s function", limit.ID(), agg.ID(), agg.AggFuncs[0].Name)) + return buffer.String() + }() + reason := func() string { + buffer := bytes.NewBufferString(fmt.Sprintf("agg[%v] has only one function[%s] without group by", agg.ID(), agg.AggFuncs[0].Name)) + if sel != nil { + buffer.WriteString(fmt.Sprintf(", the columns in agg[%v] shouldn't be NULL and needs to be filer NULL out", agg.ID())) + } + if sort != nil { + buffer.WriteString(fmt.Sprintf(", the columns in agg[%v] should be sorted", agg.ID())) + } + return buffer.String() + }() + opt.appendStepToCurrent(agg.ID(), agg.TP(), reason, action) +} + +func appendEliminateMultiMinMaxTraceStep(originAgg *LogicalAggregation, aggs []*LogicalAggregation, joins []*LogicalJoin, opt *logicalOptimizeOp) { + action := func() string { + buffer := bytes.NewBufferString(fmt.Sprintf("agg[%v] splited into aggs[", originAgg.ID())) + for i, agg := range aggs { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(fmt.Sprintf("%v", agg.ID())) + } + buffer.WriteString("], and add joins[") + for i, join := range joins { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(fmt.Sprintf("%v", join.ID())) + } + buffer.WriteString(fmt.Sprintf("] as their parent during eliminate agg[%v] multi min/max functions", originAgg.ID())) + return buffer.String() + }() + reason := func() string { + buffer := bytes.NewBufferString("each column is sorted and has index in agg[") + for i, agg := range aggs { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(fmt.Sprintf("%v", agg.ID())) + } + buffer.WriteString("] and none of them has group by statement") + return buffer.String() + }() + opt.appendStepToCurrent(originAgg.ID(), originAgg.TP(), reason, action) +} From e4a21f7b2017d3b0ca19e7d249a8690e59ce8ff5 Mon Sep 17 00:00:00 2001 From: yisaer Date: Fri, 10 Dec 2021 12:31:45 +0800 Subject: [PATCH 2/2] address the comment Signed-off-by: yisaer --- planner/core/logical_plan_trace_test.go | 14 +++++++------- planner/core/rule_max_min_eliminate.go | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go index b45d27ee4c83d..727405eb7cf34 100644 --- a/planner/core/logical_plan_trace_test.go +++ b/planner/core/logical_plan_trace_test.go @@ -148,27 +148,27 @@ func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) { assertRuleName: "max_min_eliminate", assertRuleSteps: []assertTraceStep{ { - assertAction: "add sort[8],add limit[9] during eliminate agg[4] max function", + assertAction: "add sort[8],add limit[9] during eliminating agg[4] max function", assertReason: "agg[4] has only one function[max] without group by, the columns in agg[4] should be sorted", }, { - assertAction: "add sort[10],add limit[11] during eliminate agg[6] min function", + assertAction: "add sort[10],add limit[11] during eliminating agg[6] min function", assertReason: "agg[6] has only one function[min] without group by, the columns in agg[6] should be sorted", }, { - assertAction: "agg[2] splited into aggs[4,6], and add joins[12] as their parent during eliminate agg[2] multi min/max functions", - assertReason: "each column is sorted and has index in agg[4,6] and none of them has group by statement", + assertAction: "agg[2] splited into aggs[4,6], and add joins[12] to connect them during eliminating agg[2] multi min/max functions", + assertReason: "each column is sorted and can benefit from index/primary key in agg[4,6] and none of them has group by clause", }, }, }, { - sql: "select max(f) from t", + sql: "select max(e) from t", flags: []uint64{flagBuildKeyInfo, flagPrunColumns, flagMaxMinEliminate}, assertRuleName: "max_min_eliminate", assertRuleSteps: []assertTraceStep{ { - assertAction: "add sort[4],add limit[5] during eliminate agg[2] max function", - assertReason: "agg[2] has only one function[max] without group by, the columns in agg[2] should be sorted", + assertAction: "add selection[4],add sort[5],add limit[6] during eliminating agg[2] max function", + assertReason: "agg[2] has only one function[max] without group by, the columns in agg[2] shouldn't be NULL and needs NULL to be filtered out, the columns in agg[2] should be sorted", }, }, }, diff --git a/planner/core/rule_max_min_eliminate.go b/planner/core/rule_max_min_eliminate.go index 769cde438b044..858f9005c2273 100644 --- a/planner/core/rule_max_min_eliminate.go +++ b/planner/core/rule_max_min_eliminate.go @@ -259,13 +259,13 @@ func appendEliminateSingleMaxMinTrace(agg *LogicalAggregation, sel *LogicalSelec if sort != nil { buffer.WriteString(fmt.Sprintf("add sort[%v],", sort.ID())) } - buffer.WriteString(fmt.Sprintf("add limit[%v] during eliminate agg[%v] %s function", limit.ID(), agg.ID(), agg.AggFuncs[0].Name)) + buffer.WriteString(fmt.Sprintf("add limit[%v] during eliminating agg[%v] %s function", limit.ID(), agg.ID(), agg.AggFuncs[0].Name)) return buffer.String() }() reason := func() string { buffer := bytes.NewBufferString(fmt.Sprintf("agg[%v] has only one function[%s] without group by", agg.ID(), agg.AggFuncs[0].Name)) if sel != nil { - buffer.WriteString(fmt.Sprintf(", the columns in agg[%v] shouldn't be NULL and needs to be filer NULL out", agg.ID())) + buffer.WriteString(fmt.Sprintf(", the columns in agg[%v] shouldn't be NULL and needs NULL to be filtered out", agg.ID())) } if sort != nil { buffer.WriteString(fmt.Sprintf(", the columns in agg[%v] should be sorted", agg.ID())) @@ -291,18 +291,18 @@ func appendEliminateMultiMinMaxTraceStep(originAgg *LogicalAggregation, aggs []* } buffer.WriteString(fmt.Sprintf("%v", join.ID())) } - buffer.WriteString(fmt.Sprintf("] as their parent during eliminate agg[%v] multi min/max functions", originAgg.ID())) + buffer.WriteString(fmt.Sprintf("] to connect them during eliminating agg[%v] multi min/max functions", originAgg.ID())) return buffer.String() }() reason := func() string { - buffer := bytes.NewBufferString("each column is sorted and has index in agg[") + buffer := bytes.NewBufferString("each column is sorted and can benefit from index/primary key in agg[") for i, agg := range aggs { if i > 0 { buffer.WriteString(",") } buffer.WriteString(fmt.Sprintf("%v", agg.ID())) } - buffer.WriteString("] and none of them has group by statement") + buffer.WriteString("] and none of them has group by clause") return buffer.String() }() opt.appendStepToCurrent(originAgg.ID(), originAgg.TP(), reason, action)