From bb5bfa4bd7f9b3bb67e06d3de14a82c61c2eb9d8 Mon Sep 17 00:00:00 2001 From: "Zhuomin(Charming) Liu" Date: Mon, 9 Sep 2019 17:44:41 +0800 Subject: [PATCH] planner: consider agg func type in cost model (#12038) --- cmd/explaintest/r/explain_easy.result | 53 +++++++++------------ cmd/explaintest/r/explain_easy_stats.result | 11 ++--- cmd/explaintest/r/index_join.result | 7 ++- planner/core/cbo_test.go | 2 +- planner/core/find_best_task.go | 19 ++++++++ planner/core/physical_plan_test.go | 6 +-- planner/core/physical_plans.go | 15 ++++++ planner/core/task.go | 20 +++----- 8 files changed, 76 insertions(+), 57 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 025988602a61d..263e0acd597f8 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -98,14 +98,13 @@ StreamAgg_12 1.00 root funcs:sum(col_0) explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info Projection_9 9990.00 root test.t1.c1 -└─HashLeftJoin_17 9990.00 root inner join, inner:HashAgg_24, equal:[eq(test.t1.c1, test.t2.c2)] +└─HashLeftJoin_17 9990.00 root inner join, inner:HashAgg_21, equal:[eq(test.t1.c1, test.t2.c2)] ├─TableReader_30 10000.00 root data:TableScan_29 │ └─TableScan_29 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - └─HashAgg_24 7992.00 root group by:col_1, funcs:firstrow(col_1) - └─TableReader_25 7992.00 root data:HashAgg_19 - └─HashAgg_19 7992.00 cop group by:test.t2.c2, - └─Selection_23 9990.00 cop not(isnull(test.t2.c2)) - └─TableScan_22 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo + └─HashAgg_21 7992.00 root group by:test.t2.c2, funcs:firstrow(test.t2.c2) + └─TableReader_28 9990.00 root data:Selection_27 + └─Selection_27 9990.00 cop not(isnull(test.t2.c2)) + └─TableScan_26 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1; id count task operator info Projection_12 10000.00 root ifnull(5_col_0, 0) @@ -165,32 +164,27 @@ id count task operator info Union_17 26000.00 root ├─HashAgg_21 16000.00 root group by:c1, funcs:firstrow(join_agg_0) │ └─Union_22 16000.00 root -│ ├─StreamAgg_34 8000.00 root group by:col_2, funcs:firstrow(col_2), firstrow(col_2) -│ │ └─IndexReader_35 8000.00 root index:StreamAgg_26 -│ │ └─StreamAgg_26 8000.00 cop group by:test.t2.c1, -│ │ └─IndexScan_33 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -│ └─StreamAgg_49 8000.00 root group by:col_2, funcs:firstrow(col_2), firstrow(col_2) -│ └─IndexReader_50 8000.00 root index:StreamAgg_41 -│ └─StreamAgg_41 8000.00 cop group by:test.t2.c1, -│ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +│ ├─StreamAgg_27 8000.00 root group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) +│ │ └─IndexReader_37 10000.00 root index:IndexScan_36 +│ │ └─IndexScan_36 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +│ └─StreamAgg_42 8000.00 root group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) +│ └─IndexReader_52 10000.00 root index:IndexScan_51 +│ └─IndexScan_51 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo └─TableReader_55 10000.00 root data:TableScan_54 └─TableScan_54 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select c1 from t2 union all select c1 from t2 union select c1 from t2; id count task operator info HashAgg_18 24000.00 root group by:c1, funcs:firstrow(join_agg_0) └─Union_19 24000.00 root - ├─StreamAgg_31 8000.00 root group by:col_2, funcs:firstrow(col_2), firstrow(col_2) - │ └─IndexReader_32 8000.00 root index:StreamAgg_23 - │ └─StreamAgg_23 8000.00 cop group by:test.t2.c1, - │ └─IndexScan_30 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - ├─StreamAgg_46 8000.00 root group by:col_2, funcs:firstrow(col_2), firstrow(col_2) - │ └─IndexReader_47 8000.00 root index:StreamAgg_38 - │ └─StreamAgg_38 8000.00 cop group by:test.t2.c1, - │ └─IndexScan_45 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo - └─StreamAgg_61 8000.00 root group by:col_2, funcs:firstrow(col_2), firstrow(col_2) - └─IndexReader_62 8000.00 root index:StreamAgg_53 - └─StreamAgg_53 8000.00 cop group by:test.t2.c1, - └─IndexScan_60 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + ├─StreamAgg_24 8000.00 root group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + │ └─IndexReader_34 10000.00 root index:IndexScan_33 + │ └─IndexScan_33 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + ├─StreamAgg_39 8000.00 root group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + │ └─IndexReader_49 10000.00 root index:IndexScan_48 + │ └─IndexScan_48 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_54 8000.00 root group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) + └─IndexReader_64 10000.00 root index:IndexScan_63 + └─IndexScan_63 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select count(1) from (select count(1) from (select * from t1 where c3 = 100) k) k2; id count task operator info StreamAgg_13 1.00 root funcs:count(1) @@ -216,10 +210,9 @@ StreamAgg_11 1.00 root funcs:count(1) explain select count(1) from (select count(c2) from t1 group by c3) k; id count task operator info StreamAgg_11 1.00 root funcs:count(1) -└─HashAgg_23 8000.00 root group by:col_1, funcs:firstrow(col_0) - └─TableReader_24 8000.00 root data:HashAgg_20 - └─HashAgg_20 8000.00 cop group by:test.t1.c3, funcs:firstrow(1) - └─TableScan_15 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo +└─HashAgg_22 8000.00 root group by:test.t1.c3, funcs:firstrow(1) + └─TableReader_19 10000.00 root data:TableScan_18 + └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 122d8e142a848..c8b12b04121fb 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -64,14 +64,13 @@ IndexLookUp_9 0.00 root explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id count task operator info Projection_11 1985.00 root cast(join_agg_0) -└─HashLeftJoin_19 1985.00 root inner join, inner:HashAgg_26, equal:[eq(test.a.c1, test.b.c2)] +└─HashLeftJoin_19 1985.00 root inner join, inner:HashAgg_23, equal:[eq(test.a.c1, test.b.c2)] ├─TableReader_32 1999.00 root data:TableScan_31 │ └─TableScan_31 1999.00 cop table:a, range:[-inf,+inf], keep order:false - └─HashAgg_26 1985.00 root group by:col_2, funcs:count(col_0), firstrow(col_2) - └─TableReader_27 1985.00 root data:HashAgg_21 - └─HashAgg_21 1985.00 cop group by:test.b.c2, funcs:count(test.b.c2) - └─Selection_25 1985.00 cop not(isnull(test.b.c2)) - └─TableScan_24 1985.00 cop table:b, range:[-inf,+inf], keep order:false + └─HashAgg_23 1985.00 root group by:test.b.c2, funcs:count(test.b.c2), firstrow(test.b.c2) + └─TableReader_30 1985.00 root data:Selection_29 + └─Selection_29 1985.00 cop not(isnull(test.b.c2)) + └─TableScan_28 1985.00 cop table:b, range:[-inf,+inf], keep order:false explain select * from t2 order by t2.c2 limit 0, 1; id count task operator info TopN_7 1.00 root test.t2.c2:asc, offset:0, count:1 diff --git a/cmd/explaintest/r/index_join.result b/cmd/explaintest/r/index_join.result index de06b3264385a..8261b6f59ccf8 100644 --- a/cmd/explaintest/r/index_join.result +++ b/cmd/explaintest/r/index_join.result @@ -50,7 +50,6 @@ Projection_8 10000.00 root test.t1.a, test.t1.b ├─IndexLookUp_11 10.00 root │ ├─IndexScan_9 10.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo │ └─TableScan_10 10.00 cop table:t1, keep order:false, stats:pseudo - └─StreamAgg_29 8000.00 root group by:col_1, funcs:firstrow(col_1) - └─IndexReader_30 8000.00 root index:StreamAgg_21 - └─StreamAgg_21 8000.00 cop group by:test.t2.a, - └─IndexScan_28 10000.00 cop table:t2, index:a, range:[NULL,+inf], keep order:true, stats:pseudo + └─StreamAgg_22 8000.00 root group by:test.t2.a, funcs:firstrow(test.t2.a) + └─IndexReader_32 10000.00 root index:IndexScan_31 + └─IndexScan_31 10000.00 cop table:t2, index:a, range:[NULL,+inf], keep order:true, stats:pseudo diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 2f92e9e778451..0764305e84e8e 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -411,7 +411,7 @@ func (s *testAnalyzeSuite) TestEmptyTable(c *C) { }, { sql: "select * from t where c1 in (select c1 from t1)", - best: "LeftHashJoin{TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))->TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))])->HashAgg)->HashAgg}(test.t.c1,test.t1.c1)->Projection", + best: "LeftHashJoin{TableReader(Table(t)->Sel([not(isnull(test.t.c1))]))->TableReader(Table(t1)->Sel([not(isnull(test.t1.c1))]))->HashAgg}(test.t.c1,test.t1.c1)->Projection", }, { sql: "select * from t, t1 where t.c1 = t1.c1", diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 390bbe22e2b91..6044a1a9390ee 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -16,6 +16,7 @@ package core import ( "math" + "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" @@ -43,6 +44,24 @@ const ( distinctFactor = 0.8 ) +var aggFuncFactor = map[string]float64{ + ast.AggFuncCount: 1.0, + ast.AggFuncSum: 1.0, + ast.AggFuncAvg: 2.0, + ast.AggFuncFirstRow: 0.1, + ast.AggFuncMax: 1.0, + ast.AggFuncMin: 1.0, + ast.AggFuncGroupConcat: 1.0, + ast.AggFuncBitOr: 0.9, + ast.AggFuncBitXor: 0.9, + ast.AggFuncBitAnd: 0.9, + ast.AggFuncVarPop: 3.0, + ast.AggFuncVarSamp: 3.0, + ast.AggFuncStddevPop: 3.0, + ast.AggFuncStddevSamp: 3.0, + "default": 1.5, +} + // wholeTaskTypes records all possible kinds of task that a plan can return. For Agg, TopN and Limit, we will try to get // these tasks one by one. var wholeTaskTypes = [...]property.TaskType{property.CopSingleReadTaskType, property.CopDoubleReadTaskType, property.RootTaskType} diff --git a/planner/core/physical_plan_test.go b/planner/core/physical_plan_test.go index 7754b77a362ec..982d1afe212e7 100644 --- a/planner/core/physical_plan_test.go +++ b/planner/core/physical_plan_test.go @@ -855,7 +855,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderAgg(c *C) { // Test distinct. { sql: "select distinct b from t", - best: "TableReader(Table(t)->HashAgg)->HashAgg", + best: "TableReader(Table(t))->HashAgg", }, { sql: "select count(*) from (select * from t order by b) t group by b", @@ -868,7 +868,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderAgg(c *C) { // Test agg + table. { sql: "select sum(a), avg(b + c) from t group by d", - best: "TableReader(Table(t))->Projection->HashAgg", + best: "TableReader(Table(t)->HashAgg)->HashAgg", }, { sql: "select sum(distinct a), avg(b + c) from t group by d", @@ -1639,7 +1639,7 @@ func (s *testPlanSuite) TestAggregationHints(c *C) { // additional test { sql: "select /*+ STREAM_AGG() */ distinct a from t", - best: "TableReader(Table(t)->StreamAgg)->StreamAgg", + best: "TableReader(Table(t))->StreamAgg", }, { sql: "select /*+ HASH_AGG() */ t1.a from t t1 where t1.a < any(select t2.b from t t2)", diff --git a/planner/core/physical_plans.go b/planner/core/physical_plans.go index aeb16a5a14243..1505e39ce357e 100644 --- a/planner/core/physical_plans.go +++ b/planner/core/physical_plans.go @@ -345,6 +345,21 @@ func (p *basePhysicalAgg) numDistinctFunc() (num int) { return } +func (p *basePhysicalAgg) getAggFuncCostFactor() (factor float64) { + factor = 0.0 + for _, agg := range p.AggFuncs { + if fac, ok := aggFuncFactor[agg.Name]; ok { + factor += fac + } else { + factor += aggFuncFactor["default"] + } + } + if factor == 0 { + factor = 1.0 + } + return +} + // PhysicalHashAgg is hash operator of aggregate. type PhysicalHashAgg struct { basePhysicalAgg diff --git a/planner/core/task.go b/planner/core/task.go index a10cb1a3ee573..920763b96eeec 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -877,15 +877,12 @@ func (p *PhysicalStreamAgg) attach2Task(tasks ...task) task { // GetCost computes cost of stream aggregation considering CPU/memory. func (p *PhysicalStreamAgg) GetCost(inputRows float64, isRoot bool) float64 { - numAggFunc := len(p.AggFuncs) - if numAggFunc == 0 { - numAggFunc = 1 - } + aggFuncFactor := p.getAggFuncCostFactor() var cpuCost float64 if isRoot { - cpuCost = inputRows * cpuFactor * float64(numAggFunc) + cpuCost = inputRows * cpuFactor * aggFuncFactor } else { - cpuCost = inputRows * copCPUFactor * float64(numAggFunc) + cpuCost = inputRows * copCPUFactor * aggFuncFactor } rowsPerGroup := inputRows / p.statsInfo().RowCount memoryCost := rowsPerGroup * distinctFactor * memoryFactor * float64(p.numDistinctFunc()) @@ -958,13 +955,10 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { func (p *PhysicalHashAgg) GetCost(inputRows float64, isRoot bool) float64 { cardinality := p.statsInfo().RowCount numDistinctFunc := p.numDistinctFunc() - numAggFunc := len(p.AggFuncs) - if numAggFunc == 0 { - numAggFunc = 1 - } + aggFuncFactor := p.getAggFuncCostFactor() var cpuCost float64 if isRoot { - cpuCost = inputRows * cpuFactor * float64(numAggFunc) + cpuCost = inputRows * cpuFactor * aggFuncFactor divisor, con := p.cpuCostDivisor(numDistinctFunc > 0) if divisor > 0 { cpuCost /= divisor @@ -972,9 +966,9 @@ func (p *PhysicalHashAgg) GetCost(inputRows float64, isRoot bool) float64 { cpuCost += (con + 1) * concurrencyFactor } } else { - cpuCost = inputRows * copCPUFactor * float64(numAggFunc) + cpuCost = inputRows * copCPUFactor * aggFuncFactor } - memoryCost := cardinality * memoryFactor * float64(numAggFunc) + memoryCost := cardinality * memoryFactor * float64(len(p.AggFuncs)) // When aggregation has distinct flag, we would allocate a map for each group to // check duplication. memoryCost += inputRows * distinctFactor * memoryFactor * float64(numDistinctFunc)