Skip to content

Commit

Permalink
plan: convert in subquery to agg and inner join (#7531)
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored Oct 30, 2018
1 parent a37417e commit 696ef7b
Show file tree
Hide file tree
Showing 17 changed files with 133 additions and 112 deletions.
25 changes: 17 additions & 8 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ create table t2 (c1 int unique, c2 int);
insert into t2 values(1, 0), (2, 1);
create table t3 (a bigint, b bigint, c bigint, d bigint);
create table t4 (a int, b int, c int, index idx(a, b), primary key(a));
set @@session.tidb_opt_insubquery_unfold = 1;
set @@session.tidb_opt_agg_push_down = 1;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a );
id count task operator info
Projection_12 8000.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d
Expand Down Expand Up @@ -86,14 +86,22 @@ TableReader_7 0.33 root data:Selection_6
└─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false, stats:pseudo
explain select sum(t1.c1 in (select c1 from t2)) from t1;
id count task operator info
StreamAgg_21 1.00 root funcs:sum(col_0)
└─TableReader_22 1.00 root data:StreamAgg_13
└─StreamAgg_13 1.00 cop funcs:sum(in(test.t1.c1, 1, 2))
└─TableScan_20 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
StreamAgg_12 1.00 root funcs:sum(5_aux_0)
└─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1
├─TableReader_19 10000.00 root data:TableScan_18
│ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo
└─IndexReader_23 10000.00 root index:IndexScan_22
└─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
explain select c1 from t1 where c1 in (select c2 from t2);
id count task operator info
TableReader_11 2.00 root data:TableScan_10
└─TableScan_10 2.00 cop table:t1, range:[0,0], [1,1], keep order:false, stats:pseudo
Projection_8 10000.00 root test.t1.c1
└─IndexJoin_11 10000.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, inner key:test.t1.c1
├─TableReader_10 10.00 root data:TableScan_9
│ └─TableScan_9 10.00 cop table:t1, range: decided by [test.t2.c2], keep order:false, stats:pseudo
└─HashAgg_18 8000.00 root group by:col_1, funcs:firstrow(col_0)
└─TableReader_19 8000.00 root data:HashAgg_14
└─HashAgg_14 8000.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2)
└─TableScan_17 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1;
id count task operator info
Projection_12 10000.00 root k
Expand Down Expand Up @@ -179,7 +187,7 @@ HashAgg_18 24000.00 root group by:t2.c1, funcs:firstrow(join_agg_0)
└─IndexReader_67 8000.00 root index:StreamAgg_57
└─StreamAgg_57 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1)
└─IndexScan_65 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo
set @@session.tidb_opt_insubquery_unfold = 0;
set @@session.tidb_opt_insubq_to_join_and_agg=0;
explain select sum(t1.c1 in (select c1 from t2)) from t1;
id count task operator info
StreamAgg_12 1.00 root funcs:sum(5_aux_0)
Expand Down Expand Up @@ -445,3 +453,4 @@ Projection_4 2666.67 root test.t.a
└─Selection_6 2666.67 cop gt(test.t.a, 0)
└─TableScan_5 3333.33 cop table:t, range:(0,+inf], keep order:false, stats:pseudo
drop table if exists t;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
14 changes: 11 additions & 3 deletions cmd/explaintest/r/explain_easy_stats.result
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ create table t3 (a bigint, b bigint, c bigint, d bigint);
load stats 's/explain_easy_stats_t3.json';
create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a));
load stats 's/explain_easy_stats_index_prune.json';
set @@session.tidb_opt_insubquery_unfold = 1;
set @@session.tidb_opt_agg_push_down = 1;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a );
id count task operator info
Projection_12 1600.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d
Expand Down Expand Up @@ -92,7 +92,14 @@ TableReader_7 0.50 root data:Selection_6
└─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false
explain select c1 from t1 where c1 in (select c2 from t2);
id count task operator info
TableDual_11 0.00 root rows:0
Projection_8 1985.00 root test.t1.c1
└─IndexJoin_11 1985.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, inner key:test.t1.c1
├─TableReader_10 1.00 root data:TableScan_9
│ └─TableScan_9 1.00 cop table:t1, range: decided by [test.t2.c2], keep order:false
└─HashAgg_18 1985.00 root group by:col_1, funcs:firstrow(col_0)
└─TableReader_19 1985.00 root data:HashAgg_14
└─HashAgg_14 1985.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2)
└─TableScan_17 1985.00 cop table:t2, range:[-inf,+inf], keep order:false
explain select * from information_schema.columns;
id count task operator info
MemTableScan_4 10000.00 root
Expand All @@ -113,7 +120,7 @@ Limit_10 1.00 root offset:0, count:1
└─TableReader_21 1.00 root data:Limit_20
└─Limit_20 1.00 cop offset:0, count:1
└─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc
set @@session.tidb_opt_insubquery_unfold = 0;
set @@session.tidb_opt_insubq_to_join_and_agg=0;
explain select 1 in (select c2 from t2) from t1;
id count task operator info
Projection_6 1999.00 root 5_aux_0
Expand Down Expand Up @@ -184,3 +191,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
id count task operator info
Point_Get_1 1.00 root table:index_prune, index:a b
drop table if exists t1, t2, t3, index_prune;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
85 changes: 43 additions & 42 deletions cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ The Large Volume Customer Query ranks customers based on their having placed a l
quantity orders are defined as those orders whose total quantity is above a certain level.
The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders.
The query lists the customer name, customer key, the order key, date and total price and the quantity for the order.
Planner enhancement: unfold in subquery.
Planner enhancement: improve cost estimation and join reorder. The inner subquery's result is only 300+ rows.
*/
explain
select
Expand Down Expand Up @@ -1021,24 +1021,24 @@ o_totalprice desc,
o_orderdate
limit 100;
id count task operator info
Projection_20 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 13_col_0
└─TopN_23 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100
└─HashAgg_26 60000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate)
└─HashLeftJoin_27 240004648.80 root semi join, inner:Selection_55, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)]
├─IndexJoin_32 300005811.00 root inner join, inner:IndexLookUp_31, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey
│ ├─HashRightJoin_48 75000000.00 root inner join, inner:TableReader_52, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
│ │ ├─TableReader_52 7500000.00 root data:TableScan_51
│ │ │ └─TableScan_51 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
│ │ └─TableReader_50 75000000.00 root data:TableScan_49
│ │ └─TableScan_49 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_31 1.00 root
│ ├─IndexScan_29 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ └─TableScan_30 1.00 cop table:lineitem, keep order:false
└─Selection_55 59251097.60 root gt(sel_agg_2, 314)
└─HashAgg_62 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1)
└─TableReader_63 74063872.00 root data:HashAgg_56
└─HashAgg_56 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey)
└─TableScan_61 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false
Projection_22 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 14_col_0
└─TopN_25 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100
└─HashAgg_28 75000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate)
└─HashLeftJoin_29 237008981.18 root inner join, inner:Selection_58, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)]
├─IndexJoin_35 300005811.00 root inner join, inner:IndexLookUp_34, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey
│ ├─HashRightJoin_51 75000000.00 root inner join, inner:TableReader_55, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
│ │ ├─TableReader_55 7500000.00 root data:TableScan_54
│ │ │ └─TableScan_54 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
│ │ └─TableReader_53 75000000.00 root data:TableScan_52
│ │ └─TableScan_52 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_34 1.00 root
│ ├─IndexScan_32 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ └─TableScan_33 1.00 cop table:lineitem, keep order:false
└─Selection_58 59251097.60 root gt(sel_agg_2, 314)
└─HashAgg_65 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1)
└─TableReader_66 74063872.00 root data:HashAgg_59
└─HashAgg_59 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey)
└─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false
/*
Q19 Discounted Revenue Query
The Discounted Revenue Query reports the gross discounted revenue attributed to the sale of selected parts handled
Expand Down Expand Up @@ -1140,29 +1140,30 @@ and n_name = 'ALGERIA'
order by
s_name;
id count task operator info
Sort_23 16000.00 root tpch.supplier.s_name:asc
└─Projection_25 16000.00 root tpch.supplier.s_name, tpch.supplier.s_address
└─HashLeftJoin_26 16000.00 root semi join, inner:Projection_38, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)]
├─HashRightJoin_32 20000.00 root inner join, inner:TableReader_37, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)]
│ ├─TableReader_37 1.00 root data:Selection_36
│ │ └─Selection_36 1.00 cop eq(tpch.nation.n_name, "ALGERIA")
│ │ └─TableScan_35 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─TableReader_34 500000.00 root data:TableScan_33
│ └─TableScan_33 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
└─Projection_38 6363545.60 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, mul(0.5, 13_col_0)
└─Selection_39 6363545.60 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 13_col_0))
└─HashAgg_42 7954432.00 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), sum(tpch.lineitem.l_quantity)
└─HashLeftJoin_45 177770004.55 root left outer join, inner:TableReader_66, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)]
├─MergeJoin_46 32000000.00 root semi join, left key:tpch.partsupp.ps_partkey, right key:tpch.part.p_partkey
│ ├─IndexLookUp_55 40000000.00 root
│ │ ├─IndexScan_53 40000000.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range:[NULL,+inf], keep order:true
│ │ └─TableScan_54 40000000.00 cop table:partsupp, keep order:false
│ └─TableReader_58 80007.93 root data:Selection_57
│ └─Selection_57 80007.93 cop like(tpch.part.p_name, "green%", 92)
│ └─TableScan_56 10000000.00 cop table:part, range:[-inf,+inf], keep order:true
└─TableReader_66 44189356.65 root data:Selection_65
└─Selection_65 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01)
└─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false
Sort_26 20000.00 root tpch.supplier.s_name:asc
└─Projection_28 20000.00 root tpch.supplier.s_name, tpch.supplier.s_address
└─HashRightJoin_30 20000.00 root inner join, inner:HashRightJoin_36, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)]
├─HashRightJoin_36 20000.00 root inner join, inner:TableReader_41, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)]
│ ├─TableReader_41 1.00 root data:Selection_40
│ │ └─Selection_40 1.00 cop eq(tpch.nation.n_name, "ALGERIA")
│ │ └─TableScan_39 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─TableReader_38 500000.00 root data:TableScan_37
│ └─TableScan_37 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
└─HashAgg_44 257492.04 root group by:tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_suppkey)
└─Projection_45 257492.04 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, tpch.part.p_partkey, mul(0.5, 14_col_0)
└─Selection_46 257492.04 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 14_col_0))
└─HashAgg_49 321865.05 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), firstrow(tpch.part.p_partkey), sum(tpch.lineitem.l_quantity)
└─HashLeftJoin_52 9711455.06 root left outer join, inner:TableReader_78, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)]
├─IndexJoin_61 321865.05 root inner join, inner:IndexLookUp_60, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey
│ ├─IndexLookUp_60 1.00 root
│ │ ├─IndexScan_58 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false
│ │ └─TableScan_59 1.00 cop table:partsupp, keep order:false
│ └─TableReader_73 80007.93 root data:Selection_72
│ └─Selection_72 80007.93 cop like(tpch.part.p_name, "green%", 92)
│ └─TableScan_71 10000000.00 cop table:part, range:[-inf,+inf], keep order:false
└─TableReader_78 44189356.65 root data:Selection_77
└─Selection_77 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01)
└─TableScan_76 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false
/*
Q21 Suppliers Who Kept Orders Waiting Query
This query identifies certain suppliers who were not able to ship required parts in a timely manner.
Expand Down
5 changes: 3 additions & 2 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ insert into t2 values(1, 0), (2, 1);
create table t3 (a bigint, b bigint, c bigint, d bigint);
create table t4 (a int, b int, c int, index idx(a, b), primary key(a));

set @@session.tidb_opt_insubquery_unfold = 1;
set @@session.tidb_opt_agg_push_down = 1;
set @@session.tidb_opt_insubq_to_join_and_agg=1;

explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a );
explain select * from t1;
Expand Down Expand Up @@ -35,7 +35,7 @@ explain select if(10, t1.c1, t1.c2) from t1;
explain select c1 from t2 union select c1 from t2 union all select c1 from t2;
explain select c1 from t2 union all select c1 from t2 union select c1 from t2;

set @@session.tidb_opt_insubquery_unfold = 0;
set @@session.tidb_opt_insubq_to_join_and_agg=0;

explain select sum(t1.c1 in (select c1 from t2)) from t1;
explain select 1 in (select c2 from t2) from t1;
Expand Down Expand Up @@ -100,3 +100,4 @@ explain select * from t where _tidb_rowid > 0;
explain select a, _tidb_rowid from t where a > 0;
explain select * from t where _tidb_rowid > 0 and a > 0;
drop table if exists t;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
5 changes: 3 additions & 2 deletions cmd/explaintest/t/explain_easy_stats.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ create table t3 (a bigint, b bigint, c bigint, d bigint);
load stats 's/explain_easy_stats_t3.json';
create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a));
load stats 's/explain_easy_stats_index_prune.json';
set @@session.tidb_opt_insubquery_unfold = 1;
set @@session.tidb_opt_agg_push_down = 1;
set @@session.tidb_opt_insubq_to_join_and_agg=1;


explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a );
Expand Down Expand Up @@ -38,7 +38,7 @@ explain select * from information_schema.columns;
explain select c2 = (select c2 from t2 where t1.c1 = t2.c1 order by c1 limit 1) from t1;
explain select * from t1 order by c1 desc limit 1;

set @@session.tidb_opt_insubquery_unfold = 0;
set @@session.tidb_opt_insubq_to_join_and_agg=0;

# explain select sum(t1.c1 in (select c1 from t2)) from t1;
explain select 1 in (select c2 from t2) from t1;
Expand All @@ -57,3 +57,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085
explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085526790 GROUP BY b ORDER BY a limit 1;

drop table if exists t1, t2, t3, index_prune;
set @@session.tidb_opt_insubq_to_join_and_agg=1;
2 changes: 1 addition & 1 deletion cmd/explaintest/t/tpch.test
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ where
The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders.
The query lists the customer name, customer key, the order key, date and total price and the quantity for the order.

Planner enhancement: unfold in subquery.
Planner enhancement: improve cost estimation and join reorder. The inner subquery's result is only 300+ rows.
*/
explain
select
Expand Down
4 changes: 2 additions & 2 deletions executor/aggregate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,10 @@ func (s *testSuite) TestAggregation(c *C) {
tk.MustExec("create table t2 (c1 int)")
tk.MustExec("insert into t1 values(3), (2)")
tk.MustExec("insert into t2 values(1), (2)")
tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 1")
tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 0")
result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1")
result.Check(testkit.Rows("1"))
tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 0")
tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 1")
result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1")
result.Check(testkit.Rows("1"))
result = tk.MustQuery("select sum(c1) k from (select * from t1 union all select * from t2)t group by c1 * 2 order by k")
Expand Down
Loading

0 comments on commit 696ef7b

Please sign in to comment.