diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index dbde6d506a2ff..8a99f3bde0c30 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -5,8 +5,8 @@ create table t2 (c1 int unique, c2 int); insert into t2 values(1, 0), (2, 1); create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info Projection_12 8000.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d @@ -86,14 +86,22 @@ TableReader_7 0.33 root data:Selection_6 └─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false, stats:pseudo explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info -StreamAgg_21 1.00 root funcs:sum(col_0) -└─TableReader_22 1.00 root data:StreamAgg_13 - └─StreamAgg_13 1.00 cop funcs:sum(in(test.t1.c1, 1, 2)) - └─TableScan_20 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo +StreamAgg_12 1.00 root funcs:sum(5_aux_0) +└─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 + ├─TableReader_19 10000.00 root data:TableScan_18 + │ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo + └─IndexReader_23 10000.00 root index:IndexScan_22 + └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info -TableReader_11 2.00 root data:TableScan_10 -└─TableScan_10 2.00 cop table:t1, range:[0,0], [1,1], keep order:false, stats:pseudo +Projection_8 10000.00 root test.t1.c1 +└─IndexJoin_11 10000.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, 
inner key:test.t1.c1 + ├─TableReader_10 10.00 root data:TableScan_9 + │ └─TableScan_9 10.00 cop table:t1, range: decided by [test.t2.c2], keep order:false, stats:pseudo + └─HashAgg_18 8000.00 root group by:col_1, funcs:firstrow(col_0) + └─TableReader_19 8000.00 root data:HashAgg_14 + └─HashAgg_14 8000.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) + └─TableScan_17 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1; id count task operator info Projection_12 10000.00 root k @@ -179,7 +187,7 @@ HashAgg_18 24000.00 root group by:t2.c1, funcs:firstrow(join_agg_0) └─IndexReader_67 8000.00 root index:StreamAgg_57 └─StreamAgg_57 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) └─IndexScan_65 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -set @@session.tidb_opt_insubquery_unfold = 0; +set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(5_aux_0) @@ -445,3 +453,4 @@ Projection_4 2666.67 root test.t.a └─Selection_6 2666.67 cop gt(test.t.a, 0) └─TableScan_5 3333.33 cop table:t, range:(0,+inf], keep order:false, stats:pseudo drop table if exists t; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 73cbf7877b685..18b351916ff89 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -8,8 +8,8 @@ create table t3 (a bigint, b bigint, c bigint, d bigint); load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; -set @@session.tidb_opt_insubquery_unfold = 
1; set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info Projection_12 1600.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d @@ -92,7 +92,14 @@ TableReader_7 0.50 root data:Selection_6 └─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info -TableDual_11 0.00 root rows:0 +Projection_8 1985.00 root test.t1.c1 +└─IndexJoin_11 1985.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, inner key:test.t1.c1 + ├─TableReader_10 1.00 root data:TableScan_9 + │ └─TableScan_9 1.00 cop table:t1, range: decided by [test.t2.c2], keep order:false + └─HashAgg_18 1985.00 root group by:col_1, funcs:firstrow(col_0) + └─TableReader_19 1985.00 root data:HashAgg_14 + └─HashAgg_14 1985.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) + └─TableScan_17 1985.00 cop table:t2, range:[-inf,+inf], keep order:false explain select * from information_schema.columns; id count task operator info MemTableScan_4 10000.00 root @@ -113,7 +120,7 @@ Limit_10 1.00 root offset:0, count:1 └─TableReader_21 1.00 root data:Limit_20 └─Limit_20 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc -set @@session.tidb_opt_insubquery_unfold = 0; +set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select 1 in (select c2 from t2) from t1; id count task operator info Projection_6 1999.00 root 5_aux_0 @@ -184,3 +191,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085 id count task operator info Point_Get_1 1.00 root table:index_prune, index:a b drop table if exists t1, t2, t3, index_prune; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 72211fa37bbd1..cb8d8d7a4db2c 100644 --- 
a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -984,7 +984,7 @@ The Large Volume Customer Query ranks customers based on their having placed a l quantity orders are defined as those orders whose total quantity is above a certain level. The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders. The query lists the customer name, customer key, the order key, date and total price and the quantity for the order. -Planner enhancement: unfold in subquery. +Planner enhancement: cost estimation is not so good, join reorder. The inner subquery's result is only 300+ rows. */ explain select @@ -1021,24 +1021,24 @@ o_totalprice desc, o_orderdate limit 100; id count task operator info -Projection_20 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 13_col_0 -└─TopN_23 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100 - └─HashAgg_26 60000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate) - └─HashLeftJoin_27 240004648.80 root semi join, inner:Selection_55, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] - ├─IndexJoin_32 300005811.00 root inner join, inner:IndexLookUp_31, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey - │ ├─HashRightJoin_48 75000000.00 root inner join, inner:TableReader_52, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] - │ │ ├─TableReader_52 7500000.00 root data:TableScan_51 - │ │ │ └─TableScan_51 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false - │ │ └─TableReader_50 75000000.00 root data:TableScan_49 - 
│ │ └─TableScan_49 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - │ └─IndexLookUp_31 1.00 root - │ ├─IndexScan_29 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false - │ └─TableScan_30 1.00 cop table:lineitem, keep order:false - └─Selection_55 59251097.60 root gt(sel_agg_2, 314) - └─HashAgg_62 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) - └─TableReader_63 74063872.00 root data:HashAgg_56 - └─HashAgg_56 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) - └─TableScan_61 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false +Projection_22 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 14_col_0 +└─TopN_25 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100 + └─HashAgg_28 75000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate) + └─HashLeftJoin_29 237008981.18 root inner join, inner:Selection_58, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] + ├─IndexJoin_35 300005811.00 root inner join, inner:IndexLookUp_34, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey + │ ├─HashRightJoin_51 75000000.00 root inner join, inner:TableReader_55, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] + │ │ ├─TableReader_55 7500000.00 root data:TableScan_54 + │ │ │ └─TableScan_54 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false + │ │ └─TableReader_53 75000000.00 root data:TableScan_52 + │ │ └─TableScan_52 75000000.00 
cop table:orders, range:[-inf,+inf], keep order:false + │ └─IndexLookUp_34 1.00 root + │ ├─IndexScan_32 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + │ └─TableScan_33 1.00 cop table:lineitem, keep order:false + └─Selection_58 59251097.60 root gt(sel_agg_2, 314) + └─HashAgg_65 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) + └─TableReader_66 74063872.00 root data:HashAgg_59 + └─HashAgg_59 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) + └─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q19 Discounted Revenue Query The Discounted Revenue Query reports the gross discounted revenue attributed to the sale of selected parts handled @@ -1140,29 +1140,30 @@ and n_name = 'ALGERIA' order by s_name; id count task operator info -Sort_23 16000.00 root tpch.supplier.s_name:asc -└─Projection_25 16000.00 root tpch.supplier.s_name, tpch.supplier.s_address - └─HashLeftJoin_26 16000.00 root semi join, inner:Projection_38, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] - ├─HashRightJoin_32 20000.00 root inner join, inner:TableReader_37, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] - │ ├─TableReader_37 1.00 root data:Selection_36 - │ │ └─Selection_36 1.00 cop eq(tpch.nation.n_name, "ALGERIA") - │ │ └─TableScan_35 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─TableReader_34 500000.00 root data:TableScan_33 - │ └─TableScan_33 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false - └─Projection_38 6363545.60 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, mul(0.5, 13_col_0) - └─Selection_39 6363545.60 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 13_col_0)) - └─HashAgg_42 7954432.00 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, 
funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), sum(tpch.lineitem.l_quantity) - └─HashLeftJoin_45 177770004.55 root left outer join, inner:TableReader_66, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] - ├─MergeJoin_46 32000000.00 root semi join, left key:tpch.partsupp.ps_partkey, right key:tpch.part.p_partkey - │ ├─IndexLookUp_55 40000000.00 root - │ │ ├─IndexScan_53 40000000.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range:[NULL,+inf], keep order:true - │ │ └─TableScan_54 40000000.00 cop table:partsupp, keep order:false - │ └─TableReader_58 80007.93 root data:Selection_57 - │ └─Selection_57 80007.93 cop like(tpch.part.p_name, "green%", 92) - │ └─TableScan_56 10000000.00 cop table:part, range:[-inf,+inf], keep order:true - └─TableReader_66 44189356.65 root data:Selection_65 - └─Selection_65 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) - └─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false +Sort_26 20000.00 root tpch.supplier.s_name:asc +└─Projection_28 20000.00 root tpch.supplier.s_name, tpch.supplier.s_address + └─HashRightJoin_30 20000.00 root inner join, inner:HashRightJoin_36, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] + ├─HashRightJoin_36 20000.00 root inner join, inner:TableReader_41, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] + │ ├─TableReader_41 1.00 root data:Selection_40 + │ │ └─Selection_40 1.00 cop eq(tpch.nation.n_name, "ALGERIA") + │ │ └─TableScan_39 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─TableReader_38 500000.00 root data:TableScan_37 + │ └─TableScan_37 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + └─HashAgg_44 257492.04 root group by:tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_suppkey) + └─Projection_45 
257492.04 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, tpch.part.p_partkey, mul(0.5, 14_col_0) + └─Selection_46 257492.04 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 14_col_0)) + └─HashAgg_49 321865.05 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), firstrow(tpch.part.p_partkey), sum(tpch.lineitem.l_quantity) + └─HashLeftJoin_52 9711455.06 root left outer join, inner:TableReader_78, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] + ├─IndexJoin_61 321865.05 root inner join, inner:IndexLookUp_60, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey + │ ├─IndexLookUp_60 1.00 root + │ │ ├─IndexScan_58 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ │ └─TableScan_59 1.00 cop table:partsupp, keep order:false + │ └─TableReader_73 80007.93 root data:Selection_72 + │ └─Selection_72 80007.93 cop like(tpch.part.p_name, "green%", 92) + │ └─TableScan_71 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + └─TableReader_78 44189356.65 root data:Selection_77 + └─Selection_77 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) + └─TableScan_76 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q21 Suppliers Who Kept Orders Waiting Query This query identifies certain suppliers who were not able to ship required parts in a timely manner. 
diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index f8ca1c8bb1dc8..c8fcf28f94cff 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -6,8 +6,8 @@ insert into t2 values(1, 0), (2, 1); create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); explain select * from t1; @@ -35,7 +35,7 @@ explain select if(10, t1.c1, t1.c2) from t1; explain select c1 from t2 union select c1 from t2 union all select c1 from t2; explain select c1 from t2 union all select c1 from t2 union select c1 from t2; -set @@session.tidb_opt_insubquery_unfold = 0; +set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; @@ -100,3 +100,4 @@ explain select * from t where _tidb_rowid > 0; explain select a, _tidb_rowid from t where a > 0; explain select * from t where _tidb_rowid > 0 and a > 0; drop table if exists t; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/t/explain_easy_stats.test b/cmd/explaintest/t/explain_easy_stats.test index 06c9dd8d6d3f8..fc9cee2f9c5bc 100644 --- a/cmd/explaintest/t/explain_easy_stats.test +++ b/cmd/explaintest/t/explain_easy_stats.test @@ -8,8 +8,8 @@ create table t3 (a bigint, b bigint, c bigint, d bigint); load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; +set 
@@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); @@ -38,7 +38,7 @@ explain select * from information_schema.columns; explain select c2 = (select c2 from t2 where t1.c1 = t2.c1 order by c1 limit 1) from t1; explain select * from t1 order by c1 desc limit 1; -set @@session.tidb_opt_insubquery_unfold = 0; +set @@session.tidb_opt_insubq_to_join_and_agg=0; # explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; @@ -57,3 +57,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085 explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085526790 GROUP BY b ORDER BY a limit 1; drop table if exists t1, t2, t3, index_prune; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/t/tpch.test b/cmd/explaintest/t/tpch.test index 5d313d9cd11fb..6909c72ad7e6b 100644 --- a/cmd/explaintest/t/tpch.test +++ b/cmd/explaintest/t/tpch.test @@ -773,7 +773,7 @@ where The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders. The query lists the customer name, customer key, the order key, date and total price and the quantity for the order. - Planner enhancement: unfold in subquery. + Planner enhancement: cost estimation is not so good, join reorder. The inner subquery's result is only 300+ rows. 
*/ explain select diff --git a/executor/aggregate_test.go b/executor/aggregate_test.go index 1e049fb9d5673..5b1fb19e636e7 100644 --- a/executor/aggregate_test.go +++ b/executor/aggregate_test.go @@ -248,10 +248,10 @@ func (s *testSuite) TestAggregation(c *C) { tk.MustExec("create table t2 (c1 int)") tk.MustExec("insert into t1 values(3), (2)") tk.MustExec("insert into t2 values(1), (2)") - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 1") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 0") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 0") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 1") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) result = tk.MustQuery("select sum(c1) k from (select * from t1 union all select * from t2)t group by c1 * 2 order by k") diff --git a/executor/join_test.go b/executor/join_test.go index 8b4c37e654f6b..9310c7677e3c3 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -726,14 +726,14 @@ func (s *testSuite) TestInSubquery(c *C) { tk.MustExec("create table t2 (a int)") tk.MustExec("insert into t1 values (1),(2)") tk.MustExec("insert into t2 values (1),(2)") - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 1") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 0") result = tk.MustQuery("select * from t1 where a in (select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") result.Check(testkit.Rows()) result = tk.MustQuery("select * from t1 where a not in (select * from t2 where false)") result.Sort().Check(testkit.Rows("1", "2")) - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 0") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 1") result = tk.MustQuery("select * from t1 where a in 
(select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index ffe7d9c1016d2..058c74a01b775 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -384,7 +384,7 @@ func (s *testAnalyzeSuite) TestEmptyTable(c *C) { }, { sql: "select * from t where c1 in (select c1 from t1)", - best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1))}(test.t.c1,test.t1.c1)", + best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1)->HashAgg)->HashAgg}(test.t.c1,test.t1.c1)->Projection", }, { sql: "select * from t, t1 where t.c1 = t1.c1", diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 6b34b0acee953..0719039f3f6c6 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -618,38 +618,6 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = expression.ErrOperandColumns.GenWithStackByArgs(lLen) return v, true } - // Sometimes we can unfold the in subquery. For example, a in (select * from t) can rewrite to `a in (1,2,3,4)`. - // TODO: Now we cannot add it to CBO framework. Instead, user can set a session variable to open this optimization. - // We will improve our CBO framework in future. 
- if lLen == 1 && er.ctx.GetSessionVars().AllowInSubqueryUnFolding && len(np.extractCorrelatedCols()) == 0 { - physicalPlan, err1 := DoOptimize(er.b.optFlag, np) - if err1 != nil { - er.err = errors.Trace(err1) - return v, true - } - rows, err1 := EvalSubquery(physicalPlan, er.b.is, er.b.ctx) - if err1 != nil { - er.err = errors.Trace(err1) - return v, true - } - for _, row := range rows { - con := &expression.Constant{ - Value: row[0], - RetType: np.Schema().Columns[0].GetType(), - } - er.ctxStack = append(er.ctxStack, con) - } - listLen := len(rows) - if listLen == 0 { - er.ctxStack[len(er.ctxStack)-1] = &expression.Constant{ - Value: types.NewDatum(v.Not), - RetType: types.NewFieldType(mysql.TypeTiny), - } - } else { - er.inToExpression(listLen, v.Not, &v.Type) - } - return v, true - } var rexpr expression.Expression if np.Schema().Len() == 1 { rexpr = np.Schema().Columns[0] @@ -671,9 +639,40 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } - er.p, er.err = er.b.buildSemiApply(er.p, np, expression.SplitCNFItems(checkCondition), asScalar, v.Not) - if er.err != nil { - return v, true + // If it's not of the form `not in (SUBQUERY)`, has no correlated column, and doesn't need to append a scalar value, we can rewrite it to an inner join. + if er.ctx.GetSessionVars().AllowInSubqToJoinAndAgg && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { + // We need to try to eliminate the agg and the projection produced by this operation. + er.b.optFlag |= flagEliminateAgg + er.b.optFlag |= flagEliminateProjection + // Build distinct for the inner query. + agg := er.b.buildDistinct(np, np.Schema().Len()) + for _, col := range agg.schema.Columns { + col.IsAggOrSubq = true + } + eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg, false, false) + // Build inner join above the aggregation. 
+ join := LogicalJoin{ + JoinType: InnerJoin, + EqualConditions: eq, + LeftConditions: left, + RightConditions: right, + OtherConditions: other, + }.Init(er.ctx) + join.SetChildren(er.p, agg) + join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema)) + // Set join hint for this join. + if er.b.TableHints() != nil { + er.err = join.setPreferredJoinType(er.b.TableHints()) + if er.err != nil { + return v, true + } + } + er.p = join + } else { + er.p, er.err = er.b.buildSemiApply(er.p, np, expression.SplitCNFItems(checkCondition), asScalar, v.Not) + if er.err != nil { + return v, true + } } if asScalar { diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index b6a758c191825..724705af02872 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -77,6 +77,7 @@ func (b *PlanBuilder) buildAggregation(p LogicalPlan, aggFuncList []*ast.Aggrega // when we eliminate the max and min we may add `is not null` filter. b.optFlag = b.optFlag | flagPredicatePushDown b.optFlag = b.optFlag | flagEliminateAgg + b.optFlag = b.optFlag | flagEliminateProjection plan4Agg := LogicalAggregation{AggFuncs: make([]*aggregation.AggFuncDesc, 0, len(aggFuncList))}.Init(b.ctx) schema4Agg := expression.NewSchema(make([]*expression.Column, 0, len(aggFuncList)+p.Schema().Len())...) diff --git a/planner/core/physical_plan_test.go b/planner/core/physical_plan_test.go index 71532c0aa9508..987db44868b82 100644 --- a/planner/core/physical_plan_test.go +++ b/planner/core/physical_plan_test.go @@ -389,12 +389,12 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) { // Test Semi Join hint success. { sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 where t1.a in (select a from t t2)", - best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", + best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)->Projection", }, // Test Semi Join hint fail. 
{ sql: "select /*+ TIDB_INLJ(t2) */ * from t t1 where t1.a in (select a from t t2)", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", + best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t2.a,t1.a)->Projection", }, { sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.c=t2.c and t1.f=t2.f", @@ -457,7 +457,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSubquery(c *C) { //}, { sql: "select * from t where a in (select s.a from t s) order by t.a", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,s.a)", + best: "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,s.a)->Projection", }, // Test Nested sub query. { @@ -467,7 +467,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSubquery(c *C) { // Test Semi Join + Order by. { sql: "select * from t where a in (select a from t) order by b", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Sort", + best: "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Projection->Sort", }, // Test Apply. 
{ diff --git a/session/session.go b/session/session.go index 0fed9b1f910de..32ba44cb68e13 100644 --- a/session/session.go +++ b/session/session.go @@ -1290,7 +1290,7 @@ const loadCommonGlobalVarsSQL = "select HIGH_PRIORITY * from mysql.global_variab variable.TiDBBackoffLockFast + quoteCommaQuote + variable.TiDBConstraintCheckInPlace + quoteCommaQuote + variable.TiDBDDLReorgWorkerCount + quoteCommaQuote + - variable.TiDBOptInSubqUnFolding + quoteCommaQuote + + variable.TiDBOptInSubqToJoinAndAgg + quoteCommaQuote + variable.TiDBDistSQLScanConcurrency + quoteCommaQuote + variable.TiDBMaxChunkSize + quoteCommaQuote + variable.TiDBEnableCascadesPlanner + quoteCommaQuote + diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 4df9277511a69..b720753809ac4 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -253,8 +253,8 @@ type SessionVars struct { // AllowAggPushDown can be set to false to forbid aggregation push down. AllowAggPushDown bool - // AllowInSubqueryUnFolding can be set to true to fold in subquery - AllowInSubqueryUnFolding bool + // AllowInSubqToJoinAndAgg can be set to false to forbid rewriting the semi join to inner join with agg. + AllowInSubqToJoinAndAgg bool // CurrInsertValues is used to record current ValuesExpr's values. 
// See http://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values @@ -335,6 +335,7 @@ func NewSessionVars() *SessionVars { RetryLimit: DefTiDBRetryLimit, DisableTxnAutoRetry: DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, + AllowInSubqToJoinAndAgg: DefOptInSubqToJoinAndAgg, EnableRadixJoin: false, L2CacheSize: cpuid.CPU.Cache.L2, CommandValue: uint32(mysql.ComSleep), @@ -553,8 +554,8 @@ func (s *SessionVars) SetSystemVar(name string, val string) error { s.SkipUTF8Check = TiDBOptOn(val) case TiDBOptAggPushDown: s.AllowAggPushDown = TiDBOptOn(val) - case TiDBOptInSubqUnFolding: - s.AllowInSubqueryUnFolding = TiDBOptOn(val) + case TiDBOptInSubqToJoinAndAgg: + s.AllowInSubqToJoinAndAgg = TiDBOptOn(val) case TiDBIndexLookupConcurrency: s.IndexLookupConcurrency = tidbOptPositiveInt32(val, DefIndexLookupConcurrency) case TiDBIndexLookupJoinConcurrency: diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 00383aa56d860..63e7991d55371 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -627,7 +627,7 @@ var defaultSysVars = []*SysVar{ {ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime}, {ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)}, {ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)}, - {ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)}, + {ScopeGlobal | ScopeSession, TiDBOptInSubqToJoinAndAgg, boolToIntStr(DefOptInSubqToJoinAndAgg)}, {ScopeGlobal | ScopeSession, TiDBIndexJoinBatchSize, strconv.Itoa(DefIndexJoinBatchSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupSize, strconv.Itoa(DefIndexLookupSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupConcurrency, strconv.Itoa(DefIndexLookupConcurrency)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 5787d2a99e01f..1cde648d31fa0 100644 --- 
a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -127,8 +127,8 @@ const ( // If the query has a LIMIT clause, high concurrency makes the system do much more work than needed. TiDBDistSQLScanConcurrency = "tidb_distsql_scan_concurrency" - // tidb_opt_insubquery_unfold is used to enable/disable the optimizer rule of in subquery unfold. - TiDBOptInSubqUnFolding = "tidb_opt_insubquery_unfold" + // tidb_opt_insubq_to_join_and_agg is used to enable/disable the optimizer rule of rewriting IN subquery to inner join with aggregation. + TiDBOptInSubqToJoinAndAgg = "tidb_opt_insubq_to_join_and_agg" // tidb_index_join_batch_size is used to set the batch size of a index lookup join. // The index lookup join fetches batches of data from outer executor and constructs ranges for inner executor. @@ -223,7 +223,7 @@ const ( DefChecksumTableConcurrency = 4 DefSkipUTF8Check = false DefOptAggPushDown = false - DefOptInSubqUnfolding = false + DefOptInSubqToJoinAndAgg = true DefBatchInsert = false DefBatchDelete = false DefCurretTS = 0 diff --git a/sessionctx/variable/varsutil.go b/sessionctx/variable/varsutil.go index 3ab4defe4747c..5f95fae07cf71 100644 --- a/sessionctx/variable/varsutil.go +++ b/sessionctx/variable/varsutil.go @@ -292,7 +292,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string, } return value, ErrWrongValueForVar.GenWithStackByArgs(name, value) case AutocommitVar, TiDBSkipUTF8Check, TiDBOptAggPushDown, - TiDBOptInSubqUnFolding, TiDBEnableTablePartition, + TiDBOptInSubqToJoinAndAgg, TiDBEnableTablePartition, TiDBBatchInsert, TiDBDisableTxnAutoRetry, TiDBEnableStreaming, TiDBBatchDelete, TiDBEnableCascadesPlanner: if strings.EqualFold(value, "ON") || value == "1" || strings.EqualFold(value, "OFF") || value == "0" {